Updated scanner to infer when it has reached the end of results.
All checks were successful
ci / build-test (push) Successful in 2m18s
ci / publish-image (push) Has been skipped

This commit is contained in:
2026-03-07 01:26:04 -05:00
parent 1e01edf1b7
commit 62c2efab01
18 changed files with 193 additions and 79 deletions

View File

@@ -0,0 +1,6 @@
namespace JSMR.Application.Scanning.Contracts;
/// <summary>
/// Outcome of scanning a single page of voice works.
/// </summary>
/// <param name="Works">Works found on the scanned page.</param>
/// <param name="EndOfResults">
/// <c>true</c> when the scan concluded that there are no further results to read;
/// otherwise <c>false</c>.
/// </param>
public record VoiceWorkScanResult(
DLSiteWork[] Works,
bool EndOfResults
);

View File

@@ -4,5 +4,5 @@ namespace JSMR.Application.Scanning.Ports;
/// <summary>
/// Port for scanning one page of voice works.
/// </summary>
public interface IVoiceWorksScanner
{
Task<IReadOnlyList<DLSiteWork>> ScanPageAsync(VoiceWorkScanOptions request, CancellationToken cancellationToken = default);
/// <summary>
/// Scans the page described by <paramref name="request"/>.
/// </summary>
/// <param name="request">Options describing the page to scan.</param>
/// <param name="cancellationToken">Token used to cancel the scan.</param>
/// <returns>The works found on the page together with an end-of-results flag.</returns>
Task<VoiceWorkScanResult> ScanPageAsync(VoiceWorkScanOptions request, CancellationToken cancellationToken = default);
}

View File

@@ -18,8 +18,11 @@ public sealed class ScanVoiceWorksHandler(
IVoiceWorksScanner? scanner = scannerRepository.GetScanner(request.Locale);
IVoiceWorkUpdater? updater = updaterRepository.GetUpdater(request.Locale);
if (scanner is null || updater is null)
return new();
if (scanner is null)
throw new InvalidOperationException($"No scanner registered for locale {request.Locale}.");
if (updater is null)
throw new InvalidOperationException($"No updater registered for locale {request.Locale}.");
VoiceWorkScanOptions options = new(
PageNumber: request.PageNumber,
@@ -29,15 +32,20 @@ public sealed class ScanVoiceWorksHandler(
ExcludeAIGeneratedWorks: true
);
IReadOnlyList<DLSiteWork> works = await scanner.ScanPageAsync(options, cancellationToken);
VoiceWorkScanResult scanResult = await scanner.ScanPageAsync(options, cancellationToken);
if (works.Count == 0)
return new();
if (scanResult.EndOfResults)
{
return new ScanVoiceWorksResponse(
Results: [],
EndOfResults: true
);
}
string[] productIds = [.. works.Where(x => !string.IsNullOrWhiteSpace(x.ProductId)).Select(x => x.ProductId!)];
string[] productIds = [.. scanResult.Works.Where(x => !string.IsNullOrWhiteSpace(x.ProductId)).Select(x => x.ProductId!)];
VoiceWorkDetailCollection voiceWorkDetails = await dlsiteClient.GetVoiceWorkDetailsAsync(productIds, cancellationToken);
VoiceWorkIngest[] ingests = [.. works.Select(work =>
VoiceWorkIngest[] ingests = [.. scanResult.Works.Select(work =>
{
voiceWorkDetails.TryGetValue(work.ProductId!, out VoiceWorkDetails? value);
return VoiceWorkIngest.From(work, value);
@@ -48,9 +56,9 @@ public sealed class ScanVoiceWorksHandler(
await searchUpdater.UpdateAsync(voiceWorkIds, cancellationToken);
return new()
{
Results = upsertResults
};
return new ScanVoiceWorksResponse(
Results: upsertResults,
EndOfResults: false
);
}
}

View File

@@ -2,9 +2,7 @@
namespace JSMR.Application.Scanning;
public sealed class ScanVoiceWorksResponse
{
public int Inserted { get; init; }
public int Updated { get; init; }
public VoiceWorkUpsertResult[] Results { get; init; } = [];
}
/// <summary>
/// Response produced by handling a voice-work scan request.
/// </summary>
/// <param name="Results">Upsert results for the works processed by the scan.</param>
/// <param name="EndOfResults">
/// <c>true</c> when the scan reported that there are no further results; otherwise <c>false</c>.
/// </param>
public sealed record ScanVoiceWorksResponse(
VoiceWorkUpsertResult[] Results,
bool EndOfResults
);

View File

@@ -10,9 +10,12 @@ public abstract class ApiClient(IHttpService http, ILogger logger, JsonSerialize
{
/// <summary>
/// Issues a GET request to <paramref name="url"/> and deserializes the JSON body to <typeparamref name="T"/>.
/// </summary>
/// <typeparam name="T">Type the response body is deserialized to.</typeparam>
/// <param name="url">Address to request.</param>
/// <param name="cancellationToken">Token used to cancel the request.</param>
/// <returns>The deserialized response body.</returns>
/// <exception cref="System.Net.Http.HttpRequestException">The response status code is outside the 2xx range.</exception>
/// <exception cref="InvalidOperationException">The response has no body, or the body deserializes to <c>null</c>.</exception>
protected async Task<T> GetJsonAsync<T>(string url, CancellationToken cancellationToken = default)
{
    HttpStringResponse response = await http.GetAsync(url, cancellationToken);

    // GetAsync hands back the status code alongside the body, so a non-success reply has to be
    // rejected here; otherwise an error page would be fed to the JSON deserializer.
    if (!response.IsSuccessStatusCode)
    {
        throw new System.Net.Http.HttpRequestException(
            $"Unexpected response status code {(int)response.StatusCode} ({response.StatusCode}) from {url}.",
            inner: null,
            statusCode: response.StatusCode);
    }

    if (response.Content is null)
    {
        throw new InvalidOperationException($"No content to deserialize from {url}.");
    }

    return JsonSerializer.Deserialize<T>(response.Content, json)
        ?? throw new InvalidOperationException($"Failed to deserialize JSON to {typeof(T).Name} from {url}.");
}

View File

@@ -0,0 +1,12 @@
using HtmlAgilityPack;
using System.Net;
namespace JSMR.Infrastructure.Http;
/// <summary>
/// Result of loading an HTML page: the HTTP status code and, when one was produced, the parsed document.
/// </summary>
public sealed class HtmlLoadResult
{
    /// <summary>HTTP status code of the response.</summary>
    public required HttpStatusCode StatusCode { get; init; }

    /// <summary>Parsed document, or <c>null</c> when none was produced.</summary>
    public HtmlDocument? Document { get; init; }

    /// <summary><c>true</c> when <see cref="StatusCode"/> lies in the 200-299 range.</summary>
    public bool IsSuccessStatusCode
    {
        get
        {
            int code = (int)StatusCode;
            return code >= 200 && code <= 299;
        }
    }
}

View File

@@ -4,13 +4,26 @@ namespace JSMR.Infrastructure.Http;
/// <summary>
/// Loads pages through an <see cref="IHttpService"/> and parses their bodies as HTML.
/// </summary>
public class HtmlLoader(IHttpService httpService) : IHtmlLoader
{
public async Task<HtmlDocument> GetHtmlDocumentAsync(string url, CancellationToken cancellationToken)
/// <summary>
/// Requests <paramref name="url"/> and, when the status code indicates success, parses the body as HTML.
/// </summary>
/// <param name="url">Address of the page to load.</param>
/// <param name="cancellationToken">Token used to cancel the request.</param>
/// <returns>
/// The response status code plus the parsed document; the document is <c>null</c> for a
/// non-success status code.
/// </returns>
public async Task<HtmlLoadResult> GetHtmlDocumentAsync(string url, CancellationToken cancellationToken)
{
string html = await httpService.GetStringAsync(url, cancellationToken);
HttpStringResponse response = await httpService.GetAsync(url, cancellationToken);
// Non-success responses are reported to the caller without a document instead of being parsed.
if (!response.IsSuccessStatusCode)
{
return new HtmlLoadResult
{
StatusCode = response.StatusCode,
Document = null
};
}
HtmlDocument document = new();
document.LoadHtml(html);
// A missing body is loaded as an empty string.
document.LoadHtml(response.Content ?? string.Empty);
return document;
return new HtmlLoadResult
{
StatusCode = response.StatusCode,
Document = document
};
}
}

View File

@@ -2,10 +2,10 @@
public class HttpService(HttpClient httpClient) : IHttpService
{
public Task<string> GetStringAsync(string url, CancellationToken cancellationToken)
=> GetStringAsync(url, new Dictionary<string, string>(), cancellationToken);
public Task<HttpStringResponse> GetAsync(string url, CancellationToken cancellationToken)
=> GetAsync(url, new Dictionary<string, string>(), cancellationToken);
public async Task<string> GetStringAsync(string url, IDictionary<string, string> headers, CancellationToken cancellationToken)
public async Task<HttpStringResponse> GetAsync(string url, IDictionary<string, string> headers, CancellationToken cancellationToken)
{
using HttpRequestMessage request = new(HttpMethod.Get, url);
@@ -14,11 +14,18 @@ public class HttpService(HttpClient httpClient) : IHttpService
request.Headers.TryAddWithoutValidation(header.Key, header.Value);
}
httpClient.DefaultRequestHeaders.UserAgent.ParseAdd("JSMR/1.0");
request.Headers.UserAgent.ParseAdd("JSMR/1.0");
using HttpResponseMessage response = await httpClient.SendAsync(request, cancellationToken);
response.EnsureSuccessStatusCode();
using HttpResponseMessage response = await httpClient.SendAsync(request, HttpCompletionOption.ResponseHeadersRead, cancellationToken);
return await response.Content.ReadAsStringAsync(cancellationToken);
string? content = response.Content is null
? null
: await response.Content.ReadAsStringAsync(cancellationToken);
return new HttpStringResponse
{
StatusCode = response.StatusCode,
Content = content
};
}
}

View File

@@ -0,0 +1,11 @@
using System.Net;
namespace JSMR.Infrastructure.Http;
/// <summary>
/// HTTP response reduced to its status code and its body read as a string.
/// </summary>
public sealed class HttpStringResponse
{
    /// <summary>HTTP status code of the response.</summary>
    public required HttpStatusCode StatusCode { get; init; }

    /// <summary>Response body as text, or <c>null</c> when no body was captured.</summary>
    public string? Content { get; init; }

    /// <summary><c>true</c> when <see cref="StatusCode"/> lies in the 200-299 range.</summary>
    public bool IsSuccessStatusCode
    {
        get
        {
            int code = (int)StatusCode;
            return code >= 200 && code <= 299;
        }
    }
}

View File

@@ -1,8 +1,6 @@
using HtmlAgilityPack;
namespace JSMR.Infrastructure.Http;
namespace JSMR.Infrastructure.Http;
/// <summary>
/// Loads an HTML page from a URL.
/// </summary>
public interface IHtmlLoader
{
Task<HtmlDocument> GetHtmlDocumentAsync(string url, CancellationToken cancellationToken);
/// <summary>
/// Loads the page at <paramref name="url"/>.
/// </summary>
/// <param name="url">Address of the page to load.</param>
/// <param name="cancellationToken">Token used to cancel the load.</param>
/// <returns>The response status code and, when available, the parsed document.</returns>
Task<HtmlLoadResult> GetHtmlDocumentAsync(string url, CancellationToken cancellationToken);
}

View File

@@ -2,6 +2,6 @@
/// <summary>
/// Abstraction over HTTP GET requests whose bodies are read as strings.
/// </summary>
public interface IHttpService
{
Task<string> GetStringAsync(string url, CancellationToken cancellationToken);
Task<string> GetStringAsync(string url, IDictionary<string, string> headers, CancellationToken cancellationToken);
/// <summary>
/// Sends a GET request to <paramref name="url"/>.
/// </summary>
/// <param name="url">Address to request.</param>
/// <param name="cancellationToken">Token used to cancel the request.</param>
/// <returns>The response status code and body.</returns>
Task<HttpStringResponse> GetAsync(string url, CancellationToken cancellationToken);
/// <summary>
/// Sends a GET request to <paramref name="url"/> with additional request headers.
/// </summary>
/// <param name="url">Address to request.</param>
/// <param name="headers">Header names and values to add to the request.</param>
/// <param name="cancellationToken">Token used to cancel the request.</param>
/// <returns>The response status code and body.</returns>
Task<HttpStringResponse> GetAsync(string url, IDictionary<string, string> headers, CancellationToken cancellationToken);
}

View File

@@ -8,6 +8,7 @@ using JSMR.Infrastructure.Http;
using JSMR.Infrastructure.Scanning.Extensions;
using JSMR.Infrastructure.Scanning.Models;
using System.Globalization;
using System.Net;
using System.Text.RegularExpressions;
namespace JSMR.Infrastructure.Scanning;
@@ -20,24 +21,46 @@ public abstract class VoiceWorksScanner(IHtmlLoader htmlLoader) : IVoiceWorksSca
protected abstract DateOnly? GetEstimatedReleaseDate(string expectedDate);
protected abstract DateOnly? GetSalesDate(string salesDate);
/// <summary>
/// Scans one search-result page and reports whether the end of the results was reached.
/// </summary>
/// <param name="options">Options used to build the page URL and to map the page's nodes to works.</param>
/// <param name="cancellationToken">Token used to cancel the page load.</param>
/// <returns>
/// The works found on the page with <c>EndOfResults</c> set to <c>false</c>, or an empty result with
/// <c>EndOfResults</c> set to <c>true</c> when the page responds with 404 or contains no work nodes.
/// </returns>
/// <exception cref="HttpRequestException">
/// The page responded with a non-success status other than 404, or no document was produced.
/// </exception>
public async Task<VoiceWorkScanResult> ScanPageAsync(VoiceWorkScanOptions options, CancellationToken cancellationToken = default)
{
    string url = GetUrl(options);
    HtmlLoadResult result = await htmlLoader.GetHtmlDocumentAsync(url, cancellationToken);

    // Expected boundary: past the last search page
    if (result.StatusCode == HttpStatusCode.NotFound)
    {
        return new VoiceWorkScanResult(
            Works: [],
            EndOfResults: true
        );
    }

    // Unexpected non-success response
    if (!result.IsSuccessStatusCode || result.Document is null)
    {
        // Attach the status code to the exception so callers can read HttpRequestException.StatusCode
        // instead of parsing the message.
        throw new HttpRequestException(
            $"Unexpected response status code {(int)result.StatusCode} ({result.StatusCode}) while scanning {url}",
            inner: null,
            statusCode: result.StatusCode);
    }

    DLSiteHtmlDocument document = new(result.Document);
    DLSiteHtmlNode[] nodes = document.GetDLSiteNodes();

    // Defensive fallback in case DLsite changes from 404 to 200 with empty page
    if (nodes.Length == 0)
    {
        return new VoiceWorkScanResult(
            Works: [],
            EndOfResults: true
        );
    }

    DLSiteWork[] works = GetDLSiteWorks(nodes, options);
    works.InferAndUpdateExpectedDates();

    return new VoiceWorkScanResult(
        Works: works,
        EndOfResults: false
    );
}
protected string GetUrl(VoiceWorkScanOptions options)

View File

@@ -0,0 +1,20 @@
using JSMR.Infrastructure.Http;
using NSubstitute;
using System.Net;
namespace JSMR.Tests.Extensions;
internal static class HttpServiceTestExtensions
{
public static void ReturnsContent(this IHttpService httpService, string content, HttpStatusCode statusCode = HttpStatusCode.OK)
{
HttpStringResponse response = new()
{
StatusCode = statusCode,
Content = content
};
httpService.GetAsync(Arg.Any<string>(), Arg.Any<CancellationToken>())
.Returns(Task.FromResult(response));
}
}

View File

@@ -0,0 +1,17 @@
using JSMR.Application.Scanning.Contracts;
using JSMR.Application.Scanning.Ports;
using Shouldly;
namespace JSMR.Tests.Extensions;
internal static class ScannerTestExtensions
{
public static async Task<IReadOnlyList<DLSiteWork>> ScanWorksAsync(this IVoiceWorksScanner scanner, VoiceWorkScanOptions options)
{
VoiceWorkScanResult result = await scanner.ScanPageAsync(options, CancellationToken.None);
result.EndOfResults.ShouldBeFalse();
return result.Works;
}
}

View File

@@ -5,6 +5,7 @@ using JSMR.Infrastructure.Http;
using JSMR.Infrastructure.Integrations.DLSite;
using JSMR.Infrastructure.Integrations.DLSite.Mapping;
using JSMR.Infrastructure.Integrations.DLSite.Models;
using JSMR.Tests.Extensions;
using JSMR.Tests.Utilities;
using Microsoft.Extensions.Logging;
using NSubstitute;
@@ -25,9 +26,7 @@ public class DLSiteClientTests
string productInfoJson = await ReadJsonResourceAsync("Product-Info.json");
IHttpService httpService = Substitute.For<IHttpService>();
httpService.GetStringAsync(Arg.Any<string>(), CancellationToken.None)
.Returns(Task.FromResult(productInfoJson));
httpService.ReturnsContent(productInfoJson);
var logger = Substitute.For<ILogger<DLSiteClient>>();
var client = new DLSiteClient(httpService, logger);

View File

@@ -4,6 +4,7 @@ using JSMR.Application.Scanning.Contracts;
using JSMR.Application.Scanning.Ports;
using JSMR.Infrastructure.Http;
using JSMR.Infrastructure.Scanning;
using JSMR.Tests.Extensions;
using JSMR.Tests.Utilities;
using NSubstitute;
using Shouldly;
@@ -23,9 +24,7 @@ public class VoiceWorkScannerTests
string html = await ReadResourceAsync("Japanese-Page.html");
IHttpService httpService = Substitute.For<IHttpService>();
httpService.GetStringAsync(Arg.Any<string>(), CancellationToken.None)
.Returns(Task.FromResult(html));
httpService.ReturnsContent(html);
HtmlLoader loader = new(httpService);
JapaneseVoiceWorksScanner scanner = new(loader);
@@ -38,7 +37,7 @@ public class VoiceWorkScannerTests
ExcludedMakerIds: []
);
var result = await scanner.ScanPageAsync(options, CancellationToken.None);
var result = await scanner.ScanWorksAsync(options);
result.Count.ShouldBe(1);
@@ -61,9 +60,7 @@ public class VoiceWorkScannerTests
string html = await ReadResourceAsync("Japanese-Page-Updated.html");
IHttpService httpService = Substitute.For<IHttpService>();
httpService.GetStringAsync(Arg.Any<string>(), CancellationToken.None)
.Returns(Task.FromResult(html));
httpService.ReturnsContent(html);
HtmlLoader loader = new(httpService);
JapaneseVoiceWorksScanner scanner = new(loader);
@@ -76,7 +73,7 @@ public class VoiceWorkScannerTests
ExcludedMakerIds: []
);
var result = await scanner.ScanPageAsync(options, CancellationToken.None);
var result = await scanner.ScanWorksAsync(options);
result.Count.ShouldBe(2);
@@ -110,7 +107,7 @@ public class VoiceWorkScannerTests
{
IVoiceWorksScanner scanner = Substitute.For<IVoiceWorksScanner>();
IReadOnlyList<DLSiteWork> scannedWorks =
DLSiteWork[] scannedWorks =
[
new()
{
@@ -123,8 +120,13 @@ public class VoiceWorkScannerTests
}
];
VoiceWorkScanResult scanResult = new(
Works: scannedWorks,
EndOfResults: false
);
scanner.ScanPageAsync(Arg.Any<VoiceWorkScanOptions>(), CancellationToken.None)
.Returns(Task.FromResult(scannedWorks));
.Returns(Task.FromResult(scanResult));
IDLSiteClient dlsiteClient = Substitute.For<IDLSiteClient>();
@@ -155,9 +157,7 @@ public class VoiceWorkScannerTests
string englishPageHtml = await ReadResourceAsync("English-Page.html");
IHttpService httpService = Substitute.For<IHttpService>();
httpService.GetStringAsync(Arg.Any<string>(), CancellationToken.None)
.Returns(Task.FromResult(englishPageHtml));
httpService.ReturnsContent(englishPageHtml);
HtmlLoader loader = new(httpService);
EnglishVoiceWorksScanner scanner = new(loader);
@@ -170,7 +170,7 @@ public class VoiceWorkScannerTests
ExcludedMakerIds: []
);
var result = await scanner.ScanPageAsync(options, CancellationToken.None);
var result = await scanner.ScanWorksAsync(options);
result.Count.ShouldBe(2);
@@ -197,9 +197,7 @@ public class VoiceWorkScannerTests
string html = await ReadResourceAsync("English-Page-Updated.html");
IHttpService httpService = Substitute.For<IHttpService>();
httpService.GetStringAsync(Arg.Any<string>(), CancellationToken.None)
.Returns(Task.FromResult(html));
httpService.ReturnsContent(html);
HtmlLoader loader = new(httpService);
EnglishVoiceWorksScanner scanner = new(loader);
@@ -212,7 +210,7 @@ public class VoiceWorkScannerTests
ExcludedMakerIds: []
);
var result = await scanner.ScanPageAsync(options, CancellationToken.None);
var result = await scanner.ScanWorksAsync(options);
result.Count.ShouldBe(1);

View File

@@ -7,6 +7,7 @@ using JSMR.Worker.Options;
using JSMR.Worker.UI;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Logging;
using Serilog.Core;
using Spectre.Console;
using System.Globalization;
@@ -45,15 +46,6 @@ public sealed class PagedScanRunner(
//log.LogInformation("Scanning page {Page} (size {Size}, locale {Locale})…", currentPage, pageSize, locale);
CliUi.PageHeader(currentPage, end);
//AnsiConsole.Status()
//.Start($"[grey]Scanning page[/] [bold]{currentPage}[/] [grey]of[/] [bold]{end}[/][grey]...[/]", ctx =>
//{
// // Simulate grinding
// Thread.Sleep(3000);
//});
//AnsiConsole.MarkupLine($"[green]✓ Scanning page[/] [bold]{currentPage}[/] [grey]of[/] [bold]{end}[/][grey]... DONE[/]");
ScanVoiceWorksRequest request = new(
PageNumber: currentPage,
PageSize: 100,
@@ -62,6 +54,12 @@ public sealed class PagedScanRunner(
ScanVoiceWorksResponse response = await handler.HandleAsync(request, cancellationToken);
if (response.EndOfResults)
{
CliUi.Information($"Reached end of results at page {currentPage}. Stopping scan.");
break;
}
int newUpcoming = response.Results.Count(x => x.UpdateStatus == VoiceWorkStatus.NewAndUpcoming);
//if (newUpcoming > 0)

View File

@@ -50,6 +50,9 @@ public static class CliUi
AnsiConsole.Write(panel);
}
/// <summary>
/// Writes <paramref name="message"/> as a blue markup line after passing it through <c>Escape</c>.
/// </summary>
/// <param name="message">Text to display.</param>
public static void Information(string message)
{
    var escaped = Escape(message);
    AnsiConsole.MarkupLine($"[blue]🛈 {escaped}[/]");
}
/// <summary>
/// Writes <paramref name="message"/> as a yellow markup line after passing it through <c>Escape</c>.
/// </summary>
/// <param name="message">Text to display.</param>
public static void Warning(string message)
{
    var escaped = Escape(message);
    AnsiConsole.MarkupLine($"[yellow]⚠ {escaped}[/]");
}