Updated scanner to infer when it has reached the end of results.
All checks were successful
ci / build-test (push) Successful in 2m18s
ci / publish-image (push) Has been skipped

This commit is contained in:
2026-03-07 01:26:04 -05:00
parent 1e01edf1b7
commit 62c2efab01
18 changed files with 193 additions and 79 deletions

View File

@@ -0,0 +1,6 @@
namespace JSMR.Application.Scanning.Contracts;
/// <summary>
/// Outcome of scanning a single page of voice works.
/// </summary>
/// <param name="Works">The works produced by the scan of the page.</param>
/// <param name="EndOfResults">
/// <c>true</c> when the scanner reports that the end of the results has been reached.
/// </param>
public record class VoiceWorkScanResult(DLSiteWork[] Works, bool EndOfResults);

View File

@@ -4,5 +4,5 @@ namespace JSMR.Application.Scanning.Ports;
public interface IVoiceWorksScanner public interface IVoiceWorksScanner
{ {
Task<IReadOnlyList<DLSiteWork>> ScanPageAsync(VoiceWorkScanOptions request, CancellationToken cancellationToken = default); Task<VoiceWorkScanResult> ScanPageAsync(VoiceWorkScanOptions request, CancellationToken cancellationToken = default);
} }

View File

@@ -18,8 +18,11 @@ public sealed class ScanVoiceWorksHandler(
IVoiceWorksScanner? scanner = scannerRepository.GetScanner(request.Locale); IVoiceWorksScanner? scanner = scannerRepository.GetScanner(request.Locale);
IVoiceWorkUpdater? updater = updaterRepository.GetUpdater(request.Locale); IVoiceWorkUpdater? updater = updaterRepository.GetUpdater(request.Locale);
if (scanner is null || updater is null) if (scanner is null)
return new(); throw new InvalidOperationException($"No scanner registered for locale {request.Locale}.");
if (updater is null)
throw new InvalidOperationException($"No updater registered for locale {request.Locale}.");
VoiceWorkScanOptions options = new( VoiceWorkScanOptions options = new(
PageNumber: request.PageNumber, PageNumber: request.PageNumber,
@@ -29,15 +32,20 @@ public sealed class ScanVoiceWorksHandler(
ExcludeAIGeneratedWorks: true ExcludeAIGeneratedWorks: true
); );
IReadOnlyList<DLSiteWork> works = await scanner.ScanPageAsync(options, cancellationToken); VoiceWorkScanResult scanResult = await scanner.ScanPageAsync(options, cancellationToken);
if (works.Count == 0) if (scanResult.EndOfResults)
return new(); {
return new ScanVoiceWorksResponse(
Results: [],
EndOfResults: true
);
}
string[] productIds = [.. works.Where(x => !string.IsNullOrWhiteSpace(x.ProductId)).Select(x => x.ProductId!)]; string[] productIds = [.. scanResult.Works.Where(x => !string.IsNullOrWhiteSpace(x.ProductId)).Select(x => x.ProductId!)];
VoiceWorkDetailCollection voiceWorkDetails = await dlsiteClient.GetVoiceWorkDetailsAsync(productIds, cancellationToken); VoiceWorkDetailCollection voiceWorkDetails = await dlsiteClient.GetVoiceWorkDetailsAsync(productIds, cancellationToken);
VoiceWorkIngest[] ingests = [.. works.Select(work => VoiceWorkIngest[] ingests = [.. scanResult.Works.Select(work =>
{ {
voiceWorkDetails.TryGetValue(work.ProductId!, out VoiceWorkDetails? value); voiceWorkDetails.TryGetValue(work.ProductId!, out VoiceWorkDetails? value);
return VoiceWorkIngest.From(work, value); return VoiceWorkIngest.From(work, value);
@@ -48,9 +56,9 @@ public sealed class ScanVoiceWorksHandler(
await searchUpdater.UpdateAsync(voiceWorkIds, cancellationToken); await searchUpdater.UpdateAsync(voiceWorkIds, cancellationToken);
return new() return new ScanVoiceWorksResponse(
{ Results: upsertResults,
Results = upsertResults EndOfResults: false
}; );
} }
} }

View File

@@ -2,9 +2,7 @@
namespace JSMR.Application.Scanning; namespace JSMR.Application.Scanning;
public sealed class ScanVoiceWorksResponse public sealed record ScanVoiceWorksResponse(
{ VoiceWorkUpsertResult[] Results,
public int Inserted { get; init; } bool EndOfResults
public int Updated { get; init; } );
public VoiceWorkUpsertResult[] Results { get; init; } = [];
}

View File

@@ -10,9 +10,12 @@ public abstract class ApiClient(IHttpService http, ILogger logger, JsonSerialize
{ {
protected async Task<T> GetJsonAsync<T>(string url, CancellationToken cancellationToken = default) protected async Task<T> GetJsonAsync<T>(string url, CancellationToken cancellationToken = default)
{ {
string response = await http.GetStringAsync(url, cancellationToken); HttpStringResponse response = await http.GetAsync(url, cancellationToken);
return JsonSerializer.Deserialize<T>(response, json) if (response.Content is null)
throw new Exception("No content to deserialize");
return JsonSerializer.Deserialize<T>(response.Content, json)
?? throw new InvalidOperationException($"Failed to deserialize JSON to {typeof(T).Name} from {url}."); ?? throw new InvalidOperationException($"Failed to deserialize JSON to {typeof(T).Name} from {url}.");
} }

View File

@@ -0,0 +1,12 @@
using HtmlAgilityPack;
using System.Net;
namespace JSMR.Infrastructure.Http;
/// <summary>
/// Result of an HTML load attempt: the HTTP status code together with the
/// parsed document, when one is available.
/// </summary>
public sealed class HtmlLoadResult
{
    /// <summary>HTTP status code associated with this result.</summary>
    public required HttpStatusCode StatusCode { get; init; }

    /// <summary>The parsed HTML document, or <c>null</c> when none was set.</summary>
    public HtmlDocument? Document { get; init; }

    /// <summary>
    /// <c>true</c> when <see cref="StatusCode"/> lies in the inclusive range 200-299.
    /// </summary>
    public bool IsSuccessStatusCode
    {
        get
        {
            int numericCode = (int)StatusCode;
            return numericCode >= 200 && numericCode <= 299;
        }
    }
}

View File

@@ -4,13 +4,26 @@ namespace JSMR.Infrastructure.Http;
public class HtmlLoader(IHttpService httpService) : IHtmlLoader public class HtmlLoader(IHttpService httpService) : IHtmlLoader
{ {
public async Task<HtmlDocument> GetHtmlDocumentAsync(string url, CancellationToken cancellationToken) public async Task<HtmlLoadResult> GetHtmlDocumentAsync(string url, CancellationToken cancellationToken)
{ {
string html = await httpService.GetStringAsync(url, cancellationToken); HttpStringResponse response = await httpService.GetAsync(url, cancellationToken);
if (!response.IsSuccessStatusCode)
{
return new HtmlLoadResult
{
StatusCode = response.StatusCode,
Document = null
};
}
HtmlDocument document = new(); HtmlDocument document = new();
document.LoadHtml(html); document.LoadHtml(response.Content ?? string.Empty);
return document; return new HtmlLoadResult
{
StatusCode = response.StatusCode,
Document = document
};
} }
} }

View File

@@ -2,10 +2,10 @@
public class HttpService(HttpClient httpClient) : IHttpService public class HttpService(HttpClient httpClient) : IHttpService
{ {
public Task<string> GetStringAsync(string url, CancellationToken cancellationToken) public Task<HttpStringResponse> GetAsync(string url, CancellationToken cancellationToken)
=> GetStringAsync(url, new Dictionary<string, string>(), cancellationToken); => GetAsync(url, new Dictionary<string, string>(), cancellationToken);
public async Task<string> GetStringAsync(string url, IDictionary<string, string> headers, CancellationToken cancellationToken) public async Task<HttpStringResponse> GetAsync(string url, IDictionary<string, string> headers, CancellationToken cancellationToken)
{ {
using HttpRequestMessage request = new(HttpMethod.Get, url); using HttpRequestMessage request = new(HttpMethod.Get, url);
@@ -14,11 +14,18 @@ public class HttpService(HttpClient httpClient) : IHttpService
request.Headers.TryAddWithoutValidation(header.Key, header.Value); request.Headers.TryAddWithoutValidation(header.Key, header.Value);
} }
httpClient.DefaultRequestHeaders.UserAgent.ParseAdd("JSMR/1.0"); request.Headers.UserAgent.ParseAdd("JSMR/1.0");
using HttpResponseMessage response = await httpClient.SendAsync(request, cancellationToken); using HttpResponseMessage response = await httpClient.SendAsync(request, HttpCompletionOption.ResponseHeadersRead, cancellationToken);
response.EnsureSuccessStatusCode();
return await response.Content.ReadAsStringAsync(cancellationToken); string? content = response.Content is null
? null
: await response.Content.ReadAsStringAsync(cancellationToken);
return new HttpStringResponse
{
StatusCode = response.StatusCode,
Content = content
};
} }
} }

View File

@@ -0,0 +1,11 @@
using System.Net;
namespace JSMR.Infrastructure.Http;
/// <summary>
/// An HTTP response reduced to its status code and its body read as a string.
/// </summary>
public sealed class HttpStringResponse
{
    /// <summary>HTTP status code of the response.</summary>
    public required HttpStatusCode StatusCode { get; init; }

    /// <summary>Response body as text, or <c>null</c> when no content was set.</summary>
    public string? Content { get; init; }

    /// <summary>
    /// <c>true</c> when <see cref="StatusCode"/> lies in the inclusive range 200-299.
    /// </summary>
    public bool IsSuccessStatusCode
    {
        get
        {
            int numericCode = (int)StatusCode;
            return numericCode >= 200 && numericCode <= 299;
        }
    }
}

View File

@@ -1,8 +1,6 @@
using HtmlAgilityPack; namespace JSMR.Infrastructure.Http;
namespace JSMR.Infrastructure.Http;
public interface IHtmlLoader public interface IHtmlLoader
{ {
Task<HtmlDocument> GetHtmlDocumentAsync(string url, CancellationToken cancellationToken); Task<HtmlLoadResult> GetHtmlDocumentAsync(string url, CancellationToken cancellationToken);
} }

View File

@@ -2,6 +2,6 @@
public interface IHttpService public interface IHttpService
{ {
Task<string> GetStringAsync(string url, CancellationToken cancellationToken); Task<HttpStringResponse> GetAsync(string url, CancellationToken cancellationToken);
Task<string> GetStringAsync(string url, IDictionary<string, string> headers, CancellationToken cancellationToken); Task<HttpStringResponse> GetAsync(string url, IDictionary<string, string> headers, CancellationToken cancellationToken);
} }

View File

@@ -8,6 +8,7 @@ using JSMR.Infrastructure.Http;
using JSMR.Infrastructure.Scanning.Extensions; using JSMR.Infrastructure.Scanning.Extensions;
using JSMR.Infrastructure.Scanning.Models; using JSMR.Infrastructure.Scanning.Models;
using System.Globalization; using System.Globalization;
using System.Net;
using System.Text.RegularExpressions; using System.Text.RegularExpressions;
namespace JSMR.Infrastructure.Scanning; namespace JSMR.Infrastructure.Scanning;
@@ -20,24 +21,46 @@ public abstract class VoiceWorksScanner(IHtmlLoader htmlLoader) : IVoiceWorksSca
protected abstract DateOnly? GetEstimatedReleaseDate(string expectedDate); protected abstract DateOnly? GetEstimatedReleaseDate(string expectedDate);
protected abstract DateOnly? GetSalesDate(string salesDate); protected abstract DateOnly? GetSalesDate(string salesDate);
public async Task<IReadOnlyList<DLSiteWork>> ScanPageAsync(VoiceWorkScanOptions options, CancellationToken cancellationToken = default) public async Task<VoiceWorkScanResult> ScanPageAsync(VoiceWorkScanOptions options, CancellationToken cancellationToken = default)
{ {
DLSiteHtmlDocument document = await GetDLSiteHtmlCollectionAsync(options, cancellationToken); string url = GetUrl(options);
HtmlLoadResult result = await htmlLoader.GetHtmlDocumentAsync(url, cancellationToken);
// Expected boundary: past the last search page
if (result.StatusCode == HttpStatusCode.NotFound)
{
return new VoiceWorkScanResult(
Works: [],
EndOfResults: true
);
}
// Unexpected non-success response
if (!result.IsSuccessStatusCode || result.Document is null)
{
throw new HttpRequestException(
$"Unexpected response status code {(int)result.StatusCode} ({result.StatusCode}) while scanning {url}");
}
DLSiteHtmlDocument document = new(result.Document);
DLSiteHtmlNode[] nodes = document.GetDLSiteNodes(); DLSiteHtmlNode[] nodes = document.GetDLSiteNodes();
// Defensive fallback in case DLsite changes from 404 to 200 with empty page
if (nodes.Length == 0)
{
return new VoiceWorkScanResult(
Works: [],
EndOfResults: true
);
}
DLSiteWork[] works = GetDLSiteWorks(nodes, options); DLSiteWork[] works = GetDLSiteWorks(nodes, options);
works.InferAndUpdateExpectedDates(); works.InferAndUpdateExpectedDates();
return works; return new VoiceWorkScanResult(
} Works: works,
EndOfResults: false
private async Task<DLSiteHtmlDocument> GetDLSiteHtmlCollectionAsync(VoiceWorkScanOptions options, CancellationToken cancellationToken) );
{
string url = GetUrl(options);
HtmlDocument document = await htmlLoader.GetHtmlDocumentAsync(url, cancellationToken);
return new DLSiteHtmlDocument(document);
} }
protected string GetUrl(VoiceWorkScanOptions options) protected string GetUrl(VoiceWorkScanOptions options)

View File

@@ -0,0 +1,20 @@
using JSMR.Infrastructure.Http;
using NSubstitute;
using System.Net;
namespace JSMR.Tests.Extensions;
/// <summary>
/// Test helpers for configuring <see cref="IHttpService"/> substitutes.
/// </summary>
internal static class HttpServiceTestExtensions
{
    /// <summary>
    /// Stubs the <c>GetAsync(url, cancellationToken)</c> overload so that, for any
    /// URL and any cancellation token, it yields a response carrying the given
    /// content and status code. The overload that also takes headers is not configured here.
    /// </summary>
    /// <param name="httpService">The substitute to configure.</param>
    /// <param name="content">Body text to place in the stubbed response.</param>
    /// <param name="statusCode">Status code of the stubbed response; defaults to 200 OK.</param>
    public static void ReturnsContent(this IHttpService httpService, string content, HttpStatusCode statusCode = HttpStatusCode.OK)
    {
        Task<HttpStringResponse> stubbedResponse = Task.FromResult(new HttpStringResponse
        {
            StatusCode = statusCode,
            Content = content
        });

        httpService
            .GetAsync(Arg.Any<string>(), Arg.Any<CancellationToken>())
            .Returns(stubbedResponse);
    }
}

View File

@@ -0,0 +1,17 @@
using JSMR.Application.Scanning.Contracts;
using JSMR.Application.Scanning.Ports;
using Shouldly;
namespace JSMR.Tests.Extensions;
/// <summary>
/// Test helpers layered over <see cref="IVoiceWorksScanner"/>.
/// </summary>
internal static class ScannerTestExtensions
{
    /// <summary>
    /// Scans one page with <see cref="CancellationToken.None"/>, asserts that the
    /// scan did not report the end of results, and returns the scanned works.
    /// </summary>
    /// <param name="scanner">The scanner under test.</param>
    /// <param name="options">Options forwarded to <c>ScanPageAsync</c>.</param>
    /// <returns>The works contained in the scan result.</returns>
    public static async Task<IReadOnlyList<DLSiteWork>> ScanWorksAsync(this IVoiceWorksScanner scanner, VoiceWorkScanOptions options)
    {
        VoiceWorkScanResult result = await scanner.ScanPageAsync(options, cancellationToken: CancellationToken.None);

        // Callers of this helper expect a page with works on it, not the end-of-results marker.
        result.EndOfResults.ShouldBeFalse();

        DLSiteWork[] works = result.Works;
        return works;
    }
}

View File

@@ -5,6 +5,7 @@ using JSMR.Infrastructure.Http;
using JSMR.Infrastructure.Integrations.DLSite; using JSMR.Infrastructure.Integrations.DLSite;
using JSMR.Infrastructure.Integrations.DLSite.Mapping; using JSMR.Infrastructure.Integrations.DLSite.Mapping;
using JSMR.Infrastructure.Integrations.DLSite.Models; using JSMR.Infrastructure.Integrations.DLSite.Models;
using JSMR.Tests.Extensions;
using JSMR.Tests.Utilities; using JSMR.Tests.Utilities;
using Microsoft.Extensions.Logging; using Microsoft.Extensions.Logging;
using NSubstitute; using NSubstitute;
@@ -25,9 +26,7 @@ public class DLSiteClientTests
string productInfoJson = await ReadJsonResourceAsync("Product-Info.json"); string productInfoJson = await ReadJsonResourceAsync("Product-Info.json");
IHttpService httpService = Substitute.For<IHttpService>(); IHttpService httpService = Substitute.For<IHttpService>();
httpService.ReturnsContent(productInfoJson);
httpService.GetStringAsync(Arg.Any<string>(), CancellationToken.None)
.Returns(Task.FromResult(productInfoJson));
var logger = Substitute.For<ILogger<DLSiteClient>>(); var logger = Substitute.For<ILogger<DLSiteClient>>();
var client = new DLSiteClient(httpService, logger); var client = new DLSiteClient(httpService, logger);

View File

@@ -4,6 +4,7 @@ using JSMR.Application.Scanning.Contracts;
using JSMR.Application.Scanning.Ports; using JSMR.Application.Scanning.Ports;
using JSMR.Infrastructure.Http; using JSMR.Infrastructure.Http;
using JSMR.Infrastructure.Scanning; using JSMR.Infrastructure.Scanning;
using JSMR.Tests.Extensions;
using JSMR.Tests.Utilities; using JSMR.Tests.Utilities;
using NSubstitute; using NSubstitute;
using Shouldly; using Shouldly;
@@ -23,9 +24,7 @@ public class VoiceWorkScannerTests
string html = await ReadResourceAsync("Japanese-Page.html"); string html = await ReadResourceAsync("Japanese-Page.html");
IHttpService httpService = Substitute.For<IHttpService>(); IHttpService httpService = Substitute.For<IHttpService>();
httpService.ReturnsContent(html);
httpService.GetStringAsync(Arg.Any<string>(), CancellationToken.None)
.Returns(Task.FromResult(html));
HtmlLoader loader = new(httpService); HtmlLoader loader = new(httpService);
JapaneseVoiceWorksScanner scanner = new(loader); JapaneseVoiceWorksScanner scanner = new(loader);
@@ -38,7 +37,7 @@ public class VoiceWorkScannerTests
ExcludedMakerIds: [] ExcludedMakerIds: []
); );
var result = await scanner.ScanPageAsync(options, CancellationToken.None); var result = await scanner.ScanWorksAsync(options);
result.Count.ShouldBe(1); result.Count.ShouldBe(1);
@@ -61,9 +60,7 @@ public class VoiceWorkScannerTests
string html = await ReadResourceAsync("Japanese-Page-Updated.html"); string html = await ReadResourceAsync("Japanese-Page-Updated.html");
IHttpService httpService = Substitute.For<IHttpService>(); IHttpService httpService = Substitute.For<IHttpService>();
httpService.ReturnsContent(html);
httpService.GetStringAsync(Arg.Any<string>(), CancellationToken.None)
.Returns(Task.FromResult(html));
HtmlLoader loader = new(httpService); HtmlLoader loader = new(httpService);
JapaneseVoiceWorksScanner scanner = new(loader); JapaneseVoiceWorksScanner scanner = new(loader);
@@ -76,7 +73,7 @@ public class VoiceWorkScannerTests
ExcludedMakerIds: [] ExcludedMakerIds: []
); );
var result = await scanner.ScanPageAsync(options, CancellationToken.None); var result = await scanner.ScanWorksAsync(options);
result.Count.ShouldBe(2); result.Count.ShouldBe(2);
@@ -110,7 +107,7 @@ public class VoiceWorkScannerTests
{ {
IVoiceWorksScanner scanner = Substitute.For<IVoiceWorksScanner>(); IVoiceWorksScanner scanner = Substitute.For<IVoiceWorksScanner>();
IReadOnlyList<DLSiteWork> scannedWorks = DLSiteWork[] scannedWorks =
[ [
new() new()
{ {
@@ -123,8 +120,13 @@ public class VoiceWorkScannerTests
} }
]; ];
VoiceWorkScanResult scanResult = new(
Works: scannedWorks,
EndOfResults: false
);
scanner.ScanPageAsync(Arg.Any<VoiceWorkScanOptions>(), CancellationToken.None) scanner.ScanPageAsync(Arg.Any<VoiceWorkScanOptions>(), CancellationToken.None)
.Returns(Task.FromResult(scannedWorks)); .Returns(Task.FromResult(scanResult));
IDLSiteClient dlsiteClient = Substitute.For<IDLSiteClient>(); IDLSiteClient dlsiteClient = Substitute.For<IDLSiteClient>();
@@ -155,9 +157,7 @@ public class VoiceWorkScannerTests
string englishPageHtml = await ReadResourceAsync("English-Page.html"); string englishPageHtml = await ReadResourceAsync("English-Page.html");
IHttpService httpService = Substitute.For<IHttpService>(); IHttpService httpService = Substitute.For<IHttpService>();
httpService.ReturnsContent(englishPageHtml);
httpService.GetStringAsync(Arg.Any<string>(), CancellationToken.None)
.Returns(Task.FromResult(englishPageHtml));
HtmlLoader loader = new(httpService); HtmlLoader loader = new(httpService);
EnglishVoiceWorksScanner scanner = new(loader); EnglishVoiceWorksScanner scanner = new(loader);
@@ -170,7 +170,7 @@ public class VoiceWorkScannerTests
ExcludedMakerIds: [] ExcludedMakerIds: []
); );
var result = await scanner.ScanPageAsync(options, CancellationToken.None); var result = await scanner.ScanWorksAsync(options);
result.Count.ShouldBe(2); result.Count.ShouldBe(2);
@@ -197,9 +197,7 @@ public class VoiceWorkScannerTests
string html = await ReadResourceAsync("English-Page-Updated.html"); string html = await ReadResourceAsync("English-Page-Updated.html");
IHttpService httpService = Substitute.For<IHttpService>(); IHttpService httpService = Substitute.For<IHttpService>();
httpService.ReturnsContent(html);
httpService.GetStringAsync(Arg.Any<string>(), CancellationToken.None)
.Returns(Task.FromResult(html));
HtmlLoader loader = new(httpService); HtmlLoader loader = new(httpService);
EnglishVoiceWorksScanner scanner = new(loader); EnglishVoiceWorksScanner scanner = new(loader);
@@ -212,7 +210,7 @@ public class VoiceWorkScannerTests
ExcludedMakerIds: [] ExcludedMakerIds: []
); );
var result = await scanner.ScanPageAsync(options, CancellationToken.None); var result = await scanner.ScanWorksAsync(options);
result.Count.ShouldBe(1); result.Count.ShouldBe(1);

View File

@@ -7,6 +7,7 @@ using JSMR.Worker.Options;
using JSMR.Worker.UI; using JSMR.Worker.UI;
using Microsoft.Extensions.DependencyInjection; using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Logging; using Microsoft.Extensions.Logging;
using Serilog.Core;
using Spectre.Console; using Spectre.Console;
using System.Globalization; using System.Globalization;
@@ -45,15 +46,6 @@ public sealed class PagedScanRunner(
//log.LogInformation("Scanning page {Page} (size {Size}, locale {Locale})…", currentPage, pageSize, locale); //log.LogInformation("Scanning page {Page} (size {Size}, locale {Locale})…", currentPage, pageSize, locale);
CliUi.PageHeader(currentPage, end); CliUi.PageHeader(currentPage, end);
//AnsiConsole.Status()
//.Start($"[grey]Scanning page[/] [bold]{currentPage}[/] [grey]of[/] [bold]{end}[/][grey]...[/]", ctx =>
//{
// // Simulate grinding
// Thread.Sleep(3000);
//});
//AnsiConsole.MarkupLine($"[green]✓ Scanning page[/] [bold]{currentPage}[/] [grey]of[/] [bold]{end}[/][grey]... DONE[/]");
ScanVoiceWorksRequest request = new( ScanVoiceWorksRequest request = new(
PageNumber: currentPage, PageNumber: currentPage,
PageSize: 100, PageSize: 100,
@@ -62,6 +54,12 @@ public sealed class PagedScanRunner(
ScanVoiceWorksResponse response = await handler.HandleAsync(request, cancellationToken); ScanVoiceWorksResponse response = await handler.HandleAsync(request, cancellationToken);
if (response.EndOfResults)
{
CliUi.Information($"Reached end of results at page {currentPage}. Stopping scan.");
break;
}
int newUpcoming = response.Results.Count(x => x.UpdateStatus == VoiceWorkStatus.NewAndUpcoming); int newUpcoming = response.Results.Count(x => x.UpdateStatus == VoiceWorkStatus.NewAndUpcoming);
//if (newUpcoming > 0) //if (newUpcoming > 0)

View File

@@ -50,6 +50,9 @@ public static class CliUi
AnsiConsole.Write(panel); AnsiConsole.Write(panel);
} }
public static void Information(string message) =>
AnsiConsole.MarkupLine($"[blue]🛈 {Escape(message)}[/]");
public static void Warning(string message) => public static void Warning(string message) =>
AnsiConsole.MarkupLine($"[yellow]⚠ {Escape(message)}[/]"); AnsiConsole.MarkupLine($"[yellow]⚠ {Escape(message)}[/]");