Updated scanner to infer when it has reached the end of results.
All checks were successful
ci / build-test (push) Successful in 2m18s
ci / publish-image (push) Has been skipped

This commit is contained in:
2026-03-07 01:26:04 -05:00
parent 1e01edf1b7
commit 62c2efab01
18 changed files with 193 additions and 79 deletions

View File

@@ -8,6 +8,7 @@ using JSMR.Infrastructure.Http;
using JSMR.Infrastructure.Scanning.Extensions;
using JSMR.Infrastructure.Scanning.Models;
using System.Globalization;
using System.Net;
using System.Text.RegularExpressions;
namespace JSMR.Infrastructure.Scanning;
@@ -20,24 +21,46 @@ public abstract class VoiceWorksScanner(IHtmlLoader htmlLoader) : IVoiceWorksSca
// Abstract hook supplied by derived scanners: takes the expected-date text and
// returns a DateOnly, or null.
// NOTE(review): the accepted text format and the conditions that yield null are not
// visible in this view — confirm against the concrete implementations.
protected abstract DateOnly? GetEstimatedReleaseDate(string expectedDate);
// Abstract hook supplied by derived scanners: takes the sales-date text and
// returns a DateOnly, or null.
// NOTE(review): format and null conditions are likewise not visible here — confirm.
protected abstract DateOnly? GetSalesDate(string salesDate);
// NOTE(review): this span looks like a rendered diff whose +/- markers were lost, so
// removed and added lines are interleaved (two signatures, two `document` declarations,
// two return paths). Confirm against the real file before changing any code here.
//
// ScanPageAsync: builds the URL for `options`, loads it through htmlLoader, and reports
// the works found on that page together with an EndOfResults flag.
//   - 404 response            -> empty Works, EndOfResults = true
//   - other non-success/null  -> throws HttpRequestException
//   - page with zero nodes    -> empty Works, EndOfResults = true
//   - otherwise               -> parsed Works, EndOfResults = false
// NOTE(review): presumably the line below is the previous signature (it returns the bare list) — confirm.
public async Task<IReadOnlyList<DLSiteWork>> ScanPageAsync(VoiceWorkScanOptions options, CancellationToken cancellationToken = default)
// Signature returning VoiceWorkScanResult, the type constructed by every return in this span except the bare `return works;` further down.
public async Task<VoiceWorkScanResult> ScanPageAsync(VoiceWorkScanOptions options, CancellationToken cancellationToken = default)
{
// NOTE(review): `document` is declared again further down from result.Document; this
// helper-based load looks like the removed side of the diff — confirm.
DLSiteHtmlDocument document = await GetDLSiteHtmlCollectionAsync(options, cancellationToken);
// Build the page URL once so it can be reused in the error message below.
string url = GetUrl(options);
HtmlLoadResult result = await htmlLoader.GetHtmlDocumentAsync(url, cancellationToken);
// Expected boundary: past the last search page.
// Checked before the generic failure test so that a 404 yields an end-of-results
// value instead of an exception.
if (result.StatusCode == HttpStatusCode.NotFound)
{
return new VoiceWorkScanResult(
Works: [],
EndOfResults: true
);
}
// Unexpected non-success response, or a response that carries no document:
// raise an error that includes the numeric status, its name, and the URL.
if (!result.IsSuccessStatusCode || result.Document is null)
{
throw new HttpRequestException(
$"Unexpected response status code {(int)result.StatusCode} ({result.StatusCode}) while scanning {url}");
}
// Wrap the loaded document and extract the work nodes from it.
DLSiteHtmlDocument document = new(result.Document);
DLSiteHtmlNode[] nodes = document.GetDLSiteNodes();
// Defensive fallback in case DLsite changes from 404 to 200 with empty page:
// a page with no nodes is reported the same way as the 404 case above.
if (nodes.Length == 0)
{
return new VoiceWorkScanResult(
Works: [],
EndOfResults: true
);
}
// Convert the nodes to works, then run the expected-date inference extension on the array.
// NOTE(review): InferAndUpdateExpectedDates presumably mutates `works` in place (its
// return value is not used here) — confirm in the extensions class.
DLSiteWork[] works = GetDLSiteWorks(nodes, options);
works.InferAndUpdateExpectedDates();
// NOTE(review): the bare `return works;` and the helper method below appear to be the
// removed side of the diff (the helper assigns the loader result to HtmlDocument, while
// the code above assigns it to HtmlLoadResult) — confirm.
return works;
}
private async Task<DLSiteHtmlDocument> GetDLSiteHtmlCollectionAsync(VoiceWorkScanOptions options, CancellationToken cancellationToken)
{
string url = GetUrl(options);
HtmlDocument document = await htmlLoader.GetHtmlDocumentAsync(url, cancellationToken);
return new DLSiteHtmlDocument(document);
// Return of the parsed works with EndOfResults = false: the page had at least one node.
return new VoiceWorkScanResult(
Works: works,
EndOfResults: false
);
}
protected string GetUrl(VoiceWorkScanOptions options)