Updated scanner to infer when it has reached the end of results.
This commit is contained in:
@@ -8,6 +8,7 @@ using JSMR.Infrastructure.Http;
|
||||
using JSMR.Infrastructure.Scanning.Extensions;
|
||||
using JSMR.Infrastructure.Scanning.Models;
|
||||
using System.Globalization;
|
||||
using System.Net;
|
||||
using System.Text.RegularExpressions;
|
||||
|
||||
namespace JSMR.Infrastructure.Scanning;
|
||||
@@ -20,24 +21,46 @@ public abstract class VoiceWorksScanner(IHtmlLoader htmlLoader) : IVoiceWorksSca
|
||||
protected abstract DateOnly? GetEstimatedReleaseDate(string expectedDate);
|
||||
protected abstract DateOnly? GetSalesDate(string salesDate);
|
||||
|
||||
// NOTE(review): this span is a rendered diff with the +/- markers stripped, so lines from the
// previous and the updated implementation are interleaved. The signature directly below
// (returning IReadOnlyList<DLSiteWork>) appears to be the pre-change one - confirm against the repository.
public async Task<IReadOnlyList<DLSiteWork>> ScanPageAsync(VoiceWorkScanOptions options, CancellationToken cancellationToken = default)
|
||||
/// <summary>
/// Scans a single search page and reports whether the end of the results has been reached.
/// </summary>
/// <remarks>
/// The page URL is built with GetUrl(options) and loaded through htmlLoader.
/// A 404 response, or a successfully loaded page that yields zero nodes, produces a result
/// with an empty Works collection and EndOfResults set to true. Otherwise the nodes are
/// converted to works, InferAndUpdateExpectedDates() is applied to them, and they are
/// returned with EndOfResults set to false.
/// </remarks>
/// <param name="options">Scan options; used to build the URL and passed on to GetDLSiteWorks.</param>
/// <param name="cancellationToken">Token forwarded to the HTML loader.</param>
/// <returns>The works found on the page together with the end-of-results flag.</returns>
/// <exception cref="HttpRequestException">
/// Thrown when the response is neither a 404 nor a success, or when no document was returned.
/// </exception>
public async Task<VoiceWorkScanResult> ScanPageAsync(VoiceWorkScanOptions options, CancellationToken cancellationToken = default)
|
||||
{
|
||||
// NOTE(review): likely a pre-change line - `document` is declared again further down.
DLSiteHtmlDocument document = await GetDLSiteHtmlCollectionAsync(options, cancellationToken);
|
||||
string url = GetUrl(options);
|
||||
HtmlLoadResult result = await htmlLoader.GetHtmlDocumentAsync(url, cancellationToken);
|
||||
|
||||
// Expected boundary: past the last search page
|
||||
if (result.StatusCode == HttpStatusCode.NotFound)
|
||||
{
|
||||
return new VoiceWorkScanResult(
|
||||
Works: [],
|
||||
EndOfResults: true
|
||||
);
|
||||
}
|
||||
|
||||
// Unexpected non-success response
|
||||
// NOTE(review): this branch is also taken when the status is a success but Document is null;
// the message below would then report a success status code as "unexpected".
if (!result.IsSuccessStatusCode || result.Document is null)
|
||||
{
|
||||
throw new HttpRequestException(
|
||||
$"Unexpected response status code {(int)result.StatusCode} ({result.StatusCode}) while scanning {url}");
|
||||
}
|
||||
|
||||
DLSiteHtmlDocument document = new(result.Document);
|
||||
DLSiteHtmlNode[] nodes = document.GetDLSiteNodes();
|
||||
|
||||
// Defensive fallback in case DLsite changes from 404 to 200 with empty page
|
||||
if (nodes.Length == 0)
|
||||
{
|
||||
return new VoiceWorkScanResult(
|
||||
Works: [],
|
||||
EndOfResults: true
|
||||
);
|
||||
}
|
||||
|
||||
DLSiteWork[] works = GetDLSiteWorks(nodes, options);
|
||||
works.InferAndUpdateExpectedDates();
|
||||
|
||||
// NOTE(review): likely the pre-change return statement; the VoiceWorkScanResult return
// near the end of this span appears to replace it.
return works;
|
||||
}
|
||||
|
||||
// NOTE(review): the helper below appears to be the pre-change loading code that this commit
// folds into ScanPageAsync - its signature, opening brace and three statements look like
// removed lines. Confirm against the repository.
private async Task<DLSiteHtmlDocument> GetDLSiteHtmlCollectionAsync(VoiceWorkScanOptions options, CancellationToken cancellationToken)
|
||||
{
|
||||
string url = GetUrl(options);
|
||||
|
||||
HtmlDocument document = await htmlLoader.GetHtmlDocumentAsync(url, cancellationToken);
|
||||
|
||||
return new DLSiteHtmlDocument(document);
|
||||
// Normal case: the page produced works, so the scan has not reached the end of the results.
return new VoiceWorkScanResult(
|
||||
Works: works,
|
||||
EndOfResults: false
|
||||
);
|
||||
}
|
||||
|
||||
protected string GetUrl(VoiceWorkScanOptions options)
|
||||
|
||||
Reference in New Issue
Block a user