Updated scanner to infer when it has reached the end of results.
This commit is contained in:
@@ -10,9 +10,12 @@ public abstract class ApiClient(IHttpService http, ILogger logger, JsonSerialize
|
||||
{
|
||||
/// <summary>
/// Fetches <paramref name="url"/> through the injected HTTP service and deserializes the
/// response body as JSON into <typeparamref name="T"/> using the <c>json</c> serializer options.
/// </summary>
/// <param name="url">Address to request.</param>
/// <param name="cancellationToken">Token passed through to the HTTP call.</param>
/// <returns>The deserialized <typeparamref name="T"/> instance.</returns>
/// <exception cref="Exception">The response carried no content (see the null check below).</exception>
/// <exception cref="InvalidOperationException">Deserialization produced <c>null</c>.</exception>
// NOTE: this listing is a rendered diff without +/- markers. Going by the hunk header counts
// (-10,9 +10,12), the `string response = ...GetStringAsync` line and the first
// `return JsonSerializer.Deserialize<T>(response, json)` line are the pre-change side; the
// `HttpStringResponse` / null-check / `response.Content` lines are the post-change side.
protected async Task<T> GetJsonAsync<T>(string url, CancellationToken cancellationToken = default)
|
||||
{
|
||||
string response = await http.GetStringAsync(url, cancellationToken);
|
||||
HttpStringResponse response = await http.GetAsync(url, cancellationToken);
|
||||
|
||||
return JsonSerializer.Deserialize<T>(response, json)
|
||||
// NOTE(review): nothing in this method inspects the response status before deserializing;
// confirm whether a non-success body can reach this point and should be rejected first.
if (response.Content is null)
|
||||
// NOTE(review): throws the base Exception type; a specific type (e.g. InvalidOperationException,
// as used a few lines below) that also names the URL would be easier for callers to handle.
throw new Exception("No content to deserialize");
|
||||
|
||||
return JsonSerializer.Deserialize<T>(response.Content, json)
|
||||
// Deserialize returning null is treated as a failure rather than passed to the caller.
?? throw new InvalidOperationException($"Failed to deserialize JSON to {typeof(T).Name} from {url}.");
|
||||
}
|
||||
|
||||
|
||||
12
JSMR.Infrastructure/Http/HtmlLoadResult.cs
Normal file
12
JSMR.Infrastructure/Http/HtmlLoadResult.cs
Normal file
@@ -0,0 +1,12 @@
|
||||
using HtmlAgilityPack;
|
||||
using System.Net;
|
||||
|
||||
namespace JSMR.Infrastructure.Http;
|
||||
|
||||
/// <summary>
/// Outcome of an HTML load: the HTTP status code of the response and, optionally, the parsed document.
/// </summary>
public sealed class HtmlLoadResult
|
||||
{
|
||||
/// <summary>HTTP status code of the response; must be set at initialization.</summary>
public required HttpStatusCode StatusCode { get; init; }
|
||||
/// <summary>Parsed HTML document, or <c>null</c> when none was produced.</summary>
public HtmlDocument? Document { get; init; }
|
||||
|
||||
/// <summary><c>true</c> when the numeric value of <see cref="StatusCode"/> is in the inclusive range 200-299.</summary>
public bool IsSuccessStatusCode => (int)StatusCode is >= 200 and <= 299;
|
||||
}
|
||||
@@ -4,13 +4,26 @@ namespace JSMR.Infrastructure.Http;
|
||||
|
||||
/// <summary>
/// <see cref="IHtmlLoader"/> implementation that downloads a page through <see cref="IHttpService"/>
/// and parses it into an <c>HtmlDocument</c>.
/// </summary>
// NOTE: this listing is a rendered diff without +/- markers. Judging by the hunk header
// (-4,13 +4,26) and the duplicated declarations, the lines using `GetStringAsync`, `html`
// and `return document;` are the pre-change side.
public class HtmlLoader(IHttpService httpService) : IHtmlLoader
|
||||
{
|
||||
public async Task<HtmlDocument> GetHtmlDocumentAsync(string url, CancellationToken cancellationToken)
|
||||
/// <summary>
/// Requests <paramref name="url"/> and wraps the outcome in an <see cref="HtmlLoadResult"/>.
/// </summary>
/// <param name="url">Address of the page to load.</param>
/// <param name="cancellationToken">Token passed through to the HTTP call.</param>
/// <returns>
/// The response status code together with the parsed document; the document is <c>null</c>
/// when the response status is not a success code.
/// </returns>
public async Task<HtmlLoadResult> GetHtmlDocumentAsync(string url, CancellationToken cancellationToken)
|
||||
{
|
||||
string html = await httpService.GetStringAsync(url, cancellationToken);
|
||||
HttpStringResponse response = await httpService.GetAsync(url, cancellationToken);
|
||||
|
||||
// Non-success responses are reported via the status code instead of being parsed.
if (!response.IsSuccessStatusCode)
|
||||
{
|
||||
return new HtmlLoadResult
|
||||
{
|
||||
StatusCode = response.StatusCode,
|
||||
Document = null
|
||||
};
|
||||
}
|
||||
|
||||
HtmlDocument document = new();
|
||||
document.LoadHtml(html);
|
||||
// A success response with null content is parsed as an empty string.
document.LoadHtml(response.Content ?? string.Empty);
|
||||
|
||||
return document;
|
||||
return new HtmlLoadResult
|
||||
{
|
||||
StatusCode = response.StatusCode,
|
||||
Document = document
|
||||
};
|
||||
}
|
||||
}
|
||||
@@ -2,10 +2,10 @@
|
||||
|
||||
/// <summary>
/// <see cref="IHttpService"/> implementation that issues GET requests through the injected
/// <see cref="HttpClient"/>.
/// </summary>
// NOTE: this listing is a rendered diff without +/- markers; the `GetStringAsync` members,
// the `DefaultRequestHeaders` line, the two-argument `SendAsync` call, `EnsureSuccessStatusCode()`
// and the direct `return await ...ReadAsStringAsync` appear to be the pre-change side.
// A second hunk header sits inside the method body, so some lines (including the start of the
// header loop) are not shown here.
public class HttpService(HttpClient httpClient) : IHttpService
|
||||
{
|
||||
public Task<string> GetStringAsync(string url, CancellationToken cancellationToken)
|
||||
=> GetStringAsync(url, new Dictionary<string, string>(), cancellationToken);
|
||||
/// <summary>Sends a GET request to <paramref name="url"/> with an empty set of extra headers.</summary>
public Task<HttpStringResponse> GetAsync(string url, CancellationToken cancellationToken)
|
||||
=> GetAsync(url, new Dictionary<string, string>(), cancellationToken);
|
||||
|
||||
public async Task<string> GetStringAsync(string url, IDictionary<string, string> headers, CancellationToken cancellationToken)
|
||||
/// <summary>
/// Sends a GET request to <paramref name="url"/> and returns the status code together with
/// the body read as a string.
/// </summary>
/// <param name="url">Address to request.</param>
/// <param name="headers">Extra request headers (added without validation in the visible loop body).</param>
/// <param name="cancellationToken">Token passed to the send and to the body read.</param>
/// <returns>An <see cref="HttpStringResponse"/> holding the status code and the content string.</returns>
public async Task<HttpStringResponse> GetAsync(string url, IDictionary<string, string> headers, CancellationToken cancellationToken)
|
||||
{
|
||||
using HttpRequestMessage request = new(HttpMethod.Get, url);
|
||||
|
||||
@@ -14,11 +14,18 @@ public class HttpService(HttpClient httpClient) : IHttpService
|
||||
request.Headers.TryAddWithoutValidation(header.Key, header.Value);
|
||||
}
|
||||
|
||||
httpClient.DefaultRequestHeaders.UserAgent.ParseAdd("JSMR/1.0");
|
||||
// The User-Agent is set on this request's own headers rather than on the client defaults.
request.Headers.UserAgent.ParseAdd("JSMR/1.0");
|
||||
|
||||
using HttpResponseMessage response = await httpClient.SendAsync(request, cancellationToken);
|
||||
response.EnsureSuccessStatusCode();
|
||||
// The post-change path has no EnsureSuccessStatusCode call: the status code is handed back
// in the result, so interpreting non-success responses is left to the caller.
using HttpResponseMessage response = await httpClient.SendAsync(request, HttpCompletionOption.ResponseHeadersRead, cancellationToken);
|
||||
|
||||
return await response.Content.ReadAsStringAsync(cancellationToken);
|
||||
// NOTE(review): on recent .NET versions HttpResponseMessage.Content is documented as non-null,
// so the null branch may never be taken — confirm against the target framework.
string? content = response.Content is null
|
||||
? null
|
||||
: await response.Content.ReadAsStringAsync(cancellationToken);
|
||||
|
||||
return new HttpStringResponse
|
||||
{
|
||||
StatusCode = response.StatusCode,
|
||||
Content = content
|
||||
};
|
||||
}
|
||||
}
|
||||
11
JSMR.Infrastructure/Http/HttpStringResponse.cs
Normal file
11
JSMR.Infrastructure/Http/HttpStringResponse.cs
Normal file
@@ -0,0 +1,11 @@
|
||||
using System.Net;
|
||||
|
||||
namespace JSMR.Infrastructure.Http;
|
||||
|
||||
/// <summary>
/// Outcome of an HTTP request whose body was read as text: the status code plus the optional content string.
/// </summary>
public sealed class HttpStringResponse
|
||||
{
|
||||
/// <summary>HTTP status code of the response; must be set at initialization.</summary>
public required HttpStatusCode StatusCode { get; init; }
|
||||
/// <summary>Response body as a string, or <c>null</c> when none is available.</summary>
public string? Content { get; init; }
|
||||
|
||||
/// <summary><c>true</c> when the numeric value of <see cref="StatusCode"/> is in the inclusive range 200-299.</summary>
public bool IsSuccessStatusCode => (int)StatusCode is >= 200 and <= 299;
|
||||
}
|
||||
@@ -1,8 +1,6 @@
|
||||
using HtmlAgilityPack;
|
||||
|
||||
namespace JSMR.Infrastructure.Http;
|
||||
namespace JSMR.Infrastructure.Http;
|
||||
|
||||
/// <summary>Loads an HTML page from a URL.</summary>
// NOTE: rendered diff without +/- markers; the two declarations below differ only in return
// type, so they are the pre-change (HtmlDocument) and post-change (HtmlLoadResult) forms of the
// same member rather than two coexisting overloads.
public interface IHtmlLoader
|
||||
{
|
||||
Task<HtmlDocument> GetHtmlDocumentAsync(string url, CancellationToken cancellationToken);
|
||||
/// <summary>Requests <paramref name="url"/> and returns the load outcome as an <see cref="HtmlLoadResult"/>.</summary>
Task<HtmlLoadResult> GetHtmlDocumentAsync(string url, CancellationToken cancellationToken);
|
||||
}
|
||||
@@ -2,6 +2,6 @@
|
||||
|
||||
/// <summary>Abstraction over HTTP GET requests.</summary>
// NOTE: rendered diff without +/- markers; the hunk header (-2,6 +2,6) indicates the
// GetStringAsync pair and the GetAsync pair are the two sides of the change, not four
// coexisting members.
public interface IHttpService
|
||||
{
|
||||
Task<string> GetStringAsync(string url, CancellationToken cancellationToken);
|
||||
Task<string> GetStringAsync(string url, IDictionary<string, string> headers, CancellationToken cancellationToken);
|
||||
/// <summary>Sends a GET request to <paramref name="url"/> and returns an <see cref="HttpStringResponse"/>.</summary>
Task<HttpStringResponse> GetAsync(string url, CancellationToken cancellationToken);
|
||||
/// <summary>Same as the two-argument overload, with caller-supplied request <paramref name="headers"/>.</summary>
Task<HttpStringResponse> GetAsync(string url, IDictionary<string, string> headers, CancellationToken cancellationToken);
|
||||
}
|
||||
@@ -8,6 +8,7 @@ using JSMR.Infrastructure.Http;
|
||||
using JSMR.Infrastructure.Scanning.Extensions;
|
||||
using JSMR.Infrastructure.Scanning.Models;
|
||||
using System.Globalization;
|
||||
using System.Net;
|
||||
using System.Text.RegularExpressions;
|
||||
|
||||
namespace JSMR.Infrastructure.Scanning;
|
||||
@@ -20,24 +21,46 @@ public abstract class VoiceWorksScanner(IHtmlLoader htmlLoader) : IVoiceWorksSca
|
||||
/// <summary>
/// Abstract hook implemented by derived scanners: maps the <paramref name="expectedDate"/> string
/// to a nullable <see cref="DateOnly"/>. (Input format is not visible here — presumably
/// site/locale specific; confirm in the implementations.)
/// </summary>
protected abstract DateOnly? GetEstimatedReleaseDate(string expectedDate);
|
||||
/// <summary>
/// Abstract hook implemented by derived scanners: maps the <paramref name="salesDate"/> string
/// to a nullable <see cref="DateOnly"/>.
/// </summary>
protected abstract DateOnly? GetSalesDate(string salesDate);
|
||||
|
||||
/// <summary>
/// Scans one result page: builds the page URL from <paramref name="options"/>, loads it, and
/// converts the parsed nodes into works.
/// </summary>
/// <param name="options">Scan options used to build the URL and passed to <c>GetDLSiteWorks</c>.</param>
/// <param name="cancellationToken">Token passed through to the HTML loader.</param>
/// <returns>
/// A <c>VoiceWorkScanResult</c>. <c>EndOfResults</c> is <c>true</c> (with an empty <c>Works</c>)
/// when the page responds 404 or parses to zero nodes; otherwise it is <c>false</c> and
/// <c>Works</c> holds the works found on the page.
/// </returns>
/// <exception cref="HttpRequestException">
/// The response was a non-success status other than 404, or no document was returned.
/// </exception>
// NOTE: this listing is a rendered diff without +/- markers. The first signature
// (returning IReadOnlyList<DLSiteWork>), the `GetDLSiteHtmlCollectionAsync` call, `return works;`
// and the `GetDLSiteHtmlCollectionAsync` helper further down appear to be the pre-change side;
// the HtmlLoadResult-based flow and the `VoiceWorkScanResult` returns are the post-change side.
public async Task<IReadOnlyList<DLSiteWork>> ScanPageAsync(VoiceWorkScanOptions options, CancellationToken cancellationToken = default)
|
||||
public async Task<VoiceWorkScanResult> ScanPageAsync(VoiceWorkScanOptions options, CancellationToken cancellationToken = default)
|
||||
{
|
||||
DLSiteHtmlDocument document = await GetDLSiteHtmlCollectionAsync(options, cancellationToken);
|
||||
string url = GetUrl(options);
|
||||
HtmlLoadResult result = await htmlLoader.GetHtmlDocumentAsync(url, cancellationToken);
|
||||
|
||||
// Expected boundary: past the last search page
|
||||
if (result.StatusCode == HttpStatusCode.NotFound)
|
||||
{
|
||||
return new VoiceWorkScanResult(
|
||||
Works: [],
|
||||
EndOfResults: true
|
||||
);
|
||||
}
|
||||
|
||||
// Unexpected non-success response
|
||||
// The null check on Document also guards the constructor call below.
if (!result.IsSuccessStatusCode || result.Document is null)
|
||||
{
|
||||
throw new HttpRequestException(
|
||||
$"Unexpected response status code {(int)result.StatusCode} ({result.StatusCode}) while scanning {url}");
|
||||
}
|
||||
|
||||
DLSiteHtmlDocument document = new(result.Document);
|
||||
DLSiteHtmlNode[] nodes = document.GetDLSiteNodes();
|
||||
|
||||
// Defensive fallback in case DLsite changes from 404 to 200 with empty page
|
||||
if (nodes.Length == 0)
|
||||
{
|
||||
return new VoiceWorkScanResult(
|
||||
Works: [],
|
||||
EndOfResults: true
|
||||
);
|
||||
}
|
||||
|
||||
DLSiteWork[] works = GetDLSiteWorks(nodes, options);
|
||||
// Called for its effect on `works`; the return value (if any) is not used.
works.InferAndUpdateExpectedDates();
|
||||
|
||||
return works;
|
||||
}
|
||||
|
||||
// Presumably the pre-change helper that wrapped the loader call; its URL/load steps now appear
// at the top of ScanPageAsync. Confirm against the repository.
private async Task<DLSiteHtmlDocument> GetDLSiteHtmlCollectionAsync(VoiceWorkScanOptions options, CancellationToken cancellationToken)
|
||||
{
|
||||
string url = GetUrl(options);
|
||||
|
||||
HtmlDocument document = await htmlLoader.GetHtmlDocumentAsync(url, cancellationToken);
|
||||
|
||||
return new DLSiteHtmlDocument(document);
|
||||
// Normal case: at least one node was parsed, so the caller may continue to the next page.
return new VoiceWorkScanResult(
|
||||
Works: works,
|
||||
EndOfResults: false
|
||||
);
|
||||
}
|
||||
|
||||
protected string GetUrl(VoiceWorkScanOptions options)
|
||||
|
||||
Reference in New Issue
Block a user