diff --git a/JSMR.Application/Scanning/Contracts/VoiceWorkScanResult.cs b/JSMR.Application/Scanning/Contracts/VoiceWorkScanResult.cs new file mode 100644 index 0000000..48cb9cf --- /dev/null +++ b/JSMR.Application/Scanning/Contracts/VoiceWorkScanResult.cs @@ -0,0 +1,6 @@ +namespace JSMR.Application.Scanning.Contracts; + +public record VoiceWorkScanResult( + DLSiteWork[] Works, + bool EndOfResults +); \ No newline at end of file diff --git a/JSMR.Application/Scanning/Ports/IVoiceWorksScanner.cs b/JSMR.Application/Scanning/Ports/IVoiceWorksScanner.cs index b6eec0d..b43a56c 100644 --- a/JSMR.Application/Scanning/Ports/IVoiceWorksScanner.cs +++ b/JSMR.Application/Scanning/Ports/IVoiceWorksScanner.cs @@ -4,5 +4,5 @@ namespace JSMR.Application.Scanning.Ports; public interface IVoiceWorksScanner { - Task> ScanPageAsync(VoiceWorkScanOptions request, CancellationToken cancellationToken = default); + Task ScanPageAsync(VoiceWorkScanOptions request, CancellationToken cancellationToken = default); } \ No newline at end of file diff --git a/JSMR.Application/Scanning/ScanVoiceWorksHandler.cs b/JSMR.Application/Scanning/ScanVoiceWorksHandler.cs index dfeab61..44a80a0 100644 --- a/JSMR.Application/Scanning/ScanVoiceWorksHandler.cs +++ b/JSMR.Application/Scanning/ScanVoiceWorksHandler.cs @@ -18,8 +18,11 @@ public sealed class ScanVoiceWorksHandler( IVoiceWorksScanner? scanner = scannerRepository.GetScanner(request.Locale); IVoiceWorkUpdater? updater = updaterRepository.GetUpdater(request.Locale); - if (scanner is null || updater is null) - return new(); + if (scanner is null) + throw new InvalidOperationException($"No scanner registered for locale {request.Locale}."); + + if (updater is null) + throw new InvalidOperationException($"No updater registered for locale {request.Locale}."); VoiceWorkScanOptions options = new( PageNumber: request.PageNumber, @@ -29,15 +32,20 @@ public sealed class ScanVoiceWorksHandler( ExcludeAIGeneratedWorks: true ); - IReadOnlyList works = await scanner.ScanPageAsync(options, cancellationToken); + VoiceWorkScanResult scanResult = await scanner.ScanPageAsync(options, cancellationToken); - if (works.Count == 0) - return new(); + if (scanResult.EndOfResults) + { + return new ScanVoiceWorksResponse( + Results: [], + EndOfResults: true + ); + } - string[] productIds = [.. works.Where(x => !string.IsNullOrWhiteSpace(x.ProductId)).Select(x => x.ProductId!)]; + string[] productIds = [.. scanResult.Works.Where(x => !string.IsNullOrWhiteSpace(x.ProductId)).Select(x => x.ProductId!)]; VoiceWorkDetailCollection voiceWorkDetails = await dlsiteClient.GetVoiceWorkDetailsAsync(productIds, cancellationToken); - VoiceWorkIngest[] ingests = [.. works.Select(work => + VoiceWorkIngest[] ingests = [.. scanResult.Works.Select(work => { voiceWorkDetails.TryGetValue(work.ProductId!, out VoiceWorkDetails? value); return VoiceWorkIngest.From(work, value); @@ -48,9 +56,9 @@ public sealed class ScanVoiceWorksHandler( await searchUpdater.UpdateAsync(voiceWorkIds, cancellationToken); - return new() - { - Results = upsertResults - }; + return new ScanVoiceWorksResponse( + Results: upsertResults, + EndOfResults: false + ); } } \ No newline at end of file diff --git a/JSMR.Application/Scanning/ScanVoiceWorksResponse.cs b/JSMR.Application/Scanning/ScanVoiceWorksResponse.cs index 8e62841..5365c72 100644 --- a/JSMR.Application/Scanning/ScanVoiceWorksResponse.cs +++ b/JSMR.Application/Scanning/ScanVoiceWorksResponse.cs @@ -2,9 +2,7 @@ namespace JSMR.Application.Scanning; -public sealed class ScanVoiceWorksResponse -{ - public int Inserted { get; init; } - public int Updated { get; init; } - public VoiceWorkUpsertResult[] Results { get; init; } = []; -} \ No newline at end of file +public sealed record ScanVoiceWorksResponse( + VoiceWorkUpsertResult[] Results, + bool EndOfResults +); \ No newline at end of file diff --git a/JSMR.Infrastructure/Http/ApiClient.cs b/JSMR.Infrastructure/Http/ApiClient.cs index 5365e1e..2d8c43b 100644 --- a/JSMR.Infrastructure/Http/ApiClient.cs +++ b/JSMR.Infrastructure/Http/ApiClient.cs @@ -10,9 +10,12 @@ public abstract class ApiClient(IHttpService http, ILogger logger, JsonSerialize { protected async Task GetJsonAsync(string url, CancellationToken cancellationToken = default) { - string response = await http.GetStringAsync(url, cancellationToken); + HttpStringResponse response = await http.GetAsync(url, cancellationToken); - return JsonSerializer.Deserialize(response, json) + if (response.Content is null) + throw new Exception("No content to deserialize"); + + return JsonSerializer.Deserialize(response.Content, json) ?? throw new InvalidOperationException($"Failed to deserialize JSON to {typeof(T).Name} from {url}."); } diff --git a/JSMR.Infrastructure/Http/HtmlLoadResult.cs b/JSMR.Infrastructure/Http/HtmlLoadResult.cs new file mode 100644 index 0000000..eb07499 --- /dev/null +++ b/JSMR.Infrastructure/Http/HtmlLoadResult.cs @@ -0,0 +1,12 @@ +using HtmlAgilityPack; +using System.Net; + +namespace JSMR.Infrastructure.Http; + +public sealed class HtmlLoadResult +{ + public required HttpStatusCode StatusCode { get; init; } + public HtmlDocument? Document { get; init; } + + public bool IsSuccessStatusCode => (int)StatusCode is >= 200 and <= 299; +} \ No newline at end of file diff --git a/JSMR.Infrastructure/Http/HtmlLoader.cs b/JSMR.Infrastructure/Http/HtmlLoader.cs index fc9091a..aea1229 100644 --- a/JSMR.Infrastructure/Http/HtmlLoader.cs +++ b/JSMR.Infrastructure/Http/HtmlLoader.cs @@ -4,13 +4,26 @@ namespace JSMR.Infrastructure.Http; public class HtmlLoader(IHttpService httpService) : IHtmlLoader { - public async Task GetHtmlDocumentAsync(string url, CancellationToken cancellationToken) + public async Task GetHtmlDocumentAsync(string url, CancellationToken cancellationToken) { - string html = await httpService.GetStringAsync(url, cancellationToken); + HttpStringResponse response = await httpService.GetAsync(url, cancellationToken); + + if (!response.IsSuccessStatusCode) + { + return new HtmlLoadResult + { + StatusCode = response.StatusCode, + Document = null + }; + } HtmlDocument document = new(); - document.LoadHtml(html); + document.LoadHtml(response.Content ?? string.Empty); - return document; + return new HtmlLoadResult + { + StatusCode = response.StatusCode, + Document = document + }; } } \ No newline at end of file diff --git a/JSMR.Infrastructure/Http/HttpService.cs b/JSMR.Infrastructure/Http/HttpService.cs index d495618..8a18f52 100644 --- a/JSMR.Infrastructure/Http/HttpService.cs +++ b/JSMR.Infrastructure/Http/HttpService.cs @@ -2,10 +2,10 @@ public class HttpService(HttpClient httpClient) : IHttpService { - public Task GetStringAsync(string url, CancellationToken cancellationToken) - => GetStringAsync(url, new Dictionary(), cancellationToken); + public Task GetAsync(string url, CancellationToken cancellationToken) + => GetAsync(url, new Dictionary(), cancellationToken); - public async Task GetStringAsync(string url, IDictionary headers, CancellationToken cancellationToken) + public async Task GetAsync(string url, IDictionary headers, CancellationToken cancellationToken) { using HttpRequestMessage request = new(HttpMethod.Get, url); @@ -14,11 +14,18 @@ public class HttpService(HttpClient httpClient) : IHttpService request.Headers.TryAddWithoutValidation(header.Key, header.Value); } - httpClient.DefaultRequestHeaders.UserAgent.ParseAdd("JSMR/1.0"); + request.Headers.UserAgent.ParseAdd("JSMR/1.0"); - using HttpResponseMessage response = await httpClient.SendAsync(request, cancellationToken); - response.EnsureSuccessStatusCode(); + using HttpResponseMessage response = await httpClient.SendAsync(request, HttpCompletionOption.ResponseHeadersRead, cancellationToken); - return await response.Content.ReadAsStringAsync(cancellationToken); + string? content = response.Content is null + ? null + : await response.Content.ReadAsStringAsync(cancellationToken); + + return new HttpStringResponse + { + StatusCode = response.StatusCode, + Content = content + }; } } \ No newline at end of file diff --git a/JSMR.Infrastructure/Http/HttpStringResponse.cs b/JSMR.Infrastructure/Http/HttpStringResponse.cs new file mode 100644 index 0000000..4770a0b --- /dev/null +++ b/JSMR.Infrastructure/Http/HttpStringResponse.cs @@ -0,0 +1,11 @@ +using System.Net; + +namespace JSMR.Infrastructure.Http; + +public sealed class HttpStringResponse +{ + public required HttpStatusCode StatusCode { get; init; } + public string? Content { get; init; } + + public bool IsSuccessStatusCode => (int)StatusCode is >= 200 and <= 299; +} \ No newline at end of file diff --git a/JSMR.Infrastructure/Http/IHtmlLoader.cs b/JSMR.Infrastructure/Http/IHtmlLoader.cs index c1753bf..7c811e4 100644 --- a/JSMR.Infrastructure/Http/IHtmlLoader.cs +++ b/JSMR.Infrastructure/Http/IHtmlLoader.cs @@ -1,8 +1,6 @@ -using HtmlAgilityPack; - -namespace JSMR.Infrastructure.Http; +namespace JSMR.Infrastructure.Http; public interface IHtmlLoader { - Task GetHtmlDocumentAsync(string url, CancellationToken cancellationToken); + Task GetHtmlDocumentAsync(string url, CancellationToken cancellationToken); } \ No newline at end of file diff --git a/JSMR.Infrastructure/Http/IHttpService.cs b/JSMR.Infrastructure/Http/IHttpService.cs index 1cb8d5e..7f04940 100644 --- a/JSMR.Infrastructure/Http/IHttpService.cs +++ b/JSMR.Infrastructure/Http/IHttpService.cs @@ -2,6 +2,6 @@ public interface IHttpService { - Task GetStringAsync(string url, CancellationToken cancellationToken); - Task GetStringAsync(string url, IDictionary headers, CancellationToken cancellationToken); + Task GetAsync(string url, CancellationToken cancellationToken); + Task GetAsync(string url, IDictionary headers, CancellationToken cancellationToken); } \ No newline at end of file diff --git a/JSMR.Infrastructure/Scanning/VoiceWorksScanner.cs b/JSMR.Infrastructure/Scanning/VoiceWorksScanner.cs index 9366d07..e05142b 100644 --- a/JSMR.Infrastructure/Scanning/VoiceWorksScanner.cs +++ b/JSMR.Infrastructure/Scanning/VoiceWorksScanner.cs @@ -8,6 +8,7 @@ using JSMR.Infrastructure.Http; using JSMR.Infrastructure.Scanning.Extensions; using JSMR.Infrastructure.Scanning.Models; using System.Globalization; +using System.Net; using System.Text.RegularExpressions; namespace JSMR.Infrastructure.Scanning; @@ -20,24 +21,46 @@ public abstract class VoiceWorksScanner(IHtmlLoader htmlLoader) : IVoiceWorksSca protected abstract DateOnly? GetEstimatedReleaseDate(string expectedDate); protected abstract DateOnly? GetSalesDate(string salesDate); - public async Task> ScanPageAsync(VoiceWorkScanOptions options, CancellationToken cancellationToken = default) + public async Task ScanPageAsync(VoiceWorkScanOptions options, CancellationToken cancellationToken = default) { - DLSiteHtmlDocument document = await GetDLSiteHtmlCollectionAsync(options, cancellationToken); + string url = GetUrl(options); + HtmlLoadResult result = await htmlLoader.GetHtmlDocumentAsync(url, cancellationToken); + + // Expected boundary: past the last search page + if (result.StatusCode == HttpStatusCode.NotFound) + { + return new VoiceWorkScanResult( + Works: [], + EndOfResults: true + ); + } + + // Unexpected non-success response + if (!result.IsSuccessStatusCode || result.Document is null) + { + throw new HttpRequestException( + $"Unexpected response status code {(int)result.StatusCode} ({result.StatusCode}) while scanning {url}"); + } + + DLSiteHtmlDocument document = new(result.Document); DLSiteHtmlNode[] nodes = document.GetDLSiteNodes(); + // Defensive fallback in case DLsite changes from 404 to 200 with empty page + if (nodes.Length == 0) + { + return new VoiceWorkScanResult( + Works: [], + EndOfResults: true + ); + } + DLSiteWork[] works = GetDLSiteWorks(nodes, options); works.InferAndUpdateExpectedDates(); - return works; - } - - private async Task GetDLSiteHtmlCollectionAsync(VoiceWorkScanOptions options, CancellationToken cancellationToken) - { - string url = GetUrl(options); - - HtmlDocument document = await htmlLoader.GetHtmlDocumentAsync(url, cancellationToken); - - return new DLSiteHtmlDocument(document); + return new VoiceWorkScanResult( + Works: works, + EndOfResults: false + ); } protected string GetUrl(VoiceWorkScanOptions options) diff --git a/JSMR.Tests/Extensions/HttpServiceTestExtensions.cs b/JSMR.Tests/Extensions/HttpServiceTestExtensions.cs new file mode 100644 index 0000000..35a8b49 --- /dev/null +++ b/JSMR.Tests/Extensions/HttpServiceTestExtensions.cs @@ -0,0 +1,20 @@ +using JSMR.Infrastructure.Http; +using NSubstitute; +using System.Net; + +namespace JSMR.Tests.Extensions; + +internal static class HttpServiceTestExtensions +{ + public static void ReturnsContent(this IHttpService httpService, string content, HttpStatusCode statusCode = HttpStatusCode.OK) + { + HttpStringResponse response = new() + { + StatusCode = statusCode, + Content = content + }; + + httpService.GetAsync(Arg.Any(), Arg.Any()) + .Returns(Task.FromResult(response)); + } +} \ No newline at end of file diff --git a/JSMR.Tests/Extensions/ScannerTestExtensions.cs b/JSMR.Tests/Extensions/ScannerTestExtensions.cs new file mode 100644 index 0000000..64f16da --- /dev/null +++ b/JSMR.Tests/Extensions/ScannerTestExtensions.cs @@ -0,0 +1,17 @@ +using JSMR.Application.Scanning.Contracts; +using JSMR.Application.Scanning.Ports; +using Shouldly; + +namespace JSMR.Tests.Extensions; + +internal static class ScannerTestExtensions +{ + public static async Task> ScanWorksAsync(this IVoiceWorksScanner scanner, VoiceWorkScanOptions options) + { + VoiceWorkScanResult result = await scanner.ScanPageAsync(options, CancellationToken.None); + + result.EndOfResults.ShouldBeFalse(); + + return result.Works; + } +} \ No newline at end of file diff --git a/JSMR.Tests/Integrations/DLSite/DLSiteClientTests.cs b/JSMR.Tests/Integrations/DLSite/DLSiteClientTests.cs index 3378aba..ea36f7c 100644 --- a/JSMR.Tests/Integrations/DLSite/DLSiteClientTests.cs +++ b/JSMR.Tests/Integrations/DLSite/DLSiteClientTests.cs @@ -5,6 +5,7 @@ using JSMR.Infrastructure.Http; using JSMR.Infrastructure.Integrations.DLSite; using JSMR.Infrastructure.Integrations.DLSite.Mapping; using JSMR.Infrastructure.Integrations.DLSite.Models; +using JSMR.Tests.Extensions; using JSMR.Tests.Utilities; using Microsoft.Extensions.Logging; using NSubstitute; @@ -25,9 +26,7 @@ public class DLSiteClientTests string productInfoJson = await ReadJsonResourceAsync("Product-Info.json"); IHttpService httpService = Substitute.For(); - - httpService.GetStringAsync(Arg.Any(), CancellationToken.None) - .Returns(Task.FromResult(productInfoJson)); + httpService.ReturnsContent(productInfoJson); var logger = Substitute.For>(); var client = new DLSiteClient(httpService, logger); diff --git a/JSMR.Tests/Scanning/VoiceWorkScannerTests.cs b/JSMR.Tests/Scanning/VoiceWorkScannerTests.cs index 4d18f45..bc847a2 100644 --- a/JSMR.Tests/Scanning/VoiceWorkScannerTests.cs +++ b/JSMR.Tests/Scanning/VoiceWorkScannerTests.cs @@ -4,6 +4,7 @@ using JSMR.Application.Scanning.Contracts; using JSMR.Application.Scanning.Ports; using JSMR.Infrastructure.Http; using JSMR.Infrastructure.Scanning; +using JSMR.Tests.Extensions; using JSMR.Tests.Utilities; using NSubstitute; using Shouldly; @@ -23,9 +24,7 @@ public class VoiceWorkScannerTests string html = await ReadResourceAsync("Japanese-Page.html"); IHttpService httpService = Substitute.For(); - - httpService.GetStringAsync(Arg.Any(), CancellationToken.None) - .Returns(Task.FromResult(html)); + httpService.ReturnsContent(html); HtmlLoader loader = new(httpService); JapaneseVoiceWorksScanner scanner = new(loader); @@ -38,7 +37,7 @@ public class VoiceWorkScannerTests ExcludedMakerIds: [] ); - var result = await scanner.ScanPageAsync(options, CancellationToken.None); + var result = await scanner.ScanWorksAsync(options); result.Count.ShouldBe(1); @@ -61,9 +60,7 @@ public class VoiceWorkScannerTests string html = await ReadResourceAsync("Japanese-Page-Updated.html"); IHttpService httpService = Substitute.For(); - - httpService.GetStringAsync(Arg.Any(), CancellationToken.None) - .Returns(Task.FromResult(html)); + httpService.ReturnsContent(html); HtmlLoader loader = new(httpService); JapaneseVoiceWorksScanner scanner = new(loader); @@ -76,7 +73,7 @@ public class VoiceWorkScannerTests ExcludedMakerIds: [] ); - var result = await scanner.ScanPageAsync(options, CancellationToken.None); + var result = await scanner.ScanWorksAsync(options); result.Count.ShouldBe(2); @@ -110,7 +107,7 @@ public class VoiceWorkScannerTests { IVoiceWorksScanner scanner = Substitute.For(); - IReadOnlyList scannedWorks = + DLSiteWork[] scannedWorks = [ new() { @@ -123,8 +120,13 @@ public class VoiceWorkScannerTests } ]; + VoiceWorkScanResult scanResult = new( + Works: scannedWorks, + EndOfResults: false + ); + scanner.ScanPageAsync(Arg.Any(), CancellationToken.None) - .Returns(Task.FromResult(scannedWorks)); + .Returns(Task.FromResult(scanResult)); IDLSiteClient dlsiteClient = Substitute.For(); @@ -155,9 +157,7 @@ public class VoiceWorkScannerTests string englishPageHtml = await ReadResourceAsync("English-Page.html"); IHttpService httpService = Substitute.For(); - - httpService.GetStringAsync(Arg.Any(), CancellationToken.None) - .Returns(Task.FromResult(englishPageHtml)); + httpService.ReturnsContent(englishPageHtml); HtmlLoader loader = new(httpService); EnglishVoiceWorksScanner scanner = new(loader); @@ -170,7 +170,7 @@ public class VoiceWorkScannerTests ExcludedMakerIds: [] ); - var result = await scanner.ScanPageAsync(options, CancellationToken.None); + var result = await scanner.ScanWorksAsync(options); result.Count.ShouldBe(2); @@ -197,9 +197,7 @@ public class VoiceWorkScannerTests string html = await ReadResourceAsync("English-Page-Updated.html"); IHttpService httpService = Substitute.For(); - - httpService.GetStringAsync(Arg.Any(), CancellationToken.None) - .Returns(Task.FromResult(html)); + httpService.ReturnsContent(html); HtmlLoader loader = new(httpService); EnglishVoiceWorksScanner scanner = new(loader); @@ -212,7 +210,7 @@ public class VoiceWorkScannerTests ExcludedMakerIds: [] ); - var result = await scanner.ScanPageAsync(options, CancellationToken.None); + var result = await scanner.ScanWorksAsync(options); result.Count.ShouldBe(1); diff --git a/JSMR.Worker/Services/ScanRunner.cs b/JSMR.Worker/Services/ScanRunner.cs index f3eee6a..1686654 100644 --- a/JSMR.Worker/Services/ScanRunner.cs +++ b/JSMR.Worker/Services/ScanRunner.cs @@ -7,6 +7,7 @@ using JSMR.Worker.Options; using JSMR.Worker.UI; using Microsoft.Extensions.DependencyInjection; using Microsoft.Extensions.Logging; +using Serilog.Core; using Spectre.Console; using System.Globalization; @@ -45,15 +46,6 @@ public sealed class PagedScanRunner( //log.LogInformation("Scanning page {Page} (size {Size}, locale {Locale})…", currentPage, pageSize, locale); CliUi.PageHeader(currentPage, end); - //AnsiConsole.Status() - //.Start($"[grey]Scanning page[/] [bold]{currentPage}[/] [grey]of[/] [bold]{end}[/][grey]...[/]", ctx => - //{ - // // Simulate grinding - // Thread.Sleep(3000); - //}); - - //AnsiConsole.MarkupLine($"[green]✓ Scanning page[/] [bold]{currentPage}[/] [grey]of[/] [bold]{end}[/][grey]... DONE[/]"); - ScanVoiceWorksRequest request = new( PageNumber: currentPage, PageSize: 100, @@ -62,6 +54,12 @@ public sealed class PagedScanRunner( ScanVoiceWorksResponse response = await handler.HandleAsync(request, cancellationToken); + if (response.EndOfResults) + { + CliUi.Information($"Reached end of results at page {currentPage}. Stopping scan."); + break; + } + int newUpcoming = response.Results.Count(x => x.UpdateStatus == VoiceWorkStatus.NewAndUpcoming); //if (newUpcoming > 0) diff --git a/JSMR.Worker/UI/CliUi.cs b/JSMR.Worker/UI/CliUi.cs index d7b0a4c..e5ee0f3 100644 --- a/JSMR.Worker/UI/CliUi.cs +++ b/JSMR.Worker/UI/CliUi.cs @@ -50,6 +50,9 @@ public static class CliUi AnsiConsole.Write(panel); } + public static void Information(string message) => + AnsiConsole.MarkupLine($"[blue]🛈 {Escape(message)}[/]"); + public static void Warning(string message) => AnsiConsole.MarkupLine($"[yellow]⚠ {Escape(message)}[/]");