From 646cf414769b79acd1fdec510260f73d17b4edde Mon Sep 17 00:00:00 2001 From: Brian Bicknell Date: Sun, 14 Sep 2025 21:12:00 -0400 Subject: [PATCH] Updated scanner logic, and added initial scanner tests. --- JSMR.Api/Program.cs | 2 +- JSMR.Application/JSMR.Application.csproj | 8 +- .../Scanning/Contracts/DLSiteWork.cs | 4 +- .../Contracts/VoiceWorkScanOptions.cs | 3 + .../Scanning/Ports/IVoiceWorksScanner.cs | 2 +- .../Scanning/ScanVoiceWorksHandler.cs | 61 +++-- .../Scanning/ScanVoiceWorksRequest.cs | 2 +- ...frastructureServiceCollectionExtensions.cs | 10 + .../JSMR.Infrastructure.csproj | 1 + .../Scanning/DLSiteSearchFilterBuilder.cs | 10 + .../Scanning/EnglishVoiceWorksScanner.cs | 150 +++--------- .../Scanning/JapaneseVoiceWorksScanner.cs | 14 +- .../Scanning/VoiceWorksScanner.cs | 55 ++--- JSMR.Tests/JSMR.Tests.csproj | 2 + JSMR.Tests/Scanning/English-Page.html | 227 ++++++++++++++++++ JSMR.Tests/Scanning/VoiceWorkScannerTests.cs | 53 ++++ 16 files changed, 412 insertions(+), 192 deletions(-) create mode 100644 JSMR.Application/Scanning/Contracts/VoiceWorkScanOptions.cs create mode 100644 JSMR.Tests/Scanning/English-Page.html create mode 100644 JSMR.Tests/Scanning/VoiceWorkScannerTests.cs diff --git a/JSMR.Api/Program.cs b/JSMR.Api/Program.cs index 64f9e2f..d044abb 100644 --- a/JSMR.Api/Program.cs +++ b/JSMR.Api/Program.cs @@ -22,7 +22,7 @@ builder.Services var cs = builder.Configuration.GetConnectionString("AppDb") ?? throw new InvalidOperationException("Missing ConnectionStrings:AppDb2"); -builder.Services.AddDbContext(opt => +builder.Services.AddDbContextFactory(opt => opt.UseMySql(cs, ServerVersion.AutoDetect(cs)) .EnableSensitiveDataLogging(false)); diff --git a/JSMR.Application/JSMR.Application.csproj b/JSMR.Application/JSMR.Application.csproj index 44ead7b..82df376 100644 --- a/JSMR.Application/JSMR.Application.csproj +++ b/JSMR.Application/JSMR.Application.csproj @@ -6,12 +6,12 @@ enable - - - - + + + + diff --git a/JSMR.Application/Scanning/Contracts/DLSiteWork.cs b/JSMR.Application/Scanning/Contracts/DLSiteWork.cs index b1867a8..ffc2fce 100644 --- a/JSMR.Application/Scanning/Contracts/DLSiteWork.cs +++ b/JSMR.Application/Scanning/Contracts/DLSiteWork.cs @@ -8,8 +8,8 @@ public class DLSiteWork public string? ProductUrl { get; set; } public string? ProductId { get; set; } public DateOnly? AnnouncedDate { get; set; } - public DateTime? ExpectedDate { get; set; } - public DateTime? SalesDate { get; set; } + public DateOnly? ExpectedDate { get; set; } + public DateOnly? SalesDate { get; set; } public int? Downloads { get; set; } public byte? StarRating { get; set; } public int? Votes { get; set; } diff --git a/JSMR.Application/Scanning/Contracts/VoiceWorkScanOptions.cs b/JSMR.Application/Scanning/Contracts/VoiceWorkScanOptions.cs new file mode 100644 index 0000000..f3588fb --- /dev/null +++ b/JSMR.Application/Scanning/Contracts/VoiceWorkScanOptions.cs @@ -0,0 +1,3 @@ +namespace JSMR.Application.Scanning.Contracts; + +public record VoiceWorkScanOptions(int PageNumber, int PageSize, string[] ExcludedMakerIds, bool ExcludePartiallyAIGeneratedWorks, bool ExcludeAIGeneratedWorks); \ No newline at end of file diff --git a/JSMR.Application/Scanning/Ports/IVoiceWorksScanner.cs b/JSMR.Application/Scanning/Ports/IVoiceWorksScanner.cs index ea12fb3..b6eec0d 100644 --- a/JSMR.Application/Scanning/Ports/IVoiceWorksScanner.cs +++ b/JSMR.Application/Scanning/Ports/IVoiceWorksScanner.cs @@ -4,5 +4,5 @@ namespace JSMR.Application.Scanning.Ports; public interface IVoiceWorksScanner { - Task> ScanPageAsync(ScanVoiceWorksRequest request, CancellationToken cancellationToken = default); + Task> ScanPageAsync(VoiceWorkScanOptions request, CancellationToken cancellationToken = default); } \ No newline at end of file diff --git a/JSMR.Application/Scanning/ScanVoiceWorksHandler.cs b/JSMR.Application/Scanning/ScanVoiceWorksHandler.cs index 0554d6c..5f8549e 100644 --- a/JSMR.Application/Scanning/ScanVoiceWorksHandler.cs +++ b/JSMR.Application/Scanning/ScanVoiceWorksHandler.cs @@ -1,26 +1,53 @@ -using JSMR.Application.Scanning.Ports; +using JSMR.Application.Common.Caching; +using JSMR.Application.Integrations.DLSite.Models; +using JSMR.Application.Integrations.Ports; +using JSMR.Application.Scanning.Contracts; +using JSMR.Application.Scanning.Ports; +using Microsoft.Extensions.DependencyInjection; namespace JSMR.Application.Scanning; -public sealed class ScanVoiceWorksHandler(IVoiceWorksScanner scanner) +public sealed class ScanVoiceWorksHandler(IServiceProvider serviceProvider, IDLSiteClient dlsiteClient, ISpamCircleCache spamCircleCache) { - //public async Task HandleAsync(ScanVoiceWorksRequest request, CancellationToken cancellationToken) - //{ - // var works = await scanner.ScanPageAsync(request, cancellationToken); + public async Task HandleAsync(ScanVoiceWorksRequest request, CancellationToken cancellationToken) + { + IVoiceWorksScanner? scanner = serviceProvider.GetKeyedService(request.Locale); - // if (works.Count == 0) - // return new ScanVoiceWorksResponse(); + if (scanner is null) + return new(); - // var ingests = works.Select(VoiceWorkIngest.From).ToList(); - // var upsert = await _writer.UpsertAsync(ingests, ct); + VoiceWorkScanOptions options = new( + PageNumber: request.PageNumber, + PageSize: request.PageSize, + ExcludedMakerIds: await spamCircleCache.GetAsync(cancellationToken), + ExcludePartiallyAIGeneratedWorks: true, + ExcludeAIGeneratedWorks: true + ); - // // only update search text for affected rows - // await _search.UpdateAsync(upsert.AffectedVoiceWorkIds, ct); + IReadOnlyList works = await scanner.ScanPageAsync(options, cancellationToken); - // return new ScanVoiceWorksResponse - // { - // Inserted = upsert.Inserted, - // Updated = upsert.Updated - // }; - //} + if (works.Count == 0) + return new(); + + string[] productIds = [.. works.Where(x => !string.IsNullOrWhiteSpace(x.ProductId)).Select(x => x.ProductId!)]; + VoiceWorkDetailCollection voiceWorkDetails = await dlsiteClient.GetVoiceWorkDetailsAsync(productIds, cancellationToken); + + // TODO + + /* + var ingests = works.Select(VoiceWorkIngest.From).ToList(); + var upsert = await _writer.UpsertAsync(ingests, ct); + + // only update search text for affected rows + await _search.UpdateAsync(upsert.AffectedVoiceWorkIds, ct); + + return new ScanVoiceWorksResponse + { + Inserted = upsert.Inserted, + Updated = upsert.Updated + }; + */ + + return new(); + } } \ No newline at end of file diff --git a/JSMR.Application/Scanning/ScanVoiceWorksRequest.cs b/JSMR.Application/Scanning/ScanVoiceWorksRequest.cs index 226b7e7..accae70 100644 --- a/JSMR.Application/Scanning/ScanVoiceWorksRequest.cs +++ b/JSMR.Application/Scanning/ScanVoiceWorksRequest.cs @@ -2,4 +2,4 @@ namespace JSMR.Application.Scanning; -public sealed record ScanVoiceWorksRequest(int PageNumber, int PageSize, Locale Locale); \ No newline at end of file +public sealed record ScanVoiceWorksRequest(int PageNumber, int PageSize, Locale Locale, string[] ExcludedMakerIds); \ No newline at end of file diff --git a/JSMR.Infrastructure/DI/InfrastructureServiceCollectionExtensions.cs b/JSMR.Infrastructure/DI/InfrastructureServiceCollectionExtensions.cs index a86f366..3fcd8fb 100644 --- a/JSMR.Infrastructure/DI/InfrastructureServiceCollectionExtensions.cs +++ b/JSMR.Infrastructure/DI/InfrastructureServiceCollectionExtensions.cs @@ -1,10 +1,12 @@ using JSMR.Application.Circles.Queries.GetCreators; using JSMR.Application.Circles.Queries.GetTags; using JSMR.Application.Circles.Queries.Search; +using JSMR.Application.Common; using JSMR.Application.Common.Caching; using JSMR.Application.Creators.Ports; using JSMR.Application.Creators.Queries.Search.Ports; using JSMR.Application.Integrations.Ports; +using JSMR.Application.Scanning.Ports; using JSMR.Application.Tags.Ports; using JSMR.Application.Tags.Queries.Search.Ports; using JSMR.Application.VoiceWorks.Ports; @@ -17,6 +19,7 @@ using JSMR.Infrastructure.Data.Repositories.Tags; using JSMR.Infrastructure.Data.Repositories.VoiceWorks; using JSMR.Infrastructure.Http; using JSMR.Infrastructure.Integrations.DLSite; +using JSMR.Infrastructure.Scanning; using Microsoft.Extensions.DependencyInjection; namespace JSMR.Infrastructure.DI; @@ -32,6 +35,8 @@ public static class InfrastructureServiceCollectionExtensions services.AddScoped(); services.AddScoped(); services.AddScoped(); + services.AddKeyedScoped(Locale.Japanese); + services.AddKeyedScoped(Locale.English); services.AddScoped(); services.AddScoped(); @@ -42,6 +47,11 @@ public static class InfrastructureServiceCollectionExtensions services.AddSingleton(); services.AddSingleton(); + services.AddHttpClient(client => + { + client.DefaultRequestHeaders.UserAgent.ParseAdd("JSMR/1.0"); + }); + services.AddScoped(); services.AddScoped(); diff --git a/JSMR.Infrastructure/JSMR.Infrastructure.csproj b/JSMR.Infrastructure/JSMR.Infrastructure.csproj index 6b50e65..c633582 100644 --- a/JSMR.Infrastructure/JSMR.Infrastructure.csproj +++ b/JSMR.Infrastructure/JSMR.Infrastructure.csproj @@ -11,6 +11,7 @@ + diff --git a/JSMR.Infrastructure/Scanning/DLSiteSearchFilterBuilder.cs b/JSMR.Infrastructure/Scanning/DLSiteSearchFilterBuilder.cs index b060383..b83e88b 100644 --- a/JSMR.Infrastructure/Scanning/DLSiteSearchFilterBuilder.cs +++ b/JSMR.Infrastructure/Scanning/DLSiteSearchFilterBuilder.cs @@ -34,6 +34,16 @@ public class DLSiteSearchFilterBuilder return this; } + public DLSiteSearchFilterBuilder IncludeSupportedLanguages(ISupportedLanguage[] languages) + { + foreach (ISupportedLanguage language in languages) + { + IncludeSupportedLanguage(language); + } + + return this; + } + public DLSiteSearchFilterBuilder IncludeSupportedLanguage(ISupportedLanguage language) { AddToOptionsAnd(language.Code); diff --git a/JSMR.Infrastructure/Scanning/EnglishVoiceWorksScanner.cs b/JSMR.Infrastructure/Scanning/EnglishVoiceWorksScanner.cs index ae96e70..0abba7e 100644 --- a/JSMR.Infrastructure/Scanning/EnglishVoiceWorksScanner.cs +++ b/JSMR.Infrastructure/Scanning/EnglishVoiceWorksScanner.cs @@ -1,15 +1,14 @@ -using JSMR.Application.Common.Caching; -using JSMR.Infrastructure.Common.Locales; +using JSMR.Infrastructure.Common.Locales; using JSMR.Infrastructure.Common.SupportedLanguages; using JSMR.Infrastructure.Http; +using System.Globalization; using System.Text.RegularExpressions; namespace JSMR.Infrastructure.Scanning; -public partial class EnglishVoiceWorksScanner(IHtmlLoader loader, ISpamCircleCache spamCircleCache) - : VoiceWorksScanner(loader, spamCircleCache) +public partial class EnglishVoiceWorksScanner(IHtmlLoader loader) : VoiceWorksScanner(loader) { - [GeneratedRegex(@"Release: (.*?)[/](\d{2})[/](\d{4})", RegexOptions.IgnoreCase, "en-US")] + [GeneratedRegex(@"Release date: (.*?)[/](\d{1,2})[/](\d{4})", RegexOptions.IgnoreCase, "en-US")] private static partial Regex SalesDateRegex(); [GeneratedRegex(@"^(Early|Middle|Late)\s(.*?)\s(\d{4})", RegexOptions.IgnoreCase, "en-US")] @@ -24,140 +23,47 @@ public partial class EnglishVoiceWorksScanner(IHtmlLoader loader, ISpamCircleCac new AlingualLanguage() ]; - protected override DateTime? GetEstimatedReleaseDate(string expectedDate) + protected override DateOnly? GetEstimatedReleaseDate(string expectedDate) { - if (expectedDate.Contains("販売中") || expectedDate.Contains("発売予定未定")) + if (expectedDate.Contains("Release Date: TBC", StringComparison.OrdinalIgnoreCase)) return null; - Regex textRegex = EstimatedDateRegex(); - MatchCollection textMatches = textRegex.Matches(expectedDate); + Match match = EstimatedDateRegex().Match(expectedDate); - if (textMatches.Count == 0 || textMatches[0].Groups.Count < 4) + if (match.Success == false) return null; - GroupCollection groups = textMatches[0].Groups; + GroupCollection groups = match.Groups; - int releaseYear = Convert.ToInt32(groups[3].Value); - - int releaseMonth = 1; - int releaseDay = 1; - - string releaseTime = groups[1].Value; - string releaseMonthText = groups[2].Value; - - switch (releaseTime) + int day = groups[1].Value.ToLowerInvariant() switch { - case "Early": - releaseDay = 1; - break; - case "Middle": - releaseDay = 11; - break; - case "Late": - releaseDay = 21; - break; - } + "early" => 1, + "middle" => 11, + "late" => 21, + _ => 1 + }; - switch (releaseMonthText) - { - case "Jan.": - releaseMonth = 1; - break; - case "Feb.": - releaseMonth = 2; - break; - case "Mar.": - releaseMonth = 3; - break; - case "Apr.": - releaseMonth = 4; - break; - case "May.": - releaseMonth = 5; - break; - case "Jun.": - releaseMonth = 6; - break; - case "Jul.": - releaseMonth = 7; - break; - case "Aug.": - releaseMonth = 8; - break; - case "Sep.": - releaseMonth = 9; - break; - case "Oct.": - releaseMonth = 10; - break; - case "Nov.": - releaseMonth = 11; - break; - case "Dec.": - releaseMonth = 12; - break; - } + string monthAbbreviation = groups[2].Value.Replace(".", ""); + int month = DateTime.ParseExact(monthAbbreviation, "MMM", CultureInfo.InvariantCulture).Month; - return new DateTime(releaseYear, releaseMonth, releaseDay); + int year = Convert.ToInt32(groups[3].Value); + + return new DateOnly(year, month, day); } - protected override DateTime? GetSalesDate(string salesDate) + protected override DateOnly? GetSalesDate(string salesDate) { - Regex textRegex = SalesDateRegex(); - MatchCollection textMatches = textRegex.Matches(salesDate); + Match match = SalesDateRegex().Match(salesDate); - if (textMatches.Count == 0 || textMatches[0].Groups.Count < 4) + if (match.Success == false) return null; - string month = textMatches[0].Groups[1].Value; - int releaseMonth = -1; + string monthAbbreviation = match.Groups[1].Value; + int day = int.Parse(match.Groups[2].Value); + int year = int.Parse(match.Groups[3].Value); - switch (month) - { - case "Jan": - releaseMonth = 1; - break; - case "Feb": - releaseMonth = 2; - break; - case "Mar": - releaseMonth = 3; - break; - case "Apr": - releaseMonth = 4; - break; - case "May": - releaseMonth = 5; - break; - case "Jun": - releaseMonth = 6; - break; - case "Jul": - releaseMonth = 7; - break; - case "Aug": - releaseMonth = 8; - break; - case "Sep": - releaseMonth = 9; - break; - case "Oct": - releaseMonth = 10; - break; - case "Nov": - releaseMonth = 11; - break; - case "Dec": - releaseMonth = 12; - break; - } + int month = DateTime.ParseExact(monthAbbreviation, "MMM", CultureInfo.InvariantCulture).Month; - if (releaseMonth == -1) - return null; - - int releaseYear = Convert.ToInt32(textMatches[0].Groups[3].Value); - int releaseDay = Convert.ToInt32(textMatches[0].Groups[2].Value); - - return new DateTime(releaseYear, releaseMonth, releaseDay); + return new(year, month, day); } } \ No newline at end of file diff --git a/JSMR.Infrastructure/Scanning/JapaneseVoiceWorksScanner.cs b/JSMR.Infrastructure/Scanning/JapaneseVoiceWorksScanner.cs index 3d0c7a9..22c341f 100644 --- a/JSMR.Infrastructure/Scanning/JapaneseVoiceWorksScanner.cs +++ b/JSMR.Infrastructure/Scanning/JapaneseVoiceWorksScanner.cs @@ -1,13 +1,11 @@ -using JSMR.Application.Common.Caching; -using JSMR.Infrastructure.Common.Locales; +using JSMR.Infrastructure.Common.Locales; using JSMR.Infrastructure.Common.SupportedLanguages; using JSMR.Infrastructure.Http; using System.Text.RegularExpressions; namespace JSMR.Infrastructure.Scanning; -public class JapaneseVoiceWorksScanner(IHtmlLoader loader, ISpamCircleCache spamCircleCache) - : VoiceWorksScanner(loader, spamCircleCache) +public class JapaneseVoiceWorksScanner(IHtmlLoader loader) : VoiceWorksScanner(loader) { protected override ILocale Locale => new JapaneseLocale(); @@ -21,7 +19,7 @@ public class JapaneseVoiceWorksScanner(IHtmlLoader loader, ISpamCircleCache spam new AlingualLanguage() ]; - protected override DateTime? GetEstimatedReleaseDate(string expectedDate) + protected override DateOnly? GetEstimatedReleaseDate(string expectedDate) { if (expectedDate.Contains("販売中") || expectedDate.Contains("発売予定未定")) return null; @@ -54,10 +52,10 @@ public class JapaneseVoiceWorksScanner(IHtmlLoader loader, ISpamCircleCache spam break; } - return new DateTime(releaseYear, releaseMonth, releaseDay); + return new DateOnly(releaseYear, releaseMonth, releaseDay); } - protected override DateTime? GetSalesDate(string salesDate) + protected override DateOnly? GetSalesDate(string salesDate) { Regex textRegex = new Regex("販売日: (.*?)年(.*?)月(.*)日", RegexOptions.IgnoreCase); MatchCollection textMatches = textRegex.Matches(salesDate); @@ -69,6 +67,6 @@ public class JapaneseVoiceWorksScanner(IHtmlLoader loader, ISpamCircleCache spam int releaseMonth = Convert.ToInt32(textMatches[0].Groups[2].Value); int releaseDay = Convert.ToInt32(textMatches[0].Groups[3].Value); - return new DateTime(releaseYear, releaseMonth, releaseDay); + return new DateOnly(releaseYear, releaseMonth, releaseDay); } } \ No newline at end of file diff --git a/JSMR.Infrastructure/Scanning/VoiceWorksScanner.cs b/JSMR.Infrastructure/Scanning/VoiceWorksScanner.cs index 0d3e6ea..7a07d29 100644 --- a/JSMR.Infrastructure/Scanning/VoiceWorksScanner.cs +++ b/JSMR.Infrastructure/Scanning/VoiceWorksScanner.cs @@ -1,6 +1,4 @@ using HtmlAgilityPack; -using JSMR.Application.Common.Caching; -using JSMR.Application.Scanning; using JSMR.Application.Scanning.Contracts; using JSMR.Application.Scanning.Ports; using JSMR.Infrastructure.Common.Locales; @@ -12,72 +10,57 @@ using System.Text.RegularExpressions; namespace JSMR.Infrastructure.Scanning; -public abstract class VoiceWorksScanner(IHtmlLoader htmlLoader, ISpamCircleCache spamCircleCache) : IVoiceWorksScanner +public abstract class VoiceWorksScanner(IHtmlLoader htmlLoader) : IVoiceWorksScanner { protected abstract ILocale Locale { get; } protected abstract ISupportedLanguage[] SupportedLanguages { get; } - protected abstract DateTime? GetEstimatedReleaseDate(string expectedDate); - protected abstract DateTime? GetSalesDate(string salesDate); + protected abstract DateOnly? GetEstimatedReleaseDate(string expectedDate); + protected abstract DateOnly? GetSalesDate(string salesDate); - protected virtual bool ExcludeSpamCircles => true; - protected virtual bool ExcludePartiallyAIGeneratedWorks => true; - protected virtual bool ExcludeAIGeneratedWorks => true; - - public async Task> ScanPageAsync(ScanVoiceWorksRequest request, CancellationToken cancellationToken = default) + public async Task> ScanPageAsync(VoiceWorkScanOptions options, CancellationToken cancellationToken = default) { - DLSiteHtmlDocument document = await GetDLSiteHtmlCollectionAsync(request, cancellationToken); + DLSiteHtmlDocument document = await GetDLSiteHtmlCollectionAsync(options, cancellationToken); List nodes = document.GetDLSiteNodes(); - return GetDLSiteWorks(nodes); + return GetDLSiteWorks(nodes, options); } - private async Task GetDLSiteHtmlCollectionAsync(ScanVoiceWorksRequest request, CancellationToken cancellationToken) + private async Task GetDLSiteHtmlCollectionAsync(VoiceWorkScanOptions options, CancellationToken cancellationToken) { - string url = await GetUrlAsync(request, cancellationToken); + string url = GetUrl(options); HtmlDocument document = await htmlLoader.GetHtmlDocumentAsync(url, cancellationToken); return new DLSiteHtmlDocument(document); } - protected virtual async ValueTask GetUrlAsync(ScanVoiceWorksRequest request, CancellationToken cancellationToken) + protected string GetUrl(VoiceWorkScanOptions options) { - DLSiteSearchFilterBuilder filterBuilder = new(); + var filterBuilder = new DLSiteSearchFilterBuilder() + .UseLocale(Locale) + .IncludeSupportedLanguages(SupportedLanguages) + .ExcludeMakers(options.ExcludedMakerIds); - foreach (ISupportedLanguage supprotedLanguage in SupportedLanguages) - { - filterBuilder.IncludeSupportedLanguage(supprotedLanguage); - } - - if (ExcludeSpamCircles) - { - string[] makerIds = await spamCircleCache.GetAsync(cancellationToken); - - foreach (string makerId in makerIds) - filterBuilder.ExcludeMaker(makerId); - } - - if (ExcludePartiallyAIGeneratedWorks) + if (options.ExcludePartiallyAIGeneratedWorks) filterBuilder.ExcludePartiallyAIGeneratedWorks(); - if (ExcludeAIGeneratedWorks) + if (options.ExcludeAIGeneratedWorks) filterBuilder.ExcludeAIGeneratedWorks(); - return filterBuilder.BuildSearchQuery(request.PageNumber, request.PageSize); + return filterBuilder.BuildSearchQuery(options.PageNumber, options.PageSize); } - private List GetDLSiteWorks(List nodes) + private List GetDLSiteWorks(List nodes, VoiceWorkScanOptions options) { var works = new List(); - //var spamCircles = SpamCircleCache.Get(); foreach (DLSiteHtmlNode node in nodes) { DLSiteWork work = GetDLSiteWork(node); - //if (spamCircles.Any(circle => circle.MakerId == work.MakerId)) - // continue; + if (options.ExcludedMakerIds.Any(makerId => makerId == work.MakerId)) + continue; works.Add(work); } diff --git a/JSMR.Tests/JSMR.Tests.csproj b/JSMR.Tests/JSMR.Tests.csproj index fcc96f5..a1dea28 100644 --- a/JSMR.Tests/JSMR.Tests.csproj +++ b/JSMR.Tests/JSMR.Tests.csproj @@ -9,10 +9,12 @@ + + diff --git a/JSMR.Tests/Scanning/English-Page.html b/JSMR.Tests/Scanning/English-Page.html new file mode 100644 index 0000000..b3fcccc --- /dev/null +++ b/JSMR.Tests/Scanning/English-Page.html @@ -0,0 +1,227 @@ + + +
+
+ Reorder : + +
+
+ 626609 + total. Showing: + 1~30 +
+
+ Display : + +
+
+ + Items per page : +
+
+
+ + + + + + + + + + + + + + + +
+ + +
+
+ Oct. 3, 23:59 (JST) Discounted for a limited time. +
+ DL Exclusive +
+ + Title of Product + +
+
+ The Maker +
+
+ + $ 12.52 + + + $ 17.88 + + 30%OFF + / + 168pt (10%Earn) +
+
Description of the product.
+
+ Voice + Trial version + + +
+
+ Male Protagonist + Gal + Uniform + Harem + Big Breasts + Tanned Skin / Suntan +
+
+
+
    +
  • Release date: Sep/06/2025
  • +
  • +
    Purchased: 1,000
    +
  • +
  • + +
    + (44) +
    +
    +
  • +
  • (1,924)
  • +
+
+ + +
+
+ + +
+
+

Middle Oct. 2025 Upcoming works

+ + Title of Product + +
+
+ The Maker +
+
+ + $ 12.52 + + + $ 17.88 + + 30%OFF + / + 168pt (10%Earn) +
+
Description of the product.
+
+ Voice + Trial version + + +
+
+ Male Protagonist + Gal + Uniform + Harem + Big Breasts + Tanned Skin / Suntan +
+
+
+
    +
  • Announced:: Sep/05/2025
  • +
  • +
    Favorited: 500
    +
  • +
  • + +
    + (44) +
    +
    +
  • +
  • (1,924)
  • +
+
+ + +
+
+
+ + \ No newline at end of file diff --git a/JSMR.Tests/Scanning/VoiceWorkScannerTests.cs b/JSMR.Tests/Scanning/VoiceWorkScannerTests.cs new file mode 100644 index 0000000..8e97640 --- /dev/null +++ b/JSMR.Tests/Scanning/VoiceWorkScannerTests.cs @@ -0,0 +1,53 @@ +using JSMR.Application.Scanning.Contracts; +using JSMR.Infrastructure.Http; +using JSMR.Infrastructure.Scanning; +using JSMR.Tests.Utilities; +using NSubstitute; +using Shouldly; + +namespace JSMR.Tests.Integrations.DLSite; + +public class VoiceWorkScannerTests +{ + private static async Task ReadResourceAsync(string resourceName) + { + return await ResourceHelper.ReadAsync($"JSMR.Tests.Scanning.{resourceName}"); + } + + [Fact] + public async Task Scan_With_English_Locale() + { + string englishPageHtml = await ReadResourceAsync("English-Page.html"); + + IHttpService httpService = Substitute.For(); + + httpService.GetStringAsync(Arg.Any(), CancellationToken.None) + .Returns(Task.FromResult(englishPageHtml)); + + HtmlLoader loader = new(httpService); + EnglishVoiceWorksScanner scanner = new(loader); + + VoiceWorkScanOptions options = new( + PageNumber: 1, + PageSize: 100, + ExcludeAIGeneratedWorks: true, + ExcludePartiallyAIGeneratedWorks: true, + ExcludedMakerIds: [] + ); + + var result = await scanner.ScanPageAsync(options, CancellationToken.None); + + result.Count.ShouldBe(2); + + result[0].ExpectedDate.ShouldBeNull(); + result[0].SalesDate.ShouldBe(new DateOnly(2025, 9, 6)); + result[0].ProductId.ShouldBe("RJ00000001"); + result[0].ProductName.ShouldBe("Title of Product"); + result[0].Description.ShouldBe("Description of the product."); + result[0].Downloads.ShouldBe(1000); + + result[1].ExpectedDate.ShouldBe(new DateOnly(2025, 10, 11)); + result[1].SalesDate.ShouldBeNull(); + result[1].ProductId.ShouldBe("RJ00000002"); + } +} \ No newline at end of file