Updated scanner logic, and added initial scanner tests.
This commit is contained in:
@@ -22,7 +22,7 @@ builder.Services
|
|||||||
var cs = builder.Configuration.GetConnectionString("AppDb")
|
var cs = builder.Configuration.GetConnectionString("AppDb")
|
||||||
?? throw new InvalidOperationException("Missing ConnectionStrings:AppDb2");
|
?? throw new InvalidOperationException("Missing ConnectionStrings:AppDb2");
|
||||||
|
|
||||||
builder.Services.AddDbContext<AppDbContext>(opt =>
|
builder.Services.AddDbContextFactory<AppDbContext>(opt =>
|
||||||
opt.UseMySql(cs, ServerVersion.AutoDetect(cs))
|
opt.UseMySql(cs, ServerVersion.AutoDetect(cs))
|
||||||
.EnableSensitiveDataLogging(false));
|
.EnableSensitiveDataLogging(false));
|
||||||
|
|
||||||
|
|||||||
@@ -6,12 +6,12 @@
|
|||||||
<Nullable>enable</Nullable>
|
<Nullable>enable</Nullable>
|
||||||
</PropertyGroup>
|
</PropertyGroup>
|
||||||
|
|
||||||
<ItemGroup>
|
|
||||||
<PackageReference Include="Microsoft.Extensions.DependencyInjection.Abstractions" Version="9.0.8" />
|
|
||||||
</ItemGroup>
|
|
||||||
|
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<Folder Include="Tags\Queries\Ports\" />
|
<Folder Include="Tags\Queries\Ports\" />
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
|
|
||||||
|
<ItemGroup>
|
||||||
|
<PackageReference Include="Microsoft.Extensions.DependencyInjection.Abstractions" Version="9.0.9" />
|
||||||
|
</ItemGroup>
|
||||||
|
|
||||||
</Project>
|
</Project>
|
||||||
|
|||||||
@@ -8,8 +8,8 @@ public class DLSiteWork
|
|||||||
public string? ProductUrl { get; set; }
|
public string? ProductUrl { get; set; }
|
||||||
public string? ProductId { get; set; }
|
public string? ProductId { get; set; }
|
||||||
public DateOnly? AnnouncedDate { get; set; }
|
public DateOnly? AnnouncedDate { get; set; }
|
||||||
public DateTime? ExpectedDate { get; set; }
|
public DateOnly? ExpectedDate { get; set; }
|
||||||
public DateTime? SalesDate { get; set; }
|
public DateOnly? SalesDate { get; set; }
|
||||||
public int? Downloads { get; set; }
|
public int? Downloads { get; set; }
|
||||||
public byte? StarRating { get; set; }
|
public byte? StarRating { get; set; }
|
||||||
public int? Votes { get; set; }
|
public int? Votes { get; set; }
|
||||||
|
|||||||
@@ -0,0 +1,3 @@
|
|||||||
|
namespace JSMR.Application.Scanning.Contracts;
|
||||||
|
|
||||||
|
/// <summary>
/// Options handed to <c>IVoiceWorksScanner.ScanPageAsync</c>: which page of search results to
/// scan and which works to leave out of the result.
/// </summary>
/// <param name="PageNumber">Page number forwarded to the search query builder.</param>
/// <param name="PageSize">Page size forwarded to the search query builder.</param>
/// <param name="ExcludedMakerIds">
/// Maker ids to exclude; the scanner adds them to the search filter and also drops any parsed
/// work whose maker id matches one of them.
/// </param>
/// <param name="ExcludePartiallyAIGeneratedWorks">
/// When <see langword="true"/>, the scanner adds the "exclude partially AI-generated works" filter.
/// </param>
/// <param name="ExcludeAIGeneratedWorks">
/// When <see langword="true"/>, the scanner adds the "exclude AI-generated works" filter.
/// </param>
// Sealed to match the other scanning contracts (e.g. ScanVoiceWorksRequest); this is a plain
// data carrier that is not designed for inheritance.
public sealed record VoiceWorkScanOptions(
    int PageNumber,
    int PageSize,
    string[] ExcludedMakerIds,
    bool ExcludePartiallyAIGeneratedWorks,
    bool ExcludeAIGeneratedWorks);
|
||||||
@@ -4,5 +4,5 @@ namespace JSMR.Application.Scanning.Ports;
|
|||||||
|
|
||||||
public interface IVoiceWorksScanner
|
public interface IVoiceWorksScanner
|
||||||
{
|
{
|
||||||
Task<IReadOnlyList<DLSiteWork>> ScanPageAsync(ScanVoiceWorksRequest request, CancellationToken cancellationToken = default);
|
Task<IReadOnlyList<DLSiteWork>> ScanPageAsync(VoiceWorkScanOptions request, CancellationToken cancellationToken = default);
|
||||||
}
|
}
|
||||||
@@ -1,26 +1,53 @@
|
|||||||
using JSMR.Application.Scanning.Ports;
|
using JSMR.Application.Common.Caching;
|
||||||
|
using JSMR.Application.Integrations.DLSite.Models;
|
||||||
|
using JSMR.Application.Integrations.Ports;
|
||||||
|
using JSMR.Application.Scanning.Contracts;
|
||||||
|
using JSMR.Application.Scanning.Ports;
|
||||||
|
using Microsoft.Extensions.DependencyInjection;
|
||||||
|
|
||||||
namespace JSMR.Application.Scanning;
|
namespace JSMR.Application.Scanning;
|
||||||
|
|
||||||
public sealed class ScanVoiceWorksHandler(IVoiceWorksScanner scanner)
|
public sealed class ScanVoiceWorksHandler(IServiceProvider serviceProvider, IDLSiteClient dlsiteClient, ISpamCircleCache spamCircleCache)
|
||||||
{
|
{
|
||||||
//public async Task<ScanVoiceWorksResponse> HandleAsync(ScanVoiceWorksRequest request, CancellationToken cancellationToken)
|
public async Task<ScanVoiceWorksResponse> HandleAsync(ScanVoiceWorksRequest request, CancellationToken cancellationToken)
|
||||||
//{
|
{
|
||||||
// var works = await scanner.ScanPageAsync(request, cancellationToken);
|
IVoiceWorksScanner? scanner = serviceProvider.GetKeyedService<IVoiceWorksScanner>(request.Locale);
|
||||||
|
|
||||||
// if (works.Count == 0)
|
if (scanner is null)
|
||||||
// return new ScanVoiceWorksResponse();
|
return new();
|
||||||
|
|
||||||
// var ingests = works.Select(VoiceWorkIngest.From).ToList();
|
VoiceWorkScanOptions options = new(
|
||||||
// var upsert = await _writer.UpsertAsync(ingests, ct);
|
PageNumber: request.PageNumber,
|
||||||
|
PageSize: request.PageSize,
|
||||||
|
ExcludedMakerIds: await spamCircleCache.GetAsync(cancellationToken),
|
||||||
|
ExcludePartiallyAIGeneratedWorks: true,
|
||||||
|
ExcludeAIGeneratedWorks: true
|
||||||
|
);
|
||||||
|
|
||||||
// // only update search text for affected rows
|
IReadOnlyList<DLSiteWork> works = await scanner.ScanPageAsync(options, cancellationToken);
|
||||||
// await _search.UpdateAsync(upsert.AffectedVoiceWorkIds, ct);
|
|
||||||
|
|
||||||
// return new ScanVoiceWorksResponse
|
if (works.Count == 0)
|
||||||
// {
|
return new();
|
||||||
// Inserted = upsert.Inserted,
|
|
||||||
// Updated = upsert.Updated
|
string[] productIds = [.. works.Where(x => !string.IsNullOrWhiteSpace(x.ProductId)).Select(x => x.ProductId!)];
|
||||||
// };
|
VoiceWorkDetailCollection voiceWorkDetails = await dlsiteClient.GetVoiceWorkDetailsAsync(productIds, cancellationToken);
|
||||||
//}
|
|
||||||
|
// TODO
|
||||||
|
|
||||||
|
/*
|
||||||
|
var ingests = works.Select(VoiceWorkIngest.From).ToList();
|
||||||
|
var upsert = await _writer.UpsertAsync(ingests, ct);
|
||||||
|
|
||||||
|
// only update search text for affected rows
|
||||||
|
await _search.UpdateAsync(upsert.AffectedVoiceWorkIds, ct);
|
||||||
|
|
||||||
|
return new ScanVoiceWorksResponse
|
||||||
|
{
|
||||||
|
Inserted = upsert.Inserted,
|
||||||
|
Updated = upsert.Updated
|
||||||
|
};
|
||||||
|
*/
|
||||||
|
|
||||||
|
return new();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
@@ -2,4 +2,4 @@
|
|||||||
|
|
||||||
namespace JSMR.Application.Scanning;
|
namespace JSMR.Application.Scanning;
|
||||||
|
|
||||||
public sealed record ScanVoiceWorksRequest(int PageNumber, int PageSize, Locale Locale);
|
/// <summary>
/// Request handled by <c>ScanVoiceWorksHandler</c> to scan one page of voice works for a locale.
/// </summary>
/// <param name="PageNumber">Page number copied into the scanner options.</param>
/// <param name="PageSize">Page size copied into the scanner options.</param>
/// <param name="Locale">Key used to resolve the locale-specific <c>IVoiceWorksScanner</c>.</param>
/// <param name="ExcludedMakerIds">Maker ids the caller wants excluded.</param>
// NOTE(review): ScanVoiceWorksHandler as shown in this change does not read ExcludedMakerIds;
// it takes the excluded maker ids from ISpamCircleCache instead. Confirm whether this
// parameter should be forwarded to the scanner options or removed.
public sealed record ScanVoiceWorksRequest(
    int PageNumber,
    int PageSize,
    Locale Locale,
    string[] ExcludedMakerIds);
|
||||||
@@ -1,10 +1,12 @@
|
|||||||
using JSMR.Application.Circles.Queries.GetCreators;
|
using JSMR.Application.Circles.Queries.GetCreators;
|
||||||
using JSMR.Application.Circles.Queries.GetTags;
|
using JSMR.Application.Circles.Queries.GetTags;
|
||||||
using JSMR.Application.Circles.Queries.Search;
|
using JSMR.Application.Circles.Queries.Search;
|
||||||
|
using JSMR.Application.Common;
|
||||||
using JSMR.Application.Common.Caching;
|
using JSMR.Application.Common.Caching;
|
||||||
using JSMR.Application.Creators.Ports;
|
using JSMR.Application.Creators.Ports;
|
||||||
using JSMR.Application.Creators.Queries.Search.Ports;
|
using JSMR.Application.Creators.Queries.Search.Ports;
|
||||||
using JSMR.Application.Integrations.Ports;
|
using JSMR.Application.Integrations.Ports;
|
||||||
|
using JSMR.Application.Scanning.Ports;
|
||||||
using JSMR.Application.Tags.Ports;
|
using JSMR.Application.Tags.Ports;
|
||||||
using JSMR.Application.Tags.Queries.Search.Ports;
|
using JSMR.Application.Tags.Queries.Search.Ports;
|
||||||
using JSMR.Application.VoiceWorks.Ports;
|
using JSMR.Application.VoiceWorks.Ports;
|
||||||
@@ -17,6 +19,7 @@ using JSMR.Infrastructure.Data.Repositories.Tags;
|
|||||||
using JSMR.Infrastructure.Data.Repositories.VoiceWorks;
|
using JSMR.Infrastructure.Data.Repositories.VoiceWorks;
|
||||||
using JSMR.Infrastructure.Http;
|
using JSMR.Infrastructure.Http;
|
||||||
using JSMR.Infrastructure.Integrations.DLSite;
|
using JSMR.Infrastructure.Integrations.DLSite;
|
||||||
|
using JSMR.Infrastructure.Scanning;
|
||||||
using Microsoft.Extensions.DependencyInjection;
|
using Microsoft.Extensions.DependencyInjection;
|
||||||
|
|
||||||
namespace JSMR.Infrastructure.DI;
|
namespace JSMR.Infrastructure.DI;
|
||||||
@@ -32,6 +35,8 @@ public static class InfrastructureServiceCollectionExtensions
|
|||||||
services.AddScoped<IVoiceWorkSearchProvider, VoiceWorkSearchProvider>();
|
services.AddScoped<IVoiceWorkSearchProvider, VoiceWorkSearchProvider>();
|
||||||
services.AddScoped<IVoiceWorkFullTextSearch, MySqlVoiceWorkFullTextSearch>();
|
services.AddScoped<IVoiceWorkFullTextSearch, MySqlVoiceWorkFullTextSearch>();
|
||||||
services.AddScoped<IVoiceWorkWriter, VoiceWorkWriter>();
|
services.AddScoped<IVoiceWorkWriter, VoiceWorkWriter>();
|
||||||
|
services.AddKeyedScoped<IVoiceWorksScanner, JapaneseVoiceWorksScanner>(Locale.Japanese);
|
||||||
|
services.AddKeyedScoped<IVoiceWorksScanner, EnglishVoiceWorksScanner>(Locale.English);
|
||||||
|
|
||||||
services.AddScoped<ITagSearchProvider, TagSearchProvider>();
|
services.AddScoped<ITagSearchProvider, TagSearchProvider>();
|
||||||
services.AddScoped<ITagWriter, TagWriter>();
|
services.AddScoped<ITagWriter, TagWriter>();
|
||||||
@@ -42,6 +47,11 @@ public static class InfrastructureServiceCollectionExtensions
|
|||||||
services.AddSingleton<ICache, MemoryCacheAdapter>();
|
services.AddSingleton<ICache, MemoryCacheAdapter>();
|
||||||
services.AddSingleton<ISpamCircleCache, SpamCircleCache>();
|
services.AddSingleton<ISpamCircleCache, SpamCircleCache>();
|
||||||
|
|
||||||
|
services.AddHttpClient<IHttpService, HttpService>(client =>
|
||||||
|
{
|
||||||
|
client.DefaultRequestHeaders.UserAgent.ParseAdd("JSMR/1.0");
|
||||||
|
});
|
||||||
|
|
||||||
services.AddScoped<IHttpService, HttpService>();
|
services.AddScoped<IHttpService, HttpService>();
|
||||||
services.AddScoped<IHtmlLoader, HtmlLoader>();
|
services.AddScoped<IHtmlLoader, HtmlLoader>();
|
||||||
|
|
||||||
|
|||||||
@@ -11,6 +11,7 @@
|
|||||||
<PackageReference Include="Microsoft.EntityFrameworkCore" Version="9.0.8" />
|
<PackageReference Include="Microsoft.EntityFrameworkCore" Version="9.0.8" />
|
||||||
<PackageReference Include="Microsoft.EntityFrameworkCore.Relational" Version="9.0.8" />
|
<PackageReference Include="Microsoft.EntityFrameworkCore.Relational" Version="9.0.8" />
|
||||||
<PackageReference Include="Microsoft.Extensions.Caching.Abstractions" Version="9.0.8" />
|
<PackageReference Include="Microsoft.Extensions.Caching.Abstractions" Version="9.0.8" />
|
||||||
|
<PackageReference Include="Microsoft.Extensions.Http" Version="9.0.9" />
|
||||||
<PackageReference Include="Npgsql.EntityFrameworkCore.PostgreSQL" Version="9.0.4" />
|
<PackageReference Include="Npgsql.EntityFrameworkCore.PostgreSQL" Version="9.0.4" />
|
||||||
<PackageReference Include="Pomelo.EntityFrameworkCore.MySql" Version="9.0.0" />
|
<PackageReference Include="Pomelo.EntityFrameworkCore.MySql" Version="9.0.0" />
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
|
|||||||
@@ -34,6 +34,16 @@ public class DLSiteSearchFilterBuilder
|
|||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// <summary>
/// Adds every language in <paramref name="languages"/> to the filter by calling
/// <c>IncludeSupportedLanguage</c> once per element, in order.
/// </summary>
/// <param name="languages">Languages to include; an empty array leaves the builder unchanged.</param>
/// <returns>This builder, so calls can be chained.</returns>
/// <exception cref="ArgumentNullException"><paramref name="languages"/> is <see langword="null"/>.</exception>
public DLSiteSearchFilterBuilder IncludeSupportedLanguages(ISupportedLanguage[] languages)
{
    // Fail with a descriptive argument error instead of a NullReferenceException from the loop.
    ArgumentNullException.ThrowIfNull(languages);

    foreach (ISupportedLanguage language in languages)
    {
        IncludeSupportedLanguage(language);
    }

    return this;
}
|
||||||
|
|
||||||
public DLSiteSearchFilterBuilder IncludeSupportedLanguage(ISupportedLanguage language)
|
public DLSiteSearchFilterBuilder IncludeSupportedLanguage(ISupportedLanguage language)
|
||||||
{
|
{
|
||||||
AddToOptionsAnd(language.Code);
|
AddToOptionsAnd(language.Code);
|
||||||
|
|||||||
@@ -1,15 +1,14 @@
|
|||||||
using JSMR.Application.Common.Caching;
|
using JSMR.Infrastructure.Common.Locales;
|
||||||
using JSMR.Infrastructure.Common.Locales;
|
|
||||||
using JSMR.Infrastructure.Common.SupportedLanguages;
|
using JSMR.Infrastructure.Common.SupportedLanguages;
|
||||||
using JSMR.Infrastructure.Http;
|
using JSMR.Infrastructure.Http;
|
||||||
|
using System.Globalization;
|
||||||
using System.Text.RegularExpressions;
|
using System.Text.RegularExpressions;
|
||||||
|
|
||||||
namespace JSMR.Infrastructure.Scanning;
|
namespace JSMR.Infrastructure.Scanning;
|
||||||
|
|
||||||
public partial class EnglishVoiceWorksScanner(IHtmlLoader loader, ISpamCircleCache spamCircleCache)
|
public partial class EnglishVoiceWorksScanner(IHtmlLoader loader) : VoiceWorksScanner(loader)
|
||||||
: VoiceWorksScanner(loader, spamCircleCache)
|
|
||||||
{
|
{
|
||||||
[GeneratedRegex(@"Release: (.*?)[/](\d{2})[/](\d{4})", RegexOptions.IgnoreCase, "en-US")]
|
[GeneratedRegex(@"Release date: (.*?)[/](\d{1,2})[/](\d{4})", RegexOptions.IgnoreCase, "en-US")]
|
||||||
private static partial Regex SalesDateRegex();
|
private static partial Regex SalesDateRegex();
|
||||||
|
|
||||||
[GeneratedRegex(@"^(Early|Middle|Late)\s(.*?)\s(\d{4})", RegexOptions.IgnoreCase, "en-US")]
|
[GeneratedRegex(@"^(Early|Middle|Late)\s(.*?)\s(\d{4})", RegexOptions.IgnoreCase, "en-US")]
|
||||||
@@ -24,140 +23,47 @@ public partial class EnglishVoiceWorksScanner(IHtmlLoader loader, ISpamCircleCac
|
|||||||
new AlingualLanguage()
|
new AlingualLanguage()
|
||||||
];
|
];
|
||||||
|
|
||||||
protected override DateTime? GetEstimatedReleaseDate(string expectedDate)
|
protected override DateOnly? GetEstimatedReleaseDate(string expectedDate)
|
||||||
{
|
{
|
||||||
if (expectedDate.Contains("販売中") || expectedDate.Contains("発売予定未定"))
|
if (expectedDate.Contains("Release Date: TBC", StringComparison.OrdinalIgnoreCase))
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
Regex textRegex = EstimatedDateRegex();
|
Match match = EstimatedDateRegex().Match(expectedDate);
|
||||||
MatchCollection textMatches = textRegex.Matches(expectedDate);
|
|
||||||
|
|
||||||
if (textMatches.Count == 0 || textMatches[0].Groups.Count < 4)
|
if (match.Success == false)
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
GroupCollection groups = textMatches[0].Groups;
|
GroupCollection groups = match.Groups;
|
||||||
|
|
||||||
int releaseYear = Convert.ToInt32(groups[3].Value);
|
int day = groups[1].Value.ToLowerInvariant() switch
|
||||||
|
|
||||||
int releaseMonth = 1;
|
|
||||||
int releaseDay = 1;
|
|
||||||
|
|
||||||
string releaseTime = groups[1].Value;
|
|
||||||
string releaseMonthText = groups[2].Value;
|
|
||||||
|
|
||||||
switch (releaseTime)
|
|
||||||
{
|
{
|
||||||
case "Early":
|
"early" => 1,
|
||||||
releaseDay = 1;
|
"middle" => 11,
|
||||||
break;
|
"late" => 21,
|
||||||
case "Middle":
|
_ => 1
|
||||||
releaseDay = 11;
|
};
|
||||||
break;
|
|
||||||
case "Late":
|
string monthAbbreviation = groups[2].Value.Replace(".", "");
|
||||||
releaseDay = 21;
|
int month = DateTime.ParseExact(monthAbbreviation, "MMM", CultureInfo.InvariantCulture).Month;
|
||||||
break;
|
|
||||||
|
int year = Convert.ToInt32(groups[3].Value);
|
||||||
|
|
||||||
|
return new DateOnly(year, month, day);
|
||||||
}
|
}
|
||||||
|
|
||||||
switch (releaseMonthText)
|
protected override DateOnly? GetSalesDate(string salesDate)
|
||||||
{
|
{
|
||||||
case "Jan.":
|
Match match = SalesDateRegex().Match(salesDate);
|
||||||
releaseMonth = 1;
|
|
||||||
break;
|
|
||||||
case "Feb.":
|
|
||||||
releaseMonth = 2;
|
|
||||||
break;
|
|
||||||
case "Mar.":
|
|
||||||
releaseMonth = 3;
|
|
||||||
break;
|
|
||||||
case "Apr.":
|
|
||||||
releaseMonth = 4;
|
|
||||||
break;
|
|
||||||
case "May.":
|
|
||||||
releaseMonth = 5;
|
|
||||||
break;
|
|
||||||
case "Jun.":
|
|
||||||
releaseMonth = 6;
|
|
||||||
break;
|
|
||||||
case "Jul.":
|
|
||||||
releaseMonth = 7;
|
|
||||||
break;
|
|
||||||
case "Aug.":
|
|
||||||
releaseMonth = 8;
|
|
||||||
break;
|
|
||||||
case "Sep.":
|
|
||||||
releaseMonth = 9;
|
|
||||||
break;
|
|
||||||
case "Oct.":
|
|
||||||
releaseMonth = 10;
|
|
||||||
break;
|
|
||||||
case "Nov.":
|
|
||||||
releaseMonth = 11;
|
|
||||||
break;
|
|
||||||
case "Dec.":
|
|
||||||
releaseMonth = 12;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
return new DateTime(releaseYear, releaseMonth, releaseDay);
|
if (match.Success == false)
|
||||||
}
|
|
||||||
|
|
||||||
protected override DateTime? GetSalesDate(string salesDate)
|
|
||||||
{
|
|
||||||
Regex textRegex = SalesDateRegex();
|
|
||||||
MatchCollection textMatches = textRegex.Matches(salesDate);
|
|
||||||
|
|
||||||
if (textMatches.Count == 0 || textMatches[0].Groups.Count < 4)
|
|
||||||
return null;
|
return null;
|
||||||
|
|
||||||
string month = textMatches[0].Groups[1].Value;
|
string monthAbbreviation = match.Groups[1].Value;
|
||||||
int releaseMonth = -1;
|
int day = int.Parse(match.Groups[2].Value);
|
||||||
|
int year = int.Parse(match.Groups[3].Value);
|
||||||
|
|
||||||
switch (month)
|
int month = DateTime.ParseExact(monthAbbreviation, "MMM", CultureInfo.InvariantCulture).Month;
|
||||||
{
|
|
||||||
case "Jan":
|
|
||||||
releaseMonth = 1;
|
|
||||||
break;
|
|
||||||
case "Feb":
|
|
||||||
releaseMonth = 2;
|
|
||||||
break;
|
|
||||||
case "Mar":
|
|
||||||
releaseMonth = 3;
|
|
||||||
break;
|
|
||||||
case "Apr":
|
|
||||||
releaseMonth = 4;
|
|
||||||
break;
|
|
||||||
case "May":
|
|
||||||
releaseMonth = 5;
|
|
||||||
break;
|
|
||||||
case "Jun":
|
|
||||||
releaseMonth = 6;
|
|
||||||
break;
|
|
||||||
case "Jul":
|
|
||||||
releaseMonth = 7;
|
|
||||||
break;
|
|
||||||
case "Aug":
|
|
||||||
releaseMonth = 8;
|
|
||||||
break;
|
|
||||||
case "Sep":
|
|
||||||
releaseMonth = 9;
|
|
||||||
break;
|
|
||||||
case "Oct":
|
|
||||||
releaseMonth = 10;
|
|
||||||
break;
|
|
||||||
case "Nov":
|
|
||||||
releaseMonth = 11;
|
|
||||||
break;
|
|
||||||
case "Dec":
|
|
||||||
releaseMonth = 12;
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (releaseMonth == -1)
|
return new(year, month, day);
|
||||||
return null;
|
|
||||||
|
|
||||||
int releaseYear = Convert.ToInt32(textMatches[0].Groups[3].Value);
|
|
||||||
int releaseDay = Convert.ToInt32(textMatches[0].Groups[2].Value);
|
|
||||||
|
|
||||||
return new DateTime(releaseYear, releaseMonth, releaseDay);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1,13 +1,11 @@
|
|||||||
using JSMR.Application.Common.Caching;
|
using JSMR.Infrastructure.Common.Locales;
|
||||||
using JSMR.Infrastructure.Common.Locales;
|
|
||||||
using JSMR.Infrastructure.Common.SupportedLanguages;
|
using JSMR.Infrastructure.Common.SupportedLanguages;
|
||||||
using JSMR.Infrastructure.Http;
|
using JSMR.Infrastructure.Http;
|
||||||
using System.Text.RegularExpressions;
|
using System.Text.RegularExpressions;
|
||||||
|
|
||||||
namespace JSMR.Infrastructure.Scanning;
|
namespace JSMR.Infrastructure.Scanning;
|
||||||
|
|
||||||
public class JapaneseVoiceWorksScanner(IHtmlLoader loader, ISpamCircleCache spamCircleCache)
|
public class JapaneseVoiceWorksScanner(IHtmlLoader loader) : VoiceWorksScanner(loader)
|
||||||
: VoiceWorksScanner(loader, spamCircleCache)
|
|
||||||
{
|
{
|
||||||
protected override ILocale Locale => new JapaneseLocale();
|
protected override ILocale Locale => new JapaneseLocale();
|
||||||
|
|
||||||
@@ -21,7 +19,7 @@ public class JapaneseVoiceWorksScanner(IHtmlLoader loader, ISpamCircleCache spam
|
|||||||
new AlingualLanguage()
|
new AlingualLanguage()
|
||||||
];
|
];
|
||||||
|
|
||||||
protected override DateTime? GetEstimatedReleaseDate(string expectedDate)
|
protected override DateOnly? GetEstimatedReleaseDate(string expectedDate)
|
||||||
{
|
{
|
||||||
if (expectedDate.Contains("販売中") || expectedDate.Contains("発売予定未定"))
|
if (expectedDate.Contains("販売中") || expectedDate.Contains("発売予定未定"))
|
||||||
return null;
|
return null;
|
||||||
@@ -54,10 +52,10 @@ public class JapaneseVoiceWorksScanner(IHtmlLoader loader, ISpamCircleCache spam
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
return new DateTime(releaseYear, releaseMonth, releaseDay);
|
return new DateOnly(releaseYear, releaseMonth, releaseDay);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected override DateTime? GetSalesDate(string salesDate)
|
protected override DateOnly? GetSalesDate(string salesDate)
|
||||||
{
|
{
|
||||||
Regex textRegex = new Regex("販売日: (.*?)年(.*?)月(.*)日", RegexOptions.IgnoreCase);
|
Regex textRegex = new Regex("販売日: (.*?)年(.*?)月(.*)日", RegexOptions.IgnoreCase);
|
||||||
MatchCollection textMatches = textRegex.Matches(salesDate);
|
MatchCollection textMatches = textRegex.Matches(salesDate);
|
||||||
@@ -69,6 +67,6 @@ public class JapaneseVoiceWorksScanner(IHtmlLoader loader, ISpamCircleCache spam
|
|||||||
int releaseMonth = Convert.ToInt32(textMatches[0].Groups[2].Value);
|
int releaseMonth = Convert.ToInt32(textMatches[0].Groups[2].Value);
|
||||||
int releaseDay = Convert.ToInt32(textMatches[0].Groups[3].Value);
|
int releaseDay = Convert.ToInt32(textMatches[0].Groups[3].Value);
|
||||||
|
|
||||||
return new DateTime(releaseYear, releaseMonth, releaseDay);
|
return new DateOnly(releaseYear, releaseMonth, releaseDay);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1,6 +1,4 @@
|
|||||||
using HtmlAgilityPack;
|
using HtmlAgilityPack;
|
||||||
using JSMR.Application.Common.Caching;
|
|
||||||
using JSMR.Application.Scanning;
|
|
||||||
using JSMR.Application.Scanning.Contracts;
|
using JSMR.Application.Scanning.Contracts;
|
||||||
using JSMR.Application.Scanning.Ports;
|
using JSMR.Application.Scanning.Ports;
|
||||||
using JSMR.Infrastructure.Common.Locales;
|
using JSMR.Infrastructure.Common.Locales;
|
||||||
@@ -12,72 +10,57 @@ using System.Text.RegularExpressions;
|
|||||||
|
|
||||||
namespace JSMR.Infrastructure.Scanning;
|
namespace JSMR.Infrastructure.Scanning;
|
||||||
|
|
||||||
public abstract class VoiceWorksScanner(IHtmlLoader htmlLoader, ISpamCircleCache spamCircleCache) : IVoiceWorksScanner
|
public abstract class VoiceWorksScanner(IHtmlLoader htmlLoader) : IVoiceWorksScanner
|
||||||
{
|
{
|
||||||
protected abstract ILocale Locale { get; }
|
protected abstract ILocale Locale { get; }
|
||||||
protected abstract ISupportedLanguage[] SupportedLanguages { get; }
|
protected abstract ISupportedLanguage[] SupportedLanguages { get; }
|
||||||
|
|
||||||
protected abstract DateTime? GetEstimatedReleaseDate(string expectedDate);
|
protected abstract DateOnly? GetEstimatedReleaseDate(string expectedDate);
|
||||||
protected abstract DateTime? GetSalesDate(string salesDate);
|
protected abstract DateOnly? GetSalesDate(string salesDate);
|
||||||
|
|
||||||
protected virtual bool ExcludeSpamCircles => true;
|
public async Task<IReadOnlyList<DLSiteWork>> ScanPageAsync(VoiceWorkScanOptions options, CancellationToken cancellationToken = default)
|
||||||
protected virtual bool ExcludePartiallyAIGeneratedWorks => true;
|
|
||||||
protected virtual bool ExcludeAIGeneratedWorks => true;
|
|
||||||
|
|
||||||
public async Task<IReadOnlyList<DLSiteWork>> ScanPageAsync(ScanVoiceWorksRequest request, CancellationToken cancellationToken = default)
|
|
||||||
{
|
{
|
||||||
DLSiteHtmlDocument document = await GetDLSiteHtmlCollectionAsync(request, cancellationToken);
|
DLSiteHtmlDocument document = await GetDLSiteHtmlCollectionAsync(options, cancellationToken);
|
||||||
List<DLSiteHtmlNode> nodes = document.GetDLSiteNodes();
|
List<DLSiteHtmlNode> nodes = document.GetDLSiteNodes();
|
||||||
|
|
||||||
return GetDLSiteWorks(nodes);
|
return GetDLSiteWorks(nodes, options);
|
||||||
}
|
}
|
||||||
|
|
||||||
private async Task<DLSiteHtmlDocument> GetDLSiteHtmlCollectionAsync(ScanVoiceWorksRequest request, CancellationToken cancellationToken)
|
private async Task<DLSiteHtmlDocument> GetDLSiteHtmlCollectionAsync(VoiceWorkScanOptions options, CancellationToken cancellationToken)
|
||||||
{
|
{
|
||||||
string url = await GetUrlAsync(request, cancellationToken);
|
string url = GetUrl(options);
|
||||||
|
|
||||||
HtmlDocument document = await htmlLoader.GetHtmlDocumentAsync(url, cancellationToken);
|
HtmlDocument document = await htmlLoader.GetHtmlDocumentAsync(url, cancellationToken);
|
||||||
|
|
||||||
return new DLSiteHtmlDocument(document);
|
return new DLSiteHtmlDocument(document);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected virtual async ValueTask<string> GetUrlAsync(ScanVoiceWorksRequest request, CancellationToken cancellationToken)
|
protected string GetUrl(VoiceWorkScanOptions options)
|
||||||
{
|
{
|
||||||
DLSiteSearchFilterBuilder filterBuilder = new();
|
var filterBuilder = new DLSiteSearchFilterBuilder()
|
||||||
|
.UseLocale(Locale)
|
||||||
|
.IncludeSupportedLanguages(SupportedLanguages)
|
||||||
|
.ExcludeMakers(options.ExcludedMakerIds);
|
||||||
|
|
||||||
foreach (ISupportedLanguage supprotedLanguage in SupportedLanguages)
|
if (options.ExcludePartiallyAIGeneratedWorks)
|
||||||
{
|
|
||||||
filterBuilder.IncludeSupportedLanguage(supprotedLanguage);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (ExcludeSpamCircles)
|
|
||||||
{
|
|
||||||
string[] makerIds = await spamCircleCache.GetAsync(cancellationToken);
|
|
||||||
|
|
||||||
foreach (string makerId in makerIds)
|
|
||||||
filterBuilder.ExcludeMaker(makerId);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (ExcludePartiallyAIGeneratedWorks)
|
|
||||||
filterBuilder.ExcludePartiallyAIGeneratedWorks();
|
filterBuilder.ExcludePartiallyAIGeneratedWorks();
|
||||||
|
|
||||||
if (ExcludeAIGeneratedWorks)
|
if (options.ExcludeAIGeneratedWorks)
|
||||||
filterBuilder.ExcludeAIGeneratedWorks();
|
filterBuilder.ExcludeAIGeneratedWorks();
|
||||||
|
|
||||||
return filterBuilder.BuildSearchQuery(request.PageNumber, request.PageSize);
|
return filterBuilder.BuildSearchQuery(options.PageNumber, options.PageSize);
|
||||||
}
|
}
|
||||||
|
|
||||||
private List<DLSiteWork> GetDLSiteWorks(List<DLSiteHtmlNode> nodes)
|
private List<DLSiteWork> GetDLSiteWorks(List<DLSiteHtmlNode> nodes, VoiceWorkScanOptions options)
|
||||||
{
|
{
|
||||||
var works = new List<DLSiteWork>();
|
var works = new List<DLSiteWork>();
|
||||||
//var spamCircles = SpamCircleCache.Get();
|
|
||||||
|
|
||||||
foreach (DLSiteHtmlNode node in nodes)
|
foreach (DLSiteHtmlNode node in nodes)
|
||||||
{
|
{
|
||||||
DLSiteWork work = GetDLSiteWork(node);
|
DLSiteWork work = GetDLSiteWork(node);
|
||||||
|
|
||||||
//if (spamCircles.Any(circle => circle.MakerId == work.MakerId))
|
if (options.ExcludedMakerIds.Any(makerId => makerId == work.MakerId))
|
||||||
// continue;
|
continue;
|
||||||
|
|
||||||
works.Add(work);
|
works.Add(work);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -9,10 +9,12 @@
|
|||||||
|
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<None Remove="Integrations\DLSite\Product-Info.json" />
|
<None Remove="Integrations\DLSite\Product-Info.json" />
|
||||||
|
<None Remove="Scanning\English-Page.html" />
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
|
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<EmbeddedResource Include="Integrations\DLSite\Product-Info.json" />
|
<EmbeddedResource Include="Integrations\DLSite\Product-Info.json" />
|
||||||
|
<EmbeddedResource Include="Scanning\English-Page.html" />
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
|
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
|
|||||||
227
JSMR.Tests/Scanning/English-Page.html
Normal file
227
JSMR.Tests/Scanning/English-Page.html
Normal file
@@ -0,0 +1,227 @@
|
|||||||
|
<html>
|
||||||
|
<body>
|
||||||
|
<div data-toggle="found" class="sort_box border_b pb10">
|
||||||
|
<div class="status_select">
|
||||||
|
Reorder :
|
||||||
|
<select name="order" id="query_order" class="_change_submit"><!----><!----><option value="trend">Sort by popularity</option><option value="release_d">Release date - New to Old</option><option value="release">Release date - Old to New</option><option value="dl_d">Best Selling</option><option value="price">Price - Low to High</option><option value="price_d">Price - High to Low</option><option value="rate_d">Rating - High to Low</option><option value="review_d">Reviews - High to Low</option></select>
|
||||||
|
</div>
|
||||||
|
<div class="page_total">
|
||||||
|
<strong>626609</strong>
|
||||||
|
<span> total. Showing: </span>
|
||||||
|
<strong>1~30</strong>
|
||||||
|
</div>
|
||||||
|
<div class="display_type_select">
|
||||||
|
<span>Display :</span>
|
||||||
|
<ul>
|
||||||
|
<li class="on display_normal">
|
||||||
|
<a>1カラム表示</a>
|
||||||
|
</li>
|
||||||
|
<li class="display_block">
|
||||||
|
<a href="#">画像表示</a>
|
||||||
|
</li>
|
||||||
|
</ul>
|
||||||
|
</div>
|
||||||
|
<div class="display_num_select">
|
||||||
|
<ul>
|
||||||
|
<li class="on">
|
||||||
|
<a>30</a>
|
||||||
|
</li>
|
||||||
|
<li class="">
|
||||||
|
<a href="#">50</a>
|
||||||
|
</li>
|
||||||
|
<li class="">
|
||||||
|
<a href="#">100</a>
|
||||||
|
</li>
|
||||||
|
</ul>
|
||||||
|
<span>Items per page :</span>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
|
<div id="search_result_list">
|
||||||
|
<table class="work_1col_table n_worklist">
|
||||||
|
<tbody>
|
||||||
|
<!-- RJ00000001 -->
|
||||||
|
<tr data-list_item_product_id="RJ00000001">
|
||||||
|
<td class="work_1col_thumb">
|
||||||
|
<div class="work_thumb">
|
||||||
|
<a href="https://www.dlsite.com/maniax/work/=/product_id/RJ01464588.html" id="_link_RJ00000001" class="work_thumb_inner">
|
||||||
|
<img alt="Title of Product" src="//img.dlsite.jp/resize/images2/work/doujin/RJ00000000/RJ00000001_img_main_240x240.jpg" class="lazy">
|
||||||
|
<div class="work_img_popover">
|
||||||
|
<img src="//img.dlsite.jp/modpub/images2/work/doujin/RJ00000000/RJ00000001_img_main.jpg" alt="Title of Product">
|
||||||
|
</div>
|
||||||
|
</a>
|
||||||
|
<div class="work_category type_SOU"><a href="https://www.dlsite.com/maniax/fsr/=/work_type/SOU">Voice / ASMR</a></div>
|
||||||
|
</div>
|
||||||
|
</td>
|
||||||
|
<td>
|
||||||
|
<dl class="work_1col">
|
||||||
|
<dt class="work_name">
|
||||||
|
<span class="period_date">Oct. 3, 23:59 (JST) Discounted for a limited time.</span>
|
||||||
|
<div class="icon_wrap">
|
||||||
|
<span class="icon_lead_01 type_exclusive" title="DL Exclusive">DL Exclusive</span>
|
||||||
|
</div>
|
||||||
|
<a href="https://www.dlsite.com/maniax/work/=/product_id/RJ00000001.html" title="Title of Product">
|
||||||
|
Title of Product
|
||||||
|
</a>
|
||||||
|
</dt>
|
||||||
|
<dd class="maker_name">
|
||||||
|
<a href="https://www.dlsite.com/maniax/circle/profile/=/maker_id/RG00001.html">The Maker</a>
|
||||||
|
</dd>
|
||||||
|
<dd class="work_price_wrap">
|
||||||
|
<span class="work_price">
|
||||||
|
<span class="work_price_parts"><span class="work_price_prefix">$</span> <span class="work_price_base">12.52</span> <span class="work_price_suffix"></span></span>
|
||||||
|
</span>
|
||||||
|
<span class="strike">
|
||||||
|
<span class="work_price_parts"><span class="work_price_prefix">$</span> <span class="work_price_base">17.88</span> <span class="work_price_suffix"></span></span>
|
||||||
|
</span>
|
||||||
|
<span class="icon_campaign type_sale">30%OFF</span>
|
||||||
|
<span class="separator">/</span>
|
||||||
|
<span class="work_point">168pt (10%Earn)</span>
|
||||||
|
</dd>
|
||||||
|
<dd class="work_text">Description of the product.</dd>
|
||||||
|
<dd class="work_genre">
|
||||||
|
<span class="icon_SND" title="Voice">Voice</span>
|
||||||
|
<span class="icon_TRI" title="Trial version">Trial version</span>
|
||||||
|
<span style="display: none;"></span>
|
||||||
|
<input type="hidden" class="__product_attributes" name="__product_attributes" id="_RJ00000001" value="RG00001,adl,male,SND,MS2,TRI,JPN,REV,502,046,415,074,182,321" disabled="disabled">
|
||||||
|
</dd>
|
||||||
|
<dd class="search_tag">
|
||||||
|
<a href="https://www.dlsite.com/maniax/fsr/=/genre/502/from/work.genre">Male Protagonist</a>
|
||||||
|
<a href="https://www.dlsite.com/maniax/fsr/=/genre/415/from/work.genre">Gal</a>
|
||||||
|
<a href="https://www.dlsite.com/maniax/fsr/=/genre/074/from/work.genre">Uniform</a>
|
||||||
|
<a href="https://www.dlsite.com/maniax/fsr/=/genre/046/from/work.genre">Harem</a>
|
||||||
|
<a href="https://www.dlsite.com/maniax/fsr/=/genre/182/from/work.genre">Big Breasts</a>
|
||||||
|
<a href="https://www.dlsite.com/maniax/fsr/=/genre/321/from/work.genre">Tanned Skin / Suntan</a>
|
||||||
|
</dd>
|
||||||
|
</dl>
|
||||||
|
</td>
|
||||||
|
<td class="work_1col_right">
|
||||||
|
<ul class="work_info_box">
|
||||||
|
<li class="sales_date">Release date: Sep/06/2025</li>
|
||||||
|
<li class="work_dl clear">
|
||||||
|
<div class="_work_dl_RJ00000001">Purchased: <span class="_dl_count_RJ00000001">1,000</span></div>
|
||||||
|
</li>
|
||||||
|
<li class="work_review clear">
|
||||||
|
<span class="work_to_review">
|
||||||
|
<div title="Reviews">
|
||||||
|
<a href="https://www.dlsite.com/maniax/work/reviewlist/=/product_id/RJ00000001.html">(44)</a>
|
||||||
|
</div>
|
||||||
|
</span>
|
||||||
|
</li>
|
||||||
|
<li class="work_rating"><div class="star_rating star_45">(1,924)</div></li>
|
||||||
|
</ul>
|
||||||
|
<div class="work_btn_box">
|
||||||
|
<div hidden="hidden" class="ga4_event_item_RJ00000001" data-product_id="RJ00000001" data-work_name="Title of Product" data-maker_id="RG00001" data-work_type="SOU" data-options="SND#MS2#TRI#JPN#REV" data-price="1848" data-official_price="2640"></div>
|
||||||
|
<ul class="work_btn_list btn_free_sample">
|
||||||
|
<li class="work_btn_list_item">
|
||||||
|
<p class="work_cart_xs">
|
||||||
|
<a href="/maniax/cart/=/product_id/RJ00000001.html" class="btn_cart">Add to Cart</a>
|
||||||
|
</p>
|
||||||
|
</li>
|
||||||
|
<li class="work_btn_list_item">
|
||||||
|
<p class="work_favorite_xs">
|
||||||
|
<a href="/maniax/mypage/wishlist/=/product_id/RJ00000001.html" class="btn_favorite">Favorites</a>
|
||||||
|
</p>
|
||||||
|
</li>
|
||||||
|
<li class="work_btn_list_item">
|
||||||
|
<p class="work_free_sample">
|
||||||
|
<a href="#" class="btn_free_sample" data-product-id="RJ00000001">Samples</a>
|
||||||
|
</p>
|
||||||
|
</li>
|
||||||
|
</ul>
|
||||||
|
</div>
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
<!-- RJ00000002 -->
|
||||||
|
<tr data-list_item_product_id="RJ00000002">
|
||||||
|
<td class="work_1col_thumb">
|
||||||
|
<div class="work_thumb">
|
||||||
|
<a href="https://www.dlsite.com/maniax/work/=/product_id/RJ01464588.html" id="_link_RJ00000002" class="work_thumb_inner">
|
||||||
|
<img alt="Title of Product" src="//img.dlsite.jp/resize/images2/work/doujin/RJ00000000/RJ00000002_img_main_240x240.jpg" class="lazy">
|
||||||
|
<div class="work_img_popover">
|
||||||
|
<img src="//img.dlsite.jp/modpub/images2/work/doujin/RJ00000000/RJ00000002_img_main.jpg" alt="Title of Product">
|
||||||
|
</div>
|
||||||
|
</a>
|
||||||
|
<div class="work_category type_SOU"><a href="https://www.dlsite.com/maniax/fsr/=/work_type/SOU">Voice / ASMR</a></div>
|
||||||
|
</div>
|
||||||
|
</td>
|
||||||
|
<td>
|
||||||
|
<dl class="work_1col">
|
||||||
|
<dt class="work_name">
|
||||||
|
<p class="expected_date">Middle Oct. 2025 Upcoming works</p>
|
||||||
|
<a href="https://www.dlsite.com/maniax/work/=/product_id/RJ00000002.html" title="Title of Product">
|
||||||
|
Title of Product
|
||||||
|
</a>
|
||||||
|
</dt>
|
||||||
|
<dd class="maker_name">
|
||||||
|
<a href="https://www.dlsite.com/maniax/circle/profile/=/maker_id/RG00002.html">The Maker</a>
|
||||||
|
</dd>
|
||||||
|
<dd class="work_price_wrap">
|
||||||
|
<span class="work_price">
|
||||||
|
<span class="work_price_parts"><span class="work_price_prefix">$</span> <span class="work_price_base">12.52</span> <span class="work_price_suffix"></span></span>
|
||||||
|
</span>
|
||||||
|
<span class="strike">
|
||||||
|
<span class="work_price_parts"><span class="work_price_prefix">$</span> <span class="work_price_base">17.88</span> <span class="work_price_suffix"></span></span>
|
||||||
|
</span>
|
||||||
|
<span class="icon_campaign type_sale">30%OFF</span>
|
||||||
|
<span class="separator">/</span>
|
||||||
|
<span class="work_point">168pt (10%Earn)</span>
|
||||||
|
</dd>
|
||||||
|
<dd class="work_text">Description of the product.</dd>
|
||||||
|
<dd class="work_genre">
|
||||||
|
<span class="icon_SND" title="Voice">Voice</span>
|
||||||
|
<span class="icon_TRI" title="Trial version">Trial version</span>
|
||||||
|
<span style="display: none;"></span>
|
||||||
|
<input type="hidden" class="__product_attributes" name="__product_attributes" id="_RJ00000002" value="RG00002,adl,male,SND,MS2,TRI,JPN,REV,502,046,415,074,182,321" disabled="disabled">
|
||||||
|
</dd>
|
||||||
|
<dd class="search_tag">
|
||||||
|
<a href="https://www.dlsite.com/maniax/fsr/=/genre/502/from/work.genre">Male Protagonist</a>
|
||||||
|
<a href="https://www.dlsite.com/maniax/fsr/=/genre/415/from/work.genre">Gal</a>
|
||||||
|
<a href="https://www.dlsite.com/maniax/fsr/=/genre/074/from/work.genre">Uniform</a>
|
||||||
|
<a href="https://www.dlsite.com/maniax/fsr/=/genre/046/from/work.genre">Harem</a>
|
||||||
|
<a href="https://www.dlsite.com/maniax/fsr/=/genre/182/from/work.genre">Big Breasts</a>
|
||||||
|
<a href="https://www.dlsite.com/maniax/fsr/=/genre/321/from/work.genre">Tanned Skin / Suntan</a>
|
||||||
|
</dd>
|
||||||
|
</dl>
|
||||||
|
</td>
|
||||||
|
<td class="work_1col_right">
|
||||||
|
<ul class="work_info_box">
|
||||||
|
<li class="sales_date">Announced:: Sep/05/2025</li>
|
||||||
|
<li class="work_dl clear">
|
||||||
|
<div>Favorited: <span>500</span></div>
|
||||||
|
</li>
|
||||||
|
<li class="work_review clear">
|
||||||
|
<span class="work_to_review">
|
||||||
|
<div title="Reviews">
|
||||||
|
<a href="https://www.dlsite.com/maniax/work/reviewlist/=/product_id/RJ00000002.html">(44)</a>
|
||||||
|
</div>
|
||||||
|
</span>
|
||||||
|
</li>
|
||||||
|
<li class="work_rating"><div class="star_rating star_45">(1,924)</div></li>
|
||||||
|
</ul>
|
||||||
|
<div class="work_btn_box">
|
||||||
|
<div hidden="hidden" class="ga4_event_item_RJ00000002" data-product_id="RJ00000002" data-work_name="Title of Product" data-maker_id="RG00002" data-work_type="SOU" data-options="SND#MS2#TRI#JPN#REV" data-price="1848" data-official_price="2640"></div>
|
||||||
|
<ul class="work_btn_list btn_free_sample">
|
||||||
|
<li class="work_btn_list_item">
|
||||||
|
<p class="work_cart_xs">
|
||||||
|
<a href="/maniax/cart/=/product_id/RJ00000002.html" class="btn_cart">Add to Cart</a>
|
||||||
|
</p>
|
||||||
|
</li>
|
||||||
|
<li class="work_btn_list_item">
|
||||||
|
<p class="work_favorite_xs">
|
||||||
|
<a href="/maniax/mypage/wishlist/=/product_id/RJ00000002.html" class="btn_favorite">Favorites</a>
|
||||||
|
</p>
|
||||||
|
</li>
|
||||||
|
<li class="work_btn_list_item">
|
||||||
|
<p class="work_free_sample">
|
||||||
|
<a href="#" class="btn_free_sample" data-product-id="RJ00000002">Samples</a>
|
||||||
|
</p>
|
||||||
|
</li>
|
||||||
|
</ul>
|
||||||
|
</div>
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
</div>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
53
JSMR.Tests/Scanning/VoiceWorkScannerTests.cs
Normal file
53
JSMR.Tests/Scanning/VoiceWorkScannerTests.cs
Normal file
@@ -0,0 +1,53 @@
|
|||||||
|
using JSMR.Application.Scanning.Contracts;
|
||||||
|
using JSMR.Infrastructure.Http;
|
||||||
|
using JSMR.Infrastructure.Scanning;
|
||||||
|
using JSMR.Tests.Utilities;
|
||||||
|
using NSubstitute;
|
||||||
|
using Shouldly;
|
||||||
|
|
||||||
|
namespace JSMR.Tests.Integrations.DLSite;
|
||||||
|
|
||||||
|
/// <summary>
/// Tests for <see cref="EnglishVoiceWorksScanner"/> that feed it a canned HTML page
/// through a substituted <see cref="IHttpService"/> instead of performing real HTTP requests.
/// </summary>
public class VoiceWorkScannerTests
{
    /// <summary>
    /// Reads a test resource located under the <c>JSMR.Tests.Scanning</c> resource prefix.
    /// </summary>
    /// <param name="resourceName">File name of the resource, e.g. <c>English-Page.html</c>.</param>
    /// <returns>The value produced by <c>ResourceHelper.ReadAsync</c> for that resource name.</returns>
    private static async Task<string> ReadResourceAsync(string resourceName)
    {
        return await ResourceHelper.ReadAsync($"JSMR.Tests.Scanning.{resourceName}");
    }

    /// <summary>
    /// Scans the English fixture page and verifies the fields extracted for its two rows:
    /// one released work (RJ00000001) and one upcoming work (RJ00000002).
    /// </summary>
    [Fact]
    public async Task Scan_With_English_Locale()
    {
        // Arrange
        string englishPageHtml = await ReadResourceAsync("English-Page.html");

        IHttpService httpService = Substitute.For<IHttpService>();

        // Match any URL and any cancellation token: the test is about parsing, so it must not
        // depend on which token instance the scanner/loader forwards to the HTTP layer.
        httpService.GetStringAsync(Arg.Any<string>(), Arg.Any<CancellationToken>())
            .Returns(Task.FromResult(englishPageHtml));

        HtmlLoader loader = new(httpService);
        EnglishVoiceWorksScanner scanner = new(loader);

        VoiceWorkScanOptions options = new(
            PageNumber: 1,
            PageSize: 100,
            ExcludeAIGeneratedWorks: true,
            ExcludePartiallyAIGeneratedWorks: true,
            ExcludedMakerIds: []
        );

        // Act
        var result = await scanner.ScanPageAsync(options, CancellationToken.None);

        // Assert
        result.Count.ShouldBe(2);

        // First row: an already released work ("Release date: Sep/06/2025", "Purchased: 1,000").
        result[0].ExpectedDate.ShouldBeNull();
        result[0].SalesDate.ShouldBe(new DateOnly(2025, 9, 6));
        result[0].ProductId.ShouldBe("RJ00000001");
        result[0].ProductName.ShouldBe("Title of Product");
        result[0].Description.ShouldBe("Description of the product.");
        result[0].Downloads.ShouldBe(1000);

        // Second row: an upcoming work; "Middle Oct. 2025" is expected to resolve to October 11th.
        result[1].ExpectedDate.ShouldBe(new DateOnly(2025, 10, 11));
        result[1].SalesDate.ShouldBeNull();
        result[1].ProductId.ShouldBe("RJ00000002");
    }
}
|
||||||
Reference in New Issue
Block a user