Updated scanner logic and added initial scanner tests.
This commit is contained in:
@@ -1,6 +1,4 @@
|
||||
using HtmlAgilityPack;
|
||||
using JSMR.Application.Common.Caching;
|
||||
using JSMR.Application.Scanning;
|
||||
using JSMR.Application.Scanning.Contracts;
|
||||
using JSMR.Application.Scanning.Ports;
|
||||
using JSMR.Infrastructure.Common.Locales;
|
||||
@@ -12,72 +10,57 @@ using System.Text.RegularExpressions;
|
||||
|
||||
namespace JSMR.Infrastructure.Scanning;
|
||||
|
||||
public abstract class VoiceWorksScanner(IHtmlLoader htmlLoader, ISpamCircleCache spamCircleCache) : IVoiceWorksScanner
|
||||
public abstract class VoiceWorksScanner(IHtmlLoader htmlLoader) : IVoiceWorksScanner
|
||||
{
|
||||
// Locale provided by the concrete scanner; GetUrl hands it to the search filter builder via UseLocale.
protected abstract ILocale Locale { get; }
|
||||
// Languages provided by the concrete scanner; GetUrl/GetUrlAsync add them to the search filter.
protected abstract ISupportedLanguage[] SupportedLanguages { get; }
|
||||
|
||||
// NOTE(review): this is a rendered diff without +/- markers. The date hooks appear twice below,
// once returning DateTime? and once returning DateOnly?. By unified-diff ordering the DateTime?
// pair is presumably the removed version and the DateOnly? pair the added one -- confirm against the commit.
protected abstract DateTime? GetEstimatedReleaseDate(string expectedDate);
|
||||
protected abstract DateTime? GetSalesDate(string salesDate);
|
||||
protected abstract DateOnly? GetEstimatedReleaseDate(string expectedDate);
|
||||
protected abstract DateOnly? GetSalesDate(string salesDate);
|
||||
|
||||
// Overridable switches that default to true. The URL-building lines visible in this diff test both
// these properties and equivalent flags on the options object; whether these properties survive the
// change cannot be determined from this view -- TODO confirm.
protected virtual bool ExcludeSpamCircles => true;
|
||||
protected virtual bool ExcludePartiallyAIGeneratedWorks => true;
|
||||
protected virtual bool ExcludeAIGeneratedWorks => true;
|
||||
|
||||
// Loads the HTML document for the given request/options, extracts its DLSite nodes and converts
// them into DLSiteWork items.
// NOTE(review): this is a rendered diff without +/- markers, so each changed statement appears twice:
// a "request" (ScanVoiceWorksRequest) form followed by an "options" (VoiceWorkScanOptions) form.
// By unified-diff ordering the first of each pair is presumably the removed line -- confirm against the commit.
public async Task<IReadOnlyList<DLSiteWork>> ScanPageAsync(ScanVoiceWorksRequest request, CancellationToken cancellationToken = default)
|
||||
public async Task<IReadOnlyList<DLSiteWork>> ScanPageAsync(VoiceWorkScanOptions options, CancellationToken cancellationToken = default)
|
||||
{
|
||||
DLSiteHtmlDocument document = await GetDLSiteHtmlCollectionAsync(request, cancellationToken);
|
||||
DLSiteHtmlDocument document = await GetDLSiteHtmlCollectionAsync(options, cancellationToken);
|
||||
List<DLSiteHtmlNode> nodes = document.GetDLSiteNodes();
|
||||
|
||||
// The options-based form also forwards the options to GetDLSiteWorks, which uses
// options.ExcludedMakerIds to skip works while building the result list.
return GetDLSiteWorks(nodes);
|
||||
return GetDLSiteWorks(nodes, options);
|
||||
}
|
||||
|
||||
// Builds the search URL, fetches it through the injected htmlLoader and wraps the resulting
// HtmlDocument in a DLSiteHtmlDocument.
// NOTE(review): rendered diff without +/- markers; the request-based and options-based forms of the
// signature and of the URL lookup are both shown. The first of each pair is presumably the removed
// line -- confirm against the commit.
private async Task<DLSiteHtmlDocument> GetDLSiteHtmlCollectionAsync(ScanVoiceWorksRequest request, CancellationToken cancellationToken)
|
||||
private async Task<DLSiteHtmlDocument> GetDLSiteHtmlCollectionAsync(VoiceWorkScanOptions options, CancellationToken cancellationToken)
|
||||
{
|
||||
// One form awaits GetUrlAsync(request, cancellationToken); the other calls the synchronous GetUrl(options).
string url = await GetUrlAsync(request, cancellationToken);
|
||||
string url = GetUrl(options);
|
||||
|
||||
HtmlDocument document = await htmlLoader.GetHtmlDocumentAsync(url, cancellationToken);
|
||||
|
||||
return new DLSiteHtmlDocument(document);
|
||||
}
|
||||
|
||||
// Builds the search query string for one page using DLSiteSearchFilterBuilder.
// NOTE(review): this is a rendered diff without +/- markers, so two implementations are interleaved:
//   * GetUrlAsync(request, cancellationToken): creates an empty builder, adds each supported language
//     in a loop, awaits spamCircleCache.GetAsync for maker ids to exclude when ExcludeSpamCircles is
//     true, and checks the class-level Exclude* properties.
//   * GetUrl(options): configures the builder fluently (UseLocale, IncludeSupportedLanguages,
//     ExcludeMakers(options.ExcludedMakerIds)) and checks the flags on the options object.
// By unified-diff ordering the GetUrlAsync lines are presumably the removed version -- confirm against the commit.
protected virtual async ValueTask<string> GetUrlAsync(ScanVoiceWorksRequest request, CancellationToken cancellationToken)
|
||||
protected string GetUrl(VoiceWorkScanOptions options)
|
||||
{
|
||||
DLSiteSearchFilterBuilder filterBuilder = new();
|
||||
var filterBuilder = new DLSiteSearchFilterBuilder()
|
||||
.UseLocale(Locale)
|
||||
.IncludeSupportedLanguages(SupportedLanguages)
|
||||
.ExcludeMakers(options.ExcludedMakerIds);
|
||||
|
||||
// "supprotedLanguage" is a misspelling of "supportedLanguage"; it is only a local loop variable name.
foreach (ISupportedLanguage supprotedLanguage in SupportedLanguages)
|
||||
{
|
||||
filterBuilder.IncludeSupportedLanguage(supprotedLanguage);
|
||||
}
|
||||
|
||||
// Cache-based exclusion: maker ids come from spamCircleCache and are excluded one at a time.
if (ExcludeSpamCircles)
|
||||
{
|
||||
string[] makerIds = await spamCircleCache.GetAsync(cancellationToken);
|
||||
|
||||
foreach (string makerId in makerIds)
|
||||
filterBuilder.ExcludeMaker(makerId);
|
||||
}
|
||||
|
||||
// Each AI-related filter below is shown with two alternative conditions (class property vs. options flag)
// guarding the same builder call.
if (ExcludePartiallyAIGeneratedWorks)
|
||||
if (options.ExcludePartiallyAIGeneratedWorks)
|
||||
filterBuilder.ExcludePartiallyAIGeneratedWorks();
|
||||
|
||||
if (ExcludeAIGeneratedWorks)
|
||||
if (options.ExcludeAIGeneratedWorks)
|
||||
filterBuilder.ExcludeAIGeneratedWorks();
|
||||
|
||||
// Page number and page size are taken from the request (first form) or the options (second form).
return filterBuilder.BuildSearchQuery(request.PageNumber, request.PageSize);
|
||||
return filterBuilder.BuildSearchQuery(options.PageNumber, options.PageSize);
|
||||
}
|
||||
|
||||
private List<DLSiteWork> GetDLSiteWorks(List<DLSiteHtmlNode> nodes)
|
||||
private List<DLSiteWork> GetDLSiteWorks(List<DLSiteHtmlNode> nodes, VoiceWorkScanOptions options)
|
||||
{
|
||||
var works = new List<DLSiteWork>();
|
||||
//var spamCircles = SpamCircleCache.Get();
|
||||
|
||||
foreach (DLSiteHtmlNode node in nodes)
|
||||
{
|
||||
DLSiteWork work = GetDLSiteWork(node);
|
||||
|
||||
//if (spamCircles.Any(circle => circle.MakerId == work.MakerId))
|
||||
// continue;
|
||||
if (options.ExcludedMakerIds.Any(makerId => makerId == work.MakerId))
|
||||
continue;
|
||||
|
||||
works.Add(work);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user