156 lines
5.6 KiB
C#
156 lines
5.6 KiB
C#
using HtmlAgilityPack;
|
|
using JSMR.Application.Enums;
|
|
using JSMR.Application.Scanning.Contracts;
|
|
using JSMR.Application.Scanning.Ports;
|
|
using JSMR.Domain.Enums;
|
|
using JSMR.Domain.ValueObjects;
|
|
using JSMR.Infrastructure.Http;
|
|
using JSMR.Infrastructure.Scanning.Models;
|
|
using System.Globalization;
|
|
using System.Text.RegularExpressions;
|
|
|
|
namespace JSMR.Infrastructure.Scanning;
|
|
|
|
/// <summary>
/// Base class for scanners that load one DLSite search-result page and convert each
/// result node on it into a <see cref="DLSiteWork"/>.
/// </summary>
/// <param name="htmlLoader">Loader used to fetch the HTML document for the search URL.</param>
public abstract class VoiceWorksScanner(IHtmlLoader htmlLoader) : IVoiceWorksScanner
{
    /// <summary>
    /// Number styles accepted for scraped counters (downloads, votes): digits with
    /// thousands separators, optionally surrounded by whitespace.
    /// </summary>
    private const NumberStyles CountStyles =
        NumberStyles.AllowThousands | NumberStyles.AllowLeadingWhite | NumberStyles.AllowTrailingWhite;

    /// <summary>
    /// Captures the text between the first pair of parentheses in the star-rating text.
    /// Built once and shared instead of being re-created on every call.
    /// </summary>
    private static readonly Regex VotesRegex = new(@"\((.*?)\)", RegexOptions.Compiled);

    /// <summary>Locale handed to the search filter builder when the page URL is built.</summary>
    protected abstract Locale Locale { get; }

    /// <summary>Languages handed to the search filter builder when the page URL is built.</summary>
    protected abstract SupportedLanguage[] SupportedLanguages { get; }

    /// <summary>Converts the trimmed inner HTML of a work's expected-date node into a date.</summary>
    /// <param name="expectedDate">Trimmed inner HTML of the expected-date node.</param>
    /// <returns>The parsed date, or <c>null</c> as decided by the implementation.</returns>
    protected abstract DateOnly? GetEstimatedReleaseDate(string expectedDate);

    /// <summary>Converts the inner HTML of a work's sales-date node into a date.</summary>
    /// <param name="salesDate">Inner HTML of the sales-date node, passed through untrimmed.</param>
    /// <returns>The parsed date, or <c>null</c> as decided by the implementation.</returns>
    protected abstract DateOnly? GetSalesDate(string salesDate);

    /// <summary>
    /// Loads the search page described by <paramref name="options"/> and returns the works found
    /// on it, skipping works whose maker is listed in <c>options.ExcludedMakerIds</c>.
    /// </summary>
    /// <param name="options">Paging and exclusion settings for the scan.</param>
    /// <param name="cancellationToken">Token forwarded to the HTML loader.</param>
    /// <returns>The works parsed from the page, in node order.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="options"/> is <c>null</c>.</exception>
    public async Task<IReadOnlyList<DLSiteWork>> ScanPageAsync(VoiceWorkScanOptions options, CancellationToken cancellationToken = default)
    {
        ArgumentNullException.ThrowIfNull(options);

        DLSiteHtmlDocument document = await GetDLSiteHtmlCollectionAsync(options, cancellationToken);
        List<DLSiteHtmlNode> nodes = document.GetDLSiteNodes();

        return GetDLSiteWorks(nodes, options);
    }

    /// <summary>Fetches the HTML for the search URL and wraps it in a <see cref="DLSiteHtmlDocument"/>.</summary>
    private async Task<DLSiteHtmlDocument> GetDLSiteHtmlCollectionAsync(VoiceWorkScanOptions options, CancellationToken cancellationToken)
    {
        string url = GetUrl(options);
        HtmlDocument document = await htmlLoader.GetHtmlDocumentAsync(url, cancellationToken);

        return new DLSiteHtmlDocument(document);
    }

    /// <summary>
    /// Builds the search URL from this scanner's locale and supported languages plus the
    /// exclusions, page number and page size carried by <paramref name="options"/>.
    /// </summary>
    /// <param name="options">Paging and exclusion settings for the scan.</param>
    /// <returns>The search query produced by the filter builder.</returns>
    protected string GetUrl(VoiceWorkScanOptions options)
    {
        var filterBuilder = new DLSiteSearchFilterBuilder()
            .UseLocale(Locale)
            .IncludeSupportedLanguages(SupportedLanguages)
            .ExcludeMakers(options.ExcludedMakerIds);

        if (options.ExcludePartiallyAIGeneratedWorks)
        {
            filterBuilder.ExcludePartiallyAIGeneratedWorks();
        }

        if (options.ExcludeAIGeneratedWorks)
        {
            filterBuilder.ExcludeAIGeneratedWorks();
        }

        return filterBuilder.BuildSearchQuery(options.PageNumber, options.PageSize);
    }

    /// <summary>
    /// Converts every node into a work and drops those whose maker id appears in
    /// <c>options.ExcludedMakerIds</c>. The same ids are also sent to the filter builder
    /// in <see cref="GetUrl"/>; this is a second, client-side pass over the parsed results.
    /// </summary>
    private List<DLSiteWork> GetDLSiteWorks(List<DLSiteHtmlNode> nodes, VoiceWorkScanOptions options)
    {
        return nodes
            .Select(GetDLSiteWork)
            .Where(work => !options.ExcludedMakerIds.Any(makerId => makerId == work.MakerId))
            .ToList();
    }

    /// <summary>Maps a single result node onto a <see cref="DLSiteWork"/>.</summary>
    /// <exception cref="FormatException">The downloads text is present but is not a valid number.</exception>
    private DLSiteWork GetDLSiteWork(DLSiteHtmlNode node)
    {
        string productUrl = node.ProductLinkNode.Attributes["href"].Value;
        string makerUrl = node.MakerLinkNode.Attributes["href"].Value;
        string imageSource = ScannerUtilities.GetImageSource(node.ImageNode);

        // The "_sam" image is kept as SmallImageUrl; the "_main" variant of the same file becomes ImageUrl.
        string imageUrl = imageSource.Replace("_sam.jpg", "_main.jpg").Replace("_sam.gif", "_main.gif");
        ScannedRating? rating = GetScannedRating(node.StarRatingNode);

        DLSiteWork work = new()
        {
            ProductName = ScannerUtilities.GetDecodedText(node.ProductTextNode),
            Description = ScannerUtilities.GetDecodedText(node.DescriptionNode),
            ProductId = ScannerUtilities.GetTextBetween(productUrl, "product_id/", ".html"),
            Maker = ScannerUtilities.GetDecodedText(node.MakerLinkNode),
            MakerId = ScannerUtilities.GetTextBetween(makerUrl, "maker_id/", ".html"),
            Genres = ScannerUtilities.GetStringListFromNodes(node.GenreNodes),
            Tags = ScannerUtilities.GetStringListFromNodes(node.SearchTagNodes),
            Creators = ScannerUtilities.GetStringListFromNodes(node.CreatorNodes),
            SmallImageUrl = imageSource,
            ImageUrl = imageUrl,
            // An image path containing "ana/doujin" marks the work as announced rather than released.
            Type = imageUrl.Contains("ana/doujin") ? DLSiteWorkType.Announced : DLSiteWorkType.Released,
            StarRating = rating?.Score,
            Votes = rating?.Votes,
            AgeRating = GetAgeRating(node.GenreNodes)
        };

        if (node.ExpectedDateNode is not null)
        {
            work.ExpectedDate = GetEstimatedReleaseDate(node.ExpectedDateNode.InnerHtml.Trim());
        }

        if (node.SalesDateNode is not null)
        {
            // NOTE(review): unlike the expected date, this text is passed through untrimmed - confirm
            // that GetSalesDate implementations cope with surrounding whitespace.
            work.SalesDate = GetSalesDate(node.SalesDateNode.InnerHtml);
        }

        if (node.DownloadsNode is not null)
        {
            // Parse with the invariant culture so "1,234" is read the same way regardless of the
            // thread culture, and tolerate whitespace around the number in the markup.
            work.Downloads = int.Parse(node.DownloadsNode.InnerHtml, CountStyles, CultureInfo.InvariantCulture);
        }

        return work;
    }

    /// <summary>
    /// Derives the age rating from the genre labels: "全年齢" maps to <see cref="AgeRating.AllAges"/>,
    /// "R-15" to <see cref="AgeRating.R15"/>, and anything else to <see cref="AgeRating.R18"/>.
    /// </summary>
    private static AgeRating GetAgeRating(HtmlNode[] genreNodes)
    {
        List<string> genres = ScannerUtilities.GetStringListFromNodes(genreNodes);

        if (genres.Contains("全年齢"))
        {
            return AgeRating.AllAges;
        }

        if (genres.Contains("R-15"))
        {
            return AgeRating.R15;
        }

        return AgeRating.R18;
    }

    /// <summary>
    /// Reads the score and vote count from a star-rating node. The score is the number left after
    /// removing "star_" from the node's rating class; the vote count is the first parenthesised
    /// text in the node.
    /// </summary>
    /// <returns>
    /// The rating, or <c>null</c> when the node is missing, has no rating class, has no
    /// parenthesised text, or when either number cannot be parsed.
    /// </returns>
    private static ScannedRating? GetScannedRating(HtmlNode? starRatingNode)
    {
        if (starRatingNode is null)
        {
            return null;
        }

        // The rating class is any class containing "star_" other than the generic "star_rating" marker.
        string? ratingClass = starRatingNode.GetClasses().FirstOrDefault(className =>
            className.Contains("star_") && className != "star_rating");

        if (string.IsNullOrEmpty(ratingClass))
        {
            return null;
        }

        // Unparsable values are treated like a missing rating instead of failing the whole page scan.
        if (!byte.TryParse(ratingClass.Replace("star_", ""), NumberStyles.Integer, CultureInfo.InvariantCulture, out byte score))
        {
            return null;
        }

        Match voteMatch = VotesRegex.Match(starRatingNode.InnerText);

        if (!voteMatch.Success)
        {
            return null;
        }

        if (!int.TryParse(voteMatch.Groups[1].Value, CountStyles, CultureInfo.InvariantCulture, out int votes))
        {
            return null;
        }

        return new ScannedRating
        {
            Score = score,
            Votes = votes
        };
    }
}