Updated scanner logic to handle thumb VueJS components. Removed unneeded DLSiteWork fields.
All checks were successful
ci / build-test (push) Successful in 2m21s
ci / publish-image (push) Has been skipped

This commit is contained in:
2026-02-28 22:20:24 -05:00
parent ca7ffa1730
commit 704a6fc433
9 changed files with 316 additions and 40 deletions

View File

@@ -7,6 +7,7 @@ using JSMR.Domain.ValueObjects;
using JSMR.Infrastructure.Http;
using JSMR.Infrastructure.Scanning.Models;
using System.Globalization;
using System.Text.Json;
using System.Text.RegularExpressions;
namespace JSMR.Infrastructure.Scanning;
@@ -22,7 +23,7 @@ public abstract class VoiceWorksScanner(IHtmlLoader htmlLoader) : IVoiceWorksSca
/// <summary>
/// Scans a single listing page: loads the HTML document for <paramref name="options"/>,
/// extracts its work nodes via <c>GetDLSiteNodes</c>, and converts them with
/// <c>GetDLSiteWorks</c>.
/// </summary>
/// <param name="options">Scan options forwarded to both the page load and the node-to-work conversion.</param>
/// <param name="cancellationToken">Token passed through to the page load.</param>
/// <returns>The works produced by <c>GetDLSiteWorks</c> for the loaded page.</returns>
public async Task<IReadOnlyList<DLSiteWork>> ScanPageAsync(VoiceWorkScanOptions options, CancellationToken cancellationToken = default)
{
DLSiteHtmlDocument document = await GetDLSiteHtmlCollectionAsync(options, cancellationToken);
// NOTE(review): the two `nodes` declarations below look like the before/after sides of
// this diff hunk (List<> replaced by an array) — confirm against the repository file.
List<DLSiteHtmlNode> nodes = document.GetDLSiteNodes();
DLSiteHtmlNode[] nodes = document.GetDLSiteNodes();
return GetDLSiteWorks(nodes, options);
}
@@ -52,7 +53,7 @@ public abstract class VoiceWorksScanner(IHtmlLoader htmlLoader) : IVoiceWorksSca
return filterBuilder.BuildSearchQuery(options.PageNumber, options.PageSize);
}
private List<DLSiteWork> GetDLSiteWorks(List<DLSiteHtmlNode> nodes, VoiceWorkScanOptions options)
private List<DLSiteWork> GetDLSiteWorks(DLSiteHtmlNode[] nodes, VoiceWorkScanOptions options)
{
var works = new List<DLSiteWork>();
@@ -73,8 +74,7 @@ public abstract class VoiceWorksScanner(IHtmlLoader htmlLoader) : IVoiceWorksSca
{
string productUrl = node.ProductLinkNode.Attributes["href"].Value;
string makerUrl = node.MakerLinkNode.Attributes["href"].Value;
string imageSource = ScannerUtilities.GetImageSource(node.ImageNode);
string imageUrl = imageSource.Replace("_sam.jpg", "_main.jpg").Replace("_sam.gif", "_main.gif");
(string imageSource, string imageUrl) = TryGetImageSourceAndUrl(node);
ScannedRating? rating = GetScannedRating(node.StarRatingNode);
DLSiteWork work = new()
@@ -89,7 +89,7 @@ public abstract class VoiceWorksScanner(IHtmlLoader htmlLoader) : IVoiceWorksSca
Creators = ScannerUtilities.GetStringListFromNodes(node.CreatorNodes),
SmallImageUrl = imageSource,
ImageUrl = imageUrl,
Type = imageUrl.Contains("ana/doujin") ? DLSiteWorkType.Announced : DLSiteWorkType.Released,
//Type = imageUrl.Contains("ana/doujin") ? DLSiteWorkType.Announced : DLSiteWorkType.Released,
StarRating = rating?.Score,
Votes = rating?.Votes,
AgeRating = GetAgeRating(node.GenreNodes)
@@ -113,6 +113,36 @@ public abstract class VoiceWorksScanner(IHtmlLoader htmlLoader) : IVoiceWorksSca
return work;
}
/// <summary>
/// Resolves the thumbnail (small) image source for a scanned work node and derives the
/// main image URL from it.
/// </summary>
/// <remarks>
/// The first entry of the <c>:thumb-candidates</c> attribute on
/// <c>ThumbWithNgFilterBlockNode</c> is preferred; when that node is absent the source is
/// read from <c>ImageNode</c> instead. Despite the "Try" prefix, this method throws when
/// no image can be resolved rather than returning a failure value.
/// </remarks>
/// <param name="node">The scanned work node to read the image information from.</param>
/// <returns>
/// The thumbnail source and the main image URL, the latter obtained by replacing the
/// <c>_sam.jpg</c>/<c>_sam.gif</c> suffix with <c>_main.jpg</c>/<c>_main.gif</c>.
/// </returns>
/// <exception cref="InvalidOperationException">
/// The thumb block is present but yields no candidates, or neither the thumb block nor
/// the image node is present.
/// </exception>
private static (string ImageSource, string ImageUrl) TryGetImageSourceAndUrl(DLSiteHtmlNode node)
{
    string imageSource;

    if (node.ThumbWithNgFilterBlockNode is not null)
    {
        // The attribute holds a JavaScript array literal; an absent attribute falls back
        // to an empty string before parsing.
        string candidates = node.ThumbWithNgFilterBlockNode.GetAttributeValue(":thumb-candidates", string.Empty);
        string[] imageUrls = ScannerUtilities.ParseJavaScriptArray(candidates);

        if (imageUrls.Length == 0)
        {
            throw new InvalidOperationException("No thumb candidates found");
        }

        imageSource = imageUrls[0];
    }
    else if (node.ImageNode is not null)
    {
        imageSource = ScannerUtilities.GetImageSource(node.ImageNode);
    }
    else
    {
        throw new InvalidOperationException("Unable to find image source and/or url");
    }

    // Both sources share the same thumbnail -> main image naming rewrite.
    string imageUrl = imageSource.Replace("_sam.jpg", "_main.jpg").Replace("_sam.gif", "_main.gif");

    return (imageSource, imageUrl);
}
private static AgeRating GetAgeRating(HtmlNode[] genreNodes)
{
List<string> genres = ScannerUtilities.GetStringListFromNodes(genreNodes);