Updated scanner logic to handle thumb VueJS components. Removed unneeded DLSiteWork fields.
This commit is contained in:
@@ -2,37 +2,26 @@
|
||||
|
||||
namespace JSMR.Infrastructure.Scanning.Models;
|
||||
|
||||
public class DLSiteHtmlDocument
|
||||
public class DLSiteHtmlDocument(HtmlDocument document)
|
||||
{
|
||||
private readonly HtmlNodeCollection _workColumns;
|
||||
private readonly HtmlNodeCollection _workColumnRights;
|
||||
private readonly HtmlNodeCollection _workThumbs;
|
||||
private readonly HtmlNodeCollection _workColumns = document.DocumentNode.SelectNodes("//dl[@class='work_1col']");
|
||||
private readonly HtmlNodeCollection _workColumnRights = document.DocumentNode.SelectNodes("//td[contains(@class, 'work_1col_right')]");
|
||||
private readonly HtmlNodeCollection _workThumbs = document.DocumentNode.SelectNodes("//div[@class='work_thumb']");
|
||||
public HtmlNode PageTotalNode { get; } = document.DocumentNode.SelectNodes("//div[@class='page_total']/strong")[0];
|
||||
|
||||
public HtmlNode PageTotalNode { get; }
|
||||
|
||||
public DLSiteHtmlDocument(HtmlDocument document)
|
||||
public DLSiteHtmlNode[] GetDLSiteNodes()
|
||||
{
|
||||
_workColumns = document.DocumentNode.SelectNodes("//dl[@class='work_1col']");
|
||||
//_workColumnRights = document.DocumentNode.SelectNodes("//td[@class='work_1col_right']");
|
||||
_workColumnRights = document.DocumentNode.SelectNodes("//td[contains(@class, 'work_1col_right')]");
|
||||
_workThumbs = document.DocumentNode.SelectNodes("//div[@class='work_thumb']");
|
||||
|
||||
PageTotalNode = document.DocumentNode.SelectNodes("//div[@class='page_total']/strong")[0];
|
||||
}
|
||||
|
||||
public List<DLSiteHtmlNode> GetDLSiteNodes()
|
||||
{
|
||||
var nodes = new List<DLSiteHtmlNode>();
|
||||
List<DLSiteHtmlNode> nodes = [];
|
||||
|
||||
if (_workColumns.Count != _workColumnRights.Count || _workColumns.Count != _workThumbs.Count)
|
||||
throw new Exception("Work column node counts do not match!");
|
||||
|
||||
for (int i = 0; i < _workColumns.Count; i++)
|
||||
{
|
||||
var node = new DLSiteHtmlNode(_workColumns[i], _workColumnRights[i], _workThumbs[i]);
|
||||
DLSiteHtmlNode node = new(_workColumns[i], _workColumnRights[i], _workThumbs[i]);
|
||||
nodes.Add(node);
|
||||
}
|
||||
|
||||
return nodes;
|
||||
return [.. nodes];
|
||||
}
|
||||
}
|
||||
@@ -19,7 +19,8 @@ public class DLSiteHtmlNode
|
||||
public HtmlNode? SalesDateNode { get; private set; }
|
||||
public HtmlNode DownloadsNode { get; private set; }
|
||||
public HtmlNode? StarRatingNode { get; private set; }
|
||||
public HtmlNode ImageNode { get; private set; }
|
||||
public HtmlNode? ImageNode { get; private set; }
|
||||
public HtmlNode? ThumbWithNgFilterBlockNode { get; private set; }
|
||||
public HtmlNode[] GenreNodes { get; private set; }
|
||||
public HtmlNode[] SearchTagNodes { get; private set; }
|
||||
public HtmlNode[] CreatorNodes { get; private set; }
|
||||
@@ -55,7 +56,8 @@ public class DLSiteHtmlNode
|
||||
|
||||
//InitializeSalesAndDownloadsNodes();
|
||||
StarRatingNode = GetStarRatingNode();
|
||||
ImageNode = GetImageNode();
|
||||
ImageNode = TryGetImageNode();
|
||||
ThumbWithNgFilterBlockNode = ThumbNode.SelectSingleNode(".//thumb-with-ng-filter-block");
|
||||
}
|
||||
|
||||
private HtmlNode[] GetGenreNodes()
|
||||
@@ -165,10 +167,13 @@ public class DLSiteHtmlNode
|
||||
// }
|
||||
//}
|
||||
|
||||
private HtmlNode GetImageNode()
|
||||
private HtmlNode? TryGetImageNode()
|
||||
{
|
||||
HtmlNode linkNode = ThumbNode.SelectNodes(".//a")[0];
|
||||
HtmlNode? linkNode = ThumbNode.SelectSingleNode(".//a");
|
||||
|
||||
return linkNode.SelectNodes(".//img")[0];
|
||||
if (linkNode is null)
|
||||
return null;
|
||||
|
||||
return linkNode.SelectSingleNode(".//img");
|
||||
}
|
||||
}
|
||||
@@ -1,4 +1,5 @@
|
||||
using HtmlAgilityPack;
|
||||
using System.Text.Json;
|
||||
using System.Web;
|
||||
|
||||
namespace JSMR.Infrastructure.Scanning;
|
||||
@@ -45,4 +46,26 @@ public static class ScannerUtilities
|
||||
|
||||
return imageSource;
|
||||
}
|
||||
|
||||
/// <summary>
/// Parses a JavaScript-style array literal of strings (for example <c>['a', "b"]</c>)
/// into a string array.
/// </summary>
/// <param name="value">The raw array literal text; elements may be single- or double-quoted.</param>
/// <returns>
/// The parsed elements, or an empty array when <paramref name="value"/> is null, blank,
/// or contains no elements.
/// </returns>
public static string[] ParseJavaScriptArray(string value)
{
    // A blank value has no elements. Without this guard the fallback split below
    // would produce a single empty entry for whitespace-only input.
    if (string.IsNullOrWhiteSpace(value))
        return [];

    string trimmed = value.Trim();

    // Try the text unchanged first so that valid JSON containing apostrophes is not
    // corrupted by quote normalization; only then retry with single quotes swapped.
    string[]? parsed = TryDeserializeStringArray(trimmed)
        ?? TryDeserializeStringArray(NormalizeJavaScriptArray(trimmed));

    if (parsed is not null)
        return parsed;

    // Last resort for text that is not valid JSON either way: strip the brackets,
    // split on commas and peel the surrounding quotes off each entry.
    return [.. trimmed
        .Trim('[', ']')
        .Split(',', StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries)
        .Select(x => x.Trim('\'', '"'))];
}

/// <summary>
/// Attempts to deserialize <paramref name="json"/> as a JSON array of strings.
/// </summary>
/// <returns>The deserialized array (empty for a JSON <c>null</c>), or <c>null</c> when the text is not valid JSON for a string array.</returns>
private static string[]? TryDeserializeStringArray(string json)
{
    try
    {
        return JsonSerializer.Deserialize<string[]>(json) ?? [];
    }
    catch (JsonException)
    {
        return null;
    }
}

/// <summary>
/// Trims <paramref name="input"/> and replaces every single quote with a double quote
/// so that a single-quoted array literal can be read as JSON.
/// </summary>
private static string NormalizeJavaScriptArray(string input)
{
    return input.Trim().Replace('\'', '"');
}
|
||||
}
|
||||
@@ -7,6 +7,7 @@ using JSMR.Domain.ValueObjects;
|
||||
using JSMR.Infrastructure.Http;
|
||||
using JSMR.Infrastructure.Scanning.Models;
|
||||
using System.Globalization;
|
||||
using System.Text.Json;
|
||||
using System.Text.RegularExpressions;
|
||||
|
||||
namespace JSMR.Infrastructure.Scanning;
|
||||
@@ -22,7 +23,7 @@ public abstract class VoiceWorksScanner(IHtmlLoader htmlLoader) : IVoiceWorksSca
|
||||
public async Task<IReadOnlyList<DLSiteWork>> ScanPageAsync(VoiceWorkScanOptions options, CancellationToken cancellationToken = default)
|
||||
{
|
||||
DLSiteHtmlDocument document = await GetDLSiteHtmlCollectionAsync(options, cancellationToken);
|
||||
List<DLSiteHtmlNode> nodes = document.GetDLSiteNodes();
|
||||
DLSiteHtmlNode[] nodes = document.GetDLSiteNodes();
|
||||
|
||||
return GetDLSiteWorks(nodes, options);
|
||||
}
|
||||
@@ -52,7 +53,7 @@ public abstract class VoiceWorksScanner(IHtmlLoader htmlLoader) : IVoiceWorksSca
|
||||
return filterBuilder.BuildSearchQuery(options.PageNumber, options.PageSize);
|
||||
}
|
||||
|
||||
private List<DLSiteWork> GetDLSiteWorks(List<DLSiteHtmlNode> nodes, VoiceWorkScanOptions options)
|
||||
private List<DLSiteWork> GetDLSiteWorks(DLSiteHtmlNode[] nodes, VoiceWorkScanOptions options)
|
||||
{
|
||||
var works = new List<DLSiteWork>();
|
||||
|
||||
@@ -73,8 +74,7 @@ public abstract class VoiceWorksScanner(IHtmlLoader htmlLoader) : IVoiceWorksSca
|
||||
{
|
||||
string productUrl = node.ProductLinkNode.Attributes["href"].Value;
|
||||
string makerUrl = node.MakerLinkNode.Attributes["href"].Value;
|
||||
string imageSource = ScannerUtilities.GetImageSource(node.ImageNode);
|
||||
string imageUrl = imageSource.Replace("_sam.jpg", "_main.jpg").Replace("_sam.gif", "_main.gif");
|
||||
(string imageSource, string imageUrl) = TryGetImageSourceAndUrl(node);
|
||||
ScannedRating? rating = GetScannedRating(node.StarRatingNode);
|
||||
|
||||
DLSiteWork work = new()
|
||||
@@ -89,7 +89,7 @@ public abstract class VoiceWorksScanner(IHtmlLoader htmlLoader) : IVoiceWorksSca
|
||||
Creators = ScannerUtilities.GetStringListFromNodes(node.CreatorNodes),
|
||||
SmallImageUrl = imageSource,
|
||||
ImageUrl = imageUrl,
|
||||
Type = imageUrl.Contains("ana/doujin") ? DLSiteWorkType.Announced : DLSiteWorkType.Released,
|
||||
//Type = imageUrl.Contains("ana/doujin") ? DLSiteWorkType.Announced : DLSiteWorkType.Released,
|
||||
StarRating = rating?.Score,
|
||||
Votes = rating?.Votes,
|
||||
AgeRating = GetAgeRating(node.GenreNodes)
|
||||
@@ -113,6 +113,36 @@ public abstract class VoiceWorksScanner(IHtmlLoader htmlLoader) : IVoiceWorksSca
|
||||
return work;
|
||||
}
|
||||
|
||||
/// <summary>
/// Resolves the thumbnail source and the derived main image URL for a scanned work node.
/// </summary>
/// <remarks>
/// When the node has a <c>thumb-with-ng-filter-block</c> element, the first entry of its
/// <c>:thumb-candidates</c> attribute is used; otherwise the source is read from the
/// node's image element. The main image URL is the source with <c>_sam.jpg</c> /
/// <c>_sam.gif</c> replaced by <c>_main.jpg</c> / <c>_main.gif</c>.
/// </remarks>
/// <param name="node">The scanned work node to read the image information from.</param>
/// <returns>The thumbnail source and the main image URL derived from it.</returns>
/// <exception cref="InvalidOperationException">
/// The thumb block lists no candidates, or the node has neither a thumb block nor an image element.
/// </exception>
private static (string ImageSource, string ImageUrl) TryGetImageSourceAndUrl(DLSiteHtmlNode node)
{
    string imageSource;

    if (node.ThumbWithNgFilterBlockNode is not null)
    {
        string candidates = node.ThumbWithNgFilterBlockNode.GetAttributeValue(":thumb-candidates", string.Empty);
        string[] imageUrls = ScannerUtilities.ParseJavaScriptArray(candidates);

        if (imageUrls.Length == 0)
        {
            throw new InvalidOperationException("No thumb candidates found");
        }

        // Only the first candidate is used as the thumbnail source.
        imageSource = imageUrls[0];
    }
    else if (node.ImageNode is not null)
    {
        imageSource = ScannerUtilities.GetImageSource(node.ImageNode);
    }
    else
    {
        throw new InvalidOperationException("Unable to find image source and/or url");
    }

    // Both branches derive the main image URL the same way, so do it once here.
    string imageUrl = imageSource.Replace("_sam.jpg", "_main.jpg").Replace("_sam.gif", "_main.gif");

    return (imageSource, imageUrl);
}
|
||||
|
||||
private static AgeRating GetAgeRating(HtmlNode[] genreNodes)
|
||||
{
|
||||
List<string> genres = ScannerUtilities.GetStringListFromNodes(genreNodes);
|
||||
|
||||
Reference in New Issue
Block a user