Updated scanner tests to work with user English-locale pages.

This commit is contained in:
2025-10-17 19:47:15 -04:00
parent 278b6df650
commit dbe249194a
9 changed files with 470 additions and 63 deletions

View File

@@ -13,7 +13,8 @@ public class DLSiteHtmlDocument
public DLSiteHtmlDocument(HtmlDocument document)
{
_workColumns = document.DocumentNode.SelectNodes("//dl[@class='work_1col']");
_workColumnRights = document.DocumentNode.SelectNodes("//td[@class='work_1col_right']");
//_workColumnRights = document.DocumentNode.SelectNodes("//td[@class='work_1col_right']");
_workColumnRights = document.DocumentNode.SelectNodes("//td[contains(@class, 'work_1col_right')]");
_workThumbs = document.DocumentNode.SelectNodes("//div[@class='work_thumb']");
PageTotalNode = document.DocumentNode.SelectNodes("//div[@class='page_total']/strong")[0];

View File

@@ -11,17 +11,18 @@ public class DLSiteHtmlNode
public HtmlNode ProductNode { get; private set; }
public HtmlNode ProductLinkNode { get; private set; }
public HtmlNode ProductTextNode { get; private set; }
public HtmlNode DescriptionNode { get; private set; }
public HtmlNode? DescriptionNode { get; private set; }
public HtmlNode MakerNode { get; private set; }
public HtmlNode MakerLinkNode { get; private set; }
public HtmlNode SalesDateNode { get; private set; }
public HtmlNode ExpectedDateNode { get; private set; }
public HtmlNode? ExpectedDateNode { get; private set; }
public HtmlNode? WorkInfoBox { get; private set; }
public HtmlNode? SalesDateNode { get; private set; }
public HtmlNode DownloadsNode { get; private set; }
public HtmlNode StarRatingNode { get; private set; }
public HtmlNode? StarRatingNode { get; private set; }
public HtmlNode ImageNode { get; private set; }
public List<HtmlNode> GenreNodes { get; private set; }
public List<HtmlNode> SearchTagNodes { get; private set; }
public List<HtmlNode> CreatorNodes { get; private set; }
public HtmlNode[] GenreNodes { get; private set; }
public HtmlNode[] SearchTagNodes { get; private set; }
public HtmlNode[] CreatorNodes { get; private set; }
public DLSiteHtmlNode(HtmlNode leftNode, HtmlNode rightNode, HtmlNode thumbNode)
{
@@ -33,35 +34,44 @@ public class DLSiteHtmlNode
ProductLinkNode = ProductNode.SelectNodes(".//a")[0];
ProductTextNode = GetProductTextNode();
DescriptionNode = LeftNode.SelectNodes(".//dd[@class='work_text']")[0];
//DescriptionNode = LeftNode.SelectNodes(".//dd[@class='work_text']")[0];
DescriptionNode = LeftNode.SelectNodes(".//dd[@class='work_text']")?.FirstOrDefault();
MakerNode = LeftNode.SelectNodes(".//dd[@class='maker_name']")[0];
MakerLinkNode = MakerNode.SelectNodes(".//a[contains(@href, 'maker_id')]")[0];
ExpectedDateNode = GetExpectedDateNode();
//ExpectedDateNode = GetExpectedDateNode();
ExpectedDateNode = ProductNode.SelectNodes(".//p[@class='expected_date']")?.FirstOrDefault();
InitializeGenreNodes();
InitializeSearchTagNodes();
InitializeCreatorNodes();
InitializeSalesAndDownloadsNodes();
InitializeStarRatingNode();
InitializeImageNode();
GenreNodes = GetGenreNodes();
SearchTagNodes = GetSearchTagNodes();
CreatorNodes = GetCreatorNodes();
WorkInfoBox = RightNode.SelectNodes(".//ul[@class='work_info_box']")?.FirstOrDefault();
SalesDateNode = WorkInfoBox?.SelectNodes(".//li[@class='sales_date']")?.FirstOrDefault();
// TODO: Fix!
//DownloadsNode = RightNode.SelectSingleNode(".//span[@class='_dl_count_" + works[rightsIndex].ProductId + "']");
DownloadsNode = RightNode.SelectSingleNode(".//span[contains(@class, '_dl_count_')]");
//InitializeSalesAndDownloadsNodes();
StarRatingNode = GetStarRatingNode();
ImageNode = GetImageNode();
}
private void InitializeGenreNodes()
private HtmlNode[] GetGenreNodes()
{
HtmlNode genreNode = LeftNode.SelectNodes(".//dd[@class='work_genre']")[0];
GenreNodes = [.. genreNode.SelectNodes(".//span")];
return [.. genreNode.SelectNodes(".//span")];
}
private void InitializeSearchTagNodes()
private HtmlNode[] GetSearchTagNodes()
{
HtmlNodeCollection searchTagNodes = LeftNode.SelectNodes(".//dd[@class='search_tag']");
if (searchTagNodes == null || searchTagNodes.Count == 0)
{
SearchTagNodes = [];
return [];
}
else
{
@@ -69,56 +79,64 @@ public class DLSiteHtmlNode
if (searchTagNodesLinks == null || searchTagNodesLinks.Count == 0)
{
SearchTagNodes = [];
return [];
}
else
{
SearchTagNodes = [.. searchTagNodesLinks];
return [.. searchTagNodesLinks];
}
}
}
private void InitializeCreatorNodes()
private HtmlNode[] GetCreatorNodes()
{
HtmlNodeCollection creatorNodes = MakerNode.SelectNodes(".//a[contains(@href, 'keyword_creater')]");
if (creatorNodes == null || creatorNodes.Count == 0)
{
CreatorNodes = [];
return [];
}
else
{
CreatorNodes = [.. creatorNodes];
return [.. creatorNodes];
}
}
private void InitializeSalesAndDownloadsNodes()
{
HtmlNodeCollection workInfoBox = RightNode.SelectNodes(".//ul[@class='work_info_box']");
//private void InitializeSalesAndDownloadsNodes()
//{
// HtmlNodeCollection workInfoBox = RightNode.SelectNodes(".//ul[@class='work_info_box']");
if (workInfoBox != null)
{
HtmlNodeCollection salesDateNodes = workInfoBox[0].SelectNodes(".//li[@class='sales_date']");
// if (workInfoBox != null)
// {
// HtmlNodeCollection salesDateNodes = workInfoBox[0].SelectNodes(".//li[@class='sales_date']");
if (salesDateNodes != null && salesDateNodes.Count > 0)
{
SalesDateNode = salesDateNodes[0];
}
// if (salesDateNodes != null && salesDateNodes.Count > 0)
// {
// SalesDateNode = salesDateNodes[0];
// }
// TODO: Fix!
//DownloadsNode = RightNode.SelectSingleNode(".//span[@class='_dl_count_" + works[rightsIndex].ProductId + "']");
DownloadsNode = RightNode.SelectSingleNode(".//span[contains(@class, '_dl_count_')]");
}
}
// // TODO: Fix!
// //DownloadsNode = RightNode.SelectSingleNode(".//span[@class='_dl_count_" + works[rightsIndex].ProductId + "']");
// DownloadsNode = RightNode.SelectSingleNode(".//span[contains(@class, '_dl_count_')]");
// }
//}
private void InitializeStarRatingNode()
//private HtmlNode? GetSalesDateNode()
//{
// if (WorkInfoBox is null)
// return null;
// return WorkInfoBox.SelectNodes(".//li[@class='sales_date']").FirstOrDefault();
//}
private HtmlNode? GetStarRatingNode()
{
var ratingsNode = RightNode.SelectSingleNode(".//li[@class='work_rating']");
if (ratingsNode == null)
return;
return null;
StarRatingNode = ratingsNode.SelectSingleNode(".//div[contains(@class, 'star_rating')]");
return ratingsNode.SelectSingleNode(".//div[contains(@class, 'star_rating')]");
}
private HtmlNode GetProductTextNode()
@@ -133,24 +151,24 @@ public class DLSiteHtmlNode
}
}
private HtmlNode GetExpectedDateNode()
{
HtmlNodeCollection expectedDateNodes = ProductNode.SelectNodes(".//p[@class='expected_date']");
//private HtmlNode? GetExpectedDateNode()
//{
// HtmlNodeCollection expectedDateNodes = ProductNode.SelectNodes(".//p[@class='expected_date']").FirstOrDefault();
if (expectedDateNodes != null && expectedDateNodes.Count > 0)
{
return expectedDateNodes[0];
}
else
{
return null;
}
}
// if (expectedDateNodes != null && expectedDateNodes.Count > 0)
// {
// return expectedDateNodes[0];
// }
// else
// {
// return null;
// }
//}
private void InitializeImageNode()
private HtmlNode GetImageNode()
{
HtmlNode linkNode = ThumbNode.SelectNodes(".//a")[0];
ImageNode = linkNode.SelectNodes(".//img")[0];
return linkNode.SelectNodes(".//img")[0];
}
}

View File

@@ -5,7 +5,7 @@ namespace JSMR.Infrastructure.Scanning;
public static class ScannerUtilities
{
public static List<string> GetStringListFromNodes(List<HtmlNode> nodes)
public static List<string> GetStringListFromNodes(HtmlNode[] nodes)
{
return nodes
.Where(node => string.IsNullOrEmpty(node.InnerHtml) == false)

View File

@@ -114,7 +114,7 @@ public abstract class VoiceWorksScanner(IHtmlLoader htmlLoader) : IVoiceWorksSca
return work;
}
private static AgeRating GetAgeRating(List<HtmlNode> genreNodes)
private static AgeRating GetAgeRating(HtmlNode[] genreNodes)
{
List<string> genres = ScannerUtilities.GetStringListFromNodes(genreNodes);