Initial implementation of voice works scanning.

This commit is contained in:
2025-09-11 00:07:49 -04:00
parent f250276a99
commit 3c0a39b324
50 changed files with 1351 additions and 88 deletions

View File

@@ -0,0 +1,37 @@
using HtmlAgilityPack;
namespace JSMR.Infrastructure.Scanning.Models;
/// <summary>
/// Wraps a parsed listing page and pairs up the three node groups that together describe each work.
/// </summary>
/// <remarks>
/// All XPath queries run once, in the constructor. The class attributes are matched exactly
/// (<c>@class='...'</c>), so an element carrying additional CSS classes is not selected.
/// </remarks>
public class DLSiteHtmlDocument
{
    private readonly List<HtmlNode> _workColumns;
    private readonly List<HtmlNode> _workColumnRights;
    private readonly List<HtmlNode> _workThumbs;

    /// <summary>
    /// The first <c>strong</c> element that is a direct child of a <c>div</c> with class <c>page_total</c>.
    /// </summary>
    public HtmlNode PageTotalNode { get; }

    /// <summary>
    /// Runs the XPath queries against <paramref name="document"/> and caches the results.
    /// </summary>
    /// <param name="document">The parsed HTML document to read from.</param>
    /// <exception cref="ArgumentNullException"><paramref name="document"/> is null.</exception>
    /// <exception cref="InvalidOperationException">The document has no <c>page_total</c> element.</exception>
    public DLSiteHtmlDocument(HtmlDocument document)
    {
        ArgumentNullException.ThrowIfNull(document);

        HtmlNode root = document.DocumentNode;

        _workColumns = SelectAll(root, "//dl[@class='work_1col']");
        _workColumnRights = SelectAll(root, "//td[@class='work_1col_right']");
        _workThumbs = SelectAll(root, "//div[@class='work_thumb']");

        // Fail with a message that names the missing element instead of a bare NullReferenceException.
        PageTotalNode = root.SelectSingleNode("//div[@class='page_total']/strong")
            ?? throw new InvalidOperationException("Page total node (//div[@class='page_total']/strong) was not found.");
    }

    /// <summary>
    /// Builds one <see cref="DLSiteHtmlNode"/> per work by combining the i-th entry of each node group.
    /// </summary>
    /// <returns>The nodes in document order; an empty list when the page contains no works.</returns>
    /// <exception cref="InvalidOperationException">The three node groups differ in size.</exception>
    public List<DLSiteHtmlNode> GetDLSiteNodes()
    {
        // The groups are paired by index, so a size mismatch would silently mix up different works.
        if (_workColumns.Count != _workColumnRights.Count || _workColumns.Count != _workThumbs.Count)
        {
            throw new InvalidOperationException("Work column node counts do not match!");
        }

        var nodes = new List<DLSiteHtmlNode>(_workColumns.Count);

        for (int i = 0; i < _workColumns.Count; i++)
        {
            nodes.Add(new DLSiteHtmlNode(_workColumns[i], _workColumnRights[i], _workThumbs[i]));
        }

        return nodes;
    }

    /// <summary>
    /// Returns every node matching <paramref name="xpath"/>, or an empty list when nothing matches.
    /// </summary>
    private static List<HtmlNode> SelectAll(HtmlNode root, string xpath)
    {
        // SelectNodes yields null rather than an empty collection when there are no matches.
        var matches = root.SelectNodes(xpath);

        if (matches is null)
        {
            return [];
        }

        return [.. matches];
    }
}

View File

@@ -0,0 +1,156 @@
using HtmlAgilityPack;
namespace JSMR.Infrastructure.Scanning.Models;
/// <summary>
/// Locates the HTML nodes that describe a single work, given the three sibling containers
/// (left column, right column, thumbnail) that belong to it.
/// </summary>
/// <remarks>
/// All lookups run once, in the constructor. Nodes the constructor treats as mandatory raise
/// <see cref="InvalidOperationException"/> when absent; optional single nodes are left null and
/// optional collections are left empty.
/// </remarks>
public class DLSiteHtmlNode
{
    /// <summary>The left-column container passed to the constructor.</summary>
    public HtmlNode LeftNode { get; }

    /// <summary>The right-column container passed to the constructor.</summary>
    public HtmlNode RightNode { get; }

    /// <summary>The thumbnail container passed to the constructor.</summary>
    public HtmlNode ThumbNode { get; }

    /// <summary>The <c>dt.work_name</c> element inside <see cref="LeftNode"/>.</summary>
    public HtmlNode ProductNode { get; }

    /// <summary>The first <c>a</c> element inside <see cref="ProductNode"/>.</summary>
    public HtmlNode ProductLinkNode { get; }

    /// <summary>
    /// The leading text child of <see cref="ProductLinkNode"/> when the link has more than one child
    /// and the first is a text node; otherwise <see cref="ProductLinkNode"/> itself.
    /// </summary>
    public HtmlNode ProductTextNode { get; }

    /// <summary>The <c>dd.work_text</c> element inside <see cref="LeftNode"/>.</summary>
    public HtmlNode DescriptionNode { get; }

    /// <summary>The <c>dd.maker_name</c> element inside <see cref="LeftNode"/>.</summary>
    public HtmlNode MakerNode { get; }

    /// <summary>The first link inside <see cref="MakerNode"/> whose <c>href</c> contains <c>maker_id</c>.</summary>
    public HtmlNode MakerLinkNode { get; }

    /// <summary>
    /// The <c>li.sales_date</c> element of the right column's <c>ul.work_info_box</c>;
    /// null when either element is missing.
    /// </summary>
    public HtmlNode SalesDateNode { get; }

    /// <summary>The <c>p.expected_date</c> element inside <see cref="ProductNode"/>; null when missing.</summary>
    public HtmlNode ExpectedDateNode { get; }

    /// <summary>
    /// The first <c>span</c> in <see cref="RightNode"/> whose class contains <c>_dl_count_</c>;
    /// null when there is no such span or no <c>ul.work_info_box</c>.
    /// </summary>
    public HtmlNode DownloadsNode { get; }

    /// <summary>
    /// The first <c>div</c> whose class contains <c>star_rating</c> inside the right column's
    /// <c>li.work_rating</c>; null when either element is missing.
    /// </summary>
    public HtmlNode StarRatingNode { get; }

    /// <summary>The first <c>img</c> inside the first <c>a</c> of <see cref="ThumbNode"/>.</summary>
    public HtmlNode ImageNode { get; }

    /// <summary>The <c>span</c> elements inside <c>dd.work_genre</c>; empty when there are none.</summary>
    public List<HtmlNode> GenreNodes { get; }

    /// <summary>The <c>a</c> elements inside <c>dd.search_tag</c>; empty when there are none.</summary>
    public List<HtmlNode> SearchTagNodes { get; }

    /// <summary>
    /// The links inside <see cref="MakerNode"/> whose <c>href</c> contains <c>keyword_creater</c>;
    /// empty when there are none.
    /// </summary>
    public List<HtmlNode> CreatorNodes { get; }

    /// <summary>
    /// Resolves every node of interest from the three containers of one work.
    /// </summary>
    /// <param name="leftNode">Container holding the name, description, maker, genre and tag elements.</param>
    /// <param name="rightNode">Container holding the info box and rating elements.</param>
    /// <param name="thumbNode">Container holding the thumbnail link and image.</param>
    /// <exception cref="ArgumentNullException">Any argument is null.</exception>
    /// <exception cref="InvalidOperationException">A mandatory element is missing from the markup.</exception>
    public DLSiteHtmlNode(HtmlNode leftNode, HtmlNode rightNode, HtmlNode thumbNode)
    {
        ArgumentNullException.ThrowIfNull(leftNode);
        ArgumentNullException.ThrowIfNull(rightNode);
        ArgumentNullException.ThrowIfNull(thumbNode);

        LeftNode = leftNode;
        RightNode = rightNode;
        ThumbNode = thumbNode;

        ProductNode = SelectRequired(LeftNode, ".//dt[@class='work_name']");
        ProductLinkNode = SelectRequired(ProductNode, ".//a");
        ProductTextNode = GetProductTextNode();
        DescriptionNode = SelectRequired(LeftNode, ".//dd[@class='work_text']");
        MakerNode = SelectRequired(LeftNode, ".//dd[@class='maker_name']");
        MakerLinkNode = SelectRequired(MakerNode, ".//a[contains(@href, 'maker_id')]");
        ExpectedDateNode = ProductNode.SelectSingleNode(".//p[@class='expected_date']");

        // Optional collections: a missing container or an empty container both yield an empty list.
        GenreNodes = SelectAll(LeftNode.SelectSingleNode(".//dd[@class='work_genre']"), ".//span");
        SearchTagNodes = SelectAll(LeftNode.SelectSingleNode(".//dd[@class='search_tag']"), ".//a");
        CreatorNodes = SelectAll(MakerNode, ".//a[contains(@href, 'keyword_creater')]");

        // Sales date and download count are only looked up when the info box is present.
        HtmlNode workInfoBox = RightNode.SelectSingleNode(".//ul[@class='work_info_box']");
        if (workInfoBox is not null)
        {
            SalesDateNode = workInfoBox.SelectSingleNode(".//li[@class='sales_date']");

            // TODO: this takes the first span whose class merely contains "_dl_count_". The intended
            // selector is the product-specific class ("_dl_count_" followed by the product id), which
            // requires the product id to be available here.
            DownloadsNode = RightNode.SelectSingleNode(".//span[contains(@class, '_dl_count_')]");
        }

        StarRatingNode = RightNode
            .SelectSingleNode(".//li[@class='work_rating']")
            ?.SelectSingleNode(".//div[contains(@class, 'star_rating')]");

        HtmlNode thumbLink = SelectRequired(ThumbNode, ".//a");
        ImageNode = SelectRequired(thumbLink, ".//img");
    }

    /// <summary>
    /// Picks the node that carries the product title text: the link's leading text child when the
    /// link has several children, otherwise the link itself.
    /// </summary>
    private HtmlNode GetProductTextNode()
    {
        HtmlNodeCollection children = ProductLinkNode.ChildNodes;

        if (children.Count > 1 && children[0].Name == "#text")
        {
            return children[0];
        }

        return ProductLinkNode;
    }

    /// <summary>
    /// Returns the first node under <paramref name="parent"/> matching <paramref name="xpath"/>,
    /// or throws an exception that names the missing XPath.
    /// </summary>
    private static HtmlNode SelectRequired(HtmlNode parent, string xpath)
    {
        return parent.SelectSingleNode(xpath)
            ?? throw new InvalidOperationException($"Required node '{xpath}' was not found.");
    }

    /// <summary>
    /// Returns every node under <paramref name="parent"/> matching <paramref name="xpath"/>.
    /// Yields an empty list when <paramref name="parent"/> is null or nothing matches.
    /// </summary>
    private static List<HtmlNode> SelectAll(HtmlNode parent, string xpath)
    {
        // SelectNodes yields null rather than an empty collection when there are no matches.
        var matches = parent?.SelectNodes(xpath);

        if (matches is null)
        {
            return [];
        }

        return [.. matches];
    }
}

View File

@@ -0,0 +1,7 @@
namespace JSMR.Infrastructure.Scanning.Models;
/// <summary>
/// Holds a rating as a score together with a vote count.
/// </summary>
/// <remarks>
/// NOTE(review): nothing in this file shows how these values are produced or what scale
/// <see cref="Score"/> uses — confirm against the code that populates this type.
/// </remarks>
public class ScannedRating
{
/// <summary>The rating score, stored as a <see cref="byte"/>.</summary>
public byte Score { get; set; }
/// <summary>The vote count; presumably the number of votes behind <see cref="Score"/> — confirm with the producer.</summary>
public int Votes { get; set; }
}