Initial implementation of voice works scanning.
This commit is contained in:
127
JSMR.Infrastructure/Scanning/DLSiteSearchFilterBuilder.cs
Normal file
127
JSMR.Infrastructure/Scanning/DLSiteSearchFilterBuilder.cs
Normal file
@@ -0,0 +1,127 @@
|
||||
using JSMR.Infrastructure.Common.Locales;
|
||||
using JSMR.Infrastructure.Common.SupportedLanguages;
|
||||
|
||||
namespace JSMR.Infrastructure.Scanning;
|
||||
|
||||
/// <summary>
/// Fluent builder that formats a DLsite voice-work search URL from a set of
/// accumulated filters.
/// </summary>
/// <remarks>
/// Filters are kept in insertion order and duplicates are ignored. The builder
/// performs no network access; <see cref="BuildSearchQuery"/> only formats a string.
/// </remarks>
public class DLSiteSearchFilterBuilder
{
    // Values written as "options[n]" segments (combined with "and").
    private readonly List<string> _optionsAnd = [];

    // Values written as "options_not[n]" segments.
    private readonly List<string> _optionsNot = [];

    // Trimmed maker ids written as negated "keyword_maker_name" terms.
    private readonly List<string> _excludedMakers = [];

    private ILocale _locale = new JapaneseLocale();

    /// <summary>Appends <paramref name="value"/> unless the list already holds it.</summary>
    private static void AppendIfMissing(List<string> target, string value)
    {
        if (!target.Contains(value))
            target.Add(value);
    }

    /// <summary>Sets the locale used for the language path segment and the locale query parameter.</summary>
    /// <param name="locale">Locale to use; a null value falls back to Japanese when the URL is built.</param>
    /// <returns>This builder, for chaining.</returns>
    public DLSiteSearchFilterBuilder UseLocale(ILocale locale)
    {
        _locale = locale;
        return this;
    }

    /// <summary>Adds the language's code to the "and" option list.</summary>
    /// <param name="language">Language whose <c>Code</c> is added.</param>
    /// <returns>This builder, for chaining.</returns>
    public DLSiteSearchFilterBuilder IncludeSupportedLanguage(ISupportedLanguage language)
    {
        AppendIfMissing(_optionsAnd, language.Code);
        return this;
    }

    /// <summary>Excludes every maker id in <paramref name="makerIds"/>.</summary>
    /// <param name="makerIds">Maker ids; blank entries and duplicates are skipped.</param>
    /// <returns>This builder, for chaining.</returns>
    public DLSiteSearchFilterBuilder ExcludeMakers(string[] makerIds)
    {
        for (int position = 0; position < makerIds.Length; position++)
        {
            ExcludeMaker(makerIds[position]);
        }

        return this;
    }

    /// <summary>Excludes a single maker id.</summary>
    /// <param name="makerId">Maker id; null or whitespace is ignored, surrounding whitespace is trimmed.</param>
    /// <returns>This builder, for chaining.</returns>
    public DLSiteSearchFilterBuilder ExcludeMaker(string makerId)
    {
        if (!string.IsNullOrWhiteSpace(makerId))
            AppendIfMissing(_excludedMakers, makerId.Trim());

        return this;
    }

    /// <summary>Adds the "AIP" value to the excluded options.</summary>
    /// <returns>This builder, for chaining.</returns>
    public DLSiteSearchFilterBuilder ExcludePartiallyAIGeneratedWorks()
    {
        AppendIfMissing(_optionsNot, "AIP");
        return this;
    }

    /// <summary>Adds the "AIG" value to the excluded options.</summary>
    /// <returns>This builder, for chaining.</returns>
    public DLSiteSearchFilterBuilder ExcludeAIGeneratedWorks()
    {
        AppendIfMissing(_optionsNot, "AIG");
        return this;
    }

    /// <summary>Formats the search URL for the accumulated filters.</summary>
    /// <param name="pageNumber">Value written to the "page" segment.</param>
    /// <param name="pageSize">Value written to the "per_page" segment.</param>
    /// <returns>The complete search URL.</returns>
    public string BuildSearchQuery(int pageNumber, int pageSize)
    {
        ILocale locale = _locale ?? new JapaneseLocale();

        // Segments are collected in the exact order they appear in the URL.
        List<string> segments =
        [
            "https://www.dlsite.com/maniax/",
            $"fsr/=/language/{locale.Abbreviation}/",
            "sex_category[0]/male/",
            "ana_flg/all/",
            "work_category[0]/doujin/",
            "order[0]/release_d/",
            "work_type_category[0]/audio/",
            "work_type_category_name[0]/ボイス・ASMR/",
        ];

        if (_optionsAnd.Count > 0)
        {
            segments.Add("options_and_or/and/");
            segments.AddRange(_optionsAnd.Select((option, index) => $"options[{index}]/{option}/"));
        }

        if (_excludedMakers.Count > 0)
        {
            // Each maker id is negated with "-" and the terms are joined with "+".
            string makerFilterValue = string.Join("+", _excludedMakers.Select(makerId => "-" + makerId)).Trim();
            segments.Add($"keyword_maker_name/{makerFilterValue}/");
        }

        segments.AddRange(_optionsNot.Select((option, index) => $"options_not[{index}]/{option}/"));

        segments.Add($"per_page/{pageSize}/");
        segments.Add($"page/{pageNumber}/");
        segments.Add("show_type/1/");
        segments.Add($"?locale={locale.Code}");

        return string.Concat(segments);
    }
}
|
||||
@@ -0,0 +1,52 @@
|
||||
using JSMR.Infrastructure.Common.Locales;
|
||||
using JSMR.Infrastructure.Common.SupportedLanguages;
|
||||
|
||||
namespace JSMR.Infrastructure.Scanning;
|
||||
|
||||
/// <summary>
/// Convenience extensions that call <see cref="DLSiteSearchFilterBuilder.UseLocale"/> and
/// <see cref="DLSiteSearchFilterBuilder.IncludeSupportedLanguage"/> with a freshly
/// constructed locale or language instance.
/// </summary>
public static class DLSiteSearchFilterBuilderExtensions
{
    /// <summary>Uses a new <see cref="JapaneseLocale"/>.</summary>
    public static DLSiteSearchFilterBuilder UseDefaultLocale(this DLSiteSearchFilterBuilder searchFilterBuilder)
        => searchFilterBuilder.UseLocale(new JapaneseLocale());

    /// <summary>Uses a new <see cref="EnglishLocale"/>.</summary>
    public static DLSiteSearchFilterBuilder UseEnglishLocale(this DLSiteSearchFilterBuilder searchFilterBuilder)
        => searchFilterBuilder.UseLocale(new EnglishLocale());

    /// <summary>Includes a new <see cref="JapaneseLanguage"/>.</summary>
    public static DLSiteSearchFilterBuilder IncludeJapaneseSupportedLanguage(this DLSiteSearchFilterBuilder searchFilterBuilder)
        => searchFilterBuilder.IncludeSupportedLanguage(new JapaneseLanguage());

    /// <summary>Includes a new <see cref="EnglishLanguage"/>.</summary>
    public static DLSiteSearchFilterBuilder IncludeEnglishSupportedLanguage(this DLSiteSearchFilterBuilder searchFilterBuilder)
        => searchFilterBuilder.IncludeSupportedLanguage(new EnglishLanguage());

    /// <summary>Includes a new <see cref="ChineseLanguage"/>.</summary>
    public static DLSiteSearchFilterBuilder IncludeChineseSupportedLanguage(this DLSiteSearchFilterBuilder searchFilterBuilder)
        => searchFilterBuilder.IncludeSupportedLanguage(new ChineseLanguage());

    /// <summary>Includes a new <see cref="SimplifiedChineseLanguage"/>.</summary>
    public static DLSiteSearchFilterBuilder IncludeSimplifiedChineseSupportedLanguage(this DLSiteSearchFilterBuilder searchFilterBuilder)
        => searchFilterBuilder.IncludeSupportedLanguage(new SimplifiedChineseLanguage());

    /// <summary>Includes a new <see cref="TraditionalChineseLanguage"/>.</summary>
    public static DLSiteSearchFilterBuilder IncludeTraditionalChineseSupportedLanguage(this DLSiteSearchFilterBuilder searchFilterBuilder)
        => searchFilterBuilder.IncludeSupportedLanguage(new TraditionalChineseLanguage());

    /// <summary>Includes a new <see cref="KoreanLanguage"/>.</summary>
    public static DLSiteSearchFilterBuilder IncludeKoreanSupportedLanguage(this DLSiteSearchFilterBuilder searchFilterBuilder)
        => searchFilterBuilder.IncludeSupportedLanguage(new KoreanLanguage());

    /// <summary>Includes a new <see cref="AlingualLanguage"/>.</summary>
    public static DLSiteSearchFilterBuilder IncludeAlingualSupportedLanguage(this DLSiteSearchFilterBuilder searchFilterBuilder)
        => searchFilterBuilder.IncludeSupportedLanguage(new AlingualLanguage());
}
|
||||
164
JSMR.Infrastructure/Scanning/EnglishVoiceWorksScanner.cs
Normal file
164
JSMR.Infrastructure/Scanning/EnglishVoiceWorksScanner.cs
Normal file
@@ -0,0 +1,164 @@
|
||||
using JSMR.Application.Scanning;
|
||||
using JSMR.Infrastructure.Caching;
|
||||
using JSMR.Infrastructure.Common.Locales;
|
||||
using JSMR.Infrastructure.Common.SupportedLanguages;
|
||||
using JSMR.Infrastructure.Http;
|
||||
using System.Text.RegularExpressions;
|
||||
|
||||
namespace JSMR.Infrastructure.Scanning;
|
||||
|
||||
/// <summary>
/// Voice works scanner configured with the English locale. Parses the English
/// date formats "Release: Mon/dd/yyyy" and "Early|Middle|Late Mon. yyyy".
/// </summary>
public partial class EnglishVoiceWorksScanner(IHtmlLoader loader, ISpamCircleCache spamCircleCache)
    : VoiceWorksScanner(loader, spamCircleCache)
{
    // Three-letter English month abbreviations; index + 1 is the month number.
    private static readonly string[] MonthAbbreviations =
        ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"];

    [GeneratedRegex(@"Release: (.*?)[/](\d{2})[/](\d{4})", RegexOptions.IgnoreCase, "en-US")]
    private static partial Regex SalesDateRegex();

    [GeneratedRegex(@"^(Early|Middle|Late)\s(.*?)\s(\d{4})", RegexOptions.IgnoreCase, "en-US")]
    private static partial Regex EstimatedDateRegex();

    protected override ILocale Locale => new EnglishLocale();

    protected override ISupportedLanguage[] SupportedLanguages =>
    [
        new JapaneseLanguage(),
        new EnglishLanguage(),
        new AlingualLanguage()
    ];

    /// <summary>
    /// Converts text such as "Early Jan. 2025" into a date. "Early" maps to day 1,
    /// "Middle" to day 11 and "Late" to day 21.
    /// </summary>
    /// <param name="expectedDate">Expected-date text taken from the page.</param>
    /// <returns>
    /// The estimated date, or null when the text does not match the expected format,
    /// the month is not recognised, or the values do not form a valid date.
    /// </returns>
    protected override DateTime? GetEstimatedReleaseDate(string expectedDate)
    {
        // NOTE(review): these Japanese status strings are checked even for the English
        // locale — presumably they can appear untranslated; confirm against real pages.
        if (expectedDate.Contains("販売中") || expectedDate.Contains("発売予定未定"))
            return null;

        Match match = EstimatedDateRegex().Match(expectedDate);

        if (!match.Success)
            return null;

        // An unrecognised month yields null rather than silently defaulting to
        // January, matching what GetSalesDate does for the same situation.
        if (!TryParseMonth(match.Groups[2].Value, out int releaseMonth))
            return null;

        if (!TryParseDigits(match.Groups[3].Value, out int releaseYear))
            return null;

        // The regex is case-insensitive, so the period word is compared the same way.
        string period = match.Groups[1].Value;
        int releaseDay =
            period.Equals("Middle", StringComparison.OrdinalIgnoreCase) ? 11
            : period.Equals("Late", StringComparison.OrdinalIgnoreCase) ? 21
            : 1;

        return CreateDateOrNull(releaseYear, releaseMonth, releaseDay);
    }

    /// <summary>Converts text such as "Release: Jan/05/2024" into a date.</summary>
    /// <param name="salesDate">Sales-date text taken from the page.</param>
    /// <returns>
    /// The sales date, or null when the text does not match the expected format,
    /// the month is not recognised, or the values do not form a valid date.
    /// </returns>
    protected override DateTime? GetSalesDate(string salesDate)
    {
        Match match = SalesDateRegex().Match(salesDate);

        if (!match.Success)
            return null;

        if (!TryParseMonth(match.Groups[1].Value, out int releaseMonth))
            return null;

        if (!TryParseDigits(match.Groups[2].Value, out int releaseDay)
            || !TryParseDigits(match.Groups[3].Value, out int releaseYear))
        {
            return null;
        }

        return CreateDateOrNull(releaseYear, releaseMonth, releaseDay);
    }

    /// <summary>
    /// Resolves an English month token ("Jan", "Jan.", "jan", "January") to its number
    /// by case-insensitive match on the three-letter abbreviation prefix.
    /// </summary>
    private static bool TryParseMonth(string text, out int month)
    {
        string candidate = text.Trim();

        int index = candidate.Length == 0
            ? -1
            : Array.FindIndex(MonthAbbreviations, abbreviation =>
                candidate.StartsWith(abbreviation, StringComparison.OrdinalIgnoreCase));

        month = index + 1;
        return index >= 0;
    }

    /// <summary>Parses a run of ASCII digits without depending on the current culture.</summary>
    private static bool TryParseDigits(string text, out int value) =>
        int.TryParse(
            text,
            System.Globalization.NumberStyles.None,
            System.Globalization.CultureInfo.InvariantCulture,
            out value);

    /// <summary>Builds a date, returning null instead of throwing when the parts are out of range.</summary>
    private static DateTime? CreateDateOrNull(int year, int month, int day)
    {
        if (year is < 1 or > 9999 || month is < 1 or > 12)
            return null;

        if (day < 1 || day > DateTime.DaysInMonth(year, month))
            return null;

        return new DateTime(year, month, day);
    }
}
|
||||
74
JSMR.Infrastructure/Scanning/JapaneseVoiceWorksScanner.cs
Normal file
74
JSMR.Infrastructure/Scanning/JapaneseVoiceWorksScanner.cs
Normal file
@@ -0,0 +1,74 @@
|
||||
using JSMR.Infrastructure.Caching;
|
||||
using JSMR.Infrastructure.Common.Locales;
|
||||
using JSMR.Infrastructure.Common.SupportedLanguages;
|
||||
using JSMR.Infrastructure.Http;
|
||||
using System.Text.RegularExpressions;
|
||||
|
||||
namespace JSMR.Infrastructure.Scanning;
|
||||
|
||||
/// <summary>
/// Voice works scanner configured with the Japanese locale. Parses the Japanese
/// date formats "yyyy年M月(上旬|中旬|下旬)発売予定" and "販売日: yyyy年M月d日".
/// </summary>
public partial class JapaneseVoiceWorksScanner(IHtmlLoader loader, ISpamCircleCache spamCircleCache)
    : VoiceWorksScanner(loader, spamCircleCache)
{
    // The patterns contain no cased characters, so no IgnoreCase option is needed.
    [GeneratedRegex("(.*?)年(.*?)月(.*)")]
    private static partial Regex EstimatedDateRegex();

    [GeneratedRegex("販売日: (.*?)年(.*?)月(.*)日")]
    private static partial Regex SalesDateRegex();

    protected override ILocale Locale => new JapaneseLocale();

    protected override ISupportedLanguage[] SupportedLanguages =>
    [
        new JapaneseLanguage(),
        new EnglishLanguage(),
        new TraditionalChineseLanguage(),
        new SimplifiedChineseLanguage(),
        new KoreanLanguage(),
        new AlingualLanguage()
    ];

    /// <summary>
    /// Converts text such as "2025年1月中旬発売予定" into a date. 上旬 maps to day 1,
    /// 中旬 to day 11 and 下旬 to day 21; any other suffix also maps to day 1.
    /// </summary>
    /// <param name="expectedDate">Expected-date text taken from the page.</param>
    /// <returns>
    /// The estimated date, or null when the work is already on sale / unscheduled,
    /// the text does not match, or the year and month are not a valid date.
    /// </returns>
    protected override DateTime? GetEstimatedReleaseDate(string expectedDate)
    {
        if (expectedDate.Contains("販売中") || expectedDate.Contains("発売予定未定"))
            return null;

        Match match = EstimatedDateRegex().Match(expectedDate);

        if (!match.Success)
            return null;

        if (!TryParseNumber(match.Groups[1].Value, out int releaseYear)
            || !TryParseNumber(match.Groups[2].Value, out int releaseMonth))
        {
            return null;
        }

        // Prefix match covers both "中旬発売予定" and "中旬 発売予定".
        string period = match.Groups[3].Value.TrimStart();
        int releaseDay =
            period.StartsWith("中旬", StringComparison.Ordinal) ? 11
            : period.StartsWith("下旬", StringComparison.Ordinal) ? 21
            : 1;

        return CreateDateOrNull(releaseYear, releaseMonth, releaseDay);
    }

    /// <summary>Converts text such as "販売日: 2024年01月05日" into a date.</summary>
    /// <param name="salesDate">Sales-date text taken from the page.</param>
    /// <returns>
    /// The sales date, or null when the text does not match or the captured
    /// values are not numeric or do not form a valid date.
    /// </returns>
    protected override DateTime? GetSalesDate(string salesDate)
    {
        Match match = SalesDateRegex().Match(salesDate);

        if (!match.Success)
            return null;

        if (!TryParseNumber(match.Groups[1].Value, out int releaseYear)
            || !TryParseNumber(match.Groups[2].Value, out int releaseMonth)
            || !TryParseNumber(match.Groups[3].Value, out int releaseDay))
        {
            return null;
        }

        return CreateDateOrNull(releaseYear, releaseMonth, releaseDay);
    }

    /// <summary>
    /// Parses an integer (surrounding whitespace allowed) without depending on the
    /// current culture; returns false instead of throwing on non-numeric text.
    /// </summary>
    private static bool TryParseNumber(string text, out int value) =>
        int.TryParse(
            text,
            System.Globalization.NumberStyles.Integer,
            System.Globalization.CultureInfo.InvariantCulture,
            out value);

    /// <summary>Builds a date, returning null instead of throwing when the parts are out of range.</summary>
    private static DateTime? CreateDateOrNull(int year, int month, int day)
    {
        if (year is < 1 or > 9999 || month is < 1 or > 12)
            return null;

        if (day < 1 || day > DateTime.DaysInMonth(year, month))
            return null;

        return new DateTime(year, month, day);
    }
}
|
||||
37
JSMR.Infrastructure/Scanning/Models/DLSiteHtmlDocument.cs
Normal file
37
JSMR.Infrastructure/Scanning/Models/DLSiteHtmlDocument.cs
Normal file
@@ -0,0 +1,37 @@
|
||||
using HtmlAgilityPack;
|
||||
|
||||
namespace JSMR.Infrastructure.Scanning.Models;
|
||||
|
||||
/// <summary>
/// Wraps a search-result HTML document and groups its per-work nodes
/// (left column, right column, thumbnail) by position.
/// </summary>
public class DLSiteHtmlDocument
{
    private readonly List<HtmlNode> _workColumns;
    private readonly List<HtmlNode> _workColumnRights;
    private readonly List<HtmlNode> _workThumbs;

    /// <summary>
    /// The first <c>div.page_total/strong</c> node, or null when the document has none.
    /// </summary>
    public HtmlNode? PageTotalNode { get; }

    /// <summary>Selects the work nodes from <paramref name="document"/>.</summary>
    /// <param name="document">Parsed search-result page.</param>
    /// <exception cref="ArgumentNullException"><paramref name="document"/> is null.</exception>
    public DLSiteHtmlDocument(HtmlDocument document)
    {
        ArgumentNullException.ThrowIfNull(document);

        HtmlNode root = document.DocumentNode;

        _workColumns = SelectNodesOrEmpty(root, "//dl[@class='work_1col']");
        _workColumnRights = SelectNodesOrEmpty(root, "//td[@class='work_1col_right']");
        _workThumbs = SelectNodesOrEmpty(root, "//div[@class='work_thumb']");

        // SelectSingleNode yields null when nothing matches, so a page without
        // a total (e.g. an empty result page) no longer fails construction.
        PageTotalNode = root.SelectSingleNode("//div[@class='page_total']/strong");
    }

    /// <summary>Builds one <see cref="DLSiteHtmlNode"/> per work found in the document.</summary>
    /// <returns>The works in document order; empty when the page lists no works.</returns>
    /// <exception cref="InvalidOperationException">
    /// The three node sets have different sizes, so they cannot be paired by position.
    /// </exception>
    public List<DLSiteHtmlNode> GetDLSiteNodes()
    {
        if (_workColumns.Count != _workColumnRights.Count || _workColumns.Count != _workThumbs.Count)
            throw new InvalidOperationException("Work column node counts do not match!");

        var nodes = new List<DLSiteHtmlNode>(_workColumns.Count);

        for (int i = 0; i < _workColumns.Count; i++)
        {
            nodes.Add(new DLSiteHtmlNode(_workColumns[i], _workColumnRights[i], _workThumbs[i]));
        }

        return nodes;
    }

    /// <summary>
    /// Runs the XPath query and returns the matches, mapping HtmlAgilityPack's
    /// null "no match" result to an empty list.
    /// </summary>
    private static List<HtmlNode> SelectNodesOrEmpty(HtmlNode root, string xpath)
    {
        HtmlNodeCollection? matches = root.SelectNodes(xpath);

        return matches is null ? [] : [.. matches];
    }
}
|
||||
156
JSMR.Infrastructure/Scanning/Models/DLSiteHtmlNode.cs
Normal file
156
JSMR.Infrastructure/Scanning/Models/DLSiteHtmlNode.cs
Normal file
@@ -0,0 +1,156 @@
|
||||
using HtmlAgilityPack;
|
||||
|
||||
namespace JSMR.Infrastructure.Scanning.Models;
|
||||
|
||||
/// <summary>
/// Locates the sub-nodes of a single work entry (title, maker, dates, rating,
/// genres, tags, creators, thumbnail) inside its left, right and thumbnail nodes.
/// </summary>
public class DLSiteHtmlNode
{
    public HtmlNode LeftNode { get; }
    public HtmlNode RightNode { get; }
    public HtmlNode ThumbNode { get; }

    public HtmlNode ProductNode { get; }
    public HtmlNode ProductLinkNode { get; }
    public HtmlNode ProductTextNode { get; }
    public HtmlNode DescriptionNode { get; }
    public HtmlNode MakerNode { get; }
    public HtmlNode MakerLinkNode { get; }

    /// <summary>Sales-date list item, or null when the entry has none.</summary>
    public HtmlNode? SalesDateNode { get; }

    /// <summary>Expected-date paragraph, or null when the entry has none.</summary>
    public HtmlNode? ExpectedDateNode { get; }

    /// <summary>Download-count span, or null when the entry has no work info box or no count.</summary>
    public HtmlNode? DownloadsNode { get; }

    /// <summary>Star-rating element, or null when the entry has no rating.</summary>
    public HtmlNode? StarRatingNode { get; }

    public HtmlNode ImageNode { get; }

    public List<HtmlNode> GenreNodes { get; }
    public List<HtmlNode> SearchTagNodes { get; }
    public List<HtmlNode> CreatorNodes { get; }

    /// <summary>Resolves all sub-nodes for one work entry.</summary>
    /// <param name="leftNode">The entry's <c>dl.work_1col</c> node.</param>
    /// <param name="rightNode">The entry's <c>td.work_1col_right</c> node.</param>
    /// <param name="thumbNode">The entry's <c>div.work_thumb</c> node.</param>
    /// <exception cref="ArgumentNullException">Any argument is null.</exception>
    /// <exception cref="InvalidOperationException">A required sub-node is missing.</exception>
    public DLSiteHtmlNode(HtmlNode leftNode, HtmlNode rightNode, HtmlNode thumbNode)
    {
        ArgumentNullException.ThrowIfNull(leftNode);
        ArgumentNullException.ThrowIfNull(rightNode);
        ArgumentNullException.ThrowIfNull(thumbNode);

        LeftNode = leftNode;
        RightNode = rightNode;
        ThumbNode = thumbNode;

        ProductNode = SelectRequired(LeftNode, ".//dt[@class='work_name']");
        ProductLinkNode = SelectRequired(ProductNode, ".//a");
        ProductTextNode = GetProductTextNode(ProductLinkNode);

        DescriptionNode = SelectRequired(LeftNode, ".//dd[@class='work_text']");

        MakerNode = SelectRequired(LeftNode, ".//dd[@class='maker_name']");
        MakerLinkNode = SelectRequired(MakerNode, ".//a[contains(@href, 'maker_id')]");

        ExpectedDateNode = ProductNode.SelectSingleNode(".//p[@class='expected_date']");

        // Genres are treated like tags and creators: a missing container or an
        // empty container produces an empty list instead of a null dereference.
        GenreNodes = SelectAll(LeftNode.SelectSingleNode(".//dd[@class='work_genre']"), ".//span");
        SearchTagNodes = SelectAll(LeftNode.SelectSingleNode(".//dd[@class='search_tag']"), ".//a");
        CreatorNodes = SelectAll(MakerNode, ".//a[contains(@href, 'keyword_creater')]");

        HtmlNode? workInfoBox = RightNode.SelectSingleNode(".//ul[@class='work_info_box']");

        if (workInfoBox is not null)
        {
            SalesDateNode = workInfoBox.SelectSingleNode(".//li[@class='sales_date']");

            // TODO: match the exact "_dl_count_<product id>" class instead of any
            // class containing "_dl_count_".
            DownloadsNode = RightNode.SelectSingleNode(".//span[contains(@class, '_dl_count_')]");
        }

        StarRatingNode = RightNode
            .SelectSingleNode(".//li[@class='work_rating']")
            ?.SelectSingleNode(".//div[contains(@class, 'star_rating')]");

        ImageNode = SelectRequired(SelectRequired(ThumbNode, ".//a"), ".//img");
    }

    /// <summary>
    /// Returns the link's leading text node when the link has more than one child
    /// and the first is text; otherwise returns the link itself.
    /// </summary>
    private static HtmlNode GetProductTextNode(HtmlNode productLinkNode)
    {
        HtmlNodeCollection children = productLinkNode.ChildNodes;

        return children.Count > 1 && children[0].Name == "#text"
            ? children[0]
            : productLinkNode;
    }

    /// <summary>Returns the first match of <paramref name="xpath"/> or throws a descriptive error.</summary>
    private static HtmlNode SelectRequired(HtmlNode parent, string xpath)
    {
        return parent.SelectSingleNode(xpath)
            ?? throw new InvalidOperationException($"Required node '{xpath}' was not found in the work entry.");
    }

    /// <summary>
    /// Returns every match of <paramref name="xpath"/> under <paramref name="parent"/>;
    /// empty when the parent is null or nothing matches.
    /// </summary>
    private static List<HtmlNode> SelectAll(HtmlNode? parent, string xpath)
    {
        HtmlNodeCollection? matches = parent?.SelectNodes(xpath);

        return matches is null ? [] : [.. matches];
    }
}
|
||||
7
JSMR.Infrastructure/Scanning/Models/ScannedRating.cs
Normal file
7
JSMR.Infrastructure/Scanning/Models/ScannedRating.cs
Normal file
@@ -0,0 +1,7 @@
|
||||
namespace JSMR.Infrastructure.Scanning.Models;
|
||||
|
||||
/// <summary>
/// Rating values extracted for a single work.
/// </summary>
public class ScannedRating
{
    /// <summary>
    /// Rating score. NOTE(review): presumably the numeric suffix of the page's
    /// "star_NN" CSS class — confirm against the scanner that fills it.
    /// </summary>
    public byte Score { get; set; }

    /// <summary>Number of votes.</summary>
    public int Votes { get; set; }
}
|
||||
49
JSMR.Infrastructure/Scanning/ScannerUtilities.cs
Normal file
49
JSMR.Infrastructure/Scanning/ScannerUtilities.cs
Normal file
@@ -0,0 +1,49 @@
|
||||
using HtmlAgilityPack;
|
||||
using System.Web;
|
||||
|
||||
namespace JSMR.Infrastructure.Scanning;
|
||||
|
||||
/// <summary>
/// Small text and attribute helpers shared by the scanners.
/// </summary>
public static class ScannerUtilities
{
    /// <summary>
    /// HTML-decodes the inner HTML of every node that has any, preserving order.
    /// </summary>
    /// <param name="nodes">Nodes to read.</param>
    /// <returns>The decoded strings; nodes with null or empty inner HTML are skipped.</returns>
    public static List<string> GetStringListFromNodes(List<HtmlNode> nodes)
    {
        return nodes
            .Where(node => !string.IsNullOrEmpty(node.InnerHtml))
            .Select(node => HttpUtility.HtmlDecode(node.InnerHtml))
            .ToList();
    }

    /// <summary>
    /// Returns the node's inner HTML with line feeds removed, HTML-decoded and trimmed.
    /// </summary>
    /// <param name="node">Node to read; may be null.</param>
    /// <returns>The decoded text, or an empty string for a null or blank node.</returns>
    public static string GetDecodedText(HtmlNode node)
    {
        if (node is null || string.IsNullOrWhiteSpace(node.InnerHtml))
            return string.Empty;

        return HttpUtility.HtmlDecode(node.InnerHtml.Replace("\n", "")).Trim();
    }

    /// <summary>
    /// Returns the text between the first occurrence of <paramref name="startText"/>
    /// and the first occurrence of <paramref name="endText"/> that follows it.
    /// </summary>
    /// <param name="text">Text to search.</param>
    /// <param name="startText">Marker that precedes the wanted text.</param>
    /// <param name="endText">Marker that follows the wanted text.</param>
    /// <returns>
    /// The enclosed text, or an empty string when either marker is missing or
    /// nothing lies between them.
    /// </returns>
    public static string GetTextBetween(string text, string startText, string endText)
    {
        // Ordinal comparison: the markers are URL fragments, not linguistic text.
        int markerIndex = text.IndexOf(startText, StringComparison.Ordinal);

        // A missing start marker must not fall through with a bogus start offset.
        if (markerIndex < 0)
            return "";

        int startIndex = markerIndex + startText.Length;

        // Search for the end marker only after the start marker, so an earlier
        // occurrence of the end marker cannot hide the real value.
        int endIndex = text.IndexOf(endText, startIndex, StringComparison.Ordinal);

        if (endIndex <= startIndex)
            return "";

        return text[startIndex..endIndex];
    }

    /// <summary>
    /// Returns the image node's <c>src</c> attribute, falling back to <c>data-src</c>
    /// when <c>src</c> is missing or empty.
    /// </summary>
    /// <param name="imageNode">Image node to read.</param>
    /// <returns>The image source, or an empty string when neither attribute has a value.</returns>
    public static string GetImageSource(HtmlNode imageNode)
    {
        string imageSource = imageNode.GetAttributeValue("src", "");

        return string.IsNullOrEmpty(imageSource)
            ? imageNode.GetAttributeValue("data-src", "")
            : imageSource;
    }
}
|
||||
164
JSMR.Infrastructure/Scanning/VoiceWorksScanner.cs
Normal file
164
JSMR.Infrastructure/Scanning/VoiceWorksScanner.cs
Normal file
@@ -0,0 +1,164 @@
|
||||
using HtmlAgilityPack;
|
||||
using JSMR.Application.Scanning;
|
||||
using JSMR.Application.Scanning.Contracts;
|
||||
using JSMR.Application.Scanning.Ports;
|
||||
using JSMR.Infrastructure.Caching;
|
||||
using JSMR.Infrastructure.Common.Locales;
|
||||
using JSMR.Infrastructure.Common.SupportedLanguages;
|
||||
using JSMR.Infrastructure.Http;
|
||||
using JSMR.Infrastructure.Scanning.Models;
|
||||
using System.Globalization;
|
||||
using System.Text.RegularExpressions;
|
||||
|
||||
namespace JSMR.Infrastructure.Scanning;
|
||||
|
||||
/// <summary>
/// Base scanner: builds a search URL from the subclass's locale and languages,
/// loads the page and converts every listed work into a <see cref="DLSiteWork"/>.
/// Subclasses supply the locale, the supported languages and the date parsing.
/// </summary>
public abstract class VoiceWorksScanner(IHtmlLoader htmlLoader, ISpamCircleCache spamCircleCache) : IVoiceWorksScanner
{
    // Counts on the page look like "1,234" and may be surrounded by whitespace.
    private const NumberStyles CountStyles =
        NumberStyles.AllowThousands | NumberStyles.AllowLeadingWhite | NumberStyles.AllowTrailingWhite;

    // Captures the text inside the first pair of parentheses, e.g. "(1,234)".
    private static readonly Regex VotesPattern = new(@"\((.*?)\)");

    /// <summary>Locale used when building the search URL.</summary>
    protected abstract ILocale Locale { get; }

    /// <summary>Languages added to the search URL as required options.</summary>
    protected abstract ISupportedLanguage[] SupportedLanguages { get; }

    /// <summary>Parses the locale-specific expected-date text; null when it cannot be parsed.</summary>
    protected abstract DateTime? GetEstimatedReleaseDate(string expectedDate);

    /// <summary>Parses the locale-specific sales-date text; null when it cannot be parsed.</summary>
    protected abstract DateTime? GetSalesDate(string salesDate);

    protected virtual bool ExcludeSpamCircles => true;
    protected virtual bool ExcludePartiallyAIGeneratedWorks => true;
    protected virtual bool ExcludeAIGeneratedWorks => true;

    /// <summary>Loads one search-result page and returns the works listed on it.</summary>
    /// <param name="request">Supplies the page number and page size.</param>
    /// <param name="cancellationToken">Token forwarded to the cache and the HTML loader.</param>
    /// <returns>The works on the page, in page order.</returns>
    public async Task<IReadOnlyList<DLSiteWork>> ScanPageAsync(ScanVoiceWorksRequest request, CancellationToken cancellationToken = default)
    {
        DLSiteHtmlDocument document = await GetDLSiteHtmlCollectionAsync(request, cancellationToken);
        List<DLSiteHtmlNode> nodes = document.GetDLSiteNodes();

        return GetDLSiteWorks(nodes);
    }

    /// <summary>Builds the URL, loads it and wraps the parsed document.</summary>
    private async Task<DLSiteHtmlDocument> GetDLSiteHtmlCollectionAsync(ScanVoiceWorksRequest request, CancellationToken cancellationToken)
    {
        string url = await GetUrlAsync(request, cancellationToken);
        HtmlDocument document = await htmlLoader.GetHtmlDocumentAsync(url, cancellationToken);

        return new DLSiteHtmlDocument(document);
    }

    /// <summary>Builds the search URL for the requested page.</summary>
    /// <param name="request">Supplies the page number and page size.</param>
    /// <param name="cancellationToken">Token forwarded to the spam-circle cache.</param>
    /// <returns>The search URL.</returns>
    protected virtual async ValueTask<string> GetUrlAsync(ScanVoiceWorksRequest request, CancellationToken cancellationToken)
    {
        // The subclass's locale must be applied explicitly: the builder defaults to
        // Japanese, which would serve Japanese text to the English date parsers.
        DLSiteSearchFilterBuilder filterBuilder = new DLSiteSearchFilterBuilder().UseLocale(Locale);

        foreach (ISupportedLanguage supportedLanguage in SupportedLanguages)
        {
            filterBuilder.IncludeSupportedLanguage(supportedLanguage);
        }

        if (ExcludeSpamCircles)
        {
            string[] makerIds = await spamCircleCache.GetAsync(cancellationToken);

            filterBuilder.ExcludeMakers(makerIds);
        }

        if (ExcludePartiallyAIGeneratedWorks)
            filterBuilder.ExcludePartiallyAIGeneratedWorks();

        if (ExcludeAIGeneratedWorks)
            filterBuilder.ExcludeAIGeneratedWorks();

        return filterBuilder.BuildSearchQuery(request.PageNumber, request.PageSize);
    }

    /// <summary>Converts every node into a work, preserving order.</summary>
    private List<DLSiteWork> GetDLSiteWorks(List<DLSiteHtmlNode> nodes)
    {
        var works = new List<DLSiteWork>(nodes.Count);

        foreach (DLSiteHtmlNode node in nodes)
        {
            works.Add(GetDLSiteWork(node));
        }

        return works;
    }

    /// <summary>Reads the fields of a single work from its located sub-nodes.</summary>
    private DLSiteWork GetDLSiteWork(DLSiteHtmlNode node)
    {
        DLSiteWork work = new();

        work.ProductName = ScannerUtilities.GetDecodedText(node.ProductTextNode);

        // GetAttributeValue returns the fallback instead of dereferencing a missing attribute.
        work.ProductUrl = node.ProductLinkNode.GetAttributeValue("href", "");
        work.ProductId = ScannerUtilities.GetTextBetween(work.ProductUrl, "product_id/", ".html");
        work.Maker = ScannerUtilities.GetDecodedText(node.MakerLinkNode);

        string makerUrl = node.MakerLinkNode.GetAttributeValue("href", "");
        work.MakerId = ScannerUtilities.GetTextBetween(makerUrl, "maker_id/", ".html");

        work.Description = ScannerUtilities.GetDecodedText(node.DescriptionNode);

        if (node.ExpectedDateNode != null)
        {
            work.ExpectedDate = GetEstimatedReleaseDate(node.ExpectedDateNode.InnerHtml.Trim());
        }

        if (node.SalesDateNode != null)
        {
            work.SalesDate = GetSalesDate(node.SalesDateNode.InnerHtml);
        }

        // An unparsable count leaves Downloads at its default rather than aborting the page.
        if (node.DownloadsNode != null && TryParseCount(node.DownloadsNode.InnerHtml, out int downloads))
        {
            work.Downloads = downloads;
        }

        ScannedRating? rating = GetScannedRating(node.StarRatingNode);

        if (rating != null)
        {
            work.StarRating = rating.Score;
            work.Votes = rating.Votes;
        }

        work.Genres = ScannerUtilities.GetStringListFromNodes(node.GenreNodes);
        work.Tags = ScannerUtilities.GetStringListFromNodes(node.SearchTagNodes);
        work.Creators = ScannerUtilities.GetStringListFromNodes(node.CreatorNodes);

        string imageSource = ScannerUtilities.GetImageSource(node.ImageNode);

        // The full-size image uses the thumbnail's path with "_sam" replaced by "_main".
        string imageUrl = imageSource.Replace("_sam.jpg", "_main.jpg").Replace("_sam.gif", "_main.gif");

        work.SmallImageUrl = imageSource;
        work.ImageUrl = imageUrl;
        work.Type = imageUrl.Contains("ana/doujin") ? "Ana" : "Work";

        return work;
    }

    /// <summary>
    /// Reads the score from the node's "star_NN" class and the vote count from the
    /// parenthesised number in its text.
    /// </summary>
    /// <returns>The rating, or null when the node is null or either value cannot be read.</returns>
    private static ScannedRating? GetScannedRating(HtmlNode? starRatingNode)
    {
        if (starRatingNode == null)
            return null;

        string? ratingClass = starRatingNode.GetClasses().FirstOrDefault(className =>
            className.Contains("star_") && className != "star_rating");

        if (string.IsNullOrEmpty(ratingClass))
            return null;

        Match voteMatch = VotesPattern.Match(starRatingNode.InnerText);

        if (!voteMatch.Success)
            return null;

        if (!byte.TryParse(ratingClass.Replace("star_", ""), NumberStyles.Integer, CultureInfo.InvariantCulture, out byte score))
            return null;

        if (!TryParseCount(voteMatch.Groups[1].Value, out int votes))
            return null;

        return new ScannedRating { Score = score, Votes = votes };
    }

    /// <summary>
    /// Parses a count such as "1,234" using the invariant culture so the result does
    /// not depend on the machine's regional settings.
    /// </summary>
    private static bool TryParseCount(string text, out int count) =>
        int.TryParse(text, CountStyles, CultureInfo.InvariantCulture, out count);
}
|
||||
Reference in New Issue
Block a user