Updated scanner/upsert logic.

This commit is contained in:
2025-09-17 09:40:24 -04:00
parent 3b2a0e3491
commit db0c3349a2
14 changed files with 434 additions and 66 deletions

View File

@@ -5,8 +5,14 @@ using System.Text.RegularExpressions;
namespace JSMR.Infrastructure.Scanning;
public class JapaneseVoiceWorksScanner(IHtmlLoader loader) : VoiceWorksScanner(loader)
public partial class JapaneseVoiceWorksScanner(IHtmlLoader loader) : VoiceWorksScanner(loader)
{
[GeneratedRegex("(.*?)年(.*?)月(.*)", RegexOptions.IgnoreCase, "en-US")]
private static partial Regex EstimatedDateRegex();
[GeneratedRegex("販売日: (.*?)年(.*?)月(.*)日", RegexOptions.IgnoreCase, "en-US")]
private static partial Regex SalesDateRegex();
protected override ILocale Locale => new JapaneseLocale();
protected override ISupportedLanguage[] SupportedLanguages =>
@@ -24,7 +30,7 @@ public class JapaneseVoiceWorksScanner(IHtmlLoader loader) : VoiceWorksScanner(l
if (expectedDate.Contains("販売中") || expectedDate.Contains("発売予定未定"))
return null;
Regex textRegex = new Regex("(.*?)年(.*?)月(.*)", RegexOptions.IgnoreCase);
Regex textRegex = EstimatedDateRegex();
MatchCollection textMatches = textRegex.Matches(expectedDate);
if (textMatches.Count == 0 || textMatches[0].Groups.Count < 4)
@@ -57,7 +63,7 @@ public class JapaneseVoiceWorksScanner(IHtmlLoader loader) : VoiceWorksScanner(l
protected override DateOnly? GetSalesDate(string salesDate)
{
Regex textRegex = new Regex("販売日:&nbsp;(.*?)年(.*?)月(.*)日", RegexOptions.IgnoreCase);
Regex textRegex = SalesDateRegex();
MatchCollection textMatches = textRegex.Matches(salesDate);
if (textMatches.Count == 0 || textMatches[0].Groups.Count < 4)

View File

@@ -70,17 +70,28 @@ public abstract class VoiceWorksScanner(IHtmlLoader htmlLoader) : IVoiceWorksSca
private DLSiteWork GetDLSiteWork(DLSiteHtmlNode node)
{
DLSiteWork work = new();
work.ProductName = ScannerUtilities.GetDecodedText(node.ProductTextNode);
work.ProductUrl = node.ProductLinkNode.Attributes["href"].Value;
work.ProductId = ScannerUtilities.GetTextBetween(work.ProductUrl, "product_id/", ".html");
work.Maker = ScannerUtilities.GetDecodedText(node.MakerLinkNode);
string productUrl = node.ProductLinkNode.Attributes["href"].Value;
string makerUrl = node.MakerLinkNode.Attributes["href"].Value;
work.MakerId = ScannerUtilities.GetTextBetween(makerUrl, "maker_id/", ".html");
string imageSource = ScannerUtilities.GetImageSource(node.ImageNode);
string imageUrl = imageSource.Replace("_sam.jpg", "_main.jpg").Replace("_sam.gif", "_main.gif");
ScannedRating? rating = GetScannedRating(node.StarRatingNode);
work.Description = ScannerUtilities.GetDecodedText(node.DescriptionNode);
DLSiteWork work = new()
{
ProductName = ScannerUtilities.GetDecodedText(node.ProductTextNode),
Description = ScannerUtilities.GetDecodedText(node.DescriptionNode),
ProductId = ScannerUtilities.GetTextBetween(productUrl, "product_id/", ".html"),
Maker = ScannerUtilities.GetDecodedText(node.MakerLinkNode),
MakerId = ScannerUtilities.GetTextBetween(makerUrl, "maker_id/", ".html"),
Genres = ScannerUtilities.GetStringListFromNodes(node.GenreNodes),
Tags = ScannerUtilities.GetStringListFromNodes(node.SearchTagNodes),
Creators = ScannerUtilities.GetStringListFromNodes(node.CreatorNodes),
SmallImageUrl = imageSource,
ImageUrl = imageUrl,
Type = imageUrl.Contains("ana/doujin") ? DLSiteWorkType.Announced : DLSiteWorkType.Released,
StarRating = rating?.Score,
Votes = rating?.Votes
};
if (node.ExpectedDateNode != null)
{
@@ -97,25 +108,6 @@ public abstract class VoiceWorksScanner(IHtmlLoader htmlLoader) : IVoiceWorksSca
work.Downloads = int.Parse(node.DownloadsNode.InnerHtml, NumberStyles.AllowThousands);
}
var rating = GetScannedRating(node.StarRatingNode);
if (rating != null)
{
work.StarRating = rating.Score;
work.Votes = rating.Votes;
}
work.Genres = ScannerUtilities.GetStringListFromNodes(node.GenreNodes);
work.Tags = ScannerUtilities.GetStringListFromNodes(node.SearchTagNodes);
work.Creators = ScannerUtilities.GetStringListFromNodes(node.CreatorNodes);
string imageSource = ScannerUtilities.GetImageSource(node.ImageNode);
string imageUrl = imageSource.Replace("_sam.jpg", "_main.jpg").Replace("_sam.gif", "_main.gif");
work.SmallImageUrl = imageSource;
work.ImageUrl = imageUrl;
work.Type = imageUrl.Contains("ana/doujin") ? "Ana" : "Work";
return work;
}
@@ -138,9 +130,11 @@ public abstract class VoiceWorksScanner(IHtmlLoader htmlLoader) : IVoiceWorksSca
if (voteMatches.Count == 0 || voteMatches[0].Groups.Count < 2)
return null;
ScannedRating rating = new ScannedRating();
rating.Score = Convert.ToByte(ratingClass.Replace("star_", ""));
rating.Votes = int.Parse(voteMatches[0].Groups[1].Value, NumberStyles.AllowThousands);
ScannedRating rating = new()
{
Score = Convert.ToByte(ratingClass.Replace("star_", "")),
Votes = int.Parse(voteMatches[0].Groups[1].Value, NumberStyles.AllowThousands)
};
return rating;
}