diff --git a/JSMR.Application/Scanning/Contracts/DLSiteWork.cs b/JSMR.Application/Scanning/Contracts/DLSiteWork.cs index 203f080..3ecb9a8 100644 --- a/JSMR.Application/Scanning/Contracts/DLSiteWork.cs +++ b/JSMR.Application/Scanning/Contracts/DLSiteWork.cs @@ -4,7 +4,7 @@ namespace JSMR.Application.Scanning.Contracts; public class DLSiteWork { - public DLSiteWorkType Type { get; set; } + //public DLSiteWorkType Type { get; set; } public DLSiteWorkCategory Category { get; set; } public required string ProductName { get; set; } public required string ProductId { get; set; } diff --git a/JSMR.Application/Scanning/Contracts/DLSiteWorkType.cs b/JSMR.Application/Scanning/Contracts/DLSiteWorkType.cs index c00fc86..94ce8b8 100644 --- a/JSMR.Application/Scanning/Contracts/DLSiteWorkType.cs +++ b/JSMR.Application/Scanning/Contracts/DLSiteWorkType.cs @@ -1,7 +1,7 @@ namespace JSMR.Application.Scanning.Contracts; -public enum DLSiteWorkType -{ - Released, - Announced -} \ No newline at end of file +//public enum DLSiteWorkType +//{ +// Released, +// Announced +//} \ No newline at end of file diff --git a/JSMR.Infrastructure/Scanning/Models/DLSiteHtmlDocument.cs b/JSMR.Infrastructure/Scanning/Models/DLSiteHtmlDocument.cs index beda30b..ce8ace8 100644 --- a/JSMR.Infrastructure/Scanning/Models/DLSiteHtmlDocument.cs +++ b/JSMR.Infrastructure/Scanning/Models/DLSiteHtmlDocument.cs @@ -2,37 +2,26 @@ namespace JSMR.Infrastructure.Scanning.Models; -public class DLSiteHtmlDocument +public class DLSiteHtmlDocument(HtmlDocument document) { - private readonly HtmlNodeCollection _workColumns; - private readonly HtmlNodeCollection _workColumnRights; - private readonly HtmlNodeCollection _workThumbs; + private readonly HtmlNodeCollection _workColumns = document.DocumentNode.SelectNodes("//dl[@class='work_1col']"); + private readonly HtmlNodeCollection _workColumnRights = document.DocumentNode.SelectNodes("//td[contains(@class, 'work_1col_right')]"); + private readonly HtmlNodeCollection _workThumbs = document.DocumentNode.SelectNodes("//div[@class='work_thumb']"); + public HtmlNode PageTotalNode { get; } = document.DocumentNode.SelectNodes("//div[@class='page_total']/strong")[0]; - public HtmlNode PageTotalNode { get; } - - public DLSiteHtmlDocument(HtmlDocument document) + public DLSiteHtmlNode[] GetDLSiteNodes() { - _workColumns = document.DocumentNode.SelectNodes("//dl[@class='work_1col']"); - //_workColumnRights = document.DocumentNode.SelectNodes("//td[@class='work_1col_right']"); - _workColumnRights = document.DocumentNode.SelectNodes("//td[contains(@class, 'work_1col_right')]"); - _workThumbs = document.DocumentNode.SelectNodes("//div[@class='work_thumb']"); - - PageTotalNode = document.DocumentNode.SelectNodes("//div[@class='page_total']/strong")[0]; - } - - public List GetDLSiteNodes() - { - var nodes = new List(); + List nodes = []; if (_workColumns.Count != _workColumnRights.Count || _workColumns.Count != _workThumbs.Count) throw new Exception("Work column node counts do not match!"); for (int i = 0; i < _workColumns.Count; i++) { - var node = new DLSiteHtmlNode(_workColumns[i], _workColumnRights[i], _workThumbs[i]); + DLSiteHtmlNode node = new(_workColumns[i], _workColumnRights[i], _workThumbs[i]); nodes.Add(node); } - return nodes; + return [.. nodes]; } } \ No newline at end of file diff --git a/JSMR.Infrastructure/Scanning/Models/DLSiteHtmlNode.cs b/JSMR.Infrastructure/Scanning/Models/DLSiteHtmlNode.cs index 64990f0..2145ba2 100644 --- a/JSMR.Infrastructure/Scanning/Models/DLSiteHtmlNode.cs +++ b/JSMR.Infrastructure/Scanning/Models/DLSiteHtmlNode.cs @@ -19,7 +19,8 @@ public class DLSiteHtmlNode public HtmlNode? SalesDateNode { get; private set; } public HtmlNode DownloadsNode { get; private set; } public HtmlNode? StarRatingNode { get; private set; } - public HtmlNode ImageNode { get; private set; } + public HtmlNode? ImageNode { get; private set; } + public HtmlNode? ThumbWithNgFilterBlockNode { get; private set; } public HtmlNode[] GenreNodes { get; private set; } public HtmlNode[] SearchTagNodes { get; private set; } public HtmlNode[] CreatorNodes { get; private set; } @@ -55,7 +56,8 @@ public class DLSiteHtmlNode //InitializeSalesAndDownloadsNodes(); StarRatingNode = GetStarRatingNode(); - ImageNode = GetImageNode(); + ImageNode = TryGetImageNode(); + ThumbWithNgFilterBlockNode = ThumbNode.SelectSingleNode(".//thumb-with-ng-filter-block"); } private HtmlNode[] GetGenreNodes() @@ -165,10 +167,13 @@ public class DLSiteHtmlNode // } //} - private HtmlNode GetImageNode() + private HtmlNode? TryGetImageNode() { - HtmlNode linkNode = ThumbNode.SelectNodes(".//a")[0]; + HtmlNode? linkNode = ThumbNode.SelectSingleNode(".//a"); - return linkNode.SelectNodes(".//img")[0]; + if (linkNode is null) + return null; + + return linkNode.SelectSingleNode(".//img"); } } \ No newline at end of file diff --git a/JSMR.Infrastructure/Scanning/ScannerUtilities.cs b/JSMR.Infrastructure/Scanning/ScannerUtilities.cs index 86eae57..8601fb0 100644 --- a/JSMR.Infrastructure/Scanning/ScannerUtilities.cs +++ b/JSMR.Infrastructure/Scanning/ScannerUtilities.cs @@ -1,4 +1,5 @@ using HtmlAgilityPack; +using System.Text.Json; using System.Web; namespace JSMR.Infrastructure.Scanning; @@ -45,4 +46,26 @@ public static class ScannerUtilities return imageSource; } + + public static string[] ParseJavaScriptArray(string value) + { + try + { + string json = NormalizeJavaScriptArray(value); + + return JsonSerializer.Deserialize(json) ?? []; + } + catch + { + return [.. value + .Trim('[', ']') + .Split(',', StringSplitOptions.RemoveEmptyEntries) + .Select(x => x.Trim().Trim('\'', '"'))]; + } + } + + private static string NormalizeJavaScriptArray(string input) + { + return input.Trim().Replace('\'', '"'); + } } \ No newline at end of file diff --git a/JSMR.Infrastructure/Scanning/VoiceWorksScanner.cs b/JSMR.Infrastructure/Scanning/VoiceWorksScanner.cs index f24df70..42189bb 100644 --- a/JSMR.Infrastructure/Scanning/VoiceWorksScanner.cs +++ b/JSMR.Infrastructure/Scanning/VoiceWorksScanner.cs @@ -7,6 +7,7 @@ using JSMR.Domain.ValueObjects; using JSMR.Infrastructure.Http; using JSMR.Infrastructure.Scanning.Models; using System.Globalization; +using System.Text.Json; using System.Text.RegularExpressions; namespace JSMR.Infrastructure.Scanning; @@ -22,7 +23,7 @@ public abstract class VoiceWorksScanner(IHtmlLoader htmlLoader) : IVoiceWorksSca public async Task> ScanPageAsync(VoiceWorkScanOptions options, CancellationToken cancellationToken = default) { DLSiteHtmlDocument document = await GetDLSiteHtmlCollectionAsync(options, cancellationToken); - List nodes = document.GetDLSiteNodes(); + DLSiteHtmlNode[] nodes = document.GetDLSiteNodes(); return GetDLSiteWorks(nodes, options); } @@ -52,7 +53,7 @@ public abstract class VoiceWorksScanner(IHtmlLoader htmlLoader) : IVoiceWorksSca return filterBuilder.BuildSearchQuery(options.PageNumber, options.PageSize); } - private List GetDLSiteWorks(List nodes, VoiceWorkScanOptions options) + private List GetDLSiteWorks(DLSiteHtmlNode[] nodes, VoiceWorkScanOptions options) { var works = new List(); @@ -73,8 +74,7 @@ public abstract class VoiceWorksScanner(IHtmlLoader htmlLoader) : IVoiceWorksSca { string productUrl = node.ProductLinkNode.Attributes["href"].Value; string makerUrl = node.MakerLinkNode.Attributes["href"].Value; - string imageSource = ScannerUtilities.GetImageSource(node.ImageNode); - string imageUrl = imageSource.Replace("_sam.jpg", "_main.jpg").Replace("_sam.gif", "_main.gif"); + (string imageSource, string imageUrl) = TryGetImageSourceAndUrl(node); ScannedRating? rating = GetScannedRating(node.StarRatingNode); DLSiteWork work = new() @@ -89,7 +89,7 @@ public abstract class VoiceWorksScanner(IHtmlLoader htmlLoader) : IVoiceWorksSca Creators = ScannerUtilities.GetStringListFromNodes(node.CreatorNodes), SmallImageUrl = imageSource, ImageUrl = imageUrl, - Type = imageUrl.Contains("ana/doujin") ? DLSiteWorkType.Announced : DLSiteWorkType.Released, + //Type = imageUrl.Contains("ana/doujin") ? DLSiteWorkType.Announced : DLSiteWorkType.Released, StarRating = rating?.Score, Votes = rating?.Votes, AgeRating = GetAgeRating(node.GenreNodes) @@ -113,6 +113,36 @@ public abstract class VoiceWorksScanner(IHtmlLoader htmlLoader) : IVoiceWorksSca return work; } + private static (string, string) TryGetImageSourceAndUrl(DLSiteHtmlNode node) + { + if (node.ThumbWithNgFilterBlockNode is not null) + { + string candidates = node.ThumbWithNgFilterBlockNode.GetAttributeValue(":thumb-candidates", string.Empty); + string[] imageUrls = ScannerUtilities.ParseJavaScriptArray(candidates); + + if (imageUrls.Length == 0) + { + throw new Exception("No thumb candidartes found"); + } + + string imageSource = imageUrls[0]; + string imageUrl = imageSource.Replace("_sam.jpg", "_main.jpg").Replace("_sam.gif", "_main.gif"); + + return (imageSource, imageUrl); + } + else if (node.ImageNode is not null) + { + string imageSource = ScannerUtilities.GetImageSource(node.ImageNode); + string imageUrl = imageSource.Replace("_sam.jpg", "_main.jpg").Replace("_sam.gif", "_main.gif"); + + return (imageSource, imageUrl); + } + else + { + throw new Exception("Unable to find image source and/or url"); + } + } + private static AgeRating GetAgeRating(HtmlNode[] genreNodes) { List genres = ScannerUtilities.GetStringListFromNodes(genreNodes); diff --git a/JSMR.Tests/JSMR.Tests.csproj b/JSMR.Tests/JSMR.Tests.csproj index 60b932b..ec66cbf 100644 --- a/JSMR.Tests/JSMR.Tests.csproj +++ b/JSMR.Tests/JSMR.Tests.csproj @@ -15,6 +15,7 @@ + diff --git a/JSMR.Tests/Scanning/Japanese-Page-Updated.html b/JSMR.Tests/Scanning/Japanese-Page-Updated.html new file mode 100644 index 0000000..b49e524 --- /dev/null +++ b/JSMR.Tests/Scanning/Japanese-Page-Updated.html @@ -0,0 +1,183 @@ + + +
+
+
+
+
+
+
+ 6670 + 件中 + 1~30 + 件目 +
+
+
+
+ + + + + + + + + + + + + + + + + +
+
+
+ + +
+ 珈琲屋 綴 / いつもいつでも〜Alone with you〜 [喫茶綴] +
+
+ +
+
+
+

2026年12月下旬 発売予定

+
+ + 珈琲屋 綴 / いつもいつでも〜Alone with you〜 + +
+
+ 喫茶綴 + / + + 野上菜月 + +
+
+ 珈琲に特化した喫茶店、喫茶綴、外伝。『珈琲屋 綴』の従業員、綴明日菜が、大好きな珈琲と、貴方との時間を大切に育みます。珈琲に特化した喫茶店、喫茶綴の外伝です。CV:野上菜月様 +
+
+ 全年齢 + +
+
+ ASMR + 癒し + オールハッピー + バイノーラル/ダミヘ + 日常/生活 + 耳かき +
+
+
+
    +
  • + 予告開始日: 2026年01月01日 +
  • +
  • +
    +
  • +
+ +
+
+
+ +
+ + +
+ +
+
+ +

+ 2026年12月下旬 発売予定 + +

+ +
+ + + + + +
+ アダルトグッズショップの店長にオナ禁でオモチャにされる話 +
+
+ 平たい胸族 +
+ + +
アダルトグッズショップでダウナーなセンパイと仕事中にオナ禁サポートをしてサボっていたことが店長にバレてしまった。今度はセンパイの詩乃と店長のミチル、2人にオナ禁でオモチャにされることになってしまった。
+ +
+ + +
+ +
+ + ASMR + バイノーラル/ダミヘ + 色仕掛け + 浮気 + 百合 + レズ/女同士 + ツルペタ + 貧乳/微乳 + +
+ +
+
+
    +
  • + 予告開始日: 2025年05月15日 +
  • +
  • +
    +
  • +
+ +
+
+
+
+
+
+ + \ No newline at end of file diff --git a/JSMR.Tests/Scanning/VoiceWorkScannerTests.cs b/JSMR.Tests/Scanning/VoiceWorkScannerTests.cs index c97bd77..da63d90 100644 --- a/JSMR.Tests/Scanning/VoiceWorkScannerTests.cs +++ b/JSMR.Tests/Scanning/VoiceWorkScannerTests.cs @@ -49,10 +49,58 @@ public class VoiceWorkScannerTests result[0].Creators.ShouldBe(["柚木つばめ"]); result[0].Genres.ShouldBe(["体験版"]); result[0].Tags.ShouldBe(["バイノーラル/ダミヘ", "手コキ", "足コキ", "パイズリ", "言葉責め", "焦らし", "乳首責め", "本番なし"]); - result[0].Type.ShouldBe(DLSiteWorkType.Released); result[0].Downloads.ShouldBe(1220); } + [Fact] + public async Task Scan_With_Updated_Japanese_Locale() + { + string html = await ReadResourceAsync("Japanese-Page-Updated.html"); + + IHttpService httpService = Substitute.For(); + + httpService.GetStringAsync(Arg.Any(), CancellationToken.None) + .Returns(Task.FromResult(html)); + + HtmlLoader loader = new(httpService); + JapaneseVoiceWorksScanner scanner = new(loader); + + VoiceWorkScanOptions options = new( + PageNumber: 1, + PageSize: 100, + ExcludeAIGeneratedWorks: true, + ExcludePartiallyAIGeneratedWorks: true, + ExcludedMakerIds: [] + ); + + var result = await scanner.ScanPageAsync(options, CancellationToken.None); + + result.Count.ShouldBe(2); + + result[0].SalesDate.ShouldBeNull(); + result[0].ExpectedDate.ShouldBe(new DateOnly(2026, 12, 21)); + result[0].ProductId.ShouldBe("RJ01536422"); + result[0].ProductName.ShouldBe("珈琲屋 綴 / いつもいつでも〜Alone with you〜"); + result[0].Description.ShouldBe("珈琲に特化した喫茶店、喫茶綴、外伝。『珈琲屋 綴』の従業員、綴明日菜が、大好きな珈琲と、貴方との時間を大切に育みます。珈琲に特化した喫茶店、喫茶綴の外伝です。CV:野上菜月様"); + result[0].Maker.ShouldBe("喫茶綴"); + result[0].MakerId.ShouldBe("RG36156"); + result[0].Creators.ShouldBe(["野上菜月"]); + result[0].Genres.ShouldBe(["全年齢"]); + result[0].Tags.ShouldBe(["ASMR", "癒し", "オールハッピー", "バイノーラル/ダミヘ", "日常/生活", "耳かき"]); + // TODO: Wishlist count? + + result[1].SalesDate.ShouldBeNull(); + result[1].ExpectedDate.ShouldBe(new DateOnly(2026, 12, 21)); + result[1].ProductId.ShouldBe("RJ01393816"); + result[1].ProductName.ShouldBe("アダルトグッズショップの店長にオナ禁でオモチャにされる話"); + result[1].Description.ShouldBe("アダルトグッズショップでダウナーなセンパイと仕事中にオナ禁サポートをしてサボっていたことが店長にバレてしまった。今度はセンパイの詩乃と店長のミチル、2人にオナ禁でオモチャにされることになってしまった。"); + result[1].Maker.ShouldBe("平たい胸族"); + result[1].MakerId.ShouldBe("RG01044380"); + result[1].Creators.ShouldBe([]); + result[1].Genres.ShouldBe([]); + result[1].Tags.ShouldBe(["ASMR", "バイノーラル/ダミヘ", "色仕掛け", "浮気", "百合", "レズ/女同士", "ツルペタ", "貧乳/微乳"]); + } + [Fact] public async Task Scan_With_English_Locale() { @@ -88,13 +136,11 @@ public class VoiceWorkScannerTests result[0].Creators.ShouldBe(["Some Creator"]); result[0].Genres.ShouldBe(["Voice", "Trial version"]); result[0].Tags.ShouldBe(["Male Protagonist", "Gal", "Uniform", "Harem", "Big Breasts", "Tanned Skin / Suntan"]); - result[0].Type.ShouldBe(DLSiteWorkType.Released); result[0].Downloads.ShouldBe(1000); result[1].ExpectedDate.ShouldBe(new DateOnly(2025, 10, 11)); result[1].SalesDate.ShouldBeNull(); result[1].ProductId.ShouldBe("RJ00000002"); - result[1].Type.ShouldBe(DLSiteWorkType.Announced); } [Fact] @@ -132,7 +178,6 @@ public class VoiceWorkScannerTests result[0].Creators.ShouldBe(["沼倉愛美"]); result[0].Genres.ShouldBe(["All Ages", "Trial version"]); result[0].Tags.ShouldBe(["Moe", "Healing", "Binaural", "ASMR", "Ear Cleaning", "Slice of Life / Daily Living", "Heartwarming", "Whispering"]); - result[0].Type.ShouldBe(DLSiteWorkType.Released); result[0].Downloads.ShouldBe(1); } } \ No newline at end of file