Updated scanner logic, and added initial scanner tests.

This commit is contained in:
2025-09-14 21:12:00 -04:00
parent 39274165cb
commit 646cf41476
16 changed files with 412 additions and 192 deletions

View File

@@ -0,0 +1,227 @@
<html>
<body>
<div data-toggle="found" class="sort_box border_b pb10">
<div class="status_select">
Reorder&nbsp;:
<select name="order" id="query_order" class="_change_submit"><!----><!----><option value="trend">Sort by popularity</option><option value="release_d">Release date - New to Old</option><option value="release">Release date - Old to New</option><option value="dl_d">Best Selling</option><option value="price">Price - Low to High</option><option value="price_d">Price - High to Low</option><option value="rate_d">Rating - High to Low</option><option value="review_d">Reviews - High to Low</option></select>
</div>
<div class="page_total">
<strong>626609</strong>
<span> total. Showing: </span>
<strong>130</strong>
</div>
<div class="display_type_select">
<span>Display&nbsp;</span>
<ul>
<li class="on display_normal">
<a>1カラム表示</a>
</li>
<li class="display_block">
<a href="#">画像表示</a>
</li>
</ul>
</div>
<div class="display_num_select">
<ul>
<li class="on">
<a>30</a>
</li>
<li class="">
<a href="#">50</a>
</li>
<li class="">
<a href="#">100</a>
</li>
</ul>
<span>Items per page&nbsp;</span>
</div>
</div>
<div id="search_result_list">
<table class="work_1col_table n_worklist">
<tbody>
<!-- RJ00000001 -->
<tr data-list_item_product_id="RJ00000001">
<td class="work_1col_thumb">
<div class="work_thumb">
<a href="https://www.dlsite.com/maniax/work/=/product_id/RJ00000001.html" id="_link_RJ00000001" class="work_thumb_inner">
<img alt="Title of Product" src="//img.dlsite.jp/resize/images2/work/doujin/RJ00000000/RJ00000001_img_main_240x240.jpg" class="lazy">
<div class="work_img_popover">
<img src="//img.dlsite.jp/modpub/images2/work/doujin/RJ00000000/RJ00000001_img_main.jpg" alt="Title of Product">
</div>
</a>
<div class="work_category type_SOU"><a href="https://www.dlsite.com/maniax/fsr/=/work_type/SOU">Voice / ASMR</a></div>
</div>
</td>
<td>
<dl class="work_1col">
<dt class="work_name">
<span class="period_date">Oct. 3, 23:59 (JST) Discounted for a limited time.</span>
<div class="icon_wrap">
<span class="icon_lead_01 type_exclusive" title="DL Exclusive">DL Exclusive</span>
</div>
<a href="https://www.dlsite.com/maniax/work/=/product_id/RJ00000001.html" title="Title of Product">
Title of Product
</a>
</dt>
<dd class="maker_name">
<a href="https://www.dlsite.com/maniax/circle/profile/=/maker_id/RG00001.html">The Maker</a>
</dd>
<dd class="work_price_wrap">
<span class="work_price">
<span class="work_price_parts"><span class="work_price_prefix">$</span> <span class="work_price_base">12.52</span> <span class="work_price_suffix"></span></span>
</span>
<span class="strike">
<span class="work_price_parts"><span class="work_price_prefix">$</span> <span class="work_price_base">17.88</span> <span class="work_price_suffix"></span></span>
</span>
<span class="icon_campaign type_sale">30%OFF</span>
<span class="separator">/</span>
<span class="work_point">168pt (10%Earn)</span>
</dd>
<dd class="work_text">Description of the product.</dd>
<dd class="work_genre">
<span class="icon_SND" title="Voice">Voice</span>
<span class="icon_TRI" title="Trial version">Trial version</span>
<span style="display: none;"></span>
<input type="hidden" class="__product_attributes" name="__product_attributes" id="_RJ00000001" value="RG00001,adl,male,SND,MS2,TRI,JPN,REV,502,046,415,074,182,321" disabled="disabled">
</dd>
<dd class="search_tag">
<a href="https://www.dlsite.com/maniax/fsr/=/genre/502/from/work.genre">Male Protagonist</a>
<a href="https://www.dlsite.com/maniax/fsr/=/genre/415/from/work.genre">Gal</a>
<a href="https://www.dlsite.com/maniax/fsr/=/genre/074/from/work.genre">Uniform</a>
<a href="https://www.dlsite.com/maniax/fsr/=/genre/046/from/work.genre">Harem</a>
<a href="https://www.dlsite.com/maniax/fsr/=/genre/182/from/work.genre">Big Breasts</a>
<a href="https://www.dlsite.com/maniax/fsr/=/genre/321/from/work.genre">Tanned Skin / Suntan</a>
</dd>
</dl>
</td>
<td class="work_1col_right">
<ul class="work_info_box">
<li class="sales_date">Release date:&nbsp;Sep/06/2025</li>
<li class="work_dl clear">
<div class="_work_dl_RJ00000001">Purchased:&nbsp;<span class="_dl_count_RJ00000001">1,000</span></div>
</li>
<li class="work_review clear">
<span class="work_to_review">
<div title="Reviews">
<a href="https://www.dlsite.com/maniax/work/reviewlist/=/product_id/RJ00000001.html">(44)</a>
</div>
</span>
</li>
<li class="work_rating"><div class="star_rating star_45">(1,924)</div></li>
</ul>
<div class="work_btn_box">
<div hidden="hidden" class="ga4_event_item_RJ00000001" data-product_id="RJ00000001" data-work_name="Title of Product" data-maker_id="RG00001" data-work_type="SOU" data-options="SND#MS2#TRI#JPN#REV" data-price="1848" data-official_price="2640"></div>
<ul class="work_btn_list btn_free_sample">
<li class="work_btn_list_item">
<p class="work_cart_xs">
<a href="/maniax/cart/=/product_id/RJ00000001.html" class="btn_cart">Add to Cart</a>
</p>
</li>
<li class="work_btn_list_item">
<p class="work_favorite_xs">
<a href="/maniax/mypage/wishlist/=/product_id/RJ00000001.html" class="btn_favorite">Favorites</a>
</p>
</li>
<li class="work_btn_list_item">
<p class="work_free_sample">
<a href="#" class="btn_free_sample" data-product-id="RJ00000001">Samples</a>
</p>
</li>
</ul>
</div>
</td>
</tr>
<!-- RJ00000002 -->
<tr data-list_item_product_id="RJ00000002">
<td class="work_1col_thumb">
<div class="work_thumb">
<a href="https://www.dlsite.com/maniax/work/=/product_id/RJ00000002.html" id="_link_RJ00000002" class="work_thumb_inner">
<img alt="Title of Product" src="//img.dlsite.jp/resize/images2/work/doujin/RJ00000000/RJ00000002_img_main_240x240.jpg" class="lazy">
<div class="work_img_popover">
<img src="//img.dlsite.jp/modpub/images2/work/doujin/RJ00000000/RJ00000002_img_main.jpg" alt="Title of Product">
</div>
</a>
<div class="work_category type_SOU"><a href="https://www.dlsite.com/maniax/fsr/=/work_type/SOU">Voice / ASMR</a></div>
</div>
</td>
<td>
<dl class="work_1col">
<dt class="work_name">
<p class="expected_date">Middle Oct. 2025 Upcoming works</p>
<a href="https://www.dlsite.com/maniax/work/=/product_id/RJ00000002.html" title="Title of Product">
Title of Product
</a>
</dt>
<dd class="maker_name">
<a href="https://www.dlsite.com/maniax/circle/profile/=/maker_id/RG00002.html">The Maker</a>
</dd>
<dd class="work_price_wrap">
<span class="work_price">
<span class="work_price_parts"><span class="work_price_prefix">$</span> <span class="work_price_base">12.52</span> <span class="work_price_suffix"></span></span>
</span>
<span class="strike">
<span class="work_price_parts"><span class="work_price_prefix">$</span> <span class="work_price_base">17.88</span> <span class="work_price_suffix"></span></span>
</span>
<span class="icon_campaign type_sale">30%OFF</span>
<span class="separator">/</span>
<span class="work_point">168pt (10%Earn)</span>
</dd>
<dd class="work_text">Description of the product.</dd>
<dd class="work_genre">
<span class="icon_SND" title="Voice">Voice</span>
<span class="icon_TRI" title="Trial version">Trial version</span>
<span style="display: none;"></span>
<input type="hidden" class="__product_attributes" name="__product_attributes" id="_RJ00000002" value="RG00002,adl,male,SND,MS2,TRI,JPN,REV,502,046,415,074,182,321" disabled="disabled">
</dd>
<dd class="search_tag">
<a href="https://www.dlsite.com/maniax/fsr/=/genre/502/from/work.genre">Male Protagonist</a>
<a href="https://www.dlsite.com/maniax/fsr/=/genre/415/from/work.genre">Gal</a>
<a href="https://www.dlsite.com/maniax/fsr/=/genre/074/from/work.genre">Uniform</a>
<a href="https://www.dlsite.com/maniax/fsr/=/genre/046/from/work.genre">Harem</a>
<a href="https://www.dlsite.com/maniax/fsr/=/genre/182/from/work.genre">Big Breasts</a>
<a href="https://www.dlsite.com/maniax/fsr/=/genre/321/from/work.genre">Tanned Skin / Suntan</a>
</dd>
</dl>
</td>
<td class="work_1col_right">
<ul class="work_info_box">
<li class="sales_date">Announced:: Sep/05/2025</li>
<li class="work_dl clear">
<div>Favorited: <span>500</span></div>
</li>
<li class="work_review clear">
<span class="work_to_review">
<div title="Reviews">
<a href="https://www.dlsite.com/maniax/work/reviewlist/=/product_id/RJ00000002.html">(44)</a>
</div>
</span>
</li>
<li class="work_rating"><div class="star_rating star_45">(1,924)</div></li>
</ul>
<div class="work_btn_box">
<div hidden="hidden" class="ga4_event_item_RJ00000002" data-product_id="RJ00000002" data-work_name="Title of Product" data-maker_id="RG00002" data-work_type="SOU" data-options="SND#MS2#TRI#JPN#REV" data-price="1848" data-official_price="2640"></div>
<ul class="work_btn_list btn_free_sample">
<li class="work_btn_list_item">
<p class="work_cart_xs">
<a href="/maniax/cart/=/product_id/RJ00000002.html" class="btn_cart">Add to Cart</a>
</p>
</li>
<li class="work_btn_list_item">
<p class="work_favorite_xs">
<a href="/maniax/mypage/wishlist/=/product_id/RJ00000002.html" class="btn_favorite">Favorites</a>
</p>
</li>
<li class="work_btn_list_item">
<p class="work_free_sample">
<a href="#" class="btn_free_sample" data-product-id="RJ00000002">Samples</a>
</p>
</li>
</ul>
</div>
</td>
</tr>
</tbody>
</table>
</div>
</body>
</html>

View File

@@ -0,0 +1,53 @@
using JSMR.Application.Scanning.Contracts;
using JSMR.Infrastructure.Http;
using JSMR.Infrastructure.Scanning;
using JSMR.Tests.Utilities;
using NSubstitute;
using Shouldly;
namespace JSMR.Tests.Integrations.DLSite;
/// <summary>
/// Tests for the DLSite voice work scanners. The HTTP layer is replaced with an
/// NSubstitute stub that serves a saved HTML page, so no network access is needed.
/// </summary>
public class VoiceWorkScannerTests
{
    /// <summary>
    /// Reads a test fixture through <see cref="ResourceHelper"/>.
    /// </summary>
    /// <param name="resourceName">
    /// Fixture name relative to the <c>JSMR.Tests.Scanning.</c> prefix, e.g. <c>English-Page.html</c>.
    /// </param>
    /// <returns>The fixture content as a string.</returns>
    private static async Task<string> ReadResourceAsync(string resourceName)
    {
        return await ResourceHelper.ReadAsync($"JSMR.Tests.Scanning.{resourceName}");
    }

    /// <summary>
    /// Scans the English fixture page and verifies the fields extracted for a released
    /// work (first row) and an upcoming work (second row).
    /// </summary>
    [Fact]
    public async Task Scan_With_English_Locale()
    {
        // Arrange
        string englishPageHtml = await ReadResourceAsync("English-Page.html");

        IHttpService httpService = Substitute.For<IHttpService>();

        // Match any URL and any cancellation token: the stub must answer regardless of
        // which token the loader forwards (a linked or timeout token would not equal
        // CancellationToken.None and would silently bypass a literal-valued stub).
        httpService.GetStringAsync(Arg.Any<string>(), Arg.Any<CancellationToken>())
            .Returns(Task.FromResult(englishPageHtml));

        HtmlLoader loader = new(httpService);
        EnglishVoiceWorksScanner scanner = new(loader);

        VoiceWorkScanOptions options = new(
            PageNumber: 1,
            PageSize: 100,
            ExcludeAIGeneratedWorks: true,
            ExcludePartiallyAIGeneratedWorks: true,
            ExcludedMakerIds: []
        );

        // Act
        var result = await scanner.ScanPageAsync(options, CancellationToken.None);

        // Assert
        result.Count.ShouldBe(2);

        // First row: a released work with a sales date and a download count.
        result[0].ExpectedDate.ShouldBeNull();
        result[0].SalesDate.ShouldBe(new DateOnly(2025, 9, 6));
        result[0].ProductId.ShouldBe("RJ00000001");
        result[0].ProductName.ShouldBe("Title of Product");
        result[0].Description.ShouldBe("Description of the product.");
        result[0].Downloads.ShouldBe(1000);

        // Second row: an upcoming work with an expected date and no sales date.
        // NOTE(review): presumably the fixture's "Middle Oct. 2025" maps to the 11th — confirm against the scanner.
        result[1].ExpectedDate.ShouldBe(new DateOnly(2025, 10, 11));
        result[1].SalesDate.ShouldBeNull();
        result[1].ProductId.ShouldBe("RJ00000002");
    }
}