Updated various parts of scanning and ingestion, for both bug fixes and enhancements.
All checks were successful
ci / build-test (push) Successful in 2m22s
ci / publish-image (push) Has been skipped

This commit is contained in:
2026-03-01 22:07:20 -05:00
parent 704a6fc433
commit 83655f13e9
20 changed files with 555 additions and 90 deletions

View File

@@ -0,0 +1,62 @@
using JSMR.Application.Scanning.Contracts;
namespace JSMR.Infrastructure.Scanning.Extensions;
/// <summary>
/// Extension methods that operate on a batch of <see cref="DLSiteWork"/> entries.
/// </summary>
public static class DLSiteWorkExtensions
{
    /// <summary>
    /// Assigns an <c>ExpectedDate</c> to every entry that has neither a sales date nor an
    /// expected date, provided the nearest dated entries before and after it share the
    /// same effective date. Entries are updated in place; nothing is returned.
    /// </summary>
    /// <param name="works">The entries to inspect and update, in their listing order.</param>
    public static void InferAndUpdateExpectedDates(this DLSiteWork[] works)
    {
        int count = works.Length;

        // Snapshot every entry's effective date before anything is written, so a value
        // inferred for one entry never feeds into the decision for another.
        var effectiveDates = new DateOnly?[count];
        for (int index = 0; index < count; index++)
        {
            effectiveDates[index] = GetEffectiveDate(works[index]);
        }

        // nearestBefore[k]: closest known effective date at or before position k.
        var nearestBefore = new DateOnly?[count];
        DateOnly? carried = null;
        for (int index = 0; index < count; index++)
        {
            carried = effectiveDates[index] ?? carried;
            nearestBefore[index] = carried;
        }

        // nearestAfter[k]: closest known effective date at or after position k.
        var nearestAfter = new DateOnly?[count];
        carried = null;
        for (int index = count - 1; index >= 0; index--)
        {
            carried = effectiveDates[index] ?? carried;
            nearestAfter[index] = carried;
        }

        for (int index = 0; index < count; index++)
        {
            DLSiteWork candidate = works[index];

            bool alreadyDated = candidate.SalesDate.HasValue || candidate.ExpectedDate.HasValue;
            if (alreadyDated)
            {
                continue;
            }

            // The first and last entries lack a neighbour on one side, so they can never qualify.
            if (index == 0 || index == count - 1)
            {
                continue;
            }

            // Only fill when a dated neighbour exists on BOTH sides and the two dates agree.
            if (nearestBefore[index - 1] is DateOnly earlier
                && nearestAfter[index + 1] is DateOnly later
                && earlier == later)
            {
                candidate.ExpectedDate = earlier;
            }
        }
    }

    /// <summary>
    /// Resolves the date used to compare an entry with its neighbours: the expected date
    /// when present; otherwise the sales date with its day snapped to 1, 11, or 21;
    /// otherwise <c>null</c>.
    /// </summary>
    /// <param name="work">The entry to inspect.</param>
    /// <returns>The effective date, or <c>null</c> when the entry carries no date at all.</returns>
    private static DateOnly? GetEffectiveDate(DLSiteWork work)
    {
        if (work.ExpectedDate.HasValue)
        {
            return work.ExpectedDate.Value;
        }

        if (work.SalesDate.HasValue)
        {
            var sold = work.SalesDate.Value;

            // Early / Middle / Late part of the month => day 1 / 11 / 21.
            int bucketDay = sold.Day switch
            {
                >= 21 => 21,
                >= 11 => 11,
                _ => 1,
            };

            return new DateOnly(sold.Year, sold.Month, bucketDay);
        }

        return null;
    }
}

View File

@@ -5,9 +5,9 @@ using JSMR.Application.Scanning.Ports;
using JSMR.Domain.Enums;
using JSMR.Domain.ValueObjects;
using JSMR.Infrastructure.Http;
using JSMR.Infrastructure.Scanning.Extensions;
using JSMR.Infrastructure.Scanning.Models;
using System.Globalization;
using System.Text.Json;
using System.Text.RegularExpressions;
namespace JSMR.Infrastructure.Scanning;
@@ -25,7 +25,10 @@ public abstract class VoiceWorksScanner(IHtmlLoader htmlLoader) : IVoiceWorksSca
DLSiteHtmlDocument document = await GetDLSiteHtmlCollectionAsync(options, cancellationToken);
DLSiteHtmlNode[] nodes = document.GetDLSiteNodes();
return GetDLSiteWorks(nodes, options);
DLSiteWork[] works = GetDLSiteWorks(nodes, options);
works.InferAndUpdateExpectedDates();
return works;
}
private async Task<DLSiteHtmlDocument> GetDLSiteHtmlCollectionAsync(VoiceWorkScanOptions options, CancellationToken cancellationToken)
@@ -53,7 +56,7 @@ public abstract class VoiceWorksScanner(IHtmlLoader htmlLoader) : IVoiceWorksSca
return filterBuilder.BuildSearchQuery(options.PageNumber, options.PageSize);
}
private List<DLSiteWork> GetDLSiteWorks(DLSiteHtmlNode[] nodes, VoiceWorkScanOptions options)
private DLSiteWork[] GetDLSiteWorks(DLSiteHtmlNode[] nodes, VoiceWorkScanOptions options)
{
var works = new List<DLSiteWork>();
@@ -67,7 +70,7 @@ public abstract class VoiceWorksScanner(IHtmlLoader htmlLoader) : IVoiceWorksSca
works.Add(work);
}
return works;
return [.. works];
}
private DLSiteWork GetDLSiteWork(DLSiteHtmlNode node)