Updated various parts of scanning and ingestion for bug fixes and enhancements.
This commit is contained in:
@@ -0,0 +1,62 @@
|
||||
using JSMR.Application.Scanning.Contracts;
|
||||
|
||||
namespace JSMR.Infrastructure.Scanning.Extensions;
|
||||
|
||||
/// <summary>
/// Extension helpers for arrays of <see cref="DLSiteWork"/> produced by a scan.
/// </summary>
public static class DLSiteWorkExtensions
{
    /// <summary>
    /// Assigns an <c>ExpectedDate</c> to every work that has neither a sales date nor an
    /// expected date, provided the closest dated work before it and the closest dated work
    /// after it share the same effective date.
    /// </summary>
    /// <param name="works">
    /// The works to inspect, in the order they should be compared with their neighbours.
    /// Elements are updated through the reference read from the array
    /// (NOTE(review): assumes <see cref="DLSiteWork"/> is a reference type — confirm).
    /// </param>
    public static void InferAndUpdateExpectedDates(this DLSiteWork[] works)
    {
        int count = works.Length;

        // Forward sweep: nearestBefore[k] is the most recent effective date seen at or before k.
        var nearestBefore = new DateOnly?[count];
        DateOnly? carried = null;

        for (int index = 0; index < count; index++)
        {
            carried = GetEffectiveDate(works[index]) ?? carried;
            nearestBefore[index] = carried;
        }

        // Backward sweep: nearestAfter[k] is the first effective date seen at or after k.
        var nearestAfter = new DateOnly?[count];
        carried = null;

        for (int index = count - 1; index >= 0; index--)
        {
            carried = GetEffectiveDate(works[index]) ?? carried;
            nearestAfter[index] = carried;
        }

        // Both sweeps are finished before any assignment below, so values inferred here
        // never feed back into the lookup tables.
        for (int index = 0; index < count; index++)
        {
            DLSiteWork candidate = works[index];

            bool alreadyDated = candidate.SalesDate.HasValue || candidate.ExpectedDate.HasValue;
            if (alreadyDated)
            {
                continue;
            }

            // The first and last entries lack a neighbour on one side, so nothing can be inferred.
            if (index == 0 || index == count - 1)
            {
                continue;
            }

            DateOnly? before = nearestBefore[index - 1];
            DateOnly? after = nearestAfter[index + 1];

            // Infer only when a date exists on each side and the two agree.
            if (before is { } anchor && after is { } follower && anchor == follower)
            {
                candidate.ExpectedDate = anchor;
            }
        }
    }

    /// <summary>
    /// Resolves the date used to compare a work with its neighbours.
    /// </summary>
    /// <param name="work">The work to read dates from.</param>
    /// <returns>
    /// The work's <c>ExpectedDate</c> when it has one; otherwise its <c>SalesDate</c> with the
    /// day snapped to the 1st, 11th or 21st of the same month; or <c>null</c> when neither is set.
    /// </returns>
    private static DateOnly? GetEffectiveDate(DLSiteWork work)
    {
        if (work.ExpectedDate is { } expected)
        {
            return expected;
        }

        if (!work.SalesDate.HasValue)
        {
            return null;
        }

        var sold = work.SalesDate.Value;

        // Early / middle / late third of the month maps to day 1 / 11 / 21.
        int bucketDay = sold.Day switch
        {
            >= 21 => 21,
            >= 11 => 11,
            _ => 1,
        };

        return new DateOnly(sold.Year, sold.Month, bucketDay);
    }
}
|
||||
@@ -5,9 +5,9 @@ using JSMR.Application.Scanning.Ports;
|
||||
using JSMR.Domain.Enums;
|
||||
using JSMR.Domain.ValueObjects;
|
||||
using JSMR.Infrastructure.Http;
|
||||
using JSMR.Infrastructure.Scanning.Extensions;
|
||||
using JSMR.Infrastructure.Scanning.Models;
|
||||
using System.Globalization;
|
||||
using System.Text.Json;
|
||||
using System.Text.RegularExpressions;
|
||||
|
||||
namespace JSMR.Infrastructure.Scanning;
|
||||
@@ -25,7 +25,10 @@ public abstract class VoiceWorksScanner(IHtmlLoader htmlLoader) : IVoiceWorksSca
|
||||
DLSiteHtmlDocument document = await GetDLSiteHtmlCollectionAsync(options, cancellationToken);
|
||||
DLSiteHtmlNode[] nodes = document.GetDLSiteNodes();
|
||||
|
||||
return GetDLSiteWorks(nodes, options);
|
||||
DLSiteWork[] works = GetDLSiteWorks(nodes, options);
|
||||
works.InferAndUpdateExpectedDates();
|
||||
|
||||
return works;
|
||||
}
|
||||
|
||||
private async Task<DLSiteHtmlDocument> GetDLSiteHtmlCollectionAsync(VoiceWorkScanOptions options, CancellationToken cancellationToken)
|
||||
@@ -53,7 +56,7 @@ public abstract class VoiceWorksScanner(IHtmlLoader htmlLoader) : IVoiceWorksSca
|
||||
return filterBuilder.BuildSearchQuery(options.PageNumber, options.PageSize);
|
||||
}
|
||||
|
||||
private List<DLSiteWork> GetDLSiteWorks(DLSiteHtmlNode[] nodes, VoiceWorkScanOptions options)
|
||||
private DLSiteWork[] GetDLSiteWorks(DLSiteHtmlNode[] nodes, VoiceWorkScanOptions options)
|
||||
{
|
||||
var works = new List<DLSiteWork>();
|
||||
|
||||
@@ -67,7 +70,7 @@ public abstract class VoiceWorksScanner(IHtmlLoader htmlLoader) : IVoiceWorksSca
|
||||
works.Add(work);
|
||||
}
|
||||
|
||||
return works;
|
||||
return [.. works];
|
||||
}
|
||||
|
||||
private DLSiteWork GetDLSiteWork(DLSiteHtmlNode node)
|
||||
|
||||
Reference in New Issue
Block a user