From 340c62d18bda0d8cb50a32a6a00b9f8245af3bd5 Mon Sep 17 00:00:00 2001 From: Brian Bicknell Date: Sun, 1 Feb 2026 21:41:23 -0500 Subject: [PATCH] Added worker app. --- .../ApplicationServiceCollectionExtensions.cs | 3 +- .../Scanning/ScanVoiceWorksRequest.cs | 2 +- ...frastructureServiceCollectionExtensions.cs | 59 ++++++---- .../JSMR.Infrastructure.csproj | 1 + JSMR.Worker/JSMR.Worker.csproj | 21 ++++ JSMR.Worker/Options/ScanOptions.cs | 11 ++ JSMR.Worker/Program.cs | 109 ++++++++++++++++++ JSMR.Worker/Services/FileCheckpointStore.cs | 28 +++++ JSMR.Worker/Services/ICheckpointStore.cs | 7 ++ JSMR.Worker/Services/ScanJob.cs | 28 +++++ JSMR.Worker/Services/ScanRunner.cs | 59 ++++++++++ JSMR.sln | 6 + 12 files changed, 309 insertions(+), 25 deletions(-) create mode 100644 JSMR.Worker/JSMR.Worker.csproj create mode 100644 JSMR.Worker/Options/ScanOptions.cs create mode 100644 JSMR.Worker/Program.cs create mode 100644 JSMR.Worker/Services/FileCheckpointStore.cs create mode 100644 JSMR.Worker/Services/ICheckpointStore.cs create mode 100644 JSMR.Worker/Services/ScanJob.cs create mode 100644 JSMR.Worker/Services/ScanRunner.cs diff --git a/JSMR.Application/DI/ApplicationServiceCollectionExtensions.cs b/JSMR.Application/DI/ApplicationServiceCollectionExtensions.cs index f5d61f2..beb3c1e 100644 --- a/JSMR.Application/DI/ApplicationServiceCollectionExtensions.cs +++ b/JSMR.Application/DI/ApplicationServiceCollectionExtensions.cs @@ -1,5 +1,6 @@ using JSMR.Application.Circles.Queries.Search; using JSMR.Application.Creators.Queries.Search; +using JSMR.Application.Scanning; using JSMR.Application.Tags.Commands.SetEnglishName; using JSMR.Application.Tags.Commands.UpdateTagStatus; using JSMR.Application.Tags.Queries.Search; @@ -16,7 +17,7 @@ public static class ApplicationServiceCollectionExtensions services.AddScoped(); services.AddScoped(); - //services.AddScoped(); + services.AddScoped(); services.AddScoped(); services.AddScoped(); diff --git 
a/JSMR.Application/Scanning/ScanVoiceWorksRequest.cs b/JSMR.Application/Scanning/ScanVoiceWorksRequest.cs index 3f44f43..a13276f 100644 --- a/JSMR.Application/Scanning/ScanVoiceWorksRequest.cs +++ b/JSMR.Application/Scanning/ScanVoiceWorksRequest.cs @@ -2,4 +2,4 @@ namespace JSMR.Application.Scanning; -public sealed record ScanVoiceWorksRequest(int PageNumber, int PageSize, Locale Locale, string[] ExcludedMakerIds); \ No newline at end of file +public sealed record ScanVoiceWorksRequest(int PageNumber, int PageSize, Locale Locale); \ No newline at end of file diff --git a/JSMR.Infrastructure/DI/InfrastructureServiceCollectionExtensions.cs b/JSMR.Infrastructure/DI/InfrastructureServiceCollectionExtensions.cs index 6ef80bb..20b7502 100644 --- a/JSMR.Infrastructure/DI/InfrastructureServiceCollectionExtensions.cs +++ b/JSMR.Infrastructure/DI/InfrastructureServiceCollectionExtensions.cs @@ -5,6 +5,7 @@ using JSMR.Application.Common.Caching; using JSMR.Application.Creators.Ports; using JSMR.Application.Creators.Queries.Search.Ports; using JSMR.Application.Enums; +using JSMR.Application.Integrations.Ports; using JSMR.Application.Scanning.Ports; using JSMR.Application.Tags.Ports; using JSMR.Application.Tags.Queries.Search.Ports; @@ -21,8 +22,12 @@ using JSMR.Infrastructure.Data.Repositories.Tags; using JSMR.Infrastructure.Data.Repositories.VoiceWorks; using JSMR.Infrastructure.Http; using JSMR.Infrastructure.Ingestion; +using JSMR.Infrastructure.Integrations.DLSite; using JSMR.Infrastructure.Scanning; using Microsoft.Extensions.DependencyInjection; +using Microsoft.Extensions.Http.Resilience; +using Polly; +using System.Net; namespace JSMR.Infrastructure.DI; @@ -72,29 +77,37 @@ public static class InfrastructureServiceCollectionExtensions services.AddSingleton(); services.AddSingleton(); + services.AddHttpClient(httpClient => + { + httpClient.BaseAddress = new Uri("https://www.dlsite.com/"); + httpClient.DefaultRequestHeaders.UserAgent.ParseAdd("JSMR/1.0 
(+contact@example.com)"); + httpClient.Timeout = TimeSpan.FromSeconds(15); + }) + .AddResilienceHandler("dlsite", builder => + { + builder.AddRetry(new HttpRetryStrategyOptions + { + MaxRetryAttempts = 3, + UseJitter = true, + Delay = TimeSpan.FromMilliseconds(200), + BackoffType = DelayBackoffType.Exponential, + ShouldHandle = new PredicateBuilder() + .Handle() + .HandleResult(msg => + msg.StatusCode == (HttpStatusCode)429 || + (int)msg.StatusCode >= 500) + }); + + // (Optional) add a circuit breaker: + // builder.AddCircuitBreaker(new HttpCircuitBreakerStrategyOptions + // { + // FailureRatio = 0.2, + // SamplingDuration = TimeSpan.FromSeconds(30), + // MinimumThroughput = 20, + // BreakDuration = TimeSpan.FromSeconds(15) + // }); + }); + return services; } - - //public static IServiceCollection AddDLSiteClient(this IServiceCollection services) - //{ - // var retryPolicy = HttpPolicyExtensions - // .HandleTransientHttpError() - // .OrResult(msg => (int)msg.StatusCode == 429) // Too Many Requests - // .WaitAndRetryAsync(new[] - // { - // TimeSpan.FromMilliseconds(200), - // TimeSpan.FromMilliseconds(500), - // TimeSpan.FromSeconds(1.5) - // }); - - // services.AddHttpClient(c => - // { - // c.BaseAddress = new Uri("https://www.dlsite.com/"); - // c.DefaultRequestHeaders.UserAgent.ParseAdd("JSMR/1.0 (+contact@example.com)"); - // c.Timeout = TimeSpan.FromSeconds(15); - // }) - // .AddPolicyHandler(retryPolicy); - - // return services; - //} } \ No newline at end of file diff --git a/JSMR.Infrastructure/JSMR.Infrastructure.csproj b/JSMR.Infrastructure/JSMR.Infrastructure.csproj index 2f6256b..89b1ce2 100644 --- a/JSMR.Infrastructure/JSMR.Infrastructure.csproj +++ b/JSMR.Infrastructure/JSMR.Infrastructure.csproj @@ -20,6 +20,7 @@ + diff --git a/JSMR.Worker/JSMR.Worker.csproj b/JSMR.Worker/JSMR.Worker.csproj new file mode 100644 index 0000000..b22fb16 --- /dev/null +++ b/JSMR.Worker/JSMR.Worker.csproj @@ -0,0 +1,21 @@ + + + + Exe + net9.0 + enable + enable + + + + + + 
+
+
+
+
+
+
+
+
diff --git a/JSMR.Worker/Options/ScanOptions.cs b/JSMR.Worker/Options/ScanOptions.cs
new file mode 100644
index 0000000..6b29067
--- /dev/null
+++ b/JSMR.Worker/Options/ScanOptions.cs
@@ -0,0 +1,14 @@
+namespace JSMR.Worker.Options;
+
+/// <summary>
+/// Settings for one run of the worker's <c>scan</c> command.
+/// </summary>
+public sealed class ScanOptions
+{
+    public string? Locale { get; init; } = "Japanese"; // maps to your Locale enum
+    public int? StartPage { get; init; } // if null, resume from checkpoint or 1
+    public int? EndPage { get; init; } // optional cap
+    public int? PageSize { get; init; } // override default
+    public bool Watch { get; init; } // loop forever
+    public TimeSpan Interval { get; init; } = TimeSpan.FromMinutes(5); // pause between passes when Watch is set
+}
\ No newline at end of file
diff --git a/JSMR.Worker/Program.cs b/JSMR.Worker/Program.cs
new file mode 100644
index 0000000..5bef40b
--- /dev/null
+++ b/JSMR.Worker/Program.cs
@@ -0,0 +1,129 @@
+using JSMR.Application.DI;
+using JSMR.Infrastructure.Data;
+using JSMR.Infrastructure.DI;
+using JSMR.Worker.Options;
+using JSMR.Worker.Services;
+using Microsoft.EntityFrameworkCore;
+using Microsoft.Extensions.Configuration;
+using Microsoft.Extensions.DependencyInjection;
+using Microsoft.Extensions.Hosting;
+using System.CommandLine;
+
+// Worker entry point: registers services, then exposes a "scan" command that runs PagedScanRunner.
+HostApplicationBuilder builder = Host.CreateApplicationBuilder(args);
+
+//builder.Services.AddSerilog(o => o
+//    .WriteTo.Console()
+//    .MinimumLevel.Information());
+
+builder.Services
+    .AddApplication()
+    .AddInfrastructure();
+
+// The message names the key that is actually read ("AppDb").
+string connectionString = builder.Configuration.GetConnectionString("AppDb")
+    ?? throw new InvalidOperationException("Missing ConnectionStrings:AppDb");
+
+// NOTE(review): the DbContext type argument of AddDbContextFactory is not visible in this patch text - confirm.
+builder.Services.AddDbContextFactory(optionsBuilder =>
+    optionsBuilder
+        .UseMySql(connectionString, ServerVersion.AutoDetect(connectionString))
+        .EnableSensitiveDataLogging(false));
+
+// Worker services
+// NOTE(review): type arguments restored from usage (the runner takes ICheckpointStore; the action resolves PagedScanRunner) - confirm.
+builder.Services.AddSingleton<ICheckpointStore, FileCheckpointStore>();
+builder.Services.AddTransient<PagedScanRunner>();
+
+RootCommand rootCommand = new("JSMR worker");
+Command scan = new("scan", "Scan and update the database");
+
+// Defaulted here because the parsed value is always copied into ScanOptions.Locale below;
+// without a default an omitted --locale would arrive as null and be rejected as an unknown locale.
+Option<string> localeOption = new("--locale", "-l")
+{
+    Description = "Locale (Japanese/English)",
+    Required = false,
+    DefaultValueFactory = _ => "Japanese"
+};
+
+Option<int?> startOption = new("--start", "-s")
+{
+    Description = "Start page (default = checkpoint+1 or 1)"
+};
+
+Option<int?> endOption = new("--end", "-e")
+{
+    Description = "End page (optional)"
+};
+
+Option<int> sizeOption = new("--pageSize", "-ps")
+{
+    Description = "Page size (default from config or 100)",
+    DefaultValueFactory = _ => 100
+};
+
+Option<bool> watchOption = new("--watch", "-w")
+{
+    Description = "Loop forever",
+    DefaultValueFactory = _ => false
+};
+
+// "-i" rather than "-e": "-e" is already the alias of --end on this command.
+Option<TimeSpan> everyOption = new("--every", "-i")
+{
+    Description = "Interval when --watch is set",
+    DefaultValueFactory = _ => TimeSpan.FromMinutes(5)
+};
+
+
+scan.Add(localeOption);
+scan.Add(startOption);
+scan.Add(endOption);
+scan.Add(sizeOption);
+scan.Add(watchOption);
+scan.Add(everyOption);
+
+scan.SetAction(async (parseResult, cancellationToken) =>
+{
+    using var host = builder.Build();
+    var runner = host.Services.GetRequiredService<PagedScanRunner>();
+
+    ScanOptions options = new()
+    {
+        Locale = parseResult.GetValue(localeOption),
+        StartPage = parseResult.GetValue(startOption),
+        EndPage = parseResult.GetValue(endOption),
+        PageSize = parseResult.GetValue(sizeOption),
+        Watch = parseResult.GetValue(watchOption),
+        Interval = parseResult.GetValue(everyOption)
+    };
+
+    // Linked to the token supplied to this action so that either source stops the scan.
+    using CancellationTokenSource cancellationTokenSource =
+        CancellationTokenSource.CreateLinkedTokenSource(cancellationToken);
+
+    ConsoleCancelEventHandler onCancelKeyPress = (_, eventArgs) =>
+    {
+        eventArgs.Cancel = true;
+        cancellationTokenSource.Cancel();
+    };
+
+    Console.CancelKeyPress += onCancelKeyPress;
+
+    try
+    {
+        await runner.RunAsync(options, cancellationTokenSource.Token);
+    }
+    finally
+    {
+        // Unsubscribe before the token source is disposed; a late Ctrl+C must not call Cancel() on it.
+        Console.CancelKeyPress -= onCancelKeyPress;
+    }
+});
+
+rootCommand.Add(scan);
+
+//rootCommand.SetAction(async (parseResult, cancellationToken) => await rootCommand.InvokeAsync("scan"));
+
+return await rootCommand.Parse(args).InvokeAsync();
\ No newline at end of file
diff --git a/JSMR.Worker/Services/FileCheckpointStore.cs b/JSMR.Worker/Services/FileCheckpointStore.cs
new file mode 100644
index 0000000..8c9b203
--- /dev/null
+++ b/JSMR.Worker/Services/FileCheckpointStore.cs
@@ -0,0 +1,50 @@
+using System.Globalization;
+
+namespace JSMR.Worker.Services;
+
+/// <summary>
+/// Keeps the last scanned page of each locale in a text file named <c>scan.{locale}.page</c>
+/// inside a <c>State</c> folder under <see cref="AppContext.BaseDirectory"/>.
+/// </summary>
+public sealed class FileCheckpointStore : ICheckpointStore
+{
+    private readonly string _root = Path.Combine(AppContext.BaseDirectory, "State");
+
+    public FileCheckpointStore() => Directory.CreateDirectory(_root);
+
+    /// <summary>Reads the stored page for <paramref name="locale"/>.</summary>
+    /// <returns>The stored page, or <c>null</c> when there is no file or its content is not an integer.</returns>
+    /// <exception cref="ArgumentException"><paramref name="locale"/> cannot be used in a file name.</exception>
+    public async Task<int?> GetLastPageAsync(string locale, CancellationToken ct)
+    {
+        string path = GetCheckpointPath(locale);
+
+        if (!File.Exists(path))
+            return null;
+
+        string text = await File.ReadAllTextAsync(path, ct);
+
+        // Invariant culture: the file is machine-written, so parsing must not depend on the host's culture.
+        return int.TryParse(text.Trim(), NumberStyles.Integer, CultureInfo.InvariantCulture, out int page)
+            ? page
+            : null;
+    }
+
+    /// <summary>Overwrites the stored page for <paramref name="locale"/> with <paramref name="page"/>.</summary>
+    /// <exception cref="ArgumentException"><paramref name="locale"/> cannot be used in a file name.</exception>
+    public async Task SaveLastPageAsync(string locale, int page, CancellationToken ct)
+    {
+        string path = GetCheckpointPath(locale);
+
+        await File.WriteAllTextAsync(path, page.ToString(CultureInfo.InvariantCulture), ct);
+    }
+
+    // Builds the checkpoint path; rejects values that could point outside the state folder.
+    private string GetCheckpointPath(string locale)
+    {
+        if (string.IsNullOrWhiteSpace(locale) || locale.IndexOfAny(Path.GetInvalidFileNameChars()) >= 0)
+            throw new ArgumentException($"Locale '{locale}' cannot be used in a checkpoint file name.", nameof(locale));
+
+        return Path.Combine(_root, $"scan.{locale}.page");
+    }
+}
\ No newline at end of file
diff --git a/JSMR.Worker/Services/ICheckpointStore.cs b/JSMR.Worker/Services/ICheckpointStore.cs
new file mode 100644
index 0000000..2388002
--- /dev/null
+++ b/JSMR.Worker/Services/ICheckpointStore.cs
@@ -0,0 +1,11 @@
+namespace JSMR.Worker.Services;
+
+/// <summary>Persists, per locale, the number of the last page that was scanned.</summary>
+public interface ICheckpointStore
+{
+    /// <summary>Returns the last saved page for <paramref name="locale"/>, or <c>null</c> when none is stored.</summary>
+    Task<int?> GetLastPageAsync(string locale, CancellationToken cancellationToken);
+
+    /// <summary>Saves <paramref name="page"/> as the last page for <paramref name="locale"/>.</summary>
+    Task SaveLastPageAsync(string locale, int page, CancellationToken cancellationToken);
+}
\ No newline at end of file
diff --git a/JSMR.Worker/Services/ScanJob.cs b/JSMR.Worker/Services/ScanJob.cs
new file mode 100644
index 0000000..14391e5
--- /dev/null
+++
b/JSMR.Worker/Services/ScanJob.cs
@@ -0,0 +1,38 @@
+using JSMR.Application.Enums;
+using JSMR.Application.Scanning;
+using JSMR.Worker.Options;
+using Microsoft.Extensions.Logging;
+using Microsoft.Extensions.Options;
+
+namespace JSMR.Worker.Services;
+
+/// <summary>
+/// Scans a single page with the values found in <see cref="ScanOptions"/>.
+/// </summary>
+public sealed class ScanJob(ILogger<ScanJob> log, IOptions<ScanOptions> options, ScanVoiceWorksHandler scanVoiceWorksHandler)
+{
+    private readonly ScanOptions _options = options.Value;
+
+    /// <summary>Sends one scan request to the handler and logs start and completion.</summary>
+    /// <exception cref="ArgumentException">The configured locale does not parse as a <see cref="Locale"/>.</exception>
+    public async Task RunOnceAsync(CancellationToken cancellationToken)
+    {
+        log.LogInformation("Starting scan: Locale={Locale}, Start Page={StartPage}, EndPage={EndPage}",
+            _options.Locale, _options.StartPage, _options.EndPage);
+
+        // TryParse also covers a null Locale and reports it the same way PagedScanRunner does.
+        if (!Enum.TryParse(_options.Locale, ignoreCase: true, out Locale locale))
+            throw new ArgumentException($"Unknown locale '{_options.Locale}'.");
+
+        // Falls back to page 1 / size 100 (the previous fixed values) when the options leave them unset.
+        ScanVoiceWorksRequest request = new(
+            PageNumber: _options.StartPage ?? 1,
+            PageSize: _options.PageSize ?? 100,
+            Locale: locale
+        );
+
+        await scanVoiceWorksHandler.HandleAsync(request, cancellationToken);
+
+        log.LogInformation("Scan completed.");
+    }
+}
\ No newline at end of file
diff --git a/JSMR.Worker/Services/ScanRunner.cs b/JSMR.Worker/Services/ScanRunner.cs
new file mode 100644
index 0000000..074cfe8
--- /dev/null
+++ b/JSMR.Worker/Services/ScanRunner.cs
@@ -0,0 +1,82 @@
+using JSMR.Application.Enums;
+using JSMR.Application.Scanning;
+using JSMR.Worker.Options;
+using Microsoft.Extensions.DependencyInjection;
+using Microsoft.Extensions.Logging;
+
+namespace JSMR.Worker.Services;
+
+/// <summary>
+/// Scans consecutive pages through <see cref="ScanVoiceWorksHandler"/> and stores the number of
+/// each finished page in an <see cref="ICheckpointStore"/>.
+/// </summary>
+public sealed class PagedScanRunner(
+    ILogger<PagedScanRunner> log,
+    IServiceProvider serviceProvider,
+    ICheckpointStore checkpoints)
+{
+    /// <summary>
+    /// Scans from <see cref="ScanOptions.StartPage"/> (or the page after the stored checkpoint) up to
+    /// <see cref="ScanOptions.EndPage"/> when one is set; with <see cref="ScanOptions.Watch"/> set it then
+    /// waits <see cref="ScanOptions.Interval"/> and runs another pass.
+    /// </summary>
+    /// <param name="options">Locale, page range, page size and watch settings.</param>
+    /// <param name="cancellationToken">Ends the run; also passed to the handler, the checkpoint store and the delay.</param>
+    /// <exception cref="ArgumentException"><see cref="ScanOptions.Locale"/> does not parse as a <see cref="Locale"/>.</exception>
+    public async Task RunAsync(ScanOptions options, CancellationToken cancellationToken)
+    {
+        if (!Enum.TryParse(options.Locale, ignoreCase: true, out Locale locale))
+            throw new ArgumentException($"Unknown locale '{options.Locale}'.");
+
+        // Checkpoints are keyed by the parsed enum value's text, so "japanese" and "Japanese" share
+        // one checkpoint and the store never receives a null string.
+        string checkpointKey = locale.ToString();
+
+        int pageSize = options.PageSize ?? 100;
+
+        int startPage = options.StartPage
+            ?? (await checkpoints.GetLastPageAsync(checkpointKey, cancellationToken)).GetValueOrDefault(0) + 1;
+
+        while (!cancellationToken.IsCancellationRequested)
+        {
+            int currentPage = startPage;
+            int? end = options.EndPage;
+
+            // NOTE(review): the handler's response is not inspected, so this loop stops only once
+            // EndPage is passed or cancellation is requested - not on an empty page.
+            for (; !cancellationToken.IsCancellationRequested && (!end.HasValue || currentPage <= end.Value); currentPage++)
+            {
+                // One scope per page: scoped services are created for this page and disposed after it
+                // rather than being resolved from the provider this runner was constructed with.
+                await using AsyncServiceScope scope = serviceProvider.CreateAsyncScope();
+                ScanVoiceWorksHandler handler = scope.ServiceProvider.GetRequiredService<ScanVoiceWorksHandler>();
+
+                log.LogInformation("Scanning page {Page} (size {Size}, locale {Locale})…", currentPage, pageSize, locale);
+
+                // pageSize (not a literal) so the request matches the configured size and the log line above.
+                ScanVoiceWorksRequest request = new(
+                    PageNumber: currentPage,
+                    PageSize: pageSize,
+                    Locale: locale
+                );
+
+                await handler.HandleAsync(request, cancellationToken);
+
+                // Save checkpoint
+                await checkpoints.SaveLastPageAsync(checkpointKey, currentPage, cancellationToken);
+            }
+
+            if (!options.Watch) break;
+
+            log.LogInformation("Watch mode: sleeping {Interval}…", options.Interval);
+            await Task.Delay(options.Interval, cancellationToken);
+
+            // Compute next “start” for next cycle:
+            // - If you want to re-scan the latest N pages every loop to catch late updates,
+            //   modify logic here (e.g., start = Math.Max(1, current - 2))
+            // NOTE(review): when EndPage is set, currentPage is already past it here, so later
+            // cycles scan nothing - confirm that is the intended watch behaviour.
+            startPage = currentPage; // continue from where we left off
+        }
+    }
+}
\ No newline at end of file
diff --git a/JSMR.sln b/JSMR.sln
index 39c33d6..3d57d1a 100644
--- a/JSMR.sln
+++ b/JSMR.sln
@@ -15,6 +15,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "JSMR.Api", "JSMR.Api\JSMR.A
 EndProject
 Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "JSMR.UI.Blazor", "JSMR.UI.Blazor\JSMR.UI.Blazor.csproj", "{074919FA-5234-4AE6-9E37-7A02F2BD3EED}"
 EndProject
+Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "JSMR.Worker", "JSMR.Worker\JSMR.Worker.csproj", "{964BD375-FAE3-4044-A09B-5C43919C9B52}"
+EndProject
 Global
 	GlobalSection(SolutionConfigurationPlatforms) = preSolution
 		Debug|Any CPU = Debug|Any CPU
@@ -45,6 +47,10 @@ Global
 		{074919FA-5234-4AE6-9E37-7A02F2BD3EED}.Debug|Any CPU.Build.0 = Debug|Any CPU
 		{074919FA-5234-4AE6-9E37-7A02F2BD3EED}.Release|Any CPU.ActiveCfg = Release|Any CPU
 		{074919FA-5234-4AE6-9E37-7A02F2BD3EED}.Release|Any
CPU.Build.0 = Release|Any CPU + {964BD375-FAE3-4044-A09B-5C43919C9B52}.Debug|Any CPU.ActiveCfg = Debug|Any CPU + {964BD375-FAE3-4044-A09B-5C43919C9B52}.Debug|Any CPU.Build.0 = Debug|Any CPU + {964BD375-FAE3-4044-A09B-5C43919C9B52}.Release|Any CPU.ActiveCfg = Release|Any CPU + {964BD375-FAE3-4044-A09B-5C43919C9B52}.Release|Any CPU.Build.0 = Release|Any CPU EndGlobalSection GlobalSection(SolutionProperties) = preSolution HideSolutionNode = FALSE