Added worker app.
All checks were successful
ci / build-test (push) Successful in 2m16s
ci / publish-image (push) Has been skipped

This commit is contained in:
2026-02-01 21:41:23 -05:00
parent c51775592e
commit 340c62d18b
12 changed files with 309 additions and 25 deletions

View File

@@ -1,5 +1,6 @@
using JSMR.Application.Circles.Queries.Search; using JSMR.Application.Circles.Queries.Search;
using JSMR.Application.Creators.Queries.Search; using JSMR.Application.Creators.Queries.Search;
using JSMR.Application.Scanning;
using JSMR.Application.Tags.Commands.SetEnglishName; using JSMR.Application.Tags.Commands.SetEnglishName;
using JSMR.Application.Tags.Commands.UpdateTagStatus; using JSMR.Application.Tags.Commands.UpdateTagStatus;
using JSMR.Application.Tags.Queries.Search; using JSMR.Application.Tags.Queries.Search;
@@ -16,7 +17,7 @@ public static class ApplicationServiceCollectionExtensions
services.AddScoped<SearchCirclesHandler>(); services.AddScoped<SearchCirclesHandler>();
services.AddScoped<SearchVoiceWorksHandler>(); services.AddScoped<SearchVoiceWorksHandler>();
//services.AddScoped<ScanVoiceWorksHandler>(); services.AddScoped<ScanVoiceWorksHandler>();
services.AddScoped<SearchTagsHandler>(); services.AddScoped<SearchTagsHandler>();
services.AddScoped<SetTagEnglishNameHandler>(); services.AddScoped<SetTagEnglishNameHandler>();

View File

@@ -2,4 +2,4 @@
namespace JSMR.Application.Scanning; namespace JSMR.Application.Scanning;
public sealed record ScanVoiceWorksRequest(int PageNumber, int PageSize, Locale Locale, string[] ExcludedMakerIds); public sealed record ScanVoiceWorksRequest(int PageNumber, int PageSize, Locale Locale);

View File

@@ -5,6 +5,7 @@ using JSMR.Application.Common.Caching;
using JSMR.Application.Creators.Ports; using JSMR.Application.Creators.Ports;
using JSMR.Application.Creators.Queries.Search.Ports; using JSMR.Application.Creators.Queries.Search.Ports;
using JSMR.Application.Enums; using JSMR.Application.Enums;
using JSMR.Application.Integrations.Ports;
using JSMR.Application.Scanning.Ports; using JSMR.Application.Scanning.Ports;
using JSMR.Application.Tags.Ports; using JSMR.Application.Tags.Ports;
using JSMR.Application.Tags.Queries.Search.Ports; using JSMR.Application.Tags.Queries.Search.Ports;
@@ -21,8 +22,12 @@ using JSMR.Infrastructure.Data.Repositories.Tags;
using JSMR.Infrastructure.Data.Repositories.VoiceWorks; using JSMR.Infrastructure.Data.Repositories.VoiceWorks;
using JSMR.Infrastructure.Http; using JSMR.Infrastructure.Http;
using JSMR.Infrastructure.Ingestion; using JSMR.Infrastructure.Ingestion;
using JSMR.Infrastructure.Integrations.DLSite;
using JSMR.Infrastructure.Scanning; using JSMR.Infrastructure.Scanning;
using Microsoft.Extensions.DependencyInjection; using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Http.Resilience;
using Polly;
using System.Net;
namespace JSMR.Infrastructure.DI; namespace JSMR.Infrastructure.DI;
@@ -72,29 +77,37 @@ public static class InfrastructureServiceCollectionExtensions
services.AddSingleton<IClock, Clock>(); services.AddSingleton<IClock, Clock>();
services.AddSingleton<ITimeProvider, TokyoTimeProvider>(); services.AddSingleton<ITimeProvider, TokyoTimeProvider>();
services.AddHttpClient<IDLSiteClient, DLSiteClient>(httpClient =>
{
httpClient.BaseAddress = new Uri("https://www.dlsite.com/");
httpClient.DefaultRequestHeaders.UserAgent.ParseAdd("JSMR/1.0 (+contact@example.com)");
httpClient.Timeout = TimeSpan.FromSeconds(15);
})
.AddResilienceHandler("dlsite", builder =>
{
builder.AddRetry(new HttpRetryStrategyOptions
{
MaxRetryAttempts = 3,
UseJitter = true,
Delay = TimeSpan.FromMilliseconds(200),
BackoffType = DelayBackoffType.Exponential,
ShouldHandle = new PredicateBuilder<HttpResponseMessage>()
.Handle<HttpRequestException>()
.HandleResult(msg =>
msg.StatusCode == (HttpStatusCode)429 ||
(int)msg.StatusCode >= 500)
});
// (Optional) add a circuit breaker:
// builder.AddCircuitBreaker(new HttpCircuitBreakerStrategyOptions
// {
// FailureRatio = 0.2,
// SamplingDuration = TimeSpan.FromSeconds(30),
// MinimumThroughput = 20,
// BreakDuration = TimeSpan.FromSeconds(15)
// });
});
return services; return services;
} }
//public static IServiceCollection AddDLSiteClient(this IServiceCollection services)
//{
// var retryPolicy = HttpPolicyExtensions
// .HandleTransientHttpError()
// .OrResult(msg => (int)msg.StatusCode == 429) // Too Many Requests
// .WaitAndRetryAsync(new[]
// {
// TimeSpan.FromMilliseconds(200),
// TimeSpan.FromMilliseconds(500),
// TimeSpan.FromSeconds(1.5)
// });
// services.AddHttpClient<IDLSiteClient, DLSiteClient>(c =>
// {
// c.BaseAddress = new Uri("https://www.dlsite.com/");
// c.DefaultRequestHeaders.UserAgent.ParseAdd("JSMR/1.0 (+contact@example.com)");
// c.Timeout = TimeSpan.FromSeconds(15);
// })
// .AddPolicyHandler(retryPolicy);
// return services;
//}
} }

View File

@@ -20,6 +20,7 @@
<PackageReference Include="Microsoft.EntityFrameworkCore.Relational" Version="9.0.10" /> <PackageReference Include="Microsoft.EntityFrameworkCore.Relational" Version="9.0.10" />
<PackageReference Include="Microsoft.Extensions.Caching.Abstractions" Version="9.0.10" /> <PackageReference Include="Microsoft.Extensions.Caching.Abstractions" Version="9.0.10" />
<PackageReference Include="Microsoft.Extensions.Http" Version="9.0.10" /> <PackageReference Include="Microsoft.Extensions.Http" Version="9.0.10" />
<PackageReference Include="Microsoft.Extensions.Http.Resilience" Version="9.10.0" />
<PackageReference Include="Npgsql.EntityFrameworkCore.PostgreSQL" Version="9.0.4" /> <PackageReference Include="Npgsql.EntityFrameworkCore.PostgreSQL" Version="9.0.4" />
<PackageReference Include="NTextCat" Version="0.3.65" /> <PackageReference Include="NTextCat" Version="0.3.65" />
<PackageReference Include="Pomelo.EntityFrameworkCore.MySql" Version="9.0.0" /> <PackageReference Include="Pomelo.EntityFrameworkCore.MySql" Version="9.0.0" />

View File

@@ -0,0 +1,21 @@
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<OutputType>Exe</OutputType>
<TargetFramework>net9.0</TargetFramework>
<ImplicitUsings>enable</ImplicitUsings>
<Nullable>enable</Nullable>
</PropertyGroup>
<ItemGroup>
<PackageReference Include="Microsoft.Extensions.Hosting" Version="9.0.12" />
<PackageReference Include="Serilog" Version="4.3.0" />
<PackageReference Include="System.CommandLine" Version="2.0.2" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\JSMR.Application\JSMR.Application.csproj" />
<ProjectReference Include="..\JSMR.Infrastructure\JSMR.Infrastructure.csproj" />
</ItemGroup>
</Project>

View File

@@ -0,0 +1,11 @@
namespace JSMR.Worker.Options;
/// <summary>
/// Options describing a single invocation of the worker's <c>scan</c> command.
/// </summary>
public sealed class ScanOptions
{
/// <summary>
/// Name of the locale to scan. Consumers parse it case-insensitively into the
/// <c>Locale</c> enum; defaults to <c>"Japanese"</c>.
/// </summary>
public string? Locale { get; init; } = "Japanese";
/// <summary>
/// First page to scan. When <c>null</c>, <c>PagedScanRunner</c> resumes from the page
/// after the last saved checkpoint, or from page 1 when no checkpoint exists.
/// </summary>
public int? StartPage { get; init; }
/// <summary>
/// Optional inclusive upper bound on the page number; <c>null</c> means no cap.
/// </summary>
public int? EndPage { get; init; }
/// <summary>
/// Optional page-size override. <c>PagedScanRunner</c> substitutes 100 when this is <c>null</c>.
/// </summary>
public int? PageSize { get; init; }
/// <summary>
/// When <c>true</c>, the runner keeps cycling instead of returning after one pass,
/// sleeping <see cref="Interval"/> between passes.
/// </summary>
public bool Watch { get; init; }
/// <summary>
/// Delay between passes when <see cref="Watch"/> is set; defaults to five minutes.
/// </summary>
public TimeSpan Interval { get; init; } = TimeSpan.FromMinutes(5);
}

109
JSMR.Worker/Program.cs Normal file
View File

@@ -0,0 +1,109 @@
using JSMR.Application.DI;
using JSMR.Infrastructure.Data;
using JSMR.Infrastructure.DI;
using JSMR.Worker.Options;
using JSMR.Worker.Services;
using Microsoft.EntityFrameworkCore;
using Microsoft.Extensions.Configuration;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Hosting;
using System.CommandLine;
// Entry point of the JSMR worker: wires up DI and exposes a `scan` command that
// drives PagedScanRunner with options taken from the command line.
HostApplicationBuilder builder = Host.CreateApplicationBuilder(args);

//builder.Services.AddSerilog(o => o
//    .WriteTo.Console()
//    .MinimumLevel.Information());

builder.Services
    .AddApplication()
    .AddInfrastructure();

// The message names the same key that is actually read, so a missing setting
// can be diagnosed without guessing.
string connectionString = builder.Configuration.GetConnectionString("AppDb")
    ?? throw new InvalidOperationException("Missing ConnectionStrings:AppDb");

builder.Services.AddDbContextFactory<AppDbContext>(optionsBuilder =>
    optionsBuilder
        .UseMySql(connectionString, ServerVersion.AutoDetect(connectionString))
        .EnableSensitiveDataLogging(false));

// Worker services
builder.Services.AddSingleton<ICheckpointStore, FileCheckpointStore>();
builder.Services.AddTransient<PagedScanRunner>();

RootCommand rootCommand = new("JSMR worker");
Command scan = new("scan", "Scan and update the database");

// Without an explicit default the parsed value is null, which would overwrite the
// "Japanese" default declared on ScanOptions and make the runner reject the locale.
Option<string?> localeOption = new("--locale", "-l")
{
    Description = "Locale (Japanese/English)",
    Required = false,
    DefaultValueFactory = _ => "Japanese"
};

Option<int?> startOption = new("--start", "-s")
{
    Description = "Start page (default = checkpoint+1 or 1)"
};

Option<int?> endOption = new("--end", "-e")
{
    Description = "End page (optional)"
};

Option<int?> sizeOption = new("--pageSize", "-ps")
{
    Description = "Page size (default from config or 100)",
    DefaultValueFactory = _ => 100
};

Option<bool> watchOption = new("--watch", "-w")
{
    Description = "Loop forever",
    DefaultValueFactory = _ => false
};

// "-e" is already taken by --end; two options on one command must not share an alias.
Option<TimeSpan> everyOption = new("--every", "-i")
{
    Description = "Interval when --watch is set",
    DefaultValueFactory = _ => TimeSpan.FromMinutes(5)
};

scan.Add(localeOption);
scan.Add(startOption);
scan.Add(endOption);
scan.Add(sizeOption);
scan.Add(watchOption);
scan.Add(everyOption);

scan.SetAction(async (parseResult, cancellationToken) =>
{
    using IHost host = builder.Build();
    PagedScanRunner runner = host.Services.GetRequiredService<PagedScanRunner>();

    ScanOptions options = new()
    {
        Locale = parseResult.GetValue(localeOption),
        StartPage = parseResult.GetValue(startOption),
        EndPage = parseResult.GetValue(endOption),
        PageSize = parseResult.GetValue(sizeOption),
        Watch = parseResult.GetValue(watchOption),
        Interval = parseResult.GetValue(everyOption)
    };

    // Link to the token supplied by System.CommandLine so that its own termination
    // handling and the Ctrl+C handler below both stop the run.
    using CancellationTokenSource cancellationTokenSource =
        CancellationTokenSource.CreateLinkedTokenSource(cancellationToken);

    ConsoleCancelEventHandler onCancelKeyPress = (_, eventArgs) =>
    {
        eventArgs.Cancel = true; // keep the process alive so the runner can wind down
        cancellationTokenSource.Cancel();
    };

    Console.CancelKeyPress += onCancelKeyPress;
    try
    {
        await runner.RunAsync(options, cancellationTokenSource.Token);
    }
    finally
    {
        // Unsubscribe before the token source is disposed so a late Ctrl+C
        // cannot call Cancel() on a disposed instance.
        Console.CancelKeyPress -= onCancelKeyPress;
    }
});

rootCommand.Add(scan);

//rootCommand.SetAction(async (parseResult, cancellationToken) => await rootCommand.InvokeAsync("scan"));

return await rootCommand.Parse(args).InvokeAsync();

View File

@@ -0,0 +1,28 @@
namespace JSMR.Worker.Services;
/// <summary>
/// <see cref="ICheckpointStore"/> implementation that keeps one small text file per locale
/// in a <c>State</c> directory under the application's base directory.
/// </summary>
public sealed class FileCheckpointStore : ICheckpointStore
{
    private readonly string _root = Path.Combine(AppContext.BaseDirectory, "State");

    /// <summary>Creates the store and makes sure the state directory exists.</summary>
    public FileCheckpointStore() => Directory.CreateDirectory(_root);

    /// <summary>Reads the last page recorded for <paramref name="locale"/>.</summary>
    /// <param name="locale">Locale key; it becomes part of the checkpoint file name.</param>
    /// <param name="ct">Token used to cancel the file read.</param>
    /// <returns>
    /// The stored page number, or <c>null</c> when no checkpoint file exists or its
    /// content is not a valid integer.
    /// </returns>
    public async Task<int?> GetLastPageAsync(string locale, CancellationToken ct)
    {
        string path = GetCheckpointPath(locale);

        if (!File.Exists(path))
        {
            return null;
        }

        string content = await File.ReadAllTextAsync(path, ct);

        // Parse with the invariant culture so the file reads the same on every machine.
        bool parsed = int.TryParse(
            content.Trim(),
            System.Globalization.NumberStyles.Integer,
            System.Globalization.CultureInfo.InvariantCulture,
            out int page);

        if (parsed)
        {
            return page;
        }

        return null;
    }

    /// <summary>Records <paramref name="page"/> as the last page for <paramref name="locale"/>.</summary>
    /// <param name="locale">Locale key; it becomes part of the checkpoint file name.</param>
    /// <param name="page">Page number to persist.</param>
    /// <param name="ct">Token used to cancel the file write.</param>
    public Task SaveLastPageAsync(string locale, int page, CancellationToken ct)
    {
        string path = GetCheckpointPath(locale);
        string content = page.ToString(System.Globalization.CultureInfo.InvariantCulture);

        return File.WriteAllTextAsync(path, content, ct);
    }

    /// <summary>Builds the checkpoint file path for the given locale key.</summary>
    private string GetCheckpointPath(string locale) => Path.Combine(_root, $"scan.{locale}.page");
}

View File

@@ -0,0 +1,7 @@
namespace JSMR.Worker.Services;
/// <summary>
/// Persists the last page handled by a scan so that a later run can resume from it.
/// </summary>
public interface ICheckpointStore
{
/// <summary>Gets the last page recorded for the given locale key.</summary>
/// <param name="locale">Key identifying which scan the checkpoint belongs to.</param>
/// <param name="cancellationToken">Token for cancelling the operation.</param>
/// <returns>The recorded page number, or <c>null</c> when none is available.</returns>
Task<int?> GetLastPageAsync(string locale, CancellationToken cancellationToken);
/// <summary>Records <paramref name="page"/> as the last page for the given locale key.</summary>
/// <param name="locale">Key identifying which scan the checkpoint belongs to.</param>
/// <param name="page">Page number to record.</param>
/// <param name="cancellationToken">Token for cancelling the operation.</param>
Task SaveLastPageAsync(string locale, int page, CancellationToken cancellationToken);
}

View File

@@ -0,0 +1,28 @@
using JSMR.Application.Enums;
using JSMR.Application.Scanning;
using JSMR.Worker.Options;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Options;
namespace JSMR.Worker.Services;
/// <summary>
/// Runs a single scan request against <see cref="ScanVoiceWorksHandler"/> using the
/// configured <see cref="ScanOptions"/>.
/// </summary>
public sealed class ScanJob(ILogger<ScanJob> log, IOptions<ScanOptions> options, ScanVoiceWorksHandler scanVoiceWorksHandler)
{
    private const int DefaultPageNumber = 1;
    private const int DefaultPageSize = 100;

    private readonly ScanOptions _options = options.Value;

    /// <summary>Scans one page and returns when the handler has finished.</summary>
    /// <param name="cancellationToken">Token forwarded to the scan handler.</param>
    /// <exception cref="ArgumentException">
    /// The configured locale is missing or does not name a <see cref="Locale"/> value.
    /// </exception>
    public async Task RunOnceAsync(CancellationToken cancellationToken)
    {
        log.LogInformation("Starting scan: Locale={Locale}, Start Page={StartPage}, EndPage={EndPage}",
            _options.Locale, _options.StartPage, _options.EndPage);

        // TryParse copes with a null locale; the error mirrors the one PagedScanRunner raises.
        if (!Enum.TryParse(_options.Locale, ignoreCase: true, out Locale locale))
        {
            throw new ArgumentException($"Unknown locale '{_options.Locale}'.");
        }

        // Honour the configured page and size (the start page is what the log line above
        // reports); fall back to the previous fixed values when they are not set.
        ScanVoiceWorksRequest request = new(
            PageNumber: _options.StartPage ?? DefaultPageNumber,
            PageSize: _options.PageSize ?? DefaultPageSize,
            Locale: locale
        );

        await scanVoiceWorksHandler.HandleAsync(request, cancellationToken);

        log.LogInformation("Scan completed.");
    }
}

View File

@@ -0,0 +1,59 @@
using JSMR.Application.Enums;
using JSMR.Application.Scanning;
using JSMR.Worker.Options;
using Microsoft.Extensions.DependencyInjection;
using Microsoft.Extensions.Logging;
namespace JSMR.Worker.Services;
/// <summary>
/// Walks listing pages in ascending order, invoking <see cref="ScanVoiceWorksHandler"/>
/// for each page and recording a checkpoint after every page that completes.
/// </summary>
public sealed class PagedScanRunner(
    ILogger<PagedScanRunner> log,
    IServiceProvider serviceProvider,
    ICheckpointStore checkpoints)
{
    private const int DefaultPageSize = 100;

    /// <summary>Runs the scan described by <paramref name="options"/>.</summary>
    /// <param name="options">Locale, page range, page size and watch settings.</param>
    /// <param name="cancellationToken">Stops the run between pages and during the watch delay.</param>
    /// <exception cref="ArgumentException">
    /// <see cref="ScanOptions.Locale"/> does not name a defined <see cref="Locale"/> value.
    /// </exception>
    public async Task RunAsync(ScanOptions options, CancellationToken cancellationToken)
    {
        // TryParse also accepts arbitrary numeric strings, so additionally require a defined member.
        if (!Enum.TryParse(options.Locale, ignoreCase: true, out Locale locale) || !Enum.IsDefined(locale))
        {
            throw new ArgumentException($"Unknown locale '{options.Locale}'.");
        }

        // Use the canonical enum name as the checkpoint key so "japanese" and "Japanese"
        // share one checkpoint, and so a nullable string never reaches the store.
        string checkpointKey = locale.ToString();

        int pageSize = options.PageSize ?? DefaultPageSize;
        int startPage = options.StartPage
            ?? (await checkpoints.GetLastPageAsync(checkpointKey, cancellationToken)).GetValueOrDefault(0) + 1;

        while (!cancellationToken.IsCancellationRequested)
        {
            int currentPage = startPage;
            int? end = options.EndPage;

            // NOTE(review): the handler's response is not inspected, so without EndPage this
            // loop only stops on cancellation. Confirm whether an empty page should end the pass.
            for (; !cancellationToken.IsCancellationRequested && (!end.HasValue || currentPage <= end.Value); currentPage++)
            {
                // The handler is registered as scoped: give every page its own scope so its
                // dependencies are released after the page instead of piling up on the
                // provider this runner was created from.
                await using AsyncServiceScope scope = serviceProvider.CreateAsyncScope();
                ScanVoiceWorksHandler handler = scope.ServiceProvider.GetRequiredService<ScanVoiceWorksHandler>();

                log.LogInformation("Scanning page {Page} (size {Size}, locale {Locale})…", currentPage, pageSize, locale);

                ScanVoiceWorksRequest request = new(
                    PageNumber: currentPage,
                    PageSize: pageSize,
                    Locale: locale
                );

                await handler.HandleAsync(request, cancellationToken);

                // Save checkpoint only after the page has been handled successfully.
                await checkpoints.SaveLastPageAsync(checkpointKey, currentPage, cancellationToken);
            }

            if (!options.Watch)
            {
                break;
            }

            log.LogInformation("Watch mode: sleeping {Interval}…", options.Interval);

            try
            {
                await Task.Delay(options.Interval, cancellationToken);
            }
            catch (OperationCanceledException) when (cancellationToken.IsCancellationRequested)
            {
                // Cancellation while idle is a normal shutdown, just like between pages.
                break;
            }

            // Compute next "start" for next cycle:
            // - If you want to re-scan the latest N pages every loop to catch late updates,
            //   modify logic here (e.g., start = Math.Max(1, current - 2))
            startPage = currentPage; // continue from where we left off
        }
    }
}

View File

@@ -15,6 +15,8 @@ Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "JSMR.Api", "JSMR.Api\JSMR.A
EndProject EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "JSMR.UI.Blazor", "JSMR.UI.Blazor\JSMR.UI.Blazor.csproj", "{074919FA-5234-4AE6-9E37-7A02F2BD3EED}" Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "JSMR.UI.Blazor", "JSMR.UI.Blazor\JSMR.UI.Blazor.csproj", "{074919FA-5234-4AE6-9E37-7A02F2BD3EED}"
EndProject EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "JSMR.Worker", "JSMR.Worker\JSMR.Worker.csproj", "{964BD375-FAE3-4044-A09B-5C43919C9B52}"
EndProject
Global Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU Debug|Any CPU = Debug|Any CPU
@@ -45,6 +47,10 @@ Global
{074919FA-5234-4AE6-9E37-7A02F2BD3EED}.Debug|Any CPU.Build.0 = Debug|Any CPU {074919FA-5234-4AE6-9E37-7A02F2BD3EED}.Debug|Any CPU.Build.0 = Debug|Any CPU
{074919FA-5234-4AE6-9E37-7A02F2BD3EED}.Release|Any CPU.ActiveCfg = Release|Any CPU {074919FA-5234-4AE6-9E37-7A02F2BD3EED}.Release|Any CPU.ActiveCfg = Release|Any CPU
{074919FA-5234-4AE6-9E37-7A02F2BD3EED}.Release|Any CPU.Build.0 = Release|Any CPU {074919FA-5234-4AE6-9E37-7A02F2BD3EED}.Release|Any CPU.Build.0 = Release|Any CPU
{964BD375-FAE3-4044-A09B-5C43919C9B52}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{964BD375-FAE3-4044-A09B-5C43919C9B52}.Debug|Any CPU.Build.0 = Debug|Any CPU
{964BD375-FAE3-4044-A09B-5C43919C9B52}.Release|Any CPU.ActiveCfg = Release|Any CPU
{964BD375-FAE3-4044-A09B-5C43919C9B52}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection EndGlobalSection
GlobalSection(SolutionProperties) = preSolution GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE HideSolutionNode = FALSE