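Updated web crawler logic: metadata providers and web crawlers are now asynchronous (GetMangaAsync / GetHtmlDocumentAsync accept a CancellationToken), and the metadata and search provider interfaces now extend IMangaSourceComponent.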

This commit is contained in:
2025-05-26 22:55:03 -04:00
parent c73209ed36
commit f3404f8a2e
11 changed files with 86 additions and 51 deletions

View File

@@ -1,6 +1,8 @@
namespace MangaReader.Core.Metadata; using MangaReader.Core.Sources;
public interface IMangaMetadataProvider namespace MangaReader.Core.Metadata;
public interface IMangaMetadataProvider : IMangaSourceComponent
{ {
SourceManga GetManga(string url); Task<SourceManga?> GetMangaAsync(string url, CancellationToken cancellationToken);
} }

View File

@@ -4,15 +4,18 @@ namespace MangaReader.Core.Metadata;
public abstract class MangaWebCrawler : IMangaMetadataProvider public abstract class MangaWebCrawler : IMangaMetadataProvider
{ {
public abstract SourceManga GetManga(string url); public abstract string SourceId { get; }
public abstract Task<SourceManga?> GetMangaAsync(string url, CancellationToken cancellationToken);
protected virtual HtmlDocument GetHtmlDocument(string url) protected virtual async Task<HtmlDocument> GetHtmlDocumentAsync(string url, CancellationToken cancellationToken)
{ {
HtmlWeb web = new() HtmlWeb web = new()
{ {
UsingCacheIfExists = false UsingCacheIfExists = false
}; };
return web.Load(url); //return web.Load(url);
return await web.LoadFromWebAsync(url, cancellationToken);
} }
} }

View File

@@ -1,6 +1,8 @@
namespace MangaReader.Core.Search; using MangaReader.Core.Sources;
public interface IMangaSearchProvider namespace MangaReader.Core.Search;
public interface IMangaSearchProvider : IMangaSourceComponent
{ {
Task<MangaSearchResult[]> SearchAsync(string keyword, CancellationToken cancellationToken); Task<MangaSearchResult[]> SearchAsync(string keyword, CancellationToken cancellationToken);
} }

View File

@@ -5,6 +5,8 @@ namespace MangaReader.Core.Search;
public abstract class MangaSearchProviderBase<T>(IHttpService httpService) : IMangaSearchProvider public abstract class MangaSearchProviderBase<T>(IHttpService httpService) : IMangaSearchProvider
{ {
public abstract string SourceId { get;}
private static readonly JsonSerializerOptions _jsonSerializerOptions = new() private static readonly JsonSerializerOptions _jsonSerializerOptions = new()
{ {
PropertyNameCaseInsensitive = true PropertyNameCaseInsensitive = true

View File

@@ -1,36 +1,32 @@
using MangaReader.Core.HttpService; using MangaReader.Core.Metadata;
using MangaReader.Core.Metadata; using MangaReader.Core.Sources.MangaDex.Api;
namespace MangaReader.Core.Sources.MangaDex.Metadata; namespace MangaReader.Core.Sources.MangaDex.Metadata;
//public class MangaDexMetadataProvider(IHttpService httpService) : IMangaMetadataProvider, IMangaSourceComponent public class MangaDexMetadataProvider(IMangaDexClient mangaDexClient) : IMangaMetadataProvider
//{ {
// public string SourceId => "MangaDex"; public string SourceId => "MangaDex";
// public async Task<SourceManga> GetManga(string url) public async Task<SourceManga?> GetMangaAsync(string url, CancellationToken cancellationToken)
// { {
// Guid mangaGuid = GetSourceMangaGuid(url); Guid mangaGuid = GetSourceMangaGuid(url);
// await GetSomething(mangaGuid); MangaDexResponse? mangaDexResponse = await mangaDexClient.GetMangaAsync(mangaGuid, cancellationToken);
// throw new NotImplementedException(); if (mangaDexResponse == null)
// } return null;
// private static Guid GetSourceMangaGuid(string url) throw new NotImplementedException();
// { }
// string[] parts = url.Split('/');
// if (parts.Length < 5 || Guid.TryParse(parts[4], out Guid mangaGuid) == false) private static Guid GetSourceMangaGuid(string url)
// { {
// throw new Exception("Unable to get guid from MangaDex url: " + url); string[] parts = url.Split('/');
// }
// return mangaGuid; if (parts.Length < 5 || Guid.TryParse(parts[4], out Guid mangaGuid) == false)
// } {
throw new Exception("Unable to get guid from MangaDex url: " + url);
}
// private async Task GetSomething(Guid mangaGuid) return mangaGuid;
// { }
// // https://api.mangadex.org/manga/ee96e2b7-9af2-4864-9656-649f4d3b6fec?includes[]=artist&includes[]=author&includes[]=cover_art }
// await httpService.GetStringAsync($"https://api.mangadex.org/manga/{mangaGuid}/feed?translatedLanguage[]=en&limit=96&includes[]=scanlation_group&includes[]=user&order[volume]=desc&order[chapter]=desc&offset=0&contentRating[]=safe&contentRating[]=suggestive&contentRating[]=erotica&contentRating[]=pornographic");
// }
//}

View File

@@ -4,7 +4,7 @@ using System.Text.RegularExpressions;
namespace MangaReader.Core.Sources.MangaDex.Search; namespace MangaReader.Core.Sources.MangaDex.Search;
public partial class MangaDexSearchProvider(IMangaDexClient mangaDexClient) : IMangaSearchProvider, IMangaSourceComponent public partial class MangaDexSearchProvider(IMangaDexClient mangaDexClient) : IMangaSearchProvider
{ {
[GeneratedRegex(@"[^a-z0-9\s-]")] [GeneratedRegex(@"[^a-z0-9\s-]")]
private static partial Regex InvalidSlugCharactersRegex(); private static partial Regex InvalidSlugCharactersRegex();

View File

@@ -7,9 +7,11 @@ namespace MangaReader.Core.Sources.MangaNato.Metadata;
public class MangaNatoWebCrawler : MangaWebCrawler public class MangaNatoWebCrawler : MangaWebCrawler
{ {
public override SourceManga GetManga(string url) public override string SourceId => "MangaNato";
public override async Task<SourceManga?> GetMangaAsync(string url, CancellationToken cancellationToken)
{ {
HtmlDocument document = GetHtmlDocument(url); HtmlDocument document = await GetHtmlDocumentAsync(url, cancellationToken);
MangaNatoMangaDocument node = new(document); MangaNatoMangaDocument node = new(document);
SourceManga manga = new() SourceManga manga = new()

View File

@@ -3,13 +3,13 @@ using MangaReader.Core.Metadata;
namespace MangaReader.Core.Sources.NatoManga.Metadata; namespace MangaReader.Core.Sources.NatoManga.Metadata;
public class NatoMangaWebCrawler : MangaWebCrawler, IMangaSourceComponent public class NatoMangaWebCrawler : MangaWebCrawler
{ {
public string SourceId => "NatoManga"; public override string SourceId => "NatoManga";
public override SourceManga GetManga(string url) public override async Task<SourceManga?> GetMangaAsync(string url, CancellationToken cancellationToken)
{ {
HtmlDocument document = GetHtmlDocument(url); HtmlDocument document = await GetHtmlDocumentAsync(url, cancellationToken);
NatoMangaHtmlDocument node = new(document); NatoMangaHtmlDocument node = new(document);
SourceManga manga = new() SourceManga manga = new()

View File

@@ -3,7 +3,7 @@ using MangaReader.Core.Sources.NatoManga.Api;
namespace MangaReader.Core.Sources.NatoManga.Search; namespace MangaReader.Core.Sources.NatoManga.Search;
public partial class NatoMangaSearchProvider(INatoMangaClient natoMangaClient) : IMangaSearchProvider, IMangaSourceComponent public partial class NatoMangaSearchProvider(INatoMangaClient natoMangaClient) : IMangaSearchProvider
{ {
public string SourceId => "NatoManga"; public string SourceId => "NatoManga";

View File

@@ -1,17 +1,31 @@
using MangaReader.Core.Sources.NatoManga.Metadata; using HtmlAgilityPack;
using MangaReader.Core.Sources.NatoManga.Metadata;
using Shouldly; using Shouldly;
namespace MangaReader.Tests.WebCrawlers.NatoManga; namespace MangaReader.Tests.WebCrawlers.NatoManga;
public class NatoMangaWebCrawlerTests public class NatoMangaWebCrawlerTests
{ {
class TestNatoMangaWebCrawler : NatoMangaWebCrawler
{
protected override Task<HtmlDocument> GetHtmlDocumentAsync(string url, CancellationToken cancellationToken)
{
HtmlWeb web = new()
{
UsingCacheIfExists = false
};
return Task.FromResult(web.Load(url));
}
}
[Fact] [Fact]
public void Get_Manga() public async Task Get_Manga()
{ {
string sampleFilePath = Path.Combine(AppContext.BaseDirectory, "WebCrawlers", "NatoManga", "SampleMangaPage.html"); string sampleFilePath = Path.Combine(AppContext.BaseDirectory, "WebCrawlers", "NatoManga", "SampleMangaPage.html");
var webCrawler = new NatoMangaWebCrawler(); var webCrawler = new TestNatoMangaWebCrawler();
var manga = webCrawler.GetManga(sampleFilePath); var manga = await webCrawler.GetMangaAsync(sampleFilePath, CancellationToken.None);
manga.ShouldNotBeNull(); manga.ShouldNotBeNull();

View File

@@ -1,4 +1,5 @@
using MangaReader.Core.Metadata; using HtmlAgilityPack;
using MangaReader.Core.Metadata;
using MangaReader.Core.Sources.MangaNato.Metadata; using MangaReader.Core.Sources.MangaNato.Metadata;
using Shouldly; using Shouldly;
@@ -6,6 +7,19 @@ namespace MangaReader.Tests.WebCrawlers;
public class UnitTest1 public class UnitTest1
{ {
class TestMangaNatoWebCrawler : MangaNatoWebCrawler
{
protected override Task<HtmlDocument> GetHtmlDocumentAsync(string url, CancellationToken cancellationToken)
{
HtmlWeb web = new()
{
UsingCacheIfExists = false
};
return Task.FromResult(web.Load(url));
}
}
private readonly string samplesPath; private readonly string samplesPath;
private readonly string mangaNatoSampleFilePath; private readonly string mangaNatoSampleFilePath;
@@ -16,10 +30,10 @@ public class UnitTest1
} }
[Fact] [Fact]
public void Get_Manga() public async Task Get_Manga()
{ {
var webCrawler = new MangaNatoWebCrawler(); var webCrawler = new TestMangaNatoWebCrawler();
var manga = webCrawler.GetManga(mangaNatoSampleFilePath); var manga = await webCrawler.GetMangaAsync(mangaNatoSampleFilePath, CancellationToken.None);
manga.ShouldNotBeNull(); manga.ShouldNotBeNull();