Updated web crawler logic.

This commit is contained in:
2025-05-26 22:55:03 -04:00
parent c73209ed36
commit f3404f8a2e
11 changed files with 86 additions and 51 deletions

View File

@@ -1,6 +1,8 @@
namespace MangaReader.Core.Metadata;
using MangaReader.Core.Sources;
public interface IMangaMetadataProvider
namespace MangaReader.Core.Metadata;
/// <summary>
/// Contract for a source component that produces a <see cref="SourceManga"/> from a URL string.
/// </summary>
// NOTE(review): both a synchronous and an asynchronous member are declared here —
// confirm whether GetManga is still meant to be part of the contract.
public interface IMangaMetadataProvider : IMangaSourceComponent
{
/// <summary>Synchronous lookup for the manga identified by <paramref name="url"/>.</summary>
/// <param name="url">URL string identifying the manga.</param>
/// <returns>The resulting <see cref="SourceManga"/> (declared non-nullable).</returns>
SourceManga GetManga(string url);
/// <summary>Asynchronous lookup for the manga identified by <paramref name="url"/>.</summary>
/// <param name="url">URL string identifying the manga.</param>
/// <param name="cancellationToken">Token supplied by the caller; presumably used to cancel the lookup — confirm in implementations.</param>
/// <returns>A task whose result is declared nullable, so callers must handle <c>null</c>.</returns>
Task<SourceManga?> GetMangaAsync(string url, CancellationToken cancellationToken);
}

View File

@@ -4,15 +4,18 @@ namespace MangaReader.Core.Metadata;
/// <summary>
/// Base class for metadata providers that obtain their data from HTML pages.
/// Supplies shared helpers for loading a page into an <see cref="HtmlDocument"/>;
/// derived classes implement the actual manga lookup.
/// </summary>
public abstract class MangaWebCrawler : IMangaMetadataProvider
{
    /// <summary>Synchronous lookup for the manga identified by <paramref name="url"/>.</summary>
    /// <param name="url">URL string identifying the manga.</param>
    public abstract SourceManga GetManga(string url);

    /// <summary>Identifier string supplied by the concrete crawler.</summary>
    public abstract string SourceId { get; }

    /// <summary>Asynchronous lookup for the manga identified by <paramref name="url"/>.</summary>
    /// <param name="url">URL string identifying the manga.</param>
    /// <param name="cancellationToken">Token supplied by the caller for cancelling the lookup.</param>
    /// <returns>A task whose result is declared nullable, so callers must handle <c>null</c>.</returns>
    public abstract Task<SourceManga?> GetMangaAsync(string url, CancellationToken cancellationToken);

    /// <summary>
    /// Loads <paramref name="url"/> synchronously. This blocks the calling thread;
    /// asynchronous callers should use <see cref="GetHtmlDocumentAsync"/> instead.
    /// </summary>
    /// <param name="url">Address of the page to load.</param>
    /// <returns>The loaded document.</returns>
    protected virtual HtmlDocument GetHtmlDocument(string url)
    {
        return CreateHtmlWeb().Load(url);
    }

    /// <summary>
    /// Loads <paramref name="url"/> asynchronously, forwarding
    /// <paramref name="cancellationToken"/> to the underlying web request.
    /// </summary>
    /// <param name="url">Address of the page to load.</param>
    /// <param name="cancellationToken">Token forwarded to the load call.</param>
    /// <returns>A task producing the loaded document.</returns>
    protected virtual async Task<HtmlDocument> GetHtmlDocumentAsync(string url, CancellationToken cancellationToken)
    {
        // No synchronous Load call here: returning it first would block the
        // caller and leave the awaited request below unreachable.
        return await CreateHtmlWeb().LoadFromWebAsync(url, cancellationToken);
    }

    /// <summary>Creates the loader used by both helpers, with cache use switched off.</summary>
    private static HtmlWeb CreateHtmlWeb() => new()
    {
        UsingCacheIfExists = false
    };
}

View File

@@ -1,6 +1,8 @@
namespace MangaReader.Core.Search;
using MangaReader.Core.Sources;
public interface IMangaSearchProvider
namespace MangaReader.Core.Search;
/// <summary>
/// Contract for a source component that searches by keyword and reports
/// the matches as <see cref="MangaSearchResult"/> items.
/// </summary>
public interface IMangaSearchProvider : IMangaSourceComponent
{
/// <summary>Asynchronously searches using <paramref name="keyword"/>.</summary>
/// <param name="keyword">Search text.</param>
/// <param name="cancellationToken">Token supplied by the caller; presumably used to cancel the search — confirm in implementations.</param>
/// <returns>A task producing an array of <see cref="MangaSearchResult"/>.</returns>
Task<MangaSearchResult[]> SearchAsync(string keyword, CancellationToken cancellationToken);
}

View File

@@ -5,6 +5,8 @@ namespace MangaReader.Core.Search;
public abstract class MangaSearchProviderBase<T>(IHttpService httpService) : IMangaSearchProvider
{
public abstract string SourceId { get;}
private static readonly JsonSerializerOptions _jsonSerializerOptions = new()
{
PropertyNameCaseInsensitive = true

View File

@@ -1,36 +1,32 @@
using MangaReader.Core.HttpService;
using MangaReader.Core.Metadata;
using MangaReader.Core.Metadata;
using MangaReader.Core.Sources.MangaDex.Api;
namespace MangaReader.Core.Sources.MangaDex.Metadata;
/// <summary>
/// Metadata provider for the "MangaDex" source. Extracts the manga id from a
/// MangaDex URL and requests the manga through the injected <see cref="IMangaDexClient"/>.
/// </summary>
public class MangaDexMetadataProvider(IMangaDexClient mangaDexClient) : IMangaMetadataProvider
{
    // Zero-based position of the manga id once the URL is split on '/'.
    private const int MangaGuidSegmentIndex = 4;

    /// <summary>Identifier of this source.</summary>
    public string SourceId => "MangaDex";

    /// <summary>
    /// Requests the manga referenced by <paramref name="url"/> from the MangaDex client.
    /// </summary>
    /// <param name="url">MangaDex URL whose fifth '/'-separated segment is the manga GUID.</param>
    /// <param name="cancellationToken">Token forwarded to the client call.</param>
    /// <returns><c>null</c> when the client returns no response.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="url"/> is <c>null</c>.</exception>
    /// <exception cref="ArgumentException">No GUID could be read from <paramref name="url"/>.</exception>
    /// <exception cref="NotImplementedException">
    /// The client returned a response; converting it to a <see cref="SourceManga"/> is not implemented yet.
    /// </exception>
    public async Task<SourceManga?> GetMangaAsync(string url, CancellationToken cancellationToken)
    {
        Guid mangaGuid = GetSourceMangaGuid(url);
        MangaDexResponse? mangaDexResponse = await mangaDexClient.GetMangaAsync(mangaGuid, cancellationToken);

        if (mangaDexResponse is null)
        {
            return null;
        }

        // TODO: map mangaDexResponse to a SourceManga. Until then a non-null response always throws.
        // Endpoint noted in the earlier draft of this class, kept for reference:
        // https://api.mangadex.org/manga/{id}?includes[]=artist&includes[]=author&includes[]=cover_art
        throw new NotImplementedException();
    }

    /// <summary>Reads the manga GUID from the fifth '/'-separated segment of <paramref name="url"/>.</summary>
    /// <exception cref="ArgumentNullException"><paramref name="url"/> is <c>null</c>.</exception>
    /// <exception cref="ArgumentException">The segment is missing or is not a valid GUID.</exception>
    private static Guid GetSourceMangaGuid(string url)
    {
        ArgumentNullException.ThrowIfNull(url);

        string[] parts = url.Split('/');

        if (parts.Length <= MangaGuidSegmentIndex || !Guid.TryParse(parts[MangaGuidSegmentIndex], out Guid mangaGuid))
        {
            throw new ArgumentException($"Unable to get guid from MangaDex url: {url}", nameof(url));
        }

        return mangaGuid;
    }
}

View File

@@ -4,7 +4,7 @@ using System.Text.RegularExpressions;
namespace MangaReader.Core.Sources.MangaDex.Search;
public partial class MangaDexSearchProvider(IMangaDexClient mangaDexClient) : IMangaSearchProvider, IMangaSourceComponent
public partial class MangaDexSearchProvider(IMangaDexClient mangaDexClient) : IMangaSearchProvider
{
[GeneratedRegex(@"[^a-z0-9\s-]")]
private static partial Regex InvalidSlugCharactersRegex();

View File

@@ -7,9 +7,11 @@ namespace MangaReader.Core.Sources.MangaNato.Metadata;
public class MangaNatoWebCrawler : MangaWebCrawler
{
public override SourceManga GetManga(string url)
public override string SourceId => "MangaNato";
public override async Task<SourceManga?> GetMangaAsync(string url, CancellationToken cancellationToken)
{
HtmlDocument document = GetHtmlDocument(url);
HtmlDocument document = await GetHtmlDocumentAsync(url, cancellationToken);
MangaNatoMangaDocument node = new(document);
SourceManga manga = new()

View File

@@ -3,13 +3,13 @@ using MangaReader.Core.Metadata;
namespace MangaReader.Core.Sources.NatoManga.Metadata;
public class NatoMangaWebCrawler : MangaWebCrawler, IMangaSourceComponent
public class NatoMangaWebCrawler : MangaWebCrawler
{
public string SourceId => "NatoManga";
public override string SourceId => "NatoManga";
public override SourceManga GetManga(string url)
public override async Task<SourceManga?> GetMangaAsync(string url, CancellationToken cancellationToken)
{
HtmlDocument document = GetHtmlDocument(url);
HtmlDocument document = await GetHtmlDocumentAsync(url, cancellationToken);
NatoMangaHtmlDocument node = new(document);
SourceManga manga = new()

View File

@@ -3,7 +3,7 @@ using MangaReader.Core.Sources.NatoManga.Api;
namespace MangaReader.Core.Sources.NatoManga.Search;
public partial class NatoMangaSearchProvider(INatoMangaClient natoMangaClient) : IMangaSearchProvider, IMangaSourceComponent
public partial class NatoMangaSearchProvider(INatoMangaClient natoMangaClient) : IMangaSearchProvider
{
public string SourceId => "NatoManga";