Updated web crawler logic.
This commit is contained in:
@@ -1,6 +1,8 @@
|
||||
namespace MangaReader.Core.Metadata;
|
||||
using MangaReader.Core.Sources;
|
||||
|
||||
public interface IMangaMetadataProvider
|
||||
namespace MangaReader.Core.Metadata;
|
||||
|
||||
public interface IMangaMetadataProvider : IMangaSourceComponent
|
||||
{
|
||||
SourceManga GetManga(string url);
|
||||
Task<SourceManga?> GetMangaAsync(string url, CancellationToken cancellationToken);
|
||||
}
|
||||
@@ -4,15 +4,18 @@ namespace MangaReader.Core.Metadata;
|
||||
|
||||
public abstract class MangaWebCrawler : IMangaMetadataProvider
|
||||
{
|
||||
public abstract SourceManga GetManga(string url);
|
||||
public abstract string SourceId { get; }
|
||||
public abstract Task<SourceManga?> GetMangaAsync(string url, CancellationToken cancellationToken);
|
||||
|
||||
protected virtual HtmlDocument GetHtmlDocument(string url)
|
||||
protected virtual async Task<HtmlDocument> GetHtmlDocumentAsync(string url, CancellationToken cancellationToken)
|
||||
{
|
||||
HtmlWeb web = new()
|
||||
{
|
||||
UsingCacheIfExists = false
|
||||
};
|
||||
|
||||
return web.Load(url);
|
||||
//return web.Load(url);
|
||||
|
||||
return await web.LoadFromWebAsync(url, cancellationToken);
|
||||
}
|
||||
}
|
||||
@@ -1,6 +1,8 @@
|
||||
namespace MangaReader.Core.Search;
|
||||
using MangaReader.Core.Sources;
|
||||
|
||||
public interface IMangaSearchProvider
|
||||
namespace MangaReader.Core.Search;
|
||||
|
||||
public interface IMangaSearchProvider : IMangaSourceComponent
|
||||
{
|
||||
Task<MangaSearchResult[]> SearchAsync(string keyword, CancellationToken cancellationToken);
|
||||
}
|
||||
@@ -5,6 +5,8 @@ namespace MangaReader.Core.Search;
|
||||
|
||||
public abstract class MangaSearchProviderBase<T>(IHttpService httpService) : IMangaSearchProvider
|
||||
{
|
||||
public abstract string SourceId { get;}
|
||||
|
||||
private static readonly JsonSerializerOptions _jsonSerializerOptions = new()
|
||||
{
|
||||
PropertyNameCaseInsensitive = true
|
||||
|
||||
@@ -1,36 +1,32 @@
|
||||
using MangaReader.Core.HttpService;
|
||||
using MangaReader.Core.Metadata;
|
||||
using MangaReader.Core.Metadata;
|
||||
using MangaReader.Core.Sources.MangaDex.Api;
|
||||
|
||||
namespace MangaReader.Core.Sources.MangaDex.Metadata;
|
||||
|
||||
//public class MangaDexMetadataProvider(IHttpService httpService) : IMangaMetadataProvider, IMangaSourceComponent
|
||||
//{
|
||||
// public string SourceId => "MangaDex";
|
||||
public class MangaDexMetadataProvider(IMangaDexClient mangaDexClient) : IMangaMetadataProvider
|
||||
{
|
||||
public string SourceId => "MangaDex";
|
||||
|
||||
// public async Task<SourceManga> GetManga(string url)
|
||||
// {
|
||||
// Guid mangaGuid = GetSourceMangaGuid(url);
|
||||
// await GetSomething(mangaGuid);
|
||||
public async Task<SourceManga?> GetMangaAsync(string url, CancellationToken cancellationToken)
|
||||
{
|
||||
Guid mangaGuid = GetSourceMangaGuid(url);
|
||||
MangaDexResponse? mangaDexResponse = await mangaDexClient.GetMangaAsync(mangaGuid, cancellationToken);
|
||||
|
||||
// throw new NotImplementedException();
|
||||
// }
|
||||
if (mangaDexResponse == null)
|
||||
return null;
|
||||
|
||||
// private static Guid GetSourceMangaGuid(string url)
|
||||
// {
|
||||
// string[] parts = url.Split('/');
|
||||
throw new NotImplementedException();
|
||||
}
|
||||
|
||||
// if (parts.Length < 5 || Guid.TryParse(parts[4], out Guid mangaGuid) == false)
|
||||
// {
|
||||
// throw new Exception("Unable to get guid from MangaDex url: " + url);
|
||||
// }
|
||||
private static Guid GetSourceMangaGuid(string url)
|
||||
{
|
||||
string[] parts = url.Split('/');
|
||||
|
||||
// return mangaGuid;
|
||||
// }
|
||||
if (parts.Length < 5 || Guid.TryParse(parts[4], out Guid mangaGuid) == false)
|
||||
{
|
||||
throw new Exception("Unable to get guid from MangaDex url: " + url);
|
||||
}
|
||||
|
||||
// private async Task GetSomething(Guid mangaGuid)
|
||||
// {
|
||||
// // https://api.mangadex.org/manga/ee96e2b7-9af2-4864-9656-649f4d3b6fec?includes[]=artist&includes[]=author&includes[]=cover_art
|
||||
|
||||
// await httpService.GetStringAsync($"https://api.mangadex.org/manga/{mangaGuid}/feed?translatedLanguage[]=en&limit=96&includes[]=scanlation_group&includes[]=user&order[volume]=desc&order[chapter]=desc&offset=0&contentRating[]=safe&contentRating[]=suggestive&contentRating[]=erotica&contentRating[]=pornographic");
|
||||
// }
|
||||
//}
|
||||
return mangaGuid;
|
||||
}
|
||||
}
|
||||
@@ -4,7 +4,7 @@ using System.Text.RegularExpressions;
|
||||
|
||||
namespace MangaReader.Core.Sources.MangaDex.Search;
|
||||
|
||||
public partial class MangaDexSearchProvider(IMangaDexClient mangaDexClient) : IMangaSearchProvider, IMangaSourceComponent
|
||||
public partial class MangaDexSearchProvider(IMangaDexClient mangaDexClient) : IMangaSearchProvider
|
||||
{
|
||||
[GeneratedRegex(@"[^a-z0-9\s-]")]
|
||||
private static partial Regex InvalidSlugCharactersRegex();
|
||||
|
||||
@@ -7,9 +7,11 @@ namespace MangaReader.Core.Sources.MangaNato.Metadata;
|
||||
|
||||
public class MangaNatoWebCrawler : MangaWebCrawler
|
||||
{
|
||||
public override SourceManga GetManga(string url)
|
||||
public override string SourceId => "MangaNato";
|
||||
|
||||
public override async Task<SourceManga?> GetMangaAsync(string url, CancellationToken cancellationToken)
|
||||
{
|
||||
HtmlDocument document = GetHtmlDocument(url);
|
||||
HtmlDocument document = await GetHtmlDocumentAsync(url, cancellationToken);
|
||||
MangaNatoMangaDocument node = new(document);
|
||||
|
||||
SourceManga manga = new()
|
||||
|
||||
@@ -3,13 +3,13 @@ using MangaReader.Core.Metadata;
|
||||
|
||||
namespace MangaReader.Core.Sources.NatoManga.Metadata;
|
||||
|
||||
public class NatoMangaWebCrawler : MangaWebCrawler, IMangaSourceComponent
|
||||
public class NatoMangaWebCrawler : MangaWebCrawler
|
||||
{
|
||||
public string SourceId => "NatoManga";
|
||||
public override string SourceId => "NatoManga";
|
||||
|
||||
public override SourceManga GetManga(string url)
|
||||
public override async Task<SourceManga?> GetMangaAsync(string url, CancellationToken cancellationToken)
|
||||
{
|
||||
HtmlDocument document = GetHtmlDocument(url);
|
||||
HtmlDocument document = await GetHtmlDocumentAsync(url, cancellationToken);
|
||||
NatoMangaHtmlDocument node = new(document);
|
||||
|
||||
SourceManga manga = new()
|
||||
|
||||
@@ -3,7 +3,7 @@ using MangaReader.Core.Sources.NatoManga.Api;
|
||||
|
||||
namespace MangaReader.Core.Sources.NatoManga.Search;
|
||||
|
||||
public partial class NatoMangaSearchProvider(INatoMangaClient natoMangaClient) : IMangaSearchProvider, IMangaSourceComponent
|
||||
public partial class NatoMangaSearchProvider(INatoMangaClient natoMangaClient) : IMangaSearchProvider
|
||||
{
|
||||
public string SourceId => "NatoManga";
|
||||
|
||||
|
||||
@@ -1,17 +1,31 @@
|
||||
using MangaReader.Core.Sources.NatoManga.Metadata;
|
||||
using HtmlAgilityPack;
|
||||
using MangaReader.Core.Sources.NatoManga.Metadata;
|
||||
using Shouldly;
|
||||
|
||||
namespace MangaReader.Tests.WebCrawlers.NatoManga;
|
||||
|
||||
public class NatoMangaWebCrawlerTests
|
||||
{
|
||||
class TestNatoMangaWebCrawler : NatoMangaWebCrawler
|
||||
{
|
||||
protected override Task<HtmlDocument> GetHtmlDocumentAsync(string url, CancellationToken cancellationToken)
|
||||
{
|
||||
HtmlWeb web = new()
|
||||
{
|
||||
UsingCacheIfExists = false
|
||||
};
|
||||
|
||||
return Task.FromResult(web.Load(url));
|
||||
}
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void Get_Manga()
|
||||
public async Task Get_Manga()
|
||||
{
|
||||
string sampleFilePath = Path.Combine(AppContext.BaseDirectory, "WebCrawlers", "NatoManga", "SampleMangaPage.html");
|
||||
|
||||
var webCrawler = new NatoMangaWebCrawler();
|
||||
var manga = webCrawler.GetManga(sampleFilePath);
|
||||
var webCrawler = new TestNatoMangaWebCrawler();
|
||||
var manga = await webCrawler.GetMangaAsync(sampleFilePath, CancellationToken.None);
|
||||
|
||||
manga.ShouldNotBeNull();
|
||||
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
using MangaReader.Core.Metadata;
|
||||
using HtmlAgilityPack;
|
||||
using MangaReader.Core.Metadata;
|
||||
using MangaReader.Core.Sources.MangaNato.Metadata;
|
||||
using Shouldly;
|
||||
|
||||
@@ -6,6 +7,19 @@ namespace MangaReader.Tests.WebCrawlers;
|
||||
|
||||
public class UnitTest1
|
||||
{
|
||||
class TestMangaNatoWebCrawler : MangaNatoWebCrawler
|
||||
{
|
||||
protected override Task<HtmlDocument> GetHtmlDocumentAsync(string url, CancellationToken cancellationToken)
|
||||
{
|
||||
HtmlWeb web = new()
|
||||
{
|
||||
UsingCacheIfExists = false
|
||||
};
|
||||
|
||||
return Task.FromResult(web.Load(url));
|
||||
}
|
||||
}
|
||||
|
||||
private readonly string samplesPath;
|
||||
private readonly string mangaNatoSampleFilePath;
|
||||
|
||||
@@ -16,10 +30,10 @@ public class UnitTest1
|
||||
}
|
||||
|
||||
[Fact]
|
||||
public void Get_Manga()
|
||||
public async Task Get_Manga()
|
||||
{
|
||||
var webCrawler = new MangaNatoWebCrawler();
|
||||
var manga = webCrawler.GetManga(mangaNatoSampleFilePath);
|
||||
var webCrawler = new TestMangaNatoWebCrawler();
|
||||
var manga = await webCrawler.GetMangaAsync(mangaNatoSampleFilePath, CancellationToken.None);
|
||||
|
||||
manga.ShouldNotBeNull();
|
||||
|
||||
|
||||
Reference in New Issue
Block a user