Updated web crawler logic.
This commit is contained in:
@@ -1,6 +1,8 @@
|
|||||||
namespace MangaReader.Core.Metadata;
|
using MangaReader.Core.Sources;
|
||||||
|
|
||||||
public interface IMangaMetadataProvider
|
namespace MangaReader.Core.Metadata;
|
||||||
|
|
||||||
|
public interface IMangaMetadataProvider : IMangaSourceComponent
|
||||||
{
|
{
|
||||||
SourceManga GetManga(string url);
|
Task<SourceManga?> GetMangaAsync(string url, CancellationToken cancellationToken);
|
||||||
}
|
}
|
||||||
@@ -4,15 +4,18 @@ namespace MangaReader.Core.Metadata;
|
|||||||
|
|
||||||
public abstract class MangaWebCrawler : IMangaMetadataProvider
|
public abstract class MangaWebCrawler : IMangaMetadataProvider
|
||||||
{
|
{
|
||||||
public abstract SourceManga GetManga(string url);
|
public abstract string SourceId { get; }
|
||||||
|
public abstract Task<SourceManga?> GetMangaAsync(string url, CancellationToken cancellationToken);
|
||||||
|
|
||||||
protected virtual HtmlDocument GetHtmlDocument(string url)
|
protected virtual async Task<HtmlDocument> GetHtmlDocumentAsync(string url, CancellationToken cancellationToken)
|
||||||
{
|
{
|
||||||
HtmlWeb web = new()
|
HtmlWeb web = new()
|
||||||
{
|
{
|
||||||
UsingCacheIfExists = false
|
UsingCacheIfExists = false
|
||||||
};
|
};
|
||||||
|
|
||||||
return web.Load(url);
|
//return web.Load(url);
|
||||||
|
|
||||||
|
return await web.LoadFromWebAsync(url, cancellationToken);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -1,6 +1,8 @@
|
|||||||
namespace MangaReader.Core.Search;
|
using MangaReader.Core.Sources;
|
||||||
|
|
||||||
public interface IMangaSearchProvider
|
namespace MangaReader.Core.Search;
|
||||||
|
|
||||||
|
public interface IMangaSearchProvider : IMangaSourceComponent
|
||||||
{
|
{
|
||||||
Task<MangaSearchResult[]> SearchAsync(string keyword, CancellationToken cancellationToken);
|
Task<MangaSearchResult[]> SearchAsync(string keyword, CancellationToken cancellationToken);
|
||||||
}
|
}
|
||||||
@@ -5,6 +5,8 @@ namespace MangaReader.Core.Search;
|
|||||||
|
|
||||||
public abstract class MangaSearchProviderBase<T>(IHttpService httpService) : IMangaSearchProvider
|
public abstract class MangaSearchProviderBase<T>(IHttpService httpService) : IMangaSearchProvider
|
||||||
{
|
{
|
||||||
|
public abstract string SourceId { get;}
|
||||||
|
|
||||||
private static readonly JsonSerializerOptions _jsonSerializerOptions = new()
|
private static readonly JsonSerializerOptions _jsonSerializerOptions = new()
|
||||||
{
|
{
|
||||||
PropertyNameCaseInsensitive = true
|
PropertyNameCaseInsensitive = true
|
||||||
|
|||||||
@@ -1,36 +1,32 @@
|
|||||||
using MangaReader.Core.HttpService;
|
using MangaReader.Core.Metadata;
|
||||||
using MangaReader.Core.Metadata;
|
using MangaReader.Core.Sources.MangaDex.Api;
|
||||||
|
|
||||||
namespace MangaReader.Core.Sources.MangaDex.Metadata;
|
namespace MangaReader.Core.Sources.MangaDex.Metadata;
|
||||||
|
|
||||||
//public class MangaDexMetadataProvider(IHttpService httpService) : IMangaMetadataProvider, IMangaSourceComponent
|
public class MangaDexMetadataProvider(IMangaDexClient mangaDexClient) : IMangaMetadataProvider
|
||||||
//{
|
{
|
||||||
// public string SourceId => "MangaDex";
|
public string SourceId => "MangaDex";
|
||||||
|
|
||||||
// public async Task<SourceManga> GetManga(string url)
|
public async Task<SourceManga?> GetMangaAsync(string url, CancellationToken cancellationToken)
|
||||||
// {
|
{
|
||||||
// Guid mangaGuid = GetSourceMangaGuid(url);
|
Guid mangaGuid = GetSourceMangaGuid(url);
|
||||||
// await GetSomething(mangaGuid);
|
MangaDexResponse? mangaDexResponse = await mangaDexClient.GetMangaAsync(mangaGuid, cancellationToken);
|
||||||
|
|
||||||
// throw new NotImplementedException();
|
if (mangaDexResponse == null)
|
||||||
// }
|
return null;
|
||||||
|
|
||||||
// private static Guid GetSourceMangaGuid(string url)
|
throw new NotImplementedException();
|
||||||
// {
|
}
|
||||||
// string[] parts = url.Split('/');
|
|
||||||
|
|
||||||
// if (parts.Length < 5 || Guid.TryParse(parts[4], out Guid mangaGuid) == false)
|
private static Guid GetSourceMangaGuid(string url)
|
||||||
// {
|
{
|
||||||
// throw new Exception("Unable to get guid from MangaDex url: " + url);
|
string[] parts = url.Split('/');
|
||||||
// }
|
|
||||||
|
|
||||||
// return mangaGuid;
|
if (parts.Length < 5 || Guid.TryParse(parts[4], out Guid mangaGuid) == false)
|
||||||
// }
|
{
|
||||||
|
throw new Exception("Unable to get guid from MangaDex url: " + url);
|
||||||
|
}
|
||||||
|
|
||||||
// private async Task GetSomething(Guid mangaGuid)
|
return mangaGuid;
|
||||||
// {
|
}
|
||||||
// // https://api.mangadex.org/manga/ee96e2b7-9af2-4864-9656-649f4d3b6fec?includes[]=artist&includes[]=author&includes[]=cover_art
|
}
|
||||||
|
|
||||||
// await httpService.GetStringAsync($"https://api.mangadex.org/manga/{mangaGuid}/feed?translatedLanguage[]=en&limit=96&includes[]=scanlation_group&includes[]=user&order[volume]=desc&order[chapter]=desc&offset=0&contentRating[]=safe&contentRating[]=suggestive&contentRating[]=erotica&contentRating[]=pornographic");
|
|
||||||
// }
|
|
||||||
//}
|
|
||||||
@@ -4,7 +4,7 @@ using System.Text.RegularExpressions;
|
|||||||
|
|
||||||
namespace MangaReader.Core.Sources.MangaDex.Search;
|
namespace MangaReader.Core.Sources.MangaDex.Search;
|
||||||
|
|
||||||
public partial class MangaDexSearchProvider(IMangaDexClient mangaDexClient) : IMangaSearchProvider, IMangaSourceComponent
|
public partial class MangaDexSearchProvider(IMangaDexClient mangaDexClient) : IMangaSearchProvider
|
||||||
{
|
{
|
||||||
[GeneratedRegex(@"[^a-z0-9\s-]")]
|
[GeneratedRegex(@"[^a-z0-9\s-]")]
|
||||||
private static partial Regex InvalidSlugCharactersRegex();
|
private static partial Regex InvalidSlugCharactersRegex();
|
||||||
|
|||||||
@@ -7,9 +7,11 @@ namespace MangaReader.Core.Sources.MangaNato.Metadata;
|
|||||||
|
|
||||||
public class MangaNatoWebCrawler : MangaWebCrawler
|
public class MangaNatoWebCrawler : MangaWebCrawler
|
||||||
{
|
{
|
||||||
public override SourceManga GetManga(string url)
|
public override string SourceId => "MangaNato";
|
||||||
|
|
||||||
|
public override async Task<SourceManga?> GetMangaAsync(string url, CancellationToken cancellationToken)
|
||||||
{
|
{
|
||||||
HtmlDocument document = GetHtmlDocument(url);
|
HtmlDocument document = await GetHtmlDocumentAsync(url, cancellationToken);
|
||||||
MangaNatoMangaDocument node = new(document);
|
MangaNatoMangaDocument node = new(document);
|
||||||
|
|
||||||
SourceManga manga = new()
|
SourceManga manga = new()
|
||||||
|
|||||||
@@ -3,13 +3,13 @@ using MangaReader.Core.Metadata;
|
|||||||
|
|
||||||
namespace MangaReader.Core.Sources.NatoManga.Metadata;
|
namespace MangaReader.Core.Sources.NatoManga.Metadata;
|
||||||
|
|
||||||
public class NatoMangaWebCrawler : MangaWebCrawler, IMangaSourceComponent
|
public class NatoMangaWebCrawler : MangaWebCrawler
|
||||||
{
|
{
|
||||||
public string SourceId => "NatoManga";
|
public override string SourceId => "NatoManga";
|
||||||
|
|
||||||
public override SourceManga GetManga(string url)
|
public override async Task<SourceManga?> GetMangaAsync(string url, CancellationToken cancellationToken)
|
||||||
{
|
{
|
||||||
HtmlDocument document = GetHtmlDocument(url);
|
HtmlDocument document = await GetHtmlDocumentAsync(url, cancellationToken);
|
||||||
NatoMangaHtmlDocument node = new(document);
|
NatoMangaHtmlDocument node = new(document);
|
||||||
|
|
||||||
SourceManga manga = new()
|
SourceManga manga = new()
|
||||||
|
|||||||
@@ -3,7 +3,7 @@ using MangaReader.Core.Sources.NatoManga.Api;
|
|||||||
|
|
||||||
namespace MangaReader.Core.Sources.NatoManga.Search;
|
namespace MangaReader.Core.Sources.NatoManga.Search;
|
||||||
|
|
||||||
public partial class NatoMangaSearchProvider(INatoMangaClient natoMangaClient) : IMangaSearchProvider, IMangaSourceComponent
|
public partial class NatoMangaSearchProvider(INatoMangaClient natoMangaClient) : IMangaSearchProvider
|
||||||
{
|
{
|
||||||
public string SourceId => "NatoManga";
|
public string SourceId => "NatoManga";
|
||||||
|
|
||||||
|
|||||||
@@ -1,17 +1,31 @@
|
|||||||
using MangaReader.Core.Sources.NatoManga.Metadata;
|
using HtmlAgilityPack;
|
||||||
|
using MangaReader.Core.Sources.NatoManga.Metadata;
|
||||||
using Shouldly;
|
using Shouldly;
|
||||||
|
|
||||||
namespace MangaReader.Tests.WebCrawlers.NatoManga;
|
namespace MangaReader.Tests.WebCrawlers.NatoManga;
|
||||||
|
|
||||||
public class NatoMangaWebCrawlerTests
|
public class NatoMangaWebCrawlerTests
|
||||||
{
|
{
|
||||||
|
class TestNatoMangaWebCrawler : NatoMangaWebCrawler
|
||||||
|
{
|
||||||
|
protected override Task<HtmlDocument> GetHtmlDocumentAsync(string url, CancellationToken cancellationToken)
|
||||||
|
{
|
||||||
|
HtmlWeb web = new()
|
||||||
|
{
|
||||||
|
UsingCacheIfExists = false
|
||||||
|
};
|
||||||
|
|
||||||
|
return Task.FromResult(web.Load(url));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
[Fact]
|
[Fact]
|
||||||
public void Get_Manga()
|
public async Task Get_Manga()
|
||||||
{
|
{
|
||||||
string sampleFilePath = Path.Combine(AppContext.BaseDirectory, "WebCrawlers", "NatoManga", "SampleMangaPage.html");
|
string sampleFilePath = Path.Combine(AppContext.BaseDirectory, "WebCrawlers", "NatoManga", "SampleMangaPage.html");
|
||||||
|
|
||||||
var webCrawler = new NatoMangaWebCrawler();
|
var webCrawler = new TestNatoMangaWebCrawler();
|
||||||
var manga = webCrawler.GetManga(sampleFilePath);
|
var manga = await webCrawler.GetMangaAsync(sampleFilePath, CancellationToken.None);
|
||||||
|
|
||||||
manga.ShouldNotBeNull();
|
manga.ShouldNotBeNull();
|
||||||
|
|
||||||
|
|||||||
@@ -1,4 +1,5 @@
|
|||||||
using MangaReader.Core.Metadata;
|
using HtmlAgilityPack;
|
||||||
|
using MangaReader.Core.Metadata;
|
||||||
using MangaReader.Core.Sources.MangaNato.Metadata;
|
using MangaReader.Core.Sources.MangaNato.Metadata;
|
||||||
using Shouldly;
|
using Shouldly;
|
||||||
|
|
||||||
@@ -6,6 +7,19 @@ namespace MangaReader.Tests.WebCrawlers;
|
|||||||
|
|
||||||
public class UnitTest1
|
public class UnitTest1
|
||||||
{
|
{
|
||||||
|
class TestMangaNatoWebCrawler : MangaNatoWebCrawler
|
||||||
|
{
|
||||||
|
protected override Task<HtmlDocument> GetHtmlDocumentAsync(string url, CancellationToken cancellationToken)
|
||||||
|
{
|
||||||
|
HtmlWeb web = new()
|
||||||
|
{
|
||||||
|
UsingCacheIfExists = false
|
||||||
|
};
|
||||||
|
|
||||||
|
return Task.FromResult(web.Load(url));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private readonly string samplesPath;
|
private readonly string samplesPath;
|
||||||
private readonly string mangaNatoSampleFilePath;
|
private readonly string mangaNatoSampleFilePath;
|
||||||
|
|
||||||
@@ -16,10 +30,10 @@ public class UnitTest1
|
|||||||
}
|
}
|
||||||
|
|
||||||
[Fact]
|
[Fact]
|
||||||
public void Get_Manga()
|
public async Task Get_Manga()
|
||||||
{
|
{
|
||||||
var webCrawler = new MangaNatoWebCrawler();
|
var webCrawler = new TestMangaNatoWebCrawler();
|
||||||
var manga = webCrawler.GetManga(mangaNatoSampleFilePath);
|
var manga = await webCrawler.GetMangaAsync(mangaNatoSampleFilePath, CancellationToken.None);
|
||||||
|
|
||||||
manga.ShouldNotBeNull();
|
manga.ShouldNotBeNull();
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user