Add project files.

This commit is contained in:
2025-05-21 19:39:09 -04:00
parent 7d2b71fe95
commit ec1713c95f
27 changed files with 5843 additions and 0 deletions

View File

@@ -0,0 +1,6 @@
namespace MangaReader.Core.WebCrawlers;
/// <summary>
/// Contract for a crawler that turns the URL of a manga page into a <see cref="MangaDTO"/>.
/// </summary>
public interface IMangaWebCrawler
{
/// <summary>
/// Retrieves the manga found at <paramref name="url"/>.
/// </summary>
/// <param name="url">Address of the manga page to read.</param>
/// <returns>The data extracted for that manga.</returns>
MangaDTO GetManga(string url);
}

View File

@@ -0,0 +1,11 @@
namespace MangaReader.Core.WebCrawlers;
/// <summary>
/// Data collected for a single entry of a manga's chapter list.
/// Only <see cref="Url"/> is mandatory; every other value is null when it is not available.
/// </summary>
public class MangaChapterDTO
{
/// <summary>Optional volume number of the chapter.</summary>
public int? Volume { get; set; }
/// <summary>Optional chapter number; a float so fractional chapters (e.g. 10.5) can be represented.</summary>
public float? Number { get; set; }
/// <summary>Optional display name of the chapter.</summary>
public string? Name { get; set; }
/// <summary>Address of the chapter; must be supplied when the object is created.</summary>
public required string Url { get; set; }
/// <summary>Optional view count of the chapter.</summary>
public long? Views { get; set; }
/// <summary>Optional upload timestamp of the chapter.</summary>
public DateTime? UploadDate { get; set; }
}

View File

@@ -0,0 +1,16 @@
namespace MangaReader.Core.WebCrawlers;
/// <summary>
/// Data collected for one manga by an <see cref="IMangaWebCrawler"/>.
/// Only <see cref="Title"/> is mandatory; list properties start out empty and the
/// remaining values are null when they are not available.
/// </summary>
public class MangaDTO
{
/// <summary>Main title of the manga; must be supplied when the object is created.</summary>
public required string Title { get; set; }
/// <summary>Optional description text.</summary>
public string? Description { get; set; }
/// <summary>Additional titles the manga is known by; empty by default.</summary>
public List<string> AlternateTitles { get; set; } = [];
/// <summary>Names of the authors; empty by default.</summary>
public List<string> Authors { get; set; } = [];
/// <summary>Publication status; defaults to <see cref="MangaStatus.Unknown"/>.</summary>
public MangaStatus Status { get; set; } = MangaStatus.Unknown;
/// <summary>Genre names; empty by default.</summary>
public List<string> Genres { get; set; } = [];
/// <summary>Optional timestamp of the last update.</summary>
public DateTime? UpdateDate { get; set; }
/// <summary>Optional total view count.</summary>
public long? Views { get; set; }
/// <summary>Optional rating expressed as a percentage (presumably 0-100 — confirm against the producing crawler).</summary>
public float? RatingPercent { get; set; }
/// <summary>Optional number of votes behind the rating.</summary>
public int? Votes { get; set; }
/// <summary>Chapters of the manga; empty by default.</summary>
public List<MangaChapterDTO> Chapters { get; set; } = [];
}

View File

@@ -0,0 +1,61 @@
using HtmlAgilityPack;
namespace MangaReader.Core.WebCrawlers.MangaNato;
/// <summary>
/// Locates, once and up front, every node of a MangaNato story page that the crawler reads.
/// The constructor performs all lookups eagerly and does not check their results, so a page
/// that lacks one of the expected elements makes construction fail with an exception.
/// </summary>
public class MangaNatoMangaDocument
{
    /// <summary>The <c>div.panel-story-info</c> container.</summary>
    public HtmlNode StoryInfoNode { get; }

    /// <summary>The first <c>h1</c> inside the story-info panel.</summary>
    public HtmlNode TitleNode { get; }

    /// <summary>The <c>div.story-info-right</c> column of the story-info panel.</summary>
    public HtmlNode StoryInfoRightNode { get; }

    /// <summary>The <c>table.variations-tableInfo</c> table.</summary>
    public HtmlNode VariationsTableInfo { get; }

    /// <summary>All <c>td.table-value</c> cells of the variations table.</summary>
    public HtmlNodeCollection VariationsTableValueNodes { get; }

    /// <summary>Value cell 0 of the variations table.</summary>
    public HtmlNode AlternateTitlesNode { get; }

    /// <summary>Value cell 1 of the variations table.</summary>
    public HtmlNode AuthorsNode { get; }

    /// <summary>Value cell 2 of the variations table.</summary>
    public HtmlNode StatusNode { get; }

    /// <summary>Value cell 3 of the variations table.</summary>
    public HtmlNode GenresNode { get; }

    /// <summary>The <c>div.story-info-right-extent</c> block.</summary>
    public HtmlNode StoryInfoRightExtentNode { get; }

    /// <summary>All <c>span.stre-value</c> elements of the extent block.</summary>
    public HtmlNodeCollection StoryInfoRightExtentValueNodes { get; }

    /// <summary>Extent value 0.</summary>
    public HtmlNode UpdateDateNode { get; }

    /// <summary>Extent value 1.</summary>
    public HtmlNode ViewsNode { get; }

    /// <summary>The <c>em</c> element typed <c>v:Review-aggregate</c>.</summary>
    public HtmlNode ReviewAggregateNode { get; }

    /// <summary>The <c>em</c> element typed <c>v:Rating</c> inside the review aggregate.</summary>
    public HtmlNode RatingNode { get; }

    /// <summary>The <c>v:average</c> property of the rating.</summary>
    public HtmlNode AverageRatingNode { get; }

    /// <summary>The <c>v:best</c> property of the rating.</summary>
    public HtmlNode BestRatingNode { get; }

    /// <summary>The <c>v:votes</c> property of the review aggregate.</summary>
    public HtmlNode VotesNode { get; set; }

    /// <summary>The <c>div.panel-story-info-description</c> block.</summary>
    public HtmlNode StoryDescriptionNode { get; }

    /// <summary>Children of the description block, excluding its first two child nodes.</summary>
    public List<HtmlNode> StoryDescriptionTextNodes { get; }

    /// <summary>The <c>div.panel-story-chapter-list</c> container.</summary>
    public HtmlNode StoryChapterListNode { get; }

    /// <summary>All <c>li.a-h</c> chapter rows of the chapter list.</summary>
    public HtmlNodeCollection ChapterNodes { get; }

    /// <summary>
    /// Resolves every node from <paramref name="document"/>.
    /// </summary>
    /// <param name="document">Parsed HTML of a MangaNato story page.</param>
    public MangaNatoMangaDocument(HtmlDocument document)
    {
        HtmlNode root = document.DocumentNode;

        // Story-info panel: title and description.
        StoryInfoNode = root.SelectSingleNode(".//div[@class='panel-story-info']");
        TitleNode = StoryInfoNode.SelectSingleNode(".//h1");
        StoryDescriptionNode = StoryInfoNode.SelectSingleNode(".//div[@class='panel-story-info-description']");
        // Skipping the first two children already yields everything that remains.
        StoryDescriptionTextNodes = StoryDescriptionNode.ChildNodes.Skip(2).ToList();

        // Right-hand column: the variations table, addressed by cell position.
        StoryInfoRightNode = StoryInfoNode.SelectSingleNode(".//div[@class='story-info-right']");
        VariationsTableInfo = StoryInfoRightNode.SelectSingleNode(".//table[@class='variations-tableInfo']");
        HtmlNodeCollection tableValues = VariationsTableInfo.SelectNodes(".//td[@class='table-value']");
        VariationsTableValueNodes = tableValues;
        AlternateTitlesNode = tableValues[0];
        AuthorsNode = tableValues[1];
        StatusNode = tableValues[2];
        GenresNode = tableValues[3];

        // Extent block: update date and view count, addressed by position.
        StoryInfoRightExtentNode = StoryInfoRightNode.SelectSingleNode(".//div[@class='story-info-right-extent']");
        HtmlNodeCollection extentValues = StoryInfoRightExtentNode.SelectNodes(".//span[@class='stre-value']");
        StoryInfoRightExtentValueNodes = extentValues;
        UpdateDateNode = extentValues[0];
        ViewsNode = extentValues[1];

        // v:Review-aggregate markup carrying the rating and vote count.
        ReviewAggregateNode = StoryInfoRightNode.SelectSingleNode(".//em[@typeof='v:Review-aggregate']");
        RatingNode = ReviewAggregateNode.SelectSingleNode(".//em[@typeof='v:Rating']");
        AverageRatingNode = RatingNode.SelectSingleNode(".//em[@property='v:average']");
        BestRatingNode = RatingNode.SelectSingleNode(".//em[@property='v:best']");
        VotesNode = ReviewAggregateNode.SelectSingleNode(".//em[@property='v:votes']");

        // Chapter list.
        StoryChapterListNode = root.SelectSingleNode(".//div[@class='panel-story-chapter-list']");
        ChapterNodes = StoryChapterListNode.SelectNodes(".//li[@class='a-h']");
    }
}

View File

@@ -0,0 +1,160 @@
using HtmlAgilityPack;
using System.Text;
using System.Web;
namespace MangaReader.Core.WebCrawlers.MangaNato;
/// <summary>
/// Crawler for MangaNato story pages: downloads the page and maps the nodes located by
/// <see cref="MangaNatoMangaDocument"/> onto a <see cref="MangaDTO"/>.
/// </summary>
public class MangaNatoWebCrawler : MangaWebCrawler
{
    // Scraped text is machine-formatted, so it is parsed culture-independently rather than
    // with the culture of whichever machine happens to run the crawler.
    private static readonly System.Globalization.CultureInfo Invariant =
        System.Globalization.CultureInfo.InvariantCulture;

    private const System.Globalization.NumberStyles DecimalStyles =
        System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands;

    private const StringSplitOptions TrimAndDropEmpty =
        StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries;

    /// <summary>
    /// Downloads the story page at <paramref name="url"/> and extracts its manga data.
    /// </summary>
    /// <param name="url">Address of the MangaNato story page.</param>
    /// <returns>The populated manga, including its chapter list.</returns>
    /// <exception cref="FormatException">A vote count, rating or update date on the page cannot be parsed.</exception>
    public override MangaDTO GetManga(string url)
    {
        HtmlDocument document = GetHtmlDocument(url);
        MangaNatoMangaDocument node = new(document);

        MangaDTO manga = new()
        {
            Title = node.TitleNode.InnerText,
            AlternateTitles = GetAlternateTitles(node.AlternateTitlesNode),
            Authors = GetAuthors(node.AuthorsNode),
            Status = GetStatus(node.StatusNode),
            Genres = GetGenres(node.GenresNode),
            UpdateDate = GetUpdateDate(node.UpdateDateNode),
            RatingPercent = GetRatingPercent(node.AverageRatingNode, node.BestRatingNode),
            Votes = int.Parse(node.VotesNode.InnerText, Invariant),
            Views = GetViews(node.ViewsNode),
            Description = GetTextFromNodes(node.StoryDescriptionTextNodes),
            Chapters = GetChapters(node.ChapterNodes)
        };

        return manga;
    }

    /// <summary>Splits the node text on ';' into trimmed, non-empty titles.</summary>
    private static List<string> GetAlternateTitles(HtmlNode node)
    {
        return [.. node.InnerText.Split(';', TrimAndDropEmpty)];
    }

    /// <summary>Splits the node text on '-' into trimmed, non-empty author names.</summary>
    private static List<string> GetAuthors(HtmlNode node)
    {
        return [.. node.InnerText.Split('-', TrimAndDropEmpty)];
    }

    /// <summary>Maps the status text to <see cref="MangaStatus"/>; unrecognised text maps to Unknown.</summary>
    private static MangaStatus GetStatus(HtmlNode node)
    {
        // Trim so surrounding whitespace in the markup does not defeat the match.
        return node.InnerText.Trim() switch
        {
            "Ongoing" => MangaStatus.Ongoing,
            "Completed" => MangaStatus.Complete,
            _ => MangaStatus.Unknown,
        };
    }

    /// <summary>Splits the node text on '-' into trimmed, non-empty genre names.</summary>
    private static List<string> GetGenres(HtmlNode node)
    {
        return [.. node.InnerText.Split('-', TrimAndDropEmpty)];
    }

    /// <summary>
    /// Parses text of the form "date - time": the part before the first '-' is read as the
    /// date and the part after it as the time.
    /// </summary>
    private static DateTime GetUpdateDate(HtmlNode node)
    {
        List<string> dateAndTime = node.InnerText.Split('-').Select(x => x.Trim()).ToList();
        DateOnly date = DateOnly.Parse(dateAndTime[0], Invariant);
        TimeOnly time = TimeOnly.Parse(dateAndTime[1], Invariant);

        return date.ToDateTime(time);
    }

    /// <summary>
    /// Reads a view count that is either a plain number ("1234", "1,234") or a number with
    /// a magnitude suffix ("12.5K", "3.4M"). Returns 0 for empty or unrecognised text.
    /// </summary>
    private static long GetViews(HtmlNode node)
    {
        string text = node.InnerText.Trim();
        if (text.Length == 0)
        {
            // Nothing to parse; slicing off a suffix below would throw on an empty string.
            return 0;
        }

        if (long.TryParse(text, System.Globalization.NumberStyles.Integer, Invariant, out long number))
        {
            return number;
        }

        // Plain numbers with grouping separators or a fractional part.
        if (double.TryParse(text, DecimalStyles, Invariant, out double plainNumber))
        {
            return (long)plainNumber;
        }

        if (!double.TryParse(text.AsSpan(0, text.Length - 1), DecimalStyles, Invariant, out double formattedNumber))
        {
            return 0;
        }

        // Stay in long: "3.5B" and larger do not fit in an int.
        return (long)(formattedNumber * GetMultiplier(text[^1]));
    }

    /// <summary>Returns the factor for a magnitude suffix, or 0 when the suffix is unknown.</summary>
    private static long GetMultiplier(char c)
    {
        return c switch
        {
            'K' => 1_000,
            'M' => 1_000_000,
            'B' => 1_000_000_000,
            'T' => 1_000_000_000_000,
            _ => 0,
        };
    }

    /// <summary>
    /// Expresses the average rating as a rounded percentage of the best possible rating.
    /// Returns 0 when the best rating is 0, since no percentage can be computed.
    /// </summary>
    private static int GetRatingPercent(HtmlNode averageNode, HtmlNode bestNode)
    {
        double average = double.Parse(averageNode.InnerText, Invariant);
        double best = double.Parse(bestNode.InnerText, Invariant);
        if (best == 0d)
        {
            return 0;
        }

        return (int)Math.Round(average / best * 100);
    }

    /// <summary>
    /// Converts the chapter rows into DTOs. Rows without a chapter link are skipped; a
    /// missing view count or upload date is reported as null.
    /// </summary>
    private static List<MangaChapterDTO> GetChapters(HtmlNodeCollection chapterNodes)
    {
        List<MangaChapterDTO> chapters = [];

        // HtmlAgilityPack's SelectNodes yields null, not an empty collection, when nothing matches.
        if (chapterNodes is null)
        {
            return chapters;
        }

        foreach (HtmlNode node in chapterNodes)
        {
            HtmlNode? chapterNameNode = node.SelectSingleNode(".//a[contains(@class, 'chapter-name')]");
            HtmlNode? chapterViewNode = node.SelectSingleNode(".//span[contains(@class, 'chapter-view')]");
            HtmlNode? chapterTimeNode = node.SelectSingleNode(".//span[contains(@class, 'chapter-time')]");

            string? chapterUrl = chapterNameNode?.Attributes["href"]?.Value;
            if (chapterNameNode is null || chapterUrl is null)
            {
                // Url is required on the DTO, so a row without a link cannot be represented.
                continue;
            }

            chapters.Add(new MangaChapterDTO
            {
                Number = GetChapterNumber(chapterNameNode),
                Name = chapterNameNode.InnerText,
                Url = chapterUrl,
                Views = chapterViewNode is null ? null : GetViews(chapterViewNode),
                UploadDate = GetUploadDate(chapterTimeNode)
            });
        }

        return chapters;
    }

    /// <summary>Reads the upload date from the node's title attribute; null when absent or unparseable.</summary>
    private static DateTime? GetUploadDate(HtmlNode? chapterTimeNode)
    {
        string? title = chapterTimeNode?.Attributes["title"]?.Value;

        return DateTime.TryParse(title, Invariant, System.Globalization.DateTimeStyles.None, out DateTime uploaded)
            ? uploaded
            : null;
    }

    /// <summary>
    /// Extracts the number that follows "/chapter-" in the link's href. Returns 0 when the
    /// href is missing, has no such marker, or the remainder is not a number.
    /// </summary>
    private static float GetChapterNumber(HtmlNode chapterNameNode)
    {
        const string marker = "/chapter-";

        string? url = chapterNameNode.Attributes["href"]?.Value;
        if (url is null)
        {
            return 0;
        }

        int index = url.IndexOf(marker, StringComparison.Ordinal);
        if (index == -1)
        {
            return 0;
        }

        string chapterNumber = url[(index + marker.Length)..];

        return float.TryParse(chapterNumber, DecimalStyles, Invariant, out float parsed) ? parsed : 0;
    }

    /// <summary>
    /// Joins the description nodes into plain text: a <c>br</c> element becomes a line
    /// break, any other node contributes its HTML-decoded, trimmed text.
    /// </summary>
    private static string GetTextFromNodes(List<HtmlNode> nodes)
    {
        StringBuilder stringBuilder = new();

        foreach (HtmlNode node in nodes)
        {
            if (node.Name == "br")
            {
                stringBuilder.AppendLine();
            }
            else
            {
                stringBuilder.Append(HttpUtility.HtmlDecode(node.InnerText).Replace("\r\n", "").Trim());
            }
        }

        return stringBuilder.ToString();
    }
}

View File

@@ -0,0 +1,8 @@
namespace MangaReader.Core.WebCrawlers;
/// <summary>
/// Publication status of a manga.
/// </summary>
public enum MangaStatus
{
// First member, so it is also the enum's default (zero) value.
Unknown,
Ongoing,
Complete
}

View File

@@ -0,0 +1,18 @@
using HtmlAgilityPack;
namespace MangaReader.Core.WebCrawlers;
/// <summary>
/// Base class for site-specific crawlers. Derived classes implement the page-to-DTO
/// mapping; this class supplies the page download.
/// </summary>
public abstract class MangaWebCrawler : IMangaWebCrawler
{
    /// <summary>
    /// Retrieves the manga found at <paramref name="url"/>.
    /// </summary>
    /// <param name="url">Address of the manga page to read.</param>
    /// <returns>The data extracted for that manga.</returns>
    public abstract MangaDTO GetManga(string url);

    /// <summary>
    /// Loads and parses the HTML page at <paramref name="url"/> with caching switched off.
    /// Virtual so a derived class can supply the document from another source.
    /// </summary>
    /// <param name="url">Address of the page to load.</param>
    /// <returns>The parsed document.</returns>
    protected virtual HtmlDocument GetHtmlDocument(string url)
    {
        var client = new HtmlWeb();
        client.UsingCacheIfExists = false;

        HtmlDocument page = client.Load(url);
        return page;
    }
}

View File

@@ -0,0 +1,21 @@
using HtmlAgilityPack;
namespace MangaReader.Core.WebCrawlers.NatoManga;
/// <summary>
/// Locates the nodes of a NatoManga page that the crawler reads. Every lookup is
/// null-tolerant: a node that is not present is exposed as null instead of failing.
/// </summary>
public class NatoMangaHtmlDocument
{
    /// <summary>The <c>ul.manga-info-text</c> list, or null when absent.</summary>
    public HtmlNode? MangaInfoTextNode { get; }

    /// <summary>The <c>h1</c> inside a list item of the info list, or null when absent.</summary>
    public HtmlNode? TitleNode { get; }

    /// <summary>The <c>li.genres</c> item of the info list, or null when absent.</summary>
    public HtmlNode? GenresNode { get; }

    /// <summary>The <c>div.chapter-list</c> container, or null when absent.</summary>
    public HtmlNode? ChapterListNode { get; }

    /// <summary>The <c>div.row</c> entries of the chapter list, or null when none were found.</summary>
    public HtmlNodeCollection? ChapterNodes { get; }

    /// <summary>
    /// Resolves the nodes from <paramref name="document"/>.
    /// </summary>
    /// <param name="document">Parsed HTML of a NatoManga page.</param>
    public NatoMangaHtmlDocument(HtmlDocument document)
    {
        HtmlNode root = document.DocumentNode;

        HtmlNode? infoList = root.SelectSingleNode(".//ul[@class='manga-info-text']");
        MangaInfoTextNode = infoList;
        TitleNode = infoList?.SelectSingleNode(".//li//h1");
        GenresNode = infoList?.SelectSingleNode(".//li[@class='genres']");

        HtmlNode? chapterList = root.SelectSingleNode(".//div[@class='chapter-list']");
        ChapterListNode = chapterList;
        ChapterNodes = chapterList?.SelectNodes(".//div[@class='row']");
    }
}

View File

@@ -0,0 +1,186 @@
using HtmlAgilityPack;
using System.Text;
using System.Web;
namespace MangaReader.Core.WebCrawlers.NatoManga;
/// <summary>
/// Crawler for NatoManga pages: downloads the page and maps the nodes located by
/// <see cref="NatoMangaHtmlDocument"/> onto a <see cref="MangaDTO"/>. Currently only the
/// title, genres and chapter list are populated.
/// </summary>
public class NatoMangaWebCrawler : MangaWebCrawler
{
    // Scraped text is machine-formatted, so it is parsed culture-independently rather than
    // with the culture of whichever machine happens to run the crawler.
    private static readonly System.Globalization.CultureInfo Invariant =
        System.Globalization.CultureInfo.InvariantCulture;

    private const System.Globalization.NumberStyles DecimalStyles =
        System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands;

    private const StringSplitOptions TrimAndDropEmpty =
        StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries;

    /// <summary>
    /// Downloads the page at <paramref name="url"/> and extracts its manga data.
    /// Missing page elements produce an empty title or empty lists rather than an exception.
    /// </summary>
    /// <param name="url">Address of the NatoManga page.</param>
    /// <returns>The manga with title, genres and chapters filled in.</returns>
    public override MangaDTO GetManga(string url)
    {
        HtmlDocument document = GetHtmlDocument(url);
        NatoMangaHtmlDocument node = new(document);

        MangaDTO manga = new()
        {
            Title = node.TitleNode?.InnerText ?? string.Empty,
            // Not extracted yet for this site; the matching helpers below are kept for when they are.
            //AlternateTitles = GetAlternateTitles(node.AlternateTitlesNode),
            //Authors = GetAuthors(node.AuthorsNode),
            //Status = GetStatus(node.StatusNode),
            Genres = GetGenres(node.GenresNode),
            //UpdateDate = GetUpdateDate(node.UpdateDateNode),
            //RatingPercent = GetRatingPercent(node.AverageRatingNode, node.BestRatingNode),
            //Votes = int.Parse(node.VotesNode.InnerText),
            //Views = GetViews(node.ViewsNode),
            //Description = GetTextFromNodes(node.StoryDescriptionTextNodes),
            Chapters = GetChapters(node.ChapterNodes)
        };

        return manga;
    }

    /// <summary>Splits the node text on ';' into trimmed, non-empty titles.</summary>
    private static List<string> GetAlternateTitles(HtmlNode node)
    {
        return [.. node.InnerText.Split(';', TrimAndDropEmpty)];
    }

    /// <summary>Splits the node text on '-' into trimmed, non-empty author names.</summary>
    private static List<string> GetAuthors(HtmlNode node)
    {
        return [.. node.InnerText.Split('-', TrimAndDropEmpty)];
    }

    /// <summary>Maps the status text to <see cref="MangaStatus"/>; unrecognised text maps to Unknown.</summary>
    private static MangaStatus GetStatus(HtmlNode node)
    {
        // Trim so surrounding whitespace in the markup does not defeat the match.
        return node.InnerText.Trim() switch
        {
            "Ongoing" => MangaStatus.Ongoing,
            "Completed" => MangaStatus.Complete,
            _ => MangaStatus.Unknown,
        };
    }

    /// <summary>
    /// Collects the trimmed text of every link inside the genres node. Returns an empty
    /// list when the node is missing or contains no links.
    /// </summary>
    private static List<string> GetGenres(HtmlNode? node)
    {
        // SelectNodes yields null, not an empty collection, when nothing matches.
        HtmlNodeCollection? genreNodes = node?.SelectNodes(".//a");
        if (genreNodes is null)
        {
            return [];
        }

        return [.. genreNodes.Select(genreNode => genreNode.InnerText.Trim())];
    }

    /// <summary>
    /// Parses text of the form "date - time": the part before the first '-' is read as the
    /// date and the part after it as the time.
    /// </summary>
    private static DateTime GetUpdateDate(HtmlNode node)
    {
        List<string> dateAndTime = node.InnerText.Split('-').Select(x => x.Trim()).ToList();
        DateOnly date = DateOnly.Parse(dateAndTime[0], Invariant);
        TimeOnly time = TimeOnly.Parse(dateAndTime[1], Invariant);

        return date.ToDateTime(time);
    }

    /// <summary>
    /// Reads a view count that is either a plain number ("1234", "1,234") or a number with
    /// a magnitude suffix ("12.5K", "3.4M"). Returns 0 for empty or unrecognised text.
    /// </summary>
    private static long GetViews(HtmlNode node)
    {
        string text = node.InnerText.Trim();
        if (text.Length == 0)
        {
            // Nothing to parse; slicing off a suffix below would throw on an empty string.
            return 0;
        }

        if (long.TryParse(text, System.Globalization.NumberStyles.Integer, Invariant, out long number))
        {
            return number;
        }

        // Plain numbers with grouping separators or a fractional part.
        if (double.TryParse(text, DecimalStyles, Invariant, out double plainNumber))
        {
            return (long)plainNumber;
        }

        if (!double.TryParse(text.AsSpan(0, text.Length - 1), DecimalStyles, Invariant, out double formattedNumber))
        {
            return 0;
        }

        // Stay in long: "3.5B" and larger do not fit in an int.
        return (long)(formattedNumber * GetMultiplier(text[^1]));
    }

    /// <summary>Returns the factor for a magnitude suffix, or 0 when the suffix is unknown.</summary>
    private static long GetMultiplier(char c)
    {
        return c switch
        {
            'K' => 1_000,
            'M' => 1_000_000,
            'B' => 1_000_000_000,
            'T' => 1_000_000_000_000,
            _ => 0,
        };
    }

    /// <summary>
    /// Expresses the average rating as a rounded percentage of the best possible rating.
    /// Returns 0 when the best rating is 0, since no percentage can be computed.
    /// </summary>
    private static int GetRatingPercent(HtmlNode averageNode, HtmlNode bestNode)
    {
        double average = double.Parse(averageNode.InnerText, Invariant);
        double best = double.Parse(bestNode.InnerText, Invariant);
        if (best == 0d)
        {
            return 0;
        }

        return (int)Math.Round(average / best * 100);
    }

    /// <summary>
    /// Converts the chapter rows into DTOs. Each row is expected to hold at least three
    /// spans (name link, views, time); rows that do not, or whose link has no href, are
    /// skipped. An upload date that is missing or unparseable is reported as null.
    /// </summary>
    private static List<MangaChapterDTO> GetChapters(HtmlNodeCollection? chapterNodes)
    {
        List<MangaChapterDTO> chapters = [];
        if (chapterNodes is null)
        {
            return chapters;
        }

        foreach (HtmlNode node in chapterNodes)
        {
            HtmlNodeCollection? chapterPropertyNodes = node.SelectNodes(".//span");
            if (chapterPropertyNodes is null || chapterPropertyNodes.Count < 3)
            {
                continue;
            }

            HtmlNode? chapterNameNode = chapterPropertyNodes[0].SelectSingleNode(".//a");
            HtmlNode chapterViewNode = chapterPropertyNodes[1];
            HtmlNode chapterTimeNode = chapterPropertyNodes[2];

            string? chapterUrl = chapterNameNode?.Attributes["href"]?.Value;
            if (chapterNameNode is null || chapterUrl is null)
            {
                // Url is required on the DTO, so a row without a link cannot be represented.
                continue;
            }

            chapters.Add(new MangaChapterDTO
            {
                Number = GetChapterNumber(chapterNameNode),
                Name = chapterNameNode.InnerText,
                Url = chapterUrl,
                Views = GetViews(chapterViewNode),
                UploadDate = GetUploadDate(chapterTimeNode)
            });
        }

        return chapters;
    }

    /// <summary>Reads the upload date from the node's title attribute; null when absent or unparseable.</summary>
    private static DateTime? GetUploadDate(HtmlNode chapterTimeNode)
    {
        string? title = chapterTimeNode.Attributes["title"]?.Value;

        return DateTime.TryParse(title, Invariant, System.Globalization.DateTimeStyles.None, out DateTime uploaded)
            ? uploaded
            : null;
    }

    /// <summary>
    /// Extracts the number that follows "/chapter-" in the link's href, treating a '-'
    /// inside it as the decimal separator ("chapter-10-5" is 10.5). Returns 0 when the href
    /// is missing, has no such marker, or the remainder is not a number.
    /// </summary>
    private static float GetChapterNumber(HtmlNode chapterNameNode)
    {
        const string marker = "/chapter-";

        string? url = chapterNameNode.Attributes["href"]?.Value;
        if (url is null)
        {
            return 0;
        }

        int index = url.IndexOf(marker, StringComparison.Ordinal);
        if (index == -1)
        {
            return 0;
        }

        string chapterNumber = url[(index + marker.Length)..].Replace('-', '.');

        // Invariant culture: the '.' produced above must be read as the decimal separator everywhere.
        return float.TryParse(chapterNumber, DecimalStyles, Invariant, out float parsed) ? parsed : 0;
    }

    /// <summary>
    /// Joins the description nodes into plain text: a <c>br</c> element becomes a line
    /// break, any other node contributes its HTML-decoded, trimmed text.
    /// </summary>
    private static string GetTextFromNodes(List<HtmlNode> nodes)
    {
        StringBuilder stringBuilder = new();

        foreach (HtmlNode node in nodes)
        {
            if (node.Name == "br")
            {
                stringBuilder.AppendLine();
            }
            else
            {
                stringBuilder.Append(HttpUtility.HtmlDecode(node.InnerText).Replace("\r\n", "").Trim());
            }
        }

        return stringBuilder.ToString();
    }
}