using System.Globalization;
using System.Text;
using System.Web;
using HtmlAgilityPack;
namespace MangaReader.Core.WebCrawlers.MangaNato;
/// <summary>
/// Web crawler that reads a manga page through <see cref="MangaNatoMangaDocument"/>
/// and converts its nodes into a <see cref="SourceManga"/>.
/// </summary>
/// <remarks>
/// All numeric and date text scraped from the page is parsed with
/// <see cref="CultureInfo.InvariantCulture"/> so results do not depend on the
/// regional settings of the machine running the crawler.
/// </remarks>
public class MangaNatoWebCrawler : MangaWebCrawler
{
    /// <summary>Text that precedes the chapter number inside a chapter link.</summary>
    private const string ChapterUrlMarker = "/chapter-";

    /// <summary>
    /// Loads the document for <paramref name="url"/> via <c>GetHtmlDocument</c> and
    /// maps the nodes exposed by <see cref="MangaNatoMangaDocument"/> onto a new
    /// <see cref="SourceManga"/>.
    /// </summary>
    /// <param name="url">Address of the manga page to read.</param>
    /// <returns>The populated manga description.</returns>
    /// <exception cref="FormatException">
    /// Thrown when a numeric or date field on the page cannot be parsed.
    /// </exception>
    public override SourceManga GetManga(string url)
    {
        HtmlDocument document = GetHtmlDocument(url);
        MangaNatoMangaDocument node = new(document);

        return new SourceManga
        {
            Title = node.TitleNode.InnerText,
            AlternateTitles = GetAlternateTitles(node.AlternateTitlesNode),
            Authors = GetAuthors(node.AuthorsNode),
            Status = GetStatus(node.StatusNode),
            Genres = GetGenres(node.GenresNode),
            UpdateDate = GetUpdateDate(node.UpdateDateNode),
            RatingPercent = GetRatingPercent(node.AverageRatingNode, node.BestRatingNode),
            Votes = int.Parse(node.VotesNode.InnerText, CultureInfo.InvariantCulture),
            Views = GetViews(node.ViewsNode),
            Description = GetTextFromNodes(node.StoryDescriptionTextNodes),
            Chapters = GetChapters(node.ChapterNodes),
        };
    }

    /// <summary>Splits the node text on ';' into trimmed, non-empty titles.</summary>
    private static List<string> GetAlternateTitles(HtmlNode node) => SplitTrimmed(node, ';');

    /// <summary>Splits the node text on '-' into trimmed, non-empty author names.</summary>
    // NOTE(review): a name that itself contains '-' will be split in two — confirm
    // against the separator the site actually emits.
    private static List<string> GetAuthors(HtmlNode node) => SplitTrimmed(node, '-');

    /// <summary>Splits the node text on '-' into trimmed, non-empty genre names.</summary>
    private static List<string> GetGenres(HtmlNode node) => SplitTrimmed(node, '-');

    /// <summary>
    /// Splits <paramref name="node"/>'s inner text on <paramref name="separator"/>,
    /// trimming each entry and dropping entries that are empty after trimming.
    /// </summary>
    private static List<string> SplitTrimmed(HtmlNode node, char separator)
    {
        return node.InnerText
            .Split(separator, StringSplitOptions.TrimEntries | StringSplitOptions.RemoveEmptyEntries)
            .ToList();
    }

    /// <summary>
    /// Maps the node text ("Ongoing" / "Completed") to a <see cref="MangaStatus"/>;
    /// any other text yields <see cref="MangaStatus.Unknown"/>.
    /// </summary>
    private static MangaStatus GetStatus(HtmlNode node)
    {
        // Trim so surrounding whitespace in the markup does not fall through to Unknown.
        return node.InnerText.Trim() switch
        {
            "Ongoing" => MangaStatus.Ongoing,
            "Completed" => MangaStatus.Complete,
            _ => MangaStatus.Unknown,
        };
    }

    /// <summary>
    /// Parses node text of the form "&lt;date&gt; - &lt;time&gt;" into a <see cref="DateTime"/>.
    /// When no time part is present the time defaults to midnight.
    /// </summary>
    /// <exception cref="FormatException">Thrown when the date or time text is not parseable.</exception>
    private static DateTime GetUpdateDate(HtmlNode node)
    {
        string[] parts = node.InnerText.Split('-', StringSplitOptions.TrimEntries);

        DateOnly date = DateOnly.Parse(parts[0], CultureInfo.InvariantCulture);
        TimeOnly time = parts.Length > 1
            ? TimeOnly.Parse(parts[1], CultureInfo.InvariantCulture)
            : TimeOnly.MinValue;

        return date.ToDateTime(time);
    }

    /// <summary>
    /// Parses a view count that is either a plain integer or a decimal number followed
    /// by a one-character magnitude suffix handled by <see cref="GetMultiplier"/>.
    /// </summary>
    /// <returns>
    /// The view count, or 0 when the text is empty, is not numeric, or carries an
    /// unrecognised suffix.
    /// </returns>
    private static long GetViews(HtmlNode node)
    {
        string text = node.InnerText.Trim();
        if (text.Length == 0)
        {
            // Slicing off a suffix below would throw on an empty string.
            return 0;
        }

        if (long.TryParse(
                text,
                NumberStyles.Integer | NumberStyles.AllowThousands,
                CultureInfo.InvariantCulture,
                out long exactCount))
        {
            return exactCount;
        }

        ReadOnlySpan<char> numberPart = text.AsSpan(0, text.Length - 1);
        if (!double.TryParse(
                numberPart,
                NumberStyles.Float | NumberStyles.AllowThousands,
                CultureInfo.InvariantCulture,
                out double scaledCount))
        {
            return 0;
        }

        // Cast to long, not int: with the larger multipliers the product
        // exceeds the 32-bit range.
        return (long)(scaledCount * GetMultiplier(text[^1]));
    }

    /// <summary>
    /// Returns the factor for a magnitude suffix: K = 10^3, M = 10^6, B = 10^9,
    /// T = 10^12. Any other character yields 0.
    /// </summary>
    private static long GetMultiplier(char c)
    {
        return c switch
        {
            'K' => 1_000,
            'M' => 1_000_000,
            'B' => 1_000_000_000,
            'T' => 1_000_000_000_000,
            _ => 0,
        };
    }

    /// <summary>
    /// Expresses the average rating as a whole-number percentage of the best rating.
    /// </summary>
    /// <returns>The rounded percentage, or 0 when the best rating is 0.</returns>
    /// <exception cref="FormatException">Thrown when either node's text is not a number.</exception>
    private static int GetRatingPercent(HtmlNode averageNode, HtmlNode bestNode)
    {
        double average = double.Parse(averageNode.InnerText, CultureInfo.InvariantCulture);
        double best = double.Parse(bestNode.InnerText, CultureInfo.InvariantCulture);

        // Exact zero check: dividing by it would produce NaN/Infinity, which has no
        // meaningful int conversion.
        if (best == 0)
        {
            return 0;
        }

        return (int)Math.Round(average / best * 100);
    }

    /// <summary>
    /// Converts each chapter row into a <see cref="SourceMangaChapter"/>, reading the
    /// name/link, view count and upload time from the row's child elements.
    /// </summary>
    /// <returns>The chapters in document order; empty when there are no rows.</returns>
    private static List<SourceMangaChapter> GetChapters(HtmlNodeCollection chapterNodes)
    {
        // HtmlAgilityPack's SelectNodes yields null rather than an empty collection
        // when nothing matches, so the collection may be null here.
        if (chapterNodes is null)
        {
            return [];
        }

        List<SourceMangaChapter> chapters = [];
        foreach (HtmlNode row in chapterNodes)
        {
            HtmlNode nameNode = row.SelectSingleNode(".//a[contains(@class, 'chapter-name')]");
            HtmlNode viewNode = row.SelectSingleNode(".//span[contains(@class, 'chapter-view')]");
            HtmlNode timeNode = row.SelectSingleNode(".//span[contains(@class, 'chapter-time')]");

            chapters.Add(new SourceMangaChapter
            {
                Number = GetChapterNumber(nameNode),
                Name = nameNode.InnerText,
                Url = nameNode.Attributes["href"].Value,
                Views = GetViews(viewNode),
                UploadDate = DateTime.Parse(timeNode.Attributes["title"].Value, CultureInfo.InvariantCulture),
            });
        }

        return chapters;
    }

    /// <summary>
    /// Extracts the chapter number from the text following "/chapter-" in the link's
    /// <c>href</c> attribute.
    /// </summary>
    /// <returns>The chapter number, or 0 when the link does not contain the marker.</returns>
    /// <exception cref="FormatException">Thrown when the text after the marker is not a number.</exception>
    private static float GetChapterNumber(HtmlNode chapterNameNode)
    {
        string url = chapterNameNode.Attributes["href"].Value;

        // Ordinal: this is a URL fragment, not linguistic text.
        int markerIndex = url.IndexOf(ChapterUrlMarker, StringComparison.Ordinal);
        if (markerIndex < 0)
        {
            return 0;
        }

        string chapterNumber = url[(markerIndex + ChapterUrlMarker.Length)..];
        return float.Parse(chapterNumber, CultureInfo.InvariantCulture);
    }

    /// <summary>
    /// Concatenates the text of <paramref name="nodes"/>: a <c>br</c> element becomes
    /// a line break, every other node contributes its HTML-decoded inner text with
    /// "\r\n" sequences removed and surrounding whitespace trimmed.
    /// </summary>
    private static string GetTextFromNodes(List<HtmlNode> nodes)
    {
        StringBuilder builder = new();
        foreach (HtmlNode node in nodes)
        {
            if (node.Name == "br")
            {
                builder.AppendLine();
            }
            else
            {
                builder.Append(HttpUtility.HtmlDecode(node.InnerText).Replace("\r\n", "").Trim());
            }
        }

        return builder.ToString();
    }
}