using System.Globalization;
using System.Text;
using System.Web;
using HtmlAgilityPack;
namespace MangaReader.Core.WebCrawlers.NatoManga;
public class NatoMangaWebCrawler : MangaWebCrawler
{
/// <summary>
/// Loads the HTML document for <paramref name="url"/> via GetHtmlDocument and builds a
/// <see cref="SourceManga"/> from it.
/// </summary>
/// <param name="url">Address handed to GetHtmlDocument.</param>
/// <returns>
/// A manga with Title, Genres and Chapters filled in. Title is an empty string when the
/// title node is missing.
/// </returns>
public override SourceManga GetManga(string url)
{
    NatoMangaHtmlDocument page = new(GetHtmlDocument(url));

    return new SourceManga
    {
        Title = page.TitleNode?.InnerText ?? string.Empty,
        Genres = GetGenres(page.GenresNode),
        Chapters = GetChapters(page.ChapterNodes)

        // Currently not populated (left commented out):
        //AlternateTitles = GetAlternateTitles(node.AlternateTitlesNode),
        //Authors = GetAuthors(node.AuthorsNode),
        //Status = GetStatus(node.StatusNode),
        //UpdateDate = GetUpdateDate(node.UpdateDateNode),
        //RatingPercent = GetRatingPercent(node.AverageRatingNode, node.BestRatingNode),
        //Votes = int.Parse(node.VotesNode.InnerText),
        //Views = GetViews(node.ViewsNode),
        //Description = GetTextFromNodes(node.StoryDescriptionTextNodes),
    };
}
/// <summary>
/// Splits the inner text of <paramref name="node"/> on ';' into a list of alternate titles.
/// </summary>
/// <param name="node">Node whose inner text holds the semicolon-separated titles.</param>
/// <returns>The trimmed, non-empty segments in their original order.</returns>
private static List<string> GetAlternateTitles(HtmlNode node)
{
    // TrimEntries + RemoveEmptyEntries drops blank segments (empty text, a trailing ';'),
    // which a plain Split would otherwise hand back as empty titles.
    const StringSplitOptions Options = StringSplitOptions.TrimEntries | StringSplitOptions.RemoveEmptyEntries;
    return [.. node.InnerText.Split(';', Options)];
}
/// <summary>
/// Splits the inner text of <paramref name="node"/> on '-' into a list of author names.
/// </summary>
/// <param name="node">Node whose inner text holds the dash-separated authors.</param>
/// <returns>The trimmed, non-empty segments in their original order.</returns>
private static List<string> GetAuthors(HtmlNode node)
{
    // NOTE(review): a name that itself contains '-' is split as well; confirm the
    // separator the page really uses before relying on this for hyphenated names.
    // TrimEntries + RemoveEmptyEntries drops blank segments (empty text, a trailing '-').
    const StringSplitOptions Options = StringSplitOptions.TrimEntries | StringSplitOptions.RemoveEmptyEntries;
    return [.. node.InnerText.Split('-', Options)];
}
/// <summary>
/// Maps the inner text of <paramref name="node"/> to a <see cref="MangaStatus"/> value.
/// </summary>
/// <param name="node">Node whose inner text is compared exactly (case-sensitive, untrimmed).</param>
/// <returns>
/// Ongoing for "Ongoing", Complete for "Completed", and Unknown for any other text.
/// </returns>
private static MangaStatus GetStatus(HtmlNode node)
{
    string label = node.InnerText;
    switch (label)
    {
        case "Ongoing":
            return MangaStatus.Ongoing;
        case "Completed":
            return MangaStatus.Complete;
        default:
            return MangaStatus.Unknown;
    }
}
/// <summary>
/// Collects the trimmed text of every descendant anchor of <paramref name="node"/>.
/// </summary>
/// <param name="node">Node containing the genre links; may be null.</param>
/// <returns>
/// One entry per anchor, or an empty list when <paramref name="node"/> is null or the
/// anchor query yields no collection.
/// </returns>
private static List<string> GetGenres(HtmlNode? node)
{
    // The query result is null-checked, so it is declared nullable to match.
    HtmlNodeCollection? genreNodes = node?.SelectNodes(".//a");
    if (genreNodes is null)
    {
        return [];
    }

    return [.. genreNodes.Select(genreNode => genreNode.InnerText.Trim())];
}
/// <summary>
/// Parses the inner text of <paramref name="node"/>, expected as "date - time", into a
/// <see cref="DateTime"/>.
/// </summary>
/// <param name="node">Node whose inner text holds the date and time.</param>
/// <returns>The parsed date combined with the parsed time.</returns>
/// <exception cref="FormatException">The text cannot be parsed as a date/time.</exception>
private static DateTime GetUpdateDate(HtmlNode node)
{
    string text = node.InnerText.Trim();

    // Split on the LAST '-' only, so a date that itself contains dashes stays in one piece.
    int separatorIndex = text.LastIndexOf('-');
    if (separatorIndex < 0)
    {
        // No separator: let DateTime handle the whole text instead of indexing a missing part.
        return DateTime.Parse(text, CultureInfo.InvariantCulture);
    }

    string datePart = text[..separatorIndex].Trim();
    string timePart = text[(separatorIndex + 1)..].Trim();

    // Scraped text is machine-generated, so it is parsed culture-invariantly rather than
    // with whatever culture the current thread happens to use.
    DateOnly date = DateOnly.Parse(datePart, CultureInfo.InvariantCulture);
    TimeOnly time = TimeOnly.Parse(timePart, CultureInfo.InvariantCulture);
    return date.ToDateTime(time);
}
/// <summary>
/// Converts the inner text of <paramref name="node"/> into a view count.
/// </summary>
/// <remarks>
/// Accepts a plain integer, a plain decimal number (truncated), or a number followed by a
/// single suffix character that GetMultiplier translates into a scale factor.
/// </remarks>
/// <param name="node">Node whose inner text holds the view count.</param>
/// <returns>The view count, or 0 when the text is empty or not numeric.</returns>
private static long GetViews(HtmlNode node)
{
    const NumberStyles DecimalStyles = NumberStyles.Float | NumberStyles.AllowThousands;
    CultureInfo culture = CultureInfo.InvariantCulture;

    string text = node.InnerText.Trim();
    if (text.Length == 0)
    {
        // Slicing off a suffix below needs at least one character.
        return 0;
    }

    // Parse as long (not int): the method returns long and scaled counts can exceed int.MaxValue.
    if (long.TryParse(text, NumberStyles.Integer, culture, out long number))
    {
        return number;
    }

    if (double.TryParse(text, DecimalStyles, culture, out double plainNumber))
    {
        return (long)plainNumber;
    }

    // Everything except the last character must be the numeric part.
    ReadOnlySpan<char> shortText = text.AsSpan(0, text.Length - 1);
    if (!double.TryParse(shortText, DecimalStyles, culture, out double scaledNumber))
    {
        return 0;
    }

    long multiplier = GetMultiplier(text[^1]);
    return (long)(scaledNumber * multiplier);
}
/// <summary>
/// Translates a magnitude suffix character into its numeric scale factor.
/// </summary>
/// <param name="c">Suffix character; only upper-case 'K', 'M', 'B' and 'T' are recognised.</param>
/// <returns>
/// 1,000 for 'K', 1,000,000 for 'M', 1,000,000,000 for 'B', 1,000,000,000,000 for 'T',
/// and 0 for any other character.
/// </returns>
private static long GetMultiplier(char c)
{
    switch (c)
    {
        case 'K':
            return 1_000L;
        case 'M':
            return 1_000_000L;
        case 'B':
            return 1_000_000_000L;
        case 'T':
            return 1_000_000_000_000L;
        default:
            return 0L;
    }
}
/// <summary>
/// Expresses the average rating as a whole-number percentage of the best possible rating.
/// </summary>
/// <param name="averageNode">Node whose inner text is the average rating.</param>
/// <param name="bestNode">Node whose inner text is the best possible rating.</param>
/// <returns>
/// <c>average / best * 100</c> rounded to the nearest integer, or 0 when that value is not
/// finite (for example when the best rating is 0).
/// </returns>
/// <exception cref="FormatException">Either text is not a valid number.</exception>
private static int GetRatingPercent(HtmlNode averageNode, HtmlNode bestNode)
{
    // Scraped numbers use '.' as the decimal separator; parse them culture-invariantly so
    // the result does not depend on the current thread's culture.
    double average = Convert.ToDouble(averageNode.InnerText, CultureInfo.InvariantCulture);
    double best = Convert.ToDouble(bestNode.InnerText, CultureInfo.InvariantCulture);

    double percent = average / best * 100;

    // Division by zero yields Infinity/NaN, and casting those to int is not meaningful.
    return double.IsFinite(percent) ? (int)Math.Round(percent) : 0;
}
/// <summary>
/// Builds a chapter list from the chapter row nodes.
/// </summary>
/// <remarks>
/// Each row must contain at least three <c>span</c> descendants: the first holds the chapter
/// link, the second the view count, the third the upload time in its <c>title</c> attribute.
/// Rows that lack the spans, the link, or the link's <c>href</c> are skipped.
/// </remarks>
/// <param name="chapterNodes">Chapter row nodes; may be null.</param>
/// <returns>
/// The chapters in row order; empty when <paramref name="chapterNodes"/> is null.
/// UploadDate is left at <c>default(DateTime)</c> when the upload time is missing or unparsable.
/// </returns>
private static List<SourceMangaChapter> GetChapters(HtmlNodeCollection? chapterNodes)
{
    List<SourceMangaChapter> chapters = [];
    if (chapterNodes is null)
    {
        return chapters;
    }

    foreach (HtmlNode node in chapterNodes)
    {
        HtmlNodeCollection? chapterPropertyNodes = node.SelectNodes(".//span");
        if (chapterPropertyNodes is null || chapterPropertyNodes.Count < 3)
        {
            continue;
        }

        HtmlNode? chapterNameNode = chapterPropertyNodes[0].SelectSingleNode(".//a");
        HtmlNode chapterViewNode = chapterPropertyNodes[1];
        HtmlNode chapterTimeNode = chapterPropertyNodes[2];

        // A link without an href would otherwise throw a NullReferenceException and abort
        // the whole list; skip the row like the other malformed-row cases above.
        string? chapterUrl = chapterNameNode?.Attributes["href"]?.Value;
        if (chapterNameNode is null || chapterUrl is null)
        {
            continue;
        }

        // Culture-invariant and non-throwing: one bad or missing timestamp must not discard
        // every chapter. On failure uploadDate stays default(DateTime).
        string? uploadText = chapterTimeNode.Attributes["title"]?.Value;
        DateTime.TryParse(uploadText, CultureInfo.InvariantCulture, DateTimeStyles.None, out DateTime uploadDate);

        SourceMangaChapter chapter = new()
        {
            Number = GetChapterNumber(chapterNameNode),
            Name = chapterNameNode.InnerText,
            Url = chapterUrl,
            Views = GetViews(chapterViewNode),
            UploadDate = uploadDate
        };
        chapters.Add(chapter);
    }

    return chapters;
}
/// <summary>
/// Extracts the chapter number from the <c>href</c> of a chapter link.
/// </summary>
/// <remarks>
/// Takes everything after the first "/chapter-" in the URL and treats '-' as the decimal
/// point, so ".../chapter-10-5" yields 10.5.
/// </remarks>
/// <param name="chapterNameNode">Anchor node whose <c>href</c> contains the chapter slug.</param>
/// <returns>
/// The chapter number, or 0 when the <c>href</c> is missing, has no "/chapter-" segment, or
/// the text after it is not a number.
/// </returns>
private static float GetChapterNumber(HtmlNode chapterNameNode)
{
    const string ChapterMarker = "/chapter-";

    string? url = chapterNameNode.Attributes["href"]?.Value;
    if (url is null)
    {
        return 0;
    }

    // Ordinal search: the marker is a URL fragment, not linguistic text.
    int index = url.IndexOf(ChapterMarker, StringComparison.Ordinal);
    if (index == -1)
    {
        return 0;
    }

    string chapterNumber = url[(index + ChapterMarker.Length)..].Replace('-', '.');

    // Invariant culture so '.' is always the decimal separator; NumberStyles.Float excludes
    // thousands separators. TryParse keeps a non-numeric slug from throwing.
    return float.TryParse(chapterNumber, NumberStyles.Float, CultureInfo.InvariantCulture, out float number)
        ? number
        : 0;
}
/// <summary>
/// Concatenates the text of <paramref name="nodes"/> into a single string.
/// </summary>
/// <remarks>
/// A node named "br" becomes a line break. Every other node contributes its HTML-decoded
/// inner text with carriage returns and line feeds removed and surrounding whitespace trimmed.
/// </remarks>
/// <param name="nodes">Nodes to read, in order.</param>
/// <returns>The combined text.</returns>
private static string GetTextFromNodes(List<HtmlNode> nodes)
{
    StringBuilder stringBuilder = new();
    foreach (HtmlNode node in nodes)
    {
        if (node.Name == "br")
        {
            stringBuilder.AppendLine();
            continue;
        }

        // Strip '\r' and '\n' individually so LF-only (or CR-only) markup line breaks are
        // removed too, not just "\r\n" pairs.
        string text = HttpUtility.HtmlDecode(node.InnerText)
            .Replace("\r", "")
            .Replace("\n", "")
            .Trim();
        stringBuilder.Append(text);
    }

    return stringBuilder.ToString();
}
}