using HtmlAgilityPack;
using System.Globalization;
using System.Text;
using System.Web;

namespace MangaReader.Core.WebCrawlers.MangaNato;

/// <summary>
/// Web crawler that reads a MangaNato manga page and maps it onto a <see cref="MangaDTO"/>.
/// </summary>
/// <remarks>
/// All numeric and date text scraped from the page is parsed with
/// <see cref="CultureInfo.InvariantCulture"/> so the result does not depend on the
/// regional settings of the machine running the crawler.
/// </remarks>
public class MangaNatoWebCrawler : MangaWebCrawler
{
    /// <summary>URL fragment that immediately precedes the chapter number in a chapter link.</summary>
    private const string ChapterUrlMarker = "/chapter-";

    /// <summary>
    /// Downloads the page at <paramref name="url"/> and extracts the manga details from it.
    /// </summary>
    /// <param name="url">Address of the manga page.</param>
    /// <returns>A <see cref="MangaDTO"/> populated from the nodes exposed by <c>MangaNatoMangaDocument</c>.</returns>
    public override MangaDTO GetManga(string url)
    {
        HtmlDocument document = GetHtmlDocument(url);
        MangaNatoMangaDocument node = new(document);

        MangaDTO manga = new()
        {
            Title = node.TitleNode.InnerText,
            AlternateTitles = GetAlternateTitles(node.AlternateTitlesNode),
            Authors = GetAuthors(node.AuthorsNode),
            Status = GetStatus(node.StatusNode),
            Genres = GetGenres(node.GenresNode),
            UpdateDate = GetUpdateDate(node.UpdateDateNode),
            RatingPercent = GetRatingPercent(node.AverageRatingNode, node.BestRatingNode),
            Votes = int.Parse(node.VotesNode.InnerText, CultureInfo.InvariantCulture),
            Views = GetViews(node.ViewsNode),
            Description = GetTextFromNodes(node.StoryDescriptionTextNodes),
            Chapters = GetChapters(node.ChapterNodes)
        };

        return manga;
    }

    /// <summary>Splits the node text on ';' and trims each alternate title.</summary>
    private static List<string> GetAlternateTitles(HtmlNode node)
    {
        return node.InnerText.Split(';').Select(x => x.Trim()).ToList();
    }

    /// <summary>Splits the node text on '-' and trims each author name.</summary>
    private static List<string> GetAuthors(HtmlNode node)
    {
        return node.InnerText.Split('-').Select(x => x.Trim()).ToList();
    }

    /// <summary>
    /// Maps the status text to a <see cref="MangaStatus"/>; any text other than
    /// "Ongoing" or "Completed" yields <see cref="MangaStatus.Unknown"/>.
    /// </summary>
    private static MangaStatus GetStatus(HtmlNode node)
    {
        return node.InnerText switch
        {
            "Ongoing" => MangaStatus.Ongoing,
            "Completed" => MangaStatus.Complete,
            _ => MangaStatus.Unknown,
        };
    }

    /// <summary>Splits the node text on '-' and trims each genre name.</summary>
    private static List<string> GetGenres(HtmlNode node)
    {
        return node.InnerText.Split('-').Select(x => x.Trim()).ToList();
    }

    /// <summary>
    /// Parses text shaped like "&lt;date&gt; - &lt;time&gt;" into a <see cref="DateTime"/>.
    /// </summary>
    /// <exception cref="FormatException">The date or the time part cannot be parsed.</exception>
    private static DateTime GetUpdateDate(HtmlNode node)
    {
        List<string> dateAndTime = node.InnerText.Split('-').Select(x => x.Trim()).ToList();

        DateOnly date = DateOnly.Parse(dateAndTime[0], CultureInfo.InvariantCulture);
        TimeOnly time = TimeOnly.Parse(dateAndTime[1], CultureInfo.InvariantCulture);

        return date.ToDateTime(time);
    }

    /// <summary>
    /// Converts a view counter such as "12345", "12,345", "1.2K" or "3.4M" into a number.
    /// </summary>
    /// <returns>
    /// The view count, or 0 when the text is empty, is not numeric, or ends in a suffix
    /// that <see cref="GetMultiplier"/> does not recognise.
    /// </returns>
    private static long GetViews(HtmlNode node)
    {
        string text = node.InnerText.Trim();
        if (text.Length == 0)
        {
            // Nothing to parse; also protects the "strip last character" slice below.
            return 0;
        }

        // Plain integer. Parsed as long because the return type is long and
        // counters can legitimately exceed int.MaxValue.
        if (long.TryParse(text, NumberStyles.Integer | NumberStyles.AllowThousands, CultureInfo.InvariantCulture, out long number))
        {
            return number;
        }

        // Abbreviated form: numeric part followed by a one-character magnitude suffix.
        ReadOnlySpan<char> shortText = text.AsSpan(0, text.Length - 1);
        if (!double.TryParse(shortText, NumberStyles.Float | NumberStyles.AllowThousands, CultureInfo.InvariantCulture, out double formattedNumber))
        {
            return 0;
        }

        long multiplier = GetMultiplier(text[^1]);

        // Round rather than truncate so floating-point error in the product
        // (e.g. 1149.9999999999998) does not drop a view.
        double views = Math.Round(formattedNumber * multiplier);

        if (!double.IsFinite(views))
        {
            return 0;
        }

        // Saturate instead of overflowing: casting an out-of-range double to long is unspecified.
        if (views >= long.MaxValue)
        {
            return long.MaxValue;
        }

        if (views <= long.MinValue)
        {
            return long.MinValue;
        }

        return (long)views;
    }

    /// <summary>
    /// Returns the factor for a magnitude suffix ('K', 'M', 'B', 'T'); 0 for anything else.
    /// </summary>
    private static long GetMultiplier(char c)
    {
        return c switch
        {
            'K' => 1_000,
            'M' => 1_000_000,
            'B' => 1_000_000_000,
            'T' => 1_000_000_000_000,
            _ => 0,
        };
    }

    /// <summary>
    /// Expresses the average rating as a whole percentage of the best possible rating.
    /// </summary>
    /// <returns>The rounded percentage, or 0 when the best rating is 0.</returns>
    /// <exception cref="FormatException">Either node does not contain a number.</exception>
    private static int GetRatingPercent(HtmlNode averageNode, HtmlNode bestNode)
    {
        double average = double.Parse(averageNode.InnerText, CultureInfo.InvariantCulture);
        double best = double.Parse(bestNode.InnerText, CultureInfo.InvariantCulture);

        if (best == 0)
        {
            // Avoid dividing by zero: the NaN/Infinity result has no defined int conversion.
            return 0;
        }

        return (int)Math.Round(average / best * 100);
    }

    /// <summary>
    /// Builds a chapter entry for every row in <paramref name="chapterNodes"/>.
    /// </summary>
    /// <param name="chapterNodes">
    /// Chapter row nodes. May be null, which is treated as "no chapters"
    /// (HtmlAgilityPack node selection yields null rather than an empty collection when nothing matches).
    /// </param>
    /// <returns>The chapters in the order the rows appear; empty when there are none.</returns>
    private static List<MangaChapterDTO> GetChapters(HtmlNodeCollection chapterNodes)
    {
        List<MangaChapterDTO> chapters = [];

        if (chapterNodes is null)
        {
            return chapters;
        }

        foreach (HtmlNode node in chapterNodes)
        {
            HtmlNode chapterNameNode = node.SelectSingleNode(".//a[contains(@class, 'chapter-name')]");
            HtmlNode chapterViewNode = node.SelectSingleNode(".//span[contains(@class, 'chapter-view')]");
            HtmlNode chapterTimeNode = node.SelectSingleNode(".//span[contains(@class, 'chapter-time')]");

            MangaChapterDTO chapter = new()
            {
                Number = GetChapterNumber(chapterNameNode),
                Name = chapterNameNode.InnerText,
                Url = chapterNameNode.Attributes["href"].Value,
                Views = GetViews(chapterViewNode),
                UploadDate = DateTime.Parse(chapterTimeNode.Attributes["title"].Value, CultureInfo.InvariantCulture)
            };

            chapters.Add(chapter);
        }

        return chapters;
    }

    /// <summary>
    /// Reads the chapter number from the text that follows "/chapter-" in the link's href.
    /// </summary>
    /// <returns>
    /// The chapter number, or 0 when the href has no "/chapter-" segment or the text
    /// after it is not a number.
    /// </returns>
    private static float GetChapterNumber(HtmlNode chapterNameNode)
    {
        string url = chapterNameNode.Attributes["href"].Value;

        // Ordinal: this is a URL fragment, not linguistic text.
        int index = url.IndexOf(ChapterUrlMarker, StringComparison.Ordinal);
        if (index == -1)
        {
            return 0;
        }

        string chapterNumber = url[(index + ChapterUrlMarker.Length)..];

        // Fall back to 0 on an unparsable slug, matching the "marker missing" case above.
        return float.TryParse(chapterNumber, NumberStyles.Float, CultureInfo.InvariantCulture, out float parsed)
            ? parsed
            : 0;
    }

    /// <summary>
    /// Concatenates the HTML-decoded text of <paramref name="nodes"/>, emitting a line
    /// break for every <c>br</c> element and trimming each text fragment.
    /// </summary>
    private static string GetTextFromNodes(List<HtmlNode> nodes)
    {
        StringBuilder stringBuilder = new();

        foreach (HtmlNode node in nodes)
        {
            if (node.Name == "br")
            {
                stringBuilder.AppendLine();
            }
            else
            {
                // Strip CRLF pairs embedded in the markup before trimming the fragment.
                stringBuilder.Append(HttpUtility.HtmlDecode(node.InnerText).Replace("\r\n", "").Trim());
            }
        }

        return stringBuilder.ToString();
    }
}