Files
manga-reader/MangaReader.Core/Sources/MangaNato/Metadata/MangaNatoWebCrawler.cs
2025-06-25 10:40:03 -04:00

227 lines
6.6 KiB
C#

using HtmlAgilityPack;
using MangaReader.Core.Common;
using MangaReader.Core.Http;
using MangaReader.Core.Metadata;
using System.Globalization;
using System.Text;
using System.Web;
namespace MangaReader.Core.Sources.MangaNato.Metadata;
/// <summary>
/// Web crawler for the "MangaNato" source. Loads a manga page through the injected
/// <see cref="IHtmlLoader"/> and maps the nodes exposed by <c>MangaNatoMangaDocument</c>
/// onto a <see cref="SourceManga"/>.
/// </summary>
/// <remarks>
/// Numbers and dates are parsed with <see cref="CultureInfo.InvariantCulture"/> so the result does not
/// depend on the culture of the machine running the crawler. Text that cannot be parsed falls back to
/// the same default that is used when the corresponding node is missing (0 or <c>null</c>).
/// </remarks>
public class MangaNatoWebCrawler(IHtmlLoader htmlLoader) : MangaWebCrawler
{
    /// <summary>Marker that precedes the chapter number inside a chapter link.</summary>
    private const string ChapterUrlMarker = "/chapter-";

    /// <summary>Same styles <c>double.Parse</c>/<c>float.Parse</c> use by default.</summary>
    private const NumberStyles DecimalStyles = NumberStyles.Float | NumberStyles.AllowThousands;

    /// <inheritdoc/>
    public override string SourceId => "MangaNato";

    /// <summary>
    /// Downloads the page at <paramref name="url"/> and converts it into a <see cref="SourceManga"/>.
    /// </summary>
    /// <param name="url">Address of the manga page to load.</param>
    /// <param name="cancellationToken">Token forwarded to the HTML loader.</param>
    /// <returns>The manga described by the page; fields whose node is missing get a default value.</returns>
    public override async Task<SourceManga?> GetMangaAsync(string url, CancellationToken cancellationToken)
    {
        HtmlDocument document = await htmlLoader.GetHtmlDocumentAsync(url, cancellationToken);
        MangaNatoMangaDocument node = new(document);

        SourceManga manga = new()
        {
            Title = new()
            {
                Name = node.TitleNode?.InnerText ?? string.Empty,
                Language = Language.Unknown
            },
            AlternateTitles = GetAlternateTitles(node.AlternateTitlesNode),
            Contributors = GetContributors(node.AuthorsNode),
            Status = GetStatus(node.StatusNode),
            Genres = GetGenres(node.GenresNode),
            UpdateDate = GetUpdateDate(node.UpdateDateNode),
            RatingPercent = GetRatingPercent(node.AverageRatingNode, node.BestRatingNode),
            Votes = GetVotes(node.VotesNode),
            Views = GetViews(node.ViewsNode),
            Description = new()
            {
                Name = GetTextFromNodes(node.StoryDescriptionTextNodes),
                Language = Language.Unknown
            },
            Chapters = GetChapters(node.ChapterNodes)
        };

        return manga;
    }

    /// <summary>
    /// Splits <paramref name="text"/> on <paramref name="separator"/>, trimming every entry and
    /// dropping entries that are empty after trimming.
    /// </summary>
    private static string[] SplitEntries(string text, char separator)
    {
        return text.Split(separator, StringSplitOptions.RemoveEmptyEntries | StringSplitOptions.TrimEntries);
    }

    /// <summary>
    /// Builds one alternate title per ';'-separated entry of the node text.
    /// </summary>
    /// <returns>An empty list when <paramref name="node"/> is <c>null</c> or holds no titles.</returns>
    private static List<SourceMangaTitle> GetAlternateTitles(HtmlNode? node)
    {
        if (node is null)
        {
            return [];
        }

        return SplitEntries(node.InnerText, ';')
            .Select(title => new SourceMangaTitle
            {
                Name = title,
                Language = Language.Unknown
            })
            .ToList();
    }

    /// <summary>
    /// Builds one contributor (always with the <see cref="ContributorRole.Author"/> role) per
    /// '-'-separated entry of the node text.
    /// </summary>
    /// <returns>An empty array when <paramref name="node"/> is <c>null</c> or holds no names.</returns>
    private static SourceMangaContributor[] GetContributors(HtmlNode? node)
    {
        if (node is null)
        {
            return [];
        }

        // NOTE(review): a name that itself contains '-' is split into several contributors;
        // confirm against the site markup whether a wider separator such as " - " is safe.
        return SplitEntries(node.InnerText, '-')
            .Select(name => new SourceMangaContributor
            {
                Name = name,
                Role = ContributorRole.Author
            })
            .ToArray();
    }

    /// <summary>
    /// Maps the status text ("Ongoing" / "Completed") to a <see cref="MangaStatus"/>.
    /// </summary>
    /// <returns><see cref="MangaStatus.Unknown"/> for a missing node or any other text.</returns>
    private static MangaStatus GetStatus(HtmlNode? node)
    {
        // Trim so surrounding whitespace in the markup does not defeat the exact match.
        return node?.InnerText.Trim() switch
        {
            "Ongoing" => MangaStatus.Ongoing,
            "Completed" => MangaStatus.Complete,
            _ => MangaStatus.Unknown,
        };
    }

    /// <summary>
    /// Returns the '-'-separated genre names found in the node text.
    /// </summary>
    /// <returns>An empty list when <paramref name="node"/> is <c>null</c> or holds no genres.</returns>
    private static List<string> GetGenres(HtmlNode? node)
    {
        if (node is null)
        {
            return [];
        }

        return [.. SplitEntries(node.InnerText, '-')];
    }

    /// <summary>
    /// Parses text of the form "&lt;date&gt; - &lt;time&gt;" into a <see cref="DateTime"/>.
    /// </summary>
    /// <returns>
    /// <c>null</c> when <paramref name="node"/> is <c>null</c>, the separator is missing, or either
    /// part cannot be parsed.
    /// </returns>
    private static DateTime? GetUpdateDate(HtmlNode? node)
    {
        if (node is null)
        {
            return null;
        }

        string[] parts = node.InnerText.Split('-', StringSplitOptions.TrimEntries);
        if (parts.Length < 2)
        {
            return null;
        }

        if (!DateOnly.TryParse(parts[0], CultureInfo.InvariantCulture, DateTimeStyles.None, out DateOnly date)
            || !TimeOnly.TryParse(parts[1], CultureInfo.InvariantCulture, DateTimeStyles.None, out TimeOnly time))
        {
            return null;
        }

        return date.ToDateTime(time);
    }

    /// <summary>
    /// Parses the vote count as a plain integer.
    /// </summary>
    /// <returns>0 when <paramref name="node"/> is <c>null</c> or its text is not an integer.</returns>
    private static int GetVotes(HtmlNode? node)
    {
        if (node is null)
        {
            return 0;
        }

        return int.TryParse(node.InnerText, NumberStyles.Integer, CultureInfo.InvariantCulture, out int votes)
            ? votes
            : 0;
    }

    /// <summary>
    /// Parses a view counter that is either a plain integer ("1234") or a decimal number followed
    /// by a magnitude suffix understood by <see cref="GetMultiplier"/> ("1.2K", "3.4M", ...).
    /// </summary>
    /// <returns>
    /// The view count, or 0 when <paramref name="node"/> is <c>null</c>, the text is empty, the
    /// suffix is unknown, or the number cannot be parsed.
    /// </returns>
    private static long GetViews(HtmlNode? node)
    {
        if (node is null)
        {
            return 0;
        }

        string text = node.InnerText.Trim();
        if (text.Length == 0)
        {
            // Nothing to slice a suffix from.
            return 0;
        }

        // Parse as long: the method returns long and large counters do not fit in an int.
        if (long.TryParse(text, NumberStyles.Integer, CultureInfo.InvariantCulture, out long exactViews))
        {
            return exactViews;
        }

        long multiplier = GetMultiplier(text[^1]);
        if (multiplier == 0)
        {
            return 0;
        }

        ReadOnlySpan<char> numberText = text.AsSpan(0, text.Length - 1);
        if (!double.TryParse(numberText, DecimalStyles, CultureInfo.InvariantCulture, out double scaledViews))
        {
            return 0;
        }

        // Round instead of truncating so binary floating-point error cannot lose a unit.
        double total = Math.Round(scaledViews * multiplier);

        // Converting NaN or an out-of-range double to long is unspecified; treat it as unknown.
        if (total >= (double)long.MinValue && total < (double)long.MaxValue)
        {
            return (long)total;
        }

        return 0;
    }

    /// <summary>
    /// Returns the factor represented by a magnitude suffix (K, M, B, T), or 0 for any other character.
    /// </summary>
    private static long GetMultiplier(char c)
    {
        return c switch
        {
            'K' => 1_000,
            'M' => 1_000_000,
            'B' => 1_000_000_000,
            'T' => 1_000_000_000_000,
            _ => 0,
        };
    }

    /// <summary>
    /// Expresses the average rating as a rounded percentage of the best possible rating.
    /// </summary>
    /// <returns>
    /// 0 when either node is <c>null</c>, either text is not a number, or the best rating is not positive.
    /// </returns>
    private static int GetRatingPercent(HtmlNode? averageNode, HtmlNode? bestNode)
    {
        if (averageNode is null || bestNode is null)
        {
            return 0;
        }

        if (!double.TryParse(averageNode.InnerText, DecimalStyles, CultureInfo.InvariantCulture, out double average)
            || !double.TryParse(bestNode.InnerText, DecimalStyles, CultureInfo.InvariantCulture, out double best)
            || best <= 0)
        {
            // best <= 0 would otherwise divide by zero and cast NaN/Infinity to int.
            return 0;
        }

        return (int)Math.Round(average / best * 100);
    }

    /// <summary>
    /// Converts every chapter row into a <see cref="SourceMangaChapter"/>, reading the name link,
    /// the view counter and the upload time from the row's descendants.
    /// </summary>
    /// <returns>An empty list when <paramref name="chapterNodes"/> is <c>null</c>.</returns>
    private static List<SourceMangaChapter> GetChapters(HtmlNodeCollection? chapterNodes)
    {
        List<SourceMangaChapter> chapters = [];
        if (chapterNodes is null)
        {
            return chapters;
        }

        foreach (HtmlNode row in chapterNodes)
        {
            HtmlNode? nameNode = row.SelectSingleNode(".//a[contains(@class, 'chapter-name')]");
            HtmlNode? viewNode = row.SelectSingleNode(".//span[contains(@class, 'chapter-view')]");
            HtmlNode? timeNode = row.SelectSingleNode(".//span[contains(@class, 'chapter-time')]");

            chapters.Add(new SourceMangaChapter
            {
                Number = GetChapterNumber(nameNode),
                Title = nameNode?.InnerText ?? string.Empty,
                // The attribute indexer yields null when the attribute is absent.
                Url = nameNode?.Attributes["href"]?.Value ?? string.Empty,
                Views = GetViews(viewNode),
                UploadDate = GetUploadDate(timeNode)
            });
        }

        return chapters;
    }

    /// <summary>
    /// Parses the upload date stored in the <c>title</c> attribute of the chapter time node.
    /// </summary>
    /// <returns><c>null</c> when the node or attribute is missing or the value is not a date.</returns>
    private static DateTime? GetUploadDate(HtmlNode? chapterTimeNode)
    {
        string? title = chapterTimeNode?.Attributes["title"]?.Value;

        return DateTime.TryParse(title, CultureInfo.InvariantCulture, DateTimeStyles.None, out DateTime uploadDate)
            ? uploadDate
            : null;
    }

    /// <summary>
    /// Extracts the chapter number from the text following "/chapter-" in the link's <c>href</c>.
    /// </summary>
    /// <returns>
    /// 0 when the node or attribute is missing, the marker is not found, or the remainder is not a number.
    /// </returns>
    private static float GetChapterNumber(HtmlNode? chapterNameNode)
    {
        string? url = chapterNameNode?.Attributes["href"]?.Value;
        if (url is null)
        {
            return 0;
        }

        int markerIndex = url.IndexOf(ChapterUrlMarker, StringComparison.Ordinal);
        if (markerIndex < 0)
        {
            return 0;
        }

        ReadOnlySpan<char> numberText = url.AsSpan(markerIndex + ChapterUrlMarker.Length);

        // Invariant culture: "10.5" must stay 10.5 on machines whose culture uses ',' as decimal separator.
        return float.TryParse(numberText, DecimalStyles, CultureInfo.InvariantCulture, out float chapterNumber)
            ? chapterNumber
            : 0;
    }

    /// <summary>
    /// Concatenates the text of <paramref name="nodes"/>: a <c>br</c> node becomes a line break,
    /// any other node contributes its HTML-decoded text with "\r\n" removed and the ends trimmed.
    /// </summary>
    private static string GetTextFromNodes(List<HtmlNode> nodes)
    {
        StringBuilder builder = new();

        foreach (HtmlNode node in nodes)
        {
            if (node.Name == "br")
            {
                builder.AppendLine();
                continue;
            }

            string decoded = HttpUtility.HtmlDecode(node.InnerText);
            builder.Append(decoded.Replace("\r\n", "").Trim());
        }

        return builder.ToString();
    }
}