Add project files.
This commit is contained in:
@@ -0,0 +1,61 @@
|
||||
using HtmlAgilityPack;
|
||||
|
||||
namespace MangaReader.Core.WebCrawlers.MangaNato;
|
||||
|
||||
/// <summary>
/// Looks up the HTML nodes of a MangaNato manga page once, in the constructor,
/// and exposes each of them through a property.
/// </summary>
public class MangaNatoMangaDocument
{
    /// <summary>The <c>div</c> whose class is <c>panel-story-info</c>.</summary>
    public HtmlNode StoryInfoNode { get; }

    /// <summary>The <c>h1</c> found inside <see cref="StoryInfoNode"/>.</summary>
    public HtmlNode TitleNode { get; }

    /// <summary>The <c>div</c> whose class is <c>story-info-right</c>, inside <see cref="StoryInfoNode"/>.</summary>
    public HtmlNode StoryInfoRightNode { get; }

    /// <summary>The <c>table</c> whose class is <c>variations-tableInfo</c>.</summary>
    public HtmlNode VariationsTableInfo { get; }

    /// <summary>All <c>td</c> cells with class <c>table-value</c> inside <see cref="VariationsTableInfo"/>.</summary>
    public HtmlNodeCollection VariationsTableValueNodes { get; }

    /// <summary>Entry 0 of <see cref="VariationsTableValueNodes"/>.</summary>
    public HtmlNode AlternateTitlesNode { get; }

    /// <summary>Entry 1 of <see cref="VariationsTableValueNodes"/>.</summary>
    public HtmlNode AuthorsNode { get; }

    /// <summary>Entry 2 of <see cref="VariationsTableValueNodes"/>.</summary>
    public HtmlNode StatusNode { get; }

    /// <summary>Entry 3 of <see cref="VariationsTableValueNodes"/>.</summary>
    public HtmlNode GenresNode { get; }

    /// <summary>The <c>div</c> whose class is <c>story-info-right-extent</c>.</summary>
    public HtmlNode StoryInfoRightExtentNode { get; }

    /// <summary>All <c>span</c> elements with class <c>stre-value</c> inside <see cref="StoryInfoRightExtentNode"/>.</summary>
    public HtmlNodeCollection StoryInfoRightExtentValueNodes { get; }

    /// <summary>Entry 0 of <see cref="StoryInfoRightExtentValueNodes"/>.</summary>
    public HtmlNode UpdateDateNode { get; }

    /// <summary>Entry 1 of <see cref="StoryInfoRightExtentValueNodes"/>.</summary>
    public HtmlNode ViewsNode { get; }

    /// <summary>The <c>em</c> element with <c>typeof='v:Review-aggregate'</c>.</summary>
    public HtmlNode ReviewAggregateNode { get; }

    /// <summary>The <c>em</c> element with <c>typeof='v:Rating'</c> inside <see cref="ReviewAggregateNode"/>.</summary>
    public HtmlNode RatingNode { get; }

    /// <summary>The <c>em</c> element with <c>property='v:average'</c> inside <see cref="RatingNode"/>.</summary>
    public HtmlNode AverageRatingNode { get; }

    /// <summary>The <c>em</c> element with <c>property='v:best'</c> inside <see cref="RatingNode"/>.</summary>
    public HtmlNode BestRatingNode { get; }

    /// <summary>
    /// The <c>em</c> element with <c>property='v:votes'</c> inside <see cref="ReviewAggregateNode"/>.
    /// Unlike the other properties, this one also has a public setter.
    /// </summary>
    public HtmlNode VotesNode { get; set; }

    /// <summary>The <c>div</c> whose class is <c>panel-story-info-description</c>.</summary>
    public HtmlNode StoryDescriptionNode { get; }

    /// <summary>
    /// The child nodes of <see cref="StoryDescriptionNode"/> with the first two left out
    /// (presumably the heading that precedes the description text; verify against the page).
    /// </summary>
    public List<HtmlNode> StoryDescriptionTextNodes { get; }

    /// <summary>The <c>div</c> whose class is <c>panel-story-chapter-list</c>.</summary>
    public HtmlNode StoryChapterListNode { get; }

    /// <summary>All <c>li</c> elements with class <c>a-h</c> inside <see cref="StoryChapterListNode"/>.</summary>
    public HtmlNodeCollection ChapterNodes { get; }

    /// <summary>
    /// Resolves every node property from <paramref name="document"/> using fixed XPath queries.
    /// </summary>
    /// <param name="document">The parsed manga page.</param>
    /// <remarks>
    /// No lookup result is checked before it is used, so construction presumably fails with an
    /// exception when the page does not have the expected layout.
    /// </remarks>
    public MangaNatoMangaDocument(HtmlDocument document)
    {
        HtmlNode root = document.DocumentNode;

        // Main information panel: title and description.
        HtmlNode storyInfo = root.SelectSingleNode(".//div[@class='panel-story-info']");
        StoryInfoNode = storyInfo;
        TitleNode = storyInfo.SelectSingleNode(".//h1");

        HtmlNode description = storyInfo.SelectSingleNode(".//div[@class='panel-story-info-description']");
        StoryDescriptionNode = description;
        // Skipping two children already leaves Count - 2 items, so no explicit Take is needed.
        StoryDescriptionTextNodes = description.ChildNodes.Skip(2).ToList();

        HtmlNode infoRight = storyInfo.SelectSingleNode(".//div[@class='story-info-right']");
        StoryInfoRightNode = infoRight;

        // Value cells of the info table, addressed by their fixed position.
        HtmlNode infoTable = infoRight.SelectSingleNode(".//table[@class='variations-tableInfo']");
        VariationsTableInfo = infoTable;

        HtmlNodeCollection tableValues = infoTable.SelectNodes(".//td[@class='table-value']");
        VariationsTableValueNodes = tableValues;
        AlternateTitlesNode = tableValues[0];
        AuthorsNode = tableValues[1];
        StatusNode = tableValues[2];
        GenresNode = tableValues[3];

        // Extended info block, also addressed by position.
        HtmlNode infoExtent = infoRight.SelectSingleNode(".//div[@class='story-info-right-extent']");
        StoryInfoRightExtentNode = infoExtent;

        HtmlNodeCollection extentValues = infoExtent.SelectNodes(".//span[@class='stre-value']");
        StoryInfoRightExtentValueNodes = extentValues;
        UpdateDateNode = extentValues[0];
        ViewsNode = extentValues[1];

        // v:Review-aggregate
        HtmlNode reviewAggregate = infoRight.SelectSingleNode(".//em[@typeof='v:Review-aggregate']");
        ReviewAggregateNode = reviewAggregate;

        HtmlNode rating = reviewAggregate.SelectSingleNode(".//em[@typeof='v:Rating']");
        RatingNode = rating;
        AverageRatingNode = rating.SelectSingleNode(".//em[@property='v:average']");
        BestRatingNode = rating.SelectSingleNode(".//em[@property='v:best']");
        VotesNode = reviewAggregate.SelectSingleNode(".//em[@property='v:votes']");

        // Chapter list lives outside the story-info panel, so it is queried from the root.
        HtmlNode chapterList = root.SelectSingleNode(".//div[@class='panel-story-chapter-list']");
        StoryChapterListNode = chapterList;
        ChapterNodes = chapterList.SelectNodes(".//li[@class='a-h']");
    }
}
|
||||
160
MangaReader.Core/WebCrawlers/MangaNato/MangaNatoWebCrawler.cs
Normal file
160
MangaReader.Core/WebCrawlers/MangaNato/MangaNatoWebCrawler.cs
Normal file
@@ -0,0 +1,160 @@
|
||||
using HtmlAgilityPack;
using System.Globalization;
using System.Text;
using System.Web;
|
||||
|
||||
namespace MangaReader.Core.WebCrawlers.MangaNato;
|
||||
|
||||
/// <summary>
/// Crawler that loads a MangaNato manga page and maps its contents to a <see cref="MangaDTO"/>.
/// </summary>
/// <remarks>
/// The page is machine-readable input, so every number and date is parsed with
/// <see cref="CultureInfo.InvariantCulture"/> instead of the culture of the current thread.
/// </remarks>
public class MangaNatoWebCrawler : MangaWebCrawler
{
    /// <summary>
    /// Loads the page at <paramref name="url"/> and extracts the manga it describes.
    /// </summary>
    /// <param name="url">Address of the manga page.</param>
    /// <returns>The manga with its metadata, description and chapter list.</returns>
    public override MangaDTO GetManga(string url)
    {
        HtmlDocument document = GetHtmlDocument(url);
        MangaNatoMangaDocument page = new(document);

        MangaDTO manga = new()
        {
            Title = DecodeInnerText(page.TitleNode),
            AlternateTitles = GetAlternateTitles(page.AlternateTitlesNode),
            Authors = GetAuthors(page.AuthorsNode),
            Status = GetStatus(page.StatusNode),
            Genres = GetGenres(page.GenresNode),
            UpdateDate = GetUpdateDate(page.UpdateDateNode),
            RatingPercent = GetRatingPercent(page.AverageRatingNode, page.BestRatingNode),
            Votes = int.Parse(page.VotesNode.InnerText, CultureInfo.InvariantCulture),
            Views = GetViews(page.ViewsNode),
            Description = GetTextFromNodes(page.StoryDescriptionTextNodes),
            Chapters = GetChapters(page.ChapterNodes)
        };

        return manga;
    }

    /// <summary>Returns the HTML-decoded, trimmed inner text of <paramref name="node"/>.</summary>
    private static string DecodeInnerText(HtmlNode node)
    {
        // InnerText keeps entities such as &amp; or &#39; as written in the page source.
        return HttpUtility.HtmlDecode(node.InnerText).Trim();
    }

    /// <summary>
    /// Splits the decoded inner text of <paramref name="node"/> on <paramref name="separator"/>,
    /// trimming every entry and dropping empty ones.
    /// </summary>
    private static List<string> SplitInnerText(HtmlNode node, char separator)
    {
        // Decode before splitting: entities end in ';', which would otherwise be taken
        // for a separator when splitting alternate titles.
        return HttpUtility.HtmlDecode(node.InnerText)
            .Split(separator, StringSplitOptions.TrimEntries | StringSplitOptions.RemoveEmptyEntries)
            .ToList();
    }

    /// <summary>Reads the semicolon-separated alternate titles.</summary>
    private static List<string> GetAlternateTitles(HtmlNode node)
    {
        return SplitInnerText(node, ';');
    }

    /// <summary>Reads the hyphen-separated author names.</summary>
    private static List<string> GetAuthors(HtmlNode node)
    {
        // NOTE(review): a name that itself contains '-' is split in two; confirm the
        // separator the page really uses before tightening this.
        return SplitInnerText(node, '-');
    }

    /// <summary>
    /// Maps the status text to a <see cref="MangaStatus"/>; any text other than
    /// "Ongoing" or "Completed" yields <see cref="MangaStatus.Unknown"/>.
    /// </summary>
    private static MangaStatus GetStatus(HtmlNode node)
    {
        return DecodeInnerText(node) switch
        {
            "Ongoing" => MangaStatus.Ongoing,
            "Completed" => MangaStatus.Complete,
            _ => MangaStatus.Unknown,
        };
    }

    /// <summary>Reads the hyphen-separated genre names.</summary>
    private static List<string> GetGenres(HtmlNode node)
    {
        return SplitInnerText(node, '-');
    }

    /// <summary>
    /// Parses text of the form "date - time" into a <see cref="DateTime"/>.
    /// When the time part is absent, midnight is used.
    /// </summary>
    /// <exception cref="FormatException">The date or time part cannot be parsed.</exception>
    private static DateTime GetUpdateDate(HtmlNode node)
    {
        string[] dateAndTime = DecodeInnerText(node).Split('-', StringSplitOptions.TrimEntries);

        DateOnly date = DateOnly.Parse(dateAndTime[0], CultureInfo.InvariantCulture);
        TimeOnly time = dateAndTime.Length > 1
            ? TimeOnly.Parse(dateAndTime[1], CultureInfo.InvariantCulture)
            : TimeOnly.MinValue;

        return date.ToDateTime(time);
    }

    /// <summary>
    /// Parses a view counter that is either a plain integer (optionally with thousands
    /// separators) or a decimal number followed by a magnitude suffix, e.g. "1.5K" or "80.2M".
    /// </summary>
    /// <returns>The number of views, or 0 when the text cannot be interpreted.</returns>
    private static long GetViews(HtmlNode node)
    {
        string text = node.InnerText.Trim();

        if (text.Length == 0)
        {
            return 0;
        }

        // Parse as long, not int: the method returns long and counters can exceed int.MaxValue.
        if (long.TryParse(text, NumberStyles.Integer | NumberStyles.AllowThousands, CultureInfo.InvariantCulture, out long number))
        {
            return number;
        }

        ReadOnlySpan<char> shortText = text.AsSpan(0, text.Length - 1);

        // decimal keeps "1.005" exact, so scaling never loses a unit to binary rounding.
        if (!decimal.TryParse(shortText, NumberStyles.Number, CultureInfo.InvariantCulture, out decimal formattedNumber))
        {
            return 0;
        }

        long multiplier = GetMultiplier(text[^1]);

        if (multiplier == 0)
        {
            return 0;
        }

        // Saturate instead of overflowing when the scaled value would not fit into a long.
        decimal limit = (decimal)long.MaxValue / multiplier;
        decimal clamped = Math.Clamp(formattedNumber, -limit, limit);

        return (long)(clamped * multiplier);
    }

    /// <summary>
    /// Returns the factor for a magnitude suffix (K, M, B or T, in either case),
    /// or 0 for any other character.
    /// </summary>
    private static long GetMultiplier(char c)
    {
        return char.ToUpperInvariant(c) switch
        {
            'K' => 1_000,
            'M' => 1_000_000,
            'B' => 1_000_000_000,
            'T' => 1_000_000_000_000,
            _ => 0,
        };
    }

    /// <summary>
    /// Expresses the average rating as a whole percentage of the best possible rating.
    /// </summary>
    /// <returns>The rounded percentage, or 0 when the best rating is not positive.</returns>
    /// <exception cref="FormatException">Either node does not contain a number.</exception>
    private static int GetRatingPercent(HtmlNode averageNode, HtmlNode bestNode)
    {
        double average = Convert.ToDouble(averageNode.InnerText, CultureInfo.InvariantCulture);
        double best = Convert.ToDouble(bestNode.InnerText, CultureInfo.InvariantCulture);

        // Dividing by zero would produce NaN or infinity, which has no defined int conversion.
        if (best <= 0)
        {
            return 0;
        }

        return (int)Math.Round(average / best * 100);
    }

    /// <summary>
    /// Builds one <see cref="MangaChapterDTO"/> per chapter list item.
    /// </summary>
    /// <param name="chapterNodes">
    /// The chapter list items; may be <c>null</c>, which HtmlAgilityPack returns when a
    /// query matches nothing.
    /// </param>
    /// <returns>The chapters in document order; empty when there are none.</returns>
    private static List<MangaChapterDTO> GetChapters(HtmlNodeCollection chapterNodes)
    {
        if (chapterNodes is null)
        {
            return [];
        }

        List<MangaChapterDTO> chapters = new(chapterNodes.Count);

        foreach (HtmlNode node in chapterNodes)
        {
            HtmlNode chapterNameNode = node.SelectSingleNode(".//a[contains(@class, 'chapter-name')]");
            HtmlNode chapterViewNode = node.SelectSingleNode(".//span[contains(@class, 'chapter-view')]");
            HtmlNode chapterTimeNode = node.SelectSingleNode(".//span[contains(@class, 'chapter-time')]");

            MangaChapterDTO chapter = new()
            {
                Number = GetChapterNumber(chapterNameNode),
                Name = DecodeInnerText(chapterNameNode),
                Url = chapterNameNode.Attributes["href"].Value,
                Views = GetViews(chapterViewNode),
                UploadDate = DateTime.Parse(chapterTimeNode.Attributes["title"].Value, CultureInfo.InvariantCulture)
            };

            chapters.Add(chapter);
        }

        return chapters;
    }

    /// <summary>
    /// Reads the chapter number from the part of the link's <c>href</c> that follows "/chapter-".
    /// </summary>
    /// <returns>
    /// The chapter number, or 0 when the marker is missing or is not followed by a number.
    /// </returns>
    private static float GetChapterNumber(HtmlNode chapterNameNode)
    {
        const string marker = "/chapter-";

        string url = chapterNameNode.Attributes["href"].Value;
        int index = url.IndexOf(marker, StringComparison.Ordinal);

        if (index == -1)
        {
            return 0;
        }

        ReadOnlySpan<char> chapterNumber = url.AsSpan(index + marker.Length);

        // Same fallback as for a missing marker, so one odd link cannot abort the whole crawl.
        return float.TryParse(chapterNumber, NumberStyles.Float, CultureInfo.InvariantCulture, out float number)
            ? number
            : 0;
    }

    /// <summary>
    /// Concatenates the decoded text of <paramref name="nodes"/>, turning every <c>br</c>
    /// element into a line break and removing the line breaks of the HTML source itself.
    /// </summary>
    private static string GetTextFromNodes(List<HtmlNode> nodes)
    {
        StringBuilder stringBuilder = new();

        foreach (HtmlNode node in nodes)
        {
            if (node.Name == "br")
            {
                stringBuilder.AppendLine();
            }
            else
            {
                // Strip CR and LF separately so that pages served with bare "\n" are covered too.
                string text = HttpUtility.HtmlDecode(node.InnerText)
                    .Replace("\r", "")
                    .Replace("\n", "")
                    .Trim();

                stringBuilder.Append(text);
            }
        }

        return stringBuilder.ToString();
    }
}
|
||||
Reference in New Issue
Block a user