71 lines
1.9 KiB
C#
71 lines
1.9 KiB
C#
using HtmlAgilityPack;
|
|
using System.Text.Json;
|
|
using System.Web;
|
|
|
|
namespace JSMR.Infrastructure.Scanning;
|
|
|
|
/// <summary>
/// Stateless helpers for extracting text, attribute values and inline
/// JavaScript array literals from scraped HTML.
/// </summary>
public static class ScannerUtilities
{
    /// <summary>
    /// Returns the HTML-decoded inner HTML of every node whose inner HTML is not empty.
    /// </summary>
    /// <param name="nodes">Nodes to read; nodes with null or empty inner HTML are skipped.</param>
    /// <returns>A new list holding one decoded string per non-empty node, in input order.</returns>
    public static List<string> GetStringListFromNodes(HtmlNode[] nodes)
    {
        return [.. nodes
            .Where(node => !string.IsNullOrEmpty(node.InnerHtml))
            .Select(node => HttpUtility.HtmlDecode(node.InnerHtml))];
    }

    /// <summary>
    /// Returns the node's inner HTML with line feeds removed, HTML-decoded and trimmed.
    /// </summary>
    /// <param name="node">Node to read; may be null.</param>
    /// <returns>
    /// The decoded text, or <see cref="string.Empty"/> when <paramref name="node"/> is null
    /// or its inner HTML is null, empty or whitespace.
    /// </returns>
    public static string GetDecodedText(HtmlNode? node)
    {
        if (node is null || string.IsNullOrWhiteSpace(node.InnerHtml))
        {
            return string.Empty;
        }

        // Only "\n" is stripped before decoding; any other whitespace is left to Trim().
        return HttpUtility.HtmlDecode(node.InnerHtml.Replace("\n", "")).Trim();
    }

    /// <summary>
    /// Returns the text located between the first occurrence of <paramref name="startText"/>
    /// and the first occurrence of <paramref name="endText"/> that follows it.
    /// </summary>
    /// <param name="text">Text to search.</param>
    /// <param name="startText">Marker that precedes the wanted text.</param>
    /// <param name="endText">Marker that follows the wanted text.</param>
    /// <returns>
    /// The enclosed text, or an empty string when either marker is missing or nothing
    /// lies between them. Markers are matched ordinally (case-sensitive, culture-independent).
    /// </returns>
    public static string GetTextBetween(string text, string startText, string endText)
    {
        int markerIndex = text.IndexOf(startText, StringComparison.Ordinal);

        // Without this guard a missing start marker (-1) would be folded into the
        // offset arithmetic and produce an unrelated slice of the input.
        if (markerIndex < 0)
        {
            return "";
        }

        int startIndex = markerIndex + startText.Length;

        // Search from just past the start marker so an earlier occurrence of the
        // end marker cannot hide a valid closing marker.
        int endIndex = text.IndexOf(endText, startIndex, StringComparison.Ordinal);

        // Covers both "end marker not found" (-1) and "nothing between the markers".
        if (endIndex <= startIndex)
        {
            return "";
        }

        return text[startIndex..endIndex];
    }

    /// <summary>
    /// Reads the image URL from the node's <c>src</c> attribute, using <c>data-src</c>
    /// when <c>src</c> is missing or empty.
    /// </summary>
    /// <param name="imageNode">Node whose attributes are read.</param>
    /// <returns>The attribute value, or an empty string when neither attribute has one.</returns>
    public static string GetImageSource(HtmlNode imageNode)
    {
        string imageSource = imageNode.GetAttributeValue("src", "");

        return string.IsNullOrEmpty(imageSource)
            ? imageNode.GetAttributeValue("data-src", "")
            : imageSource;
    }

    /// <summary>
    /// Parses a JavaScript array literal of strings, e.g. <c>['a', 'b']</c>, into a string array.
    /// </summary>
    /// <param name="value">Array literal text; single- or double-quoted items are accepted.</param>
    /// <returns>
    /// The parsed items. Null or whitespace-only input yields an empty array. Input that is
    /// not valid JSON after quote normalization is split on commas as a best effort.
    /// </returns>
    public static string[] ParseJavaScriptArray(string value)
    {
        if (string.IsNullOrWhiteSpace(value))
        {
            return [];
        }

        try
        {
            string json = NormalizeJavaScriptArray(value);

            return JsonSerializer.Deserialize<string[]>(json) ?? [];
        }
        catch (JsonException)
        {
            // Best-effort fallback for literals that are not valid JSON (unquoted items,
            // numbers, apostrophes inside items). Only parse failures are handled here;
            // any other exception indicates a real fault and is allowed to propagate.
            return [.. value
                .Trim()
                .Trim('[', ']')
                .Split(',', StringSplitOptions.RemoveEmptyEntries)
                .Select(x => x.Trim().Trim('\'', '"'))];
        }
    }

    /// <summary>
    /// Converts a JavaScript array literal to JSON-style quoting by trimming it and
    /// replacing every single quote with a double quote.
    /// </summary>
    /// <remarks>
    /// The replacement is unconditional, so an apostrophe inside an item produces invalid
    /// JSON; <see cref="ParseJavaScriptArray"/> handles that case through its fallback.
    /// </remarks>
    private static string NormalizeJavaScriptArray(string input)
    {
        return input.Trim().Replace('\'', '"');
    }
}