using HtmlAgilityPack;
using System.Text.Json;
using System.Web;

namespace JSMR.Infrastructure.Scanning;

/// <summary>
/// Stateless helpers for reading text and attribute values from HtmlAgilityPack
/// nodes, slicing strings between markers, and parsing JavaScript-style array literals.
/// </summary>
public static class ScannerUtilities
{
    /// <summary>
    /// Collects the HTML-decoded inner HTML of every node whose inner HTML is non-empty.
    /// </summary>
    /// <param name="nodes">Nodes to read; nodes with null or empty inner HTML are skipped.</param>
    /// <returns>The decoded strings, in the same order as the input nodes.</returns>
    public static List<string> GetStringListFromNodes(HtmlNode[] nodes)
    {
        return [.. nodes
            .Where(node => !string.IsNullOrEmpty(node.InnerHtml))
            .Select(node => HttpUtility.HtmlDecode(node.InnerHtml))];
    }

    /// <summary>
    /// Returns the node's inner HTML with line feeds removed, HTML-decoded and trimmed.
    /// </summary>
    /// <param name="node">The node to read; may be null.</param>
    /// <returns>
    /// The decoded text, or an empty string when <paramref name="node"/> is null
    /// or its inner HTML is null, empty or whitespace.
    /// </returns>
    public static string GetDecodedText(HtmlNode? node)
    {
        if (node is null || string.IsNullOrWhiteSpace(node.InnerHtml))
        {
            return string.Empty;
        }

        return HttpUtility.HtmlDecode(node.InnerHtml.Replace("\n", "")).Trim();
    }

    /// <summary>
    /// Extracts the text located between the first occurrence of
    /// <paramref name="startText"/> and the next occurrence of
    /// <paramref name="endText"/> that follows it.
    /// </summary>
    /// <param name="text">The text to search.</param>
    /// <param name="startText">Marker that precedes the wanted text.</param>
    /// <param name="endText">Marker that follows the wanted text.</param>
    /// <returns>
    /// The text between the two markers, or an empty string when either marker
    /// is missing or nothing lies between them.
    /// </returns>
    public static string GetTextBetween(string text, string startText, string endText)
    {
        // Ordinal comparison: the markers are literal fragments, not linguistic text,
        // and culture-sensitive IndexOf can miss characters such as "\n".
        int markerIndex = text.IndexOf(startText, StringComparison.Ordinal);
        if (markerIndex < 0)
        {
            // Without this guard a missing start marker would yield an arbitrary slice.
            return "";
        }

        int startIndex = markerIndex + startText.Length;

        // Search only after the start marker so an earlier occurrence of endText
        // cannot produce a negative length.
        int endIndex = text.IndexOf(endText, startIndex, StringComparison.Ordinal);
        if (endIndex <= startIndex)
        {
            return "";
        }

        return text.Substring(startIndex, endIndex - startIndex);
    }

    /// <summary>
    /// Reads an image URL from a node's <c>src</c> attribute, falling back to
    /// <c>data-src</c> when <c>src</c> is missing or empty.
    /// </summary>
    /// <param name="imageNode">The node whose attributes are read.</param>
    /// <returns>The attribute value, or an empty string when neither attribute has one.</returns>
    public static string GetImageSource(HtmlNode imageNode)
    {
        string imageSource = imageNode.GetAttributeValue("src", "");

        if (string.IsNullOrEmpty(imageSource))
        {
            imageSource = imageNode.GetAttributeValue("data-src", "");
        }

        return imageSource;
    }

    /// <summary>
    /// Parses a JavaScript-style array literal of strings such as <c>['a', 'b']</c>.
    /// </summary>
    /// <param name="value">The array literal text.</param>
    /// <returns>
    /// The parsed elements. When the normalized text is not valid JSON, a
    /// best-effort comma split with surrounding quotes removed is returned instead.
    /// Null, empty or whitespace-only input yields an empty array.
    /// </returns>
    public static string[] ParseJavaScriptArray(string value)
    {
        if (string.IsNullOrWhiteSpace(value))
        {
            return [];
        }

        try
        {
            string json = NormalizeJavaScriptArray(value);
            return JsonSerializer.Deserialize<string[]>(json) ?? [];
        }
        catch (JsonException)
        {
            // Best-effort fallback for literals that are not valid JSON
            // (unquoted items, mixed quotes). Whitespace is trimmed first so the
            // surrounding brackets are actually at the ends of the string.
            return [.. value
                .Trim()
                .Trim('[', ']')
                .Split(',', StringSplitOptions.RemoveEmptyEntries)
                .Select(item => item.Trim().Trim('\'', '"'))];
        }
    }

    /// <summary>
    /// Trims the input and replaces every single quote with a double quote so
    /// that single-quoted JavaScript strings can be read as JSON strings.
    /// </summary>
    private static string NormalizeJavaScriptArray(string input)
    {
        return input.Trim().Replace('\'', '"');
    }
}