Files
jsmr/JSMR.Infrastructure/Data/Repositories/VoiceWorks/MySqlBooleanQuery.cs

178 lines
5.2 KiB
C#

using System.Text;
namespace JSMR.Infrastructure.Data.Repositories.VoiceWorks;
/// <summary>
/// Rewrites free-form user search text into a string of signed terms
/// (<c>+term</c>, <c>-term</c>, <c>+"a phrase"</c>, <c>+(a b)</c>) intended for a
/// MySQL boolean-mode full-text query.
/// </summary>
/// <remarks>
/// The output is a search expression, not SQL: it must still be passed to the
/// database as a bound parameter by the caller.
/// </remarks>
public static class MySqlBooleanQuery
{
    /// <summary>
    /// Normalizes <paramref name="input"/> into space-separated signed terms.
    /// </summary>
    /// <param name="input">Raw search text; may be null, empty or whitespace.</param>
    /// <returns>
    /// The normalized expression, or <see cref="string.Empty"/> when the input
    /// contains no usable term.
    /// </returns>
    /// <remarks>
    /// Rules applied to each space-separated top-level token:
    /// <list type="bullet">
    /// <item>An unsigned token becomes required (<c>+token</c>).</item>
    /// <item>A leading <c>+</c> or <c>-</c> is kept; any further sign characters stacked
    /// directly after it are dropped, so <c>++apple</c> becomes <c>+apple</c>.</item>
    /// <item>A token that is only sign characters, or a standalone <c>|</c>, is discarded.
    /// Note that this means <c>a | b</c> (with spaces) yields <c>+a +b</c>; only
    /// <c>a|b</c> (no spaces) is treated as an OR.</item>
    /// </list>
    /// </remarks>
    public static string Normalize(string input)
    {
        if (string.IsNullOrWhiteSpace(input))
        {
            return string.Empty;
        }

        // Split on spaces that are outside quotes and parentheses.
        var tokens = SplitTopLevel(input.Trim(), ' ');
        var parts = new List<string>(tokens.Count);

        foreach (var raw in tokens)
        {
            var t = raw.Trim();
            if (t.Length == 0 || t is "|")
            {
                continue;
            }

            // Default: required term. An explicit leading sign overrides it.
            var sign = '+';
            var body = t;
            if (t[0] is '+' or '-')
            {
                sign = t[0];
                // Collapse a run of operators ("++x", "+-x") down to the first one.
                body = t.TrimStart('+', '-').Trim();
            }

            var normalized = NormalizePositive(body);
            if (normalized.Length > 0)
            {
                parts.Add(sign + normalized);
            }
        }

        return string.Join(' ', parts);
    }

    /// <summary>
    /// Normalizes an unsigned token: OR lists, user groups and quoted phrases.
    /// </summary>
    /// <returns>The normalized token, or an empty string if nothing usable remains.</returns>
    private static string NormalizePositive(string token)
    {
        if (string.IsNullOrWhiteSpace(token))
        {
            return string.Empty;
        }

        // The OR check must run first: "a"|"b" and (a)|(b) also start and end with
        // a quote / parenthesis and would otherwise be mistaken for a single
        // phrase or a single group.
        if (ContainsTopLevel(token, '|'))
        {
            var alternatives = SplitTopLevel(token, '|')
                .Select(NormalizeOrAtom)
                .Where(a => a.Length > 0)
                .ToList();

            // A token made only of '|' characters has no alternatives at all.
            return alternatives.Count == 0
                ? string.Empty
                : "(" + string.Join(" ", alternatives) + ")";
        }

        // A parenthesized group is trusted as the user's own grouping.
        if (IsGroup(token))
        {
            return token;
        }

        // Quoted phrases are kept (and repaired if a quote is missing);
        // anything else is a plain atom and comes back unchanged.
        return EnsureBalancedQuotes(token);
    }

    /// <summary>
    /// Normalizes one alternative of an OR list (a phrase, a group or a bare word).
    /// </summary>
    private static string NormalizeOrAtom(string atom)
    {
        if (string.IsNullOrWhiteSpace(atom))
        {
            return string.Empty;
        }

        // Phrases, including ones with a missing opening or closing quote.
        var repaired = EnsureBalancedQuotes(atom);
        if (IsQuoted(repaired))
        {
            return repaired;
        }

        // Keep a user-supplied group as a group rather than turning it into a phrase.
        if (IsGroup(atom))
        {
            return atom;
        }

        // Anything else containing whitespace is turned into a phrase.
        if (atom.Any(char.IsWhiteSpace))
        {
            return $"\"{EscapeQuotes(atom)}\"";
        }

        return atom;
    }

    // -------------- helpers --------------

    /// <summary>True when <paramref name="s"/> starts and ends with a double quote.</summary>
    private static bool IsQuoted(string s)
        => s.Length >= 2 && s[0] == '"' && s[^1] == '"';

    /// <summary>True when <paramref name="s"/> is a non-empty <c>( ... )</c> wrapper.</summary>
    private static bool IsGroup(string s)
        => s.Length > 2 && s[0] == '(' && s[^1] == ')';

    /// <summary>
    /// Returns <paramref name="s"/> as a well-formed quoted phrase when it looks like one,
    /// otherwise returns it unchanged.
    /// </summary>
    /// <remarks>
    /// A string already wrapped in quotes has its embedded quotes backslash-escaped.
    /// A string with an odd number of quotes that starts (or ends) with a quote gets the
    /// missing closing (or opening) quote added. Strings with an even number of quotes
    /// that are not wrapped are left alone, since nothing is missing.
    /// </remarks>
    private static string EnsureBalancedQuotes(string s)
    {
        if (IsQuoted(s))
        {
            return $"\"{EscapeQuotes(s[1..^1])}\"";
        }

        var quoteCount = s.Count(c => c == '"');
        if (quoteCount % 2 == 0)
        {
            return s;
        }

        // Opening quote without a closing one: close it.
        if (s[0] == '"')
        {
            return s + "\"";
        }

        // Closing quote without an opening one: open it.
        if (s[^1] == '"')
        {
            return "\"" + s;
        }

        return s;
    }

    /// <summary>Backslash-escapes every double quote in <paramref name="s"/>.</summary>
    private static string EscapeQuotes(string s) => s.Replace("\"", "\\\"");

    /// <summary>
    /// Reports whether <paramref name="sep"/> occurs in <paramref name="s"/> outside
    /// double quotes and outside parentheses.
    /// </summary>
    private static bool ContainsTopLevel(string s, char sep)
    {
        var depth = 0;
        var inQuotes = false;

        foreach (var ch in s)
        {
            // Quotes only toggle at paren depth 0; inside a group they are plain text.
            if (ch == '"' && depth == 0)
            {
                inQuotes = !inQuotes;
            }
            else if (!inQuotes)
            {
                if (ch == '(')
                {
                    depth++;
                }
                else if (ch == ')' && depth > 0)
                {
                    depth--;
                }
                else if (ch == sep && depth == 0)
                {
                    return true;
                }
            }
        }

        return false;
    }

    /// <summary>
    /// Splits <paramref name="s"/> on <paramref name="sep"/> wherever the separator is
    /// outside double quotes and outside parentheses. Pieces are trimmed and empty
    /// pieces are dropped.
    /// </summary>
    private static List<string> SplitTopLevel(string s, char sep)
    {
        var pieces = new List<string>();
        var current = new StringBuilder();
        var depth = 0;
        var inQuotes = false;

        void Flush()
        {
            var piece = current.ToString().Trim();
            if (piece.Length > 0)
            {
                pieces.Add(piece);
            }

            current.Clear();
        }

        foreach (var ch in s)
        {
            if (ch == '"' && depth == 0)
            {
                inQuotes = !inQuotes;
            }
            else if (!inQuotes)
            {
                if (ch == '(')
                {
                    depth++;
                }
                else if (ch == ')')
                {
                    // An unmatched ')' never drives the depth negative.
                    depth = Math.Max(0, depth - 1);
                }
                else if (ch == sep && depth == 0)
                {
                    Flush();
                    continue; // the separator itself is not part of any piece
                }
            }

            current.Append(ch);
        }

        Flush();
        return pieces;
    }
}