Implement normalizer PoC (#4)

This commit is contained in:
minsung
2026-04-07 14:12:07 +09:00
parent f043c18061
commit 3c5294a4cb
11 changed files with 709 additions and 0 deletions

View File

@@ -0,0 +1,204 @@
using System.Text.Json;
using System.Text.Json.Nodes;
using System.Text.RegularExpressions;
namespace Recordingtest.Normalizer;
public static class Rules
{
/// <summary>
/// Finds timestamps written as <c>yyyy-MM-dd</c>, then a <c>T</c> or a single space, then
/// <c>HH:mm:ss</c>, optionally followed by fractional seconds and by a zone designator
/// (<c>Z</c>, or a signed offset such as <c>+09:00</c> / <c>+0900</c>).
/// </summary>
public static readonly Regex TimestampRegex = new Regex(
    @"\d{4}-\d{2}-\d{2}[T ]\d{2}:\d{2}:\d{2}(?:\.\d+)?(?:Z|[+-]\d{2}:?\d{2})?",
    RegexOptions.Compiled);

/// <summary>
/// Finds GUIDs in the hyphenated 8-4-4-4-12 hexadecimal layout (upper or lower case),
/// anchored on word boundaries at both ends.
/// </summary>
public static readonly Regex GuidRegex = new Regex(
    @"\b[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}\b",
    RegexOptions.Compiled);
/// <summary>
/// Replaces every match of <see cref="TimestampRegex"/> in <paramref name="input"/> with
/// the literal placeholder <c>&lt;TS&gt;</c>.
/// </summary>
/// <param name="input">Text to scan.</param>
/// <returns>The rewritten text and the number of timestamps that were replaced.</returns>
public static (string output, int count) StripTimestamps(string input)
{
    var replaced = 0;

    // Called once per match by Regex.Replace, so it doubles as the match counter.
    string Substitute(Match _)
    {
        replaced++;
        return "<TS>";
    }

    string stripped = TimestampRegex.Replace(input, Substitute);
    return (stripped, replaced);
}
/// <summary>
/// Replaces every match of <see cref="GuidRegex"/> in <paramref name="input"/> with the
/// literal placeholder <c>&lt;GUID&gt;</c>.
/// </summary>
/// <param name="input">Text to scan.</param>
/// <returns>The rewritten text and the number of GUIDs that were masked.</returns>
public static (string output, int count) MaskGuids(string input)
{
    var masked = 0;

    // The evaluator runs once per match, which is what lets it tally the hits.
    MatchEvaluator toPlaceholder = _ =>
    {
        masked++;
        return "<GUID>";
    };

    string scrubbed = GuidRegex.Replace(input, toPlaceholder);
    return (scrubbed, masked);
}
/// <summary>
/// Replaces machine-specific path prefixes in <paramref name="input"/> with placeholders.
/// The repository root becomes <c>&lt;REPO&gt;</c> and the user profile directory becomes
/// <c>&lt;USER&gt;</c>.
/// </summary>
/// <remarks>
/// The repository root is read from the <c>RECORDINGTEST_REPO</c> environment variable and
/// falls back to the current working directory when that variable is unset or empty.
/// The repository root is substituted before the user profile directory.
/// Each path is tried in every form produced by <c>EnumerateForms</c>.
/// </remarks>
/// <param name="input">Text to scan.</param>
/// <returns>The rewritten text and the total number of replacements made.</returns>
public static (string output, int count) NormalizePaths(string input)
{
    int replacements = 0;
    string text = input;

    var repoRoot = Environment.GetEnvironmentVariable("RECORDINGTEST_REPO");
    if (string.IsNullOrEmpty(repoRoot))
    {
        repoRoot = Directory.GetCurrentDirectory();
    }
    Mask(repoRoot, "<REPO>");

    var userHome = Environment.GetFolderPath(Environment.SpecialFolder.UserProfile);
    if (!string.IsNullOrEmpty(userHome))
    {
        Mask(userHome, "<USER>");
    }

    return (text, replacements);

    // Substitutes every spelling of `root` (raw, JSON-escaped, forward-slash) in turn.
    void Mask(string root, string placeholder)
    {
        foreach (var form in EnumerateForms(root))
        {
            text = ReplaceCounting(text, form, placeholder, ref replacements);
        }
    }
}
/// <summary>
/// Yields the spellings of <paramref name="path"/> to search for: the path as given and,
/// when it contains a backslash, a copy with each backslash doubled followed by a copy
/// with each backslash turned into a forward slash.
/// </summary>
/// <param name="path">Path to expand.</param>
/// <returns>One element for backslash-free paths, otherwise three, in the order above.</returns>
private static IEnumerable<string> EnumerateForms(string path)
{
    yield return path;

    // Without backslashes the alternate spellings would be identical to the original.
    if (!path.Contains('\\'))
    {
        yield break;
    }

    // Backslashes doubled, as they appear inside a JSON string literal.
    yield return path.Replace("\\", "\\\\");

    // Forward-slash spelling of the same path.
    yield return path.Replace('\\', '/');
}
/// <summary>
/// Replaces every non-overlapping occurrence of <paramref name="find"/> in
/// <paramref name="input"/> with <paramref name="replace"/>, matching with
/// <see cref="StringComparison.OrdinalIgnoreCase"/>, and adds the number of replacements
/// to <paramref name="count"/>.
/// </summary>
/// <param name="input">Text to scan.</param>
/// <param name="find">Text to look for; when null or empty the input is returned untouched.</param>
/// <param name="replace">Text written in place of each occurrence.</param>
/// <param name="count">Running tally, incremented once per replacement.</param>
/// <returns>The rewritten text.</returns>
private static string ReplaceCounting(string input, string find, string replace, ref int count)
{
    if (string.IsNullOrEmpty(find))
    {
        return input;
    }

    var output = new System.Text.StringBuilder();
    int cursor = 0;

    for (int hit = input.IndexOf(find, cursor, StringComparison.OrdinalIgnoreCase);
         hit >= 0;
         hit = input.IndexOf(find, cursor, StringComparison.OrdinalIgnoreCase))
    {
        // Copy the untouched stretch before the hit, then the replacement itself.
        output.Append(input, cursor, hit - cursor);
        output.Append(replace);
        count++;

        // Resume after the matched text so occurrences never overlap.
        cursor = hit + find.Length;
    }

    // Whatever follows the last hit (or the whole input when there was none).
    output.Append(input, cursor, input.Length - cursor);
    return output.ToString();
}
/// <summary>
/// Rounds every floating-point number in a JSON tree to 6 decimal places, with midpoints
/// rounded away from zero.
/// </summary>
/// <remarks>
/// Objects and arrays are updated in place: each qualifying value is replaced by a new
/// <see cref="JsonValue"/> holding the rounded double. A scalar root has no parent
/// container to assign into, so when <paramref name="node"/> is itself a floating-point
/// value the returned node is a new instance. Callers should therefore always use the
/// returned node. Which values qualify as floating-point is decided by
/// <c>TryAsDouble</c>.
/// </remarks>
/// <param name="node">Root of the tree to process; may be <c>null</c>.</param>
/// <returns>
/// The processed root and the number of values that were replaced;
/// <c>(null, 0)</c> when <paramref name="node"/> is <c>null</c>.
/// </returns>
public static (JsonNode? node, int count) RoundFloatsInNode(JsonNode? node)
{
    if (node is null) return (null, 0);

    int count = 0;

    // A bare scalar document (e.g. the JSON text `1.23456789`) is never visited by Walk,
    // which only descends into containers, so it is handled here.
    if (node is JsonValue)
    {
        var replacedRoot = RoundOrNull(node);
        return (replacedRoot ?? node, count);
    }

    Walk(node);
    return (node, count);

    // Returns the rounded replacement for a floating-point value (and counts it),
    // or null when the candidate is not a floating-point value.
    JsonNode? RoundOrNull(JsonNode? candidate)
    {
        if (candidate is JsonValue v && TryAsDouble(v, out var d, out var wasFloat) && wasFloat)
        {
            count++;
            return JsonValue.Create(Math.Round(d, 6, MidpointRounding.AwayFromZero));
        }
        return null;
    }

    // Depth-first pass over a container, replacing qualifying children in place.
    void Walk(JsonNode container)
    {
        if (container is JsonObject obj)
        {
            // Iterate over a snapshot so the indexer assignment below never touches
            // the collection that is being enumerated.
            foreach (var kv in obj.ToList())
            {
                var child = kv.Value;
                if (RoundOrNull(child) is { } rounded)
                {
                    obj[kv.Key] = rounded;
                }
                else if (child is JsonObject or JsonArray)
                {
                    Walk(child);
                }
            }
        }
        else if (container is JsonArray arr)
        {
            for (int i = 0; i < arr.Count; i++)
            {
                var item = arr[i];
                if (RoundOrNull(item) is { } rounded)
                {
                    arr[i] = rounded;
                }
                else if (item is JsonObject or JsonArray)
                {
                    Walk(item);
                }
            }
        }
    }
}
/// <summary>
/// Reports whether <paramref name="v"/> is a JSON number written in floating-point
/// notation and, if so, yields its value as a double.
/// </summary>
/// <remarks>
/// Only values backed by a <see cref="JsonElement"/> are considered, because the decision
/// is made from the number's raw text: it must contain a <c>.</c>, <c>e</c> or <c>E</c>.
/// Numbers without any of those characters are reported as not floating-point.
/// </remarks>
/// <param name="v">Value to inspect.</param>
/// <param name="d">The parsed double on success; <c>0</c> otherwise.</param>
/// <param name="wasFloat">Set to the same value as the return value.</param>
/// <returns><c>true</c> only when the value is a floating-point number that parsed as a double.</returns>
private static bool TryAsDouble(JsonValue v, out double d, out bool wasFloat)
{
    d = 0;
    wasFloat = false;

    // The raw source text is only available through the underlying JsonElement.
    if (!v.TryGetValue<JsonElement>(out var element) || element.ValueKind != JsonValueKind.Number)
    {
        return false;
    }

    // A fraction or exponent marker in the literal is what separates floats from integers.
    var raw = element.GetRawText();
    bool looksFloat = raw.Contains('.') || raw.Contains('e') || raw.Contains('E');
    if (!looksFloat || !element.TryGetDouble(out d))
    {
        return false;
    }

    wasFloat = true;
    return true;
}
/// <summary>
/// Builds a copy of <paramref name="node"/> in which the keys of every object, at any
/// depth, appear in sorted order.
/// </summary>
/// <param name="node">Root of the tree to copy; may be <c>null</c>.</param>
/// <returns>The sorted copy and the number of objects that were rebuilt.</returns>
public static (JsonNode? node, int count) SortJsonKeys(JsonNode? node)
{
    var sortedObjects = 0;
    JsonNode? sortedRoot = SortInternal(node, ref sortedObjects);
    return (sortedRoot, sortedObjects);
}
/// <summary>
/// Recursively copies <paramref name="node"/>, emitting object properties in ordinal key
/// order and keeping array elements in their original order.
/// </summary>
/// <param name="node">Node to copy; may be <c>null</c>.</param>
/// <param name="count">Incremented once for every object encountered.</param>
/// <returns>
/// A new tree detached from the input, or <c>null</c> when <paramref name="node"/> is
/// <c>null</c>.
/// </returns>
private static JsonNode? SortInternal(JsonNode? node, ref int count)
{
    switch (node)
    {
        case JsonObject source:
        {
            count++;
            var ordered = new JsonObject();
            foreach (var (key, child) in source.OrderBy(p => p.Key, StringComparer.Ordinal))
            {
                ordered[key] = SortInternal(child, ref count);
            }
            return ordered;
        }

        case JsonArray items:
        {
            var copy = new JsonArray();
            for (int i = 0; i < items.Count; i++)
            {
                copy.Add(SortInternal(items[i], ref count));
            }
            return copy;
        }

        case JsonValue scalar:
            // Serialise and re-parse so the copy is not attached to the source tree.
            return JsonNode.Parse(scalar.ToJsonString());

        default:
            return null;
    }
}
}