Implement normalizer PoC (#4)

This commit is contained in:
minsung
2026-04-07 14:12:07 +09:00
parent f043c18061
commit 3c5294a4cb
11 changed files with 709 additions and 0 deletions

View File

@@ -0,0 +1,133 @@
using System.Text.Json;
using System.Text.Json.Nodes;
namespace Recordingtest.Normalizer;
/// <summary>
/// Applies the rules of a named profile, in profile order, to a piece of text.
/// When the input parses as a JSON object or array the rules operate on the parsed
/// tree and the result is re-serialized indented; otherwise the text-based rules
/// operate on the raw string and the JSON-only rules are logged with a count of 0.
/// </summary>
public static class Normalizer
{
    // Options for the intermediate JSON text handed to the regex/substring rules.
    // The default encoder writes '+', '<', '>', embedded quotes and non-ASCII characters
    // as \uXXXX escapes, which hides them from the rules: a "+09:00" offset is not seen
    // by the timestamp regex, and a GUID that directly follows an escaped quote loses
    // its \b word boundary. The relaxed encoder keeps those characters literal while
    // still escaping '"', '\' and control characters, so the text remains valid JSON.
    private static readonly JsonSerializerOptions s_scanOptions = new()
    {
        Encoder = System.Text.Encodings.Web.JavaScriptEncoder.UnsafeRelaxedJsonEscaping,
    };

    // Options for the final output; same settings the method has always used, cached
    // so a new options instance is not built on every call.
    private static readonly JsonSerializerOptions s_outputOptions = new() { WriteIndented = true };

    /// <summary>
    /// Normalizes <paramref name="input"/> with the rules listed in the given profile.
    /// </summary>
    /// <param name="input">Raw text or JSON to normalize.</param>
    /// <param name="profileName">Profile name passed to <c>Profile.Load</c>.</param>
    /// <returns>The normalized output plus one log entry per rule, in application order.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
    /// <exception cref="InvalidOperationException">The profile lists a rule name this method does not know.</exception>
    public static NormalizeResult Normalize(string input, string profileName)
    {
        ArgumentNullException.ThrowIfNull(input);

        var profile = Profile.Load(profileName);
        var log = new List<RuleApplication>();

        // Non-null means "JSON mode"; null means "plain-text mode".
        JsonNode? jsonNode = TryParseJson(input);
        string current = input;

        foreach (var rule in profile.Rules)
        {
            switch (rule)
            {
                case "strip_timestamps":
                    log.Add(ApplyTextRule(rule, Rules.StripTimestamps, ref jsonNode, ref current));
                    break;
                case "mask_guids":
                    log.Add(ApplyTextRule(rule, Rules.MaskGuids, ref jsonNode, ref current));
                    break;
                case "normalize_paths":
                    log.Add(ApplyTextRule(rule, Rules.NormalizePaths, ref jsonNode, ref current));
                    break;
                case "round_floats":
                    log.Add(ApplyNodeRule(rule, Rules.RoundFloatsInNode, ref jsonNode));
                    break;
                case "sort_json_keys":
                    log.Add(ApplyNodeRule(rule, Rules.SortJsonKeys, ref jsonNode));
                    break;
                default:
                    throw new InvalidOperationException($"Unknown rule: {rule}");
            }
        }

        string output = jsonNode is not null
            ? jsonNode.ToJsonString(s_outputOptions)
            : current;
        return new NormalizeResult(output, log);
    }

    // Parses the input as JSON only when it looks like an object or array; returns null
    // for anything else, including text that starts with '{' or '[' but fails to parse.
    private static JsonNode? TryParseJson(string input)
    {
        var trimmed = input.TrimStart();
        if (!trimmed.StartsWith('{') && !trimmed.StartsWith('['))
        {
            return null;
        }

        try
        {
            return JsonNode.Parse(input);
        }
        catch (JsonException)
        {
            return null;
        }
    }

    // Runs a string-to-string rule. In JSON mode the tree is serialized, rewritten and
    // reparsed (only when something changed) so later rules keep working on a tree;
    // in plain-text mode the rule rewrites the running text directly.
    private static RuleApplication ApplyTextRule(
        string rule,
        Func<string, (string output, int count)> apply,
        ref JsonNode? jsonNode,
        ref string current)
    {
        if (jsonNode is not null)
        {
            var (rewritten, hits) = apply(jsonNode.ToJsonString(s_scanOptions));
            if (hits > 0)
            {
                jsonNode = JsonNode.Parse(rewritten);
            }

            return new RuleApplication(rule, hits);
        }

        var (text, count) = apply(current);
        current = text;
        return new RuleApplication(rule, count);
    }

    // Runs a rule that only makes sense on a JSON tree; in plain-text mode it is a
    // no-op that is still logged (with count 0) so the log lists every profile rule.
    private static RuleApplication ApplyNodeRule(
        string rule,
        Func<JsonNode?, (JsonNode? node, int count)> apply,
        ref JsonNode? jsonNode)
    {
        if (jsonNode is null)
        {
            return new RuleApplication(rule, 0);
        }

        var (node, count) = apply(jsonNode);
        jsonNode = node;
        return new RuleApplication(rule, count);
    }
}

View File

@@ -0,0 +1,28 @@
using YamlDotNet.Serialization;
using YamlDotNet.Serialization.NamingConventions;
namespace Recordingtest.Normalizer;
/// <summary>
/// A named, ordered list of normalization rule ids, loaded from a YAML file.
/// </summary>
public sealed class Profile
{
    /// <summary>Profile name; falls back to the requested profile name when the YAML omits it.</summary>
    public string Name { get; set; } = "";

    /// <summary>Rule ids in the order they should be applied.</summary>
    public List<string> Rules { get; set; } = new();

    /// <summary>
    /// Loads <c>profiles/&lt;profileName&gt;.yaml</c>, looking first next to the application
    /// binaries and then under the current working directory.
    /// </summary>
    /// <param name="profileName">Profile file name without the <c>.yaml</c> extension.</param>
    /// <returns>The deserialized profile; <see cref="Rules"/> is never null.</returns>
    /// <exception cref="FileNotFoundException">Neither location contains the profile file.</exception>
    public static Profile Load(string profileName)
    {
        var fileName = profileName + ".yaml";
        var primary = Path.Combine(AppContext.BaseDirectory, "profiles", fileName);
        var path = primary;
        if (!File.Exists(path))
        {
            // Fallback: <cwd>/profiles only; parent directories are not searched.
            var alt = Path.Combine(Directory.GetCurrentDirectory(), "profiles", fileName);
            if (!File.Exists(alt))
            {
                // Report the primary location, as before.
                throw new FileNotFoundException($"Profile not found: {profileName}", primary);
            }

            path = alt;
        }

        var yaml = File.ReadAllText(path);
        var deserializer = new DeserializerBuilder()
            .WithNamingConvention(CamelCaseNamingConvention.Instance)
            .Build();

        // An empty document yields null; treat it as an empty profile.
        var profile = deserializer.Deserialize<Profile>(yaml) ?? new Profile();

        // A key present with no value (e.g. a bare "rules:") can overwrite the property
        // initializers with null; normalize so callers can enumerate Rules safely.
        if (string.IsNullOrEmpty(profile.Name))
        {
            profile.Name = profileName;
        }

        profile.Rules ??= new List<string>();
        return profile;
    }
}

View File

@@ -0,0 +1,15 @@
<!-- Build definition for the Recordingtest.Normalizer assembly. -->
<!-- NOTE(review): no TargetFramework/Nullable/ImplicitUsings are set here; presumably inherited from a shared props file - confirm. -->
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<AssemblyName>Recordingtest.Normalizer</AssemblyName>
<RootNamespace>Recordingtest.Normalizer</RootNamespace>
</PropertyGroup>
<ItemGroup>
<!-- YamlDotNet: used by Profile.Load to deserialize profile YAML files. -->
<PackageReference Include="YamlDotNet" Version="15.1.6" />
<!-- System.Text.Json: JsonNode parsing/serialization used by Normalizer and Rules. -->
<PackageReference Include="System.Text.Json" Version="8.0.5" />
</ItemGroup>
<ItemGroup>
<!-- Copy profile files to the output directory; Profile.Load looks for them under AppContext.BaseDirectory/profiles first. -->
<None Update="profiles\*.yaml">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
</ItemGroup>
</Project>

View File

@@ -0,0 +1,15 @@
namespace Recordingtest.Normalizer;
/// <summary>One log entry: a rule id and the count reported when that rule was applied.</summary>
public sealed record RuleApplication(string RuleId, int Count);

/// <summary>
/// Read-only pairing of the normalized text with the per-rule application log.
/// </summary>
public sealed class NormalizeResult
{
    /// <summary>Stores the given output and log as-is (the log list is not copied).</summary>
    public NormalizeResult(string output, IReadOnlyList<RuleApplication> log)
        => (Output, Log) = (output, log);

    /// <summary>The normalized text.</summary>
    public string Output { get; }

    /// <summary>Rule applications in the order they were recorded.</summary>
    public IReadOnlyList<RuleApplication> Log { get; }
}

View File

@@ -0,0 +1,204 @@
using System.Text.Json;
using System.Text.Json.Nodes;
using System.Text.RegularExpressions;
namespace Recordingtest.Normalizer;
/// <summary>
/// The individual normalization rules. String rules return the rewritten text and the
/// number of replacements; tree rules return the resulting node and a count.
/// </summary>
public static class Rules
{
    // Matches ISO8601 (with optional fractional seconds and timezone) and common "yyyy-MM-dd HH:mm:ss"
    public static readonly Regex TimestampRegex = new(
        @"\d{4}-\d{2}-\d{2}[T ]\d{2}:\d{2}:\d{2}(?:\.\d+)?(?:Z|[+-]\d{2}:?\d{2})?",
        RegexOptions.Compiled);

    // Matches the 8-4-4-4-12 hexadecimal GUID form, delimited by word boundaries.
    public static readonly Regex GuidRegex = new(
        @"\b[0-9a-fA-F]{8}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{4}-[0-9a-fA-F]{12}\b",
        RegexOptions.Compiled);

    // Characters whose presence in a JSON number literal marks it as floating point.
    private static readonly char[] s_floatMarkers = { '.', 'e', 'E' };

    /// <summary>Replaces every timestamp match with <c>&lt;TS&gt;</c>.</summary>
    /// <returns>The rewritten text and the number of timestamps replaced.</returns>
    public static (string output, int count) StripTimestamps(string input)
    {
        int count = 0;
        var result = TimestampRegex.Replace(input, _ => { count++; return "<TS>"; });
        return (result, count);
    }

    /// <summary>Replaces every GUID match with <c>&lt;GUID&gt;</c>.</summary>
    /// <returns>The rewritten text and the number of GUIDs replaced.</returns>
    public static (string output, int count) MaskGuids(string input)
    {
        int count = 0;
        var result = GuidRegex.Replace(input, _ => { count++; return "<GUID>"; });
        return (result, count);
    }

    /// <summary>
    /// Replaces the repository root with <c>&lt;REPO&gt;</c> and the user profile directory
    /// with <c>&lt;USER&gt;</c>. The repository root comes from the RECORDINGTEST_REPO
    /// environment variable, or the current directory when that is unset or empty.
    /// </summary>
    /// <returns>The rewritten text and the total number of replacements.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="input"/> is null.</exception>
    public static (string output, int count) NormalizePaths(string input)
    {
        ArgumentNullException.ThrowIfNull(input);

        int count = 0;
        string result = input;

        var repo = Environment.GetEnvironmentVariable("RECORDINGTEST_REPO");
        if (string.IsNullOrEmpty(repo))
        {
            repo = Directory.GetCurrentDirectory();
        }

        // The repository is replaced before the home directory, so a repository that
        // lives under the home directory becomes <REPO> rather than <USER>\...
        foreach (var candidate in EnumerateForms(repo))
        {
            result = ReplaceCounting(result, candidate, "<REPO>", ref count);
        }

        var home = Environment.GetFolderPath(Environment.SpecialFolder.UserProfile);
        if (!string.IsNullOrEmpty(home))
        {
            foreach (var candidate in EnumerateForms(home))
            {
                result = ReplaceCounting(result, candidate, "<USER>", ref count);
            }
        }

        return (result, count);
    }

    // Yields the spellings under which a path may appear: as given, and - when it
    // contains backslashes - with JSON-escaped backslashes and with forward slashes.
    private static IEnumerable<string> EnumerateForms(string path)
    {
        yield return path;
        if (path.Contains('\\'))
        {
            yield return path.Replace("\\", "\\\\");
            yield return path.Replace('\\', '/');
        }
    }

    // Case-insensitive replace-all that adds the number of replacements to count.
    // NOTE(review): matches are plain substrings, so a path that is a prefix of a longer
    // sibling name (e.g. ".../min" inside ".../minsung") is also replaced - confirm intent.
    private static string ReplaceCounting(string input, string find, string replace, ref int count)
    {
        if (string.IsNullOrEmpty(find))
        {
            return input;
        }

        int next = input.IndexOf(find, StringComparison.OrdinalIgnoreCase);
        if (next < 0)
        {
            // Nothing to replace: hand back the original without allocating a builder.
            return input;
        }

        var sb = new System.Text.StringBuilder(input.Length);
        int idx = 0;
        while (next >= 0)
        {
            sb.Append(input, idx, next - idx);
            sb.Append(replace);
            count++;
            idx = next + find.Length;
            next = input.IndexOf(find, idx, StringComparison.OrdinalIgnoreCase);
        }

        sb.Append(input, idx, input.Length - idx);
        return sb.ToString();
    }

    /// <summary>
    /// Rounds every floating-point number in the tree to 6 decimals (midpoints away from
    /// zero). A number counts as floating point when its JSON literal contains '.', 'e'
    /// or 'E'; integer literals are left untouched.
    /// </summary>
    /// <param name="node">Root of the tree; objects and arrays are modified in place.</param>
    /// <returns>
    /// The node to use from here on and the number of values rounded. For an object or
    /// array this is the same instance; for a floating-point scalar root it is a new,
    /// detached value node; for null it is (null, 0).
    /// </returns>
    public static (JsonNode? node, int count) RoundFloatsInNode(JsonNode? node)
    {
        if (node is null)
        {
            return (null, 0);
        }

        // A scalar root cannot be replaced in place, so return its rounded replacement.
        if (TryRound(node, out var roundedRoot))
        {
            return (roundedRoot, 1);
        }

        int count = 0;
        Walk(node);
        return (node, count);

        void Walk(JsonNode container)
        {
            if (container is JsonObject obj)
            {
                // Snapshot the entries: the object is mutated while we iterate.
                foreach (var kv in obj.ToList())
                {
                    if (TryRound(kv.Value, out var rounded))
                    {
                        obj[kv.Key] = rounded;
                        count++;
                    }
                    else if (kv.Value is JsonObject || kv.Value is JsonArray)
                    {
                        Walk(kv.Value);
                    }
                }
            }
            else if (container is JsonArray arr)
            {
                for (int i = 0; i < arr.Count; i++)
                {
                    var item = arr[i];
                    if (TryRound(item, out var rounded))
                    {
                        arr[i] = rounded;
                        count++;
                    }
                    else if (item is JsonObject || item is JsonArray)
                    {
                        Walk(item);
                    }
                }
            }
        }
    }

    // Produces the rounded replacement for a floating-point value node; returns false
    // (and null) for anything else.
    private static bool TryRound(JsonNode? candidate, out JsonNode? rounded)
    {
        if (candidate is JsonValue v && TryAsDouble(v, out var d, out var wasFloat) && wasFloat)
        {
            rounded = JsonValue.Create(Math.Round(d, 6, MidpointRounding.AwayFromZero));
            return true;
        }

        rounded = null;
        return false;
    }

    // True when the value is backed by a parsed JSON number whose literal is written in
    // floating-point form and parses to a finite double.
    private static bool TryAsDouble(JsonValue v, out double d, out bool wasFloat)
    {
        d = 0;
        wasFloat = false;

        // Only values that still carry their parsed JsonElement expose the raw literal.
        if (!v.TryGetValue<JsonElement>(out var je) || je.ValueKind != JsonValueKind.Number)
        {
            return false;
        }

        if (je.GetRawText().IndexOfAny(s_floatMarkers) < 0)
        {
            return false;
        }

        // Leave non-finite results alone: they could not be written back as JSON.
        if (!je.TryGetDouble(out d) || !double.IsFinite(d))
        {
            d = 0;
            return false;
        }

        wasFloat = true;
        return true;
    }

    /// <summary>
    /// Returns a new JsonNode with object keys sorted recursively (ordinal order).
    /// Counts the number of objects visited; the input tree is not modified.
    /// </summary>
    public static (JsonNode? node, int count) SortJsonKeys(JsonNode? node)
    {
        int count = 0;
        var result = SortInternal(node, ref count);
        return (result, count);
    }

    // Builds the sorted copy: objects are rebuilt with ordered keys, arrays keep their
    // element order, scalars are cloned, and null stays null.
    private static JsonNode? SortInternal(JsonNode? node, ref int count)
    {
        if (node is JsonObject obj)
        {
            count++;
            var sorted = new JsonObject();
            foreach (var kv in obj.OrderBy(k => k.Key, StringComparer.Ordinal))
            {
                sorted[kv.Key] = SortInternal(kv.Value, ref count);
            }

            return sorted;
        }

        if (node is JsonArray arr)
        {
            var newArr = new JsonArray();
            foreach (var item in arr)
            {
                newArr.Add(SortInternal(item, ref count));
            }

            return newArr;
        }

        if (node is JsonValue v)
        {
            // Clone the scalar: a node cannot belong to two parents.
            return JsonNode.Parse(v.ToJsonString());
        }

        return null;
    }
}

View File

@@ -0,0 +1,7 @@
# Default normalization profile.
# Rules run in the order listed. Each entry must be a rule id handled by
# Normalizer.Normalize; an unknown id makes it throw InvalidOperationException.
name: default
rules:
# Text rules: applied to plain text, or to the serialized JSON when the input is JSON.
- strip_timestamps
- mask_guids
- normalize_paths
# JSON-only rules: logged with a count of 0 when the input is not JSON.
- round_floats
- sort_json_keys