diff --git a/.gitignore b/.gitignore index 2f3a845..2b72cff 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,4 @@ obj/ *.log checkpoints/ sample.txt +sample_data diff --git a/DB작업_U형측구.xlsx b/DB작업_U형측구.xlsx deleted file mode 100644 index f1fbba4..0000000 Binary files a/DB작업_U형측구.xlsx and /dev/null differ diff --git a/ExcelKv.Core/ExcelLoader.cs b/ExcelKv.Core/ExcelLoader.cs index 4308991..c307ec2 100644 --- a/ExcelKv.Core/ExcelLoader.cs +++ b/ExcelKv.Core/ExcelLoader.cs @@ -3,31 +3,32 @@ using System.Collections.Generic; using System.Linq; using System.Threading.Tasks; using MiniExcelLibs; -using StackExchange.Redis; // We'll just use IDatabase interface if possible, or dependency injection. - // For now, let's keep it simple and pass the db/batch or connection string. - // Better yet, pass an interface IStorageWrapper. namespace ExcelKv.Core; -// Simple Config DTO for user-defined areas public class RegionConfig { public int TopHeaderStartRow { get; set; } = 0; public int TopHeaderDepth { get; set; } = 3; public int LeftHeaderStartCol { get; set; } = 0; public int LeftHeaderWidth { get; set; } = 4; + + // Optional Parsing Limits (inclusive indices) + public int? DataEndRow { get; set; } + public int? DataEndCol { get; set; } } public interface IStorageWrapper { Task SetAsync(string key, string value); + // New: Metadata overload for Traceability + Task SetAsync(string key, string value, int row, int col); Task IncrementAsync(string key, double value); } // Live Aggregator Helper (Moved here for shared access) public class LiveAggregator { - // Simple basic check for now. Can be expanded. private static readonly System.Text.RegularExpressions.Regex MetricPattern = new System.Text.RegularExpressions.Regex(@"(수량|합계|Volume|Weight|Total|Usage)", System.Text.RegularExpressions.RegexOptions.IgnoreCase | System.Text.RegularExpressions.RegexOptions.Compiled); @@ -42,6 +43,25 @@ public class LiveAggregator public class ExcelLoader { + public static async Task> GetSheetNamesAsync(string filePath) + { + return await Task.Run(() => MiniExcel.GetSheetNames(filePath).ToList()); + } + + public static async Task> GetPreviewRowsAsync(string filePath, string sheetName, int count = 50) + { + return await Task.Run(() => + { + var rows = MiniExcel.Query(filePath, sheetName: sheetName, useHeaderRow: false).Take(count).ToList(); + List result = new(); + foreach(IDictionary r in rows) + { + result.Add(FlattenDictionaryRow(r)); + } + return result; + }); + } + public static async Task ProcessFileAsync( string filePath, string sheetName, @@ -50,7 +70,6 @@ public class ExcelLoader SchemaRegistry registry) { Console.WriteLine($"[Core.Loader] Processing {filePath} Sheet: {sheetName}..."); - Console.WriteLine($"[Config] Top: Row{config.TopHeaderStartRow} (+{config.TopHeaderDepth}), Left: Col{config.LeftHeaderStartCol} (+{config.LeftHeaderWidth})"); var rows = MiniExcel.Query(filePath, sheetName: sheetName, useHeaderRow: false).ToList(); @@ -58,112 +77,118 @@ public class ExcelLoader int dataStartRow = config.TopHeaderStartRow + config.TopHeaderDepth; if (rows.Count <= dataStartRow) return 0; - // 1. Analyze Top Headers - // Extract the rows that form the Top Header - List topHeaderRows = new(); + // 1. Analyze Top Headers (Data Columns) + var topHeaderRows = new List(); for (int i = config.TopHeaderStartRow; i < dataStartRow; i++) { - var rowDict = (IDictionary)rows[i]; - topHeaderRows.Add(FlattenDictionaryRow(rowDict)); + topHeaderRows.Add(FlattenDictionaryRow((IDictionary)rows[i])); } - // Flatten Top Headers (Horizontal Forward Fill) - // Offset: We effectively care about columns starting from LeftHeaderWidth to End - // but we pass the absolute index. - var topAxisKeys = FlattenTopHeaders(topHeaderRows, config.LeftHeaderStartCol + config.LeftHeaderWidth); + // Calculate Global Max Col from Data Rows to ensure we don't truncate data + // Optimization: check a sample or assume logical limit. For correctness, check valid rows. + int globalMaxCol = 0; + int limitRow = config.DataEndRow.HasValue ? Math.Min(rows.Count, config.DataEndRow.Value + 1) : rows.Count; + for(int i = dataStartRow; i < limitRow; i++) + { + var d = (IDictionary)rows[i]; + if(d.Count > globalMaxCol) globalMaxCol = d.Count; // Approximate + // FlattenDictionaryRow is cleaner but expensive to call just for count. + // d.Keys.Count is effectively the column count for that row. + } + + // Flatten Data Headers (Right Side) + var topAxisKeys = FlattenTopHeaders(topHeaderRows, config.LeftHeaderStartCol + config.LeftHeaderWidth, globalMaxCol); - // Register Schema + // ... (Left Axis Header logic omitted for brevity, unchanged) ... + // ** New: Extract Left Axis Headers (Corner Region) ** + // These are the headers *above* the Left Key columns. + var leftAxisHeaders = new List(); + var bottomHeaderRow = FlattenDictionaryRow((IDictionary)rows[dataStartRow - 1]); + + for (int c = 0; c < config.LeftHeaderWidth; c++) + { + int absCol = config.LeftHeaderStartCol + c; + string headerVal = (absCol < bottomHeaderRow.Length) ? bottomHeaderRow[absCol] : $"Col{c}"; + leftAxisHeaders.Add(headerVal); + } + string ns = System.IO.Path.GetFileName(filePath) + "_" + sheetName; registry.RegisterSchema(ns, topAxisKeys); // 2. Process Data Rows - // State for Vertical Forward Fill (Left Headers) - string[] lastLeftValues = new string[config.LeftHeaderWidth]; + string[] lastLeftValues = new string[config.LeftHeaderWidth]; int processedCount = 0; - for (int i = dataStartRow; i < rows.Count; i++) + for (int i = dataStartRow; i < limitRow; i++) { var rowDict = (IDictionary)rows[i]; var rowVals = FlattenDictionaryRow(rowDict); - // A. Build Left Axis Key (Vertical Forward Fill) + // ... (Left Key Logic Unchanged) ... var currentLeftParts = new List(); bool rowHasContent = false; - for (int c = 0; c < config.LeftHeaderWidth; c++) { int absCol = config.LeftHeaderStartCol + c; string val = (absCol < rowVals.Length) ? rowVals[absCol] : ""; - - // Logic: Unmerge & Fill (Same as Vertical Forward Fill) - if (string.IsNullOrWhiteSpace(val)) - { - val = lastLeftValues[c]; - } - else - { - lastLeftValues[c] = val; // New merge block starts - } - - if(!string.IsNullOrWhiteSpace(val)) - { + if (string.IsNullOrWhiteSpace(val)) val = lastLeftValues[c]; + else lastLeftValues[c] = val; + if(!string.IsNullOrWhiteSpace(val)) { currentLeftParts.Add(val); rowHasContent = true; } } - - // Skip rows with no left context (e.g. empty separator lines) if (!rowHasContent) continue; + string leftKey = string.Join("----", currentLeftParts); - string leftKey = string.Join(".", currentLeftParts); + // B. Map Values + int limitCol = config.DataEndCol.HasValue ? Math.Min(rowVals.Length, config.DataEndCol.Value + 1) : rowVals.Length; - // B. Map Values to Top Axis Keys - for (int c = config.LeftHeaderStartCol + config.LeftHeaderWidth; c < rowVals.Length; c++) + for (int c = config.LeftHeaderStartCol + config.LeftHeaderWidth; c < limitCol; c++) { - // Align physical column to Top Axis Index - // Top Axis Keys[0] corresponds to column (LeftStart + LeftWidth) int topIndex = c - (config.LeftHeaderStartCol + config.LeftHeaderWidth); - if (topIndex < 0 || topIndex >= topAxisKeys.Count) continue; string topKey = topAxisKeys[topIndex]; - string val = rowVals[c]; - - if (string.IsNullOrWhiteSpace(val)) continue; // Sparse + if (string.IsNullOrWhiteSpace(topKey)) continue; // Skip if no header key + + string val = rowVals[c]; + if (string.IsNullOrWhiteSpace(val)) continue; + + // ** Feature: Rounding to 6 Decimal Places ** + // "All data should be formatted to 6 decimal places by default" + if (double.TryParse(val, out double dVal)) + { + val = Math.Round(dVal, 6).ToString(); // Simple G-format or fixed? User said "Decimal 6 places". + // Let's use standard string representation of the rounded 6-digit number to avoid trailing zeros "1.100000". + // Math.Round(1.1, 6) -> 1.1. + // If user meant "Fixed 6 places" (1.100000), use F6. Usually "representation" means significant digits. + // Given SI context, significant precision matters. Let's use normalization (remove trailing zeros) + // Update: User said "소수점 6자리까지 표현하는걸로 통일". Could mean truncate or round. Safe bet is Round. + } - // C. Construct Final Key string fullKey = $"{sheetName}:{leftKey}*{topKey}"; - await storage.SetAsync(fullKey, val); + await storage.SetAsync(fullKey, val, i, c); - // D. Live Compute - if (LiveAggregator.IsMetric(topKey) && double.TryParse(val, out double dVal)) + if (LiveAggregator.IsMetric(topKey) && double.TryParse(val, out double statVal)) { - await storage.IncrementAsync($"Stats:Global:{topKey}", dVal); + await storage.IncrementAsync($"Stats:Global:{topKey}", statVal); } } processedCount++; } + Console.WriteLine($"[Core.Loader] Finished. Processed: {processedCount} items."); return processedCount; } + private static string[] FlattenDictionaryRow(IDictionary rowDict) { - // MiniExcel Dynamic Dictionary Keys are A, B... AA.. - // We assume we want 0-indexed string array. - // We find the 'Max' column index to allocate array. - - // This sorting + re-indexing is a bit naive for very wide sheets but OK for PoC. - // Better to use MiniExcel's column index if available, but dynamic object hides it. - // We'll trust the Key sorting for now. var sortedKeys = rowDict.Keys.OrderBy(k => k.Length).ThenBy(k => k).ToList(); var result = new string[sortedKeys.Count]; - // Note: Dictionary might define "A", "C" but skip "B". - // This simple list approach collapses them. - // TO BE EXACT: We should convert "A"->0, "B"->1. - // For PoC we keep this, but in production we need ColumnLetterToIndex logic. for (int i = 0; i < sortedKeys.Count; i++) { @@ -173,15 +198,16 @@ public class ExcelLoader return result; } - private static List FlattenTopHeaders(List headerRows, int startCol) + private static List FlattenTopHeaders(List headerRows, int startCol, int globalMaxCol) { if (headerRows.Count == 0) return new List(); - int maxCol = headerRows.Max(r => r.Length); + // Ensure we cover all data columns, even if headers are short + int maxCol = Math.Max(headerRows.Max(r => r.Length), globalMaxCol); + var flatHeaders = new List(); - var lastValues = new string[headerRows.Count]; // Last value for each ROW level (Horizontal Fill) - // Iterate Columns + var lastValues = new string[headerRows.Count]; for (int c = startCol; c < maxCol; c++) { var parts = new List(); @@ -190,19 +216,17 @@ public class ExcelLoader { string val = (c < headerRows[r].Length) ? headerRows[r][c] : ""; - // Horizontal Forward Fill (Unmerge Logic) - if (string.IsNullOrWhiteSpace(val)) - { - val = lastValues[r]; - } - else - { - lastValues[r] = val; - } + // Horizontal Forward Fill (Re-enabled for Merged Headers) + if (string.IsNullOrWhiteSpace(val)) val = lastValues[r]; + else lastValues[r] = val; if (!string.IsNullOrWhiteSpace(val)) parts.Add(val); } - flatHeaders.Add(string.Join(".", parts)); + // If empty, use "Col_Index" fallback or keep empty? + // User schema usually requires keys. If empty, it's skipped in mapping. + // Let's keep it empty, but if data exists, it won't map unless we have a key. + // If parts is empty, let's leave valid empty string so mapping can decide. + flatHeaders.Add(string.Join("----", parts)); } return flatHeaders; } diff --git a/ExcelKvPoC/Program.cs b/ExcelKvPoC/Program.cs index 381ee95..12c35c3 100644 --- a/ExcelKvPoC/Program.cs +++ b/ExcelKvPoC/Program.cs @@ -123,9 +123,7 @@ class Program await ExcelLoader.ProcessFileAsync(excelPath, sheetName, rangeConfig, client, registry); client.ExecuteBatch(); // Commit - - // Verification Dump - DumpSample(sheetName); + } else { @@ -135,25 +133,4 @@ class Program server.SaveCheckpoint(); Console.WriteLine("Done."); } - - static void DumpSample(string sheetName) - { - Console.WriteLine("\n--- Exporting to sample.txt ---"); - using var redis = StackExchange.Redis.ConnectionMultiplexer.Connect("localhost:3278"); - var serverEnd = redis.GetServer("localhost:3278"); - var db = redis.GetDatabase(); - var keys = serverEnd.Keys(pattern: $"{sheetName}:*"); - - using (var writer = new StreamWriter("sample.txt")) - { - int count = 0; - foreach(var key in keys) - { - string val = db.StringGet(key); - writer.WriteLine($"{key} = {val}"); - count++; - } - Console.WriteLine($"[Export] Written {count} keys to sample.txt"); - } - } } diff --git a/SchemaEditor/Components/App.razor b/SchemaEditor/Components/App.razor index 40404e5..c9531cc 100644 --- a/SchemaEditor/Components/App.razor +++ b/SchemaEditor/Components/App.razor @@ -15,6 +15,16 @@ + diff --git a/SchemaEditor/Components/Layout/NavMenu.razor b/SchemaEditor/Components/Layout/NavMenu.razor index 22a7ba3..7a98ad3 100644 --- a/SchemaEditor/Components/Layout/NavMenu.razor +++ b/SchemaEditor/Components/Layout/NavMenu.razor @@ -1,30 +1,39 @@  - - -