그리드 표현 일부 완료. 데이터 검측 필수 예정
This commit is contained in:
@@ -3,31 +3,32 @@ using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using System.Threading.Tasks;
|
||||
using MiniExcelLibs;
|
||||
using StackExchange.Redis; // We'll just use IDatabase interface if possible, or dependency injection.
|
||||
// For now, let's keep it simple and pass the db/batch or connection string.
|
||||
// Better yet, pass an interface IStorageWrapper.
|
||||
|
||||
namespace ExcelKv.Core;
|
||||
|
||||
// Simple Config DTO for user-defined areas
|
||||
/// <summary>
/// User-defined description of where the header regions of a sheet are located.
/// Row and column values are zero-based indices into the rows/columns as read from the sheet.
/// </summary>
public class RegionConfig
{
    /// <summary>Index of the first row belonging to the top (column) header band.</summary>
    public int TopHeaderStartRow { get; set; } = 0;

    /// <summary>
    /// Number of rows in the top header band. Data rows begin at
    /// <c>TopHeaderStartRow + TopHeaderDepth</c>.
    /// </summary>
    public int TopHeaderDepth { get; set; } = 3;

    /// <summary>Index of the first column belonging to the left (row) header band.</summary>
    public int LeftHeaderStartCol { get; set; } = 0;

    /// <summary>
    /// Number of columns in the left header band. Data columns begin at
    /// <c>LeftHeaderStartCol + LeftHeaderWidth</c>.
    /// </summary>
    public int LeftHeaderWidth { get; set; } = 4;

    // Optional parsing limits (inclusive indices). Null means "no limit":
    // the loader then reads to the last available row / column.

    /// <summary>Last data row to parse (inclusive), or <c>null</c> for no limit.</summary>
    public int? DataEndRow { get; set; }

    /// <summary>Last data column to parse (inclusive), or <c>null</c> for no limit.</summary>
    public int? DataEndCol { get; set; }
}
|
||||
|
||||
/// <summary>
/// Abstraction over the key/value store that the loader writes parsed cells into.
/// </summary>
public interface IStorageWrapper
{
    /// <summary>Stores <paramref name="value"/> under <paramref name="key"/>.</summary>
    /// <param name="key">Storage key.</param>
    /// <param name="value">Value to store.</param>
    Task SetAsync(string key, string value);

    /// <summary>
    /// Stores <paramref name="value"/> under <paramref name="key"/> together with the
    /// position it came from, for traceability.
    /// </summary>
    /// <param name="key">Storage key.</param>
    /// <param name="value">Value to store.</param>
    /// <param name="row">Row index of the source cell (the loader passes its row loop index).</param>
    /// <param name="col">Column index of the source cell (the loader passes its column loop index).</param>
    Task SetAsync(string key, string value, int row, int col);

    /// <summary>
    /// Applies <paramref name="value"/> as an increment to the entry stored under
    /// <paramref name="key"/>. The loader uses this for running statistics.
    /// </summary>
    /// <param name="key">Storage key of the counter.</param>
    /// <param name="value">Amount passed by the caller; presumably added to the stored number — confirm per implementation.</param>
    Task IncrementAsync(string key, double value);
}
|
||||
|
||||
// Live Aggregator Helper (Moved here for shared access)
|
||||
public class LiveAggregator
|
||||
{
|
||||
// Simple basic check for now. Can be expanded.
|
||||
private static readonly System.Text.RegularExpressions.Regex MetricPattern =
|
||||
new System.Text.RegularExpressions.Regex(@"(수량|합계|Volume|Weight|Total|Usage)", System.Text.RegularExpressions.RegexOptions.IgnoreCase | System.Text.RegularExpressions.RegexOptions.Compiled);
|
||||
|
||||
@@ -42,6 +43,25 @@ public class LiveAggregator
|
||||
|
||||
public class ExcelLoader
|
||||
{
|
||||
/// <summary>
/// Reads the names of the sheets in the workbook at <paramref name="filePath"/>.
/// The synchronous MiniExcel call is offloaded with <c>Task.Run</c> so the caller's
/// thread is not blocked while the file is read.
/// </summary>
/// <param name="filePath">Path of the workbook to inspect.</param>
/// <returns>The sheet names as returned by <c>MiniExcel.GetSheetNames</c>, copied into a list.</returns>
public static async Task<List<string>> GetSheetNamesAsync(string filePath)
{
    return await Task.Run(() => MiniExcel.GetSheetNames(filePath).ToList());
}
|
||||
|
||||
/// <summary>
/// Loads the first rows of a sheet for preview purposes.
/// The sheet is queried without treating any row as a header, and each row is converted
/// to a string array via <c>FlattenDictionaryRow</c>.
/// </summary>
/// <param name="filePath">Path of the workbook to read.</param>
/// <param name="sheetName">Name of the sheet to preview.</param>
/// <param name="count">Maximum number of rows to return (default 50).</param>
/// <returns>At most <paramref name="count"/> rows, in sheet order.</returns>
public static async Task<List<string[]>> GetPreviewRowsAsync(string filePath, string sheetName, int count = 50)
{
    // The MiniExcel query is synchronous, so run it off the caller's thread.
    return await Task.Run(() =>
        MiniExcel.Query(filePath, sheetName: sheetName, useHeaderRow: false)
            .Take(count)
            // Rows are expected to be dictionaries keyed by column name; a row of any
            // other type fails the cast, just as the typed foreach did before.
            .Cast<IDictionary<string, object>>()
            .Select(row => FlattenDictionaryRow(row))
            .ToList());
}
|
||||
|
||||
public static async Task<int> ProcessFileAsync(
|
||||
string filePath,
|
||||
string sheetName,
|
||||
@@ -50,7 +70,6 @@ public class ExcelLoader
|
||||
SchemaRegistry registry)
|
||||
{
|
||||
Console.WriteLine($"[Core.Loader] Processing {filePath} Sheet: {sheetName}...");
|
||||
Console.WriteLine($"[Config] Top: Row{config.TopHeaderStartRow} (+{config.TopHeaderDepth}), Left: Col{config.LeftHeaderStartCol} (+{config.LeftHeaderWidth})");
|
||||
|
||||
var rows = MiniExcel.Query(filePath, sheetName: sheetName, useHeaderRow: false).ToList();
|
||||
|
||||
@@ -58,112 +77,118 @@ public class ExcelLoader
|
||||
int dataStartRow = config.TopHeaderStartRow + config.TopHeaderDepth;
|
||||
if (rows.Count <= dataStartRow) return 0;
|
||||
|
||||
// 1. Analyze Top Headers
|
||||
// Extract the rows that form the Top Header
|
||||
List<string[]> topHeaderRows = new();
|
||||
// 1. Analyze Top Headers (Data Columns)
|
||||
var topHeaderRows = new List<string[]>();
|
||||
for (int i = config.TopHeaderStartRow; i < dataStartRow; i++)
|
||||
{
|
||||
var rowDict = (IDictionary<string, object>)rows[i];
|
||||
topHeaderRows.Add(FlattenDictionaryRow(rowDict));
|
||||
topHeaderRows.Add(FlattenDictionaryRow((IDictionary<string, object>)rows[i]));
|
||||
}
|
||||
|
||||
// Flatten Top Headers (Horizontal Forward Fill)
|
||||
// Offset: We effectively care about columns starting from LeftHeaderWidth to End
|
||||
// but we pass the absolute index.
|
||||
var topAxisKeys = FlattenTopHeaders(topHeaderRows, config.LeftHeaderStartCol + config.LeftHeaderWidth);
|
||||
// Calculate Global Max Col from Data Rows to ensure we don't truncate data
|
||||
// Optimization: check a sample or assume logical limit. For correctness, check valid rows.
|
||||
int globalMaxCol = 0;
|
||||
int limitRow = config.DataEndRow.HasValue ? Math.Min(rows.Count, config.DataEndRow.Value + 1) : rows.Count;
|
||||
for(int i = dataStartRow; i < limitRow; i++)
|
||||
{
|
||||
var d = (IDictionary<string, object>)rows[i];
|
||||
if(d.Count > globalMaxCol) globalMaxCol = d.Count; // Approximate
|
||||
// FlattenDictionaryRow is cleaner but expensive to call just for count.
|
||||
// d.Keys.Count is effectively the column count for that row.
|
||||
}
|
||||
|
||||
// Flatten Data Headers (Right Side)
|
||||
var topAxisKeys = FlattenTopHeaders(topHeaderRows, config.LeftHeaderStartCol + config.LeftHeaderWidth, globalMaxCol);
|
||||
|
||||
// Register Schema
|
||||
// ... (Left Axis Header logic omitted for brevity, unchanged) ...
|
||||
// ** New: Extract Left Axis Headers (Corner Region) **
|
||||
// These are the headers *above* the Left Key columns.
|
||||
var leftAxisHeaders = new List<string>();
|
||||
var bottomHeaderRow = FlattenDictionaryRow((IDictionary<string, object>)rows[dataStartRow - 1]);
|
||||
|
||||
for (int c = 0; c < config.LeftHeaderWidth; c++)
|
||||
{
|
||||
int absCol = config.LeftHeaderStartCol + c;
|
||||
string headerVal = (absCol < bottomHeaderRow.Length) ? bottomHeaderRow[absCol] : $"Col{c}";
|
||||
leftAxisHeaders.Add(headerVal);
|
||||
}
|
||||
|
||||
string ns = System.IO.Path.GetFileName(filePath) + "_" + sheetName;
|
||||
registry.RegisterSchema(ns, topAxisKeys);
|
||||
|
||||
// 2. Process Data Rows
|
||||
// State for Vertical Forward Fill (Left Headers)
|
||||
string[] lastLeftValues = new string[config.LeftHeaderWidth];
|
||||
string[] lastLeftValues = new string[config.LeftHeaderWidth];
|
||||
|
||||
int processedCount = 0;
|
||||
|
||||
for (int i = dataStartRow; i < rows.Count; i++)
|
||||
for (int i = dataStartRow; i < limitRow; i++)
|
||||
{
|
||||
var rowDict = (IDictionary<string, object>)rows[i];
|
||||
var rowVals = FlattenDictionaryRow(rowDict);
|
||||
|
||||
// A. Build Left Axis Key (Vertical Forward Fill)
|
||||
// ... (Left Key Logic Unchanged) ...
|
||||
var currentLeftParts = new List<string>();
|
||||
bool rowHasContent = false;
|
||||
|
||||
for (int c = 0; c < config.LeftHeaderWidth; c++)
|
||||
{
|
||||
int absCol = config.LeftHeaderStartCol + c;
|
||||
string val = (absCol < rowVals.Length) ? rowVals[absCol] : "";
|
||||
|
||||
// Logic: Unmerge & Fill (Same as Vertical Forward Fill)
|
||||
if (string.IsNullOrWhiteSpace(val))
|
||||
{
|
||||
val = lastLeftValues[c];
|
||||
}
|
||||
else
|
||||
{
|
||||
lastLeftValues[c] = val; // New merge block starts
|
||||
}
|
||||
|
||||
if(!string.IsNullOrWhiteSpace(val))
|
||||
{
|
||||
if (string.IsNullOrWhiteSpace(val)) val = lastLeftValues[c];
|
||||
else lastLeftValues[c] = val;
|
||||
if(!string.IsNullOrWhiteSpace(val)) {
|
||||
currentLeftParts.Add(val);
|
||||
rowHasContent = true;
|
||||
}
|
||||
}
|
||||
|
||||
// Skip rows with no left context (e.g. empty separator lines)
|
||||
if (!rowHasContent) continue;
|
||||
string leftKey = string.Join("----", currentLeftParts);
|
||||
|
||||
string leftKey = string.Join(".", currentLeftParts);
|
||||
// B. Map Values
|
||||
int limitCol = config.DataEndCol.HasValue ? Math.Min(rowVals.Length, config.DataEndCol.Value + 1) : rowVals.Length;
|
||||
|
||||
// B. Map Values to Top Axis Keys
|
||||
for (int c = config.LeftHeaderStartCol + config.LeftHeaderWidth; c < rowVals.Length; c++)
|
||||
for (int c = config.LeftHeaderStartCol + config.LeftHeaderWidth; c < limitCol; c++)
|
||||
{
|
||||
// Align physical column to Top Axis Index
|
||||
// Top Axis Keys[0] corresponds to column (LeftStart + LeftWidth)
|
||||
int topIndex = c - (config.LeftHeaderStartCol + config.LeftHeaderWidth);
|
||||
|
||||
if (topIndex < 0 || topIndex >= topAxisKeys.Count) continue;
|
||||
|
||||
string topKey = topAxisKeys[topIndex];
|
||||
string val = rowVals[c];
|
||||
|
||||
if (string.IsNullOrWhiteSpace(val)) continue; // Sparse
|
||||
if (string.IsNullOrWhiteSpace(topKey)) continue; // Skip if no header key
|
||||
|
||||
string val = rowVals[c];
|
||||
if (string.IsNullOrWhiteSpace(val)) continue;
|
||||
|
||||
// ** Feature: Rounding to 6 Decimal Places **
|
||||
// "All data should be formatted to 6 decimal places by default"
|
||||
if (double.TryParse(val, out double dVal))
|
||||
{
|
||||
val = Math.Round(dVal, 6).ToString(); // Simple G-format or fixed? User said "Decimal 6 places".
|
||||
// Let's use standard string representation of the rounded 6-digit number to avoid trailing zeros "1.100000".
|
||||
// Math.Round(1.1, 6) -> 1.1.
|
||||
// If user meant "Fixed 6 places" (1.100000), use F6. Usually "representation" means significant digits.
|
||||
// Given SI context, significant precision matters. Let's use normalization (remove trailing zeros)
|
||||
// Update: User said "소수점 6자리까지 표현하는걸로 통일". Could mean truncate or round. Safe bet is Round.
|
||||
}
|
||||
|
||||
// C. Construct Final Key
|
||||
string fullKey = $"{sheetName}:{leftKey}*{topKey}";
|
||||
|
||||
await storage.SetAsync(fullKey, val);
|
||||
await storage.SetAsync(fullKey, val, i, c);
|
||||
|
||||
// D. Live Compute
|
||||
if (LiveAggregator.IsMetric(topKey) && double.TryParse(val, out double dVal))
|
||||
if (LiveAggregator.IsMetric(topKey) && double.TryParse(val, out double statVal))
|
||||
{
|
||||
await storage.IncrementAsync($"Stats:Global:{topKey}", dVal);
|
||||
await storage.IncrementAsync($"Stats:Global:{topKey}", statVal);
|
||||
}
|
||||
}
|
||||
processedCount++;
|
||||
}
|
||||
|
||||
Console.WriteLine($"[Core.Loader] Finished. Processed: {processedCount} items.");
|
||||
return processedCount;
|
||||
}
|
||||
|
||||
|
||||
private static string[] FlattenDictionaryRow(IDictionary<string, object> rowDict)
|
||||
{
|
||||
// MiniExcel Dynamic Dictionary Keys are A, B... AA..
|
||||
// We assume we want 0-indexed string array.
|
||||
// We find the 'Max' column index to allocate array.
|
||||
|
||||
// This sorting + re-indexing is a bit naive for very wide sheets but OK for PoC.
|
||||
// Better to use MiniExcel's column index if available, but dynamic object hides it.
|
||||
// We'll trust the Key sorting for now.
|
||||
var sortedKeys = rowDict.Keys.OrderBy(k => k.Length).ThenBy(k => k).ToList();
|
||||
var result = new string[sortedKeys.Count];
|
||||
// Note: Dictionary might define "A", "C" but skip "B".
|
||||
// This simple list approach collapses them.
|
||||
// TO BE EXACT: We should convert "A"->0, "B"->1.
|
||||
// For PoC we keep this, but in production we need ColumnLetterToIndex logic.
|
||||
|
||||
for (int i = 0; i < sortedKeys.Count; i++)
|
||||
{
|
||||
@@ -173,15 +198,16 @@ public class ExcelLoader
|
||||
return result;
|
||||
}
|
||||
|
||||
private static List<string> FlattenTopHeaders(List<string[]> headerRows, int startCol)
|
||||
private static List<string> FlattenTopHeaders(List<string[]> headerRows, int startCol, int globalMaxCol)
|
||||
{
|
||||
if (headerRows.Count == 0) return new List<string>();
|
||||
int maxCol = headerRows.Max(r => r.Length);
|
||||
// Ensure we cover all data columns, even if headers are short
|
||||
int maxCol = Math.Max(headerRows.Max(r => r.Length), globalMaxCol);
|
||||
|
||||
var flatHeaders = new List<string>();
|
||||
|
||||
var lastValues = new string[headerRows.Count]; // Last value for each ROW level (Horizontal Fill)
|
||||
|
||||
// Iterate Columns
|
||||
var lastValues = new string[headerRows.Count];
|
||||
for (int c = startCol; c < maxCol; c++)
|
||||
{
|
||||
var parts = new List<string>();
|
||||
@@ -190,19 +216,17 @@ public class ExcelLoader
|
||||
{
|
||||
string val = (c < headerRows[r].Length) ? headerRows[r][c] : "";
|
||||
|
||||
// Horizontal Forward Fill (Unmerge Logic)
|
||||
if (string.IsNullOrWhiteSpace(val))
|
||||
{
|
||||
val = lastValues[r];
|
||||
}
|
||||
else
|
||||
{
|
||||
lastValues[r] = val;
|
||||
}
|
||||
// Horizontal Forward Fill (Re-enabled for Merged Headers)
|
||||
if (string.IsNullOrWhiteSpace(val)) val = lastValues[r];
|
||||
else lastValues[r] = val;
|
||||
|
||||
if (!string.IsNullOrWhiteSpace(val)) parts.Add(val);
|
||||
}
|
||||
flatHeaders.Add(string.Join(".", parts));
|
||||
// If empty, use "Col_Index" fallback or keep empty?
|
||||
// User schema usually requires keys. If empty, it's skipped in mapping.
|
||||
// Let's keep it empty, but if data exists, it won't map unless we have a key.
|
||||
// If parts is empty, let's leave valid empty string so mapping can decide.
|
||||
flatHeaders.Add(string.Join("----", parts));
|
||||
}
|
||||
return flatHeaders;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user