Files
bim-dogma-poc/ExcelKv.Core/ExcelLoader.cs
2026-01-08 15:32:15 +09:00

210 lines
8.1 KiB
C#

using System;
using System.Collections.Generic;
using System.Linq;
using System.Threading.Tasks;
using MiniExcelLibs;
using StackExchange.Redis; // We'll just use IDatabase interface if possible, or dependency injection.
// For now, let's keep it simple and pass the db/batch or connection string.
// Better yet, pass an interface IStorageWrapper.
namespace ExcelKv.Core;
// Simple Config DTO for user-defined areas
/// <summary>
/// User-supplied description of where the header regions of a sheet live.
/// All positions are 0-based indexes into the row list / flattened column array
/// that <c>ExcelLoader.ProcessFileAsync</c> works on.
/// </summary>
public class RegionConfig
{
    /// <summary>Index of the first row that belongs to the top header band. Defaults to 0.</summary>
    public int TopHeaderStartRow { get; set; }

    /// <summary>Number of rows in the top header band; data rows start right after it. Defaults to 3.</summary>
    public int TopHeaderDepth { get; set; } = 3;

    /// <summary>Index of the first column that belongs to the left header band. Defaults to 0.</summary>
    public int LeftHeaderStartCol { get; set; }

    /// <summary>Number of columns in the left header band; data columns start right after it. Defaults to 4.</summary>
    public int LeftHeaderWidth { get; set; } = 4;
}
/// <summary>
/// Asynchronous key-value sink that the loader writes through, so the loader
/// does not depend on a concrete storage client.
/// </summary>
public interface IStorageWrapper
{
/// <summary>
/// Called by <c>ExcelLoader.ProcessFileAsync</c> once for every non-blank data cell,
/// with the composed cell key and the cell's text.
/// </summary>
/// <remarks>NOTE(review): presumably overwrites any value already stored under <paramref name="key"/> - confirm against the implementation.</remarks>
/// <param name="key">Storage key.</param>
/// <param name="value">Text to associate with the key.</param>
Task SetAsync(string key, string value);
/// <summary>
/// Called by <c>ExcelLoader.ProcessFileAsync</c> for numeric cells in metric columns,
/// with a <c>Stats:Global:...</c> key and the parsed cell value.
/// </summary>
/// <remarks>NOTE(review): presumably adds <paramref name="value"/> to a running total kept under <paramref name="key"/> - confirm against the implementation.</remarks>
/// <param name="key">Storage key of the counter.</param>
/// <param name="value">Amount passed by the caller.</param>
Task IncrementAsync(string key, double value);
}
// Live Aggregator Helper (Moved here for shared access)
/// <summary>
/// Helpers for live aggregation: recognising metric columns by name and
/// building the storage key for an aggregate.
/// </summary>
public class LiveAggregator
{
    // Keyword list is intentionally basic and can be expanded.
    // CultureInvariant keeps the case-insensitive match independent of the process
    // culture (e.g. the Turkish I/i casing rules would otherwise affect "Weight").
    private static readonly System.Text.RegularExpressions.Regex MetricPattern =
        new System.Text.RegularExpressions.Regex(
            @"(수량|합계|Volume|Weight|Total|Usage)",
            System.Text.RegularExpressions.RegexOptions.IgnoreCase
            | System.Text.RegularExpressions.RegexOptions.CultureInvariant
            | System.Text.RegularExpressions.RegexOptions.Compiled);

    /// <summary>
    /// Tells whether a column name contains one of the metric keywords
    /// (the Korean words 수량 "quantity" and 합계 "total", or Volume, Weight, Total, Usage),
    /// ignoring case.
    /// </summary>
    /// <param name="colName">Column name to inspect; <c>null</c> is treated as "not a metric".</param>
    /// <returns><c>true</c> when a keyword occurs anywhere in <paramref name="colName"/>.</returns>
    public static bool IsMetric(string colName)
    {
        // Regex.IsMatch throws on null; a missing column name is simply not a metric.
        return colName is not null && MetricPattern.IsMatch(colName);
    }

    /// <summary>
    /// Builds the aggregate key <c>Stats:{category}:{spec}:{metricName}</c>.
    /// Each segment has its spaces replaced by underscores; a null or blank segment becomes <c>Unknown</c>.
    /// </summary>
    /// <param name="sheetName">Currently not part of the generated key.</param>
    /// <param name="category">First key segment.</param>
    /// <param name="spec">Second key segment.</param>
    /// <param name="metricName">Third key segment.</param>
    /// <returns>The composed key.</returns>
    public static string GenerateGroupKey(string sheetName, string category, string spec, string metricName)
        => $"Stats:{Use_(category)}:{Use_(spec)}:{Use_(metricName)}";

    // Normalises one key segment: blank -> "Unknown", spaces -> underscores.
    private static string Use_(string s) => string.IsNullOrWhiteSpace(s) ? "Unknown" : s.Replace(" ", "_");
}
/// <summary>
/// Loads a two-axis sheet (a band of header rows on top, a band of header columns on the left)
/// and writes every non-blank data cell to storage under a key composed from both axes.
/// </summary>
public class ExcelLoader
{
    /// <summary>
    /// Reads <paramref name="sheetName"/> from <paramref name="filePath"/>, registers the flattened
    /// top-axis keys with <paramref name="registry"/>, stores each non-blank data cell as
    /// <c>{sheetName}:{leftKey}*{topKey}</c>, and increments <c>Stats:Global:{topKey}</c> for numeric
    /// cells whose top key is a metric column.
    /// </summary>
    /// <param name="filePath">Path of the workbook, passed to MiniExcel.</param>
    /// <param name="sheetName">Sheet to read, passed to MiniExcel; also the key prefix.</param>
    /// <param name="config">Position and size of the header bands; all values must be non-negative.</param>
    /// <param name="storage">Sink that receives the cell values and aggregate increments.</param>
    /// <param name="registry">Receives the top-axis keys under the namespace <c>{fileName}_{sheetName}</c>.</param>
    /// <returns>
    /// Number of data rows that had a left-axis key. A row is counted even when it contributed no cells.
    /// Returns 0 when the sheet has no rows below the top header band.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="config"/>, <paramref name="storage"/> or <paramref name="registry"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException">A value in <paramref name="config"/> is negative.</exception>
    public static async Task<int> ProcessFileAsync(
        string filePath,
        string sheetName,
        RegionConfig config,
        IStorageWrapper storage,
        SchemaRegistry registry)
    {
        if (config is null)
        {
            throw new ArgumentNullException(nameof(config));
        }
        if (storage is null)
        {
            throw new ArgumentNullException(nameof(storage));
        }
        if (registry is null)
        {
            throw new ArgumentNullException(nameof(registry));
        }
        // Negative values would otherwise surface as index/allocation errors deep in the loops.
        if (config.TopHeaderStartRow < 0 || config.TopHeaderDepth < 0
            || config.LeftHeaderStartCol < 0 || config.LeftHeaderWidth < 0)
        {
            throw new ArgumentOutOfRangeException(nameof(config), "Header offsets and sizes must be non-negative.");
        }

        Console.WriteLine($"[Core.Loader] Processing {filePath} Sheet: {sheetName}...");
        Console.WriteLine($"[Config] Top: Row{config.TopHeaderStartRow} (+{config.TopHeaderDepth}), Left: Col{config.LeftHeaderStartCol} (+{config.LeftHeaderWidth})");

        var rows = MiniExcel.Query(filePath, sheetName: sheetName, useHeaderRow: false).ToList();

        // Data begins on the first row after the top header band.
        int dataStartRow = config.TopHeaderStartRow + config.TopHeaderDepth;
        if (rows.Count <= dataStartRow)
        {
            return 0;
        }

        // Data begins on the first column after the left header band.
        int dataStartCol = config.LeftHeaderStartCol + config.LeftHeaderWidth;

        // 1. Top axis: collect the header rows and flatten them into one key per data column.
        List<string[]> topHeaderRows = new();
        for (int i = config.TopHeaderStartRow; i < dataStartRow; i++)
        {
            var headerDict = (IDictionary<string, object>)rows[i];
            topHeaderRows.Add(FlattenDictionaryRow(headerDict));
        }

        // topAxisKeys[0] corresponds to physical column dataStartCol.
        var topAxisKeys = FlattenTopHeaders(topHeaderRows, dataStartCol);

        string ns = System.IO.Path.GetFileName(filePath) + "_" + sheetName;
        registry.RegisterSchema(ns, topAxisKeys);

        // 2. Data rows. lastLeftValues carries the vertical forward fill that undoes merged left-header cells.
        string[] lastLeftValues = new string[config.LeftHeaderWidth];
        int processedCount = 0;

        for (int i = dataStartRow; i < rows.Count; i++)
        {
            var rowDict = (IDictionary<string, object>)rows[i];
            string[] rowVals = FlattenDictionaryRow(rowDict);

            // A. Left axis key: a blank cell inherits the last non-blank value seen in that column.
            var leftParts = new List<string>(config.LeftHeaderWidth);
            for (int c = 0; c < config.LeftHeaderWidth; c++)
            {
                int absCol = config.LeftHeaderStartCol + c;
                string val = absCol < rowVals.Length ? rowVals[absCol] : "";

                if (string.IsNullOrWhiteSpace(val))
                {
                    val = lastLeftValues[c];
                }
                else
                {
                    lastLeftValues[c] = val; // a new merge block starts here
                }

                if (!string.IsNullOrWhiteSpace(val))
                {
                    leftParts.Add(val);
                }
            }

            // No left context at all (nothing seen yet in any left column): nothing to key the row by.
            if (leftParts.Count == 0)
            {
                continue;
            }

            string leftKey = string.Join(".", leftParts);

            // B. Walk the data columns that have a matching top-axis key.
            int endCol = Math.Min(rowVals.Length, dataStartCol + topAxisKeys.Count);
            for (int c = dataStartCol; c < endCol; c++)
            {
                string val = rowVals[c];
                if (string.IsNullOrWhiteSpace(val))
                {
                    continue; // sparse: blank cells are not stored
                }

                string topKey = topAxisKeys[c - dataStartCol];

                // C. Store the cell.
                string fullKey = $"{sheetName}:{leftKey}*{topKey}";
                await storage.SetAsync(fullKey, val).ConfigureAwait(false);

                // D. Live aggregate. Cell text is produced with the invariant culture
                // (see CellToString), so it is parsed with the invariant culture as well.
                if (LiveAggregator.IsMetric(topKey)
                    && double.TryParse(
                        val,
                        System.Globalization.NumberStyles.Float | System.Globalization.NumberStyles.AllowThousands,
                        System.Globalization.CultureInfo.InvariantCulture,
                        out double dVal))
                {
                    await storage.IncrementAsync($"Stats:Global:{topKey}", dVal).ConfigureAwait(false);
                }
            }

            processedCount++;
        }

        return processedCount;
    }

    /// <summary>
    /// Converts one row dictionary, keyed by column letters (A, B, ... AA, ...), into a string array
    /// indexed by 0-based column position. Columns missing from the dictionary become empty strings
    /// instead of shifting later columns to the left.
    /// </summary>
    private static string[] FlattenDictionaryRow(IDictionary<string, object> rowDict)
    {
        int width = 0;
        foreach (string key in rowDict.Keys)
        {
            int index = ColumnLetterToIndex(key);
            if (index < 0)
            {
                // Keys are not plain column letters: keep the positional ordering instead.
                return FlattenBySortedKeys(rowDict);
            }
            if (index >= width)
            {
                width = index + 1;
            }
        }

        var result = new string[width];
        for (int i = 0; i < result.Length; i++)
        {
            result[i] = "";
        }
        foreach (var cell in rowDict)
        {
            result[ColumnLetterToIndex(cell.Key)] = CellToString(cell.Value);
        }
        return result;
    }

    /// <summary>
    /// Fallback for rows whose keys are not column letters: orders keys by length, then ordinally,
    /// and returns the values in that order.
    /// </summary>
    private static string[] FlattenBySortedKeys(IDictionary<string, object> rowDict)
    {
        // Ordinal comparison: culture-aware sorting treats pairs such as "AA" or "CH" specially in some locales.
        return rowDict
            .OrderBy(cell => cell.Key.Length)
            .ThenBy(cell => cell.Key, StringComparer.Ordinal)
            .Select(cell => CellToString(cell.Value))
            .ToArray();
    }

    /// <summary>
    /// Maps a column name to its 0-based index ("A" -> 0, "Z" -> 25, "AA" -> 26).
    /// Returns -1 when the name is not one to three upper-case letters A-Z.
    /// </summary>
    private static int ColumnLetterToIndex(string column)
    {
        // Three letters cover Excel's last column (XFD) and bound the array size a key can request.
        if (string.IsNullOrEmpty(column) || column.Length > 3)
        {
            return -1;
        }

        int oneBased = 0;
        foreach (char ch in column)
        {
            if (ch < 'A' || ch > 'Z')
            {
                return -1;
            }
            oneBased = (oneBased * 26) + (ch - 'A' + 1);
        }
        return oneBased - 1;
    }

    /// <summary>Formats a cell value with the invariant culture; null becomes an empty string.</summary>
    private static string CellToString(object value)
        => Convert.ToString(value, System.Globalization.CultureInfo.InvariantCulture) ?? "";

    /// <summary>
    /// Flattens the header rows into one key per column, starting at <paramref name="startCol"/>.
    /// Within each header row a blank cell inherits the last non-blank value to its left
    /// (counting from <paramref name="startCol"/>), which undoes horizontally merged cells.
    /// The non-blank levels of a column are joined with ".".
    /// </summary>
    /// <param name="headerRows">Header rows, top to bottom, as flattened column arrays.</param>
    /// <param name="startCol">0-based index of the first column to produce a key for.</param>
    /// <returns>One key per column from <paramref name="startCol"/> to the widest header row; empty when there are no header rows.</returns>
    private static List<string> FlattenTopHeaders(List<string[]> headerRows, int startCol)
    {
        if (headerRows.Count == 0)
        {
            return new List<string>();
        }

        int maxCol = headerRows.Max(r => r.Length);
        var flatHeaders = new List<string>();
        var lastValues = new string[headerRows.Count]; // last non-blank value per header level

        for (int c = startCol; c < maxCol; c++)
        {
            var parts = new List<string>(headerRows.Count);
            for (int r = 0; r < headerRows.Count; r++)
            {
                string val = c < headerRows[r].Length ? headerRows[r][c] : "";

                if (string.IsNullOrWhiteSpace(val))
                {
                    val = lastValues[r];
                }
                else
                {
                    lastValues[r] = val;
                }

                if (!string.IsNullOrWhiteSpace(val))
                {
                    parts.Add(val);
                }
            }
            flatHeaders.Add(string.Join(".", parts));
        }
        return flatHeaders;
    }
}