diff --git a/src/phase_z2_mapper.py b/src/phase_z2_mapper.py index 17c0e57..69cffe0 100644 --- a/src/phase_z2_mapper.py +++ b/src/phase_z2_mapper.py @@ -237,25 +237,21 @@ def parse_quadrant_item(unit: tuple[str, list[str]]) -> dict: def parse_compare_row_2col_item(unit: tuple[str, list[str]]) -> dict: """F18-style — bold = category label, nested 2 bullets = col_a / col_b values. - Pattern : top bullet = **카테고리**, nested = `- BIM: ...` / `- DX: ...` 또는 - 단순 ordering (첫 nested = col_a, 두번째 = col_b). prefix "BIM:" / "DX:" 등 - 있으면 stripping. + Pattern : top bullet = **카테고리**, nested = first 2 bullets. + *Parser 는 prefix stripping 안 함* (Codex round 43 §F1-b — narrow alias 정정). + Prefix stripping 은 *builder 의 strip_col_prefix_aliases option* 으로 위임. Returns: {label, col_a, col_b} """ top_line, nested_lines = unit label = _extract_bold_or_plain(top_line) - # nested bullets — strip bullet marker, take first 2 + # nested bullets — strip bullet marker, take first 2 (no prefix stripping) nested = [] for l in nested_lines: l_strip = l.strip() if re.match(r"^[\*\-]\s", l_strip): txt = re.sub(r"^[\*\-]\s+", "", l_strip) - # strip optional "BIM:" / "DX:" prefix (anything before colon ≤ 5 chars) - m = re.match(r"^[A-Za-z가-힣]{1,8}\s*:\s*(.+)$", txt) - if m: - txt = m.group(1).strip() txt = re.sub(r"\*\*(.+?)\*\*", r"\1", txt) nested.append(txt) col_a = nested[0] if len(nested) > 0 else "" @@ -525,10 +521,13 @@ def _build_compare_table_2col(section, units, contract) -> dict: rows : list[{label, col_a, col_b}] — top_bullets 각각 → row builder_options : - item_parser : ITEM_PARSERS key (예: `compare_row_2col_item`) - col_a_label_default : col_a header (MDX 첫 행에 명시 안 되면 사용. default "") - col_b_label_default : col_b header (default "") - max_rows : N (default 999 — practical 한계). 초과 시 _truncated_count + item_parser : ITEM_PARSERS key (예: `compare_row_2col_item`) + col_a_label_default : col_a header (MDX 미명시 시 fallback. F1-a fix) + col_b_label_default : col_b header (MDX 미명시 시 fallback) + strip_col_prefix_aliases : list[str] — col_a/col_b 값의 prefix `:` + 를 strip (Codex round 43 §F1-b — narrow alias). + 예 : ["BIM", "DX"]. default [] (no stripping). + max_rows : N (default 999 — practical 한계). """ options = contract["payload"]["builder_options"] parser_name = options["item_parser"] @@ -541,6 +540,7 @@ def _build_compare_table_2col(section, units, contract) -> dict: col_a_label = options.get("col_a_label_default", "") col_b_label = options.get("col_b_label_default", "") + strip_aliases = options.get("strip_col_prefix_aliases", []) or [] max_rows = options.get("max_rows", 999) payload: dict = {} @@ -548,8 +548,27 @@ def _build_compare_table_2col(section, units, contract) -> dict: payload["col_a_label"] = col_a_label payload["col_b_label"] = col_b_label + # Compile precise prefix patterns per alias (Codex round 43 §F1-b narrow). + strip_patterns = [ + re.compile(rf"^{re.escape(a)}\s*[::]\s*(.+)$") + for a in strip_aliases + ] + + def _strip_alias(value: str) -> str: + for pat in strip_patterns: + m = pat.match(value) + if m: + return m.group(1).strip() + return value + visible = list(units[:max_rows]) - rows = [parser(u) for u in visible] + rows = [] + for u in visible: + row = parser(u) + if strip_patterns: + row["col_a"] = _strip_alias(row.get("col_a", "")) + row["col_b"] = _strip_alias(row.get("col_b", "")) + rows.append(row) payload["rows"] = rows if len(units) > max_rows: diff --git a/templates/phase_z2/catalog/frame_contracts.yaml b/templates/phase_z2/catalog/frame_contracts.yaml index 87e82c6..cdd7711 100644 --- a/templates/phase_z2/catalog/frame_contracts.yaml +++ b/templates/phase_z2/catalog/frame_contracts.yaml @@ -398,8 +398,10 @@ bim_dx_comparison_table: family: table source_shape: top_bullets + # NOTE (Codex round 43 §F1-c) : top-level `cardinality.strict: 2` = *column 수* + # (col_a / col_b). data row 수 는 별 — `sub_zones.rows.cardinality` 의 `{min:1, max:12}`. cardinality: - strict: 2 # 2 columns (BIM vs DX 등) + strict: 2 # 2 columns (col_a / col_b) — NOT row count overflow_policy: abort_or_review role_order: @@ -443,6 +445,9 @@ bim_dx_comparison_table: builder: compare_table_2col builder_options: item_parser: compare_row_2col_item # NEW parser — top_bullet → {label, col_a, col_b} - col_a_label_default: "" # MDX 명시 또는 frame default - col_b_label_default: "" + col_a_label_default: "BIM" # F1-a (Codex round 43) — explicit default + col_b_label_default: "DX" # F1-a — explicit default + strip_col_prefix_aliases: # F1-b (Codex round 43) — narrow alias 만 strip + - "BIM" + - "DX" max_rows: 12 # typical 4-8, overflow 보호