From a422d72c0b780bdb545a40304fcc94226de995e7 Mon Sep 17 00:00:00 2001 From: kyeongmin Date: Fri, 15 May 2026 22:28:59 +0900 Subject: [PATCH] =?UTF-8?q?feat(IMP-08):=20U1=20=E2=80=94=20schema=20helpe?= =?UTF-8?q?r=20+=20V4=20alias=20resolver=20(4=20lookup=20sites)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds sub-section schema fields (heading_number / v4_alias_keys / sub_sections) to MdxSection with defaults so existing 4-positional constructions remain valid. Introduces _resolve_v4_section_key helper that resolves a V4 mdx_sections key in exact > alias > None order with no parent/sibling promotion (axis 7 hybrid lock). Rewires four runtime V4 lookup sites (lookup_v4_match, lookup_v4_match_with_fallback, lookup_v4_all_judgments, lookup_v4_candidates) to accept an optional alias_keys kwarg and go through the resolver. U1 callers pass empty alias lists so behaviour is byte-identical to the previous exact-match path; U2 will populate aliases from MDX heading_number metadata. Closure callers in run_phase_z2_mvp1 build section_alias_by_id from MdxSection.v4_alias_keys and forward it into lookup_fn / candidates_lookup_fn / lookup_v4_all_judgments (Step 7-A trace) and into the _select_template_for_overrides single-section selector. Step 9 candidate report (post-decision diagnostic) is marked with an inline English exemption comment per N-R6 — runtime selection goes through _resolve_v4_section_key, the report path stays a direct dict-shape lookup to avoid debug_zones schema plumbing. derive_parent_id now recognises canonical ordinal ids ("03-1-sub-2" -> "03-1") first and keeps the legacy decimal fallback ("04-2.1" -> "04-2") for V4 alias compatibility. Tests : 8 synthetic cases in tests/test_phase_z2_subsection_schema.py covering derive_parent_id ordinal/decimal/none and the resolver exact/alias/no-promote/miss cases. 30/30 PASS combined with the 14 override + 8 fallback baseline. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- src/phase_z2_composition.py | 18 +++-- src/phase_z2_pipeline.py | 91 +++++++++++++++++++++--- tests/test_phase_z2_subsection_schema.py | 82 +++++++++++++++++++++ 3 files changed, 175 insertions(+), 16 deletions(-) create mode 100644 tests/test_phase_z2_subsection_schema.py diff --git a/src/phase_z2_composition.py b/src/phase_z2_composition.py index d5c4909..8050cb4 100644 --- a/src/phase_z2_composition.py +++ b/src/phase_z2_composition.py @@ -21,6 +21,7 @@ Pipeline 의 빠진 layer = MDX 덩어리들을 *최종 zone unit* 으로 묶는 from __future__ import annotations +import re from dataclasses import dataclass, field from pathlib import Path from typing import Optional @@ -371,13 +372,20 @@ class CompositionUnit: # ─── Heading Tree ────────────────────────────────────────────── def derive_parent_id(section_id: str) -> Optional[str]: - """section_id 에서 parent 도출 — V4 키 컨벤션 기반. + """Section id -> parent id derivation by V4 key convention. - 예시 (코멘트, 룰 X) : - - "04-2.1" → "04-2" (decimal suffix → strip) - - "04-1" → None (top-level, no parent) - - "04" → None + IMP-08 B-3 : canonical ordinal `${parent}-sub-${n}` recognised first; + legacy decimal `04-2.1` kept as fallback alias path. 
+ + Examples (illustrative, not rules) : + - "03-1-sub-2" -> "03-1" (canonical ordinal, IMP-08) + - "04-2.1" -> "04-2" (decimal suffix, legacy V4 key style) + - "04-1" -> None (top-level, no parent) + - "04" -> None """ + m = re.fullmatch(r"(.+?)-sub-(\d+)", section_id) + if m: + return m.group(1) parts = section_id.split("-", 1) if len(parts) != 2: return None diff --git a/src/phase_z2_pipeline.py b/src/phase_z2_pipeline.py index 4ab9d73..5516902 100644 --- a/src/phase_z2_pipeline.py +++ b/src/phase_z2_pipeline.py @@ -31,7 +31,7 @@ import re import shutil import sys import time -from dataclasses import asdict, dataclass +from dataclasses import asdict, dataclass, field from pathlib import Path from typing import Optional @@ -136,6 +136,13 @@ class MdxSection: section_num: int title: str raw_content: str + # IMP-08 B-3 sub-section schema (additive, defaults preserve 4-positional callers). + # heading_number: decimal "2.1" from MDX `### 2.1 Title` capture (U2-populated). + # v4_alias_keys: legacy V4 keys to try when canonical ordinal id misses (e.g. "04-2.1"). + # sub_sections: raw child payloads from section_parser (Stage 0 adapter consumes). + heading_number: Optional[str] = None + v4_alias_keys: list = field(default_factory=list) + sub_sections: list = field(default_factory=list) @dataclass @@ -424,8 +431,39 @@ def _v4_match_from_judgment(section_id: str, judgment: dict, rank: Optional[int] ) -def lookup_v4_match(v4: dict, section_id: str) -> Optional[V4Match]: - sec = v4.get("mdx_sections", {}).get(section_id) +def _resolve_v4_section_key( + v4: dict, + section_id: str, + *, + alias_keys: Optional[list] = None, +) -> Optional[str]: + """Resolve a V4 ``mdx_sections`` key for *section_id*. + + Resolution order : + 1. exact match (canonical ordinal id wins) + 2. alias_keys in given order (e.g. legacy decimal ``04-2.1`` for ``04-2-sub-1``) + 3. None on miss. + + Never promotes to parent or sibling — that would reinterpret V4 evidence + (axis 7 hybrid lock, RULE 0). 
U1 callers pass alias_keys=None so the + function is byte-identical to the previous exact-match lookup; U2 populates + aliases from MDX heading_number metadata. + """ + keys = v4.get("mdx_sections", {}) + if section_id in keys: + return section_id + if alias_keys: + for a in alias_keys: + if a and a in keys: + return a + return None + + +def lookup_v4_match( + v4: dict, section_id: str, *, alias_keys: Optional[list] = None +) -> Optional[V4Match]: + resolved = _resolve_v4_section_key(v4, section_id, alias_keys=alias_keys) + sec = v4.get("mdx_sections", {}).get(resolved) if resolved else None if not sec: return None judgments = sec.get("judgments_full32", []) @@ -462,13 +500,15 @@ def lookup_v4_match_with_fallback( *, raw_content: Optional[str] = None, max_rank: int = 3, + alias_keys: Optional[list] = None, ) -> tuple[Optional[V4Match], dict]: """Select V4 rank-1, or promote rank-2/3 when rank-1 is not auto-renderable. This is an IMP-05 selector only. It uses existing V4 labels, frame-contract presence, and the Phase Z capacity precheck; it does not call calculate_fit. """ - sec = v4.get("mdx_sections", {}).get(section_id) + resolved = _resolve_v4_section_key(v4, section_id, alias_keys=alias_keys) + sec = v4.get("mdx_sections", {}).get(resolved) if resolved else None trace = { "section_id": section_id, "max_rank": max_rank, @@ -571,7 +611,9 @@ def lookup_v4_match_with_fallback( return None, trace -def lookup_v4_all_judgments(v4: dict, section_id: str) -> list[V4Match]: +def lookup_v4_all_judgments( + v4: dict, section_id: str, *, alias_keys: Optional[list] = None +) -> list[V4Match]: """V4 raw 32 entry 그대로 반환 — reject 포함, max_n filter 없음. Step 7-A axis 보강 (사용자 lock 2026-05-08) — 사용자 UI 가 모든 frame 의 @@ -581,7 +623,8 @@ def lookup_v4_all_judgments(v4: dict, section_id: str) -> list[V4Match]: Returns : list[V4Match] — 0~32 길이. raw judgments_full32 순서 (= V4 score desc) 보존. 
""" - sec = v4.get("mdx_sections", {}).get(section_id) + resolved = _resolve_v4_section_key(v4, section_id, alias_keys=alias_keys) + sec = v4.get("mdx_sections", {}).get(resolved) if resolved else None if not sec: return [] judgments = sec.get("judgments_full32", []) @@ -592,7 +635,11 @@ def lookup_v4_all_judgments(v4: dict, section_id: str) -> list[V4Match]: def lookup_v4_candidates( - v4: dict, section_id: str, max_n: int = 6 + v4: dict, + section_id: str, + max_n: int = 6, + *, + alias_keys: Optional[list] = None, ) -> list[V4Match]: """V4 non-reject 후보 list 반환 (Step 5 보완 axis — 사용자 lock 2026-05-08). @@ -612,7 +659,8 @@ def lookup_v4_candidates( 호출처 무변. 본 함수는 Step 5 artifact + Step 9 application_plan input 위한 새 entry point. """ - sec = v4.get("mdx_sections", {}).get(section_id) + resolved = _resolve_v4_section_key(v4, section_id, alias_keys=alias_keys) + sec = v4.get("mdx_sections", {}).get(resolved) if resolved else None if not sec: return [] judgments = sec.get("judgments_full32", []) @@ -932,7 +980,12 @@ def _build_position_assignment_plan( if v4 is None or section is None: return None, "no_v4_section", None raw_content = getattr(section, "raw_content", None) - match, trace = lookup_v4_match_with_fallback(v4, sid, raw_content=raw_content) + # IMP-08 B-3 : forward sub-section V4 aliases (decimal heading_number) + # when canonical ordinal id misses; safe for top-level sids (empty list). + alias_keys = list(getattr(section, "v4_alias_keys", []) or []) + match, trace = lookup_v4_match_with_fallback( + v4, sid, raw_content=raw_content, alias_keys=alias_keys + ) if match is None: return None, "no_direct_render_template", trace return match.template_id, None, trace @@ -2039,6 +2092,11 @@ def run_phase_z2_mvp1( # candidate (separate / parent_merged) → score → greedy non-overlapping select → # layout preset (count-based v0). section_content_by_id = {s.section_id: s.raw_content for s in sections} + # IMP-08 B-3 : sub-section ordinal id -> legacy V4 key aliases (e.g. 
"04-2.1"). + # Empty list for canonical (top-level) sections — U1 baseline path is exact-only. + section_alias_by_id: dict[str, list] = { + s.section_id: list(getattr(s, "v4_alias_keys", []) or []) for s in sections + } v4_fallback_traces: dict[str, dict] = {} def lookup_fn(sid: str) -> Optional[V4Match]: @@ -2047,6 +2105,7 @@ def run_phase_z2_mvp1( sid, raw_content=section_content_by_id.get(sid), max_rank=3, + alias_keys=section_alias_by_id.get(sid), ) v4_fallback_traces[sid] = trace return match @@ -2054,7 +2113,7 @@ def run_phase_z2_mvp1( # Step 6-A axis (사용자 lock 2026-05-08) — V4 raw dict 흡수 fn. # composition module 은 V4 yaml shape 모름. 본 fn 만 통해 후보 list 받음. def candidates_lookup_fn(sid: str) -> list[V4Match]: - return lookup_v4_candidates(v4, sid) + return lookup_v4_candidates(v4, sid, alias_keys=section_alias_by_id.get(sid)) units, layout_preset, comp_debug = plan_composition( sections, lookup_fn, V4_LABEL_TO_PHASE_Z_STATUS, MVP1_ALLOWED_STATUSES, @@ -2777,6 +2836,11 @@ def run_phase_z2_mvp1( note="V4 evidence 와 B4 통합 미완 — 별 axis. 현재 = composition planner 의 V4 rank-1 채택.", ) # Step 9 HTML — V4 top candidates per zone (rank 1~4) + # IMP-08 N-R6 diagnostic exemption : this report path is post-decision + # reporting only. Runtime selection goes through _resolve_v4_section_key + # (4 sites). Direct dict lookup here is intentional — debug_zones carries + # dict-shape entries without v4_alias_keys plumbing, and a miss here only + # yields a "V4 entry 없음" report line (runtime impact zero). try: with open(V4_RESULT_PATH, encoding="utf-8") as _vf: _v4_full = yaml.safe_load(_vf) @@ -3263,7 +3327,12 @@ def run_phase_z2_mvp1( # 모든 frame 의 png 를 카드로 보여주기 위함). # unit_id = source_section_ids join. parent_merged 는 첫 section 의 # judgments 사용 (parent V4 entry 가 그 section 에 있으므로). 
- v4_all_for_unit = lookup_v4_all_judgments(v4, unit.source_section_ids[0]) + # IMP-08 B-3 : forward sub-section V4 aliases (decimal heading_number) + # when canonical ordinal id misses; U1 default = empty list (no change). + _first_sid = unit.source_section_ids[0] + v4_all_for_unit = lookup_v4_all_judgments( + v4, _first_sid, alias_keys=section_alias_by_id.get(_first_sid) + ) # application_candidates : V4 후보 zip 으로 application_mode 변환 app_candidates = [] diff --git a/tests/test_phase_z2_subsection_schema.py b/tests/test_phase_z2_subsection_schema.py new file mode 100644 index 0000000..1de1e7c --- /dev/null +++ b/tests/test_phase_z2_subsection_schema.py @@ -0,0 +1,82 @@ +"""IMP-08 B-3 sub-section drag/drop — schema + V4 alias resolver tests. + +Fully synthetic per Codex #7 generalization guardrail: +NO real catalog template_id / frame_id, NO ``v4_full32_result.yaml`` dependency, +NO MDX-specific section ids beyond canonical id format. + +Locked scope (Stage 3 R8) : + A. ``derive_parent_id`` canonical ordinal recognition + legacy decimal fallback. + B. ``_resolve_v4_section_key`` exact > alias > None (no parent/sibling promotion). +""" +from __future__ import annotations + +from src.phase_z2_composition import derive_parent_id +from src.phase_z2_pipeline import _resolve_v4_section_key + + +# ─── A. derive_parent_id ──────────────────────────────────────────────────── + + +def test_derive_parent_id_ordinal_sub(): + assert derive_parent_id("03-1-sub-2") == "03-1" + assert derive_parent_id("04-2-sub-1") == "04-2" + + +def test_derive_parent_id_decimal_legacy_alias(): + # Legacy V4 decimal id retains existing behaviour for alias path. + assert derive_parent_id("04-2.1") == "04-2" + + +def test_derive_parent_id_top_level_none(): + assert derive_parent_id("04-1") is None + assert derive_parent_id("04") is None + assert derive_parent_id("nonsense") is None + + +# ─── B. 
_resolve_v4_section_key ───────────────────────────────────────────── + + +def _fake_v4(*keys): + return {"mdx_sections": {k: {"judgments_full32": []} for k in keys}} + + +def test_alias_resolver_exact_match_wins(): + v4 = _fake_v4("04-2-sub-1", "04-2.1") + assert _resolve_v4_section_key(v4, "04-2-sub-1") == "04-2-sub-1" + assert ( + _resolve_v4_section_key(v4, "04-2-sub-1", alias_keys=["04-2.1"]) + == "04-2-sub-1" + ) + + +def test_alias_resolver_decimal_alias_when_metadata_present(): + v4 = _fake_v4("04-2.1") + assert ( + _resolve_v4_section_key(v4, "04-2-sub-1", alias_keys=["04-2.1"]) + == "04-2.1" + ) + + +def test_alias_resolver_no_parent_promotion(): + # parent V4 entry must not be promoted into a sibling sub-section lookup. + v4 = _fake_v4("04-2") + assert _resolve_v4_section_key(v4, "04-2-sub-1") is None + assert ( + _resolve_v4_section_key(v4, "04-2-sub-1", alias_keys=["04-2"]) + == "04-2" + ) # alias is opt-in; only resolves when caller explicitly provides it + + +def test_alias_resolver_no_sibling_promotion(): + # sibling sub-section entry must not be auto-promoted without an alias. + v4 = _fake_v4("04-2-sub-2") + assert _resolve_v4_section_key(v4, "04-2-sub-1") is None + + +def test_alias_resolver_miss_returns_none(): + v4 = _fake_v4("99-1") + assert _resolve_v4_section_key(v4, "04-2-sub-1") is None + assert ( + _resolve_v4_section_key(v4, "04-2-sub-1", alias_keys=["04-2.1"]) + is None + )