feat(IMP-08): U1 — schema helper + V4 alias resolver (4 lookup sites)

Adds sub-section schema fields (heading_number / v4_alias_keys /
sub_sections) to MdxSection with defaults so existing 4-positional
constructions remain valid. Introduces _resolve_v4_section_key helper
that resolves a V4 mdx_sections key in exact > alias > None order with
no parent/sibling promotion (axis 7 hybrid lock).

Rewires four runtime V4 lookup sites (lookup_v4_match,
lookup_v4_match_with_fallback, lookup_v4_all_judgments,
lookup_v4_candidates) to accept an optional alias_keys kwarg and go
through the resolver. U1 callers pass empty alias lists so behaviour
is byte-identical to the previous exact-match path; U2 will populate
aliases from MDX heading_number metadata.

Closure callers in run_phase_z2_mvp1 build section_alias_by_id from
MdxSection.v4_alias_keys and forward the aliases into lookup_fn /
candidates_lookup_fn / lookup_v4_all_judgments (Step 7-A trace) and
into the _select_template_for_overrides single-section selector.

Step 9 candidate report (post-decision diagnostic) is marked with an
inline English exemption comment per N-R6: runtime selection goes
through _resolve_v4_section_key, while the report path stays a direct
dict-shape lookup to avoid debug_zones schema plumbing.

derive_parent_id now recognises canonical ordinal ids
("03-1-sub-2" -> "03-1") first and keeps the legacy decimal fallback
("04-2.1" -> "04-2") for V4 alias compatibility.

Tests : 8 synthetic cases in tests/test_phase_z2_subsection_schema.py
covering derive_parent_id ordinal/decimal/none and the resolver
exact/alias/no-promote/miss cases. 30/30 PASS combined with the 14
override + 8 fallback baseline.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-15 22:28:59 +09:00
parent 0f0d3fa91f
commit a422d72c0b
3 changed files with 175 additions and 16 deletions

View File

@@ -21,6 +21,7 @@ Pipeline 의 빠진 layer = MDX 덩어리들을 *최종 zone unit* 으로 묶는
from __future__ import annotations from __future__ import annotations
import re
from dataclasses import dataclass, field from dataclasses import dataclass, field
from pathlib import Path from pathlib import Path
from typing import Optional from typing import Optional
@@ -371,13 +372,20 @@ class CompositionUnit:
# ─── Heading Tree ────────────────────────────────────────────── # ─── Heading Tree ──────────────────────────────────────────────
def derive_parent_id(section_id: str) -> Optional[str]: def derive_parent_id(section_id: str) -> Optional[str]:
"""section_id 에서 parent 도출 — V4 키 컨벤션 기반. """Section id -> parent id derivation by V4 key convention.
예시 (코멘트, 룰 X) : IMP-08 B-3 : canonical ordinal `${parent}-sub-${n}` recognised first;
- "04-2.1""04-2" (decimal suffix → strip) legacy decimal `04-2.1` kept as fallback alias path.
- "04-1" → None (top-level, no parent)
- "04" → None Examples (illustrative, not rules) :
- "03-1-sub-2" -> "03-1" (canonical ordinal, IMP-08)
- "04-2.1" -> "04-2" (decimal suffix, legacy V4 key style)
- "04-1" -> None (top-level, no parent)
- "04" -> None
""" """
m = re.fullmatch(r"(.+?)-sub-(\d+)", section_id)
if m:
return m.group(1)
parts = section_id.split("-", 1) parts = section_id.split("-", 1)
if len(parts) != 2: if len(parts) != 2:
return None return None

View File

@@ -31,7 +31,7 @@ import re
import shutil import shutil
import sys import sys
import time import time
from dataclasses import asdict, dataclass from dataclasses import asdict, dataclass, field
from pathlib import Path from pathlib import Path
from typing import Optional from typing import Optional
@@ -136,6 +136,13 @@ class MdxSection:
section_num: int section_num: int
title: str title: str
raw_content: str raw_content: str
# IMP-08 B-3 sub-section schema (additive, defaults preserve 4-positional callers).
# heading_number: decimal "2.1" from MDX `### 2.1 Title` capture (U2-populated).
# v4_alias_keys: legacy V4 keys to try when canonical ordinal id misses (e.g. "04-2.1").
# sub_sections: raw child payloads from section_parser (Stage 0 adapter consumes).
heading_number: Optional[str] = None
v4_alias_keys: list = field(default_factory=list)
sub_sections: list = field(default_factory=list)
@dataclass @dataclass
@@ -424,8 +431,39 @@ def _v4_match_from_judgment(section_id: str, judgment: dict, rank: Optional[int]
) )
def lookup_v4_match(v4: dict, section_id: str) -> Optional[V4Match]: def _resolve_v4_section_key(
sec = v4.get("mdx_sections", {}).get(section_id) v4: dict,
section_id: str,
*,
alias_keys: Optional[list] = None,
) -> Optional[str]:
"""Resolve a V4 ``mdx_sections`` key for *section_id*.
Resolution order :
1. exact match (canonical ordinal id wins)
2. alias_keys in given order (e.g. legacy decimal ``04-2.1`` for ``04-2-sub-1``)
3. None on miss.
Never promotes to parent or sibling — that would reinterpret V4 evidence
(axis 7 hybrid lock, RULE 0). U1 callers pass alias_keys=None so the
function is byte-identical to the previous exact-match lookup; U2 populates
aliases from MDX heading_number metadata.
"""
keys = v4.get("mdx_sections", {})
if section_id in keys:
return section_id
if alias_keys:
for a in alias_keys:
if a and a in keys:
return a
return None
def lookup_v4_match(
v4: dict, section_id: str, *, alias_keys: Optional[list] = None
) -> Optional[V4Match]:
resolved = _resolve_v4_section_key(v4, section_id, alias_keys=alias_keys)
sec = v4.get("mdx_sections", {}).get(resolved) if resolved else None
if not sec: if not sec:
return None return None
judgments = sec.get("judgments_full32", []) judgments = sec.get("judgments_full32", [])
@@ -462,13 +500,15 @@ def lookup_v4_match_with_fallback(
*, *,
raw_content: Optional[str] = None, raw_content: Optional[str] = None,
max_rank: int = 3, max_rank: int = 3,
alias_keys: Optional[list] = None,
) -> tuple[Optional[V4Match], dict]: ) -> tuple[Optional[V4Match], dict]:
"""Select V4 rank-1, or promote rank-2/3 when rank-1 is not auto-renderable. """Select V4 rank-1, or promote rank-2/3 when rank-1 is not auto-renderable.
This is an IMP-05 selector only. It uses existing V4 labels, frame-contract This is an IMP-05 selector only. It uses existing V4 labels, frame-contract
presence, and the Phase Z capacity precheck; it does not call calculate_fit. presence, and the Phase Z capacity precheck; it does not call calculate_fit.
""" """
sec = v4.get("mdx_sections", {}).get(section_id) resolved = _resolve_v4_section_key(v4, section_id, alias_keys=alias_keys)
sec = v4.get("mdx_sections", {}).get(resolved) if resolved else None
trace = { trace = {
"section_id": section_id, "section_id": section_id,
"max_rank": max_rank, "max_rank": max_rank,
@@ -571,7 +611,9 @@ def lookup_v4_match_with_fallback(
return None, trace return None, trace
def lookup_v4_all_judgments(v4: dict, section_id: str) -> list[V4Match]: def lookup_v4_all_judgments(
v4: dict, section_id: str, *, alias_keys: Optional[list] = None
) -> list[V4Match]:
"""V4 raw 32 entry 그대로 반환 — reject 포함, max_n filter 없음. """V4 raw 32 entry 그대로 반환 — reject 포함, max_n filter 없음.
Step 7-A axis 보강 (사용자 lock 2026-05-08) — 사용자 UI 가 모든 frame 의 Step 7-A axis 보강 (사용자 lock 2026-05-08) — 사용자 UI 가 모든 frame 의
@@ -581,7 +623,8 @@ def lookup_v4_all_judgments(v4: dict, section_id: str) -> list[V4Match]:
Returns : Returns :
list[V4Match] — 0~32 길이. raw judgments_full32 순서 (= V4 score desc) 보존. list[V4Match] — 0~32 길이. raw judgments_full32 순서 (= V4 score desc) 보존.
""" """
sec = v4.get("mdx_sections", {}).get(section_id) resolved = _resolve_v4_section_key(v4, section_id, alias_keys=alias_keys)
sec = v4.get("mdx_sections", {}).get(resolved) if resolved else None
if not sec: if not sec:
return [] return []
judgments = sec.get("judgments_full32", []) judgments = sec.get("judgments_full32", [])
@@ -592,7 +635,11 @@ def lookup_v4_all_judgments(v4: dict, section_id: str) -> list[V4Match]:
def lookup_v4_candidates( def lookup_v4_candidates(
v4: dict, section_id: str, max_n: int = 6 v4: dict,
section_id: str,
max_n: int = 6,
*,
alias_keys: Optional[list] = None,
) -> list[V4Match]: ) -> list[V4Match]:
"""V4 non-reject 후보 list 반환 (Step 5 보완 axis — 사용자 lock 2026-05-08). """V4 non-reject 후보 list 반환 (Step 5 보완 axis — 사용자 lock 2026-05-08).
@@ -612,7 +659,8 @@ def lookup_v4_candidates(
호출처 무변. 본 함수는 Step 5 artifact + Step 9 application_plan input 호출처 무변. 본 함수는 Step 5 artifact + Step 9 application_plan input
위한 새 entry point. 위한 새 entry point.
""" """
sec = v4.get("mdx_sections", {}).get(section_id) resolved = _resolve_v4_section_key(v4, section_id, alias_keys=alias_keys)
sec = v4.get("mdx_sections", {}).get(resolved) if resolved else None
if not sec: if not sec:
return [] return []
judgments = sec.get("judgments_full32", []) judgments = sec.get("judgments_full32", [])
@@ -932,7 +980,12 @@ def _build_position_assignment_plan(
if v4 is None or section is None: if v4 is None or section is None:
return None, "no_v4_section", None return None, "no_v4_section", None
raw_content = getattr(section, "raw_content", None) raw_content = getattr(section, "raw_content", None)
match, trace = lookup_v4_match_with_fallback(v4, sid, raw_content=raw_content) # IMP-08 B-3 : forward sub-section V4 aliases (decimal heading_number)
# when canonical ordinal id misses; safe for top-level sids (empty list).
alias_keys = list(getattr(section, "v4_alias_keys", []) or [])
match, trace = lookup_v4_match_with_fallback(
v4, sid, raw_content=raw_content, alias_keys=alias_keys
)
if match is None: if match is None:
return None, "no_direct_render_template", trace return None, "no_direct_render_template", trace
return match.template_id, None, trace return match.template_id, None, trace
@@ -2039,6 +2092,11 @@ def run_phase_z2_mvp1(
# candidate (separate / parent_merged) → score → greedy non-overlapping select → # candidate (separate / parent_merged) → score → greedy non-overlapping select →
# layout preset (count-based v0). # layout preset (count-based v0).
section_content_by_id = {s.section_id: s.raw_content for s in sections} section_content_by_id = {s.section_id: s.raw_content for s in sections}
# IMP-08 B-3 : sub-section ordinal id -> legacy V4 key aliases (e.g. "04-2.1").
# Empty list for canonical (top-level) sections — U1 baseline path is exact-only.
section_alias_by_id: dict[str, list] = {
s.section_id: list(getattr(s, "v4_alias_keys", []) or []) for s in sections
}
v4_fallback_traces: dict[str, dict] = {} v4_fallback_traces: dict[str, dict] = {}
def lookup_fn(sid: str) -> Optional[V4Match]: def lookup_fn(sid: str) -> Optional[V4Match]:
@@ -2047,6 +2105,7 @@ def run_phase_z2_mvp1(
sid, sid,
raw_content=section_content_by_id.get(sid), raw_content=section_content_by_id.get(sid),
max_rank=3, max_rank=3,
alias_keys=section_alias_by_id.get(sid),
) )
v4_fallback_traces[sid] = trace v4_fallback_traces[sid] = trace
return match return match
@@ -2054,7 +2113,7 @@ def run_phase_z2_mvp1(
# Step 6-A axis (사용자 lock 2026-05-08) — V4 raw dict 흡수 fn. # Step 6-A axis (사용자 lock 2026-05-08) — V4 raw dict 흡수 fn.
# composition module 은 V4 yaml shape 모름. 본 fn 만 통해 후보 list 받음. # composition module 은 V4 yaml shape 모름. 본 fn 만 통해 후보 list 받음.
def candidates_lookup_fn(sid: str) -> list[V4Match]: def candidates_lookup_fn(sid: str) -> list[V4Match]:
return lookup_v4_candidates(v4, sid) return lookup_v4_candidates(v4, sid, alias_keys=section_alias_by_id.get(sid))
units, layout_preset, comp_debug = plan_composition( units, layout_preset, comp_debug = plan_composition(
sections, lookup_fn, V4_LABEL_TO_PHASE_Z_STATUS, MVP1_ALLOWED_STATUSES, sections, lookup_fn, V4_LABEL_TO_PHASE_Z_STATUS, MVP1_ALLOWED_STATUSES,
@@ -2777,6 +2836,11 @@ def run_phase_z2_mvp1(
note="V4 evidence 와 B4 통합 미완 — 별 axis. 현재 = composition planner 의 V4 rank-1 채택.", note="V4 evidence 와 B4 통합 미완 — 별 axis. 현재 = composition planner 의 V4 rank-1 채택.",
) )
# Step 9 HTML — V4 top candidates per zone (rank 1~4) # Step 9 HTML — V4 top candidates per zone (rank 1~4)
# IMP-08 N-R6 diagnostic exemption : this report path is post-decision
# reporting only. Runtime selection goes through _resolve_v4_section_key
# (4 sites). Direct dict lookup here is intentional — debug_zones carries
# dict-shape entries without v4_alias_keys plumbing, and a miss here only
# yields a "V4 entry 없음" report line (runtime impact zero).
try: try:
with open(V4_RESULT_PATH, encoding="utf-8") as _vf: with open(V4_RESULT_PATH, encoding="utf-8") as _vf:
_v4_full = yaml.safe_load(_vf) _v4_full = yaml.safe_load(_vf)
@@ -3263,7 +3327,12 @@ def run_phase_z2_mvp1(
# 모든 frame 의 png 를 카드로 보여주기 위함). # 모든 frame 의 png 를 카드로 보여주기 위함).
# unit_id = source_section_ids join. parent_merged 는 첫 section 의 # unit_id = source_section_ids join. parent_merged 는 첫 section 의
# judgments 사용 (parent V4 entry 가 그 section 에 있으므로). # judgments 사용 (parent V4 entry 가 그 section 에 있으므로).
v4_all_for_unit = lookup_v4_all_judgments(v4, unit.source_section_ids[0]) # IMP-08 B-3 : forward sub-section V4 aliases (decimal heading_number)
# when canonical ordinal id misses; U1 default = empty list (no change).
_first_sid = unit.source_section_ids[0]
v4_all_for_unit = lookup_v4_all_judgments(
v4, _first_sid, alias_keys=section_alias_by_id.get(_first_sid)
)
# application_candidates : V4 후보 zip 으로 application_mode 변환 # application_candidates : V4 후보 zip 으로 application_mode 변환
app_candidates = [] app_candidates = []

View File

@@ -0,0 +1,82 @@
"""IMP-08 B-3 sub-section drag/drop — schema + V4 alias resolver tests.
Fully synthetic per Codex #7 generalization guardrail:
NO real catalog template_id / frame_id, NO ``v4_full32_result.yaml`` dependency,
NO MDX-specific section ids beyond canonical id format.
Locked scope (Stage 3 R8) :
A. ``derive_parent_id`` canonical ordinal recognition + legacy decimal fallback.
B. ``_resolve_v4_section_key`` exact > alias > None (no parent/sibling promotion).
"""
from __future__ import annotations
from src.phase_z2_composition import derive_parent_id
from src.phase_z2_pipeline import _resolve_v4_section_key
# ─── A. derive_parent_id ────────────────────────────────────────────────────
def test_derive_parent_id_ordinal_sub():
    """Canonical ordinal ids ``<parent>-sub-<n>`` derive ``<parent>``."""
    expected_parent_by_id = {
        "03-1-sub-2": "03-1",
        "04-2-sub-1": "04-2",
    }
    for section_id, parent_id in expected_parent_by_id.items():
        assert derive_parent_id(section_id) == parent_id
def test_derive_parent_id_decimal_legacy_alias():
    """Legacy decimal V4 ids keep deriving their parent (alias path)."""
    legacy_decimal_id = "04-2.1"
    derived_parent = derive_parent_id(legacy_decimal_id)
    assert derived_parent == "04-2"
def test_derive_parent_id_top_level_none():
    """Top-level ids and ids without a sub-section suffix derive no parent."""
    for parentless_id in ("04-1", "04", "nonsense"):
        assert derive_parent_id(parentless_id) is None
# ─── B. _resolve_v4_section_key ─────────────────────────────────────────────
def _fake_v4(*keys):
return {"mdx_sections": {k: {"judgments_full32": []} for k in keys}}
def test_alias_resolver_exact_match_wins():
    """The canonical id is returned even when a matching alias also exists."""
    canonical_id = "04-2-sub-1"
    legacy_alias = "04-2.1"
    payload = _fake_v4(canonical_id, legacy_alias)

    resolved_plain = _resolve_v4_section_key(payload, canonical_id)
    assert resolved_plain == canonical_id

    resolved_with_alias = _resolve_v4_section_key(
        payload, canonical_id, alias_keys=[legacy_alias]
    )
    assert resolved_with_alias == canonical_id
def test_alias_resolver_decimal_alias_when_metadata_present():
    """A legacy decimal alias resolves when the canonical id is absent.

    Also pins the "alias_keys in given order" contract: aliases missing from
    V4 are skipped, and when several aliases exist the first listed one wins.
    """
    v4 = _fake_v4("04-2.1")
    assert (
        _resolve_v4_section_key(v4, "04-2-sub-1", alias_keys=["04-2.1"])
        == "04-2.1"
    )
    # An alias that is not in V4 is skipped; later aliases are still tried.
    assert (
        _resolve_v4_section_key(
            v4, "04-2-sub-1", alias_keys=["04-2.9", "04-2.1"]
        )
        == "04-2.1"
    )
    # With several aliases present in V4, caller-supplied order decides.
    v4_both = _fake_v4("04-2.1", "04-2.01")
    assert (
        _resolve_v4_section_key(
            v4_both, "04-2-sub-1", alias_keys=["04-2.01", "04-2.1"]
        )
        == "04-2.01"
    )
def test_alias_resolver_no_parent_promotion():
    """A parent V4 entry is never picked up implicitly for a sub-section."""
    parent_id, child_id = "04-2", "04-2-sub-1"
    parent_only = _fake_v4(parent_id)

    # No alias supplied: the parent entry must not be promoted.
    assert _resolve_v4_section_key(parent_only, child_id) is None

    # Alias is opt-in: the parent key resolves only when the caller names it.
    explicit = _resolve_v4_section_key(
        parent_only, child_id, alias_keys=[parent_id]
    )
    assert explicit == parent_id
def test_alias_resolver_no_sibling_promotion():
    """A sibling sub-section entry is not promoted when no alias is given."""
    sibling_only = _fake_v4("04-2-sub-2")
    resolved = _resolve_v4_section_key(sibling_only, "04-2-sub-1")
    assert resolved is None
def test_alias_resolver_miss_returns_none():
    """Resolution yields None when neither the id nor any alias is in V4."""
    unrelated = _fake_v4("99-1")
    # First without aliases, then with an alias that V4 does not contain.
    for extra_kwargs in ({}, {"alias_keys": ["04-2.1"]}):
        resolved = _resolve_v4_section_key(
            unrelated, "04-2-sub-1", **extra_kwargs
        )
        assert resolved is None