"""Phase Z-2 MVP-1.5b — single slide + Type B + frame-derived adapted blocks. 원래 Phase Z 설계 복귀 (멀티-슬라이드 / native-fit 모두 폐기) : - MDX 1 = slide 1 - slide-base → slide-body → layout preset (Type B) → zones[] → frame-derived block (zone-compatible adapt) - frame은 시각 언어 / slot 구성 / 패턴의 source. native geometry 통째 삽입 X. - AI 는 layout / zone / frame / variant 선택에 관여 X — code / catalog 가 결정. MVP-1.5b spec : - 대상 : MDX 03 (회귀) - 출력 : data/runs/{run_id}/phase_z2/final.html (single slide) - AI : 미사용 — MDX → slot_payload 결정론적 매핑 - status : matched_zone only — non-matched 발생 시 abort + error.json - layout : 2 sections → Type B (top + bottom zones) - Frame partials : templates/phase_z2/families/{template_id}.html (Figma 시각 언어 promote, geometry adapt) - Assets : render time copy → data/runs/{run_id}/phase_z2/assets/{template_id}/ 상세 설계 : - docs/architecture/PHASE-Z-CATALOG-RUNTIME-DESIGN.md § 17 (frame-derived partial promotion + zone-compatible adapt) 이전 실험 실패 기록 : - mvp1_test5 : scaffold 임의 — frame 느낌 부재 - mvp1.5_test3 : frame native 통째 — slide 대체 - mvp1.5a_test1 : 멀티-슬라이드 — MDX 1=slide 1 위반 - mvp1.5b_test* : 본 모듈, 원래 설계 라인 합류 """ import hashlib import json import os import re import shutil import sys import time from dataclasses import asdict, dataclass, field from pathlib import Path from typing import Any, Optional import yaml from jinja2 import Environment, FileSystemLoader, select_autoescape from phase_z2_composition import ( LAYOUT_PRESETS, CompositionUnit, compose_zone_popup_payload, derive_parent_id, plan_composition, resplit_all_reject_merges, select_display_strategy_candidates, select_layout_candidates, select_region_layout_candidates, ) from phase_z2_mapper import ( FitError, compute_capacity_fit, get_contract, load_frame_contracts, load_v4_fallback_policy, map_with_contract, ) from phase_z2_classifier import classify_visual_runtime_check from phase_z2_router import plan_details_popup_escalation, route_fit_classification from phase_z2_retry import ( DEFAULT_SAFETY_MARGIN_PX, apply_cross_zone_redistribute_css, apply_font_step_compression_css, apply_frame_internal_fit_candidate_css, apply_glue_compression_css, apply_image_fit_css, apply_layout_adjust_layout_css, apply_retry_to_layout_css, plan_cross_zone_redistribute, plan_font_step_compression, plan_frame_internal_fit_candidate, plan_glue_compression, plan_image_fit, plan_layout_adjust, plan_zone_ratio_retry, ) from phase_z2_failure_router import ( enrich_retry_trace_with_failure_classification, route_retry_failure, ) # trace-only runtime 연결 v0 — B1 → B4 chain. # final.html / mapper / render path 미영향. debug_zones[i].placement_trace 만 기록. from phase_z2_content_extractor import extract_content_objects, extract_rich_content_objects from phase_z2_placement_planner import plan_placement # IMP-47B u4 — Step 12 AI repair wiring. gather() short-circuits at the # router when settings.ai_fallback_enabled is False (default), so import # at module load is safe for the AI=0 normal path (PZ-1). Activation gate # stays in src/config.py + src/phase_z2_ai_fallback/router.py. from src.phase_z2_ai_fallback.step12 import gather_step12_ai_repair_proposals # IMP-35 (#64) u5 — Step 17 deterministic POPUP gate executor. Runs after # the salvage cascade exhausts at cascade-terminal action # ``details_popup_escalation`` (router u3 / failure_router u2) and BEFORE # the AI_REPAIR cascade stage. Stamps ``popup_escalation_plan`` and the # idempotent ``has_popup`` marker onto retry_trace per unit. No AI call. from src.phase_z2_ai_fallback.step17 import run_step17_popup_gate # IMP-43 (#72) u3 — Step 6 reuse snapshot sidecar (JSON-only). Schema + # serializers + validator live in u2 (``src.phase_z2_reuse_snapshot``); # this module's call site at the Step 6 boundary writes the sidecar # alongside ``steps/step06_composition_plan.json`` so that future # ``--reuse-from`` runs (u4) can resume at Step 7 without re-deriving # Step 0/1/2/5/6 state. ``--reuse-from`` is u4 scope; here we only # WRITE the snapshot — restore wiring lands in u4. from src.phase_z2_reuse_snapshot import build_snapshot, SNAPSHOT_FILENAME # ─── Constants ────────────────────────────────────────────────── PROJECT_ROOT = Path(__file__).parent.parent TEMPLATE_DIR = PROJECT_ROOT / "templates" / "phase_z2" ASSETS_SOURCE_BASE = PROJECT_ROOT / "figma_to_html_agent" / "blocks" V4_RESULT_PATH = PROJECT_ROOT / "tests" / "matching" / "v4_full32_result.yaml" RUNS_DIR = PROJECT_ROOT / "data" / "runs" # IMP-39 (#68) u1 — single-source ranking sort policy yaml. # Loader + apply_ranking_sort helper below `to_phase_z_status`. RANKING_SORT_POLICY_PATH = ( PROJECT_ROOT / "templates" / "phase_z2" / "catalog" / "ranking_sort_policy.yaml" ) # V4 label → Phase Z status (§ 7.4 매트릭스) V4_LABEL_TO_PHASE_Z_STATUS = { "use_as_is": "matched_zone", "light_edit": "adapt_matched_zone", "restructure": "extract_matched_zone", "reject": "fallback_candidate", } MVP1_ALLOWED_STATUSES = {"matched_zone", "adapt_matched_zone"} # Step 9 v0 (사용자 lock 2026-05-08) — V4 label → application_mode 변환. # tuple = (application_mode, auto_applicable, delegated_to). # status.md §2 Q3 / Q7 lock 따라. APPLICATION_MODE_BY_V4_LABEL = { "use_as_is": ("direct_insert", True, "step10_contract_check"), "light_edit": ("same_frame_with_adjustment", True, "step10_contract_check"), "restructure": ("layout_or_region_change", False, "human_review"), "reject": ("exclude", False, None), } # adapt_matched_zone (V4 light_edit) = frame 구조 동일, 텍스트만 minor edit 필요. # minor edit 정책 (mapper 의무) : # 1. MDX item 수 < frame slot 수 → 빈 slot 그대로 (Jinja2 {% if %} 로 스킵) # 2. MDX item 수 > frame slot 수 → 추가 item 누락 (truncate) — debug.json 에 기록 # 3. 텍스트 길이 mismatch → 그대로 통과 (overflow 는 zone-fit + Selenium check 가 처리) # 4. slot ↔ MDX item 의미 매핑 → 순서 기반 (간단). V4 anchor_match 정교화는 future # AI 호출 X — MVP-1.5b 의 "MDX 1:1 결정론적 매핑" 룰 그대로. # Slide canvas / body geometry — front 기준 정상화 (2026-05-07) # 참조: D:/ad-hoc/kei/design_agent_front/slide-base.html # slide-base.html CSS 와 1:1 일치해야 함 (불일치 시 layout 계산 어긋남) SLIDE_W = 1280 SLIDE_H = 720 SLIDE_BODY_LEFT = 50 SLIDE_BODY_TOP = 76 # 사용자 직설 (divider-body 16px 여백) 2026-05-07 SLIDE_BODY_WIDTH = 1180 # calc(100% - 100px) SLIDE_BODY_HEIGHT = 585 # 사용자 직설 (body-footer 10px 여백) 2026-05-07 SLIDE_FOOTER_HEIGHT = 41 # was 32, front 기준 GRID_GAP = 14 # zone 간격 (사용자 직설 2026-05-07) # zone min-height fallback — contract 에 visual_hints.min_height_px 없을 때 사용. # token-based font (var(--font-body) 11px 등) 기준 최소 가독 높이. DEFAULT_ZONE_MIN_HEIGHT_PX = 100 # Step 14 image_aspect_mismatch tolerance — |natural_ratio - rendered_ratio| > TOL ⇒ fail. # Local anchor : IMP-15 실행-1 (Gitea issue #45) — image axis acceptance criteria. # Spec doc row (PHASE-Z-FIT-CLASSIFIER-ROUTER-SPEC) update deferred to IMP-15 실행-4. IMAGE_ASPECT_DELTA_TOL = 0.05 # Step 14 table_self_overflow tolerance — scrollW−clientW or scrollH−clientH > TOL ⇒ fail. # Local anchor : IMP-15 실행-2 (Gitea issue #46) — table axis acceptance criteria. # Mirrors existing inline 5px tolerance used by slide/zone/clipped scans in run_overflow_check. TABLE_SCROLL_TOL_PX = 5 # content_weight 계산 가중치 CONTENT_WEIGHT_COEFFS = { "text_per_chars": 800, # text_len / 800 = score "top_bullet": 0.4, "nested_bullet": 0.15, "table_bonus": 1.5, "subsection": 0.6, } # ─── Data classes ─────────────────────────────────────────────── @dataclass class MdxSection: section_id: str section_num: int title: str raw_content: str # IMP-08 B-3 sub-section schema (additive, defaults preserve 4-positional callers). # heading_number: decimal "2.1" from MDX `### 2.1 Title` capture (U2-populated). # v4_alias_keys: legacy V4 keys to try when canonical ordinal id misses (e.g. "04-2.1"). # sub_sections: raw child payloads from section_parser (Stage 0 adapter consumes). heading_number: Optional[str] = None v4_alias_keys: list = field(default_factory=list) sub_sections: list = field(default_factory=list) @dataclass class V4Match: section_id: str frame_id: str frame_number: int template_id: str confidence: float label: str v4_rank: Optional[int] = None selection_path: str = "rank_1" fallback_reason: Optional[str] = None # IMP-30 u1 — provisional first-render flag. True when the selector # synthesizes a rank-1 V4 candidate after chain_exhausted because the # opt-in allow_provisional kwarg was set. Default False keeps IMP-05 # behavior byte-identical; downstream surfaces this for zone-level # "needs adaptation" marking without altering V4 evidence. provisional: bool = False def to_phase_z_status(match: V4Match) -> str: return V4_LABEL_TO_PHASE_Z_STATUS.get(match.label, "unknown") # ─── IMP-39 (#68) u1 — single-source ranking sort policy ────────── # # Single source of (label_priority, tie-break) ordering shared by: # - backend `lookup_v4_match_with_fallback` selector loop (wired in u2) # - Step 9 `_build_application_plan_unit` payload (wired in u3) # - frontend `designAgentApi.ts` candidate builder (wired in u4) # # u1 scope = additive only (yaml + loader + helper). No selector wiring, # no behavior change. Default-fallback matches yaml so missing-file boot # keeps deterministic ordering identical to the file-loaded policy. _RANKING_SORT_POLICY_DEFAULT: dict = { "policy_type": "deterministic_label_priority_then_confidence", "label_priority": { "use_as_is": 0, "light_edit": 1, "restructure": 2, "reject": 3, }, "unknown_label_priority": 99, "tie_break_axes": ["confidence_desc", "v4_rank_asc"], } _RANKING_SORT_POLICY_CACHE: Optional[dict] = None def load_ranking_sort_policy() -> dict: """IMP-39 u1 — ranking sort policy loader (separate yaml, additive). Returns dict with keys: policy_type, label_priority (dict), unknown_label_priority (int), tie_break_axes (list[str]). Graceful fallback: yaml 파일 없을 시 _RANKING_SORT_POLICY_DEFAULT (위 dict) 그대로 — backward-compat boot-safe. Cache: module-level, mirrors `load_v4_fallback_policy` pattern. """ global _RANKING_SORT_POLICY_CACHE if _RANKING_SORT_POLICY_CACHE is None: if RANKING_SORT_POLICY_PATH.exists(): loaded = ( yaml.safe_load(RANKING_SORT_POLICY_PATH.read_text(encoding="utf-8")) or {} ) # merge with default so partial yaml falls through cleanly merged = dict(_RANKING_SORT_POLICY_DEFAULT) for k, v in loaded.items(): merged[k] = v _RANKING_SORT_POLICY_CACHE = merged else: _RANKING_SORT_POLICY_CACHE = dict(_RANKING_SORT_POLICY_DEFAULT) return _RANKING_SORT_POLICY_CACHE def apply_ranking_sort( records: list, *, policy: Optional[dict] = None, label_key: str = "label", confidence_key: str = "confidence", v4_rank_key: str = "v4_rank", ) -> list: """IMP-39 u1 — stable sort by (label_priority asc, confidence desc, v4_rank asc). Shared ordering primitive — backend selector / Step 9 payload / frontend mirror invariant. Sample-agnostic; no hardcoded sample IDs. Args: records: list of dicts (selector loop, trace candidates) OR V4Match objects. Field access falls through getitem → getattr. policy: optional explicit policy dict; defaults to `load_ranking_sort_policy()`. label_key / confidence_key / v4_rank_key: per-record field names. Returns: NEW list — input is not mutated. Records lacking a key get the unknown-label priority / confidence=0.0 / v4_rank=inf so they sink to the bottom in a deterministic way. """ pol = policy if policy is not None else load_ranking_sort_policy() priority_map: dict = pol.get("label_priority", {}) or {} unknown_priority: int = int(pol.get("unknown_label_priority", 99)) def _get(rec, key): if isinstance(rec, dict): return rec.get(key) return getattr(rec, key, None) def _key(rec): label = _get(rec, label_key) conf = _get(rec, confidence_key) v4_rank = _get(rec, v4_rank_key) label_pri = priority_map.get(label, unknown_priority) conf_val = float(conf) if conf is not None else 0.0 # confidence desc → negate for asc sort key rank_val = int(v4_rank) if v4_rank is not None else 10**9 return (label_pri, -conf_val, rank_val) return sorted(records, key=_key) def _b4_mapper_source_enabled() -> bool: """IMP-89 89-a u1 — PHASE_Z_B4_MAPPER_SOURCE env flag reader (default OFF). Switches slot_payload source-of-truth from mapper-only (legacy) to B4 PlacementPlan.selected_template_id. Distinct from PHASE_Z_B4_GATEKEEPER (mismatch render-skip semantics). u2 wires this into the slot_payload construction site; u3 adds BLOCKED exits for no-cover and FitError under flag ON. Truthy values: '1', 'true', 'yes' (case-insensitive, trimmed). """ return os.environ.get("PHASE_Z_B4_MAPPER_SOURCE", "").strip().lower() in { "1", "true", "yes", } def _select_mapper_template_id( placement_plan, unit_frame_template_id: str ) -> Optional[str]: """IMP-89 89-a u2 — slot_payload source-of-truth selector. Returns the template_id that drives slot_payload construction at the single switch site in the runtime loop: flag ON → placement_plan.selected_template_id (B4 PlacementPlan, Layer A render-active path) flag OFF → unit_frame_template_id (legacy mapper-only / V4 rank-1; byte-equivalent default; final.html SHA parity guarded by u4) Under flag ON the returned value may be None when B4 found no covering frame. u3 adds the BLOCKED exit for None and for FitError on the B4-selected template — NO silent fallback (IMP-87 honesty gate pattern). """ if _b4_mapper_source_enabled(): return placement_plan.selected_template_id return unit_frame_template_id def _b4_mapper_source_blocked_exit( reason: str, *, position: str, context: dict ) -> "NoReturn": # type: ignore[name-defined] """IMP-89 89-a u3 — BLOCKED exit (sys.exit(1)) when PHASE_Z_B4_MAPPER_SOURCE is ON and the Layer A render path cannot resolve a covering frame. Stage 1 Q2 lock: when the user explicitly opts into the B4-driven render path, a content-bearing zone MUST NOT silently degrade to adapter_needed or to the legacy V4 rank-1 mapper input. Mirrors IMP-87 u3 honesty-gate pattern (`_is_blocked_overall` → `sys.exit(1)`): the BLOCKED signal preempts the silent adapter_needed fallback so the operator sees the Layer A failure immediately on stderr instead of inheriting a pseudo-rendered partial. Reasons (locked enum): b4_no_cover — PlacementPlan.selected_template_id is None (B4 found no covering frame on the unit) b4_selected_fit_error — map_mdx_to_slots raised FitError against the B4-selected template (frame contract reject) Always raises SystemExit(1) via sys.exit. The `NoReturn` annotation makes the call-site control flow explicit for type-checkers; behavior is the same as IMP-87 u3's sys.exit(1) at L6387. """ print( f"\n[Phase Z-2 IMP-89 89-a u3] BLOCKED @ {reason} (zone--{position})", file=sys.stderr, ) print( " policy : PHASE_Z_B4_MAPPER_SOURCE=ON requires B4-driven render " "(NO silent fallback — IMP-87 honesty gate pattern)", file=sys.stderr, ) for key, value in context.items(): print(f" {key:9}: {value}", file=sys.stderr) sys.exit(1) # ─── MDX parsing ──────────────────────────────────────────────── def parse_mdx(mdx_path: Path) -> tuple[str, list[MdxSection], Optional[str]]: """basic MDX parser — ## level sections only. V4 무관 (matching artifact 모름). section.raw_content 에 ### sub-section 그대로 포함. V4 granularity 와 align 은 align_sections_to_v4_granularity() 가 처리. """ text = mdx_path.read_text(encoding="utf-8") fm_match = re.match(r"^---\n(.*?)\n---\n", text, re.DOTALL) slide_title = "" if fm_match: fm = yaml.safe_load(fm_match.group(1)) slide_title = fm.get("title", "") text = text[fm_match.end():] footer_match = re.search(r":::note\[[^\]]*\]\n(.*?)\n:::", text, re.DOTALL) footer_text = None if footer_match: body = footer_match.group(1) bullet_match = re.search(r"\*\s*\*\*([^*]+)\*\*", body) footer_text = (bullet_match.group(1).strip() if bullet_match else body.strip()) text = text[:footer_match.start()] + text[footer_match.end():] sections = [] section_pattern = re.compile(r"^##\s+(\d+)\.\s+(.+?)$", re.MULTILINE) matches = list(section_pattern.finditer(text)) mdx_num_match = re.match(r"(\d+)", mdx_path.stem) mdx_id = mdx_num_match.group(1).zfill(2) if mdx_num_match else "00" for i, m in enumerate(matches): section_num = int(m.group(1)) title_text = m.group(2).strip() start = m.end() end = matches[i + 1].start() if i + 1 < len(matches) else len(text) raw_content = text[start:end].strip() sections.append(MdxSection( section_id=f"{mdx_id}-{section_num}", section_num=section_num, title=f"{section_num}. {title_text}", raw_content=raw_content, )) return slide_title, sections, footer_text # IMP-02 (Phase Z Step 2) — Stage 0 normalize chained adapter. # scope-lock 7 조건 (Gitea #2): # 1. inline helper near parse_mdx() # 2. PHASE_Z_STAGE0_ADAPTER_ENABLED env flag, default OFF (canary, matches PHASE_Z_B4_*) # 3. env=1 sample verification required (in review loop) # 4. fallback_reason: str | None flat — 5 hard cases # 5. verify normalize_mdx_content(raw_mdx)["sections"] is list # 6. preserve Step 2 existing fields; stage0_adapter_diagnostics additive only # 7. out of scope: V4 / align / composition / AI/Kei / frame selection / status semantics _STAGE0_FALLBACK_REASONS = { "ADAPTER_EXCEPTION", "NO_USABLE_SECTIONS", "MISSING_INVALID_IDS", "DUPLICATE_IDS", "NON_POSITIVE_SECTION_NUM", } def _stage0_chained_adapter( mdx_path: Path, legacy_slide_title: str, legacy_sections: list[MdxSection], legacy_footer: Optional[str], ) -> tuple[str, list[MdxSection], Optional[str], dict, dict]: """IMP-02 — chained adapter for Stage 0 normalize → Phase Z Step 2 input. Chain: mdx_normalizer.normalize_mdx_content + section_parser.extract_major_sections + section_parser.extract_conclusion_text → reconstructed MdxSection list. Default OFF (canary, env=`1/true/yes` to enable). When OFF, returns legacy parse_mdx output with diagnostics indicating disabled. When ON, runs adapter chain; on any hard contract failure or exception, falls back to legacy and records fallback_reason. Returns (slide_title, sections, footer, diagnostics, normalized_assets). normalized_assets = {"popups": [...], "images": [...], "tables": [...]} — IMP-03 Step 3 handoff. env=OFF or hard fallback 시 빈 list. """ diagnostics: dict = { "enabled": False, "used": False, "fallback_reason": None, "id_reconstruction_log": [], "adapter_counts": None, "legacy_counts": {"sections": len(legacy_sections)}, } # IMP-03 — Step 3 handoff. env=OFF / fallback 시 모든 list 가 비어 있음. normalized_assets: dict = {"popups": [], "images": [], "tables": []} raw_flag = os.environ.get("PHASE_Z_STAGE0_ADAPTER_ENABLED", "").strip().lower() enabled = raw_flag in {"1", "true", "yes"} diagnostics["enabled"] = enabled if not enabled: return legacy_slide_title, legacy_sections, legacy_footer, diagnostics, normalized_assets try: # Defer imports — legacy path must not depend on these modules. from mdx_normalizer import normalize_mdx_content from section_parser import extract_conclusion_text, extract_major_sections raw_mdx = mdx_path.read_text(encoding="utf-8") normalized = normalize_mdx_content(raw_mdx) if not isinstance(normalized, dict) or not isinstance(normalized.get("sections"), list): diagnostics["fallback_reason"] = "MISSING_INVALID_IDS" return legacy_slide_title, legacy_sections, legacy_footer, diagnostics, normalized_assets majors = extract_major_sections(normalized["sections"]) if not majors: diagnostics["fallback_reason"] = "NO_USABLE_SECTIONS" return legacy_slide_title, legacy_sections, legacy_footer, diagnostics, normalized_assets adapter_title = (normalized.get("title") or "").strip() or legacy_slide_title conclusion = extract_conclusion_text(raw_mdx) adapter_footer = conclusion if conclusion else None mdx_num_match = re.match(r"(\d+)", mdx_path.stem) mdx_id = mdx_num_match.group(1).zfill(2) if mdx_num_match else "00" # Pre-scan raw MDX `## N. Title` headings → {title: section_num} map. # Required to make scope-lock §5 "raw heading reuse first" functionally # reachable, since extract_major_sections strips the leading `N.` from # its level=2 group titles (Codex implementation review #6 catch). raw_heading_map: dict[str, int] = {} for h in re.finditer(r"^##\s+(\d+)\.\s+(.+?)$", raw_mdx, re.MULTILINE): raw_heading_map[h.group(2).strip()] = int(h.group(1)) adapter_sections: list[MdxSection] = [] used_nums: set[int] = set() for idx, m in enumerate(majors, start=1): mtitle = (m.get("title") or "").strip() content = (m.get("content") or "").strip() if mtitle in raw_heading_map: section_num = raw_heading_map[mtitle] clean_title = mtitle reuse_source = "raw_heading" else: inline_match = re.match(r"^(\d+)\.\s*(.+)$", mtitle) if inline_match: section_num = int(inline_match.group(1)) clean_title = inline_match.group(2).strip() reuse_source = "raw_heading_inline" else: section_num = idx clean_title = mtitle reuse_source = "order_fallback" if section_num <= 0: diagnostics["fallback_reason"] = "NON_POSITIVE_SECTION_NUM" return legacy_slide_title, legacy_sections, legacy_footer, diagnostics, normalized_assets if section_num in used_nums: diagnostics["fallback_reason"] = "DUPLICATE_IDS" return legacy_slide_title, legacy_sections, legacy_footer, diagnostics, normalized_assets used_nums.add(section_num) diagnostics["id_reconstruction_log"].append({ "input_title": mtitle, "section_num": section_num, "reuse_source": reuse_source, }) adapter_sections.append(MdxSection( section_id=f"{mdx_id}-{section_num}", section_num=section_num, title=f"{section_num}. {clean_title}", raw_content=content, )) diagnostics["adapter_counts"] = { "sections": len(adapter_sections), "majors": len(majors), "normalized_sections": len(normalized["sections"]), "popups": len(normalized.get("popups", []) or []), "images": len(normalized.get("images", []) or []), "tables": len(normalized.get("tables", []) or []), } diagnostics["diff_vs_legacy"] = { "title_match": adapter_title == legacy_slide_title, "count_match": len(adapter_sections) == len(legacy_sections), "footer_match": adapter_footer == legacy_footer, } diagnostics["used"] = True # IMP-03 — populate Step 3 handoff (success path only). # All fallback paths leave normalized_assets as empty lists (defined at fn top). normalized_assets = { "popups": normalized.get("popups", []) or [], "images": normalized.get("images", []) or [], "tables": normalized.get("tables", []) or [], } return adapter_title, adapter_sections, adapter_footer, diagnostics, normalized_assets except Exception as exc: # noqa: BLE001 — adapter must never break legacy path diagnostics["fallback_reason"] = "ADAPTER_EXCEPTION" diagnostics["exception"] = repr(exc) return legacy_slide_title, legacy_sections, legacy_footer, diagnostics, normalized_assets # ─── V4 lookup ────────────────────────────────────────────────── def load_v4_result() -> dict: return yaml.safe_load(V4_RESULT_PATH.read_text(encoding="utf-8")) def align_sections_to_v4_granularity( sections: list[MdxSection], v4: dict, *, override_target_section_ids: Optional[list[str]] = None, ) -> list[MdxSection]: """Align MDX sections to canonical sub-section granularity. Default behaviour (V4-driven granularity, backward compatible) : - V4 has section_id exact key -> keep section unchanged (parent granularity rendering, parent-level V4 evidence applies). - V4 missing + H3 sub-sections -> drill into sub-sections, emit canonical ids ``${section_id}-sub-${ordinal}`` with optional decimal alias for legacy V4 keys (e.g. ``04-2.1``). - V4 missing + no H3 -> pass through (downstream V4 lookup will naturally abort with no_v4_section). IMP-08 B-3 / Stage 5 R2 blocker-fix — ``override_target_section_ids`` is the list of section ids that drag/drop override CLI flags target. When any override target matches ``${section_id}-sub-N`` for a section whose parent is otherwise V4-aligned, that section is force-drilled so sub-section ids become addressable. This keeps the default rendering path on V4 granularity while making drag/drop deterministic regardless of whether V4 carries a parent exact key. Each drilled sub-section carries : - heading_number : decimal "2.1" / integer "1" / None (bare H3 title). - v4_alias_keys : legacy V4 keys to try when the canonical ordinal id misses. Populated only when ``heading_number`` matches the decimal pattern ``\\d+\\.\\d+`` (N-R5 guard) — integer-only or bare H3 produces no alias to avoid sibling-parent V4 collisions. Design boundary : - parser (``parse_mdx``) = MDX-only knowledge (V4-agnostic). - aligner (this function) = canonical sub-id schema, MDX-driven on force_drill, V4-driven otherwise. - resolver (``_resolve_v4_section_key``) = exact > alias > None, never auto-promotes to parent/sibling (axis 7 hybrid lock). """ v4_keys = set(v4.get("mdx_sections", {}).keys()) # Build the set of parent ids whose sub-ids are explicitly targeted by # an override. These sections must be drilled even if V4 also carries # the parent key exactly. Parents derived from canonical "X-sub-N" ids # only — non-sub ids (top-level overrides) do not trigger drilling. force_drill_parents: set[str] = set() if override_target_section_ids: for sid in override_target_section_ids: parent = derive_parent_id(sid) if parent and sid != parent: force_drill_parents.add(parent) aligned: list[MdxSection] = [] # Capture optional heading-number prefix (decimal "2.1" or integer "1") # plus the heading title. None group = bare "### Title". sub_pattern = re.compile( r"^###\s+(?:(\d+(?:\.\d+)?)\s+)?(.+?)$", re.MULTILINE ) decimal_re = re.compile(r"\d+\.\d+") for section in sections: force_drill = section.section_id in force_drill_parents if section.section_id in v4_keys and not force_drill: # V4 carries this section exactly and no override targets a # sub-id under it: keep parent granularity (backward compat). aligned.append(section) continue sub_matches = list(sub_pattern.finditer(section.raw_content)) if not sub_matches: # No H3 sub-sections: cannot drill. Pass section through; # downstream V4 lookup aborts with no_v4_section when needed. aligned.append(section) continue mdx_id = section.section_id.split("-")[0] # e.g., "04" for ordinal, m in enumerate(sub_matches, start=1): heading_number = m.group(1) # decimal "2.1" / integer "1" / None sub_title = m.group(2).strip() start = m.end() end = ( sub_matches[ordinal].start() if ordinal < len(sub_matches) else len(section.raw_content) ) raw = section.raw_content[start:end].strip() # N-R5 : alias only for decimal heading numbers. integer-only # H3 (`### 1`) or undecorated H3 produce no alias to avoid # sibling-parent V4 collisions (e.g., 05.mdx integer H3s). alias_keys: list[str] = [] if heading_number and decimal_re.fullmatch(heading_number): alias_keys.append(f"{mdx_id}-{heading_number}") title = ( f"{heading_number} {sub_title}" if heading_number else sub_title ) aligned.append(MdxSection( section_id=f"{section.section_id}-sub-{ordinal}", section_num=section.section_num, title=title, raw_content=raw, heading_number=heading_number, v4_alias_keys=alias_keys, )) return aligned def _v4_match_from_judgment(section_id: str, judgment: dict, rank: Optional[int] = None) -> V4Match: resolved_rank = rank if rank is not None else judgment.get("v4_full_rank") return V4Match( section_id=section_id, frame_id=str(judgment["frame_id"]), frame_number=int(judgment["frame_number"]), template_id=judgment["template_id"], confidence=float(judgment["confidence"]), label=judgment["label"], v4_rank=int(resolved_rank) if resolved_rank is not None else None, ) def _resolve_v4_section_key( v4: dict, section_id: str, *, alias_keys: Optional[list] = None, ) -> Optional[str]: """Resolve a V4 ``mdx_sections`` key for *section_id*. Resolution order : 1. exact match (canonical ordinal id wins) 2. alias_keys in given order (e.g. legacy decimal ``04-2.1`` for ``04-2-sub-1``) 3. None on miss. Never promotes to parent or sibling — that would reinterpret V4 evidence (axis 7 hybrid lock, RULE 0). U1 callers pass alias_keys=None so the function is byte-identical to the previous exact-match lookup; U2 populates aliases from MDX heading_number metadata. """ keys = v4.get("mdx_sections", {}) if section_id in keys: return section_id if alias_keys: for a in alias_keys: if a and a in keys: return a return None def lookup_v4_match( v4: dict, section_id: str, *, alias_keys: Optional[list] = None ) -> Optional[V4Match]: resolved = _resolve_v4_section_key(v4, section_id, alias_keys=alias_keys) sec = v4.get("mdx_sections", {}).get(resolved) if resolved else None if not sec: return None judgments = sec.get("judgments_full32", []) if not judgments: return None top = judgments[0] return _v4_match_from_judgment(section_id, top, rank=1) # IMP-05 L2/L5 route hint — V4 label → execution route guidance for future consumers # (frontend zone-level override / AI-assisted adaptation). Codex #2 conceptual model : # use_as_is → Phase Z direct render # light_edit → deterministic minor adjustment # restructure → AI-assisted frame-aware adaptation (deferred to IMP-17 — carve-out, AI fallback only, normal path 밖) # reject → AI re-construction over the rank-1 reject frame (IMP-47B u1, 2026-05-21); # policy correction supersedes the legacy "design reference only" disposition. # Frame visual / contract stays untouched; AI only re-maps MDX content into # declared slots. Activation still gated by ai_fallback_enabled (default OFF). _IMP05_ROUTE_HINTS: dict[str, str] = { "use_as_is": "direct_render", "light_edit": "deterministic_minor_adjustment", "restructure": "ai_adaptation_required", "reject": "ai_adaptation_required", } def _imp05_route_hint(label: Optional[str]) -> Optional[str]: """Map V4 label to execution route hint. Returns None for unknown labels.""" if label is None: return None return _IMP05_ROUTE_HINTS.get(label) def _load_frame_partial_html(template_id: str) -> str: """IMP-47B u4 — Read templates/phase_z2/families/{template_id}.html. Missing partial (e.g., ``__empty__`` shell from IMP-30) returns an empty string so gather_step12_ai_repair_proposals can still build a record with skip_reason without raising on file IO. """ partial_path = TEMPLATE_DIR / "families" / f"{template_id}.html" if not partial_path.is_file(): return "" return partial_path.read_text(encoding="utf-8") def _run_step12_ai_repair(units) -> list[dict]: """IMP-47B u4 — Wire gather_step12_ai_repair_proposals into Step 12. Routes provisional units whose IMP-05 hint maps to ``ai_adaptation_required`` (``restructure`` + ``reject`` per u1) through ``src.phase_z2_ai_fallback.router``. Normal-path units (``use_as_is`` / ``light_edit`` / non-provisional) record a skip_reason without invoking the router; flag-off runs short-circuit at the router (``settings.ai_fallback_enabled=False`` default). Returns the per-unit record list — u5 consumes records for PARTIAL_OVERRIDES apply and u6 writes the audit artifact. """ return gather_step12_ai_repair_proposals( units, route_for_label=_imp05_route_hint, get_contract_fn=get_contract, frame_visual_loader=_load_frame_partial_html, ) _REJECT_SUPPORTED_PROPOSAL_KINDS: frozenset[str] = frozenset({"partial_overrides"}) def _apply_ai_repair_proposals_to_zones( ai_repair_records: list[dict], unit_positions: list[str], zones_data: list[dict], ) -> None: """IMP-47B u5 — Apply PARTIAL_OVERRIDES into zones_data.slot_payload. Mutates each record's ``apply_status`` in place and merges ``proposal.payload.slots`` into the matching zone. Out-of-scope kinds (``builder_options_patch``, ``slot_mapping_proposal``) loud-fail with ``unsupported_kind_for_reject_route:`` — zones untouched (human_review surfacing → u8). IMP-33 u5 validator guarantees declared-slot completeness, so ``dict.update`` is the structural merge (``feedback_ai_isolation_contract``). """ zone_by_position = {z["position"]: z for z in zones_data} for record in ai_repair_records: proposal = record.get("proposal") if proposal is None: record["apply_status"] = "no_proposal" continue kind = proposal.get("proposal_kind") if kind not in _REJECT_SUPPORTED_PROPOSAL_KINDS: record["apply_status"] = f"unsupported_kind_for_reject_route:{kind}" print( f" [ai-repair-apply] unit {record['unit_index']} " f"proposal_kind='{kind}' out-of-scope for reject route — " "skipping apply; human_review required.", file=sys.stderr, ) continue unit_index = record["unit_index"] position = ( unit_positions[unit_index] if 0 <= unit_index < len(unit_positions) else None ) zone = zone_by_position.get(position) if position is not None else None if zone is None: record["apply_status"] = "no_zone_match" continue slots = (proposal.get("payload") or {}).get("slots") or {} zone["slot_payload"].update(slots) record["apply_status"] = "applied:partial_overrides" def _check_post_ai_coverage_invariant( units, ai_repair_records: list[dict], ) -> dict: """IMP-47B u7 — Verify AI repair preserved every source_section_id. Compares the union of unit-level ``source_section_ids`` (pre-AI) to the union present on ``ai_repair_records`` post-apply. Per the AI isolation contract + dropped 절대 룰 (``feedback_ai_isolation_contract``), AI repair never removes a unit's section coverage. Any divergence indicates a regression that u8 surfaces through ``slide_status.ai_repair_status``. The check is structural (set membership); the per-record ``source_section_ids`` list is a copy populated by ``gather_step12_ai_repair_proposals`` (``step12.py:124``) so apply mutations cannot silently drop it. """ pre_ai_ids: set[str] = set() for unit in units: pre_ai_ids.update(getattr(unit, "source_section_ids", []) or []) post_ai_ids: set[str] = set() for record in ai_repair_records: post_ai_ids.update(record.get("source_section_ids") or []) dropped = sorted(pre_ai_ids - post_ai_ids) return { "pre_ai_section_ids": sorted(pre_ai_ids), "post_ai_section_ids": sorted(post_ai_ids), "dropped_section_ids": dropped, "status": "ok" if not dropped else "violated", } def _persist_ai_repair_proposals_to_cache( ai_repair_records: list[dict], *, visual_check_passed: bool, user_approved: bool, auto_cache: bool, ) -> None: """IMP-47B u13 — Persist applied AI repair proposals through IMP-46 gates. Mutates each record in place with a ``cache_save_status`` axis. Only records whose ``apply_status`` starts with ``"applied:"`` and that still carry the original ``cache_key`` + ``fingerprints`` + a serialized ``proposal`` dict are eligible — everything else marked ``not_applied``. Eligible records go through ``cache.save_proposal`` with the IMP-46 dual-gate truth table; the helper catches :class:`AiFallbackCacheGateError` so a gate block is surfaced (``gate_blocked:``) without raising into the pipeline runtime (the cache is a hint, never a hard dependency — cache.py contract). ``visual_check_passed`` is never bypassable; ``auto_cache=True`` bypasses ONLY the ``user_approved`` gate per IMP-46 u5. Pure save layer: no AI call, no MDX touch. """ from src.phase_z2_ai_fallback.cache import ( AiFallbackCacheGateError, save_proposal, ) from src.phase_z2_ai_fallback.schema import AiFallbackProposal for record in ai_repair_records: apply_status = record.get("apply_status") or "" proposal_dict = record.get("proposal") cache_key = record.get("cache_key") fingerprints = record.get("fingerprints") if ( not apply_status.startswith("applied:") or not isinstance(proposal_dict, dict) or not cache_key or not isinstance(fingerprints, dict) ): record["cache_save_status"] = "not_applied" continue try: proposal_obj = AiFallbackProposal.model_validate(proposal_dict) except Exception as exc: # noqa: BLE001 — invalid payload → skip, never raise record["cache_save_status"] = f"invalid_proposal:{type(exc).__name__}" continue try: save_proposal( cache_key, proposal_obj, visual_check_passed=visual_check_passed, user_approved=user_approved, auto_cache=auto_cache, fingerprints=fingerprints, ) except AiFallbackCacheGateError as gate_exc: record["cache_save_status"] = f"gate_blocked:{gate_exc}" continue record["cache_save_status"] = "saved" def _summarize_ai_repair_status( ai_repair_records: list[dict], coverage_invariant: dict, ) -> dict: """IMP-47B u8 — Classify Step 12 AI repair outcomes for slide_status surfacing. Reads u4 gather ``error`` + u5 ``apply_status`` + u7 coverage_invariant to derive a single ``ai_repair_status`` axis attached to ``slide_status``. Failure-axis priority (highest → lowest): ``error`` > ``coverage_violated`` > ``unsupported_kind`` > ``applied`` > ``ok``. ``human_review_required`` flips True on the three failure axes so the frontend (u11) can surface a notification per the IMP-47B policy ("AI 호출 실패 / proposal validation 실패 / coverage 미달 → frontend notification"). Pure: no IO, no AI call. IMP-92 u3 — propagate ``api_error_kind`` (quota / billing / auth / other) stamped by Step 12 (u2 ``classify_operational_error``) through ``ai_repair_status`` so the frontend operational formatter can route only operational kinds (quota / billing / auth) to user-visible alerts. ``api_error_kinds`` aggregates counts by kind at the summary level; ``error_records[i]["api_error_kind"]`` retains the per-record kind for unit-level surfacing. """ counts = { "total": len(ai_repair_records), "applied": 0, "no_proposal": 0, "no_zone_match": 0, "unsupported_kind": 0, "error": 0, } api_error_kinds = {"quota": 0, "billing": 0, "auth": 0, "other": 0} unsupported_records: list[dict] = [] error_records: list[dict] = [] for record in ai_repair_records: if record.get("error"): counts["error"] += 1 kind = record.get("api_error_kind") if kind in api_error_kinds: api_error_kinds[kind] += 1 error_records.append({ "unit_index": record.get("unit_index"), "source_section_ids": list(record.get("source_section_ids") or []), "error": record.get("error"), "api_error_kind": kind, }) continue apply_status = record.get("apply_status") or "" if apply_status.startswith("applied:"): counts["applied"] += 1 elif apply_status.startswith("unsupported_kind_for_reject_route:"): counts["unsupported_kind"] += 1 unsupported_records.append({ "unit_index": record.get("unit_index"), "source_section_ids": list(record.get("source_section_ids") or []), "apply_status": apply_status, }) elif apply_status == "no_zone_match": counts["no_zone_match"] += 1 else: counts["no_proposal"] += 1 coverage_status = (coverage_invariant or {}).get("status", "ok") dropped = list((coverage_invariant or {}).get("dropped_section_ids") or []) if counts["error"]: status = "error" elif coverage_status != "ok": status = "coverage_violated" elif counts["unsupported_kind"]: status = "unsupported_kind" elif counts["applied"]: status = "applied" else: status = "ok" return { "status": status, "counts": counts, "api_error_kinds": api_error_kinds, "unsupported_kind_records": unsupported_records, "error_records": error_records, "coverage_status": coverage_status, "dropped_section_ids": dropped, "human_review_required": status in {"error", "coverage_violated", "unsupported_kind"}, } def lookup_v4_match_with_fallback( v4: dict, section_id: str, *, raw_content: Optional[str] = None, max_rank: Optional[int] = None, alias_keys: Optional[list] = None, allow_provisional: bool = False, ) -> tuple[Optional[V4Match], dict]: """Select V4 rank-1, or promote rank-2..N when rank-1 is not auto-renderable. This is an IMP-05 selector only. It uses existing V4 labels, frame-contract presence, and the Phase Z capacity precheck; it does not call calculate_fit. IMP-30 u1 — when ``allow_provisional=True`` and the rank-1..effective_max_rank chain is exhausted (no candidate passes MVP1 filter + contract + capacity), the selector synthesizes a *provisional* V4Match from the rank-1 judgment so the first-render invariant can be satisfied downstream. The synthesized match carries ``provisional=True``, ``selection_path="provisional_rank_1"``, and ``fallback_reason`` mirrors the existing chain-exhaust reason. The candidate trace shape is unchanged (synthetic injection only updates the top-level ``selection_path`` + ``selected_*`` mirrors). When the rank-1 judgment itself is missing (``empty_v4_judgments`` / ``no_v4_section``), no provisional is synthesized — the caller handles those cases with a placeholder zone or empty-shell. Default ``allow_provisional=False`` keeps the IMP-05 behavior byte-identical. IMP-38 — dynamic effective max_rank via ``load_v4_fallback_policy()`` (4 round 합의 / Codex #1~#3 + Claude #1~#4 LOCK at #67 comment 23195): - ``max_rank=None`` (default) → policy applied: usable_count = candidates in rank 1..default_max_rank passing 3-tier predicate (status in MVP1 + catalog registered + optional capacity). usable_count >= usable_threshold → effective_max_rank = default_max_rank. Otherwise → effective_max_rank = min(extended_max_rank, len(judgments_full32)) = effective_extended_ceiling (Codex #2 정정). - ``max_rank`` explicitly passed → caller_override: that value is used as-is (backward compat for tests / explicit IMP-05/IMP-30 paths). Trace gains 8 IMP-38 fields: ``requested_max_rank``, ``default_max_rank``, ``configured_extended_max_rank``, ``judgments_count``, ``effective_extended_ceiling``, ``effective_max_rank``, ``usable_count``, ``policy_applied``. ``max_rank`` legacy field kept as alias for backward compat (= effective_max_rank). """ resolved = _resolve_v4_section_key(v4, section_id, alias_keys=alias_keys) sec = v4.get("mdx_sections", {}).get(resolved) if resolved else None all_judgments = (sec.get("judgments_full32") if sec else None) or [] judgments_count = len(all_judgments) # IMP-38 — load policy (graceful: yaml 없을 시 default_max_rank=3, extended=3) _policy = load_v4_fallback_policy() default_max_rank = int(_policy.get("default_max_rank", 3)) configured_extended_max_rank = int(_policy.get("extended_max_rank", default_max_rank)) usable_threshold = int(_policy.get("usable_threshold", 1)) # Codex #2 정정: min(configured, len(judgments_full32)) — yaml ceiling 무력화 방지 effective_extended_ceiling = min(configured_extended_max_rank, judgments_count) if judgments_count else default_max_rank usable_count: Optional[int] = None # set only when policy path active if max_rank is not None: # caller override (backward compat — explicit IMP-05/IMP-30 paths, tests) effective_max_rank = int(max_rank) policy_applied = "caller_override" elif judgments_count == 0: # no judgments — slicing 빈 list 라 어차피 영향 X effective_max_rank = default_max_rank policy_applied = "no_judgments" else: # IMP-38 policy path — 3-tier predicate usable_count on default window usable_count = 0 default_window = all_judgments[:default_max_rank] for _j in default_window: _m = _v4_match_from_judgment(section_id, _j, rank=0) if to_phase_z_status(_m) not in MVP1_ALLOWED_STATUSES: continue if get_contract(_m.template_id) is None: continue if raw_content is not None: _cap = compute_capacity_fit(_m.template_id, raw_content) if _cap and _cap.get("fit_status") not in { "ok", "no_contract", "unknown_source_shape", }: continue usable_count += 1 if usable_count >= usable_threshold: effective_max_rank = default_max_rank policy_applied = "default_max_rank" else: effective_max_rank = effective_extended_ceiling policy_applied = "extended_max_rank" trace = { "section_id": section_id, # IMP-38 — 8 trace fields (4 round LOCK) "requested_max_rank": max_rank, "default_max_rank": default_max_rank, "configured_extended_max_rank": configured_extended_max_rank, "judgments_count": judgments_count, "effective_extended_ceiling": effective_extended_ceiling, "effective_max_rank": effective_max_rank, "usable_count": usable_count, "policy_applied": policy_applied, # legacy alias for backward compat (= effective_max_rank) "max_rank": effective_max_rank, "selection_path": "no_v4_candidate", "selected_rank": None, "selected_template_id": None, "selected_frame_id": None, "selected_label": None, "fallback_used": False, "fallback_reason": None, "candidates": [], } if not sec: trace["fallback_reason"] = "no_v4_section" return None, trace judgments = all_judgments[:effective_max_rank] if not judgments: trace["fallback_reason"] = "empty_v4_judgments" return None, trace # IMP-39 (#68) u2 — apply single-source ranking sort policy to the selected # window AFTER IMP-38 raw-window calc (default_window / usable_count above # remain RAW all_judgments-based — no silent interaction with fallback # expansion). Selection order now follows # (label_priority asc, confidence desc, v4_rank asc) # so backend selected rank-1 matches frontend frame_candidates[0] # (designAgentApi.ts:578-597 LABEL_PRIORITY + confidence-desc mirror). # `v4_rank_key="v4_full_rank"` reads the RAW V4 confidence-rank from each # judgment dict for tie-break (yaml: tie_break_axes=[confidence_desc, # v4_rank_asc]). Input list is NOT mutated (apply_ranking_sort returns a # new list). Trace fields (sorted_candidate_evidence / ranking_sort_policy) # are forwarded through Step 9 payload in u3. ranking_sort_policy = load_ranking_sort_policy() judgments = apply_ranking_sort( judgments, policy=ranking_sort_policy, label_key="label", confidence_key="confidence", v4_rank_key="v4_full_rank", ) trace["ranking_sort_policy_applied"] = ranking_sort_policy.get( "policy_type", "deterministic_label_priority_then_confidence" ) first_skip_reason: Optional[str] = None # IMP-05 L4 dedup (Codex #14 ordering — Claude #16 placement precision) : # first occurrence claims template_id for the chain regardless of decision # (selected/non-direct/rejected/missing-contract/capacity-skipped). Defensive # against V4 anomaly where same template_id appears at multiple ranks with # different labels — first label/reason is preserved, later duplicates skip. seen_template_ids: set[str] = set() for i, judgment in enumerate(judgments, start=1): match = _v4_match_from_judgment(section_id, judgment, rank=i) status = to_phase_z_status(match) # IMP-05 L2 (Codex #10 E4) — informative candidate_evidence schema. # `v4_label` naming matches Codex schema (Claude #13 §1 lock). # `filtered_for_direct_execution` + `route_hint` = L5 restructure/reject trace 보존 # 단일 source (frontend/AI future consumer guidance). is_direct_eligible = status in MVP1_ALLOWED_STATUSES candidate_trace = { "rank": i, "template_id": match.template_id, "frame_id": match.frame_id, "frame_number": match.frame_number, "confidence": match.confidence, "label": match.label, # existing — kept for backward compat "v4_label": match.label, # IMP-05 L2 alias (Codex schema) "phase_z_status": status, "catalog_registered": get_contract(match.template_id) is not None, "filtered_for_direct_execution": not is_direct_eligible, # IMP-05 L2/L5 "route_hint": _imp05_route_hint(match.label), # IMP-05 L2/L5 "decision": "skipped", "reason": None, } # IMP-05 L4 dedup — duplicate check BEFORE rank evaluation. # First occurrence reserves template_id even if non-direct/rejected/skipped. # Later rank with same template_id is skipped as duplicate, audit fields preserved. if match.template_id in seen_template_ids: candidate_trace["reason"] = "duplicate_template_id" trace["candidates"].append(candidate_trace) continue seen_template_ids.add(match.template_id) if status not in MVP1_ALLOWED_STATUSES: candidate_trace["reason"] = f"phase_z_status_not_allowed:{status}" elif get_contract(match.template_id) is None: candidate_trace["reason"] = "skipped_no_contract" else: capacity_fit = None if raw_content is not None: capacity_fit = compute_capacity_fit(match.template_id, raw_content) candidate_trace["capacity_fit"] = capacity_fit if capacity_fit and capacity_fit.get("fit_status") not in { "ok", "no_contract", "unknown_source_shape", }: candidate_trace["reason"] = f"capacity_mismatch:{capacity_fit.get('fit_status')}" else: fallback_used = i > 1 fallback_reason = first_skip_reason if fallback_used else None match.selection_path = f"rank_{i}" if not fallback_used else f"rank_{i}_fallback" match.fallback_reason = fallback_reason candidate_trace["decision"] = "selected" candidate_trace["reason"] = "primary_selected" if i == 1 else "fallback_selected" trace["candidates"].append(candidate_trace) trace.update({ "selection_path": match.selection_path, "selected_rank": i, "selected_template_id": match.template_id, "selected_frame_id": match.frame_id, "selected_label": match.label, "fallback_used": fallback_used, "fallback_reason": fallback_reason, }) return match, trace if i == 1: first_skip_reason = candidate_trace["reason"] trace["candidates"].append(candidate_trace) trace["selection_path"] = "chain_exhausted" trace["fallback_reason"] = first_skip_reason or f"no_auto_renderable_rank_1_to_{effective_max_rank}" # IMP-30 u1 — opt-in provisional first-render synthesis. When the caller # signals allow_provisional, promote rank-1 judgment as a provisional # match so downstream composition can satisfy the first-render invariant. # Top-level mirrors (selection_path / selected_*) are updated; candidate # trace entries are left intact (their skip reasons remain accurate). # Default-off keeps IMP-05 behavior byte-identical. if allow_provisional: rank_1_judgment = judgments[0] provisional_match = _v4_match_from_judgment( section_id, rank_1_judgment, rank=1 ) provisional_match.selection_path = "provisional_rank_1" provisional_match.fallback_reason = trace["fallback_reason"] provisional_match.provisional = True trace.update({ "selection_path": "provisional_rank_1", "selected_rank": 1, "selected_template_id": provisional_match.template_id, "selected_frame_id": provisional_match.frame_id, "selected_label": provisional_match.label, "fallback_used": True, "provisional": True, }) return provisional_match, trace return None, trace def lookup_v4_all_judgments( v4: dict, section_id: str, *, alias_keys: Optional[list] = None ) -> list[V4Match]: """V4 raw 32 entry 그대로 반환 — reject 포함, max_n filter 없음. Step 7-A axis 보강 (사용자 lock 2026-05-08) — 사용자 UI 가 모든 frame 의 png 를 보여줄 수 있도록 reject 까지 trace. lookup_v4_candidates 는 변경 없음 (backward compat — non-reject + max_n 만 반환). Returns : list[V4Match] — 0~32 길이. raw judgments_full32 순서 (= V4 score desc) 보존. """ resolved = _resolve_v4_section_key(v4, section_id, alias_keys=alias_keys) sec = v4.get("mdx_sections", {}).get(resolved) if resolved else None if not sec: return [] judgments = sec.get("judgments_full32", []) out: list[V4Match] = [] for j in judgments: out.append(_v4_match_from_judgment(section_id, j)) return out def _is_visual_pending(template_id: str) -> bool: """IMP-#85 u4 — return True iff catalog marks contract as ``visual_pending``. Data-driven from ``frame_contracts.yaml`` (no hard-coded frame allow-list). Used by ``lookup_v4_candidates`` to exclude VP frames from the live candidate set; ``lookup_v4_all_judgments`` raw telemetry stays untouched (Step 7-A axis preserves full 32-frame evidence for the frontend). """ contract = get_contract(template_id) if not isinstance(contract, dict): return False return contract.get("visual_pending") is True def lookup_v4_candidates( v4: dict, section_id: str, max_n: int = 6, *, alias_keys: Optional[list] = None, ) -> list[V4Match]: """V4 non-reject 후보 list 반환 (Step 5 보완 axis — 사용자 lock 2026-05-08). Rule (catalog 와 1:1) : v4_candidates = [ c for c in judgments_full32 if c["label"] != "reject" and not visual_pending(c.template_id) # IMP-#85 u4 ][:max_n] Returns: list[V4Match] — 0~max_n 길이. 0 길이 = "no_non_reject_v4_candidate" 신호 (Step 9 fallback path 입력). raw 32 entry 는 tests/matching/v4_full32_result.yaml 에 영속 보존. Backward compat: lookup_v4_match() (rank-1) 는 그대로. Step 6 의 plan_composition() 호출처 무변. 본 함수는 Step 5 artifact + Step 9 application_plan input 위한 새 entry point. IMP-#85 u4 — visual_pending frames are excluded from the live candidate set (catalog scaffolding without registered builder would crash the mapper). lookup_v4_all_judgments raw telemetry is intentionally NOT gated here. """ resolved = _resolve_v4_section_key(v4, section_id, alias_keys=alias_keys) sec = v4.get("mdx_sections", {}).get(resolved) if resolved else None if not sec: return [] judgments = sec.get("judgments_full32", []) candidates: list[V4Match] = [] for j in judgments: if j.get("label") == "reject": continue tid = j.get("template_id") if tid and _is_visual_pending(tid): continue candidates.append(_v4_match_from_judgment(section_id, j)) if len(candidates) >= max_n: break return candidates def _apply_frame_override_to_unit(unit, new_tid: str, v4: dict) -> str: """IMP-47B u3 — apply a frame override to *unit* in place. Returns a meta_source string for the override book-keeping. Three probe layers, in order: 1. ``unit.v4_candidates`` (non-reject, max_n bounded). Copies frame_id / frame_number / confidence / label from the matching candidate so Step 9 metadata stays consistent. Returns ``"v4_candidates"``. 2. Full 32 V4 judgments (reject inclusive). When the override target matches a reject judgment for the unit's primary section, the unit is promoted to ``provisional=True`` with ``label="reject"`` so Step 12 (IMP-47B u4) admits the AI repair path. Returns ``"v4_reject_judgment_provisional"``. 3. Raw fall-through. Updates only ``frame_template_id``; returns ``"raw_template_id_only"``. Frame visual / contract stay untouched per the AI isolation contract (frame auto-swap forbidden — AI re-places content into the existing frame only). The caller validates catalog contract presence before invoking this helper. """ for cand in (unit.v4_candidates or []): if getattr(cand, "template_id", None) == new_tid: unit.frame_template_id = cand.template_id unit.frame_id = cand.frame_id unit.frame_number = cand.frame_number unit.confidence = cand.confidence unit.label = cand.label return "v4_candidates" primary_sid = ( unit.source_section_ids[0] if unit.source_section_ids else None ) if primary_sid: for j in lookup_v4_all_judgments(v4, primary_sid): if j.template_id == new_tid and j.label == "reject": unit.frame_template_id = j.template_id unit.frame_id = j.frame_id unit.frame_number = j.frame_number unit.confidence = j.confidence unit.label = "reject" unit.provisional = True return "v4_reject_judgment_provisional" unit.frame_template_id = new_tid return "raw_template_id_only" # ─── Content weight + zone layout 계산 ───────────────────────── # layout preset 선택은 phase_z2_composition.select_layout_preset (composition v0) 가 담당. # 본 모듈의 select_layout_preset 은 이전 단순 count-based 구현이었고 dead code 로 제거 (2026-04-29). def compute_content_weight(section: MdxSection) -> dict: """Section 의 콘텐츠 부피 측정 — text/bullet/table/subsection 합성 score.""" text = section.raw_content lines = text.splitlines() text_len = len(text) top_bullets = sum(1 for l in lines if re.match(r"^[\*\-]\s", l)) nested_bullets = sum(1 for l in lines if re.match(r"^\s+[\*\-]\s", l)) has_table = bool(re.search(r"\|[^\n]+\|\n[ \t]*\|[\s\-:|]+\|", text)) subsections = len(re.findall(r"^###\s", text, re.MULTILINE)) c = CONTENT_WEIGHT_COEFFS score = ( text_len / c["text_per_chars"] + top_bullets * c["top_bullet"] + nested_bullets * c["nested_bullet"] + (c["table_bonus"] if has_table else 0) + subsections * c["subsection"] ) return { "score": round(score, 3), "text_length": text_len, "top_bullets": top_bullets, "nested_bullets": nested_bullets, "has_table": has_table, "subsection_count": subsections, } def compute_zone_layout(zones_data: list[dict], total_height: int = SLIDE_BODY_HEIGHT, gap: int = GRID_GAP) -> dict: """zone height 계산 — frame_min_height_px 우선 + 남은 공간 content_weight 비율 분배. Returns dict with per-zone heights + reasoning trace. """ n = len(zones_data) if n == 0: return {"heights_px": [], "ratios": [], "zones": []} available = total_height - gap * (n - 1) # Step 1: 각 zone 의 min_height 할당 — pipeline 가 zones_data 에 frame contract 의 # visual_hints.min_height_px 를 미리 주입했음. 없으면 DEFAULT_ZONE_MIN_HEIGHT_PX. min_heights = [ z.get("min_height_px", DEFAULT_ZONE_MIN_HEIGHT_PX) for z in zones_data ] total_min = sum(min_heights) min_scaled = False if total_min > available: scale = available / total_min min_heights = [int(m * scale) for m in min_heights] total_min = sum(min_heights) min_scaled = True remaining = available - total_min # Step 2: 남은 공간을 content_weight 비율로 분배 weights = [z["content_weight"]["score"] for z in zones_data] total_w = sum(weights) if sum(weights) > 0 else n extras = [int(round(remaining * (w / total_w))) for w in weights] # Step 3: rounding 보정 (마지막 zone 잔여 흡수) heights = [m + e for m, e in zip(min_heights, extras)] diff = available - sum(heights) if diff != 0 and heights: heights[-1] += diff ratios = [round(h / total_height, 3) for h in heights] return { "computation": "min_height_first + content_weight_distribution", "slide_body_height": total_height, "gap": gap, "available_after_gap": available, "min_heights_px": min_heights, "min_scaled": min_scaled, "total_min_height": total_min, "remaining_after_min": remaining, "content_weights": [{"position": z["position"], "template_id": z["template_id"], "score": w} for z, w in zip(zones_data, weights)], "weight_shares": [round(w / total_w, 3) for w in weights], "extras_px": extras, "heights_px": heights, "ratios": ratios, } # ─── IMP-09 PR 1 helpers (8-preset layout vocabulary) ──────────────── # Catalog css_areas / css_cols / css_rows parsing + per-zone aggregation # + col-axis solver. Symmetric counterparts to compute_zone_layout (row-axis). def _parse_css_areas(css_areas: str) -> tuple[list[list[str]], list[str]]: """Parse CSS grid-template-areas string into (row x col) cell grid. Input : '"top top" "bottom-left bottom-right"' Output : ( [["top", "top"], ["bottom-left", "bottom-right"]], ["top", "bottom-left", "bottom-right"], ) Raises ValueError on empty input, missing quotes, empty row, or non-rectangular layout (rows with mismatched column counts). """ rows: list[list[str]] = [] seen: list[str] = [] quoted = re.findall(r'"([^"]+)"', css_areas) if not quoted: raise ValueError( f"_parse_css_areas: no quoted row strings found in {css_areas!r}" ) for row_str in quoted: tokens = row_str.split() if not tokens: raise ValueError( f"_parse_css_areas: empty row in {css_areas!r}" ) rows.append(tokens) for token in tokens: if token not in seen: seen.append(token) col_counts = {len(r) for r in rows} if len(col_counts) > 1: raise ValueError( f"_parse_css_areas: non-rectangular grid, row column counts = " f"{col_counts} in {css_areas!r}" ) return rows, seen def _parse_fr_string(spec: str, total: int) -> list[int]: """Parse '1fr' / '1fr 1fr' / 'Nfr Mfr' into integer px lengths. Catalog presets (templates/phase_z2/layouts/layouts.yaml) only use 1fr-only specs; mixed px/fr is out of scope. Raises ValueError on non-fr tokens or zero total. """ fractions: list[float] = [] for token in spec.split(): m = re.fullmatch(r"(\d+(?:\.\d+)?)fr", token) if not m: raise ValueError( f"_parse_fr_string: non-fr token {token!r} in {spec!r}" ) fractions.append(float(m.group(1))) if not fractions: raise ValueError(f"_parse_fr_string: empty spec {spec!r}") total_fr = sum(fractions) if total_fr <= 0: raise ValueError(f"_parse_fr_string: total fr = 0 in {spec!r}") sizes = [int(round(total * (f / total_fr))) for f in fractions] sizes[-1] += total - sum(sizes) return sizes def compute_zone_layout_cols(zones_data: list[dict], total_width: int = SLIDE_BODY_WIDTH, gap: int = GRID_GAP) -> dict: """Per-zone column width allocation — weight-only distribution. Symmetric counterpart of compute_zone_layout for the column axis. No min_width_px contract exists in frame_contracts.yaml (verified empty as of IMP-09), so column allocation is purely content_weight score based. """ n = len(zones_data) if n == 0: return {"widths_px": [], "width_ratios": [], "zones": []} available = total_width - gap * (n - 1) weights = [z["content_weight"]["score"] for z in zones_data] total_w = sum(weights) if total_w <= 0: # Zero-weight guard (override-empty zone where score=0). widths_px = [available // n] * n widths_px[-1] += available - sum(widths_px) weight_shares = [round(1.0 / n, 3)] * n else: widths_px = [ int(round(available * (w / total_w))) for w in weights ] diff = available - sum(widths_px) if diff != 0: widths_px[-1] += diff weight_shares = [round(w / total_w, 3) for w in weights] width_ratios = [round(w / total_width, 3) for w in widths_px] return { "computation": "content_weight_distribution_cols", "slide_body_width": total_width, "gap": gap, "available_after_gap": available, "content_weights": [ {"position": z["position"], "template_id": z["template_id"], "score": w} for z, w in zip(zones_data, weights) ], "weight_shares": weight_shares, "widths_px": widths_px, "width_ratios": width_ratios, } def _aggregate_zone_signals_per_track( preset: dict, zones_data: list[dict], ) -> tuple[list[dict], list[dict]]: """Build per-row + per-col virtual zones for 2-D dynamic dispatch. Each virtual zone aggregates content_weight.score (max) and min_height_px (max) across single-span zones on that track (occupied_rows == {r} for rows, occupied_cols == {c} for cols). Falls back to all-span zones (touching every track on the axis) when a track has no single-span zone. """ rows_grid, _ = _parse_css_areas(preset["css_areas"]) R = len(rows_grid) C = len(rows_grid[0]) occupancy: list[tuple[dict, set[int], set[int]]] = [] for z in zones_data: pos = z["position"] occ_rows = {r for r, row in enumerate(rows_grid) if pos in row} occ_cols = { c for row in rows_grid for c, tok in enumerate(row) if tok == pos } occupancy.append((z, occ_rows, occ_cols)) def _track_virtual(idx: int, axis: str) -> dict: if axis == "row": single = [z for z, rr, _cc in occupancy if rr == {idx}] allspan = [z for z, rr, _cc in occupancy if rr == set(range(R))] else: single = [z for z, _rr, cc in occupancy if cc == {idx}] allspan = [z for z, _rr, cc in occupancy if cc == set(range(C))] candidates = single or allspan return { "position": f"_virtual_{axis}_{idx}", "template_id": f"_virtual_{axis}_{idx}", "content_weight": { "score": max(c["content_weight"]["score"] for c in candidates) }, "min_height_px": max( c.get("min_height_px", DEFAULT_ZONE_MIN_HEIGHT_PX) for c in candidates ), } row_virtuals = [_track_virtual(r, "row") for r in range(R)] col_virtuals = [_track_virtual(c, "col") for c in range(C)] return row_virtuals, col_virtuals def _compute_per_zone_geometry( layout_css: dict, debug_zones: list[dict], gap: int = GRID_GAP, ) -> list[dict]: """Aggregate grid-track sizes into per-zone dimensions for ALL layouts. Parses layout_css["areas"] (catalog css_areas) into an R x C cell grid, then for each zone in debug_zones sums the heights_px of its occupied rows and widths_px of its occupied columns, including the inter-track gap absorbed by a spanning zone. Length contract: layout_css["heights_px"] MUST have length R, and layout_css["widths_px"] MUST have length C. Mismatch raises ValueError because that indicates a broken build path, not a runtime input issue. """ rows_grid, _ = _parse_css_areas(layout_css["areas"]) R = len(rows_grid) C = len(rows_grid[0]) heights_px = layout_css.get("heights_px") or [] widths_px = layout_css.get("widths_px") or [] if len(heights_px) != R: raise ValueError( f"_compute_per_zone_geometry: heights_px length " f"{len(heights_px)} != grid rows R={R} " f"(css_areas={layout_css.get('areas')!r})" ) if len(widths_px) != C: raise ValueError( f"_compute_per_zone_geometry: widths_px length " f"{len(widths_px)} != grid cols C={C} " f"(css_areas={layout_css.get('areas')!r})" ) per_zone: list[dict] = [] for dz in debug_zones: pos = dz["position"] occupied_rows = sorted( {r for r, row in enumerate(rows_grid) if pos in row} ) occupied_cols = sorted( {c for r, row in enumerate(rows_grid) for c, tok in enumerate(row) if tok == pos} ) if not occupied_rows or not occupied_cols: raise ValueError( f"_compute_per_zone_geometry: zone position {pos!r} " f"not present in css_areas {rows_grid}" ) zh = ( sum(heights_px[r] for r in occupied_rows) + gap * (len(occupied_rows) - 1) ) zw = ( sum(widths_px[c] for c in occupied_cols) + gap * (len(occupied_cols) - 1) ) per_zone.append({ "position": pos, "zone_height_px": zh, "zone_width_px": zw, "zone_height_ratio": round(zh / SLIDE_BODY_HEIGHT, 3), "zone_width_ratio": round(zw / SLIDE_BODY_WIDTH, 3), }) return per_zone def _build_fr_default(preset: dict) -> dict: """fr-default sink — populate widths_px / heights_px from catalog fr ratios. Replaces the legacy empty-array sink so that downstream consumers (Step 7/8 trace, _compute_per_zone_geometry) always receive length-locked arrays matching the catalog grid dimensions. """ rows_grid, _ = _parse_css_areas(preset["css_areas"]) R = len(rows_grid) C = len(rows_grid[0]) avail_h = SLIDE_BODY_HEIGHT - GRID_GAP * (R - 1) avail_w = SLIDE_BODY_WIDTH - GRID_GAP * (C - 1) heights_px = _parse_fr_string(preset["css_rows"], avail_h) widths_px = _parse_fr_string(preset["css_cols"], avail_w) return { "areas": preset["css_areas"], "cols": preset["css_cols"], "rows": preset["css_rows"], "heights_px": heights_px, "widths_px": widths_px, "ratios": [round(h / SLIDE_BODY_HEIGHT, 3) for h in heights_px], "width_ratios": [round(w / SLIDE_BODY_WIDTH, 3) for w in widths_px], "computation": "fr_default_from_preset", "dynamic_rows": False, "dynamic_cols": False, "raw_zone_layout": None, } def _build_rows_dynamic(preset: dict, zones_data: list[dict], gap: int = GRID_GAP) -> dict: """horizontal-2 path — dynamic row heights, static fr column widths. Preserves the legacy compute_zone_layout output (heights_px / ratios / computation / raw_zone_layout) byte-for-byte; only adds the new col-axis keys (widths_px from css_cols fr, width_ratios, dynamic_cols=False). """ rows_grid, _ = _parse_css_areas(preset["css_areas"]) C = len(rows_grid[0]) avail_w = SLIDE_BODY_WIDTH - gap * (C - 1) widths_px = _parse_fr_string(preset["css_cols"], avail_w) zl = compute_zone_layout(zones_data, gap=gap) rows_str = " ".join(f"{h}px" for h in zl["heights_px"]) return { "areas": preset["css_areas"], "cols": preset["css_cols"], "rows": rows_str, "heights_px": zl["heights_px"], "widths_px": widths_px, "ratios": zl["ratios"], "width_ratios": [round(w / SLIDE_BODY_WIDTH, 3) for w in widths_px], "computation": zl["computation"], "dynamic_rows": True, "dynamic_cols": False, "raw_zone_layout": zl, } def _build_grid_dynamic_2d(preset: dict, zones_data: list[dict], gap: int = GRID_GAP) -> dict: """2-D dynamic path — dynamic row heights + dynamic column widths. IMP-09 PR 2 (B-4) handler for the five preset families whose topology is neither pure 'rows' nor pure 'cols': - T (top-1-bottom-2) - inverted-T (top-2-bottom-1) - side-T-left (left-1-right-2) - side-T-right (left-2-right-1) - 2x2 (grid-2x2) Strategy: 1) _aggregate_zone_signals_per_track builds R per-row + C per-col virtual zones (max content_weight.score + max min_height_px of single-span zones, falling back to all-span zones). 2) Row virtuals → compute_zone_layout → heights_px (R). 3) Col virtuals → compute_zone_layout_cols → widths_px (C). 4) Assemble layout_css dict with computation='2d_dynamic_aggregated' and dynamic_rows=True, dynamic_cols=True. raw_zone_layout carries both solver outputs + the virtual zone lists so step08 trace can explain the per-track aggregation. """ row_virtuals, col_virtuals = _aggregate_zone_signals_per_track( preset, zones_data ) zl_row = compute_zone_layout(row_virtuals, gap=gap) zl_col = compute_zone_layout_cols(col_virtuals, gap=gap) heights_px = zl_row["heights_px"] widths_px = zl_col["widths_px"] rows_str = " ".join(f"{h}px" for h in heights_px) cols_str = " ".join(f"{w}px" for w in widths_px) return { "areas": preset["css_areas"], "cols": cols_str, "rows": rows_str, "heights_px": heights_px, "widths_px": widths_px, "ratios": [round(h / SLIDE_BODY_HEIGHT, 3) for h in heights_px], "width_ratios": [round(w / SLIDE_BODY_WIDTH, 3) for w in widths_px], "computation": "2d_dynamic_aggregated", "dynamic_rows": True, "dynamic_cols": True, "raw_zone_layout": { "row_layout": zl_row, "col_layout": zl_col, "row_virtuals": row_virtuals, "col_virtuals": col_virtuals, }, } def _build_cols_dynamic(preset: dict, zones_data: list[dict], gap: int = GRID_GAP) -> dict: """vertical-2 path — dynamic column widths, static fr row heights. Mirror of _build_rows_dynamic. Returns a pixel grid-template-columns string. PR 2 promotes vertical-2 override to dynamic_rows=True; in PR 1 dynamic_rows stays False (legacy). """ rows_grid, _ = _parse_css_areas(preset["css_areas"]) R = len(rows_grid) avail_h = SLIDE_BODY_HEIGHT - gap * (R - 1) heights_px = _parse_fr_string(preset["css_rows"], avail_h) zl = compute_zone_layout_cols(zones_data, gap=gap) cols_str = " ".join(f"{w}px" for w in zl["widths_px"]) return { "areas": preset["css_areas"], "cols": cols_str, "rows": preset["css_rows"], "heights_px": heights_px, "widths_px": zl["widths_px"], "ratios": [round(h / SLIDE_BODY_HEIGHT, 3) for h in heights_px], "width_ratios": zl["width_ratios"], "computation": zl["computation"], "dynamic_rows": False, "dynamic_cols": True, "raw_zone_layout": zl, } def _override_to_grid_tracks( preset: dict, zones_data: list[dict], override_zone_geometries: dict[str, dict], gap: int = GRID_GAP, ) -> dict: """2-D override path — derive heights_px (R) + widths_px (C) from user-supplied zone_id -> {x, y, w, h} (0~1 within slide-body). IMP-09 PR 2 (B-4) override handler for the five preset families whose topology is neither pure 'rows' nor pure 'cols': - T (top-1-bottom-2) - inverted-T (top-2-bottom-1) - side-T-left (left-1-right-2) - side-T-right (left-2-right-1) - 2x2 (grid-2x2) Strategy: 1) Parse css_areas into R x C grid. 2) For each row r: aggregate h via max over single-row zones (occupied_rows == {r}); fallback to all-span zones; else 0.0. 3) For each col c: same with w. 4) Normalize per-axis (divide by total) and multiply by avail_*, absorbing rounding diff into the last element. 5) If total_h or total_w == 0 (degenerate / empty override), fall back to _build_grid_dynamic_2d default path. """ rows_grid, _ = _parse_css_areas(preset["css_areas"]) R = len(rows_grid) C = len(rows_grid[0]) # Hot-fix (2026-05-22): partial override 버그 fix — override 없는 track 은 # default 비율로 fallback. 이전엔 0 반환 → normalize 후 다른 track 이 모든 공간 흡수. _default_result = _build_grid_dynamic_2d(preset, zones_data, gap=gap) _default_widths = _default_result.get("widths_px", []) or [] _default_heights = _default_result.get("heights_px", []) or [] _sum_w = sum(_default_widths) if _default_widths else 1.0 _sum_h = sum(_default_heights) if _default_heights else 1.0 occupancy: list[tuple[dict, set[int], set[int]]] = [] for z in zones_data: pos = z["position"] occ_rows = {r for r, row in enumerate(rows_grid) if pos in row} occ_cols = { c for row in rows_grid for c, tok in enumerate(row) if tok == pos } occupancy.append((z, occ_rows, occ_cols)) def _track_value(idx: int, axis: str) -> float: if axis == "row": single = [z for z, rr, _cc in occupancy if rr == {idx}] allspan = [z for z, rr, _cc in occupancy if rr == set(range(R))] key = "h" _fallback = (_default_heights[idx] / _sum_h) if idx < len(_default_heights) and _sum_h else (1.0 / R) else: single = [z for z, _rr, cc in occupancy if cc == {idx}] allspan = [z for z, _rr, cc in occupancy if cc == set(range(C))] key = "w" _fallback = (_default_widths[idx] / _sum_w) if idx < len(_default_widths) and _sum_w else (1.0 / C) candidates = single or allspan vals = [ float(override_zone_geometries[z["position"]][key]) for z in candidates if z["position"] in override_zone_geometries ] return max(vals) if vals else _fallback row_values = [_track_value(r, "row") for r in range(R)] col_values = [_track_value(c, "col") for c in range(C)] total_h = sum(row_values) total_w = sum(col_values) if total_h == 0 or total_w == 0: return _build_grid_dynamic_2d(preset, zones_data, gap=gap) row_ratios = [v / total_h for v in row_values] col_ratios = [v / total_w for v in col_values] avail_h = SLIDE_BODY_HEIGHT - gap * (R - 1) avail_w = SLIDE_BODY_WIDTH - gap * (C - 1) heights_px = [int(round(r * avail_h)) for r in row_ratios] widths_px = [int(round(r * avail_w)) for r in col_ratios] diff_h = avail_h - sum(heights_px) if diff_h != 0 and heights_px: heights_px[-1] += diff_h diff_w = avail_w - sum(widths_px) if diff_w != 0 and widths_px: widths_px[-1] += diff_w rows_str = " ".join(f"{h}px" for h in heights_px) cols_str = " ".join(f"{w}px" for w in widths_px) return { "areas": preset["css_areas"], "cols": cols_str, "rows": rows_str, "heights_px": heights_px, "widths_px": widths_px, "ratios": [round(rr, 3) for rr in row_ratios], "width_ratios": [round(rr, 3) for rr in col_ratios], "computation": "user_override_geometry", "dynamic_rows": True, "dynamic_cols": True, "raw_zone_layout": { "override_applied": True, "source": override_zone_geometries, }, } # Layout preset → zone position 순서 = LAYOUT_PRESETS[preset]["positions"] 직접 사용. # 이전 ZONE_POSITIONS_BY_PRESET (type-b 등 legacy 명) 는 dead code 로 제거 (2026-04-29). def build_layout_css(layout_preset: str, zones_data: list[dict], gap: int = GRID_GAP, override_zone_geometries: Optional[dict[str, dict]] = None) -> dict: """Composition v0 layout preset → CSS grid 문자열. IMP-09 PR 1 contract — every layout_css return path carries matching-length heights_px (= grid rows R) and widths_px (= grid cols C), plus ratios / width_ratios / dynamic_rows / dynamic_cols. The horizontal-2 grid CSS strings (areas/cols/rows) remain byte-identical to the legacy path. Dynamic dispatch: - topology="rows" -> _build_rows_dynamic (horizontal-2: row heights) - topology="cols" -> _build_cols_dynamic (vertical-2: col widths) - other topologies (single / T / inverted-T / side-T / 2x2) fall through to _build_fr_default in PR 1; PR 2 enables the 2-D dispatcher. Step D-ext (사용자 lock 2026-05-08) — override_zone_geometries (zone_id -> {x,y,w,h} slide-body 내부 0~1) 가 들어오면 그 비율로 layout_css 강제. PR 1 lock: horizontal-2 / vertical-2 만 처리 (legacy inline preserve). 다른 preset 은 warn-and-fallthrough (PR 2 가 unified _override_to_grid_tracks 로 promote). """ preset = LAYOUT_PRESETS[layout_preset] positions = preset["positions"] topology = preset.get("topology") # ── Step D-ext : user override 처리 ── if override_zone_geometries: if layout_preset == "horizontal-2": # IMP-44 u1 — unknown-key guard: drop foreign-preset keys # (예: vertical-2 keys {left,right} sent to horizontal-2), emit # structured warning, keep matching keys. All-unknown → fall # through to default dynamic dispatch (no false override_applied). unknown_keys = sorted( k for k in override_zone_geometries if k not in positions ) if unknown_keys: print( f" [override-warning] layout_preset={layout_preset} " f"expected_positions={list(positions)} unknown_keys={unknown_keys} " f"(dropped foreign-preset keys; default split for non-overridden).", file=sys.stderr, ) filtered_overrides = { k: v for k, v in override_zone_geometries.items() if k in positions } if filtered_overrides: # heights_px override — zone 의 h 비율로 SLIDE_BODY_HEIGHT 분배. # Hot-fix (2026-05-22): partial override = 나머지 공간을 비-override zone 들에 # 균등 분배 (drag boundary intent). 이전엔 0.0 fallback → 100/0 깨짐. overridden_h = sum( float(filtered_overrides[p]["h"]) for p in positions if p in filtered_overrides ) non_overridden = [p for p in positions if p not in filtered_overrides] per_non = max(0.0, 1.0 - overridden_h) / max(len(non_overridden), 1) ratios = [] for pos in positions: geom = filtered_overrides.get(pos) ratios.append(float(geom["h"]) if geom else per_non) total = sum(ratios) if total > 0: heights_px = [int(round(r / total * SLIDE_BODY_HEIGHT)) for r in ratios] rows = " ".join(f"{h}px" for h in heights_px) return { "areas": preset["css_areas"], "cols": preset["css_cols"], "rows": rows, "heights_px": heights_px, "widths_px": [SLIDE_BODY_WIDTH], "ratios": [round(r / total, 3) for r in ratios], "width_ratios": [1.0], "computation": "user_override_geometry", "dynamic_rows": True, "dynamic_cols": False, "raw_zone_layout": {"override_applied": True, "source": filtered_overrides}, } elif layout_preset == "vertical-2": # IMP-44 u1 — unknown-key guard: drop foreign-preset keys # (예: horizontal-2 keys {top,bottom} sent to vertical-2), emit # structured warning, keep matching keys. All-unknown → fall # through to default dynamic dispatch (no false override_applied). unknown_keys = sorted( k for k in override_zone_geometries if k not in positions ) if unknown_keys: print( f" [override-warning] layout_preset={layout_preset} " f"expected_positions={list(positions)} unknown_keys={unknown_keys} " f"(dropped foreign-preset keys; default split for non-overridden).", file=sys.stderr, ) filtered_overrides = { k: v for k, v in override_zone_geometries.items() if k in positions } if filtered_overrides: # cols override — zone 의 w 비율로 fr 분배 (legacy: fr-string cols). # PR 1 keeps fr-string cols for legacy preserve; widths_px is # populated in pixels for _compute_per_zone_geometry length contract. # Hot-fix (2026-05-22): partial override = 나머지 공간을 비-override zone 들에 # 균등 분배 (drag boundary intent). 이전엔 0.0 fallback → 100/0 깨짐. overridden_w = sum( float(filtered_overrides[p]["w"]) for p in positions if p in filtered_overrides ) non_overridden = [p for p in positions if p not in filtered_overrides] per_non = max(0.0, 1.0 - overridden_w) / max(len(non_overridden), 1) ratios = [] for pos in positions: geom = filtered_overrides.get(pos) ratios.append(float(geom["w"]) if geom else per_non) total = sum(ratios) if total > 0: cols = " ".join(f"{round(r / total * 100, 2)}fr" for r in ratios) normalized = [r / total for r in ratios] widths_px = [ int(round(rr * (SLIDE_BODY_WIDTH - gap * (len(ratios) - 1)))) for rr in normalized ] diff = (SLIDE_BODY_WIDTH - gap * (len(ratios) - 1)) - sum(widths_px) if diff != 0 and widths_px: widths_px[-1] += diff return { "areas": preset["css_areas"], "cols": cols, "rows": preset["css_rows"], "heights_px": [SLIDE_BODY_HEIGHT], "widths_px": widths_px, "ratios": [1.0], "width_ratios": [round(rr, 3) for rr in normalized], "computation": "user_override_geometry", "dynamic_rows": False, "dynamic_cols": True, "raw_zone_layout": {"override_applied": True, "source": filtered_overrides}, } elif topology in ("T", "inverted-T", "side-T-left", "side-T-right", "2x2"): # IMP-09 PR 2 — 2-D override path (T / inverted-T / side-T / 2x2). # Degenerate inputs (total_h == 0 or total_w == 0) fall back to # _build_grid_dynamic_2d inside the helper. # # IMP-44 u2 — unknown-key guard mirrors u1 (1-D): drop foreign- # preset keys (예: vertical-2 keys {left,right} sent to T-preset), # emit structured warning, keep matching keys. All-unknown → fall # through to _build_grid_dynamic_2d default (no false override_applied). unknown_keys = sorted( k for k in override_zone_geometries if k not in positions ) if unknown_keys: print( f" [override-warning] layout_preset={layout_preset} " f"expected_positions={list(positions)} unknown_keys={unknown_keys} " f"(dropped foreign-preset keys; default split for non-overridden).", file=sys.stderr, ) filtered_overrides = { k: v for k, v in override_zone_geometries.items() if k in positions } if filtered_overrides: return _override_to_grid_tracks( preset, zones_data, filtered_overrides, gap=gap ) return _build_grid_dynamic_2d(preset, zones_data, gap=gap) else: # warn-and-fallthrough preserved for remaining presets (single). # PR 3 territory. print( f" [override-warning] zone-geometry override 는 layout '{layout_preset}' 미지원 " f"(현재 horizontal-2 / vertical-2 / 2-D presets 만). default layout_css 사용.", file=sys.stderr, ) # ── Dynamic branch — topology dispatch ── if topology == "rows": return _build_rows_dynamic(preset, zones_data, gap) if topology == "cols": return _build_cols_dynamic(preset, zones_data, gap) if topology in ("T", "inverted-T", "side-T-left", "side-T-right", "2x2"): return _build_grid_dynamic_2d(preset, zones_data, gap) # PR 3 will dispatch single here. return _build_fr_default(preset) # ─── Abort ────────────────────────────────────────────────────── def abort_with_error(run_dir: Path, section: MdxSection, match: Optional[V4Match], stage: str, reason: str): error_data = { "section": {"id": section.section_id, "title": section.title}, "frame": { "id": match.frame_id if match else None, "number": match.frame_number if match else None, "template_id": match.template_id if match else None, }, "v4_label": match.label if match else None, "phase_z_status": to_phase_z_status(match) if match else None, "confidence": match.confidence if match else None, "stage": stage, "reason": reason, } run_dir.mkdir(parents=True, exist_ok=True) err_path = run_dir / "error.json" err_path.write_text(json.dumps(error_data, ensure_ascii=False, indent=2), encoding="utf-8") print(f"\n[Phase Z-2 MVP-1.5b] ABORT @ {stage}", file=sys.stderr) print(f" section : {section.section_id} — {section.title}", file=sys.stderr) if match: print(f" frame : {match.frame_id} ({match.template_id})", file=sys.stderr) print(f" status : V4 label '{match.label}' → Phase Z '{to_phase_z_status(match)}'", file=sys.stderr) print(f" reason : {reason}", file=sys.stderr) print(f" error : {err_path}", file=sys.stderr) sys.exit(1) # ─── IMP-06 Step 6 zone-section assignment override (backend/CLI/composition only) ── def _build_position_assignment_plan( units: list, positions: list[str], override_section_assignments: Optional[dict[str, list[str]]], sections_by_id: dict[str, "MdxSection"], override_frames: Optional[dict[str, str]] = None, v4: Optional[dict] = None, ) -> tuple[list[dict], dict]: """IMP-06 (#6 / Codex #6,#7 15-axis lock) — section-to-position assignment plan. Pure helper invoked AFTER ``plan_composition()`` returns ``units`` and AFTER ``override_layout`` has been applied so ``positions`` is the final layout-preset position vocabulary. Locked behavior : - explicit override wins per position - no section id appears in more than one final rendered position - overlapping auto units are skipped WHOLE (no split, no cascade, no replan) - template_id resolution ladder : (1) ``override_frames`` exact ``unit_id`` (catalog validation downstream) (2) exact existing auto unit reuse (same ``source_section_ids``) (3) single-section override -> ``lookup_v4_match_with_fallback`` with ``raw_content=section.raw_content`` (direct executable only) (4) multi-section ad-hoc override (no exact auto + no override-frame) -> skipped_reason='ad_hoc_merged_no_template' - additive trace : ``previous_source_section_ids`` (position-level history), ``skipped_collided_auto_units`` (collision-level), ``uncovered_section_ids`` (post-override coverage gap), ``v4_selector_trace`` (selector failure embed), ``skipped_reason`` for failed assignments Returns ``(plan, summary)`` where : - ``plan`` : list[dict] keyed by position with the per-position record - ``summary`` : dict with applied/skipped counts + uncovered ids NOTE : the helper does NOT mutate ``units`` and does NOT raise on validation failures; caller is responsible for fail-fast validation of unknown zone ids or unknown section ids before calling. """ overrides = override_section_assignments or {} override_frames = override_frames or {} # Section ids reserved by any explicit override. overridden_section_ids: set[str] = set() for _zid, sids in overrides.items(): overridden_section_ids.update(sids) # Build position -> auto unit baseline. Auto plan uses sequential mapping of # ``units`` over ``positions`` (the same order Step 6 of the pipeline uses). auto_by_position: dict[str, object] = {} for i, pos in enumerate(positions): auto_by_position[pos] = units[i] if i < len(units) else None # Reverse lookup : section_id -> auto unit (for collision detection). auto_unit_by_section: dict[str, object] = {} for u in units: for sid in u.source_section_ids: auto_unit_by_section[sid] = u # Track auto units that get whole-skipped because of collision. skipped_collided_unit_ids: set[str] = set() plan: list[dict] = [] def _unit_id(sids: list[str]) -> str: return "+".join(sids) def _resolve_template_for_override(zone_id: str, sids: list[str]) -> tuple[ Optional[str], Optional[str], Optional[dict] ]: """template_id resolution ladder. Returns (template_id, skipped_reason, v4_selector_trace).""" unit_id = _unit_id(sids) # (1) explicit --override-frame for exact unit_id if unit_id in override_frames: return override_frames[unit_id], None, None # (2) exact existing auto unit reuse for u in units: if list(u.source_section_ids) == list(sids): return getattr(u, "frame_template_id", None) or getattr(u, "template_id", None), None, None # (3) single-section selector if len(sids) == 1: sid = sids[0] section = sections_by_id.get(sid) if v4 is None or section is None: return None, "no_v4_section", None raw_content = getattr(section, "raw_content", None) # IMP-08 B-3 : forward sub-section V4 aliases (decimal heading_number) # when canonical ordinal id misses; safe for top-level sids (empty list). alias_keys = list(getattr(section, "v4_alias_keys", []) or []) match, trace = lookup_v4_match_with_fallback( v4, sid, raw_content=raw_content, alias_keys=alias_keys ) if match is None: return None, "no_direct_render_template", trace return match.template_id, None, trace # (4) ad-hoc multi-section override without exact auto + without override-frame return None, "ad_hoc_merged_no_template", None # Iterate positions deterministically. Explicit overrides win. for pos in positions: if pos in overrides: sids = overrides[pos] auto_unit = auto_by_position.get(pos) previous_source_section_ids = ( list(auto_unit.source_section_ids) if auto_unit is not None else [] ) # Sections that the previous auto unit at this position contained but # the explicit override did not take = uncovered post-override. sids_set = set(sids) uncovered_from_previous = [ sid for sid in previous_source_section_ids if sid not in sids_set ] template_id, skipped_reason, selector_trace = _resolve_template_for_override(pos, sids) # IMP-06 Stage 4 (Codex #9 R1 + Claude #9 Catch L + Codex #10) — replaced_auto_unit # populated only when the same position previously had an auto unit and that # auto unit was different from the requested override. Documents "this auto # unit was removed from this position to apply the override" as a distinct # audit fact (vs skipped_collided_auto_units which is cross-position skip). replaced_auto_unit = None if auto_unit is not None and list(auto_unit.source_section_ids) != list(sids): replaced_auto_unit = { "unit_id": _unit_id(list(auto_unit.source_section_ids)), "source_section_ids": list(auto_unit.source_section_ids), "reason": "same_position_override_replacement", } plan.append({ "position": pos, "assignment_source": "cli_override", "unit_id": _unit_id(sids), "source_section_ids": list(sids), "template_id": template_id, "previous_source_section_ids": previous_source_section_ids, "section_assignment_override": { "override_applied": True, "override_source": "cli", "zone_id": pos, "requested_section_ids": list(sids), }, "replaced_auto_unit": replaced_auto_unit, "skipped_collided_auto_units": [], "uncovered_section_ids": uncovered_from_previous, "skipped_reason": skipped_reason, "v4_selector_trace": selector_trace, }) else: # Auto-retain unless overlap with overridden sections. auto_unit = auto_by_position.get(pos) if auto_unit is None: plan.append({ "position": pos, "assignment_source": "empty", "unit_id": None, "source_section_ids": [], "template_id": None, "previous_source_section_ids": [], "section_assignment_override": None, "replaced_auto_unit": None, "skipped_collided_auto_units": [], "uncovered_section_ids": [], "skipped_reason": "no_auto_unit_available", "v4_selector_trace": None, }) continue overlap = [sid for sid in auto_unit.source_section_ids if sid in overridden_section_ids] if overlap: # Whole-skip the auto unit. Sections in the auto unit that were NOT taken # by an override become uncovered. unit_id_str = _unit_id(list(auto_unit.source_section_ids)) skipped_collided_unit_ids.add(unit_id_str) uncovered = [ sid for sid in auto_unit.source_section_ids if sid not in overridden_section_ids ] plan.append({ "position": pos, "assignment_source": "empty", "unit_id": None, "source_section_ids": [], "template_id": None, "previous_source_section_ids": list(auto_unit.source_section_ids), "section_assignment_override": None, "replaced_auto_unit": None, "skipped_collided_auto_units": [{ "unit_id": unit_id_str, "source_section_ids": list(auto_unit.source_section_ids), "reason": "override_collision", }], "uncovered_section_ids": uncovered, "skipped_reason": "override_collision", "v4_selector_trace": None, }) else: plan.append({ "position": pos, "assignment_source": "auto", "unit_id": _unit_id(list(auto_unit.source_section_ids)), "source_section_ids": list(auto_unit.source_section_ids), "template_id": ( getattr(auto_unit, "frame_template_id", None) or getattr(auto_unit, "template_id", None) ), "previous_source_section_ids": list(auto_unit.source_section_ids), "section_assignment_override": None, "replaced_auto_unit": None, "skipped_collided_auto_units": [], "uncovered_section_ids": [], "skipped_reason": None, "v4_selector_trace": None, }) # Summary aggregates. applied = [p for p in plan if p["assignment_source"] == "cli_override"] skipped_assignments = [p for p in plan if p["skipped_reason"] is not None] all_uncovered: list[str] = [] for p in plan: all_uncovered.extend(p.get("uncovered_section_ids", [])) summary = { "section_assignment_overrides_applied": [ {"position": p["position"], "source_section_ids": p["source_section_ids"]} for p in applied ], "section_assignment_overrides_skipped": [ {"position": p["position"], "reason": p["skipped_reason"]} for p in skipped_assignments ], "applied_count": len(applied), "skipped_count": len(skipped_assignments), "uncovered_section_ids": all_uncovered, "skipped_collided_auto_unit_ids": sorted(skipped_collided_unit_ids), } return plan, summary # ─── Slot mapping (catalog-only dispatch) ────────────────────── def _known_contract_ids() -> list[str]: from phase_z2_mapper import load_frame_contracts return list(load_frame_contracts().keys()) def map_mdx_to_slots(section: MdxSection, template_id: str) -> dict: """template_id → slot_payload via catalog contract only. F13/F29/F16 등 모든 frame 의 slot 구조 / cardinality / role / payload builder 는 `templates/phase_z2/catalog/frame_contracts.yaml` 에 선언. legacy hand-coded mapper / MAPPER_BY_TEMPLATE / COLOR_CLASS_BY_KEYWORD / 관련 helper 는 F13/F29/F16 transition (2026-04-29) 후 모두 제거. template_id 가 catalog 에 없으면 ValueError — fallback 없음. 새 frame 추가 = catalog yaml 에 entry 추가 + (필요시) 새 builder/parser 등록. """ contract = get_contract(template_id) if contract is None: raise ValueError( f"No frame_contracts entry for template_id='{template_id}'. " f"Add an entry in templates/phase_z2/catalog/frame_contracts.yaml. " f"Known contracts: {sorted(_known_contract_ids())}." ) return map_with_contract(section, contract) # ─── Asset copy ───────────────────────────────────────────────── def copy_assets(template_id: str, run_dir: Path) -> Optional[Path]: """Frame asset (Figma) 폴더 복사 — frame_id 는 catalog contract 에서 도출. contract 에 `frame_id` 없으면 (asset 없는 frame) None 반환. 이전엔 pipeline.py 에 TEMPLATE_TO_FRAME_ID Python dict 가 있었지만 catalog 로 이전 (2026-04-29). """ contract = get_contract(template_id) frame_id = (contract or {}).get("frame_id") if not frame_id: return None src = ASSETS_SOURCE_BASE / str(frame_id) / "assets" if not src.exists(): return None dst = run_dir / "assets" / template_id dst.parent.mkdir(parents=True, exist_ok=True) if dst.exists(): shutil.rmtree(dst) shutil.copytree(src, dst) return dst # ─── Render (single slide + Type B) ──────────────────────────── def _read_token_css() -> str: token_dir = PROJECT_ROOT / "templates" / "styles" / "tokens" files = ["typography.css", "spacing.css", "colors.css"] parts = [] for f in files: path = token_dir / f if path.exists(): parts.append(f"/* === {f} === */\n{path.read_text(encoding='utf-8')}") return "\n\n".join(parts) def _attempt_zone_ratio_retry( *, run_dir: Path, out_path: Path, slide_title: str, slide_footer: Optional[str], zones_data: list[dict], debug_zones: list[dict], layout_preset: str, layout_css: dict, overflow: dict, fit_classification: dict, router_decision: dict, gap_px: int, ) -> dict: """A3 zone_ratio_retry orchestration. locked rules : - retry budget = 1 - slide-base / spacing / gap 고정 - target zone height 만 증가, sibling donor 에서 같은 양 차감 - donor 룰 strict (visual ok / capacity ok / slack > 0 / min_height 보존) - (b) revert : redistribution fail 또는 rerender 후 visual fail 시 original final.html 그대로 Returns: retry_trace dict (always returned, even when no retry attempted) with : retry_attempted : bool retry_action : 'zone_ratio_retry' or None plan : phase_z2_retry.plan_zone_ratio_retry() 결과 (있을 때만) rerender_attempted : bool retry_passed : bool retry_failure_reason : str or None retried_candidate_path : str or None (rerender 한 경우 진단 artifact 경로) post_retry_overflow : dict (retry_passed=True 일 때만) post_retry_debug_zones : list (retry_passed=True 일 때만 — height_px 갱신본) post_retry_layout_css : dict (retry_passed=True 일 때만) """ base_trace = { "retry_attempted": False, "retry_action": None, "plan": None, "rerender_attempted": False, "retry_passed": False, "retry_failure_reason": None, "retried_candidate_path": None, "safety_margin_px": DEFAULT_SAFETY_MARGIN_PX, "policy": ( "A3 locked rules : retry budget=1, slide-base/spacing/gap fixed, " "donor strict (sibling/visual ok/capacity ok/slack>0/min_height 보존), " "(b) revert on redistribution fail or rerender visual fail." ), } # 1. retry attempt 자체가 적절한지 판단 if not router_decision.get("router_active"): base_trace["retry_skipped_reason"] = "router_active=False (visual check passed — no overflow)" return base_trace proposed = router_decision.get("proposed_actions_summary") or [] if "zone_ratio_retry" not in proposed: base_trace["retry_skipped_reason"] = ( f"zone_ratio_retry not in proposed_actions {proposed} (다른 action category)" ) return base_trace # IMP-09 PR 1 retry gate — row-axis retry is only valid for layouts whose # row geometry is dynamic. 2-D / dynamic_cols layouts and fr_default sinks # would either misapply row-only redistribution or produce a no-op trace. if layout_css.get("dynamic_cols", False): base_trace["retry_skipped_reason"] = ( "layout has dynamic_cols (2-D topology) — " "row-axis retry not applicable to 2-D layouts (IMP-09 lock)" ) return base_trace if not layout_css.get("dynamic_rows", False): base_trace["retry_skipped_reason"] = ( "layout is fr_default_from_preset (no dynamic geometry) — retry no-op" ) return base_trace # 2. plan base_trace["retry_attempted"] = True base_trace["retry_action"] = "zone_ratio_retry" plan = plan_zone_ratio_retry( debug_zones=debug_zones, overflow=overflow, fit_classification=fit_classification, router_decision=router_decision, safety_margin_px=DEFAULT_SAFETY_MARGIN_PX, ) base_trace["plan"] = plan if plan is None: base_trace["retry_failure_reason"] = "plan_zone_ratio_retry returned None — no target classification matched zone_ratio_retry" return base_trace if not plan.get("feasible"): # redistribution check 실패 → rerender 안 함, original final.html 그대로 base_trace["retry_failure_reason"] = plan.get("failure_reason") print( f" retry : zone_ratio_retry redistribution INFEASIBLE — " f"target {plan['target_zone_position']} needs {plan['target_added_px']}px, " f"{plan.get('failure_reason')}" ) return base_trace # 3. feasible — apply plan to layout_css, rerender to candidate path (NOT final.html yet) new_layout_css = apply_retry_to_layout_css( layout_css, plan, zones_data, total_height=SLIDE_BODY_HEIGHT, gap_px=gap_px, ) candidate_path = run_dir / "retried_candidate.html" candidate_html = render_slide( slide_title, slide_footer, zones_data, layout_preset, new_layout_css, gap_px=gap_px, ) candidate_path.write_text(candidate_html, encoding="utf-8") base_trace["rerender_attempted"] = True base_trace["retried_candidate_path"] = str(candidate_path.relative_to(PROJECT_ROOT)) print( f" retry : zone_ratio_retry attempted — target {plan['target_zone_position']} " f"+{plan['target_added_px']}px (donor {plan['donor_zone_position']} -{plan['donor_reduced_px']}px) " f"→ rerender to retried_candidate.html → visual check" ) # 4. 후 visual check on candidate candidate_overflow = run_overflow_check(candidate_path) if candidate_overflow.get("passed", False): # 성공 — final.html 을 candidate 로 promote out_path.write_text(candidate_html, encoding="utf-8") # debug_zones height_px / ratio 갱신 (post-retry 상태) new_heights = new_layout_css["heights_px"] new_ratios = new_layout_css["ratios"] post_retry_debug_zones = [] for i, dz in enumerate(debug_zones): new_dz = dict(dz) new_dz["height_px"] = new_heights[i] if i < len(new_heights) else dz.get("height_px") new_dz["ratio"] = new_ratios[i] if i < len(new_ratios) else dz.get("ratio") new_dz["zone_height_post_retry"] = True post_retry_debug_zones.append(new_dz) base_trace["retry_passed"] = True base_trace["post_retry_overflow"] = candidate_overflow base_trace["post_retry_debug_zones"] = post_retry_debug_zones base_trace["post_retry_layout_css"] = new_layout_css print(f" retry : PASSED — final.html promoted to retried version") return base_trace # 5. rerender 후에도 visual fail → (b) revert : final.html 은 original 그대로 (이미 written) base_trace["retry_passed"] = False base_trace["retry_failure_reason"] = ( f"rerender visual_check failed: {candidate_overflow.get('fail_reasons')}. " f"reverting to original final.html (retried_candidate.html stays as diagnostic only)." ) base_trace["candidate_overflow_summary"] = { "passed": False, "fail_reasons": candidate_overflow.get("fail_reasons", []), } print(f" retry : FAILED — candidate visual_check 도 실패. revert to original. ({candidate_path.name} 은 diagnostic 으로 보존)") return base_trace # IMP-12 u8 — Step 17 salvage cascade orchestrator (deterministic, no normal-path AI). # Plan/apply pairs: phase_z2_retry (u4/u5/u6). Routing: failure_router.route_retry_failure (u3). # Pipeline wiring (cascade_inputs assembly + retry_trace merge) lands in u9. # IMP-88 u6 — extended with layout_adjust + frame_internal_fit_candidate dispatch. # Mirror of failure_router.SALVAGE_FAILURE_TYPE_BY_ACTION (single source-of-truth lives # there; this local map gates which actions the salvage loop can execute and feeds the # loop-cap range(len(...)) so cascade depth scales with implemented executors). _SALVAGE_FAIL_BY_ACTION = { "cross_zone_redistribute": "cross_zone_redistribute_insufficient", "glue_compression": "glue_absorption_insufficient", "font_step_compression": "font_step_insufficient", "layout_adjust": "layout_adjust_insufficient", "frame_internal_fit_candidate": "frame_internal_fit_candidate_insufficient", } def _attempt_salvage_chain( *, run_dir: Path, out_path: Path, slide_title: str, slide_footer: Optional[str], zones_data: list[dict], layout_preset: str, layout_css: dict, cascade_inputs: dict, initial_failure_type: str, gap_px: int, ) -> dict: """IMP-12 u8 — deterministic Step 17 salvage cascade (cross_zone → glue → font_step). Per stage: plan → apply CSS → rerender → run_overflow_check. PASS promotes final.html; cascade-exit (layout_adjust/frame_reselect/none) or all-fail preserves (b) revert. Honors IMP-09 dynamic_cols / fr_default gate. """ trace = {"salvage_attempted": False, "salvage_passed": False, "salvage_steps": []} if layout_css.get("dynamic_cols", False) or not layout_css.get("dynamic_rows", False): trace["salvage_skipped_reason"] = "IMP-09 gate — dynamic_cols/no dynamic_rows; cascade no-op" return trace trace["salvage_attempted"] = True failure_type = initial_failure_type ci = cascade_inputs for _ in range(len(_SALVAGE_FAIL_BY_ACTION)): routing = route_retry_failure(failure_type) or {} next_action = routing.get("next_proposed_action") if next_action not in _SALVAGE_FAIL_BY_ACTION: trace["salvage_terminal_action"] = next_action trace["salvage_terminal_rationale"] = routing.get("rationale") return trace # IMP-88 u6 — layout_adjust takes a distinct render path (fresh # render_slide with the new preset + remapped zones_data + new # layout_css), so it is dispatched BEFORE the shared CSS-overlay # planner cluster below. No common margin / slide-body shrink # ([[feedback_phase_z_spacing_direction]]) — topology swap only. if next_action == "layout_adjust": plan = plan_layout_adjust( current_layout_preset=layout_preset, zones_data=zones_data) new_layout_css = ( apply_layout_adjust_layout_css(plan, gap_px=gap_px) if (plan and plan.get("feasible")) else None ) candidate_path = run_dir / f"salvage_{next_action}_candidate.html" candidate_html, candidate_overflow, passed = None, None, False if new_layout_css: candidate_html = render_slide( slide_title, slide_footer, plan["new_zones_data"], plan["new_layout_preset"], new_layout_css, gap_px=gap_px, ) candidate_path.write_text(candidate_html, encoding="utf-8") candidate_overflow = run_overflow_check(candidate_path) passed = bool(candidate_overflow.get("passed", False)) step = { "action": next_action, "plan": plan, "passed": passed, "new_layout_preset": ( plan.get("new_layout_preset") if isinstance(plan, dict) else None ), "candidate_path": ( str(candidate_path.relative_to(PROJECT_ROOT)) if new_layout_css else None ), } if passed: out_path.write_text(candidate_html, encoding="utf-8") step["post_salvage_overflow"] = candidate_overflow trace["salvage_steps"].append(step) trace["salvage_passed"] = True return trace step["failure_reason"] = ( (plan.get("failure_reason") if isinstance(plan, dict) else None) or (candidate_overflow.get("fail_reasons") if candidate_overflow else None) or "infeasible or render emitted no candidate") trace["salvage_steps"].append(step) failure_type = _SALVAGE_FAIL_BY_ACTION[next_action] continue if next_action == "cross_zone_redistribute": if ci.get("fit_analysis") is None: plan = {"action": "cross_zone_redistribute", "feasible": False, "failure_reason": "cascade_inputs.fit_analysis missing — cross_zone_redistribute requires fit_analysis."} else: plan = plan_cross_zone_redistribute( fit_analysis=ci["fit_analysis"], containers=ci.get("containers") or {}, min_margin_px=ci.get("min_margin_px")) apply_fn = apply_cross_zone_redistribute_css elif next_action == "glue_compression": plan = plan_glue_compression( excess_px=float(ci.get("excess_px") or 0.0), block_count=int(ci.get("block_count") or 0), zone_position=str(ci.get("zone_position") or "")) apply_fn = apply_glue_compression_css elif next_action == "frame_internal_fit_candidate": # IMP-88 u6 — per-zone frame-scoped envelope variant. Resolves # frame_template_id from zones_data via cascade_inputs.zone_position # so the planner stays within the frame contract envelope # (no shared margin shrink per [[feedback_phase_z_spacing_direction]]). _target_pos = str(ci.get("zone_position") or "") _target_zone = next( (z for z in zones_data if z.get("position") == _target_pos), {}, ) or {} _frame_tid = str(_target_zone.get("template_id") or "") plan = plan_frame_internal_fit_candidate( frame_template_id=_frame_tid, overflow_zone={"excess_y": float(ci.get("excess_px") or 0.0)}, ) apply_fn = apply_frame_internal_fit_candidate_css else: plan = plan_font_step_compression( current_font_px=float(ci.get("current_font_px") or 0.0), excess_after_glue_px=float(ci.get("excess_after_glue_px") or ci.get("excess_px") or 0.0), available_lines=int(ci.get("available_lines") or 0), chars_per_line=int(ci.get("chars_per_line") or 0), zone_position=str(ci.get("zone_position") or "")) apply_fn = apply_font_step_compression_css css_override = apply_fn(plan) if (plan and plan.get("feasible")) else "" candidate_path = run_dir / f"salvage_{next_action}_candidate.html" candidate_html, candidate_overflow, passed = None, None, False if css_override: base = render_slide(slide_title, slide_footer, zones_data, layout_preset, layout_css, gap_px=gap_px) style = f"" candidate_html = base.replace("", f"{style}\n", 1) if "" in base else style + base candidate_path.write_text(candidate_html, encoding="utf-8") candidate_overflow = run_overflow_check(candidate_path) passed = bool(candidate_overflow.get("passed", False)) step = {"action": next_action, "plan": plan, "passed": passed, "css_override": css_override or None, "candidate_path": str(candidate_path.relative_to(PROJECT_ROOT)) if css_override else None} if passed: out_path.write_text(candidate_html, encoding="utf-8") step["post_salvage_overflow"] = candidate_overflow trace["salvage_steps"].append(step) trace["salvage_passed"] = True return trace step["failure_reason"] = ( (plan.get("failure_reason") if isinstance(plan, dict) else None) or (candidate_overflow.get("fail_reasons") if candidate_overflow else None) or "infeasible or no CSS emitted") trace["salvage_steps"].append(step) failure_type = _SALVAGE_FAIL_BY_ACTION[next_action] return trace # IMP-88 (#88) u7 — Step 17 image_fit single-pass entry executor. # image_fit is NOT a salvage cascade stage (deliberately OUT of # _SALVAGE_FAIL_BY_ACTION per u6 guard). Instead it is a Step 17 ENTRY # single-pass: aggregate per-image plan/apply via plan_image_fit (u4) + # apply_image_fit_css (u4), emit one CSS overlay, re-render once, # run_overflow_check. PASS promotes final.html; FAIL records `image_fit` # step with failure_reason so failure_router (u2) # SALVAGE_FAILURE_TYPE_BY_ACTION classifies it as `image_fit_insufficient` # and the cascade entry block routes onto layout_adjust. # Honors [[feedback_phase_z_spacing_direction]] — frame-scoped img CSS only, # no common margin / slide-body / zone gap shrink. AI isolation contract # (PZ-1) — deterministic data-surface, no AI call. def _attempt_step17_image_fit_single_pass( *, run_dir: Path, out_path: Path, slide_title: str, slide_footer: Optional[str], zones_data: list[dict], layout_preset: str, layout_css: dict, image_events: list[dict], gap_px: int, delta_tol: float = IMAGE_ASPECT_DELTA_TOL, ) -> dict: """IMP-88 u7 — Step 17 image_fit single-pass executor. Returns a result dict with shape: - triggered : bool — True iff any feasible image_fit plan emitted CSS. - passed : bool — True iff post-rerender overflow check passed. - step : dict|None — salvage_steps[] entry shape (action="image_fit", image_fit_event_plans, candidate_path, post_salvage_overflow on pass / failure_reason on fail). - candidate_html : str|None — rendered HTML when triggered (None otherwise). - candidate_overflow : dict|None — run_overflow_check result when triggered. - event_plans : list[dict] — every plan_image_fit result (feasible + no-op), surfaced for telemetry even when none emit CSS. Side effect on PASS only: writes candidate_html to out_path. """ event_plans = [] css_chunks = [] for ev in (image_events or []): plan = plan_image_fit(image_event=ev, delta_tol=delta_tol) event_plans.append(plan) if plan.get("feasible"): css = apply_image_fit_css(plan) if css: css_chunks.append(css) if not css_chunks: return { "triggered": False, "passed": False, "step": None, "candidate_html": None, "candidate_overflow": None, "event_plans": event_plans, } candidate_path = run_dir / "salvage_image_fit_candidate.html" base = render_slide( slide_title, slide_footer, zones_data, layout_preset, layout_css, gap_px=gap_px, ) style = "" candidate_html = ( base.replace("", f"{style}\n", 1) if "" in base else style + base ) candidate_path.write_text(candidate_html, encoding="utf-8") candidate_overflow = run_overflow_check(candidate_path) passed = bool(candidate_overflow.get("passed", False)) step = { "action": "image_fit", "passed": passed, "image_fit_event_plans": event_plans, "candidate_path": ( str(candidate_path.relative_to(PROJECT_ROOT)) if candidate_path.is_absolute() else str(candidate_path) ), } if passed: step["post_salvage_overflow"] = candidate_overflow out_path.write_text(candidate_html, encoding="utf-8") else: step["failure_reason"] = ( candidate_overflow.get("fail_reasons") or "image_fit single-pass: overflow persists" ) return { "triggered": True, "passed": passed, "step": step, "candidate_html": candidate_html, "candidate_overflow": candidate_overflow, "event_plans": event_plans, } def _remeasure_after_frame_reselect( *, candidate_path: Path, plan: Optional[dict] = None, ) -> dict: """IMP-35 (#64) u1 — post-frame remeasure helper for the cascade terminal. Contract (q4 / Stage 2): frame_reselect_insufficient is detected by an *explicit overflow re-measure* after a V4 top-k alternate frame swap — NOT a failure-flag carryover. This helper runs run_overflow_check on the re-rendered candidate HTML and shapes the salvage_steps entry that classify_retry_failure / SALVAGE_FAILURE_TYPE_BY_ACTION read. Future frame_reselect orchestrator (post-IMP-35) writes the candidate HTML and calls this helper to append the entry to retry_trace.salvage_steps. On passed=True the orchestrator promotes the candidate to final.html; on passed=False the classifier emits frame_reselect_insufficient → u2 routes onto details_popup_escalation (Step 17 POPUP gate / u5). """ candidate_overflow = run_overflow_check(candidate_path) passed = bool(candidate_overflow.get("passed", False)) return { "action": "frame_reselect", "plan": plan, "passed": passed, "candidate_path": ( str(candidate_path.relative_to(PROJECT_ROOT)) if candidate_path.is_absolute() else str(candidate_path) ), "post_salvage_overflow": candidate_overflow, "failure_reason": ( None if passed else (candidate_overflow.get("fail_reasons") or "post-frame remeasure: overflow persists") ), } # ─── IMP-42 u3 (#71) — unconditional Step 12 / Step 13 DIAG log helper ── def _emit_diag_zones_shape(stage_label: str, zones_data: list[dict], **extra_fields) -> None: """IMP-42 u3 (#71) — emit shape-only zone metadata to stdout. Used at the Step 12 slot_payload emit site and the Step 13 render_slide entry site to make the silent 3-hop handoff visible in the terminal. Shape-only — never logs raw slot_payload values — so the diag is sample-agnostic (RULE 0) and never leaks user content. No env gate: silence is the bug this IMP fights, so the log fires unconditionally on every slide loop. """ payload = { "zones_count": len(zones_data), "zones": [ { "i": i, "position": z.get("position"), "template_id": z.get("template_id"), "slot_keys": ( sorted(z["slot_payload"].keys()) if isinstance(z.get("slot_payload"), dict) else None ), } for i, z in enumerate(zones_data) ], } payload.update(extra_fields) print( f"[DIAG] phase_z2 {stage_label} " + json.dumps(payload, ensure_ascii=False, sort_keys=True), flush=True, ) # ─── IMP-42 u2 (#71) — post-render HTML invalid path char detector ── _INVALID_PATH_ATTR_RE = re.compile( r"""(src|href)\s*=\s*["']([^"']*)["']|url\(\s*['"]?([^)'"\s]+)['"]?\s*\)""", re.IGNORECASE, ) _INVALID_PATH_CHARS = ("\\", "&", "'") def _scan_rendered_html_for_invalid_path_chars(html: str, context: str) -> None: """IMP-42 u2 (#71) — fail loud when rendered HTML asset references contain invalid path characters in src / href / url(...) values. Catches three silent fail vectors at the rendered HTML boundary that surface downstream as 404 / asset-load failures far from upstream cause: - Windows backslash from str(Path) (e.g. ``assets\\img.png``). - Autoescape entity ``&`` (raw ``&`` in raw path source string). - Autoescape entity ``'`` (raw ``'`` in raw path source string). Raises ValueError on first hit, citing context, attr type, value snippet. Scope-locked to rendered HTML asset attrs only; never inspects CSS grid metadata or static template strings. """ for match in _INVALID_PATH_ATTR_RE.finditer(html): if match.group(3) is not None: attr_label = "url(...)" value = match.group(3) else: attr_label = match.group(1).lower() value = match.group(2) for bad in _INVALID_PATH_CHARS: if bad in value: snippet = value if len(value) <= 120 else value[:117] + "..." raise ValueError( f"render_slide: {context} — invalid path char {bad!r} in " f"{attr_label} value (value={snippet}). " "Likely upstream: Windows backslash from str(Path) or " "autoescape of '&' / \"'\" in raw path string." ) def render_slide(slide_title: str, slide_footer: Optional[str], zones_data: list[dict], layout_preset: str, layout_css: dict, gap_px: int = GRID_GAP, *, embedded_mode: str = "auto") -> str: """Single slide HTML — slide_base.html + 8-preset layout vocabulary. layout_css = build_layout_css() 결과 — areas/cols/rows 문자열 + 동적 heights flag. Template 은 layout_css.{areas,cols,rows} 를 grid CSS 에 직접 주입. embedded_mode (IMP-14): "auto" | "embedded" | "standalone". Controls slide_base.html body CSS contract. Default "auto" preserves backward-compat with run_overflow_check standalone path and lets iframe consumers signal via ?embedded=1 or window.self!==window.top. """ if embedded_mode not in ("auto", "embedded", "standalone"): raise ValueError( f"render_slide: invalid embedded_mode={embedded_mode!r}; " "expected one of 'auto', 'embedded', 'standalone'" ) # IMP-42 u3 (#71) — unconditional Step 13 entry DIAG log. _emit_diag_zones_shape( "Step 13 render_slide entry", zones_data, layout_preset=layout_preset, embedded_mode=embedded_mode, ) env = Environment( loader=FileSystemLoader(str(TEMPLATE_DIR)), autoescape=select_autoescape(["html"]), ) for zone_index, zone in enumerate(zones_data): # Stage 4 Part 2 (Codex #10 Catch N) — empty zone produced by section # assignment override has no partial template; render an empty string so # the slide_base zones loop preserves grid identity without TemplateNotFound. if zone.get("template_id") == "__empty__": zone["partial_html"] = "" continue # IMP-42 u1 (#71) — fail-loud precondition for Step 13 partial render. # Catches the silent fail vector where Step 12 emits a zone dict missing # `template_id` / `slot_payload`. Error message cites zone_index + # missing key so the diag is actionable (vs Jinja TemplateNotFound / # KeyError surfacing far from the upstream emit site). template_id = zone.get("template_id") if not isinstance(template_id, str) or not template_id: raise TypeError( f"render_slide: zones_data[{zone_index}] precondition failed — " f"`template_id` must be a non-empty str, got {type(template_id).__name__}={template_id!r}" ) if "slot_payload" not in zone: raise TypeError( f"render_slide: zones_data[{zone_index}] precondition failed — " f"`slot_payload` key missing (template_id={template_id!r})" ) slot_payload = zone["slot_payload"] if not isinstance(slot_payload, dict): raise TypeError( f"render_slide: zones_data[{zone_index}] precondition failed — " f"`slot_payload` must be a dict, got {type(slot_payload).__name__} " f"(template_id={template_id!r})" ) partial = env.get_template(f"families/{template_id}.html") rendered_partial = partial.render(slot_payload=slot_payload) # IMP-42 u2 (#71) — fail loud on invalid path chars in rendered HTML # asset refs (src / href / url(...)). Catches Windows backslash and # autoescape entity vectors before they reach the browser as 404. _scan_rendered_html_for_invalid_path_chars( rendered_partial, f"zones_data[{zone_index}] template_id={template_id!r}", ) zone["partial_html"] = rendered_partial base = env.get_template("slide_base.html") rendered_base = base.render( slide_title=slide_title, slide_footer=slide_footer, zones=zones_data, layout_preset=layout_preset, layout_css=layout_css, gap_px=gap_px, token_css=_read_token_css(), embedded_mode=embedded_mode, ) # IMP-42 u2 (#71) — also scan the assembled slide_base output to cover # asset refs introduced by the slide-base shell itself (title / footer / # popup slots) outside the per-zone partial scope. _scan_rendered_html_for_invalid_path_chars(rendered_base, "slide_base") return rendered_base # ─── Selenium check (single slide + per-zone) ────────────────── def run_overflow_check(html_path: Path) -> dict: """Single slide + per-zone overflow + clipping check.""" from selenium import webdriver from selenium.webdriver.chrome.options import Options from selenium.webdriver.chrome.service import Service options = Options() options.add_argument("--headless=new") options.add_argument("--no-sandbox") options.add_argument("--disable-dev-shm-usage") options.add_argument("--window-size=1400,900") chromedriver_candidates = [ PROJECT_ROOT / "chromedriver", PROJECT_ROOT / "chromedriver.exe", ] driver = None last_err = None for path in chromedriver_candidates: if path.is_file(): try: driver = webdriver.Chrome(service=Service(str(path)), options=options) break except Exception as e: last_err = e if driver is None: try: driver = webdriver.Chrome(options=options) except Exception as e: return {"passed": False, "error": f"selenium init failed: {last_err or e}"} try: driver.get(html_path.resolve().as_uri()) driver.set_window_size(1400, 900) driver.implicitly_wait(1) result = driver.execute_script(r""" const measure = (el) => ({ clientWidth: el.clientWidth, clientHeight: el.clientHeight, scrollWidth: el.scrollWidth, scrollHeight: el.scrollHeight, excess_x: Math.max(0, el.scrollWidth - el.clientWidth), excess_y: Math.max(0, el.scrollHeight - el.clientHeight), overflowed: (el.scrollWidth > el.clientWidth + 5) || (el.scrollHeight > el.clientHeight + 5), }); const slide = document.querySelector('.slide'); if (!slide) return { error: '.slide not found' }; const slideM = measure(slide); slideM.size_correct = slide.clientWidth === 1280 && slide.clientHeight === 720; // A-6 (IMP-01 #1) — slide-relative bbox base const slideRect = slide.getBoundingClientRect(); const body = document.querySelector('.slide-body'); const bodyM = body ? measure(body) : null; const zones = []; const zone_geometries_px = []; // IMP-15 실행-2 (issue #46) — element-identity dedup map for table_events. // Map keyed by DOM node reference (NOT class string) so that // two wrappers sharing identical className resolve to distinct map entries. // Populated alongside the existing per-zone clipped_inner scan below. const clippedWrapperMap = new Map(); let clippedIdxCounter = 0; slide.querySelectorAll('.zone').forEach((z) => { const pos = z.getAttribute('data-zone-position') || 'unknown'; const tid = z.getAttribute('data-template-id') || '?'; const m = measure(z); m.position = pos; m.template_id = tid; // A-6 (IMP-01 #1) — zone bbox in slide-relative px (additive trace, no layout side effect) const zoneRect = z.getBoundingClientRect(); zone_geometries_px.push({ position: pos, template_id: tid, x: Math.round(zoneRect.left - slideRect.left), y: Math.round(zoneRect.top - slideRect.top), w: Math.round(zoneRect.width), h: Math.round(zoneRect.height), }); // 내부 clipping 검사 — frame-family root/cell 단위. // tolerance / threshold 그대로. inner_content_signals 만 추가 보강 (detection 데이터 늘림). const clipped = []; z.querySelectorAll('[class*="f13b"], [class*="f29b"], [class*="f16b"]').forEach((el) => { const dx = el.scrollWidth - el.clientWidth; const dy = el.scrollHeight - el.clientHeight; if (dx > 5 || dy > 5) { // inner content signals — clipped cell 안에 *어떤 종류의 콘텐츠가 들어있는지* 보고. // classifier 가 frame_internal_cell 만 봐서는 부족하니 inner 까지 본다. const inner_signals = []; if (el.querySelector('.transform-block, .transform-row, .transform-rows')) { inner_signals.push('structural_unit'); } if (el.querySelector('table')) { inner_signals.push('tabular'); } if (el.querySelector('.text-line')) { inner_signals.push('text_flow'); } clipped.push({ class_name: el.className, inner_content_signals: inner_signals, excess_x: Math.max(0, dx), excess_y: Math.max(0, dy), clientWidth: el.clientWidth, clientHeight: el.clientHeight, scrollWidth: el.scrollWidth, scrollHeight: el.scrollHeight, }); // IMP-15 실행-2 (issue #46) — element-identity registration. // Key by DOM node `el`, NOT className: two wrappers with identical // class string still hash to distinct Map entries. if (!clippedWrapperMap.has(el)) { clippedWrapperMap.set(el, clippedIdxCounter); clippedIdxCounter++; } } }); m.clipped_inner = clipped; zones.push(m); }); // B5 v0 — frame_slot_metrics (per-cell measurement of [data-frame-slot-id]) // 현재 F29 partial 만 marker 보유 (process_column / product_column × 3 cells = 6 entries 기대). // 다른 frame (F13 / F16) 은 marker 미적용 → entry 0 — 정상. const frame_slot_metrics = []; slide.querySelectorAll('[data-frame-slot-id]').forEach((cell) => { const slotId = cell.getAttribute('data-frame-slot-id'); const m2 = measure(cell); const parentZone = cell.closest('.zone'); const zonePos = parentZone ? (parentZone.getAttribute('data-zone-position') || 'unknown') : 'unknown'; const zoneTid = parentZone ? (parentZone.getAttribute('data-template-id') || '?') : '?'; frame_slot_metrics.push({ zone_position: zonePos, zone_template_id: zoneTid, frame_slot_id: slotId, class_name: cell.className, clientWidth: m2.clientWidth, clientHeight: m2.clientHeight, scrollWidth: m2.scrollWidth, scrollHeight: m2.scrollHeight, excess_x: m2.excess_x, excess_y: m2.excess_y, overflowed: m2.overflowed, }); }); // IMP-15 실행-1 (issue #45) — image_events[] for image_aspect_mismatch detection. // 하나의 entry per under .slide. natural vs rendered aspect 비교. // zone_position : closest('.zone') data-zone-position. 없으면 literal "unknown". const image_events = []; slide.querySelectorAll('img').forEach((img) => { const parentZone = img.closest('.zone'); const zonePos = parentZone ? (parentZone.getAttribute('data-zone-position') || 'unknown') : 'unknown'; const zoneTid = parentZone ? (parentZone.getAttribute('data-template-id') || '?') : '?'; const imgRect = img.getBoundingClientRect(); const rendered_w = imgRect.width; const rendered_h = imgRect.height; const natural_w = img.naturalWidth; const natural_h = img.naturalHeight; const natural_ratio = (natural_w > 0 && natural_h > 0) ? (natural_w / natural_h) : null; const rendered_ratio = (rendered_w > 0 && rendered_h > 0) ? (rendered_w / rendered_h) : null; const delta = (natural_ratio !== null && rendered_ratio !== null) ? (rendered_ratio - natural_ratio) : null; image_events.push({ src: img.getAttribute('src') || '', zone_position: zonePos, zone_template_id: zoneTid, natural_w: natural_w, natural_h: natural_h, rendered_w: Math.round(rendered_w), rendered_h: Math.round(rendered_h), natural_ratio: natural_ratio, rendered_ratio: rendered_ratio, delta: delta, bbox: { x: Math.round(imgRect.left - slideRect.left), y: Math.round(imgRect.top - slideRect.top), w: Math.round(rendered_w), h: Math.round(rendered_h), }, }); }); // IMP-15 실행-2 (issue #46) — table_events[] for table_self_overflow detection. // One entry per under .slide. wrapper_clipped_index is the integer index // (from clippedWrapperMap) of the nearest ancestor that is itself in the clipped // wrapper set, or null. Element-identity walk (NOT className) so that two same-class // wrappers (W1 clipped, W2 not) resolve independently for any contained
. const table_events = []; slide.querySelectorAll('table').forEach((tbl) => { const parentZone = tbl.closest('.zone'); const zonePos = parentZone ? (parentZone.getAttribute('data-zone-position') || 'unknown') : 'unknown'; const zoneTid = parentZone ? (parentZone.getAttribute('data-template-id') || '?') : '?'; let wrapper_clipped_index = null; let node = tbl.parentElement; while (node && node !== slide) { if (clippedWrapperMap.has(node)) { wrapper_clipped_index = clippedWrapperMap.get(node); break; } node = node.parentElement; } const tblRect = tbl.getBoundingClientRect(); const dx = tbl.scrollWidth - tbl.clientWidth; const dy = tbl.scrollHeight - tbl.clientHeight; table_events.push({ zone_position: zonePos, zone_template_id: zoneTid, clientWidth: tbl.clientWidth, clientHeight: tbl.clientHeight, scrollWidth: tbl.scrollWidth, scrollHeight: tbl.scrollHeight, excess_x: Math.max(0, dx), excess_y: Math.max(0, dy), wrapper_clipped_index: wrapper_clipped_index, bbox: { x: Math.round(tblRect.left - slideRect.left), y: Math.round(tblRect.top - slideRect.top), w: Math.round(tblRect.width), h: Math.round(tblRect.height), }, }); }); return { slide: slideM, slide_body: bodyM, zones, frame_slot_metrics, zone_geometries_px, image_events, table_events }; """) screenshot_path = html_path.parent / "preview.png" try: driver.save_screenshot(str(screenshot_path)) result["screenshot"] = str(screenshot_path.relative_to(PROJECT_ROOT)) except Exception as e: result["screenshot_error"] = str(e) finally: driver.quit() if "error" in result: return {"passed": False, **result} fail_reasons = [] if not result["slide"]["size_correct"]: fail_reasons.append( f"slide size != 1280x720 (got {result['slide']['clientWidth']}x{result['slide']['clientHeight']})" ) if result["slide"]["overflowed"]: fail_reasons.append( f"slide overflowed by {result['slide']['excess_y']}px (vert) / {result['slide']['excess_x']}px (horiz)" ) if result.get("slide_body") and result["slide_body"]["overflowed"]: fail_reasons.append( f"slide-body overflowed by {result['slide_body']['excess_y']}px (vert)" ) for z in result["zones"]: if z["overflowed"]: fail_reasons.append( f"zone--{z['position']} ({z['template_id']}) overflowed by {z['excess_y']}px (vert) / {z['excess_x']}px (horiz)" ) for c in z.get("clipped_inner", []): fail_reasons.append( f"zone--{z['position']}: inner clipped .{c['class_name']} — " f"excess {c['excess_y']}px vert / {c['excess_x']}px horiz " f"(content {c['scrollHeight']} vs container {c['clientHeight']})" ) # IMP-15 실행-1 (issue #45) — image_aspect_mismatch aggregation. # |natural_ratio - rendered_ratio| > IMAGE_ASPECT_DELTA_TOL ⇒ fail_reason append. # Entries with null ratio (image not loaded / natural dims = 0) are skipped (no false positive). for ev in result.get("image_events", []): delta = ev.get("delta") if delta is None: continue if abs(delta) > IMAGE_ASPECT_DELTA_TOL: n_ratio = ev.get("natural_ratio") r_ratio = ev.get("rendered_ratio") src = ev.get("src", "") pos = ev.get("zone_position", "unknown") tid = ev.get("zone_template_id", "?") fail_reasons.append( f"image aspect mismatch in zone--{pos}: " f"natural={n_ratio:.3f} rendered={r_ratio:.3f} delta={delta:+.3f} " f"(template={tid}, tol={IMAGE_ASPECT_DELTA_TOL}, src={src})" ) # IMP-15 실행-2 (issue #46) — table_self_overflow aggregation. # Emit fail_reason only when (excess_x>TOL OR excess_y>TOL) AND wrapper_clipped_index is None. # The clipped-wrapper case is already accounted for by the clipped_inner fail_reason above; # element-identity dedup (clippedWrapperMap keyed by DOM node ref, NOT className) prevents # double-counting and—critically—prevents two same-class wrappers from masking each other. for ev in result.get("table_events", []): if ev.get("wrapper_clipped_index") is not None: continue excess_x = ev.get("excess_x", 0) or 0 excess_y = ev.get("excess_y", 0) or 0 if excess_x > TABLE_SCROLL_TOL_PX or excess_y > TABLE_SCROLL_TOL_PX: pos = ev.get("zone_position", "unknown") tid = ev.get("zone_template_id", "?") fail_reasons.append( f"table self-overflow in zone--{pos}: " f"excess {excess_y}px vert / {excess_x}px horiz " f"(content {ev.get('scrollWidth')}x{ev.get('scrollHeight')} vs " f"container {ev.get('clientWidth')}x{ev.get('clientHeight')}, " f"template={tid}, tol={TABLE_SCROLL_TOL_PX})" ) result["passed"] = len(fail_reasons) == 0 result["fail_reasons"] = fail_reasons return result def write_overflow_error(run_dir: Path, overflow: dict) -> Path: error_data = { "stage": "visual_runtime_check", "reason": "Visual runtime contract 위반 — slide / slide-body / zone overflow / clipping.", "fail_reasons": overflow.get("fail_reasons", []), "details": overflow, } err_path = run_dir / "error.json" err_path.write_text(json.dumps(error_data, ensure_ascii=False, indent=2), encoding="utf-8") return err_path # ─── Debug.json (single slide + zones[]) ─────────────────────── def _is_empty_shell_unit(u: CompositionUnit) -> bool: """IMP-87 u1 — true when a CompositionUnit is the IMP-30 u4 empty-shell placeholder (frame_template_id="__empty__" / label="empty_shell" / merge_type="empty_shell"). Used by compute_slide_status to redefine `full_mdx_coverage` over rendered content units only: an empty-shell unit attaches the aligned section_ids to a ``__empty__`` frame for layout purposes, but the slide surface carries no MDX content for those sections. Counting it as coverage would violate feedback_artifact_status_naming (overall / coverage must reflect actual content state, not pipeline completion). The three markers are checked as independent OR-branches so a CompositionUnit synthesised by any of the IMP-30 u4 entry points (phase A / phase B / e2e) is classified consistently. """ if getattr(u, "frame_template_id", None) == "__empty__": return True if getattr(u, "label", None) == "empty_shell": return True if getattr(u, "merge_type", None) == "empty_shell": return True return False def _final_status_html_class(overall: str) -> str: """IMP-87 u3 — map ``overall`` enum string → CSS class for the step20 final_status.html dashboard. EMPTY_SHELL_NO_CONTENT MUST resolve to ``"fail"`` (red) so the Case B honesty defect (Stage 1 mdx05) surfaces in the same colour band as visual failures and regressions, not the legacy ``"partial"`` amber band. The explicit check runs BEFORE the legacy substring-based mapping because the literal ``"EMPTY_SHELL_NO_CONTENT"`` contains neither ``"PASS"`` nor ``"FAIL"`` / ``"REGRESSION"`` and would otherwise default to ``"partial"`` (Stage 2 axis A5 lock). All other enums preserve pre-IMP-87 substring semantics so the legacy PASS / RENDERED_WITH_VISUAL_REGRESSION / PARTIAL_COVERAGE / etc. paths keep the colour they had before u3. """ if overall == "EMPTY_SHELL_NO_CONTENT": return "fail" if "PASS" in overall: return "pass" if "FAIL" in overall or "REGRESSION" in overall: return "fail" return "partial" def _is_blocked_overall(overall: str) -> bool: """IMP-87 u3 — true iff ``overall`` warrants a BLOCKED CLI exit (returncode 1) independent of the visual_check / full_mdx_coverage axes. Currently the single blocked enum is EMPTY_SHELL_NO_CONTENT (Stage 1 mdx05 Case B). The CLI consults this helper BEFORE the legacy visual_fail / partial_coverage branches so a content-empty placeholder slide that happens to pass Selenium overflow checks (no content → no overflow) cannot silently return a content-empty artifact without an exit signal (Stage 2 axis A4 lock). """ return overall == "EMPTY_SHELL_NO_CONTENT" def compute_slide_status(sections: list[MdxSection], units: list[CompositionUnit], comp_debug: dict, overflow: dict, adapter_needed_units: Optional[list[dict]] = None, debug_zones: Optional[list[dict]] = None) -> dict: """Slide 산출물의 정확한 상태 계산 — 자동 파이프라인 결과 보고. 축 : - rendered : final.html 이 디스크에 쓰였는가 - visual_check_passed : Selenium per-zone overflow / clipping 통과 여부 - full_mdx_coverage : aligned 된 모든 section_id 가 어떤 selected unit 에 의해 covered - adapter_needed_count : mapper FitError 로 자동 렌더 못 한 unit 수 (별 review 개념 X — 자동 실패 보고) - content_truncated_count : builder 가 truncate 한 zone 수 (informational) - provisional_first_render_count : IMP-30 first-render invariant 로 합성된 unit 수 (u1 V4Match synthesis / u3 last-resort fill / u4 empty-shell — needs user/AI adaptation 신호) overall enum : EMPTY_SHELL_NO_CONTENT — IMP-87 u2 : every selected unit is an IMP-30 u4 empty-shell placeholder (no content-rendered units). Takes precedence over the 4-way ladder below because such a slide can technically pass Selenium overflow checks but carries no MDX content. PASS — visual OK + full coverage + adapter_needed=0 RENDERED_WITH_VISUAL_REGRESSION — full coverage 이지만 visual fail PARTIAL_COVERAGE — 일부 section 필터됨, 렌더된 부분만 visual OK PARTIAL_COVERAGE_WITH_VISUAL_REGRESSION — 둘 다 (adapter_needed > 0 시 status note 추가, overall 은 위 enum 사용) (IMP-30 u6 : provisional_first_render_count 도 qualifier 일 뿐, overall enum 변경 X. Stage 1 Q3 + Codex #10 D4 lock.) """ aligned_ids = [s.section_id for s in sections] # IMP-87 u1 — split coverage into legacy display vs honesty axis. # ``covered`` (legacy, preserved for downstream display / IMP-05 / IMP-06 # readers) still tracks every section attached to any selected unit. # ``content_covered`` is the new honesty axis: only non-empty-shell units # count as "rendered with content". Sections attached solely to an # empty-shell placeholder (IMP-30 u4 frame_template_id="__empty__") # are routed into ``filtered_section_ids`` so an EMPTY-SHELL-only slide # cannot inherit full_mdx_coverage=True (Case B honesty defect lock, # Stage 1 anchor c53722ad). covered: set = set() content_covered: set = set() for u in units: covered.update(u.source_section_ids) if not _is_empty_shell_unit(u): content_covered.update(u.source_section_ids) filtered_ids = sorted(set(aligned_ids) - content_covered) full_coverage = len(filtered_ids) == 0 visual_passed = bool(overflow.get("passed", False)) # IMP-87 u2 — Additive empty/content accounting (used by the overall enum # precedence block below and surfaced on the return dict for downstream # introspection). ``content_rendered_section_ids`` mirrors the new honesty # axis (content_covered) at the surface level so consumers can ask "how # much real MDX content actually rendered" without re-running the helper. # ``empty_shell_section_ids`` exposes the placeholder counterpart for the # same reason. Both pairs are purely additive — no existing field is # removed or repurposed (legacy ``covered_section_ids`` display semantics # locked by u1). empty_shell_units_list = [u for u in units if _is_empty_shell_unit(u)] content_units_list = [u for u in units if not _is_empty_shell_unit(u)] empty_shell_section_ids = sorted({ sid for u in empty_shell_units_list for sid in (u.source_section_ids or []) }) content_rendered_section_ids = sorted(content_covered) adapter_needed_units = list(adapter_needed_units or []) content_truncated = [] fallback_selections = [] for z in (debug_zones or []): if z.get("fallback_used"): fallback_selections.append({ "position": z["position"], "source_section_ids": z["source_section_ids"], "template_id": z["v4_template_id"], "selected_v4_rank": z.get("v4_selected_rank"), "selection_path": z.get("selection_path"), "fallback_reason": z.get("fallback_reason"), }) tc = z.get("content_truncated_count") if tc: content_truncated.append({ "position": z["position"], "source_section_ids": z["source_section_ids"], "template_id": z["v4_template_id"], "truncated_count": tc, }) # 필터된 section 의 사유 (auto pipeline 결정 트레이스 — review 개념 X) filtered_section_reasons = [] for c in comp_debug.get("candidates_summary", []): if c.get("selection_state") == "selected": continue cand_ids = c.get("source_section_ids", []) if any(sid in filtered_ids for sid in cand_ids): filtered_section_reasons.append({ "section_ids": cand_ids, "merge_type": c.get("merge_type"), "template_id": c.get("template_id"), "v4_label": c.get("label"), "phase_z_status": c.get("phase_z_status"), "score": c.get("score"), "selection_state": c.get("selection_state"), # filtered_status / filtered_weak / filtered_lost "filter_reasons": c.get("filter_reasons", []), }) # IMP-06 blocker-fix (Codex #10 Catch O schema + Codex #16 coverage invariant) — # surface override-uncovered sections as additive list entries in # `filtered_section_reasons` and ensure `filtered_section_ids` includes them # so coverage does not silently miss sections that were dropped by an explicit # zone-section override. v4_fb_summary = comp_debug.get("v4_fallback_summary", {}) or {} section_assignment_summary = comp_debug.get("section_assignment_summary") or {} section_assignment_uncovered_ids: list[str] = list( section_assignment_summary.get("uncovered_section_ids") or [] ) if section_assignment_uncovered_ids: # Codex #16 invariant : final filtered_section_ids must contain the # override-uncovered ids even if they were originally "covered" by the # pre-override auto plan. full_coverage must be re-evaluated too so # Step 20 `overall` enum reflects the post-override reality. for sid in section_assignment_uncovered_ids: if sid not in filtered_ids: filtered_ids.append(sid) filtered_ids = sorted(set(filtered_ids)) full_coverage = len(filtered_ids) == 0 # Append a separate list entry per override-uncovered section so existing # readers of filtered_section_reasons (list-shaped) keep working. plan_by_position = { (p.get("position") or ""): p for p in (comp_debug.get("section_assignment_plan") or []) } for sid in section_assignment_uncovered_ids: # Find the position whose plan entry recorded this uncovered id. source_position = None for pos, entry in plan_by_position.items(): if sid in (entry.get("uncovered_section_ids") or []): source_position = pos break filtered_section_reasons.append({ "section_ids": [sid], "merge_type": None, "template_id": None, "v4_label": None, "phase_z_status": None, "score": None, "selection_state": "section_assignment_override_uncovered", "filter_reasons": ["section_assignment_override_uncovered"], "source": "section_assignment_override", "position": source_position, }) # IMP-87 u2 — EMPTY_SHELL_NO_CONTENT precedence over the legacy 4-way # ladder (Stage 2 axis A3). If the slide has aligned MDX sections but # every selected unit is an IMP-30 u4 empty-shell placeholder (zero # content units selected, at least one empty-shell unit selected), the # visible artifact carries no real content — ``overall`` MUST report # EMPTY_SHELL_NO_CONTENT so the u3 CLI exit / final_status.html styling # can branch on it. The check runs before the visual/coverage ladder # because a content-empty placeholder slide can technically pass Selenium # overflow checks (no content → no overflow); without this precedence the # ladder would mislabel it PASS (Stage 1 mdx05 Case B honesty defect). is_empty_shell_only = ( len(aligned_ids) > 0 and len(empty_shell_units_list) > 0 and len(content_units_list) == 0 ) if is_empty_shell_only: overall = "EMPTY_SHELL_NO_CONTENT" elif full_coverage and visual_passed: overall = "PASS" elif full_coverage and not visual_passed: overall = "RENDERED_WITH_VISUAL_REGRESSION" elif not full_coverage and visual_passed: overall = "PARTIAL_COVERAGE" else: overall = "PARTIAL_COVERAGE_WITH_VISUAL_REGRESSION" # IMP-05 L3 (Codex #10 D4 / #17 idea F / Claude #21 idea J) — Step 20 qualifier fields. # Additive only — top-level overall enum unchanged. Defensive defaults so non-fallback # paths (empty v4_fallback_summary) do not crash and report 0 / [] cleanly. _v4_fb_summary = comp_debug.get("v4_fallback_summary", {}) or {} _fallback_selection_count = _v4_fb_summary.get("fallback_selection_count", 0) _selection_paths = _v4_fb_summary.get("selection_paths", []) # IMP-30 u6 — Step 20 additive qualifier fields for the first-render invariant. # provisional_first_render_count = number of selected units whose .provisional # flag is True (set by u1 V4Match synthesis → u2 CompositionUnit propagation, # u3 last-resort fill, or u4 empty-shell synthesis). The list mirrors the shape # of fallback_selections / adapter_needed_units for symmetry. Top-level overall # enum stays unchanged per IMP-05 Codex #10 D4 + Stage 1 Q3 decision: this # signal is a qualifier, not a new failure class. Defensive getattr keeps the # function safe when units come from legacy code paths predating u2. provisional_first_render_units: list[dict] = [] for u in units: if not getattr(u, "provisional", False): continue provisional_first_render_units.append({ "source_section_ids": list(getattr(u, "source_section_ids", []) or []), "phase_z_status": getattr(u, "phase_z_status", None), "frame_template_id": getattr(u, "frame_template_id", None), "frame_id": getattr(u, "frame_id", None), "label": getattr(u, "label", None), "selection_path": getattr(u, "selection_path", None), "fallback_reason": getattr(u, "fallback_reason", None), "v4_rank": getattr(u, "v4_rank", None), }) return { "rendered": True, "visual_check_passed": visual_passed, "full_mdx_coverage": full_coverage, "aligned_section_ids": aligned_ids, "covered_section_ids": sorted(covered), # IMP-87 u2 — additive empty/content accounting (overall enum precedence above). "content_rendered_section_ids": content_rendered_section_ids, "content_rendered_unit_count": len(content_units_list), "empty_shell_section_ids": empty_shell_section_ids, "empty_shell_unit_count": len(empty_shell_units_list), "filtered_section_ids": filtered_ids, "filtered_section_reasons": filtered_section_reasons, "selection_path": "fallback_used" if fallback_selections else "rank_1", "fallback_used": bool(fallback_selections), "fallback_selections": fallback_selections, # IMP-05 L3 qualifier fields — grouped near existing fallback fields for readability. "fallback_selection_count": _fallback_selection_count, "selection_paths": _selection_paths, "visual_fail_reasons": list(overflow.get("fail_reasons") or []), "adapter_needed_count": len(adapter_needed_units), "adapter_needed_units": adapter_needed_units, "content_truncated_count": len(content_truncated), "content_truncated_units": content_truncated, # IMP-30 u6 — additive provisional qualifiers (overall enum unchanged). "provisional_first_render_count": len(provisional_first_render_units), "provisional_first_render_units": provisional_first_render_units, "overall": overall, "note": ( "자동 파이프라인 결과 보고. review/UI 개념 X. final.html 파일명 != PASS 의미. " "overall == PASS 는 visual OK + full coverage + adapter_needed=0 일 때만. " "adapter_needed_count > 0 = mapper 가 contract 와 안 맞아 자동 렌더 못 한 zone 존재. " "content_truncated_count > 0 = builder 가 truncate 한 zone 존재 (rendered 됐지만 일부 콘텐츠 손실). " "provisional_first_render_count > 0 = IMP-30 first-render invariant 가 작동한 unit 존재 " "(empty_shell / chain_exhausted_provisional / 등 — needs user/AI adaptation)." ), } # ─── Per-step artifact write (locked schema) ──────────────────── # 모든 step JSON 공통 필드: step_num, step_name, step_status, # pipeline_path_connected, input, output, note, data # 사용자 lock — 한 슬라이드 결과물 고치지 말고 시스템 layer 박기 (오답노트 #2) def _write_step_artifact( run_dir: Path, step_num: int, name: str, data, *, step_status: str = "done", pipeline_path_connected: bool = True, inputs: Optional[list[str]] = None, outputs: Optional[list[str]] = None, note: Optional[str] = None, ) -> Path: """Write per-step JSON artifact to {run_dir}/steps/step{NN}_{name}.json. Locked schema (사용자 직설): step_num, step_name, step_status, pipeline_path_connected, input, output, note, data. Status values: 'done' / 'partial' / 'trace-only' / 'future' / 'failed'. """ steps_dir = run_dir / "steps" steps_dir.mkdir(exist_ok=True) fname = f"step{step_num:02d}_{name}.json" fpath = steps_dir / fname payload = { "step_num": step_num, "step_name": name, "step_status": step_status, "pipeline_path_connected": pipeline_path_connected, "input": inputs or [], "output": outputs or [fname], "note": note, "data": data, } fpath.write_text( json.dumps(payload, ensure_ascii=False, indent=2, default=str), encoding="utf-8", ) return fpath # IMP-43 (#72) u3 — Step 6 reuse snapshot sidecar writer. # # Scope (u3 only — Stage 2 unit split): # * Writes ``run_dir/_reuse_snapshot.json`` *after* the Step 6 artifact. # * JSON-only (per Stage 2 guardrail — pickle forbidden); schema + # ``build_snapshot`` live in u2 (``src.phase_z2_reuse_snapshot``). # * Write failure WARNS and CONTINUES — the snapshot is an OPTIONAL # sidecar; absence means future ``--reuse-from`` (u4) will fail # closed when it cannot find / load the file. The main pipeline # run must not abort on snapshot write failure. # * Returns the run_dir-relative path (``"_reuse_snapshot.json"``) on # success, ``None`` on failure. The caller stamps the returned value # (or the constant when known ahead of time) into the Step 6 artifact. def _write_reuse_snapshot( run_dir: Path, *, mdx_source_text: str, slide_title: Optional[str], slide_footer: Optional[str], sections: list, stage0_adapter_diagnostics: Optional[dict], stage0_normalized_assets: Optional[dict], v4_evidence: list, layout_preset_pre_override: Optional[str], units: list, comp_debug: Optional[dict], v4_fallback_traces: Optional[dict], ai_preflight: Optional[dict], ) -> Optional[str]: try: mdx_sha256 = hashlib.sha256(mdx_source_text.encode("utf-8")).hexdigest() snapshot = build_snapshot( mdx_sha256=mdx_sha256, slide_title=slide_title, slide_footer=slide_footer, sections=sections, stage0_adapter_diagnostics=stage0_adapter_diagnostics, stage0_normalized_assets=stage0_normalized_assets, v4_evidence=v4_evidence, layout_preset_pre_override=layout_preset_pre_override, units=units, comp_debug=comp_debug, v4_fallback_traces=v4_fallback_traces, ai_preflight=ai_preflight, ) fpath = run_dir / SNAPSHOT_FILENAME fpath.write_text( json.dumps(snapshot, ensure_ascii=False, indent=2), encoding="utf-8", ) return SNAPSHOT_FILENAME except Exception as exc: print( f" [reuse-snapshot] WARN — failed to write {SNAPSHOT_FILENAME} " f"(reason={type(exc).__name__}: {exc}); --reuse-from will not be " f"available from this run.", file=sys.stderr, ) return None # IMP-43 (#72) u4 — --reuse-from copy + restore + entry helpers. # # Scope (u4 only — Stage 2 unit split): # * Pure path resolution / file copy / snapshot load+validate / # section + unit rehydration / marker writing. # * NO edits to ``run_phase_z2_mvp1`` body — the kwarg threading and # the entry-point branch that invokes these helpers land in u5. # * NO sys.exit(2) translation — helpers RAISE # (FileNotFoundError / SnapshotValidationError / OSError); u4b adds # the stderr + exit-code-2 wrapper, the prev_run_dir == new_run_dir # accidental-write guard, and the mdx_sha256 mismatch surface # fingerprint. # # Restore contract (Stage 2 boundary): Step 0/1/2/5/6 artifacts + # ``_reuse_snapshot.json``. Step numbers 3 / 4 are deliberately absent # — the pipeline DOES write ``step03_content_objects.json`` and # ``step04_internal_composition.json`` AFTER the Step 6 artifact and # BEFORE the Step 7 artifact (see ``_write_step_artifact`` call sites # for ``run_dir, 3`` and ``run_dir, 4`` above the ``run_dir, 7`` call # in this file), but both are emitted with # ``step_status="trace-only"`` and ``pipeline_path_connected=False``: # they are diagnostic projections derived from the Step 6 # ``debug_zones`` snapshot, not deterministic inputs that Step 7+ # consume. Restoring them is unnecessary because downstream code # reads ``debug_zones`` directly (rehydrated from the snapshot), and # copying trace-only files would muddle the boundary audit. Stage 2 # boundary lock = pipeline-path-connected pre-Step 7 artifacts only. _REUSE_STEP_ARTIFACTS: tuple[str, ...] = ( "step00_preconditions.json", "step01_mdx_upload.json", "step01_mdx_source.md", "step02_normalized.json", "step05_v4_evidence.json", "step06_composition_plan.json", ) REUSE_MARKER_FILENAME = "_reuse_marker.json" def _resolve_reuse_from_prev_run_dir(reuse_from: str) -> Path: """Resolve ``--reuse-from PREV_RUN_ID`` to its ``phase_z2`` run_dir. Pure path computation — does NOT check existence. u4b adds the fail-closed prev-run-missing translation around this helper. """ return RUNS_DIR / reuse_from / "phase_z2" def _copy_reuse_artifacts_from_prev_run( prev_run_dir: Path, new_run_dir: Path ) -> dict[str, str]: """Copy Step 0/1/2/5/6 artifacts + ``_reuse_snapshot.json`` into new_run_dir. Returns ``{artifact_name: new_run_dir-relative_path}`` for all copied files. Raises ``FileNotFoundError`` when any required artifact is missing in ``prev_run_dir`` (u4b translates to exit 2). """ new_steps = new_run_dir / "steps" new_steps.mkdir(parents=True, exist_ok=True) copied: dict[str, str] = {} for fname in _REUSE_STEP_ARTIFACTS: src = prev_run_dir / "steps" / fname if not src.exists(): raise FileNotFoundError( f"reuse artifact missing in prev_run_dir: steps/{fname} " f"(expected at {src})" ) shutil.copyfile(src, new_steps / fname) copied[fname] = f"steps/{fname}" snap_src = prev_run_dir / SNAPSHOT_FILENAME if not snap_src.exists(): raise FileNotFoundError( f"reuse snapshot missing in prev_run_dir: {SNAPSHOT_FILENAME} " f"(expected at {snap_src})" ) shutil.copyfile(snap_src, new_run_dir / SNAPSHOT_FILENAME) copied[SNAPSHOT_FILENAME] = SNAPSHOT_FILENAME return copied def _load_and_validate_reuse_snapshot( new_run_dir: Path, *, mdx_source_text: str ) -> dict: """Load + validate the reuse snapshot already copied into ``new_run_dir``. Computes the expected ``mdx_sha256`` from ``mdx_source_text`` UTF-8 bytes — same derivation as ``_write_reuse_snapshot`` so the integrity check is symmetric. Delegates structural validation to u2's ``validate_snapshot``; that raises ``SnapshotValidationError`` (subclass of ``ValueError``) on schema_version mismatch, mdx_sha256 mismatch, missing required keys, or malformed wrappers — u4b catches and translates. """ from src.phase_z2_reuse_snapshot import validate_snapshot snap_path = new_run_dir / SNAPSHOT_FILENAME snapshot = json.loads(snap_path.read_text(encoding="utf-8")) expected_sha = hashlib.sha256(mdx_source_text.encode("utf-8")).hexdigest() validate_snapshot(snapshot, expected_mdx_sha256=expected_sha) return snapshot @dataclass class _RehydratedV4Candidate: """V4Match-shape duck type restored from snapshot ``v4_candidates``. Exposes the 6-attribute contract that the reuse path's downstream consumers read off ``unit.v4_candidates`` entries: * template_id / frame_id / frame_number / confidence / label — read by ``_apply_frame_override_to_unit`` (frame swap). * v4_rank — read by ``_build_application_plan_unit`` (Step 9 payload, ``data.application_plan.zones[i].v4_candidates[j]``). Default ``None`` keeps the dataclass safe to construct from legacy snapshots that pre-date the u4 fix where the snapshot serializer did not persist per-candidate rank. Kept local — circular-dep-free; the production ``V4Match`` dataclass additionally carries section_id / selection_path / fallback_reason / provisional that the reuse boundary deliberately does not require. """ template_id: str frame_id: str frame_number: int confidence: float label: str v4_rank: Optional[int] = None def _rehydrate_mdx_sections_from_snapshot(snapshot: dict) -> list: """Rebuild ``list[MdxSection]`` from snapshot ``sections`` wrapper. Mirrors the ``serialize_section`` field list (u2 source of truth). Returns a Python list of ``MdxSection`` dataclass instances so the Step 7+ pipeline code that does ``[s.section_id for s in sections]`` keeps byte-for-byte behavior. """ entries = snapshot["sections"]["value"] return [ MdxSection( section_id=e["section_id"], section_num=e["section_num"], title=e["title"], raw_content=e["raw_content"], heading_number=e.get("heading_number"), v4_alias_keys=list(e.get("v4_alias_keys") or []), sub_sections=list(e.get("sub_sections") or []), ) for e in entries ] def _rehydrate_composition_units_from_snapshot(snapshot: dict) -> list: """Rebuild ``list[CompositionUnit]`` from snapshot ``units`` wrapper. ``v4_candidates`` entries are restored as ``_RehydratedV4Candidate`` instances so attribute access (``cand.template_id`` etc.) works end-to-end through ``_apply_frame_override_to_unit`` without serializing the production ``V4Match`` dataclass shape. Uses the ``src.phase_z2_composition`` import path to match line 4976 / 5125's local re-imports — the module is loaded under both ``phase_z2_composition`` and ``src.phase_z2_composition`` due to historical sys.path duality, so a top-level CompositionUnit reference would create a class-identity mismatch against tests and downstream code that imports via the ``src.`` path. """ from src.phase_z2_composition import CompositionUnit as _CompositionUnit entries = snapshot["units"]["value"] units: list = [] for e in entries: cands = [ _RehydratedV4Candidate( template_id=c["template_id"], frame_id=c["frame_id"], frame_number=int(c["frame_number"]), confidence=float(c["confidence"]), label=c["label"], v4_rank=( int(c["v4_rank"]) if c.get("v4_rank") is not None else None ), ) for c in (e.get("v4_candidates") or []) ] units.append(_CompositionUnit( source_section_ids=list(e["source_section_ids"]), merge_type=e["merge_type"], frame_template_id=e["frame_template_id"], frame_id=e["frame_id"], frame_number=int(e["frame_number"]), confidence=float(e["confidence"]), label=e["label"], phase_z_status=e["phase_z_status"], raw_content=e["raw_content"], title=e["title"], v4_rank=e.get("v4_rank"), selection_path=e.get("selection_path") or "rank_1", fallback_reason=e.get("fallback_reason"), score=float(e.get("score") or 0.0), rationale=dict(e.get("rationale") or {}), auto_selectable=bool(e.get("auto_selectable", True)), filter_reasons=list(e.get("filter_reasons") or []), notes=list(e.get("notes") or []), v4_candidates=cands, provisional=bool(e.get("provisional", False)), )) return units REUSE_MARKER_SCHEMA_VERSION = 1 def _write_reuse_marker( new_run_dir: Path, *, prev_run_id: str, copied_artifacts: dict[str, str], ) -> Path: """Write ``_reuse_marker.json`` to ``new_run_dir`` for audit trail. Records prev_run_id, copied artifact map, the locked Step 0/1/2/5/6 boundary, and ``resume_at_step=7``. Informational sidecar — absence does not break the reused run; presence lets operators trace which prev_run_id the reuse path was sourced from. u5 invokes this after a successful copy + restore. """ marker = { "schema_version": REUSE_MARKER_SCHEMA_VERSION, "reuse_from_prev_run_id": prev_run_id, "snapshot_filename": SNAPSHOT_FILENAME, "copied_artifacts": dict(copied_artifacts), "boundary_steps": list(_REUSE_STEP_ARTIFACTS), "resume_at_step": 7, "note": ( "IMP-43 (#72) u4 — this run was sourced from prev_run_id via " "--reuse-from. Steps 0/1/2/5/6 artifacts copied; Step 7+ " "re-executed in this run_dir." ), } fpath = new_run_dir / REUSE_MARKER_FILENAME fpath.write_text( json.dumps(marker, ensure_ascii=False, indent=2), encoding="utf-8", ) return fpath # IMP-43 (#72) u4b — fail-closed wrapper around the u4 helpers. # # Scope (u4b only — Stage 2 unit split): # * Translate the u4 raises (FileNotFoundError, SnapshotValidationError, # json.JSONDecodeError, OSError) into the CLI fail-closed contract: # stderr message + ``sys.exit(2)``. # * Add the prev_run_dir == new_run_dir accidental-write guard BEFORE # any copy attempt — otherwise ``_copy_reuse_artifacts_from_prev_run`` # would overwrite prev_run_dir's own step files with itself and # mutate the "read-only" reuse source. # * Add the missing-prev-run-dir surface so the user gets a clean # "run id not found" message instead of the raw FileNotFoundError # stack from inside _copy_reuse_artifacts_from_prev_run. # * Surface the mdx_sha256 mismatch as its OWN axis (distinct from # generic snapshot validation failures) so the operator can tell # "wrong --mdx-path for this prev_run_id" apart from "snapshot file # is broken". # # Out of scope: signature threading into ``run_phase_z2_mvp1`` (u5), # the actual call site dispatch into Step 7+ (u5). # # Diagnostic format (factual-verification guardrail): # [error] --reuse-from fail-closed: # value: # path: # upstream: # reason: : (only when exc != None) # # axis vocabulary (closed enum — tests pin this set): # * prev_run_dir_missing # * prev_run_dir_equals_new_run_dir # * reuse_artifact_missing # * reuse_copy_os_error # OSError != FileNotFoundError during copy # # (PermissionError, IsADirectoryError, # # OSError(errno.EXDEV), full-disk, etc.) # * snapshot_missing_after_copy # * snapshot_corrupt_json # * snapshot_read_os_error # OSError != FileNotFoundError during # # snapshot read (permission denied, # # path-became-dir, lower-level IO) # * mdx_sha256_mismatch # * snapshot_validation_failed REUSE_FAIL_CLOSED_AXES: frozenset[str] = frozenset({ "prev_run_dir_missing", "prev_run_dir_equals_new_run_dir", "reuse_artifact_missing", "reuse_copy_os_error", "snapshot_missing_after_copy", "snapshot_corrupt_json", "snapshot_read_os_error", "mdx_sha256_mismatch", "snapshot_validation_failed", }) def _abort_reuse_from( *, axis: str, value: Any, path: str, upstream: str, exc: Optional[BaseException] = None, ) -> "NoReturn": """Print provenance-tagged stderr message and ``sys.exit(2)``. All four `value+path+upstream+axis` fields are mandatory so the operator can pinpoint the failed precondition without grepping the pipeline source. ``exc`` (when supplied) adds the underlying type + message — useful for ``json.JSONDecodeError`` line/col info or OSError errno. """ if axis not in REUSE_FAIL_CLOSED_AXES: raise AssertionError( f"_abort_reuse_from: unknown axis {axis!r} " f"(expected one of {sorted(REUSE_FAIL_CLOSED_AXES)})" ) lines = [ f"[error] --reuse-from fail-closed: {axis}", f" value: {value!r}", f" path: {path}", f" upstream: {upstream}", ] if exc is not None: lines.append(f" reason: {type(exc).__name__}: {exc}") print("\n".join(lines), file=sys.stderr) sys.exit(2) def _paths_equivalent(a: Path, b: Path) -> bool: """Return True when ``a`` and ``b`` resolve to the same filesystem location, falling back to lexical equality when ``a`` doesn't exist yet (Path.resolve(strict=False) still normalizes case + sep on Windows + collapses ``..`` segments). """ try: return a.resolve(strict=False) == b.resolve(strict=False) except (OSError, RuntimeError): return a == b def execute_reuse_from_or_fail_closed( *, reuse_from: str, new_run_dir: Path, mdx_source_text: str, ) -> tuple[Path, dict[str, str], dict]: """Orchestrate u4 helpers under the u4b fail-closed contract. Returns ``(prev_run_dir, copied_artifacts, snapshot)`` on success. Calls ``sys.exit(2)`` on any of the seven fail-closed axes; does NOT return in that case. The caller (u5, into ``run_phase_z2_mvp1``) does NOT need to wrap this in its own try/except — every reachable failure inside this function terminates the process directly. """ from src.phase_z2_reuse_snapshot import SnapshotValidationError prev_run_dir = _resolve_reuse_from_prev_run_dir(reuse_from) # Guard 1: prev_run_dir must exist. if not prev_run_dir.exists(): _abort_reuse_from( axis="prev_run_dir_missing", value=reuse_from, path=str(prev_run_dir), upstream="--reuse-from CLI argument", ) # Guard 2: prev_run_dir must NOT be the same as new_run_dir. # Without this, the copy step would overwrite prev_run_dir's own # files with themselves and break the RO guarantee on the reuse # source. The check resolves both sides so a relative-vs-absolute # or symlinked collision still trips it. if _paths_equivalent(prev_run_dir, new_run_dir): _abort_reuse_from( axis="prev_run_dir_equals_new_run_dir", value=reuse_from, path=str(prev_run_dir), upstream=( "_resolve_reuse_from_prev_run_dir(reuse_from) == new_run_dir " "(would overwrite prev_run_dir during copy)" ), ) # Copy step 0/1/2/5/6 + snapshot from prev_run_dir → new_run_dir. # FileNotFoundError MUST be caught before the bare OSError handler — # it is a subclass of OSError and the missing-artifact case has its # own dedicated axis. try: copied = _copy_reuse_artifacts_from_prev_run(prev_run_dir, new_run_dir) except FileNotFoundError as exc: _abort_reuse_from( axis="reuse_artifact_missing", value=str(exc), path=str(prev_run_dir), upstream=( "Step 0/1/2/5/6 deterministic artifacts + " f"{SNAPSHOT_FILENAME} under prev_run_dir/steps/" ), exc=exc, ) except OSError as exc: # PermissionError, IsADirectoryError, OSError(errno.EXDEV) when # crossing filesystems with shutil.copyfile, disk-full, etc. # Without this branch the raw traceback would escape the wrapper # and contradict the docstring contract ("every reachable # failure inside this function terminates the process directly"). _abort_reuse_from( axis="reuse_copy_os_error", value=str(exc), path=str(prev_run_dir), upstream=( "_copy_reuse_artifacts_from_prev_run " "(OSError != FileNotFoundError; shutil.copyfile or " "Path.mkdir surface)" ), exc=exc, ) # Load + validate snapshot. Exception fan-out below mirrors the # u4 helper raise surface; each fail-closed axis is reported # separately so operators can tell the cases apart. # FileNotFoundError MUST be caught before the bare OSError handler. try: snapshot = _load_and_validate_reuse_snapshot( new_run_dir, mdx_source_text=mdx_source_text, ) except FileNotFoundError as exc: # Should not happen — copy step would have failed first — but # left explicit to make the contract symmetric. _abort_reuse_from( axis="snapshot_missing_after_copy", value=str(exc), path=str(new_run_dir / SNAPSHOT_FILENAME), upstream="_copy_reuse_artifacts_from_prev_run side effect", exc=exc, ) except json.JSONDecodeError as exc: _abort_reuse_from( axis="snapshot_corrupt_json", value=str(exc), path=str(new_run_dir / SNAPSHOT_FILENAME), upstream=f"json.loads({SNAPSHOT_FILENAME})", exc=exc, ) except OSError as exc: # Permission denied on the copied snapshot, snap_path turned out # to be a directory, lower-level IO error. JSONDecodeError is # ValueError (independent of OSError) so order with that branch # does not matter; this branch only needs to follow FNF. _abort_reuse_from( axis="snapshot_read_os_error", value=str(exc), path=str(new_run_dir / SNAPSHOT_FILENAME), upstream=( "_load_and_validate_reuse_snapshot " "(OSError != FileNotFoundError; Path.read_text surface)" ), exc=exc, ) except SnapshotValidationError as exc: msg = str(exc) if "mdx_sha256 mismatch" in msg: _abort_reuse_from( axis="mdx_sha256_mismatch", value=msg, path=str(new_run_dir / SNAPSHOT_FILENAME), upstream=( "sha256(mdx_source_text) vs " f"{SNAPSHOT_FILENAME}#/mdx_sha256" ), exc=exc, ) else: _abort_reuse_from( axis="snapshot_validation_failed", value=msg, path=str(new_run_dir / SNAPSHOT_FILENAME), upstream="src.phase_z2_reuse_snapshot.validate_snapshot", exc=exc, ) return prev_run_dir, copied, snapshot def _write_step_html( run_dir: Path, step_num: int, name: str, title: str, body_html: str, *, step_status: str = "done", inputs: Optional[list[str]] = None, outputs: Optional[list[str]] = None, ) -> Path: """Write per-step HTML artifact with locked header (input/output/status). HTML 산출물 = 사용자가 시각으로 판단해야 하는 step (7/8/9/13/20). """ steps_dir = run_dir / "steps" steps_dir.mkdir(exist_ok=True) fname = f"step{step_num:02d}_{name}.html" fpath = steps_dir / fname inputs_lines = "\n".join(f" - {i}" for i in (inputs or [])) outputs_lines = "\n".join(f" - {o}" for o in (outputs or [fname])) status_class = ( "pass" if step_status == "done" else "fail" if step_status == "failed" else "partial" ) inputs_li = "".join(f"
  • {i}
  • " for i in (inputs or [])) outputs_li = "".join(f"
  • {o}
  • " for o in (outputs or [fname])) full_html = f""" Step {step_num:02d} — {title}

    Step {step_num:02d} — {title}

    Status: {step_status}
    Input:
      {inputs_li}
    Output:
      {outputs_li}
    {body_html} """ fpath.write_text(full_html, encoding="utf-8") return fpath def write_debug_json(run_dir: Path, layout_preset: str, debug_zones: list[dict], layout_css: dict, visual_runtime_check: Optional[dict] = None, composition_debug: Optional[dict] = None, slide_status: Optional[dict] = None, fit_classification: Optional[dict] = None, router_decision: Optional[dict] = None, retry_trace: Optional[dict] = None) -> Path: debug = { "v4_source": str(V4_RESULT_PATH.relative_to(PROJECT_ROOT)), "v4_label_to_phase_z_status": V4_LABEL_TO_PHASE_Z_STATUS, "mvp1_allowed_statuses": sorted(MVP1_ALLOWED_STATUSES), "mode": "composition_v0_layout_8preset", "mode_note": ( "MVP-1.5b w/ composition planner v0 — sections → candidates (separate / " "parent_merged) → score → greedy select → 8-preset layout vocabulary " "(single / horizontal-2 / vertical-2 / top-1-bottom-2 / top-2-bottom-1 / " "left-1-right-2 / left-2-right-1 / grid-2x2). v0 layout = count-based; " "v1 axes (cardinality_fit / hierarchy_coherence / density_score) 추후." ), "layout_preset": layout_preset, "layout_css": layout_css, "slide_status": slide_status, "fit_classification": fit_classification, "router_decision": router_decision, "retry_trace": retry_trace, "composition_planner_debug": composition_debug, "zones": debug_zones, "visual_runtime_check": visual_runtime_check, # A-6 (IMP-01 #1) — additive top-level zone bbox trace (slide-relative px) "zone_geometries_px": (visual_runtime_check or {}).get("zone_geometries_px", []), # IMP-15 실행-4 (issue #48) — additive top-level Step 14 event streams "image_events": (visual_runtime_check or {}).get("image_events", []), "table_events": (visual_runtime_check or {}).get("table_events", []), } debug_path = run_dir / "debug.json" debug_path.write_text(json.dumps(debug, ensure_ascii=False, indent=2), encoding="utf-8") return debug_path # ─── Step 9 application-plan helpers (IMP-32 u1) ─────────────── def _application_candidates_for_unit(unit) -> list[dict]: """Step 9 (IMP-32 u1) — application candidate dicts from unit.v4_candidates. Pure extraction of inline block at src/phase_z2_pipeline.py:4487-4501. Behavior preserved: key set/order, APPLICATION_MODE_BY_V4_LABEL lookup, required_changes placeholder = [] (v0 = trace-only). """ app_candidates = [] for c in unit.v4_candidates: mode, auto_app, delegated = APPLICATION_MODE_BY_V4_LABEL.get( c.label, ("exclude", False, None) ) app_candidates.append({ "template_id": c.template_id, "frame_id": c.frame_id, "v4_label": c.label, "application_mode": mode, "auto_applicable": auto_app, "required_changes": [], # v0 = trace-only "delegated_to": delegated, }) return app_candidates def _v4_all_judgments_for_unit(v4_all_for_unit) -> list[dict]: """Step 9 (IMP-32 u2) — V4 all-judgment dicts (reject 포함) for a unit. Pure extraction of inline block at src/phase_z2_pipeline.py:4529-4545 (post-u1 line numbers). IMP-11 D-2 markers preserved in this helper: single `_contract = get_contract(c.template_id)` bind, `catalog_registered` boolean, and `min_height_px` chain `(_contract or {}).get("visual_hints", {}).get("min_height_px")`. Key set/order unchanged: template_id, frame_id, frame_number, v4_rank, confidence, label, catalog_registered, min_height_px. """ # IMP-11 D-2 (u1) — per-candidate min_height_px source = catalog # frame_contracts[template_id].visual_hints.min_height_px (logical 1280×720 px). # None when contract unregistered (frontend tolerates undefined). # Single get_contract lookup binds both catalog_registered and min_height_px. v4_all_judgments_list = [] for c in v4_all_for_unit: _contract = get_contract(c.template_id) v4_all_judgments_list.append({ "template_id": c.template_id, "frame_id": c.frame_id, "frame_number": c.frame_number, "v4_rank": c.v4_rank, "confidence": c.confidence, "label": c.label, "catalog_registered": _contract is not None, "min_height_px": (_contract or {}).get("visual_hints", {}).get("min_height_px"), }) return v4_all_judgments_list def _build_application_plan_unit( unit, zone_plan, selection_trace, plan_record, v4_all_for_unit, layout_preset, layout_candidates_list, ) -> dict: """Step 9 (IMP-32 u3) — per-unit application_plan dict assembly. Pure extraction of the inline `application_plan_units.append({...})` block currently at src/phase_z2_pipeline.py:4577-4623 (post-u1/u2 line numbers). Byte-identical output (key set + key order + value identity) when called with the same per-unit inputs: - unit : Step 6 unit (source_section_ids, v4_candidates, v4_rank, selection_path, fallback_reason, frame_template_id). - zone_plan : Step 8 per-unit zone_plan dict (region_layout_ candidates, display_strategy_candidates). - selection_trace : v4_fallback_traces[unit.source_section_ids[0]] (candidates list for candidate_evidence / fallback_chain compat alias). - plan_record : plan_record_by_unit_id[id(unit)] or None (IMP-06 plan-aware additive fields). - v4_all_for_unit : lookup_v4_all_judgments(...) result (Step 7-A axis trace — reject 포함 모든 V4 후보). - layout_preset : Step 7 preset name (e.g., "Type A"). - layout_candidates_list : Step 7 candidate list. Per-index/per-id lookups (zone_region_plans[i], v4_fallback_traces.get(...), plan_record_by_unit_id.get(id(unit)), section_alias_by_id, lookup_v4_all_ judgments(...)) stay at the call-site (u4). Invariants preserved: - candidate_evidence = selection_trace.get("candidates", []) — primary field. - fallback_chain = same list — compat alias for pre-IMP-05 readers. - v4_candidates list comprehension fields + order unchanged. - IMP-06 additive plan fields (position / assignment_source / section_ assignment_override / replaced_auto_unit / skipped_collided_auto_units / skipped_reason) — None / False / [] when no override CLI used. IMP-39 u3 (issue #68) additive fields : - ``ranking_sort_policy`` : full policy dict from ``load_ranking_sort_policy()`` (cached). Forwards the single-source ordering contract (label_priority map + tie_break_axes) to the Step 9 payload so the frontend (``designAgentApi.ts``) can mirror the backend sort without re-implementing the policy locally. u4 wires consumption. - ``sorted_candidate_evidence`` : explicit alias of the policy-sorted ``selection_trace["candidates"]`` list. Identical contents to ``candidate_evidence`` (u2 sorted the underlying ``judgments`` window before the selector loop appended ``trace["candidates"]``), but the explicit name documents the post-u2 contract for the frontend. """ unit_id = "+".join(unit.source_section_ids) has_v4 = bool(unit.v4_candidates) candidate_status = "ok" if has_v4 else "no_non_reject_v4_candidate" application_status = "ok" if has_v4 else "no_v4_candidate" current_default = unit.frame_template_id if has_v4 else None # IMP-39 u3 (issue #68) — forward the single-source ranking policy to the # Step 9 per-unit payload. ``load_ranking_sort_policy()`` is module-cached # (``_RANKING_SORT_POLICY_CACHE``), so the per-unit call is O(1) after # first invocation. The full policy dict (not just ``policy_type``) is # forwarded so the frontend can mirror label_priority + tie_break_axes # without re-declaring the contract locally. ranking_sort_policy = load_ranking_sort_policy() # IMP-06 blocker-fix (Codex #13 Blocker 3 / #16) — plan-aware additive # fields. additive = pre-IMP-06 readers (no override CLI used) see # position=None / assignment_source=None / section_assignment_override # =False / replaced_auto_unit=None / skipped_collided_auto_units=[] / # skipped_reason=None — i.e. byte-identical absent overrides. plan_position = plan_record.get("position") if plan_record else None plan_assignment_source = plan_record.get("assignment_source") if plan_record else None plan_section_override = bool(plan_record.get("section_assignment_override")) if plan_record else False plan_replaced_auto = plan_record.get("replaced_auto_unit") if plan_record else None plan_skipped_collided = list(plan_record.get("skipped_collided_auto_units") or []) if plan_record else [] plan_skipped_reason = plan_record.get("skipped_reason") if plan_record else None app_candidates = _application_candidates_for_unit(unit) v4_all_judgments_list = _v4_all_judgments_for_unit(v4_all_for_unit) return { "unit_id": unit_id, "layout_preset": layout_preset, "layout_candidates": layout_candidates_list, "region_layout_candidates": zone_plan.get("region_layout_candidates", []), "display_strategy_candidates": zone_plan.get("display_strategy_candidates", []), "candidate_status": candidate_status, "application_status": application_status, "current_default_candidate": current_default, "selected_v4_rank": unit.v4_rank, "selection_path": unit.selection_path, "fallback_used": bool(unit.selection_path and "fallback" in unit.selection_path), "fallback_reason": unit.fallback_reason, # IMP-05 L2 (Codex #10 D4 / #16 idea A) — Step 9 per-unit candidate evidence. # candidate_evidence is the primary field for future frontend / AI consumers. # fallback_chain is kept as a compat alias for any pre-IMP-05 reader. "candidate_evidence": selection_trace.get("candidates", []), "fallback_chain": selection_trace.get("candidates", []), # compat alias; prefer candidate_evidence "v4_candidates": [ { "template_id": c.template_id, "frame_id": c.frame_id, "frame_number": c.frame_number, "v4_rank": c.v4_rank, "confidence": c.confidence, "label": c.label, } for c in unit.v4_candidates ], # Step 7-A axis 보강 (사용자 lock 2026-05-08) — frontend UI 가 reject # 포함 모든 V4 후보를 시각 차별 (회색) 로 보여줄 수 있도록 trace. # length = 0~32. label 별 count : v4_candidates 는 non-reject only, # v4_all_judgments 는 reject 포함. # catalog_registered = frame_contracts.yaml 에 contract 있는지 여부. # false 면 사용자가 override 시도해도 Step 7-A 가 skip (render path 미연결). # IMP-11 D-2 (u1) : per-candidate min_height_px added (None when unregistered). "v4_all_judgments": v4_all_judgments_list, "application_candidates": app_candidates, # IMP-06 blocker-fix (Codex #13 Blocker 3 / #16) — plan-aware # additive fields. None / False / [] when no override CLI used. "position": plan_position, "assignment_source": plan_assignment_source, "section_assignment_override": plan_section_override, "replaced_auto_unit": plan_replaced_auto, "skipped_collided_auto_units": plan_skipped_collided, "skipped_reason": plan_skipped_reason, # IMP-39 u3 (issue #68) — single-source ranking policy forwarded to # frontend so backend selector "rank 1" and frontend # ``frame_candidates[0]`` share one ordering contract. Additive only; # pre-u3 readers ignore both keys. "ranking_sort_policy": ranking_sort_policy, "sorted_candidate_evidence": selection_trace.get("candidates", []), } # ─── Main entry ──────────────────────────────────────────────── class Step0PreflightError(RuntimeError): """IMP-92 u4 — Step 0 AI preflight fail-fast surface. Raised at boot when ``settings.ai_fallback_enabled`` is True and the Anthropic API ping reveals a persistent setup problem (invalid API key, invalid model ID, billing / permission denied). Transient errors (429 / 5xx) do NOT fail boot — they are recorded as ``"transient"`` in the Step 0 artifact and the pipeline proceeds; the in-pipeline retry layer + u2 operational classifier handle them downstream. """ def _run_step0_ai_preflight() -> dict: """IMP-92 u4 — Boot-time AI fallback preflight ping (gated). When ``settings.ai_fallback_enabled`` is False (default), returns ``{"status": "skipped", "reason": "ai_fallback_disabled", ...}`` without instantiating ``anthropic.Anthropic`` — preserves the PZ-1 AI=0 normal path and the ``feedback_demo_env_toggle_policy`` default-OFF contract (no API call on normal runs). When enabled, issues a single 1-token Anthropic ``messages.create`` to validate the configured ``(ai_fallback_model, anthropic_api_key)`` pair. Persistent setup errors raise ``Step0PreflightError`` so the pipeline fails fast at boot rather than at first AI repair attempt. Transient errors are recorded as ``"transient"`` and the pipeline continues. Setup errors (fail-fast): * ``anthropic.AuthenticationError`` (401) — invalid API key * ``anthropic.PermissionDeniedError`` (403) — billing / permission * ``anthropic.NotFoundError`` (404) — invalid model ID * generic ``anthropic.APIStatusError`` (402) — billing / payment required (Anthropic surfaces 402 without a typed subclass; dispatched here by HTTP status code, mirroring u2 ``classify_operational_error``). Transient (record + continue): * ``anthropic.RateLimitError`` (429) * ``anthropic.InternalServerError`` (5xx) * generic ``anthropic.APIStatusError`` with HTTP 429 / 5xx """ import anthropic from src.config import settings as _settings if not _settings.ai_fallback_enabled: return { "status": "skipped", "reason": "ai_fallback_disabled", "model": _settings.ai_fallback_model, } try: client = anthropic.Anthropic( api_key=_settings.anthropic_api_key, timeout=_settings.ai_fallback_timeout_s, ) client.messages.create( model=_settings.ai_fallback_model, max_tokens=1, messages=[{"role": "user", "content": "ping"}], ) except ( anthropic.AuthenticationError, anthropic.PermissionDeniedError, anthropic.NotFoundError, ) as exc: raise Step0PreflightError( f"Anthropic API preflight failed for model " f"{_settings.ai_fallback_model!r}: " f"{type(exc).__name__}: {exc}. " "Check ANTHROPIC_API_KEY / ai_fallback_model in .env." ) from exc except (anthropic.RateLimitError, anthropic.InternalServerError) as exc: return { "status": "transient", "model": _settings.ai_fallback_model, "transient_error": f"{type(exc).__name__}: {exc}", } except anthropic.APIStatusError as exc: # IMP-92 u4 — fall back to HTTP status code dispatch when the SDK # surfaces a setup error as the generic ``APIStatusError`` instead # of a typed subclass. Mirrors u2 ``classify_operational_error`` # so HTTP 402 (Payment Required / billing) becomes a fail-fast # Step0PreflightError, matching the issue body's explicit # operational contract. status_code = getattr(exc, "status_code", None) if status_code is None: status_code = getattr(getattr(exc, "response", None), "status_code", None) if status_code == 429 or (status_code is not None and 500 <= status_code < 600): return { "status": "transient", "model": _settings.ai_fallback_model, "transient_error": f"{type(exc).__name__}: {exc}", } raise Step0PreflightError( f"Anthropic API preflight failed for model " f"{_settings.ai_fallback_model!r}: " f"HTTP {status_code} {type(exc).__name__}: {exc}. " "Check ANTHROPIC_API_KEY / ai_fallback_model in .env." ) from exc return { "status": "passed", "model": _settings.ai_fallback_model, } def _resolve_slide_css_from_frontmatter(mdx_source_text: str) -> Optional[str]: """IMP-45 (#74) u4 — minimal frontmatter probe for ``slide_overrides.css``. Targeted re-parse of the same YAML frontmatter block that :func:`parse_mdx` reads at line 415-418 — extracts only the nested ``slide_overrides.css`` string when present. Kept inline rather than routed through :func:`src.mdx_normalizer.normalize_mdx_content` so Step 13 render does not depend on the Stage 0 normalize adapter (project lock 2026-05-08). Mirrors the validation rules in :func:`src.mdx_normalizer._extract_slide_overrides` (u2) : - No frontmatter / unparseable YAML / non-mapping → ``None``. - Missing ``slide_overrides`` mapping → ``None``. - ``slide_overrides.css`` non-string or empty → ``None``. - ``slide_overrides.css`` non-empty ``str`` → that string. """ fm_match = re.match(r"^---\n(.*?)\n---\n", mdx_source_text, re.DOTALL) if fm_match is None: return None try: fm = yaml.safe_load(fm_match.group(1)) except yaml.YAMLError: return None if not isinstance(fm, dict): return None overrides = fm.get("slide_overrides") if not isinstance(overrides, dict): return None css = overrides.get("css") if isinstance(css, str) and css: return css return None def run_phase_z2_mvp1( mdx_path: Path, run_id: Optional[str] = None, *, override_layout: Optional[str] = None, override_frames: Optional[dict[str, str]] = None, override_zone_geometries: Optional[dict[str, dict]] = None, override_section_assignments: Optional[dict[str, list[str]]] = None, override_image_overrides: Optional[dict[str, dict]] = None, override_slide_css: Optional[str] = None, reuse_from: Optional[str] = None, ) -> Path: """MVP-1.5b entry — single slide + composition planner v0 + 8 preset vocabulary. Pipeline : parse_mdx → align_sections_to_v4_granularity → plan_composition → mapper per unit → render slide_base + frame partial → Selenium check User overrides (Step 7-A axis, 2026-05-08) : override_layout : 자동 결정된 layout_preset 을 사용자 선택값으로 강제 (8 preset 중 하나). override_frames : {unit_id: template_id} — 자동 결정된 frame template 을 사용자 선택값 으로 강제. unit_id = "+".join(source_section_ids) (e.g., "03-1" 또는 "03-1+03-2"). 매칭 unit 의 v4_candidates 에 있는 entry 면 그 entry 의 score / label 도 함께 갱신. 없으면 template_id 만 변경. override_image_overrides : {image_id: {x, y, w, h}} — IMP-51 (#79) u5 axis. image_id = stable id stamped on user-content `` tags by ``src/image_id_stamper.py`` (u4). x/y/w/h are percent-of-slide coordinates (0–100, slide-absolute). Forward-compat kwarg: the render-time CSS injection that consumes this mapping lands in u7; until u7 wires the consumer, accepting the kwarg keeps the backend contract (KNOWN_AXES u1 + Vite allowlist u2 + typed client u3 + stamper u4) end-to-end addressable from CLI without diverging the function signature. override_slide_css : Optional slide-level CSS string — IMP-45 (#74) u4 axis. Marker-wrapped