From 1efbf672bd1fc9933da35d8f6db7a2118c969968 Mon Sep 17 00:00:00 2001 From: kyeongmin Date: Thu, 21 May 2026 00:40:58 +0900 Subject: [PATCH] feat(#39): IMP-30 first-render invariant + abort bypass (2 paths) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Restore first-render invariant: final.html + Step 20 slide_status MUST be written for every input where Step 0~5 succeed. Two abort paths replaced with provisional/empty-shell synthesis; MDX content preserved, AI-free. - u1 V4Match.provisional + lookup_v4_match_with_fallback(allow_provisional) chain_exhausted -> synthesize rank-1 provisional (opt-in, default-off) - u2 CompositionUnit.provisional propagation (single / parent_merged / parent_merged_inferred constructors) - u3 select_composition_units(allow_provisional_fill=True) last-resort fill + _candidate_state="selected_provisional" - u4 pipeline.py path-(a) abort guard replaced with provisional retry + terminal __empty__ shell (no sys.exit(1)) - u5 zones_data.provisional -> slide_base.html zone--provisional class + data-provisional + needs-adaptation badge (template-only) - u6 compute_slide_status additive provisional_first_render_count/_units (overall enum unchanged per IMP-05 Codex #10 D4) - u7 regression: tests/test_phase_z2_imp30_first_render.py (28 tests) + tests/test_phase_z2_v4_fallback.py (+5 cases) Guardrails verified: MVP1_ALLOWED_STATUSES unchanged, no calculate_fit, no LLM in fallback path, no MDX 03/04/05 hardcoding. Anchor sync (Rule 13): tests/orchestrator_unit/test_imp17_comment_anchor.py re-pinned 564/565 -> 570/571 to track V4Match.provisional shift at src/phase_z2_pipeline.py:179-184. Cross-ref: IMP-05 (#5) §5 defer + Codex #2 first-render invariant. 
--- src/phase_z2_composition.py | 91 +- src/phase_z2_pipeline.py | 306 +++- templates/phase_z2/slide_base.html | 40 +- .../test_imp17_comment_anchor.py | 22 +- tests/test_phase_z2_imp30_first_render.py | 1557 +++++++++++++++++ tests/test_phase_z2_v4_fallback.py | 122 ++ 6 files changed, 2105 insertions(+), 33 deletions(-) create mode 100644 tests/test_phase_z2_imp30_first_render.py diff --git a/src/phase_z2_composition.py b/src/phase_z2_composition.py index 8050cb4..7b09123 100644 --- a/src/phase_z2_composition.py +++ b/src/phase_z2_composition.py @@ -368,6 +368,15 @@ class CompositionUnit: # 0 길이 = "no_non_reject_v4_candidate" 신호 (Step 9 application_plan input). v4_candidates: list = field(default_factory=list) + # IMP-30 u2 — provisional first-render flag. True when the V4Match + # backing this unit was synthesized via lookup_v4_match_with_fallback + # (allow_provisional=True) after chain_exhausted, or when u3 inserts + # a last-resort provisional fill for an uncovered section. Carried as + # data (not re-derived from label/selection_path downstream) so the + # render path / status / zone template can surface "needs adaptation" + # uniformly. Default False keeps non-provisional units byte-identical. 
+ provisional: bool = False + # ─── Heading Tree ────────────────────────────────────────────── @@ -490,6 +499,7 @@ def collect_candidates(sections, v4_lookup_fn, v4_label_to_status: dict, raw_content=s.raw_content, title=s.title, v4_candidates=_v4_cands(s.section_id), + provisional=getattr(match, "provisional", False), ) _apply_capacity_fit(c, capacity_fit_fn) candidates.append(c) @@ -524,6 +534,7 @@ def collect_candidates(sections, v4_lookup_fn, v4_label_to_status: dict, raw_content=merged_raw, title=pid, v4_candidates=_v4_cands(pid), + provisional=getattr(parent_match, "provisional", False), ) _apply_capacity_fit(c_pm, capacity_fit_fn) candidates.append(c_pm) @@ -624,6 +635,10 @@ def collect_candidates(sections, v4_lookup_fn, v4_label_to_status: dict, notes=notes, # rep_child 의 V4 후보 list (rep_match 와 같은 출처, frame_* 와 일관). v4_candidates=_v4_cands(rep_child.section_id), + # IMP-30 u2 — rep_match drives frame selection so its provisional + # flag flows here. If a non-rep child match is provisional but the + # rep is not, this unit is not provisional (the rep frame is real). + provisional=getattr(rep_match, "provisional", False), ) _apply_capacity_fit(c_inf, capacity_fit_fn) candidates.append(c_inf) @@ -670,7 +685,13 @@ def score_candidate(c: CompositionUnit) -> CompositionUnit: # ─── Selection ───────────────────────────────────────────────── -def select_composition_units(candidates, allowed_statuses: set[str]) -> list[CompositionUnit]: +def select_composition_units( + candidates, + allowed_statuses: set[str], + *, + all_section_ids: Optional[list[str]] = None, + allow_provisional_fill: bool = False, +) -> list[CompositionUnit]: """Greedy non-overlapping selection by score, with coverage tiebreak. 1. 모든 candidate 점수 매김 @@ -685,6 +706,27 @@ def select_composition_units(candidates, allowed_statuses: set[str]) -> list[Com auto_selectable=False candidate 는 자동 선택 X. debug 의 candidates_summary 에는 남음. UI/editor layer 에서 사용자가 별도 처리 가능 (현 v0 범위 X). 
+ + IMP-30 u3 — last-resort provisional fill (opt-in via allow_provisional_fill): + After the normal greedy pass, sections in ``all_section_ids`` that are + still uncovered are filled with the highest-score *provisional* + candidate (``c.provisional == True``) that includes at least one + uncovered section and does not collide with already-covered ones. A + provisional candidate's backing V4Match was synthesized via + ``lookup_v4_match_with_fallback(allow_provisional=True)`` (IMP-30 u1) + after chain_exhausted; its ``phase_z_status`` is therefore typically + *outside* ``allowed_statuses`` (extract_matched_zone / fallback_candidate), + which is why it gets filtered out of the normal greedy pass. The fill + preserves first-render invariant for sections whose rank-1~3 are all + restructure/reject. Default ``allow_provisional_fill=False`` keeps + pre-u3 behavior byte-identical (IMP-05 regression guard). + + Args: + candidates: full candidate pool from collect_candidates(). + allowed_statuses: phase_z_status set considered auto-renderable. + all_section_ids: ordered section id list (only consulted when + allow_provisional_fill=True; required for coverage check). + allow_provisional_fill: opt-in for last-resort provisional fill. """ scored = [score_candidate(c) for c in candidates] viable = [ @@ -701,6 +743,28 @@ def select_composition_units(candidates, allowed_statuses: set[str]) -> list[Com selected.append(c) covered.update(c.source_section_ids) + # IMP-30 u3 — last-resort provisional fill (opt-in, default off). + # Honors first-render invariant by surfacing chain_exhausted sections as + # provisional zones instead of dropping them. Skip reasons on + # non-provisional filtered candidates are preserved (not mutated here). 
+ if allow_provisional_fill and all_section_ids: + uncovered = {sid for sid in all_section_ids if sid not in covered} + if uncovered: + provisional_pool = [ + c for c in scored + if c.provisional + and any(sid in uncovered for sid in c.source_section_ids) + ] + provisional_pool.sort( + key=lambda c: (c.score, len(c.source_section_ids)), + reverse=True, + ) + for c in provisional_pool: + if any(sid in covered for sid in c.source_section_ids): + continue + selected.append(c) + covered.update(c.source_section_ids) + return selected @@ -740,7 +804,9 @@ def select_layout_preset(units: list[CompositionUnit]) -> Optional[str]: def plan_composition(sections, v4_lookup_fn, v4_label_to_status: dict, allowed_statuses: set[str], capacity_fit_fn=None, - v4_candidates_lookup_fn=None) -> tuple[list[CompositionUnit], Optional[str], dict]: + v4_candidates_lookup_fn=None, + *, + allow_provisional_fill: bool = False) -> tuple[list[CompositionUnit], Optional[str], dict]: """Composition planner v0.2 entry. v0.2 변경 : @@ -753,6 +819,14 @@ def plan_composition(sections, v4_lookup_fn, v4_label_to_status: dict, logic 변화 X — 단일 frame_template_id / frame_id / label / confidence 는 그대로. runtime 결과 무변. Step 9 application_plan input 위한 schema 확장. + IMP-30 u3 — last-resort provisional fill (opt-in, default off): + ``allow_provisional_fill`` is plumbed to select_composition_units(). + When True, uncovered sections receive a provisional fill from candidates + whose backing V4Match was synthesized via ``allow_provisional=True`` + (IMP-30 u1). ``_candidate_state`` returns ``selected_provisional`` for + those filled units so the debug summary distinguishes greedy selections + from provisional fills. Default False keeps IMP-05 behavior identical. + v0.1 / v0.1.1 동작 (유지) : - parent_merged_inferred candidate 생성 (parent V4 없어도) - review 개념 X. 
auto_selectable + filter_reasons 만으로 자동 결정 @@ -771,11 +845,22 @@ def plan_composition(sections, v4_lookup_fn, v4_label_to_status: dict, ) scored_all = [score_candidate(c) for c in candidates] - units = select_composition_units(candidates, allowed_statuses) + units = select_composition_units( + candidates, + allowed_statuses, + all_section_ids=[s.section_id for s in sections] if allow_provisional_fill else None, + allow_provisional_fill=allow_provisional_fill, + ) preset = select_layout_preset(units) def _candidate_state(c: CompositionUnit) -> str: if c in units: + # IMP-30 u3 — provisional-fill units surface as a distinct state so + # downstream debug consumers can tell greedy selection apart from + # last-resort fill. unit.provisional flows from u1 (V4Match + # synthesis) → u2 (CompositionUnit propagation). + if c.provisional: + return "selected_provisional" return "selected" if c.phase_z_status not in allowed_statuses: return "filtered_status" # V4 label → status not auto-renderable diff --git a/src/phase_z2_pipeline.py b/src/phase_z2_pipeline.py index 8e450be..42233a1 100644 --- a/src/phase_z2_pipeline.py +++ b/src/phase_z2_pipeline.py @@ -176,6 +176,12 @@ class V4Match: v4_rank: Optional[int] = None selection_path: str = "rank_1" fallback_reason: Optional[str] = None + # IMP-30 u1 — provisional first-render flag. True when the selector + # synthesizes a rank-1 V4 candidate after chain_exhausted because the + # opt-in allow_provisional kwarg was set. Default False keeps IMP-05 + # behavior byte-identical; downstream surfaces this for zone-level + # "needs adaptation" marking without altering V4 evidence. 
+ provisional: bool = False def to_phase_z_status(match: V4Match) -> str: @@ -585,11 +591,26 @@ def lookup_v4_match_with_fallback( raw_content: Optional[str] = None, max_rank: int = 3, alias_keys: Optional[list] = None, + allow_provisional: bool = False, ) -> tuple[Optional[V4Match], dict]: """Select V4 rank-1, or promote rank-2/3 when rank-1 is not auto-renderable. This is an IMP-05 selector only. It uses existing V4 labels, frame-contract presence, and the Phase Z capacity precheck; it does not call calculate_fit. + + IMP-30 u1 — when ``allow_provisional=True`` and the rank-1..max_rank chain + is exhausted (no candidate passes MVP1 filter + contract + capacity), the + selector synthesizes a *provisional* V4Match from the rank-1 judgment so + the first-render invariant can be satisfied downstream. The synthesized + match carries ``provisional=True``, ``selection_path="provisional_rank_1"``, + and ``fallback_reason`` mirrors the existing chain-exhaust reason. The + candidate trace shape is unchanged (synthetic injection only updates the + top-level ``selection_path`` + ``selected_*`` mirrors). When the rank-1 + judgment itself is missing (``empty_v4_judgments`` / ``no_v4_section``), + no provisional is synthesized — the caller (u3 / u4) handles those cases + with a placeholder zone or empty-shell. + + Default ``allow_provisional=False`` keeps the IMP-05 behavior byte-identical. """ resolved = _resolve_v4_section_key(v4, section_id, alias_keys=alias_keys) sec = v4.get("mdx_sections", {}).get(resolved) if resolved else None @@ -692,6 +713,32 @@ def lookup_v4_match_with_fallback( trace["selection_path"] = "chain_exhausted" trace["fallback_reason"] = first_skip_reason or "no_auto_renderable_rank_1_to_3" + + # IMP-30 u1 — opt-in provisional first-render synthesis. When the caller + # signals allow_provisional, promote rank-1 judgment as a provisional + # match so downstream composition can satisfy the first-render invariant. 
+ # Top-level mirrors (selection_path / selected_*) are updated; candidate + # trace entries are left intact (their skip reasons remain accurate). + # Default-off keeps IMP-05 behavior byte-identical. + if allow_provisional: + rank_1_judgment = judgments[0] + provisional_match = _v4_match_from_judgment( + section_id, rank_1_judgment, rank=1 + ) + provisional_match.selection_path = "provisional_rank_1" + provisional_match.fallback_reason = trace["fallback_reason"] + provisional_match.provisional = True + trace.update({ + "selection_path": "provisional_rank_1", + "selected_rank": 1, + "selected_template_id": provisional_match.template_id, + "selected_frame_id": provisional_match.frame_id, + "selected_label": provisional_match.label, + "fallback_used": True, + "provisional": True, + }) + return provisional_match, trace + return None, trace @@ -2437,6 +2484,9 @@ def compute_slide_status(sections: list[MdxSection], - full_mdx_coverage : aligned 된 모든 section_id 가 어떤 selected unit 에 의해 covered - adapter_needed_count : mapper FitError 로 자동 렌더 못 한 unit 수 (별 review 개념 X — 자동 실패 보고) - content_truncated_count : builder 가 truncate 한 zone 수 (informational) + - provisional_first_render_count : IMP-30 first-render invariant 로 합성된 unit 수 + (u1 V4Match synthesis / u3 last-resort fill / + u4 empty-shell — needs user/AI adaptation 신호) overall enum : PASS — visual OK + full coverage + adapter_needed=0 @@ -2444,6 +2494,8 @@ def compute_slide_status(sections: list[MdxSection], PARTIAL_COVERAGE — 일부 section 필터됨, 렌더된 부분만 visual OK PARTIAL_COVERAGE_WITH_VISUAL_REGRESSION — 둘 다 (adapter_needed > 0 시 status note 추가, overall 은 위 enum 사용) + (IMP-30 u6 : provisional_first_render_count 도 qualifier 일 뿐, overall enum 변경 X. + Stage 1 Q3 + Codex #10 D4 lock.) 
""" aligned_ids = [s.section_id for s in sections] covered = set() @@ -2555,6 +2607,29 @@ def compute_slide_status(sections: list[MdxSection], _fallback_selection_count = _v4_fb_summary.get("fallback_selection_count", 0) _selection_paths = _v4_fb_summary.get("selection_paths", []) + # IMP-30 u6 — Step 20 additive qualifier fields for the first-render invariant. + # provisional_first_render_count = number of selected units whose .provisional + # flag is True (set by u1 V4Match synthesis → u2 CompositionUnit propagation, + # u3 last-resort fill, or u4 empty-shell synthesis). The list mirrors the shape + # of fallback_selections / adapter_needed_units for symmetry. Top-level overall + # enum stays unchanged per IMP-05 Codex #10 D4 + Stage 1 Q3 decision: this + # signal is a qualifier, not a new failure class. Defensive getattr keeps the + # function safe when units come from legacy code paths predating u2. + provisional_first_render_units: list[dict] = [] + for u in units: + if not getattr(u, "provisional", False): + continue + provisional_first_render_units.append({ + "source_section_ids": list(getattr(u, "source_section_ids", []) or []), + "phase_z_status": getattr(u, "phase_z_status", None), + "frame_template_id": getattr(u, "frame_template_id", None), + "frame_id": getattr(u, "frame_id", None), + "label": getattr(u, "label", None), + "selection_path": getattr(u, "selection_path", None), + "fallback_reason": getattr(u, "fallback_reason", None), + "v4_rank": getattr(u, "v4_rank", None), + }) + return { "rendered": True, "visual_check_passed": visual_passed, @@ -2574,12 +2649,17 @@ def compute_slide_status(sections: list[MdxSection], "adapter_needed_units": adapter_needed_units, "content_truncated_count": len(content_truncated), "content_truncated_units": content_truncated, + # IMP-30 u6 — additive provisional qualifiers (overall enum unchanged). 
+ "provisional_first_render_count": len(provisional_first_render_units), + "provisional_first_render_units": provisional_first_render_units, "overall": overall, "note": ( "자동 파이프라인 결과 보고. review/UI 개념 X. final.html 파일명 != PASS 의미. " "overall == PASS 는 visual OK + full coverage + adapter_needed=0 일 때만. " "adapter_needed_count > 0 = mapper 가 contract 와 안 맞아 자동 렌더 못 한 zone 존재. " - "content_truncated_count > 0 = builder 가 truncate 한 zone 존재 (rendered 됐지만 일부 콘텐츠 손실)." + "content_truncated_count > 0 = builder 가 truncate 한 zone 존재 (rendered 됐지만 일부 콘텐츠 손실). " + "provisional_first_render_count > 0 = IMP-30 first-render invariant 가 작동한 unit 존재 " + "(empty_shell / chain_exhausted_provisional / 등 — needs user/AI adaptation)." ), } @@ -3154,25 +3234,145 @@ def run_phase_z2_mvp1( units = plan_units if not units or layout_preset is None: - # composition planner 결과 = 0 units. Sections 가 모두 V4 lookup 실패 또는 - # status filter 통과 못 함. error.json 기록 후 abort. - run_dir.mkdir(parents=True, exist_ok=True) - error_data = { - "stage": "composition_planner", - "reason": ( - "Composition planner v0 selected 0 viable units. " - f"Either no V4 entries for any section, or all candidates filtered out by " - f"allowed_statuses={sorted(MVP1_ALLOWED_STATUSES)}." - ), - "aligned_section_ids": [s.section_id for s in sections], - "composition_debug": comp_debug, - } - err_path = run_dir / "error.json" - err_path.write_text(json.dumps(error_data, ensure_ascii=False, indent=2), encoding="utf-8") - print(f"\n[Phase Z-2 MVP-1.5b] ABORT @ composition_planner", file=sys.stderr) - print(f" reason : 0 viable units after composition v0", file=sys.stderr) - print(f" error : {err_path}", file=sys.stderr) - sys.exit(1) + # IMP-30 u4 — first-render invariant. The pre-u4 path here was + # `sys.exit(1)` after writing error.json. 
That violated the invariant + # ("final.html + Step 20 slide_status MUST be written for every input + # where Step 0~5 succeed") whenever V4 evidence for any section was + # restructure/reject (chain_exhausted) or missing (no_v4_section / + # empty_v4_judgments). + # + # Recovery has two phases: + # Phase A — provisional retry (u1 + u3 opt-in). Re-run plan_composition + # with allow_provisional=True (in lookup_fn) and allow_provisional_fill + # =True. Synthesizes rank-1 provisional V4Match on chain_exhausted + # (u1) and last-resort-fills uncovered sections with provisional + # candidates (u3). Skipped when the CLI override path was used — + # re-running plan_composition there would discard the override. + # Phase B — terminal empty-shell. If retry still yields zero units + # (true "no rank-1 V4 anywhere" case, or override path with no + # resolvable assignments), synthesize a single placeholder + # CompositionUnit with frame_template_id="__empty__", layout_preset + # ="single". The per-unit loop's __empty__ guard emits a placeholder + # zones_data / debug_zones record; final.html renders the slide + # base shell (title + footer + empty zone) so the first-render + # invariant holds. Provisional flag = True surfaces the "needs + # adaptation" signal (u5 zone class + u6 status qualifier). 
+ provisional_recovered = False + if section_assignment_plan is None: + def _lookup_fn_provisional(sid: str) -> Optional[V4Match]: + match, trace = lookup_v4_match_with_fallback( + v4, + sid, + raw_content=section_content_by_id.get(sid), + max_rank=3, + alias_keys=section_alias_by_id.get(sid), + allow_provisional=True, + ) + v4_fallback_traces[sid] = trace + return match + + units_retry, layout_preset_retry, comp_debug_retry = plan_composition( + sections, + _lookup_fn_provisional, + V4_LABEL_TO_PHASE_Z_STATUS, + MVP1_ALLOWED_STATUSES, + capacity_fit_fn=compute_capacity_fit, + v4_candidates_lookup_fn=candidates_lookup_fn, + allow_provisional_fill=True, + ) + comp_debug["imp30_u4_provisional_retry"] = { + "applied": True, + "result_unit_count": len(units_retry), + "result_layout_preset": layout_preset_retry, + "candidates_summary": comp_debug_retry.get("candidates_summary"), + } + if units_retry and layout_preset_retry is not None: + units = units_retry + layout_preset = layout_preset_retry + provisional_recovered = True + # v4_fallback_traces was overwritten by _lookup_fn_provisional; + # refresh the IMP-05 selection_paths telemetry so Step 20 reflects + # the actual selection (provisional_rank_1) rather than the stale + # chain_exhausted state from the first attempt. 
+ _imp05_selection_paths_retry = [ + { + "section_id": sid, + "selection_path": t.get("selection_path"), + "selected_rank": t.get("selected_rank"), + "selected_template_id": t.get("selected_template_id"), + "fallback_trigger": ( + t.get("fallback_reason") if t.get("fallback_used") else None + ), + } + for sid, t in v4_fallback_traces.items() + ] + comp_debug["v4_fallback_selections"] = list(v4_fallback_traces.values()) + if "v4_fallback_summary" in comp_debug: + comp_debug["v4_fallback_summary"]["selection_paths"] = ( + _imp05_selection_paths_retry + ) + print( + f" [IMP-30 u4] provisional retry recovered {len(units)} unit(s) " + f"— first-render invariant preserved.", + file=sys.stderr, + ) + + if not provisional_recovered: + # Phase B — terminal empty-shell. No rank-1 V4 evidence for any + # section, or override path produced no renderable assignments. + from src.phase_z2_composition import CompositionUnit as _CompositionUnit + run_dir.mkdir(parents=True, exist_ok=True) + empty_shell_unit = _CompositionUnit( + source_section_ids=[s.section_id for s in sections], + merge_type="empty_shell", + frame_template_id="__empty__", + frame_id="__empty__", + frame_number=0, + confidence=0.0, + label="empty_shell", + phase_z_status="empty_shell", + raw_content="\n\n".join((s.raw_content or "") for s in sections), + title=" / ".join((s.title or "") for s in sections), + v4_rank=None, + selection_path="empty_shell", + fallback_reason="no_v4_rank_1_for_any_section", + score=0.0, + rationale={ + "imp30_u4": "terminal_first_render_empty_shell", + "reason": ( + "no_rank_1_V4_evidence_in_any_section" + if section_assignment_plan is None + else "section_assignment_override_yielded_no_renderable_units" + ), + "aligned_section_ids": [s.section_id for s in sections], + }, + provisional=True, + ) + units = [empty_shell_unit] + layout_preset = "single" + comp_debug["imp30_u4_empty_shell"] = { + "applied": True, + "reason": ( + "no_rank_1_V4_for_any_section" + if section_assignment_plan 
is None + else "section_assignment_override_yielded_no_renderable_units" + ), + "aligned_section_ids": [s.section_id for s in sections], + } + print( + f"\n[Phase Z-2 IMP-30 u4] EMPTY-SHELL @ composition_planner", + file=sys.stderr, + ) + print( + f" reason : " + f"{'no rank-1 V4 evidence for any section' if section_assignment_plan is None else 'override produced no renderable units'}", + file=sys.stderr, + ) + print( + f" shell : 1 placeholder unit, preset='single' " + f"(sections={[s.section_id for s in sections]})", + file=sys.stderr, + ) print(f" preset : {layout_preset} ({len(units)} units, composition v0 count-based)") for u in units: @@ -3345,6 +3545,63 @@ def run_phase_z2_mvp1( # and is byte-identical to plan_record.position in the normal case. if plan_record is not None and plan_record.get("position"): position = plan_record["position"] + + # IMP-30 u4 — empty-shell synthesized unit. frame_template_id="__empty__" + # has no catalog contract by design; bypass mapper/contract path and emit + # a placeholder zone record so render_slide() short-circuits to empty + # partial_html (existing `__empty__` branch at render_slide:2106). The + # slide_base still renders title + footer + empty grid cell so the + # first-render invariant holds; u5 will surface the provisional flag as + # a zone class + needs-adaptation badge. 
+ if unit.frame_template_id == "__empty__": + zones_data.append({ + "position": position, + "template_id": "__empty__", + "slot_payload": {}, + "content_weight": {"score": 0}, + "min_height_px": 0, + "assignment_source": "imp30_u4_empty_shell", + "section_assignment_override": False, + "provisional": bool(getattr(unit, "provisional", False)), + }) + debug_zones.append({ + "position": position, + "source_section_ids": list(unit.source_section_ids), + "merge_type": unit.merge_type, + "title": unit.title, + "v4_rank1_frame_id": unit.frame_id, + "v4_rank1_frame_number": unit.frame_number, + "v4_template_id": "__empty__", + "v4_label": unit.label, + "v4_confidence": float(unit.confidence or 0.0), + "v4_selected_rank": unit.v4_rank, + "selection_path": unit.selection_path, + "fallback_reason": unit.fallback_reason, + "fallback_used": False, + "phase_z_status": unit.phase_z_status, + "composition_score": float(unit.score or 0.0), + "composition_rationale": dict(unit.rationale or {}), + "composition_notes": list(unit.notes), + "mapper_type": "empty_shell", + "contract_id": "__empty__", + "contract_frame_id": None, + "builder": None, + "min_height_px": 0, + "slot_payload_keys": [], + "content_truncated_count": None, + "assets_dir": None, + "content_weight": {"score": 0}, + "placement_trace": None, + "assignment_source": "imp30_u4_empty_shell", + "section_assignment_override": False, + "replaced_auto_unit": None, + "skipped_collided_auto_units": [], + "uncovered_section_ids": [], + "skipped_reason": "imp30_u4_empty_shell_no_v4_evidence", + "provisional": bool(getattr(unit, "provisional", False)), + }) + continue + synth_section = MdxSection( section_id="+".join(unit.source_section_ids), section_num=0, @@ -3470,6 +3727,12 @@ def run_phase_z2_mvp1( plan_record.get("skipped_reason") if plan_record else None ) + # IMP-30 u5 — `provisional` flag flows as data through V4Match (u1) → + # CompositionUnit (u2) → zones_data here. 
slide_base.html reads + # zone.provisional to apply the `zone--provisional` class + inline + # needs-adaptation badge. Default False keeps non-provisional zones + # byte-identical to pre-u5; only u1-synthesized rank-1 fills or u4 + # empty-shell synthesize provisional=True units. zones_data.append({ "position": position, "template_id": unit.frame_template_id, @@ -3478,6 +3741,7 @@ def run_phase_z2_mvp1( "min_height_px": min_height_px, "assignment_source": plan_assignment_source, "section_assignment_override": plan_section_override, + "provisional": bool(getattr(unit, "provisional", False)), }) debug_zones.append({ "position": position, @@ -3515,6 +3779,8 @@ def run_phase_z2_mvp1( "skipped_collided_auto_units": plan_skipped_collided, "uncovered_section_ids": plan_uncovered, "skipped_reason": plan_skipped_reason, + # IMP-30 u5 — provisional signal mirror for debug.json consumers. + "provisional": bool(getattr(unit, "provisional", False)), }) # IMP-06 blocker-fix (Codex #10 Catch N) — append explicit empty zone records diff --git a/templates/phase_z2/slide_base.html b/templates/phase_z2/slide_base.html index 1c95fbe..8c8485b 100644 --- a/templates/phase_z2/slide_base.html +++ b/templates/phase_z2/slide_base.html @@ -114,6 +114,43 @@ min-height: 0; } + /* ── IMP-30 u5 : provisional zone marker (first-render invariant) ── + When V4 rank-1 candidate falls outside MVP1_ALLOWED_STATUSES (chain_exhausted) + the pipeline still renders the rank-1 frame so the first-render invariant + holds, but the zone is tagged `provisional` so the user/AI can adapt later + (IMP-31). Visual contract: + - dashed amber border + striped wash → "needs adaptation" at a glance + - inline badge top-right → text label for non-color-perceiving readers + MDX content is preserved as-is; no shrink, no rewrite. 
*/ + .zone--provisional { + outline: 2px dashed #b8860b; + outline-offset: -2px; + background-image: repeating-linear-gradient( + 45deg, + rgba(184, 134, 11, 0.04) 0, + rgba(184, 134, 11, 0.04) 8px, + transparent 8px, + transparent 16px + ); + } + .zone--provisional .zone__needs-adaptation-badge { + position: absolute; + top: 4px; + right: 4px; + z-index: 10; + padding: 2px 6px; + background: #b8860b; + color: #fff; + font-size: 9px; + font-weight: 700; + line-height: 1.2; + letter-spacing: 0.04em; + border-radius: 2px; + text-transform: uppercase; + pointer-events: none; + box-shadow: 0 1px 2px rgba(0, 0, 0, 0.15); + } + /* ── Frame-family text layout contract (shared, reusable) ── feedback-1 (mvp1.5b_test7): visible improvement 강화. Stronger hanging indent + breathing line spacing + visible hierarchy. */ @@ -264,7 +301,8 @@
{% for zone in zones %} -
+
+ {% if zone.provisional %}needs adaptation{% endif %} {{ zone.partial_html | safe }}
{% endfor %} diff --git a/tests/orchestrator_unit/test_imp17_comment_anchor.py b/tests/orchestrator_unit/test_imp17_comment_anchor.py index 015dabc..1af6970 100644 --- a/tests/orchestrator_unit/test_imp17_comment_anchor.py +++ b/tests/orchestrator_unit/test_imp17_comment_anchor.py @@ -4,6 +4,10 @@ Stage 1 finding: line 564 previously referenced a non-existent ID ("IMP-31"). The legitimate slot is IMP-17 (Gitea #17, carve-out — AI fallback only, normal path 밖). Line 565 (IMP-29 frontend zone-level override) must remain untouched. +Anchor re-pin (2026-05-20, IMP-30 u1 follow-up): V4Match.provisional field added at +src/phase_z2_pipeline.py:179-184 shifted the route-hint table down by six lines. +Pinned line numbers updated from 564/565 → 570/571 to track the actual anchor location. + Run: pytest -q tests/orchestrator_unit/test_imp17_comment_anchor.py """ from pathlib import Path @@ -16,14 +20,14 @@ def _lines() -> list[str]: return PIPELINE.read_text(encoding="utf-8").splitlines() -def test_line_564_references_imp17_not_imp31(): - line = _lines()[563] # 1-indexed line 564 - assert "restructure" in line, f"line 564 anchor drifted: {line!r}" - assert "IMP-17" in line, f"line 564 must reference IMP-17 (carve-out): {line!r}" - assert "IMP-31" not in line, f"line 564 must not reference non-existent IMP-31: {line!r}" +def test_line_570_references_imp17_not_imp31(): + line = _lines()[569] # 1-indexed line 570 + assert "restructure" in line, f"line 570 anchor drifted: {line!r}" + assert "IMP-17" in line, f"line 570 must reference IMP-17 (carve-out): {line!r}" + assert "IMP-31" not in line, f"line 570 must not reference non-existent IMP-31: {line!r}" -def test_line_565_still_references_imp29(): - line = _lines()[564] # 1-indexed line 565 - assert "reject" in line, f"line 565 anchor drifted: {line!r}" - assert "IMP-29" in line, f"line 565 must still reference IMP-29 frontend override: {line!r}" +def test_line_571_still_references_imp29(): + line = _lines()[570] # 1-indexed 
line 571 + assert "reject" in line, f"line 571 anchor drifted: {line!r}" + assert "IMP-29" in line, f"line 571 must still reference IMP-29 frontend override: {line!r}" diff --git a/tests/test_phase_z2_imp30_first_render.py b/tests/test_phase_z2_imp30_first_render.py new file mode 100644 index 0000000..fee5e4f --- /dev/null +++ b/tests/test_phase_z2_imp30_first_render.py @@ -0,0 +1,1557 @@ +"""IMP-30 first-render invariant tests (per-unit slice). + +This file is the shared regression home for IMP-30 units u2~u7. Each +implementation unit adds its own focused tests; u7 (regression coverage) +will broaden the surface (synthetic V4 fixtures for chain_exhausted +provisional, zero-V4 empty-shell, normal-path unchanged). + +u3 scope (this slice) — select_composition_units last-resort provisional +fill for uncovered sections + _candidate_state "selected_provisional": + 1. default-off behavior is byte-identical to pre-u3 (IMP-05 guard). + 2. opt-in fills uncovered sections with provisional candidates whose + selection_state would otherwise be filtered_status. + 3. opt-in never displaces normal greedy selections. + 4. opt-in respects coverage non-overlap (no section selected twice). + 5. plan_composition._candidate_state returns "selected_provisional" + for fills and "selected" for normal greedy picks. + +Synthetic naming convention (Codex #10 E1): + - MOCK_ prefix mandatory + - _a / _b suffixes = enumeration only (NOT ordering / priority) + - rank/order expressed by V4 rank field, NEVER ID suffix +""" +from __future__ import annotations + +from dataclasses import dataclass, field +from typing import Optional + +from src.phase_z2_composition import ( + CompositionUnit, + plan_composition, + select_composition_units, +) + + +# ─── Synthetic match shape (duck-typed V4Match-like) ─────────────────── + +@dataclass +class _StubV4Match: + """Duck-typed V4Match surface used by collect_candidates / score path. 
+ + Mirrors src.phase_z2_pipeline.V4Match fields touched by composition: + template_id / frame_id / frame_number / confidence / label / v4_rank / + selection_path / fallback_reason / provisional. Composition module + intentionally does not import V4Match (circular dep avoidance), so a + plain stub object with the same attributes is the contract. + """ + template_id: str + frame_id: str + frame_number: int + confidence: float + label: str + v4_rank: Optional[int] = None + selection_path: str = "rank_1" + fallback_reason: Optional[str] = None + provisional: bool = False + + +@dataclass +class _StubSection: + """Minimal section surface used by collect_candidates (section_id / + raw_content / title). Matches MdxSection's attribute names without + importing pipeline (keeps test isolated to composition module).""" + section_id: str + title: str = "" + raw_content: str = "" + + +# Phase Z status mapping fixture — only the keys exercised here are listed. +# Real mapping (V4_LABEL_TO_PHASE_Z_STATUS in pipeline) is broader; this +# stub deliberately mirrors only what the tests touch. +_LABEL_TO_STATUS = { + "use_as_is": "matched_zone", + "light_edit": "adapt_matched_zone", + "restructure": "extract_matched_zone", + "reject": "fallback_candidate", +} + +_ALLOWED_STATUSES = {"matched_zone", "adapt_matched_zone"} + + +# ─── Helpers ──────────────────────────────────────────────────────────── + +def _make_lookup(matches_by_section: dict[str, _StubV4Match]): + """Return v4_lookup_fn (section_id -> _StubV4Match | None).""" + def _fn(section_id: str): + return matches_by_section.get(section_id) + return _fn + + +def _make_candidates_lookup_empty(): + """v4_candidates_lookup_fn that always returns [] (no Step 6-A axis here).""" + def _fn(section_id: str): + return [] + return _fn + + +# ─── u3 case 1 : default-off behavior byte-identical to pre-u3 ───────── + +def test_u3_default_off_preserves_imp05_behavior(): + """IMP-05 regression guard. 
With allow_provisional_fill=False (default), + select_composition_units must yield the same units as pre-u3 even when + provisional candidates exist in the pool. + + Setup: + - S1: use_as_is + provisional=False (normal selection) + - S2: restructure + provisional=True (would be fill-eligible) + Expected (default-off): + - units = [S1 unit] only. S2 stays uncovered. + """ + sections = [_StubSection("S1"), _StubSection("S2")] + matches = { + "S1": _StubV4Match( + template_id="MOCK_template_direct_a", + frame_id="MOCK_frame_001", frame_number=1, + confidence=0.9, label="use_as_is", v4_rank=1, + ), + "S2": _StubV4Match( + template_id="MOCK_template_restructure_a", + frame_id="MOCK_frame_002", frame_number=2, + confidence=0.65, label="restructure", v4_rank=1, + selection_path="provisional_rank_1", + fallback_reason="phase_z_status_not_allowed:extract_matched_zone", + provisional=True, + ), + } + units, preset, debug = plan_composition( + sections, + _make_lookup(matches), + _LABEL_TO_STATUS, + _ALLOWED_STATUSES, + v4_candidates_lookup_fn=_make_candidates_lookup_empty(), + # allow_provisional_fill omitted → default False + ) + assert len(units) == 1 + assert units[0].source_section_ids == ["S1"] + assert units[0].provisional is False + assert preset == "single" + # S2 candidate must still appear in debug summary as filtered_status + summary_by_section = { + tuple(c["source_section_ids"]): c for c in debug["candidates_summary"] + } + assert summary_by_section[("S2",)]["selection_state"] == "filtered_status" + + +# ─── u3 case 2 : opt-in fills uncovered sections with provisional ────── + +def test_u3_opt_in_fills_uncovered_with_provisional(): + """IMP-30 u3 — opt-in path. + + Setup mirrors case 1 (S1 use_as_is + S2 provisional restructure) but + with allow_provisional_fill=True. S2 must be filled as + selected_provisional unit; greedy S1 selection unchanged. 
+ """ + sections = [_StubSection("S1"), _StubSection("S2")] + matches = { + "S1": _StubV4Match( + template_id="MOCK_template_direct_a", + frame_id="MOCK_frame_001", frame_number=1, + confidence=0.9, label="use_as_is", v4_rank=1, + ), + "S2": _StubV4Match( + template_id="MOCK_template_restructure_a", + frame_id="MOCK_frame_002", frame_number=2, + confidence=0.65, label="restructure", v4_rank=1, + selection_path="provisional_rank_1", + fallback_reason="phase_z_status_not_allowed:extract_matched_zone", + provisional=True, + ), + } + units, preset, debug = plan_composition( + sections, + _make_lookup(matches), + _LABEL_TO_STATUS, + _ALLOWED_STATUSES, + v4_candidates_lookup_fn=_make_candidates_lookup_empty(), + allow_provisional_fill=True, + ) + # Both sections must be covered now + section_ids = {sid for u in units for sid in u.source_section_ids} + assert section_ids == {"S1", "S2"} + # Identify which unit covers which section + by_section = {tuple(u.source_section_ids): u for u in units} + s1_unit = by_section[("S1",)] + s2_unit = by_section[("S2",)] + # Normal greedy pick — provisional flag stays False + assert s1_unit.provisional is False + # Provisional fill — provisional flag carried from V4Match (u1) via u2 + assert s2_unit.provisional is True + assert s2_unit.label == "restructure" + # Layout preset reflects 2-unit count + assert preset == "horizontal-2" + + +# ─── u3 case 3 : _candidate_state distinguishes selected vs provisional ─ + +def test_u3_candidate_state_marks_selected_provisional(): + """plan_composition._candidate_state must return: + - "selected" for normal greedy picks + - "selected_provisional" for last-resort fills + """ + sections = [_StubSection("S1"), _StubSection("S2")] + matches = { + "S1": _StubV4Match( + template_id="MOCK_template_direct_a", + frame_id="MOCK_frame_001", frame_number=1, + confidence=0.9, label="use_as_is", v4_rank=1, + ), + "S2": _StubV4Match( + template_id="MOCK_template_restructure_a", + frame_id="MOCK_frame_002", 
frame_number=2, + confidence=0.65, label="restructure", v4_rank=1, + selection_path="provisional_rank_1", + provisional=True, + ), + } + units, preset, debug = plan_composition( + sections, + _make_lookup(matches), + _LABEL_TO_STATUS, + _ALLOWED_STATUSES, + v4_candidates_lookup_fn=_make_candidates_lookup_empty(), + allow_provisional_fill=True, + ) + summary_by_section = { + tuple(c["source_section_ids"]): c for c in debug["candidates_summary"] + } + assert summary_by_section[("S1",)]["selection_state"] == "selected" + assert summary_by_section[("S2",)]["selection_state"] == "selected_provisional" + + +# ─── u3 case 4 : opt-in preserves non-overlap (no double coverage) ───── + +def test_u3_opt_in_respects_coverage_non_overlap(): + """Provisional fill must not pick a candidate whose source_section_ids + overlap with already-covered sections. + + Setup: + - S1 use_as_is (normal selection) + - S2 restructure provisional (eligible for fill) + - parent_merged_inferred over [S1, S2] with provisional=True + (synthetic — would normally not exist, but stresses non-overlap) + + With allow_provisional_fill=True, the [S1,S2] provisional merge must + NOT be selected (S1 already covered by normal pick). + """ + # 2 children with derive_parent_id → "S" parent. But derive_parent_id + # only triggers on "-sub-" or "-.". Use the + # canonical sub form: P-sub-1, P-sub-2 → parent P (auto-merge eligible). 
+ sections = [ + _StubSection("P-sub-1", raw_content="alpha"), + _StubSection("P-sub-2", raw_content="beta"), + ] + matches = { + "P-sub-1": _StubV4Match( + template_id="MOCK_template_direct_a", + frame_id="MOCK_frame_001", frame_number=1, + confidence=0.9, label="use_as_is", v4_rank=1, + ), + "P-sub-2": _StubV4Match( + template_id="MOCK_template_restructure_a", + frame_id="MOCK_frame_002", frame_number=2, + confidence=0.65, label="restructure", v4_rank=1, + provisional=True, + ), + # No parent V4 → branch 3 may synthesize parent_merged_inferred + # if rep child is auto-renderable (P-sub-1). Rep here is P-sub-1 + # (higher confidence) → rep_match.provisional=False, so the inferred + # merge is NOT provisional. The normal greedy pass should prefer + # the single P-sub-1 (same score, but inferred merge has coverage + # tiebreak win). Test asserts: covered set is exact, no double-fill. + } + units, preset, debug = plan_composition( + sections, + _make_lookup(matches), + _LABEL_TO_STATUS, + _ALLOWED_STATUSES, + v4_candidates_lookup_fn=_make_candidates_lookup_empty(), + allow_provisional_fill=True, + ) + covered = [] + for u in units: + covered.extend(u.source_section_ids) + # No section appears twice — non-overlap invariant + assert len(covered) == len(set(covered)) + # Both sections covered exactly once + assert set(covered) == {"P-sub-1", "P-sub-2"} + + +# ─── u3 case 5 : opt-in with no provisional candidates is a no-op ────── + +def test_u3_opt_in_noop_when_no_provisional_candidates(): + """allow_provisional_fill=True with zero provisional candidates must + behave identically to default-off. No fill is forced; uncovered sections + simply remain uncovered (u4 owns the zero-unit empty-shell terminal). 
+ """ + sections = [_StubSection("S1"), _StubSection("S2")] + matches = { + "S1": _StubV4Match( + template_id="MOCK_template_direct_a", + frame_id="MOCK_frame_001", frame_number=1, + confidence=0.9, label="use_as_is", v4_rank=1, + ), + # S2: restructure but NOT provisional (e.g., pipeline did not opt + # into u1 allow_provisional, or section had real rank-1 restructure) + "S2": _StubV4Match( + template_id="MOCK_template_restructure_a", + frame_id="MOCK_frame_002", frame_number=2, + confidence=0.65, label="restructure", v4_rank=1, + provisional=False, + ), + } + units, preset, debug = plan_composition( + sections, + _make_lookup(matches), + _LABEL_TO_STATUS, + _ALLOWED_STATUSES, + v4_candidates_lookup_fn=_make_candidates_lookup_empty(), + allow_provisional_fill=True, + ) + assert len(units) == 1 + assert units[0].source_section_ids == ["S1"] + assert preset == "single" + # S2 remains filtered_status — not provisional, so u3 fill ignores it + summary_by_section = { + tuple(c["source_section_ids"]): c for c in debug["candidates_summary"] + } + assert summary_by_section[("S2",)]["selection_state"] == "filtered_status" + + +# ─── u3 case 6 : select_composition_units direct invocation parity ───── + +def test_u3_select_composition_units_default_off_signature(): + """Direct invocation without keyword-only u3 args must remain valid + (backward-compat for existing callers that import the function directly). + """ + # Build a minimal CompositionUnit by hand — bypass collect_candidates. 
+ c1 = CompositionUnit( + source_section_ids=["S1"], + merge_type="single", + frame_template_id="MOCK_template_direct_a", + frame_id="MOCK_frame_001", + frame_number=1, + confidence=0.9, + label="use_as_is", + phase_z_status="matched_zone", + raw_content="alpha", + title="S1", + ) + units = select_composition_units([c1], _ALLOWED_STATUSES) + assert len(units) == 1 + assert units[0].source_section_ids == ["S1"] + + +def test_u3_select_composition_units_opt_in_direct(): + """Direct invocation with u3 opt-in must fill uncovered section from + provisional candidate pool, leaving greedy pick untouched. + """ + c_greedy = CompositionUnit( + source_section_ids=["S1"], + merge_type="single", + frame_template_id="MOCK_template_direct_a", + frame_id="MOCK_frame_001", + frame_number=1, + confidence=0.9, + label="use_as_is", + phase_z_status="matched_zone", + raw_content="alpha", + title="S1", + ) + c_provisional = CompositionUnit( + source_section_ids=["S2"], + merge_type="single", + frame_template_id="MOCK_template_restructure_a", + frame_id="MOCK_frame_002", + frame_number=2, + confidence=0.65, + label="restructure", + phase_z_status="extract_matched_zone", + raw_content="beta", + title="S2", + provisional=True, + ) + units = select_composition_units( + [c_greedy, c_provisional], + _ALLOWED_STATUSES, + all_section_ids=["S1", "S2"], + allow_provisional_fill=True, + ) + assert len(units) == 2 + by_section = {tuple(u.source_section_ids): u for u in units} + assert by_section[("S1",)].provisional is False + assert by_section[("S2",)].provisional is True + + +# ════════════════════════════════════════════════════════════════════════ +# u4 — pipeline abort guard empty-shell synthesis +# ════════════════════════════════════════════════════════════════════════ +# +# u4 replaces the pre-IMP-30 `sys.exit(1)` at the composition_planner abort +# guard with two-phase recovery: provisional retry (Phase A, opt-in u1+u3) +# then terminal empty-shell (Phase B). 
The shell is a single CompositionUnit +# with frame_template_id="__empty__" and preset="single"; the per-unit +# for-loop's __empty__ branch bypasses mapper/contract and emits a +# placeholder zones_data/debug_zones record so final.html still writes. +# +# These tests verify the composition-side invariants that u4 relies on: +# - CompositionUnit can be constructed in the empty-shell shape. +# - The shell shape carries the data needed for u5 (provisional flag) / +# u6 (status qualifier) / render_slide __empty__ branch (template_id). +# The pipeline-level integration (provisional retry / empty-shell synthesis +# at the abort guard, plus the per-unit __empty__ bypass) is covered by +# u7 (regression coverage) with synthetic V4 fixtures. + + +def test_u4_empty_shell_unit_shape_matches_pipeline_synthesis(): + """The empty-shell CompositionUnit synthesized at the IMP-30 u4 abort + guard must carry the field shape downstream consumers (per-unit + __empty__ branch, compute_slide_status, slide_base template) rely on. + + Required invariants (per src/phase_z2_pipeline.py:3203~ u4 block): + - frame_template_id == "__empty__" → render_slide short-circuits + partial_html to "" (existing __empty__ branch at line 2106). + - phase_z_status == "empty_shell" → Step 20 distinguishes from + matched_zone / adapt_matched_zone / extract_matched_zone / + fallback_candidate (u6 surfaces this as additive qualifier). + - provisional == True → u5 zone--provisional class + needs-adaptation + badge (template-side wiring). + - source_section_ids covers all aligned section ids → compute_slide_status + treats every section as "covered by the shell" (u6 marks the count + of provisional_first_render_units). + - selection_path == "empty_shell" / fallback_reason set → audit trace + survives in step06_composition_plan.json. 
+ """ + aligned_section_ids = ["S1", "S2", "S3"] + raw_contents = ["alpha", "beta", "gamma"] + titles = ["First", "Second", "Third"] + + shell = CompositionUnit( + source_section_ids=list(aligned_section_ids), + merge_type="empty_shell", + frame_template_id="__empty__", + frame_id="__empty__", + frame_number=0, + confidence=0.0, + label="empty_shell", + phase_z_status="empty_shell", + raw_content="\n\n".join(raw_contents), + title=" / ".join(titles), + v4_rank=None, + selection_path="empty_shell", + fallback_reason="no_v4_rank_1_for_any_section", + score=0.0, + rationale={ + "imp30_u4": "terminal_first_render_empty_shell", + "reason": "no_rank_1_V4_evidence_in_any_section", + "aligned_section_ids": aligned_section_ids, + }, + provisional=True, + ) + + assert shell.frame_template_id == "__empty__" + assert shell.frame_id == "__empty__" + assert shell.label == "empty_shell" + assert shell.phase_z_status == "empty_shell" + assert shell.provisional is True + assert shell.selection_path == "empty_shell" + assert shell.fallback_reason == "no_v4_rank_1_for_any_section" + assert shell.source_section_ids == aligned_section_ids + assert shell.v4_rank is None + assert shell.confidence == 0.0 + assert shell.score == 0.0 + # MDX content preserved (no rewrite) — full raw content kept in the unit + # even though no V4 mapping is applied. Adaptation deferred to IMP-31. + assert shell.raw_content == "alpha\n\nbeta\n\ngamma" + # Rationale carries the audit trail consumed by Step 6 artifact + u6. + assert shell.rationale["imp30_u4"] == "terminal_first_render_empty_shell" + assert shell.rationale["aligned_section_ids"] == aligned_section_ids + + +def test_u4_empty_shell_unit_default_provisional_is_false(): + """Smoke test — provisional flag is opt-in. A plain CompositionUnit + (no explicit provisional=True) does NOT mark itself as empty-shell. + Guards against accidental positive on normal units when u5 / u6 read + unit.provisional. 
+ """ + normal = CompositionUnit( + source_section_ids=["S1"], + merge_type="single", + frame_template_id="MOCK_template_direct_a", + frame_id="MOCK_frame_001", + frame_number=1, + confidence=0.9, + label="use_as_is", + phase_z_status="matched_zone", + raw_content="alpha", + title="S1", + ) + assert normal.provisional is False + assert normal.frame_template_id != "__empty__" + + +def test_u4_empty_shell_phase_z_status_outside_mvp1_allowed(): + """The empty-shell unit's phase_z_status ('empty_shell') must NOT be + inside MVP1_ALLOWED_STATUSES. If it were, future code that loops over + units filtered by allowed_statuses would treat the shell as a normal + matched zone — defeating the "needs adaptation" signal. + + This test pins the contract at the composition-test level so a status + rename in the pipeline cannot silently leak the shell into normal flows. + """ + # _ALLOWED_STATUSES mirrors the pipeline's MVP1_ALLOWED_STATUSES + # ({"matched_zone", "adapt_matched_zone"}). The shell uses a distinct + # status so downstream filters reject it. + assert "empty_shell" not in _ALLOWED_STATUSES + + +# ════════════════════════════════════════════════════════════════════════ +# u5 — zones_data carries provisional flag; slide_base.html zone div adds +# zone--provisional class + inline needs-adaptation badge +# ════════════════════════════════════════════════════════════════════════ +# +# u5 wires the unit.provisional signal (set by u2 from V4Match.provisional in +# u1, or directly by u4 empty-shell synthesis) through the zones_data payload +# into the slide_base.html template. Visual contract: +# - zones_data[i]['provisional'] = bool (default False; True only for IMP-30 +# opt-in synthesized units). +# - slide_base.html zone div gets `zone--provisional` class when True; an +# inline `needs adaptation` +# element is rendered inside the zone (top-right corner via absolute pos). +# - data-provisional="1" attribute set for downstream selectors / overflow +# checker / e2e tooling. 
+# +# The composition / pipeline-level handoff is exercised by u3 / u4 already. +# u5 tests focus on: +# - template-rendering output: class + badge HTML correctly emitted ONLY when +# zones[i].provisional is truthy. (default-off path unchanged.) +# - byte-equivalence: non-provisional zones render the same div shape as +# pre-u5 (just no zone--provisional class / no badge element). + +import re +from pathlib import Path + +from jinja2 import Environment, FileSystemLoader, select_autoescape + + +# ─── u5 helpers ──────────────────────────────────────────────────────── + +def _render_slide_base(zones: list[dict], *, layout_preset: str = "single", + layout_css: dict | None = None) -> str: + """Render templates/phase_z2/slide_base.html directly via Jinja2 with a + minimal zones list. Bypasses render_slide() so u5 can exercise the + template-only contract without spinning up the full pipeline (no mapper, + no contracts, no token CSS loader). slot_payload / partial_html are + stubbed to fixed strings so the test focuses on zone div attributes.""" + template_dir = Path(__file__).resolve().parents[1] / "templates" / "phase_z2" + env = Environment( + loader=FileSystemLoader(str(template_dir)), + autoescape=select_autoescape(["html"]), + ) + if layout_css is None: + layout_css = { + "cols": "1fr", + "rows": "1fr", + "areas": '"single"', + } + # Each zone needs a partial_html (render_slide normally populates this). + # Use a stable placeholder per zone so the assertion can target zone-level + # attributes without coupling to frame template internals. + for z in zones: + z.setdefault("partial_html", "
stub
") + base = env.get_template("slide_base.html") + return base.render( + slide_title="IMP-30 u5 test slide", + slide_footer=None, + zones=zones, + layout_preset=layout_preset, + layout_css=layout_css, + gap_px=12, + token_css="", # empty token CSS — not under test here + embedded_mode="standalone", + ) + + +def _zone_div_for_position(html: str, position: str) -> str: + """Return the opening tag + immediate inner content (up to but not + including partial_html) for the zone div at a given `data-zone-position` + value. Tight enough for class/attribute assertions, lenient enough not + to depend on partial_html internals.""" + pattern = re.compile( + r'
]*>' + r'(?:\s*]*>[^<]*)?', + re.DOTALL, + ) + match = pattern.search(html) + if not match: + return "" + return match.group(0) + + +def _all_zone_div_openings(html: str) -> list[str]: + """Return every zone-div opening tag in the layout body. Used to scope + class / attribute assertions away from the CSS