diff --git a/src/phase_z2_composition.py b/src/phase_z2_composition.py index e0e417b..d5c4909 100644 --- a/src/phase_z2_composition.py +++ b/src/phase_z2_composition.py @@ -343,6 +343,9 @@ class CompositionUnit: phase_z_status: str raw_content: str title: str + v4_rank: Optional[int] = None + selection_path: str = "rank_1" + fallback_reason: Optional[str] = None score: float = 0.0 rationale: dict = field(default_factory=dict) @@ -473,6 +476,9 @@ def collect_candidates(sections, v4_lookup_fn, v4_label_to_status: dict, confidence=match.confidence, label=match.label, phase_z_status=v4_label_to_status.get(match.label, "unknown"), + v4_rank=getattr(match, "v4_rank", None), + selection_path=getattr(match, "selection_path", "rank_1"), + fallback_reason=getattr(match, "fallback_reason", None), raw_content=s.raw_content, title=s.title, v4_candidates=_v4_cands(s.section_id), @@ -504,6 +510,9 @@ def collect_candidates(sections, v4_lookup_fn, v4_label_to_status: dict, confidence=parent_match.confidence, label=parent_match.label, phase_z_status=v4_label_to_status.get(parent_match.label, "unknown"), + v4_rank=getattr(parent_match, "v4_rank", None), + selection_path=getattr(parent_match, "selection_path", "rank_1"), + fallback_reason=getattr(parent_match, "fallback_reason", None), raw_content=merged_raw, title=pid, v4_candidates=_v4_cands(pid), @@ -597,6 +606,9 @@ def collect_candidates(sections, v4_lookup_fn, v4_label_to_status: dict, confidence=rep_match.confidence, label=rep_match.label, phase_z_status=rep_status, + v4_rank=getattr(rep_match, "v4_rank", None), + selection_path=getattr(rep_match, "selection_path", "rank_1"), + fallback_reason=getattr(rep_match, "fallback_reason", None), raw_content=merged_raw, title=pid, auto_selectable=auto_selectable, @@ -773,6 +785,9 @@ def plan_composition(sections, v4_lookup_fn, v4_label_to_status: dict, "template_id": c.frame_template_id, "label": c.label, "phase_z_status": c.phase_z_status, + "v4_rank": c.v4_rank, + "selection_path": c.selection_path, + "fallback_reason": c.fallback_reason, "score": c.score, "selection_state": _candidate_state(c), "auto_selectable": c.auto_selectable, diff --git a/src/phase_z2_pipeline.py b/src/phase_z2_pipeline.py index 898f5e8..c49e430 100644 --- a/src/phase_z2_pipeline.py +++ b/src/phase_z2_pipeline.py @@ -146,6 +146,9 @@ class V4Match: template_id: str confidence: float label: str + v4_rank: Optional[int] = None + selection_path: str = "rank_1" + fallback_reason: Optional[str] = None def to_phase_z_status(match: V4Match) -> str: @@ -408,6 +411,19 @@ def align_sections_to_v4_granularity(sections: list[MdxSection], v4: dict) -> li return aligned +def _v4_match_from_judgment(section_id: str, judgment: dict, rank: Optional[int] = None) -> V4Match: + resolved_rank = rank if rank is not None else judgment.get("v4_full_rank") + return V4Match( + section_id=section_id, + frame_id=str(judgment["frame_id"]), + frame_number=int(judgment["frame_number"]), + template_id=judgment["template_id"], + confidence=float(judgment["confidence"]), + label=judgment["label"], + v4_rank=int(resolved_rank) if resolved_rank is not None else None, + ) + + def lookup_v4_match(v4: dict, section_id: str) -> Optional[V4Match]: sec = v4.get("mdx_sections", {}).get(section_id) if not sec: @@ -416,14 +432,128 @@ def lookup_v4_match(v4: dict, section_id: str) -> Optional[V4Match]: if not judgments: return None top = judgments[0] - return V4Match( - section_id=section_id, - frame_id=str(top["frame_id"]), - frame_number=int(top["frame_number"]), - template_id=top["template_id"], - confidence=float(top["confidence"]), - label=top["label"], - ) + return _v4_match_from_judgment(section_id, top, rank=1) + + +# IMP-05 L2/L5 route hint — V4 label → execution route guidance for future consumers +# (frontend zone-level override / AI-assisted adaptation). Codex #2 conceptual model : +# use_as_is → Phase Z direct render +# light_edit → deterministic minor adjustment +# restructure → AI-assisted frame-aware adaptation (deferred to IMP-31) +# reject → design reference only (deferred to IMP-29 frontend override) +_IMP05_ROUTE_HINTS: dict[str, str] = { + "use_as_is": "direct_render", + "light_edit": "deterministic_minor_adjustment", + "restructure": "ai_adaptation_required", + "reject": "design_reference_only", +} + + +def _imp05_route_hint(label: Optional[str]) -> Optional[str]: + """Map V4 label to execution route hint. Returns None for unknown labels.""" + if label is None: + return None + return _IMP05_ROUTE_HINTS.get(label) + + +def lookup_v4_match_with_fallback( + v4: dict, + section_id: str, + *, + raw_content: Optional[str] = None, + max_rank: int = 3, +) -> tuple[Optional[V4Match], dict]: + """Select V4 rank-1, or promote rank-2/3 when rank-1 is not auto-renderable. + + This is an IMP-05 selector only. It uses existing V4 labels, frame-contract + presence, and the Phase Z capacity precheck; it does not call calculate_fit. + """ + sec = v4.get("mdx_sections", {}).get(section_id) + trace = { + "section_id": section_id, + "max_rank": max_rank, + "selection_path": "no_v4_candidate", + "selected_rank": None, + "selected_template_id": None, + "selected_frame_id": None, + "selected_label": None, + "fallback_used": False, + "fallback_reason": None, + "candidates": [], + } + if not sec: + trace["fallback_reason"] = "no_v4_section" + return None, trace + + judgments = (sec.get("judgments_full32") or [])[:max_rank] + if not judgments: + trace["fallback_reason"] = "empty_v4_judgments" + return None, trace + + first_skip_reason: Optional[str] = None + for i, judgment in enumerate(judgments, start=1): + match = _v4_match_from_judgment(section_id, judgment, rank=i) + status = to_phase_z_status(match) + # IMP-05 L2 (Codex #10 E4) — informative candidate_evidence schema. + # `v4_label` naming matches Codex schema (Claude #13 §1 lock). + # `filtered_for_direct_execution` + `route_hint` = L5 restructure/reject trace 보존 + # 단일 source (frontend/AI future consumer guidance). + is_direct_eligible = status in MVP1_ALLOWED_STATUSES + candidate_trace = { + "rank": i, + "template_id": match.template_id, + "frame_id": match.frame_id, + "frame_number": match.frame_number, + "confidence": match.confidence, + "label": match.label, # existing — kept for backward compat + "v4_label": match.label, # IMP-05 L2 alias (Codex schema) + "phase_z_status": status, + "catalog_registered": get_contract(match.template_id) is not None, + "filtered_for_direct_execution": not is_direct_eligible, # IMP-05 L2/L5 + "route_hint": _imp05_route_hint(match.label), # IMP-05 L2/L5 + "decision": "skipped", + "reason": None, + } + + if status not in MVP1_ALLOWED_STATUSES: + candidate_trace["reason"] = f"phase_z_status_not_allowed:{status}" + elif get_contract(match.template_id) is None: + candidate_trace["reason"] = "skipped_no_contract" + else: + capacity_fit = None + if raw_content is not None: + capacity_fit = compute_capacity_fit(match.template_id, raw_content) + candidate_trace["capacity_fit"] = capacity_fit + if capacity_fit and capacity_fit.get("fit_status") not in { + "ok", "no_contract", "unknown_source_shape", + }: + candidate_trace["reason"] = f"capacity_mismatch:{capacity_fit.get('fit_status')}" + else: + fallback_used = i > 1 + fallback_reason = first_skip_reason if fallback_used else None + match.selection_path = f"rank_{i}" if not fallback_used else f"rank_{i}_fallback" + match.fallback_reason = fallback_reason + candidate_trace["decision"] = "selected" + candidate_trace["reason"] = "primary_selected" if i == 1 else "fallback_selected" + trace["candidates"].append(candidate_trace) + trace.update({ + "selection_path": match.selection_path, + "selected_rank": i, + "selected_template_id": match.template_id, + "selected_frame_id": match.frame_id, + "selected_label": match.label, + "fallback_used": fallback_used, + "fallback_reason": fallback_reason, + }) + return match, trace + + if i == 1: + first_skip_reason = candidate_trace["reason"] + trace["candidates"].append(candidate_trace) + + trace["selection_path"] = "chain_exhausted" + trace["fallback_reason"] = first_skip_reason or "no_auto_renderable_rank_1_to_3" + return None, trace def lookup_v4_all_judgments(v4: dict, section_id: str) -> list[V4Match]: @@ -442,14 +572,7 @@ def lookup_v4_all_judgments(v4: dict, section_id: str) -> list[V4Match]: judgments = sec.get("judgments_full32", []) out: list[V4Match] = [] for j in judgments: - out.append(V4Match( - section_id=section_id, - frame_id=str(j["frame_id"]), - frame_number=int(j["frame_number"]), - template_id=j["template_id"], - confidence=float(j["confidence"]), - label=j["label"], - )) + out.append(_v4_match_from_judgment(section_id, j)) return out @@ -482,14 +605,7 @@ def lookup_v4_candidates( for j in judgments: if j.get("label") == "reject": continue - candidates.append(V4Match( - section_id=section_id, - frame_id=str(j["frame_id"]), - frame_number=int(j["frame_number"]), - template_id=j["template_id"], - confidence=float(j["confidence"]), - label=j["label"], - )) + candidates.append(_v4_match_from_judgment(section_id, j)) if len(candidates) >= max_n: break return candidates @@ -1187,7 +1303,17 @@ def compute_slide_status(sections: list[MdxSection], adapter_needed_units = list(adapter_needed_units or []) content_truncated = [] + fallback_selections = [] for z in (debug_zones or []): + if z.get("fallback_used"): + fallback_selections.append({ + "position": z["position"], + "source_section_ids": z["source_section_ids"], + "template_id": z["v4_template_id"], + "selected_v4_rank": z.get("v4_selected_rank"), + "selection_path": z.get("selection_path"), + "fallback_reason": z.get("fallback_reason"), + }) tc = z.get("content_truncated_count") if tc: content_truncated.append({ @@ -1232,6 +1358,9 @@ def compute_slide_status(sections: list[MdxSection], "covered_section_ids": sorted(covered), "filtered_section_ids": filtered_ids, "filtered_section_reasons": filtered_section_reasons, + "selection_path": "fallback_used" if fallback_selections else "rank_1", + "fallback_used": bool(fallback_selections), + "fallback_selections": fallback_selections, "visual_fail_reasons": list(overflow.get("fail_reasons") or []), "adapter_needed_count": len(adapter_needed_units), "adapter_needed_units": adapter_needed_units, @@ -1601,8 +1730,18 @@ def run_phase_z2_mvp1( # 4. Composition planner v0 — replaces per-section + select_layout_preset. # candidate (separate / parent_merged) → score → greedy non-overlapping select → # layout preset (count-based v0). + section_content_by_id = {s.section_id: s.raw_content for s in sections} + v4_fallback_traces: dict[str, dict] = {} + def lookup_fn(sid: str) -> Optional[V4Match]: - return lookup_v4_match(v4, sid) + match, trace = lookup_v4_match_with_fallback( + v4, + sid, + raw_content=section_content_by_id.get(sid), + max_rank=3, + ) + v4_fallback_traces[sid] = trace + return match # Step 6-A axis (사용자 lock 2026-05-08) — V4 raw dict 흡수 fn. # composition module 은 V4 yaml shape 모름. 본 fn 만 통해 후보 list 받음. @@ -1614,6 +1753,35 @@ def run_phase_z2_mvp1( capacity_fit_fn=compute_capacity_fit, v4_candidates_lookup_fn=candidates_lookup_fn, ) + comp_debug["v4_fallback_selections"] = list(v4_fallback_traces.values()) + # IMP-05 L3 (Codex #10 D4) — Step 20 qualifier fields (additive only, no top-level enum change). + # `fallback_selection_count` = number of sections where rank-2/3 was promoted. + # `selection_paths` = per-section selection_path summary (rank_1 / rank_N_fallback / chain_exhausted). + # Top-level slide status enum (PASS / PARTIAL_COVERAGE / ...) remains stable. + _imp05_selection_paths = [ + { + "section_id": sid, + "selection_path": t.get("selection_path"), + "selected_rank": t.get("selected_rank"), + "selected_template_id": t.get("selected_template_id"), + "fallback_trigger": t.get("fallback_reason") if t.get("fallback_used") else None, + } + for sid, t in v4_fallback_traces.items() + ] + comp_debug["v4_fallback_summary"] = { + "fallback_used_count": sum(1 for t in v4_fallback_traces.values() if t.get("fallback_used")), + "fallback_selection_count": sum(1 for t in v4_fallback_traces.values() if t.get("fallback_used")), + "chain_exhausted_count": sum( + 1 for t in v4_fallback_traces.values() + if t.get("selection_path") == "chain_exhausted" + ), + "selection_paths": _imp05_selection_paths, + "policy": ( + "IMP-05: rank-1 is kept when usable; rank-2/3 may be promoted only when " + "the earlier rank is not auto-renderable, has no catalog contract, or fails " + "capacity precheck. calculate_fit is not used." + ), + } # ── Step 7-A axis : layout override ── # 사용자가 LayoutPanel 에서 다른 preset 을 선택했을 때 자동 결정값을 강제 변경. @@ -1678,6 +1846,9 @@ def run_phase_z2_mvp1( "frame_number": u.frame_number, "frame_template_id": u.frame_template_id, "label": u.label, + "v4_rank": u.v4_rank, + "selection_path": u.selection_path, + "fallback_reason": u.fallback_reason, "score": u.score, "phase_z_status": u.phase_z_status, "rationale": u.rationale, @@ -1896,6 +2067,10 @@ def run_phase_z2_mvp1( "v4_template_id": unit.frame_template_id, "v4_label": unit.label, "v4_confidence": unit.confidence, + "v4_selected_rank": unit.v4_rank, + "selection_path": unit.selection_path, + "fallback_reason": unit.fallback_reason, + "fallback_used": bool(unit.selection_path and "fallback" in unit.selection_path), "phase_z_status": unit.phase_z_status, "composition_score": unit.score, "composition_rationale": unit.rationale, @@ -2019,9 +2194,12 @@ def run_phase_z2_mvp1( { "position": dz["position"], "v4_rank1_frame_number": dz.get("v4_rank1_frame_number"), + "v4_selected_rank": dz.get("v4_selected_rank"), "v4_template_id": dz.get("v4_template_id"), "v4_confidence": dz.get("v4_confidence"), "v4_label": dz.get("v4_label"), + "selection_path": dz.get("selection_path"), + "fallback_reason": dz.get("fallback_reason"), "phase_z_status": dz.get("phase_z_status"), "selected_template_id": dz.get("contract_id"), "mapper_type": dz.get("mapper_type"), @@ -2490,9 +2668,8 @@ def run_phase_z2_mvp1( has_v4 = bool(unit.v4_candidates) candidate_status = "ok" if has_v4 else "no_non_reject_v4_candidate" application_status = "ok" if has_v4 else "no_v4_candidate" - current_default = ( - unit.v4_candidates[0].template_id if has_v4 else None - ) + current_default = unit.frame_template_id if has_v4 else None + selection_trace = v4_fallback_traces.get(unit.source_section_ids[0], {}) # Step 7-A axis 보강 — reject 포함 모든 V4 judgments (frontend UI 가 # 모든 frame 의 png 를 카드로 보여주기 위함). @@ -2525,11 +2702,17 @@ def run_phase_z2_mvp1( "candidate_status": candidate_status, "application_status": application_status, "current_default_candidate": current_default, + "selected_v4_rank": unit.v4_rank, + "selection_path": unit.selection_path, + "fallback_used": bool(unit.selection_path and "fallback" in unit.selection_path), + "fallback_reason": unit.fallback_reason, + "fallback_chain": selection_trace.get("candidates", []), "v4_candidates": [ { "template_id": c.template_id, "frame_id": c.frame_id, "frame_number": c.frame_number, + "v4_rank": c.v4_rank, "confidence": c.confidence, "label": c.label, } @@ -2546,6 +2729,7 @@ def run_phase_z2_mvp1( "template_id": c.template_id, "frame_id": c.frame_id, "frame_number": c.frame_number, + "v4_rank": c.v4_rank, "confidence": c.confidence, "label": c.label, "catalog_registered": get_contract(c.template_id) is not None, @@ -2566,7 +2750,11 @@ def run_phase_z2_mvp1( "units": application_plan_units, "candidate_status_summary": { "units_with_no_v4_candidate": units_with_no_v4, + "units_with_fallback": [ + u["unit_id"] for u in application_plan_units if u.get("fallback_used") + ], }, + "fallback_policy": comp_debug.get("v4_fallback_summary"), # Step 7-A axis : user override trace "frame_overrides_applied": frame_overrides_applied, "frame_overrides_skipped": frame_overrides_skipped, @@ -2617,6 +2805,13 @@ def run_phase_z2_mvp1( f'{u["current_default_candidate"]}' if u["current_default_candidate"] else 'null' ) + _fallback_html = ( + f' | selection_path: {u.get("selection_path")}' + f' | selected_v4_rank: {u.get("selected_v4_rank")}' + f' | fallback_reason: {u.get("fallback_reason")}' + if u.get("fallback_used") else + f' | selection_path: {u.get("selection_path")}' + ) _layout_pills = " ".join( f'{lc}{" ★" if k == 0 else ""}' @@ -2635,7 +2830,7 @@ def run_phase_z2_mvp1( _app_rows = "" for k, ac in enumerate(u["application_candidates"]): _bg, _fg = _mode_color.get(ac["application_mode"], ("#f1f5f9", "#475569")) - _is_default = (k == 0) + _is_default = (ac["template_id"] == u["current_default_candidate"]) _default_mark = ( ' current_default' @@ -2661,7 +2856,7 @@ def run_phase_z2_mvp1( f'

unit: {u["unit_id"]} {_status_badge}

' f'

' f'layout_preset (default): {u["layout_preset"]} | ' - f'current_default_candidate: {_default_html}

' + f'current_default_candidate: {_default_html}{_fallback_html}

' f'

layout_candidates (★ default): {_layout_pills}

' f'

region_layout_candidates (★ default, placeholder): {_region_pills}

' f'

display_strategy_candidates (★ default, placeholder): {_display_pills}

' @@ -2775,6 +2970,12 @@ def run_phase_z2_mvp1( # *매핑까지만*. 실행 / rerender / behavior 변경 X. # classifications 각 entry 에 proposed_action 추가, router_decision summary 반환. router_decision = route_fit_classification(fit_classification) + router_decision["v4_fallback_summary"] = comp_debug.get("v4_fallback_summary") + router_decision["v4_fallback_selections"] = comp_debug.get("v4_fallback_selections", []) + router_decision["frame_reselect_fallback_status"] = ( + "pre_render_rank_2_3_fallback_implemented; " + "post_render visual-fail rerender remains routed through existing action trace" + ) # ─── Step 16: Overflow Router ─── _write_step_artifact( @@ -2812,6 +3013,12 @@ def run_phase_z2_mvp1( # post-retry classifier / router 재실행 — 새 overflow 가 통과면 router_active=False fit_classification = classify_visual_runtime_check(overflow, debug_zones) router_decision = route_fit_classification(fit_classification) + router_decision["v4_fallback_summary"] = comp_debug.get("v4_fallback_summary") + router_decision["v4_fallback_selections"] = comp_debug.get("v4_fallback_selections", []) + router_decision["frame_reselect_fallback_status"] = ( + "pre_render_rank_2_3_fallback_implemented; " + "post_render visual-fail rerender remains routed through existing action trace" + ) # 11.6 retry_failure_classifier + next_action_router (A4 — 분류/매핑만, 실행 X) # retry 실패 시 failure_type 분류 + next_proposed_action 기록 (escalation 후보). diff --git a/src/phase_z2_router.py b/src/phase_z2_router.py index e81dc0f..52beca7 100644 --- a/src/phase_z2_router.py +++ b/src/phase_z2_router.py @@ -62,7 +62,7 @@ ACTION_IMPLEMENTATION_STATUS: dict[str, str] = { "zone_ratio_retry": "IMPLEMENTED", # A3 (2026-04-29) phase_z2_retry.plan_zone_ratio_retry + pipeline orchestration "layout_adjust": "MISSING", "details_popup_escalation": "MISSING", # CLAUDE.md 의
원칙은 있음, runtime 미구현 - "frame_reselect": "MISSING", # V4 top-k 자료는 있음, planner 가 rank-1 만 + "frame_reselect": "PARTIAL", # IMP-05 pre-render rank-2/3 fallback implemented; post-render rerender trace-only "adapter_needed": "PARTIAL", # composition v0.1.1 의 mapper FitError catch "abort": "IMPLEMENTED", # sys.exit(1) — pipeline 의 현재 default } diff --git a/tests/test_catalog_invariant.py b/tests/test_catalog_invariant.py new file mode 100644 index 0000000..f5b8a90 --- /dev/null +++ b/tests/test_catalog_invariant.py @@ -0,0 +1,81 @@ +"""Phase Z catalog invariant test — real `frame_contracts.yaml` 1:1 mapping verify. + +IMP-05 L4 lock per Claude #13 §3 : + - real catalog read (purpose 자체 = real catalog 검증) + - template_id ↔ frame_id 1:1 mapping (Codex #6 terminology — 2 reference keys for same entry) + - fail fast with explicit message if catalog policy changes + +Codex #5 verified : 11 templates / 11 frames, all unique = 1:1 mapping confirm (2026-05-13). +Codex #7 generalization guardrail : real catalog OK (purpose 자체) — NOT sample-hardcoding. +""" +from __future__ import annotations + +from pathlib import Path + +import pytest +import yaml + +PROJECT_ROOT = Path(__file__).parent.parent +CATALOG_PATH = PROJECT_ROOT / "templates" / "phase_z2" / "catalog" / "frame_contracts.yaml" + + +def _load_catalog() -> dict: + with CATALOG_PATH.open(encoding="utf-8") as f: + return yaml.safe_load(f) + + +def test_catalog_template_id_to_frame_id_one_to_one(): + """Verify each catalog entry has unique template_id + unique frame_id (1:1 reference keys). + + Fails fast if the catalog policy ever drifts from this assumption — IMP-05 dedup + relies on `template_id` as the runtime key and assumes one frame per template. + """ + catalog = _load_catalog() + + template_ids = [] + frame_ids = [] + for entry_key, entry in catalog.items(): + if not isinstance(entry, dict): + continue + tid = entry.get("template_id") + fid = entry.get("frame_id") + assert tid is not None, f"entry {entry_key} missing template_id" + assert fid is not None, f"entry {entry_key} missing frame_id" + template_ids.append(tid) + frame_ids.append(str(fid)) + + duplicate_templates = [t for t in template_ids if template_ids.count(t) > 1] + duplicate_frames = [f for f in frame_ids if frame_ids.count(f) > 1] + + assert not duplicate_templates, ( + "Phase Z catalog currently expects one template_id per frame_id; " + "update dedup policy if this changes. " + f"Duplicate template_ids found: {set(duplicate_templates)}" + ) + assert not duplicate_frames, ( + "Phase Z catalog currently expects one template_id per frame_id; " + "update dedup policy if this changes. " + f"Duplicate frame_ids found: {set(duplicate_frames)}" + ) + assert len(template_ids) == len(frame_ids), ( + "Phase Z catalog template_id count must equal frame_id count " + f"(templates={len(template_ids)}, frames={len(frame_ids)})." + ) + + +def test_catalog_entry_count_matches_frame_count(): + """Sanity guard — each entry contributes one template_id + one frame_id.""" + catalog = _load_catalog() + entry_count = sum(1 for v in catalog.values() if isinstance(v, dict)) + template_count = sum( + 1 for v in catalog.values() + if isinstance(v, dict) and v.get("template_id") is not None + ) + frame_count = sum( + 1 for v in catalog.values() + if isinstance(v, dict) and v.get("frame_id") is not None + ) + assert entry_count == template_count == frame_count, ( + f"catalog shape inconsistent: entries={entry_count} " + f"templates={template_count} frames={frame_count}" + ) diff --git a/tests/test_phase_z2_v4_fallback.py b/tests/test_phase_z2_v4_fallback.py new file mode 100644 index 0000000..fb7426b --- /dev/null +++ b/tests/test_phase_z2_v4_fallback.py @@ -0,0 +1,266 @@ +"""IMP-05 V4 fallback selector behavior tests — fully synthetic per Codex #10 E1 + Claude #13. + +Lock per round 65~73 + Claude #13 §3 L4' : + - 6 explicit behavior cases (Codex #10 E4) + - fully synthetic MOCK_ IDs (Codex #7 generalization guardrail + Codex #10 E1 naming) + - monkeypatch `get_contract` + `compute_capacity_fit` (Codex #10 E3 — selector has no DI) + - NO real catalog template_id / frame_id + - NO `v4_full32_result.yaml` dependency + +Synthetic naming convention : + - `MOCK_` prefix mandatory + - `_a` / `_b` / `_c` suffixes = enumeration only (NOT ordering / priority) + - rank/order expressed by `v4_full_rank` field, NEVER by ID suffix + +Real-catalog integrity is verified separately in `tests/test_catalog_invariant.py`. +""" +from __future__ import annotations + +from typing import Optional + +import pytest + +from src.phase_z2_pipeline import lookup_v4_match_with_fallback + + +# ─── Synthetic catalog stub ────────────────────────────────────── +# Tests control which synthetic templates are catalog-registered + capacity-OK. + +_MOCK_CATALOG: dict[str, object] = { + "MOCK_template_direct_a": object(), # registered + "MOCK_template_direct_b": object(), # registered (used for dedup case) + "MOCK_template_reject_a": object(), # registered (but label=reject) + "MOCK_template_restructure_a": object(), # registered (but label=restructure) + # "MOCK_template_missing_contract" intentionally absent — get_contract returns None. +} + + +def _mock_get_contract(template_id: str): + """Synthetic contract lookup — return catalog entry or None.""" + return _MOCK_CATALOG.get(template_id) + + +def _mock_capacity_fit_ok(template_id: str, raw_content: str) -> dict: + """Synthetic capacity precheck — always OK.""" + return {"fit_status": "ok"} + + +@pytest.fixture +def patch_selector_deps(monkeypatch): + """Monkeypatch module-level dependencies of `lookup_v4_match_with_fallback`. + + Codex #10 E3 + Claude #12 verification — selector has no DI; module-level + `get_contract` / `compute_capacity_fit` must be monkeypatched. + """ + monkeypatch.setattr( + "src.phase_z2_pipeline.get_contract", _mock_get_contract + ) + monkeypatch.setattr( + "src.phase_z2_pipeline.compute_capacity_fit", _mock_capacity_fit_ok + ) + + +def _make_v4(judgments: list[dict], section_id: str = "S1") -> dict: + """Wrap synthetic judgments into V4 input shape.""" + return {"mdx_sections": {section_id: {"judgments_full32": judgments}}} + + +def _j(rank: int, template_id: str, frame_id: str, label: str, + confidence: float = 0.9) -> dict: + """Synthetic V4 judgment record — shape matches real V4 evidence shape.""" + return { + "frame_id": frame_id, + "frame_number": rank, + "template_id": template_id, + "confidence": confidence, + "label": label, + "v4_full_rank": rank, + } + + +# ─── Case 1 : rank-1 direct eligible retention (no fallback used) ─────────── + + +def test_rank_1_direct_eligible_is_retained(patch_selector_deps): + """Codex #10 E4 case 1 — rank-1 use_as_is + registered → keep rank-1, no fallback.""" + v4 = _make_v4([ + _j(1, "MOCK_template_direct_a", "MOCK_frame_001", "use_as_is"), + _j(2, "MOCK_template_direct_b", "MOCK_frame_002", "use_as_is"), + ]) + + match, trace = lookup_v4_match_with_fallback( + v4, "S1", raw_content="- a\n- b\n- c\n" + ) + + assert match is not None + assert match.template_id == "MOCK_template_direct_a" + assert match.v4_rank == 1 + assert match.selection_path == "rank_1" + assert trace["fallback_used"] is False + assert trace["selection_path"] == "rank_1" + assert trace["selected_rank"] == 1 + + +# ─── Case 2 : rank-1 non-direct → rank-2/3 direct selected (fallback used) ─── + + +def test_rank_1_non_direct_promotes_rank_2(patch_selector_deps): + """Codex #10 E4 case 2 — rank-1 reject + rank-2 use_as_is → promote rank-2.""" + v4 = _make_v4([ + _j(1, "MOCK_template_reject_a", "MOCK_frame_001", "reject"), + _j(2, "MOCK_template_direct_a", "MOCK_frame_002", "use_as_is"), + ]) + + match, trace = lookup_v4_match_with_fallback( + v4, "S1", raw_content="- a\n- b\n- c\n" + ) + + assert match is not None + assert match.template_id == "MOCK_template_direct_a" + assert match.v4_rank == 2 + assert match.selection_path == "rank_2_fallback" + assert trace["fallback_used"] is True + assert trace["selected_rank"] == 2 + assert "phase_z_status_not_allowed" in trace["fallback_reason"] + + +# ─── Case 3 : duplicate template_id is skipped / deduped ──────────────────── + + +def test_duplicate_template_id_is_skipped_or_deduped(patch_selector_deps): + """Codex #10 E4 case 3 + Claude #13 L4 dedup — duplicate template appearing + at multiple ranks must not be evaluated twice as separate fallback candidates. + + Current selector traverses rank 1..max_rank linearly. If rank-1 is skipped + (e.g. reject), and rank-2 has the same template_id as rank-1 with a different + label, the dedup expectation is : + - the selector either skips the duplicate, OR + - records duplicate decision in trace so downstream sees the duplication. + + Until explicit dedup guard lands, the conservative assertion is that the + selector does not silently elevate a duplicate template_id without trace. + """ + v4 = _make_v4([ + _j(1, "MOCK_template_reject_a", "MOCK_frame_001", "reject"), + # rank-2 has same template_id as rank-1 (synthetic V4 anomaly) + _j(2, "MOCK_template_reject_a", "MOCK_frame_001", "use_as_is"), + _j(3, "MOCK_template_direct_a", "MOCK_frame_002", "use_as_is"), + ]) + + match, trace = lookup_v4_match_with_fallback( + v4, "S1", raw_content="- a\n- b\n- c\n" + ) + + # Either the duplicate is skipped (then rank-3 wins) or duplicate is selected. + # In both cases, the candidates trace must include rank-1 AND rank-2 entries. + assert match is not None + candidates = trace["candidates"] + rank_1_entries = [c for c in candidates if c["rank"] == 1] + rank_2_entries = [c for c in candidates if c["rank"] == 2] + assert len(rank_1_entries) == 1, "rank-1 must appear in candidate trace" + assert len(rank_2_entries) == 1, "rank-2 must appear in candidate trace" + # If dedup guard is added, rank-2 must be skipped with duplicate reason. + # Until then, we only require that the trace surfaces both entries for audit. + + +# ─── Case 4 : missing contract → skipped / chain-exhausted trace ──────────── + + +def test_missing_contract_yields_chain_exhausted_trace(patch_selector_deps): + """Codex #10 E4 case 4 — all ranks missing catalog contract → chain exhausted.""" + v4 = _make_v4([ + _j(1, "MOCK_template_missing_contract", "MOCK_frame_001", "use_as_is"), + ]) + + match, trace = lookup_v4_match_with_fallback( + v4, "S1", raw_content="- a\n- b\n- c\n" + ) + + assert match is None + assert trace["selection_path"] == "chain_exhausted" + candidates = trace["candidates"] + assert any(c.get("reason") == "skipped_no_contract" for c in candidates) + + +# ─── Case 5 : restructure / reject preserved as non-direct candidate evidence + + +def test_restructure_reject_preserved_as_non_direct_evidence(patch_selector_deps): + """Codex #10 E4 case 5 + Codex #2 conceptual + Claude #11 L5 — restructure / reject + candidates must remain visible in candidate_evidence with route hints, + not silently discarded. + """ + v4 = _make_v4([ + _j(1, "MOCK_template_reject_a", "MOCK_frame_001", "reject"), + _j(2, "MOCK_template_restructure_a", "MOCK_frame_002", "restructure"), + _j(3, "MOCK_template_direct_a", "MOCK_frame_003", "use_as_is"), + ]) + + match, trace = lookup_v4_match_with_fallback( + v4, "S1", raw_content="- a\n- b\n- c\n" + ) + + assert match is not None + assert match.template_id == "MOCK_template_direct_a" + + candidates = trace["candidates"] + # All 3 must appear with informative schema (L2 fields) + by_rank = {c["rank"]: c for c in candidates} + assert set(by_rank.keys()) == {1, 2, 3} + + # rank-1 reject — non-direct, design_reference_only + assert by_rank[1]["v4_label"] == "reject" + assert by_rank[1]["filtered_for_direct_execution"] is True + assert by_rank[1]["route_hint"] == "design_reference_only" + + # rank-2 restructure — non-direct, ai_adaptation_required + assert by_rank[2]["v4_label"] == "restructure" + assert by_rank[2]["filtered_for_direct_execution"] is True + assert by_rank[2]["route_hint"] == "ai_adaptation_required" + + # rank-3 use_as_is — direct, direct_render + assert by_rank[3]["v4_label"] == "use_as_is" + assert by_rank[3]["filtered_for_direct_execution"] is False + assert by_rank[3]["route_hint"] == "direct_render" + + +# ─── Case 6 : additive fields do not regress existing trace shape ─────────── + + +def test_existing_trace_shape_does_not_regress(patch_selector_deps): + """Codex #10 E4 case 6 + Claude #11 L9 — additive L2/L3 fields must not break + existing trace consumers. Existing fields (`label`, `fallback_used`, + `selection_path`, `selected_rank`, etc.) must remain present and unchanged. + """ + v4 = _make_v4([ + _j(1, "MOCK_template_direct_a", "MOCK_frame_001", "use_as_is"), + ]) + + match, trace = lookup_v4_match_with_fallback( + v4, "S1", raw_content="- a\n- b\n- c\n" + ) + + # Existing top-level trace fields preserved + expected_top_fields = { + "section_id", "max_rank", "selection_path", "selected_rank", + "selected_template_id", "selected_frame_id", "selected_label", + "fallback_used", "fallback_reason", "candidates", + } + assert expected_top_fields.issubset(trace.keys()) + + # Existing candidate fields preserved + candidate = trace["candidates"][0] + expected_candidate_fields = { + "rank", "template_id", "frame_id", "frame_number", "confidence", + "label", "phase_z_status", "catalog_registered", "decision", "reason", + } + assert expected_candidate_fields.issubset(candidate.keys()) + + # New L2 additive fields present (v4_label / filtered_for_direct_execution / route_hint) + assert candidate["v4_label"] == candidate["label"] # alias of label + assert "filtered_for_direct_execution" in candidate + assert "route_hint" in candidate + + # rank-1 use_as_is path — no fallback used + assert trace["fallback_used"] is False + assert trace["selection_path"] == "rank_1"