"""Behavior tests for the IMP-05 V4 fallback selector, using synthetic data only.

Provenance: Codex #10 E1 + Claude #13 (lock per round 65~73 + Claude #13 §3 L4').

Ground rules for this module:
  - The original six explicit behavior cases come from Codex #10 E4; Cases 7-9
    below cover the IMP-32 u5, Step 20 and IMP-30 u1 follow-ups.
  - All IDs are fully synthetic ``MOCK_`` IDs (Codex #7 generalization
    guardrail + Codex #10 E1 naming).
  - ``get_contract`` and ``compute_capacity_fit`` are monkeypatched because
    the selector has no dependency injection (Codex #10 E3).
  - NO real catalog template_id / frame_id is used.
  - NO dependency on ``v4_full32_result.yaml``.

Synthetic naming convention:
  - The ``MOCK_`` prefix is mandatory.
  - ``_a`` / ``_b`` / ``_c`` suffixes only enumerate IDs; they do NOT express
    ordering or priority.
  - Rank/order is expressed by the ``v4_full_rank`` field, NEVER by ID suffix.

Real-catalog integrity is verified separately in
``tests/test_catalog_invariant.py``.
"""

from __future__ import annotations

import inspect
from typing import Optional

import pytest

from src import phase_z2_pipeline
from src.phase_z2_pipeline import lookup_v4_match_with_fallback


# --- Synthetic catalog stub ---------------------------------------------------
# The tests decide which synthetic templates count as catalog-registered.
# Any template_id not listed here (e.g. "MOCK_template_missing_contract")
# resolves to None through `_mock_get_contract`.
_REGISTERED_MOCK_TEMPLATES = (
    "MOCK_template_direct_a",       # registered
    "MOCK_template_direct_b",       # registered (rank-2 candidate in Case 1)
    "MOCK_template_reject_a",       # registered (paired with label=reject)
    "MOCK_template_restructure_a",  # registered (paired with label=restructure)
)
_MOCK_CATALOG: dict[str, object] = {
    mock_id: object() for mock_id in _REGISTERED_MOCK_TEMPLATES
}

# Raw section content passed to every selector call in this module.
_RAW_CONTENT = "- a\n- b\n- c\n"


def _mock_get_contract(template_id: str):
    """Stand-in for ``get_contract``: the catalog entry, or None if unregistered."""
    return _MOCK_CATALOG.get(template_id)


def _mock_capacity_fit_ok(template_id: str, raw_content: str) -> dict:
    """Stand-in for ``compute_capacity_fit`` that reports ``ok`` for any input."""
    return {"fit_status": "ok"}


@pytest.fixture
def patch_selector_deps(monkeypatch):
    """Swap the selector's module-level dependencies for the synthetic stubs.

    ``lookup_v4_match_with_fallback`` has no DI (Codex #10 E3 + Claude #12
    verification), so ``get_contract`` / ``compute_capacity_fit`` are replaced
    on ``src.phase_z2_pipeline`` itself for the duration of a test.
    """
    replacements = (
        ("src.phase_z2_pipeline.get_contract", _mock_get_contract),
        ("src.phase_z2_pipeline.compute_capacity_fit", _mock_capacity_fit_ok),
    )
    for target, stub in replacements:
        monkeypatch.setattr(target, stub)


def _make_v4(judgments: list[dict], section_id: str = "S1") -> dict:
    """Nest synthetic judgments under ``mdx_sections[section_id]`` (V4 input shape)."""
    section_payload = {"judgments_full32": judgments}
    return {"mdx_sections": {section_id: section_payload}}


def _j(rank: int, template_id: str, frame_id: str, label: str,
       confidence: float = 0.9) -> dict:
    """Build one synthetic V4 judgment record.

    ``rank`` is written to both ``frame_number`` and ``v4_full_rank``.
    """
    return {
        "frame_id": frame_id,
        "frame_number": rank,
        "template_id": template_id,
        "confidence": confidence,
        "label": label,
        "v4_full_rank": rank,
    }


def _select(v4: dict, **selector_kwargs):
    """Run the selector for section ``S1`` with the shared raw content.

    Extra keyword arguments (e.g. ``allow_provisional=True``) are forwarded
    untouched; when none are given the selector's own defaults apply.
    """
    return lookup_v4_match_with_fallback(
        v4, "S1", raw_content=_RAW_CONTENT, **selector_kwargs
    )


def _by_rank(trace: dict) -> dict:
    """Index ``trace["candidates"]`` by each candidate's ``rank`` value."""
    return {entry["rank"]: entry for entry in trace["candidates"]}


def _exhausted_chain_v4() -> dict:
    """V4 input whose two candidates are restructure (rank 1) and reject (rank 2)."""
    return _make_v4([
        _j(1, "MOCK_template_restructure_a", "MOCK_frame_001", "restructure"),
        _j(2, "MOCK_template_reject_a", "MOCK_frame_002", "reject"),
    ])


# --- Case 1: rank-1 direct eligible retention (no fallback used) --------------

def test_rank_1_direct_eligible_is_retained(patch_selector_deps):
    """Codex #10 E4 case 1: a registered rank-1 ``use_as_is`` stays selected."""
    v4 = _make_v4([
        _j(1, "MOCK_template_direct_a", "MOCK_frame_001", "use_as_is"),
        _j(2, "MOCK_template_direct_b", "MOCK_frame_002", "use_as_is"),
    ])

    chosen, trace = _select(v4)

    assert chosen is not None
    assert chosen.template_id == "MOCK_template_direct_a"
    assert chosen.v4_rank == 1
    assert chosen.selection_path == "rank_1"
    assert trace["fallback_used"] is False
    assert trace["selection_path"] == "rank_1"
    assert trace["selected_rank"] == 1


# --- Case 2: rank-1 non-direct -> rank-2/3 direct selected (fallback used) ----

def test_rank_1_non_direct_promotes_rank_2(patch_selector_deps):
    """Codex #10 E4 case 2: rank-1 ``reject`` + rank-2 ``use_as_is`` promotes rank-2."""
    v4 = _make_v4([
        _j(1, "MOCK_template_reject_a", "MOCK_frame_001", "reject"),
        _j(2, "MOCK_template_direct_a", "MOCK_frame_002", "use_as_is"),
    ])

    chosen, trace = _select(v4)

    assert chosen is not None
    assert chosen.template_id == "MOCK_template_direct_a"
    assert chosen.v4_rank == 2
    assert chosen.selection_path == "rank_2_fallback"
    assert trace["fallback_used"] is True
    assert trace["selected_rank"] == 2
    assert "phase_z_status_not_allowed" in trace["fallback_reason"]


# --- Case 3: duplicate template_id is skipped / deduped -----------------------

def test_duplicate_template_id_is_skipped_rank_3_wins(patch_selector_deps):
    """Codex #14 dedup precision lock.

    The first occurrence of a template_id reserves it for the whole chain,
    whatever decision that occurrence received. A later rank carrying the
    same template_id MUST be skipped as a duplicate regardless of its V4 label.

    The fixture simulates a V4 anomaly: rank-1 and rank-2 share one
    template_id (and the same frame_id, consistent with the 1:1 catalog
    terminology of Codex #6 and with the real catalog).

    Per the Codex #14 example:
        rank 1: A reject     -> skipped (non-direct), template A claimed
        rank 2: A use_as_is  -> skipped as duplicate_template_id (must NOT win)
        rank 3: B use_as_is  -> selected (distinct template, eligible)
    """
    v4 = _make_v4([
        # rank-1: non-direct (reject); first occurrence reserves the template_id.
        _j(1, "MOCK_template_dup_a", "MOCK_frame_dup_001", "reject"),
        # rank-2: same template_id + frame_id; label alone would be executable,
        # but the entry MUST be skipped as a duplicate (Codex #14 intended rule).
        _j(2, "MOCK_template_dup_a", "MOCK_frame_dup_001", "use_as_is"),
        # rank-3: distinct executable template; expected winner.
        _j(3, "MOCK_template_direct_a", "MOCK_frame_003", "use_as_is"),
    ])

    chosen, trace = _select(v4)

    # rank-3 must win once rank-1 and rank-2 are both skipped.
    assert chosen is not None
    assert chosen.template_id == "MOCK_template_direct_a"
    assert chosen.v4_rank == 3
    assert chosen.selection_path == "rank_3_fallback"
    assert trace["fallback_used"] is True
    assert trace["selected_rank"] == 3

    # The trace must keep all three candidates, each with a precise reason.
    ranked = _by_rank(trace)
    assert set(ranked) == {1, 2, 3}

    # rank-1: non-direct first occurrence; status_not_allowed reason preserved.
    first = ranked[1]
    assert first["decision"] == "skipped"
    assert first["reason"] == "phase_z_status_not_allowed:fallback_candidate"
    assert first["template_id"] == "MOCK_template_dup_a"
    assert first["v4_label"] == "reject"

    # rank-2: duplicate of rank-1's template; skipped as duplicate, NOT selected.
    second = ranked[2]
    assert second["decision"] == "skipped"
    assert second["reason"] == "duplicate_template_id"
    assert second["template_id"] == "MOCK_template_dup_a"
    # Audit fields survive even though the entry is a duplicate.
    assert second["v4_label"] == "use_as_is"
    assert second["frame_id"] == "MOCK_frame_dup_001"

    # rank-3: distinct executable template; selected.
    third = ranked[3]
    assert third["decision"] == "selected"
    assert third["template_id"] == "MOCK_template_direct_a"


# --- Case 4: missing contract -> skipped / chain-exhausted trace --------------

def test_missing_contract_yields_chain_exhausted_trace(patch_selector_deps):
    """Codex #10 E4 case 4: every rank lacks a catalog contract, so the chain exhausts."""
    v4 = _make_v4([
        _j(1, "MOCK_template_missing_contract", "MOCK_frame_001", "use_as_is"),
    ])

    chosen, trace = _select(v4)

    assert chosen is None
    assert trace["selection_path"] == "chain_exhausted"
    recorded_reasons = [entry.get("reason") for entry in trace["candidates"]]
    assert "skipped_no_contract" in recorded_reasons


# --- Case 5: restructure / reject preserved as non-direct candidate evidence --

def test_restructure_reject_preserved_as_non_direct_evidence(patch_selector_deps):
    """Codex #10 E4 case 5 + Codex #2 conceptual + Claude #11 L5.

    ``restructure`` / ``reject`` candidates must stay visible in the candidate
    evidence, carrying route hints, instead of being silently discarded.
    """
    v4 = _make_v4([
        _j(1, "MOCK_template_reject_a", "MOCK_frame_001", "reject"),
        _j(2, "MOCK_template_restructure_a", "MOCK_frame_002", "restructure"),
        _j(3, "MOCK_template_direct_a", "MOCK_frame_003", "use_as_is"),
    ])

    chosen, trace = _select(v4)

    assert chosen is not None
    assert chosen.template_id == "MOCK_template_direct_a"

    # All three candidates must appear with the informative L2 fields.
    ranked = _by_rank(trace)
    assert set(ranked) == {1, 2, 3}

    # rank -> (v4_label, filtered_for_direct_execution, route_hint)
    expected_by_rank = {
        # reject: non-direct, ai_adaptation_required (IMP-47B u1 policy correction)
        1: ("reject", True, "ai_adaptation_required"),
        # restructure: non-direct, ai_adaptation_required
        2: ("restructure", True, "ai_adaptation_required"),
        # use_as_is: direct, direct_render
        3: ("use_as_is", False, "direct_render"),
    }
    for rank, (label, filtered, hint) in expected_by_rank.items():
        entry = ranked[rank]
        assert entry["v4_label"] == label
        assert entry["filtered_for_direct_execution"] is filtered
        assert entry["route_hint"] == hint


# --- Case 6: additive fields do not regress existing trace shape --------------

def test_existing_trace_shape_does_not_regress(patch_selector_deps):
    """Codex #10 E4 case 6 + Claude #11 L9.

    The additive L2/L3 fields must not break existing trace consumers: the
    pre-existing fields (``label``, ``fallback_used``, ``selection_path``,
    ``selected_rank``, ...) have to remain present.
    """
    v4 = _make_v4([
        _j(1, "MOCK_template_direct_a", "MOCK_frame_001", "use_as_is"),
    ])

    _, trace = _select(v4)

    # Pre-existing top-level trace fields.
    expected_top_fields = {
        "section_id", "max_rank", "selection_path", "selected_rank",
        "selected_template_id", "selected_frame_id", "selected_label",
        "fallback_used", "fallback_reason", "candidates",
    }
    missing_top = expected_top_fields.difference(trace.keys())
    assert not missing_top

    # Pre-existing per-candidate fields.
    head = trace["candidates"][0]
    expected_candidate_fields = {
        "rank", "template_id", "frame_id", "frame_number", "confidence",
        "label", "phase_z_status", "catalog_registered", "decision", "reason",
    }
    missing_candidate = expected_candidate_fields.difference(head.keys())
    assert not missing_candidate

    # New additive L2 fields: v4_label aliases label; the other two must exist.
    assert head["v4_label"] == head["label"]
    assert "filtered_for_direct_execution" in head
    assert "route_hint" in head

    # rank-1 use_as_is path: no fallback involved.
    assert trace["fallback_used"] is False
    assert trace["selection_path"] == "rank_1"


# --- Case 7: Step 9 helper-call shape test (IMP-32 u5; replaces source guard) -

def test_build_application_plan_unit_emits_candidate_evidence_and_alias():
    """IMP-32 u5: direct helper-call shape test for the Step 9 evidence fields.

    Supersedes the IMP-05 Case 7 ``inspect.getsource(phase_z2_pipeline)``
    literal guard (introduced at commit ``23d1b25`` while Step 9 unit assembly
    was inline) by calling ``_build_application_plan_unit`` directly; that
    helper was extracted in IMP-32 u3.

    Verification axes kept from the old guard:
      - ``candidate_evidence`` is the very list held in
        ``selection_trace["candidates"]``
      - ``fallback_chain`` is a compat alias for that same list object
      - key order: ``candidate_evidence`` comes before ``fallback_chain``
      - the compat-alias comment is still present in the helper's source
    """
    from types import SimpleNamespace

    from src.phase_z2_pipeline import _build_application_plan_unit

    evidence = [
        {"rank": 1, "template_id": "MOCK_template_direct_a", "label": "use_as_is"},
    ]

    # Duck-typed stand-ins. Per the original note, the attributes mirror the
    # V4Match dataclass fields read inside the helper (template_id / frame_id /
    # frame_number / v4_rank / confidence / label).
    fake_candidate = SimpleNamespace(
        template_id="MOCK_template_direct_a",
        frame_id="MOCK_frame_001",
        frame_number=1,
        v4_rank=1,
        confidence=0.9,
        label="use_as_is",
    )
    fake_unit = SimpleNamespace(
        source_section_ids=["S1"],
        v4_candidates=[fake_candidate],
        v4_rank=1,
        selection_path="rank_1",
        fallback_reason=None,
        frame_template_id="MOCK_template_direct_a",
    )

    result = _build_application_plan_unit(
        unit=fake_unit,
        zone_plan={},
        selection_trace={"candidates": evidence},
        plan_record=None,
        v4_all_for_unit=[],
        layout_preset="Type A",
        layout_candidates_list=[],
    )

    # IMP-05 L2: candidate_evidence is the primary field and must be the same
    # object as selection_trace["candidates"], not a copy.
    assert "candidate_evidence" in result
    assert result["candidate_evidence"] is evidence

    # Compat alias: fallback_chain points at that same list (identity check).
    assert "fallback_chain" in result
    assert result["fallback_chain"] is evidence

    # Documented L2 ordering: candidate_evidence precedes fallback_chain.
    ordered_keys = list(result.keys())
    evidence_pos = ordered_keys.index("candidate_evidence")
    alias_pos = ordered_keys.index("fallback_chain")
    assert evidence_pos < alias_pos

    # The compat-alias comment must survive in the helper's source text.
    helper_source = inspect.getsource(_build_application_plan_unit)
    assert "compat alias; prefer candidate_evidence" in helper_source


# --- Case 8: Step 20 slide-status qualifier fields presence + defensive default

def test_step20_slide_status_qualifier_fields_present_with_defensive_defaults():
    """Codex #10 D4 + Codex #17 idea F + Claude #21 idea J.

    Step 20 slide-status must expose ``fallback_selection_count`` and
    ``selection_paths[]`` derived from ``comp_debug["v4_fallback_summary"]``,
    falling back to the defensive defaults (0, []) when that summary is
    missing or empty. The top-level ``overall`` field must stay present.
    """
    from src.phase_z2_pipeline import MdxSection, compute_slide_status

    # Shared inputs: the same objects are passed to all three calls below.
    sections_empty: list[MdxSection] = []
    units_empty: list = []
    overflow_pass = {"passed": True, "fail_reasons": []}

    def slide_status(comp_debug: dict):
        """Call ``compute_slide_status`` with the shared inputs and ``comp_debug``."""
        return compute_slide_status(
            sections_empty, units_empty, comp_debug, overflow_pass,
            adapter_needed_units=None, debug_zones=None,
        )

    # Case A: comp_debug carries a populated v4_fallback_summary.
    populated_path = {
        "section_id": "S1",
        "selection_path": "rank_2_fallback",
        "selected_rank": 2,
        "selected_template_id": "MOCK_T",
        "fallback_trigger": "phase_z_status_not_allowed:fallback_candidate",
    }
    status_a = slide_status({
        "v4_fallback_summary": {
            "fallback_used_count": 1,
            "fallback_selection_count": 1,
            "selection_paths": [populated_path],
        },
        "candidates_summary": [],
    })

    # Step 20 qualifier fields are present and populated.
    assert "fallback_selection_count" in status_a
    assert "selection_paths" in status_a
    assert status_a["fallback_selection_count"] == 1
    assert len(status_a["selection_paths"]) == 1
    assert status_a["selection_paths"][0]["section_id"] == "S1"
    # Pre-existing fields are still there (no regression).
    for legacy_field in ("fallback_used", "fallback_selections", "overall"):
        assert legacy_field in status_a

    # Case B: v4_fallback_summary is missing entirely -> defaults 0 and [].
    status_b = slide_status({"candidates_summary": []})
    assert status_b["fallback_selection_count"] == 0
    assert status_b["selection_paths"] == []
    # The top-level overall field is still present.
    assert "overall" in status_b

    # Case C: v4_fallback_summary is present but empty -> same defaults.
    status_c = slide_status({"v4_fallback_summary": {}, "candidates_summary": []})
    assert status_c["fallback_selection_count"] == 0
    assert status_c["selection_paths"] == []


# --- Case 9: IMP-30 u1 - opt-in provisional synthesis on chain_exhausted ------

def test_allow_provisional_default_off_preserves_imp05_behavior(patch_selector_deps):
    """IMP-30 u1: without ``allow_provisional`` an exhausted chain gives ``(None, trace)``.

    Regression guard for the behavior IMP-05 specified (close commit 23d1b25).
    """
    chosen, trace = _select(_exhausted_chain_v4())

    assert chosen is None
    assert trace["selection_path"] == "chain_exhausted"
    assert trace.get("provisional") is None
    assert trace["selected_rank"] is None
    assert trace["selected_template_id"] is None


def test_allow_provisional_synthesizes_rank_1_on_chain_exhausted(patch_selector_deps):
    """IMP-30 u1: ``allow_provisional=True`` synthesizes a provisional rank-1 match.

    Applies when the rank chain is exhausted (here: restructure + reject only).
    Per the original note, the downstream first-render invariant relies on
    this to render a "needs adaptation" zone instead of aborting.
    """
    chosen, trace = _select(_exhausted_chain_v4(), allow_provisional=True)

    # The provisional match is built from the rank-1 judgment.
    assert chosen is not None
    assert chosen.provisional is True
    assert chosen.template_id == "MOCK_template_restructure_a"
    assert chosen.frame_id == "MOCK_frame_001"
    assert chosen.label == "restructure"
    assert chosen.v4_rank == 1
    assert chosen.selection_path == "provisional_rank_1"
    # fallback_reason mirrors the chain-exhaust reason.
    assert chosen.fallback_reason is not None
    assert "phase_z_status_not_allowed" in chosen.fallback_reason

    # Top-level trace fields mirror the provisional selection.
    assert trace["selection_path"] == "provisional_rank_1"
    assert trace["selected_rank"] == 1
    assert trace["selected_template_id"] == "MOCK_template_restructure_a"
    assert trace["selected_frame_id"] == "MOCK_frame_001"
    assert trace["selected_label"] == "restructure"
    assert trace["fallback_used"] is True
    assert trace["provisional"] is True

    # Synthesis must not rewrite the original per-candidate skip reasons.
    ranked = _by_rank(trace)
    assert ranked[1]["decision"] == "skipped"
    assert ranked[1]["reason"] == "phase_z_status_not_allowed:extract_matched_zone"
    assert ranked[2]["decision"] == "skipped"
    assert ranked[2]["reason"] == "phase_z_status_not_allowed:fallback_candidate"


def test_allow_provisional_no_op_when_normal_selection_succeeds(patch_selector_deps):
    """IMP-30 u1: ``allow_provisional=True`` changes nothing when selection succeeds.

    The regular rank-1 (or rank-N fallback) result MUST be non-provisional.
    """
    v4 = _make_v4([
        _j(1, "MOCK_template_direct_a", "MOCK_frame_001", "use_as_is"),
    ])

    chosen, trace = _select(v4, allow_provisional=True)

    assert chosen is not None
    assert chosen.provisional is False
    assert chosen.selection_path == "rank_1"
    assert trace["selection_path"] == "rank_1"
    assert trace.get("provisional") is None


def test_allow_provisional_no_op_when_no_v4_section(patch_selector_deps):
    """IMP-30 u1: no resolvable V4 section still yields ``(None, trace)``.

    With no rank-1 judgment to synthesize from, ``allow_provisional=True``
    MUST NOT produce a match. Per the original note, u3/u4 handle this case
    with a placeholder zone or an empty-shell terminal slide.
    """
    v4_without_sections = {"mdx_sections": {}}

    chosen, trace = _select(v4_without_sections, allow_provisional=True)

    assert chosen is None
    assert trace["fallback_reason"] == "no_v4_section"


def test_allow_provisional_no_op_when_empty_judgments(patch_selector_deps):
    """IMP-30 u1: an existing section with empty ``judgments_full32`` yields ``(None, trace)``.

    No synthetic rank-1 can be fabricated from nothing, even with
    ``allow_provisional=True``.
    """
    chosen, trace = _select(_make_v4([]), allow_provisional=True)

    assert chosen is None
    assert trace["fallback_reason"] == "empty_v4_judgments"