"""IMP-05 V4 fallback selector behavior tests — fully synthetic per Codex #10 E1 + Claude #13.

Lock per round 65~73 + Claude #13 §3 L4' :
  - 6 explicit behavior cases (Codex #10 E4)
  - fully synthetic MOCK_ IDs (Codex #7 generalization guardrail + Codex #10 E1 naming)
  - monkeypatch `get_contract` + `compute_capacity_fit` (Codex #10 E3 — selector has no DI)
  - NO real catalog template_id / frame_id
  - NO `v4_full32_result.yaml` dependency

Synthetic naming convention :
  - `MOCK_` prefix mandatory
  - `_a` / `_b` / `_c` suffixes = enumeration only (NOT ordering / priority)
  - rank/order expressed by `v4_full_rank` field, NEVER by ID suffix

Real-catalog integrity is verified separately in `tests/test_catalog_invariant.py`.
"""
from __future__ import annotations

from typing import Optional

import pytest

from src.phase_z2_pipeline import lookup_v4_match_with_fallback


# ─── Synthetic catalog stub ──────────────────────────────────────
# Tests control which synthetic templates are catalog-registered + capacity-OK.
# "MOCK_template_missing_contract" is intentionally NOT listed, so the stubbed
# contract lookup returns None for it.
_MOCK_CATALOG: dict[str, object] = {
    registered_id: object()
    for registered_id in (
        "MOCK_template_direct_a",       # registered
        "MOCK_template_direct_b",       # registered (used for dedup case)
        "MOCK_template_reject_a",       # registered (but label=reject)
        "MOCK_template_restructure_a",  # registered (but label=restructure)
    )
}


def _mock_get_contract(template_id: str) -> Optional[object]:
    """Stand-in for `get_contract`: the `_MOCK_CATALOG` entry, or None if absent."""
    if template_id in _MOCK_CATALOG:
        return _MOCK_CATALOG[template_id]
    return None


def _mock_capacity_fit_ok(template_id: str, raw_content: str) -> dict:
    """Stand-in for `compute_capacity_fit`: ignores its inputs and reports an OK fit."""
    verdict = {"fit_status": "ok"}
    return verdict


@pytest.fixture
def patch_selector_deps(monkeypatch):
    """Swap the selector's module-level collaborators for the synthetic stubs.

    Codex #10 E3 + Claude #12 verification — the selector has no DI, so the
    `get_contract` / `compute_capacity_fit` names inside
    `src.phase_z2_pipeline` are replaced via monkeypatch for the test's duration.
    """
    replacements = (
        ("src.phase_z2_pipeline.get_contract", _mock_get_contract),
        ("src.phase_z2_pipeline.compute_capacity_fit", _mock_capacity_fit_ok),
    )
    for dotted_target, stub in replacements:
        monkeypatch.setattr(dotted_target, stub)


def _make_v4(judgments: list[dict], section_id: str = "S1") -> dict:
    """Nest synthetic judgments under `mdx_sections[section_id]` (V4 input shape)."""
    section_payload = {"judgments_full32": judgments}
    return {"mdx_sections": {section_id: section_payload}}


def _j(rank: int, template_id: str, frame_id: str, label: str,
       confidence: float = 0.9) -> dict:
    """Build one synthetic V4 judgment record.

    `rank` populates both `frame_number` and `v4_full_rank`; the remaining
    arguments are stored under their own names.
    """
    record = {
        "frame_id": frame_id,
        "frame_number": rank,
        "template_id": template_id,
        "confidence": confidence,
        "label": label,
        "v4_full_rank": rank,
    }
    return record


# ─── Case 1 : rank-1 direct eligible retention (no fallback used) ───────────
def test_rank_1_direct_eligible_is_retained(patch_selector_deps):
    """Codex #10 E4 case 1 — rank-1 use_as_is + registered → keep rank-1, no fallback."""
    judgments = [
        _j(1, "MOCK_template_direct_a", "MOCK_frame_001", "use_as_is"),
        _j(2, "MOCK_template_direct_b", "MOCK_frame_002", "use_as_is"),
    ]
    v4_input = _make_v4(judgments)

    selected, audit = lookup_v4_match_with_fallback(
        v4_input, "S1", raw_content="- a\n- b\n- c\n"
    )

    # The returned match is the rank-1 candidate itself.
    assert selected is not None
    assert selected.template_id == "MOCK_template_direct_a"
    assert selected.v4_rank == 1
    assert selected.selection_path == "rank_1"

    # The trace agrees that no fallback happened.
    assert audit["fallback_used"] is False
    assert audit["selection_path"] == "rank_1"
    assert audit["selected_rank"] == 1


# ─── Case 2 : rank-1 non-direct → rank-2/3 direct selected (fallback used) ───
def test_rank_1_non_direct_promotes_rank_2(patch_selector_deps):
    """Codex #10 E4 case 2 — rank-1 reject + rank-2 use_as_is → promote rank-2."""
    judgments = [
        _j(1, "MOCK_template_reject_a", "MOCK_frame_001", "reject"),
        _j(2, "MOCK_template_direct_a", "MOCK_frame_002", "use_as_is"),
    ]
    v4_input = _make_v4(judgments)

    selected, audit = lookup_v4_match_with_fallback(
        v4_input, "S1", raw_content="- a\n- b\n- c\n"
    )

    # Rank-2 is promoted in place of the rejected rank-1.
    assert selected is not None
    assert selected.template_id == "MOCK_template_direct_a"
    assert selected.v4_rank == 2
    assert selected.selection_path == "rank_2_fallback"

    # The trace records the fallback and why rank-1 was passed over.
    assert audit["fallback_used"] is True
    assert audit["selected_rank"] == 2
    assert "phase_z_status_not_allowed" in audit["fallback_reason"]


# ─── Case 3 : duplicate template_id is skipped / deduped ────────────────────
def test_duplicate_template_id_is_skipped_or_deduped(patch_selector_deps):
    """Codex #10 E4 case 3 + Claude #13 L4 dedup — a template appearing at
    multiple ranks must not be evaluated twice as separate fallback candidates.

    Current selector traverses rank 1..max_rank linearly. If rank-1 is skipped
    (e.g. reject), and rank-2 has the same template_id as rank-1 with a
    different label, the dedup expectation is :
      - the selector either skips the duplicate, OR
      - records the duplicate decision in trace so downstream sees the duplication.

    Until an explicit dedup guard lands, the conservative assertion is that the
    selector does not silently elevate a duplicate template_id without trace.
    """
    judgments = [
        _j(1, "MOCK_template_reject_a", "MOCK_frame_001", "reject"),
        # rank-2 has same template_id as rank-1 (synthetic V4 anomaly)
        _j(2, "MOCK_template_reject_a", "MOCK_frame_001", "use_as_is"),
        _j(3, "MOCK_template_direct_a", "MOCK_frame_002", "use_as_is"),
    ]
    v4_input = _make_v4(judgments)

    selected, audit = lookup_v4_match_with_fallback(
        v4_input, "S1", raw_content="- a\n- b\n- c\n"
    )

    # Either the duplicate is skipped (then rank-3 wins) or the duplicate is
    # selected. In both cases the candidate trace must carry exactly one entry
    # for rank-1 AND exactly one for rank-2.
    assert selected is not None
    traced_ranks = [entry["rank"] for entry in audit["candidates"]]
    assert traced_ranks.count(1) == 1, "rank-1 must appear in candidate trace"
    assert traced_ranks.count(2) == 1, "rank-2 must appear in candidate trace"
    # If dedup guard is added, rank-2 must be skipped with duplicate reason.
    # Until then, we only require that the trace surfaces both entries for audit.
# ─── Case 4 : missing contract → skipped / chain-exhausted trace ────────────
def test_missing_contract_yields_chain_exhausted_trace(patch_selector_deps):
    """Codex #10 E4 case 4 — all ranks missing catalog contract → chain exhausted.

    The only judgment references a template that the stubbed contract lookup
    does not know, so no match may be returned and the trace must say why.
    """
    v4 = _make_v4([
        _j(1, "MOCK_template_missing_contract", "MOCK_frame_001", "use_as_is"),
    ])

    match, trace = lookup_v4_match_with_fallback(
        v4, "S1", raw_content="- a\n- b\n- c\n"
    )

    assert match is None
    assert trace["selection_path"] == "chain_exhausted"
    candidates = trace["candidates"]
    # `.get` keeps the check tolerant of candidate entries without a "reason" key.
    assert any(c.get("reason") == "skipped_no_contract" for c in candidates)


# ─── Case 5 : restructure / reject preserved as non-direct candidate evidence
def test_restructure_reject_preserved_as_non_direct_evidence(patch_selector_deps):
    """Codex #10 E4 case 5 + Codex #2 conceptual + Claude #11 L5 — restructure /
    reject candidates must remain visible in candidate_evidence with route
    hints, not silently discarded.

    Ranks 1 and 2 are non-direct (reject / restructure); rank 3 is the direct
    candidate that gets selected. All three must still show up in the trace.
    """
    v4 = _make_v4([
        _j(1, "MOCK_template_reject_a", "MOCK_frame_001", "reject"),
        _j(2, "MOCK_template_restructure_a", "MOCK_frame_002", "restructure"),
        _j(3, "MOCK_template_direct_a", "MOCK_frame_003", "use_as_is"),
    ])

    match, trace = lookup_v4_match_with_fallback(
        v4, "S1", raw_content="- a\n- b\n- c\n"
    )

    assert match is not None
    assert match.template_id == "MOCK_template_direct_a"

    candidates = trace["candidates"]
    # All 3 must appear with informative schema (L2 fields)
    by_rank = {c["rank"]: c for c in candidates}
    assert set(by_rank) == {1, 2, 3}

    # rank-1 reject — non-direct, design_reference_only
    assert by_rank[1]["v4_label"] == "reject"
    assert by_rank[1]["filtered_for_direct_execution"] is True
    assert by_rank[1]["route_hint"] == "design_reference_only"

    # rank-2 restructure — non-direct, ai_adaptation_required
    assert by_rank[2]["v4_label"] == "restructure"
    assert by_rank[2]["filtered_for_direct_execution"] is True
    assert by_rank[2]["route_hint"] == "ai_adaptation_required"

    # rank-3 use_as_is — direct, direct_render
    assert by_rank[3]["v4_label"] == "use_as_is"
    assert by_rank[3]["filtered_for_direct_execution"] is False
    assert by_rank[3]["route_hint"] == "direct_render"


# ─── Case 6 : additive fields do not regress existing trace shape ───────────
def test_existing_trace_shape_does_not_regress(patch_selector_deps):
    """Codex #10 E4 case 6 + Claude #11 L9 — additive L2/L3 fields must not
    break existing trace consumers.

    Existing fields (`label`, `fallback_used`, `selection_path`,
    `selected_rank`, etc.) must remain present and unchanged.
    """
    v4 = _make_v4([
        _j(1, "MOCK_template_direct_a", "MOCK_frame_001", "use_as_is"),
    ])

    match, trace = lookup_v4_match_with_fallback(
        v4, "S1", raw_content="- a\n- b\n- c\n"
    )

    # The shape checks below describe the rank-1 success path, so make sure a
    # match was actually returned rather than leaving `match` unchecked.
    assert match is not None

    # Existing top-level trace fields preserved
    expected_top_fields = {
        "section_id", "max_rank", "selection_path", "selected_rank",
        "selected_template_id", "selected_frame_id", "selected_label",
        "fallback_used", "fallback_reason", "candidates",
    }
    assert expected_top_fields.issubset(trace.keys())

    # Existing candidate fields preserved
    candidate = trace["candidates"][0]
    expected_candidate_fields = {
        "rank", "template_id", "frame_id", "frame_number",
        "confidence", "label", "phase_z_status",
        "catalog_registered", "decision", "reason",
    }
    assert expected_candidate_fields.issubset(candidate.keys())

    # New L2 additive fields present (v4_label / filtered_for_direct_execution / route_hint)
    assert candidate["v4_label"] == candidate["label"]  # alias of label
    assert "filtered_for_direct_execution" in candidate
    assert "route_hint" in candidate

    # rank-1 use_as_is path — no fallback used
    assert trace["fallback_used"] is False
    assert trace["selection_path"] == "rank_1"