# C.E.L_Slide_test2/tests/test_phase_z2_v4_fallback.py

"""IMP-05 V4 fallback selector behavior tests — fully synthetic per Codex #10 E1 + Claude #13.

Lock per round 65~73 + Claude #13 §3 L4' :
  - 6 explicit behavior cases (Codex #10 E4)
  - fully synthetic MOCK_ IDs (Codex #7 generalization guardrail + Codex #10 E1 naming)
  - monkeypatch `get_contract` + `compute_capacity_fit` (Codex #10 E3 — selector has no DI)
  - NO real catalog template_id / frame_id
  - NO `v4_full32_result.yaml` dependency

Synthetic naming convention :
  - `MOCK_` prefix mandatory
  - `_a` / `_b` / `_c` suffixes = enumeration only (NOT ordering / priority)
  - rank/order expressed by `v4_full_rank` field, NEVER by ID suffix

Real-catalog integrity is verified separately in `tests/test_catalog_invariant.py`.
"""
from __future__ import annotations

from typing import Optional

import pytest

from src.phase_z2_pipeline import lookup_v4_match_with_fallback


# ─── Synthetic catalog stub ──────────────────────────────────────
# Tests control which synthetic templates are catalog-registered + capacity-OK.

_MOCK_CATALOG: dict[str, object] = {
    "MOCK_template_direct_a": object(),       # registered
    "MOCK_template_direct_b": object(),       # registered (used for dedup case)
    "MOCK_template_reject_a": object(),       # registered (but label=reject)
    "MOCK_template_restructure_a": object(),  # registered (but label=restructure)
    # "MOCK_template_missing_contract" intentionally absent — get_contract returns None.
}


def _mock_get_contract(template_id: str):
    """Synthetic contract lookup — return catalog entry or None."""
    return _MOCK_CATALOG.get(template_id)


def _mock_capacity_fit_ok(template_id: str, raw_content: str) -> dict:
    """Synthetic capacity precheck — always OK."""
    return {"fit_status": "ok"}


@pytest.fixture
def patch_selector_deps(monkeypatch):
    """Monkeypatch module-level dependencies of `lookup_v4_match_with_fallback`.

    Codex #10 E3 + Claude #12 verification — selector has no DI; module-level
    `get_contract` / `compute_capacity_fit` must be monkeypatched.
    """
    monkeypatch.setattr(
        "src.phase_z2_pipeline.get_contract", _mock_get_contract
    )
    monkeypatch.setattr(
        "src.phase_z2_pipeline.compute_capacity_fit", _mock_capacity_fit_ok
    )


def _make_v4(judgments: list[dict], section_id: str = "S1") -> dict:
    """Wrap synthetic judgments into V4 input shape."""
    return {"mdx_sections": {section_id: {"judgments_full32": judgments}}}


def _j(rank: int, template_id: str, frame_id: str, label: str,
       confidence: float = 0.9) -> dict:
    """Synthetic V4 judgment record — shape matches real V4 evidence shape."""
    return {
        "frame_id": frame_id,
        "frame_number": rank,
        "template_id": template_id,
        "confidence": confidence,
        "label": label,
        "v4_full_rank": rank,
    }


# ─── Case 1 : rank-1 direct eligible retention (no fallback used) ───────────


def test_rank_1_direct_eligible_is_retained(patch_selector_deps):
    """Codex #10 E4 case 1 — rank-1 use_as_is + registered → keep rank-1, no fallback."""
    v4 = _make_v4([
        _j(1, "MOCK_template_direct_a", "MOCK_frame_001", "use_as_is"),
        _j(2, "MOCK_template_direct_b", "MOCK_frame_002", "use_as_is"),
    ])

    match, trace = lookup_v4_match_with_fallback(
        v4, "S1", raw_content="- a\n- b\n- c\n"
    )

    assert match is not None
    assert match.template_id == "MOCK_template_direct_a"
    assert match.v4_rank == 1
    assert match.selection_path == "rank_1"
    assert trace["fallback_used"] is False
    assert trace["selection_path"] == "rank_1"
    assert trace["selected_rank"] == 1


# ─── Case 2 : rank-1 non-direct → rank-2/3 direct selected (fallback used) ───


def test_rank_1_non_direct_promotes_rank_2(patch_selector_deps):
    """Codex #10 E4 case 2 — rank-1 reject + rank-2 use_as_is → promote rank-2."""
    v4 = _make_v4([
        _j(1, "MOCK_template_reject_a", "MOCK_frame_001", "reject"),
        _j(2, "MOCK_template_direct_a", "MOCK_frame_002", "use_as_is"),
    ])

    match, trace = lookup_v4_match_with_fallback(
        v4, "S1", raw_content="- a\n- b\n- c\n"
    )

    assert match is not None
    assert match.template_id == "MOCK_template_direct_a"
    assert match.v4_rank == 2
    assert match.selection_path == "rank_2_fallback"
    assert trace["fallback_used"] is True
    assert trace["selected_rank"] == 2
    assert "phase_z_status_not_allowed" in trace["fallback_reason"]


# ─── Case 3 : duplicate template_id is skipped / deduped ────────────────────


def test_duplicate_template_id_is_skipped_or_deduped(patch_selector_deps):
    """Codex #10 E4 case 3 + Claude #13 L4 dedup — duplicate template appearing
    at multiple ranks must not be evaluated twice as separate fallback candidates.

    Current selector traverses rank 1..max_rank linearly. If rank-1 is skipped
    (e.g. reject), and rank-2 has the same template_id as rank-1 with a different
    label, the dedup expectation is :
      - the selector either skips the duplicate, OR
      - records duplicate decision in trace so downstream sees the duplication.

    Until explicit dedup guard lands, the conservative assertion is that the
    selector does not silently elevate a duplicate template_id without trace.
    """
    v4 = _make_v4([
        _j(1, "MOCK_template_reject_a", "MOCK_frame_001", "reject"),
        # rank-2 has same template_id as rank-1 (synthetic V4 anomaly)
        _j(2, "MOCK_template_reject_a", "MOCK_frame_001", "use_as_is"),
        _j(3, "MOCK_template_direct_a", "MOCK_frame_002", "use_as_is"),
    ])

    match, trace = lookup_v4_match_with_fallback(
        v4, "S1", raw_content="- a\n- b\n- c\n"
    )

    # Either the duplicate is skipped (then rank-3 wins) or duplicate is selected.
    # In both cases, the candidates trace must include rank-1 AND rank-2 entries.
    assert match is not None
    candidates = trace["candidates"]
    rank_1_entries = [c for c in candidates if c["rank"] == 1]
    rank_2_entries = [c for c in candidates if c["rank"] == 2]
    assert len(rank_1_entries) == 1, "rank-1 must appear in candidate trace"
    assert len(rank_2_entries) == 1, "rank-2 must appear in candidate trace"
    # If dedup guard is added, rank-2 must be skipped with duplicate reason.
    # Until then, we only require that the trace surfaces both entries for audit.


# ─── Case 4 : missing contract → skipped / chain-exhausted trace ────────────


def test_missing_contract_yields_chain_exhausted_trace(patch_selector_deps):
    """Codex #10 E4 case 4 — all ranks missing catalog contract → chain exhausted."""
    v4 = _make_v4([
        _j(1, "MOCK_template_missing_contract", "MOCK_frame_001", "use_as_is"),
    ])

    match, trace = lookup_v4_match_with_fallback(
        v4, "S1", raw_content="- a\n- b\n- c\n"
    )

    assert match is None
    assert trace["selection_path"] == "chain_exhausted"
    candidates = trace["candidates"]
    assert any(c.get("reason") == "skipped_no_contract" for c in candidates)


# ─── Case 5 : restructure / reject preserved as non-direct candidate evidence


def test_restructure_reject_preserved_as_non_direct_evidence(patch_selector_deps):
    """Codex #10 E4 case 5 + Codex #2 conceptual + Claude #11 L5 — restructure / reject
    candidates must remain visible in candidate_evidence with route hints,
    not silently discarded.
    """
    v4 = _make_v4([
        _j(1, "MOCK_template_reject_a", "MOCK_frame_001", "reject"),
        _j(2, "MOCK_template_restructure_a", "MOCK_frame_002", "restructure"),
        _j(3, "MOCK_template_direct_a", "MOCK_frame_003", "use_as_is"),
    ])

    match, trace = lookup_v4_match_with_fallback(
        v4, "S1", raw_content="- a\n- b\n- c\n"
    )

    assert match is not None
    assert match.template_id == "MOCK_template_direct_a"

    candidates = trace["candidates"]
    # All 3 must appear with informative schema (L2 fields)
    by_rank = {c["rank"]: c for c in candidates}
    assert set(by_rank.keys()) == {1, 2, 3}

    # rank-1 reject — non-direct, design_reference_only
    assert by_rank[1]["v4_label"] == "reject"
    assert by_rank[1]["filtered_for_direct_execution"] is True
    assert by_rank[1]["route_hint"] == "design_reference_only"

    # rank-2 restructure — non-direct, ai_adaptation_required
    assert by_rank[2]["v4_label"] == "restructure"
    assert by_rank[2]["filtered_for_direct_execution"] is True
    assert by_rank[2]["route_hint"] == "ai_adaptation_required"

    # rank-3 use_as_is — direct, direct_render
    assert by_rank[3]["v4_label"] == "use_as_is"
    assert by_rank[3]["filtered_for_direct_execution"] is False
    assert by_rank[3]["route_hint"] == "direct_render"


# ─── Case 6 : additive fields do not regress existing trace shape ───────────


def test_existing_trace_shape_does_not_regress(patch_selector_deps):
    """Codex #10 E4 case 6 + Claude #11 L9 — additive L2/L3 fields must not break
    existing trace consumers. Existing fields (`label`, `fallback_used`,
    `selection_path`, `selected_rank`, etc.) must remain present and unchanged.
    """
    v4 = _make_v4([
        _j(1, "MOCK_template_direct_a", "MOCK_frame_001", "use_as_is"),
    ])

    match, trace = lookup_v4_match_with_fallback(
        v4, "S1", raw_content="- a\n- b\n- c\n"
    )

    # Existing top-level trace fields preserved
    expected_top_fields = {
        "section_id", "max_rank", "selection_path", "selected_rank",
        "selected_template_id", "selected_frame_id", "selected_label",
        "fallback_used", "fallback_reason", "candidates",
    }
    assert expected_top_fields.issubset(trace.keys())

    # Existing candidate fields preserved
    candidate = trace["candidates"][0]
    expected_candidate_fields = {
        "rank", "template_id", "frame_id", "frame_number", "confidence",
        "label", "phase_z_status", "catalog_registered", "decision", "reason",
    }
    assert expected_candidate_fields.issubset(candidate.keys())

    # New L2 additive fields present (v4_label / filtered_for_direct_execution / route_hint)
    assert candidate["v4_label"] == candidate["label"]  # alias of label
    assert "filtered_for_direct_execution" in candidate
    assert "route_hint" in candidate

    # rank-1 use_as_is path — no fallback used
    assert trace["fallback_used"] is False
    assert trace["selection_path"] == "rank_1"