# C.E.L_Slide_test2/tests/test_phase_z2_v4_fallback.py

"""IMP-05 V4 fallback selector behavior tests — fully synthetic per Codex #10 E1 + Claude #13.

Lock per round 65~73 + Claude #13 §3 L4' :
  - 6 explicit behavior cases (Codex #10 E4)
  - fully synthetic MOCK_ IDs (Codex #7 generalization guardrail + Codex #10 E1 naming)
  - monkeypatch `get_contract` + `compute_capacity_fit` (Codex #10 E3 — selector has no DI)
  - NO real catalog template_id / frame_id
  - NO `v4_full32_result.yaml` dependency

Synthetic naming convention :
  - `MOCK_` prefix mandatory
  - `_a` / `_b` / `_c` suffixes = enumeration only (NOT ordering / priority)
  - rank/order expressed by `v4_full_rank` field, NEVER by ID suffix

Real-catalog integrity is verified separately in `tests/test_catalog_invariant.py`.
"""
from __future__ import annotations

from typing import Optional

import pytest

import inspect

from src import phase_z2_pipeline
from src.phase_z2_pipeline import lookup_v4_match_with_fallback


# ─── Synthetic catalog stub ──────────────────────────────────────
# Tests control which synthetic templates are catalog-registered + capacity-OK.

# Synthetic template IDs that count as catalog-registered. Every ID maps to
# its own opaque placeholder object standing in for a contract.
# "MOCK_template_missing_contract" is deliberately left out so that
# `_mock_get_contract` yields None for it.
_MOCK_CATALOG: dict[str, object] = {
    registered_id: object()
    for registered_id in (
        "MOCK_template_direct_a",       # registered
        "MOCK_template_direct_b",       # registered (rank-2 alternative in case 1)
        "MOCK_template_reject_a",       # registered (but label=reject)
        "MOCK_template_restructure_a",  # registered (but label=restructure)
    )
}


def _mock_get_contract(template_id: str):
    """Stand-in for the pipeline's contract lookup.

    Returns the placeholder stored in `_MOCK_CATALOG` for `template_id`, or
    None when the ID is not registered there.
    """
    try:
        return _MOCK_CATALOG[template_id]
    except KeyError:
        # Unregistered synthetic template -> "no contract".
        return None


def _mock_capacity_fit_ok(template_id: str, raw_content: str) -> dict:
    """Stand-in for the capacity precheck that reports OK for every input.

    Both arguments are accepted only to mirror the call signature being
    patched; neither is inspected.
    """
    verdict = dict(fit_status="ok")
    return verdict


@pytest.fixture
def patch_selector_deps(monkeypatch):
    """Swap the selector's module-level collaborators for synthetic stubs.

    Codex #10 E3 + Claude #12 verification — `lookup_v4_match_with_fallback`
    has no dependency injection, so `get_contract` and `compute_capacity_fit`
    are replaced on `src.phase_z2_pipeline` via monkeypatch for the duration
    of the requesting test.
    """
    replacements = (
        ("src.phase_z2_pipeline.get_contract", _mock_get_contract),
        ("src.phase_z2_pipeline.compute_capacity_fit", _mock_capacity_fit_ok),
    )
    for dotted_target, stub in replacements:
        monkeypatch.setattr(dotted_target, stub)


def _make_v4(judgments: list[dict], section_id: str = "S1") -> dict:
    """Build a V4-shaped input dict holding `judgments` under one section.

    The judgments list is stored as-is (not copied) at
    ``["mdx_sections"][section_id]["judgments_full32"]``.
    """
    section_payload = {"judgments_full32": judgments}
    sections = {section_id: section_payload}
    return {"mdx_sections": sections}


def _j(rank: int, template_id: str, frame_id: str, label: str,
       confidence: float = 0.9) -> dict:
    """Build one synthetic V4 judgment record.

    `rank` is written to both `frame_number` and `v4_full_rank`; the other
    arguments are stored under their own names.
    """
    return dict(
        frame_id=frame_id,
        frame_number=rank,
        template_id=template_id,
        confidence=confidence,
        label=label,
        v4_full_rank=rank,
    )


# ─── Case 1 : rank-1 direct eligible retention (no fallback used) ───────────


def test_rank_1_direct_eligible_is_retained(patch_selector_deps):
    """Codex #10 E4 case 1 — a `use_as_is`, catalog-registered rank-1 is kept.

    Two direct candidates are offered; the result must be rank-1 and the
    trace must report that no fallback happened.
    """
    judgments = [
        _j(1, "MOCK_template_direct_a", "MOCK_frame_001", "use_as_is"),
        _j(2, "MOCK_template_direct_b", "MOCK_frame_002", "use_as_is"),
    ]

    selected, selection_trace = lookup_v4_match_with_fallback(
        _make_v4(judgments), "S1", raw_content="- a\n- b\n- c\n"
    )

    # Returned match object describes rank-1.
    assert selected is not None
    assert selected.template_id == "MOCK_template_direct_a"
    assert selected.v4_rank == 1
    assert selected.selection_path == "rank_1"

    # Trace agrees with the match and flags no fallback.
    assert selection_trace["fallback_used"] is False
    assert selection_trace["selection_path"] == "rank_1"
    assert selection_trace["selected_rank"] == 1


# ─── Case 2 : rank-1 non-direct → rank-2/3 direct selected (fallback used) ───


def test_rank_1_non_direct_promotes_rank_2(patch_selector_deps):
    """Codex #10 E4 case 2 — a `reject` rank-1 gives way to a `use_as_is` rank-2.

    The match must describe rank-2 via the `rank_2_fallback` path, and the
    trace's fallback reason must mention `phase_z_status_not_allowed`.
    """
    judgments = [
        _j(1, "MOCK_template_reject_a", "MOCK_frame_001", "reject"),
        _j(2, "MOCK_template_direct_a", "MOCK_frame_002", "use_as_is"),
    ]

    selected, selection_trace = lookup_v4_match_with_fallback(
        _make_v4(judgments), "S1", raw_content="- a\n- b\n- c\n"
    )

    assert selected is not None
    assert selected.template_id == "MOCK_template_direct_a"
    assert selected.v4_rank == 2
    assert selected.selection_path == "rank_2_fallback"

    assert selection_trace["fallback_used"] is True
    assert selection_trace["selected_rank"] == 2
    reason_text = selection_trace["fallback_reason"]
    assert "phase_z_status_not_allowed" in reason_text


# ─── Case 3 : duplicate template_id is skipped / deduped ────────────────────


def test_duplicate_template_id_is_skipped_rank_3_wins(patch_selector_deps):
    """Codex #14 dedup precision lock — the first occurrence of a template_id
    reserves it for the whole chain regardless of that occurrence's decision.
    A later rank carrying the same template_id MUST be skipped as a duplicate,
    whatever its V4 label is.

    The fixture simulates a V4 anomaly: rank-1 and rank-2 share one
    template_id (and one frame_id, per the Codex #6 1:1 catalog terminology,
    consistent with the real catalog).

    Per the Codex #14 example :
      rank 1: A reject       → skipped (non-direct), template A claimed
      rank 2: A use_as_is    → skipped as duplicate_template_id (must NOT win)
      rank 3: B use_as_is    → selected (distinct template, eligible)
    """
    shared_template = "MOCK_template_dup_a"
    shared_frame = "MOCK_frame_dup_001"
    v4_input = _make_v4([
        # rank-1: non-direct first occurrence, claims the template_id
        _j(1, shared_template, shared_frame, "reject"),
        # rank-2: same template/frame; executable label but must be deduped
        _j(2, shared_template, shared_frame, "use_as_is"),
        # rank-3: different template with an executable label, expected winner
        _j(3, "MOCK_template_direct_a", "MOCK_frame_003", "use_as_is"),
    ])

    winner, selection_trace = lookup_v4_match_with_fallback(
        v4_input, "S1", raw_content="- a\n- b\n- c\n"
    )

    # The match must describe rank-3.
    assert winner is not None
    assert winner.template_id == "MOCK_template_direct_a"
    assert winner.v4_rank == 3
    assert winner.selection_path == "rank_3_fallback"
    assert selection_trace["fallback_used"] is True
    assert selection_trace["selected_rank"] == 3

    # Every candidate must still be present in the trace, keyed here by rank.
    entries = {}
    for entry in selection_trace["candidates"]:
        entries[entry["rank"]] = entry
    assert set(entries) == {1, 2, 3}

    # Exact per-rank expectations, checked field by field in listed order.
    expected_by_rank = {
        # non-direct first occurrence keeps its status_not_allowed reason
        1: {
            "decision": "skipped",
            "reason": "phase_z_status_not_allowed:fallback_candidate",
            "template_id": "MOCK_template_dup_a",
            "v4_label": "reject",
        },
        # duplicate of rank-1; audit fields (label, frame) must survive
        2: {
            "decision": "skipped",
            "reason": "duplicate_template_id",
            "template_id": "MOCK_template_dup_a",
            "v4_label": "use_as_is",
            "frame_id": "MOCK_frame_dup_001",
        },
        # distinct executable candidate
        3: {
            "decision": "selected",
            "template_id": "MOCK_template_direct_a",
        },
    }
    for rank, wanted_fields in expected_by_rank.items():
        for field, wanted in wanted_fields.items():
            assert entries[rank][field] == wanted


# ─── Case 4 : missing contract → skipped / chain-exhausted trace ────────────


def test_missing_contract_yields_chain_exhausted_trace(patch_selector_deps):
    """Codex #10 E4 case 4 — no rank has a catalog contract → chain exhausted.

    The only candidate uses a template_id absent from the mock catalog, so no
    match may be returned and the trace must record `skipped_no_contract`.
    """
    only_candidate = _j(
        1, "MOCK_template_missing_contract", "MOCK_frame_001", "use_as_is"
    )

    selected, selection_trace = lookup_v4_match_with_fallback(
        _make_v4([only_candidate]), "S1", raw_content="- a\n- b\n- c\n"
    )

    assert selected is None
    assert selection_trace["selection_path"] == "chain_exhausted"
    recorded_reasons = [
        entry.get("reason") for entry in selection_trace["candidates"]
    ]
    assert "skipped_no_contract" in recorded_reasons


# ─── Case 5 : restructure / reject preserved as non-direct candidate evidence


def test_restructure_reject_preserved_as_non_direct_evidence(patch_selector_deps):
    """Codex #10 E4 case 5 + Codex #2 conceptual + Claude #11 L5 — `reject` and
    `restructure` candidates must stay visible in the trace's candidate list,
    each with a route hint, instead of being silently dropped.
    """
    v4_input = _make_v4([
        _j(1, "MOCK_template_reject_a", "MOCK_frame_001", "reject"),
        _j(2, "MOCK_template_restructure_a", "MOCK_frame_002", "restructure"),
        _j(3, "MOCK_template_direct_a", "MOCK_frame_003", "use_as_is"),
    ])

    selected, selection_trace = lookup_v4_match_with_fallback(
        v4_input, "S1", raw_content="- a\n- b\n- c\n"
    )

    assert selected is not None
    assert selected.template_id == "MOCK_template_direct_a"

    # All three ranks must be reported with the informative (L2) fields.
    entries = {}
    for entry in selection_trace["candidates"]:
        entries[entry["rank"]] = entry
    assert set(entries) == {1, 2, 3}

    # rank -> (v4_label, filtered_for_direct_execution, route_hint)
    expectations = (
        (1, "reject", True, "design_reference_only"),
        (2, "restructure", True, "ai_adaptation_required"),
        (3, "use_as_is", False, "direct_render"),
    )
    for rank, wanted_label, wanted_filtered, wanted_hint in expectations:
        entry = entries[rank]
        assert entry["v4_label"] == wanted_label
        # Identity check on purpose: the flag must be a real bool.
        assert entry["filtered_for_direct_execution"] is wanted_filtered
        assert entry["route_hint"] == wanted_hint


# ─── Case 6 : additive fields do not regress existing trace shape ───────────


def test_existing_trace_shape_does_not_regress(patch_selector_deps):
    """Codex #10 E4 case 6 + Claude #11 L9 — the additive L2/L3 fields must not
    break existing trace consumers: the established top-level and
    per-candidate keys have to stay present next to the new ones.
    """
    single = _j(1, "MOCK_template_direct_a", "MOCK_frame_001", "use_as_is")

    match, selection_trace = lookup_v4_match_with_fallback(
        _make_v4([single]), "S1", raw_content="- a\n- b\n- c\n"
    )

    # Established top-level trace keys.
    required_top = {
        "section_id", "max_rank", "selection_path", "selected_rank",
        "selected_template_id", "selected_frame_id", "selected_label",
        "fallback_used", "fallback_reason", "candidates",
    }
    missing_top = required_top.difference(selection_trace.keys())
    assert not missing_top

    # Established per-candidate keys.
    first_entry = selection_trace["candidates"][0]
    required_entry = {
        "rank", "template_id", "frame_id", "frame_number", "confidence",
        "label", "phase_z_status", "catalog_registered", "decision", "reason",
    }
    missing_entry = required_entry.difference(first_entry.keys())
    assert not missing_entry

    # Additive L2 keys: v4_label mirrors label; the other two must exist.
    assert first_entry["v4_label"] == first_entry["label"]
    for additive_key in ("filtered_for_direct_execution", "route_hint"):
        assert additive_key in first_entry

    # Rank-1 use_as_is path, so no fallback is reported.
    assert selection_trace["fallback_used"] is False
    assert selection_trace["selection_path"] == "rank_1"


# ─── Case 7 : Step 9 production-source guard (Codex #20 blocker fix) ───


def test_step9_production_emits_candidate_evidence_and_alias():
    """Temporary source-text guard for the IMP-05 Step 9 evidence fields.

    Step 9 application-plan unit assembly is currently inline, so this test
    looks for the exact production assignments in the module source until
    IMP-32 extracts a helper. Once that helper exists, replace this
    source-string guard with a direct helper-call test.
    """
    module_source = inspect.getsource(phase_z2_pipeline)

    evidence_offset = module_source.find(
        '"candidate_evidence": selection_trace.get("candidates", [])'
    )
    alias_offset = module_source.find(
        '"fallback_chain": selection_trace.get("candidates", [])'
    )

    # Both assignments exist (find() gives -1 when the text is absent) ...
    assert evidence_offset != -1
    assert alias_offset != -1
    # ... the primary field comes first, and the alias is labelled as such.
    assert evidence_offset < alias_offset
    assert "compat alias; prefer candidate_evidence" in module_source


# ─── Case 8 : Step 20 slide-status qualifier fields presence + defensive default


def test_step20_slide_status_qualifier_fields_present_with_defensive_defaults():
    """Codex #10 D4 + Codex #17 idea F + Claude #21 idea J — the Step 20 slide
    status must expose `fallback_selection_count` and `selection_paths`, taken
    from comp_debug["v4_fallback_summary"], and fall back to 0 / [] when that
    summary is missing or empty. The top-level `overall` key must stay present.
    """
    from src.phase_z2_pipeline import MdxSection, compute_slide_status

    # Inputs shared by all three scenarios (same objects on every call).
    no_sections: list[MdxSection] = []
    no_units: list = []
    overflow_ok = {"passed": True, "fail_reasons": []}

    def slide_status_for(comp_debug):
        # Only comp_debug varies between the scenarios below.
        return compute_slide_status(
            no_sections, no_units, comp_debug, overflow_ok,
            adapter_needed_units=None, debug_zones=None,
        )

    # Scenario A — summary populated with one fallback selection.
    populated_status = slide_status_for({
        "v4_fallback_summary": {
            "fallback_used_count": 1,
            "fallback_selection_count": 1,
            "selection_paths": [
                {"section_id": "S1", "selection_path": "rank_2_fallback",
                 "selected_rank": 2, "selected_template_id": "MOCK_T",
                 "fallback_trigger": "phase_z_status_not_allowed:fallback_candidate"},
            ],
        },
        "candidates_summary": [],
    })
    # Qualifier fields are present and carry the summary's values.
    assert "fallback_selection_count" in populated_status
    assert "selection_paths" in populated_status
    assert populated_status["fallback_selection_count"] == 1
    assert len(populated_status["selection_paths"]) == 1
    assert populated_status["selection_paths"][0]["section_id"] == "S1"
    # Pre-existing keys are still there.
    for legacy_key in ("fallback_used", "fallback_selections", "overall"):
        assert legacy_key in populated_status

    # Scenario B — summary key absent altogether.
    absent_status = slide_status_for({"candidates_summary": []})
    assert absent_status["fallback_selection_count"] == 0
    assert absent_status["selection_paths"] == []
    assert "overall" in absent_status

    # Scenario C — summary key present but an empty dict.
    blank_status = slide_status_for(
        {"v4_fallback_summary": {}, "candidates_summary": []}
    )
    assert blank_status["fallback_selection_count"] == 0
    assert blank_status["selection_paths"] == []