Files
C.E.L_Slide_test2/tests/test_mdx04_vp_routing.py
kyeongmin cacc5b30db feat(#85): IMP catalog builder invariant + VP runtime gate (u1~u7)
- u1: BuilderMissingError(FitError) — narrow exception aligned with pipeline catch
- u2: load_frame_contracts catalog invariant + VP skip + CatalogInvariantError
- u3a: audit CLI I1~I3 (partial existence / declared builder / registry membership)
- u3b: audit CLI I4 (slot_payload refs vs declared/generated payload keys)
- u4: lookup_v4_candidates VP filter (lookup_v4_all_judgments raw telemetry untouched)
- u5: catalog invariant regression coverage + temp non-VP failure fixtures
- u6: mdx04 VP routing fixture tests (sw_dependency_four_problems excluded from live)
- u7: tests/conftest.py env isolation + mdx03/mdx04/mdx05 subprocess smoke

Targeted 74 PASS (12.31s). Full regression 1063 PASS (87.70s). Audit CLI clean.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-23 16:56:38 +09:00

130 lines
5.2 KiB
Python

"""IMP-#85 u6 — mdx04 VP routing regression against the real V4 evidence.
Scope (Stage 2 lock):
- Use the production ``tests/matching/v4_full32_result.yaml`` + the production
``templates/phase_z2/catalog/frame_contracts.yaml`` (no fixtures, no mocks).
- Prove that ``sw_dependency_four_problems`` (VP rank-1 on ``04-2.1``, VP
rank-2 on ``04-2.2``) is excluded from ``lookup_v4_candidates`` after u4,
while ``lookup_v4_all_judgments`` retains it as Step 7-A raw telemetry.
- Guard mdx03 dynamically — the actual rank-1 winners on ``03-1`` / ``03-2``
must be non-VP per catalog AND must survive into live candidates.
- VP gating is asserted data-driven (catalog ``visual_pending: true`` flag),
never hard-coded — matches Stage 1/2 ``feedback_no_hardcoding`` guardrail.
Out of scope:
- Implementing the 17 missing VP builders (separate P0 backlog, IMP-04b / #42).
- VP semantics redefinition or VP frame removal from V4 evidence.
- Adapter pipeline redesign.
"""
from __future__ import annotations
from src.phase_z2_mapper import get_contract
from src.phase_z2_pipeline import (
load_v4_result,
lookup_v4_all_judgments,
lookup_v4_candidates,
)
# Template id of the frame the tests in this module expect to be absent from
# ``lookup_v4_candidates`` output while still present in
# ``lookup_v4_all_judgments`` output.
CRASH_TEMPLATE_ID = "sw_dependency_four_problems"
def _rank1_template_id(v4: dict, section_id: str) -> str:
judgments = v4["mdx_sections"][section_id]["judgments_full32"]
return judgments[0]["template_id"]
# ─── Dynamic catalog proof — VP flag is data-driven ─────────────
def test_crash_template_is_visual_pending_in_catalog():
    """The catalog entry for ``CRASH_TEMPLATE_ID`` carries ``visual_pending: true``.

    The rest of this module derives VP status from this catalog flag rather
    than from a hard-coded frame list, so the flag itself is pinned here.
    Per the original author's note, dropping the flag without registering the
    ``cards_4_grid`` builder should trip this assertion before mdx04 crashes
    the mapper.
    """
    catalog_entry = get_contract(CRASH_TEMPLATE_ID)
    # The template id doubles as the failure message when no dict comes back.
    assert isinstance(catalog_entry, dict), CRASH_TEMPLATE_ID
    vp_flag = catalog_entry.get("visual_pending")
    assert vp_flag is True
# ─── mdx04-2.1 — VP frame at rank 1 ─────────────────────────────
def test_mdx04_2_1_excludes_vp_rank_1_from_live_candidates():
    """On ``04-2.1`` the rank-1 VP crash frame is absent from live candidates.

    Any candidate that does survive must not be flagged ``visual_pending`` in
    the catalog. An empty candidate list is accepted here: the original
    author documents it as the ``no_non_reject_v4_candidate`` signal that is
    routed to the Step 9 fallback path.
    """
    section_id = "04-2.1"
    v4_result = load_v4_result()

    # Precondition: the evidence file still ranks the crash frame first.
    assert _rank1_template_id(v4_result, section_id) == CRASH_TEMPLATE_ID

    live = lookup_v4_candidates(v4_result, section_id, max_n=6)
    live_ids = [cand.template_id for cand in live]
    assert CRASH_TEMPLATE_ID not in live_ids

    # Every survivor is checked against the catalog flag, not a fixed list.
    for tid in live_ids:
        catalog_entry = get_contract(tid) or {}
        assert catalog_entry.get("visual_pending") is not True, (
            f"04-2.1: surviving live candidate {tid} is VP"
        )
def test_mdx04_2_1_retains_vp_frame_in_raw_judgments():
    """``lookup_v4_all_judgments`` for ``04-2.1`` still lists the VP crash frame."""
    v4_result = load_v4_result()
    raw_judgments = lookup_v4_all_judgments(v4_result, "04-2.1")
    raw_ids = [judgment.template_id for judgment in raw_judgments]
    assert CRASH_TEMPLATE_ID in raw_ids
# ─── mdx04-2.2 — VP frame at rank 2 ─────────────────────────────
def test_mdx04_2_2_excludes_vp_rank_2_from_live_candidates():
    """On ``04-2.2`` the VP crash frame is filtered and the rank-1 frame leads.

    Steps:
    1. Precondition — the rank-1 frame of ``04-2.2`` is not flagged
       ``visual_pending`` in the catalog.
    2. The crash frame does not appear in ``lookup_v4_candidates`` output.
    3. The first live candidate is the rank-1 frame.

    The final assertion guards against an empty candidate list so that a
    regression reports a readable assertion message instead of a bare
    ``IndexError`` — the same shape the mdx03 guard test uses.
    """
    v4 = load_v4_result()
    rank_1 = _rank1_template_id(v4, "04-2.2")
    rank_1_contract = get_contract(rank_1) or {}
    # Pre-condition for this regression: rank-1 on 04-2.2 is non-VP.
    assert rank_1_contract.get("visual_pending") is not True, (
        f"04-2.2 rank-1 ({rank_1}) unexpectedly VP — guard precondition broken"
    )

    candidates = lookup_v4_candidates(v4, "04-2.2", max_n=6)
    tids = [c.template_id for c in candidates]
    assert CRASH_TEMPLATE_ID not in tids, (
        f"04-2.2: VP frame {CRASH_TEMPLATE_ID} leaked into live candidates {tids}"
    )
    # `tids and ...` keeps an empty list from raising IndexError on tids[0].
    assert tids and tids[0] == rank_1, (
        f"04-2.2: expected rank-1 ({rank_1}) live, got {tids}"
    )
def test_mdx04_2_2_retains_vp_frame_in_raw_judgments():
    """``lookup_v4_all_judgments`` for ``04-2.2`` still lists the VP crash frame."""
    v4_result = load_v4_result()
    raw_judgments = lookup_v4_all_judgments(v4_result, "04-2.2")
    raw_ids = [judgment.template_id for judgment in raw_judgments]
    assert CRASH_TEMPLATE_ID in raw_ids
# ─── mdx03 dynamic guard — non-VP rank-1 survives ───────────────
def test_mdx03_rank_1_non_vp_survives_live_candidates():
    """The rank-1 frames of ``03-1`` and ``03-2`` remain the first live candidate.

    Both the rank-1 template id and its VP status are read at runtime (from
    the loaded V4 result and the catalog respectively); no template id is
    hard-coded. For each section the test first checks that the rank-1 frame
    is not flagged ``visual_pending``, then that it leads the live candidates.
    """
    guarded_sections = ("03-1", "03-2")
    v4 = load_v4_result()

    for section_id in guarded_sections:
        rank_1 = _rank1_template_id(v4, section_id)

        # Precondition: the guard only makes sense for a non-VP rank-1 frame.
        catalog_entry = get_contract(rank_1) or {}
        is_vp = catalog_entry.get("visual_pending") is True
        assert not is_vp, (
            f"{section_id} rank-1 ({rank_1}) unexpectedly VP — guard precondition broken"
        )

        tids = [
            cand.template_id
            for cand in lookup_v4_candidates(v4, section_id, max_n=6)
        ]
        # Non-empty check first so an empty list fails with the message below.
        assert tids and tids[0] == rank_1, (
            f"{section_id}: expected rank-1 ({rank_1}) live, got {tids}"
        )