- u1: BuilderMissingError(FitError) — narrow exception aligned with pipeline catch - u2: load_frame_contracts catalog invariant + VP skip + CatalogInvariantError - u3a: audit CLI I1~I3 (partial existence / declared builder / registry membership) - u3b: audit CLI I4 (slot_payload refs vs declared/generated payload keys) - u4: lookup_v4_candidates VP filter (lookup_v4_all_judgments raw telemetry untouched) - u5: catalog invariant regression coverage + temp non-VP failure fixtures - u6: mdx04 VP routing fixture tests (sw_dependency_four_problems excluded from live) - u7: tests/conftest.py env isolation + mdx03/mdx04/mdx05 subprocess smoke Targeted 74 PASS (12.31s). Full regression 1063 PASS (87.70s). Audit CLI clean. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
130 lines
5.2 KiB
Python
130 lines
5.2 KiB
Python
"""IMP-#85 u6 — mdx04 VP routing regression against the real V4 evidence.

Scope (Stage 2 lock):
- Use the production ``tests/matching/v4_full32_result.yaml`` + the production
  ``templates/phase_z2/catalog/frame_contracts.yaml`` (no fixtures, no mocks).
- Prove that ``sw_dependency_four_problems`` (VP rank-1 on ``04-2.1``, VP
  rank-2 on ``04-2.2``) is excluded from ``lookup_v4_candidates`` after u4,
  while ``lookup_v4_all_judgments`` retains it as Step 7-A raw telemetry.
- Guard mdx03 dynamically — the actual rank-1 winners on ``03-1`` / ``03-2``
  must be non-VP per catalog AND must survive into live candidates.
- VP gating is asserted data-driven (catalog ``visual_pending: true`` flag),
  never hard-coded — matches Stage 1/2 ``feedback_no_hardcoding`` guardrail.

Out of scope:
- Implementing the 17 missing VP builders (separate P0 backlog, IMP-04b / #42).
- VP semantics redefinition or VP frame removal from V4 evidence.
- Adapter pipeline redesign.
"""
|
|
from __future__ import annotations
|
|
|
|
from src.phase_z2_mapper import get_contract
|
|
from src.phase_z2_pipeline import (
|
|
load_v4_result,
|
|
lookup_v4_all_judgments,
|
|
lookup_v4_candidates,
|
|
)
|
|
|
|
# Template id of the visual-pending (VP) frame these regressions target; the
# module docstring places it at rank-1 on ``04-2.1`` and rank-2 on ``04-2.2``.
CRASH_TEMPLATE_ID = "sw_dependency_four_problems"
|
|
|
|
|
|
def _rank1_template_id(v4: dict, section_id: str) -> str:
|
|
judgments = v4["mdx_sections"][section_id]["judgments_full32"]
|
|
return judgments[0]["template_id"]
|
|
|
|
|
|
# ─── Dynamic catalog proof — VP flag is data-driven ─────────────
|
|
|
|
|
|
def test_crash_template_is_visual_pending_in_catalog():
    """Catalog declares ``sw_dependency_four_problems.visual_pending: true``.

    Locks the data-driven contract — the entire u4 / u6 chain rests on this
    YAML flag, not a hard-coded frame allow-list. If the catalog ever drops
    the flag without registering the ``cards_4_grid`` builder, this assertion
    surfaces the regression before mdx04 crashes the mapper.
    """
    contract = get_contract(CRASH_TEMPLATE_ID)
    # Fail on a missing / non-dict contract first so the flag check below
    # cannot error out on ``.get``; the template id is the failure message.
    assert isinstance(contract, dict), CRASH_TEMPLATE_ID
    # Identity check: only a literal boolean ``True`` counts as VP.
    assert contract.get("visual_pending") is True
|
|
|
|
|
|
# ─── mdx04-2.1 — VP frame at rank 1 ─────────────────────────────
|
|
|
|
|
|
def test_mdx04_2_1_excludes_vp_rank_1_from_live_candidates():
    """``04-2.1`` rank-1 is the VP crash frame — must NOT appear in live set.

    Every surviving live candidate (if any) must itself be non-VP per catalog;
    the section may legitimately produce an empty list (all remaining entries
    are reject), which is the documented ``no_non_reject_v4_candidate`` signal
    routed to the Step 9 fallback path.
    """
    v4 = load_v4_result()
    # Precondition: the raw evidence really ranks the VP frame first.
    assert _rank1_template_id(v4, "04-2.1") == CRASH_TEMPLATE_ID

    candidates = lookup_v4_candidates(v4, "04-2.1", max_n=6)
    tids = [c.template_id for c in candidates]

    assert CRASH_TEMPLATE_ID not in tids
    # An empty ``tids`` is acceptable here, so the loop may run zero times.
    for tid in tids:
        # A template with no catalog contract is treated as non-VP.
        contract = get_contract(tid) or {}
        assert contract.get("visual_pending") is not True, (
            f"04-2.1: surviving live candidate {tid} is VP"
        )
|
|
|
|
|
|
def test_mdx04_2_1_retains_vp_frame_in_raw_judgments():
    """Step 7-A axis preservation — raw 32-entry telemetry still carries VP."""
    v4 = load_v4_result()
    # The raw-judgment lookup must still list the VP frame for this section.
    all_tids = [j.template_id for j in lookup_v4_all_judgments(v4, "04-2.1")]
    assert CRASH_TEMPLATE_ID in all_tids
|
|
|
|
|
|
# ─── mdx04-2.2 — VP frame at rank 2 ─────────────────────────────
|
|
|
|
|
|
def test_mdx04_2_2_excludes_vp_rank_2_from_live_candidates():
    """``04-2.2`` rank-2 is the VP crash frame — rank-1 live frame must win."""
    v4 = load_v4_result()
    rank_1 = _rank1_template_id(v4, "04-2.2")
    # A template with no catalog contract is treated as non-VP.
    rank_1_contract = get_contract(rank_1) or {}
    # Pre-condition for this regression: rank-1 on 04-2.2 is non-VP.
    assert rank_1_contract.get("visual_pending") is not True

    candidates = lookup_v4_candidates(v4, "04-2.2", max_n=6)
    tids = [c.template_id for c in candidates]

    assert CRASH_TEMPLATE_ID not in tids
    # Guard against an empty live list so the failure is a readable assertion
    # (same shape as the mdx03 guard) instead of a bare IndexError.
    assert tids and tids[0] == rank_1, (
        f"04-2.2: expected rank-1 ({rank_1}) live, got {tids}"
    )
|
|
|
|
|
|
def test_mdx04_2_2_retains_vp_frame_in_raw_judgments():
    """Raw judgments path preserves VP frame regardless of its rank."""
    v4 = load_v4_result()
    # The raw-judgment lookup must still list the VP frame for this section.
    all_tids = [j.template_id for j in lookup_v4_all_judgments(v4, "04-2.2")]
    assert CRASH_TEMPLATE_ID in all_tids
|
|
|
|
|
|
# ─── mdx03 dynamic guard — non-VP rank-1 survives ───────────────
|
|
|
|
|
|
def test_mdx03_rank_1_non_vp_survives_live_candidates():
    """Non-VP rank-1 winners on mdx03 sections must still win after u4.

    Dynamic check — pulls rank-1 from the V4 yaml + catalog VP flag at runtime.
    No hard-coded template_id list; only the regression contract is asserted.
    """
    v4 = load_v4_result()
    for section_id in ("03-1", "03-2"):
        rank_1 = _rank1_template_id(v4, section_id)
        # A template with no catalog contract is treated as non-VP.
        contract = get_contract(rank_1) or {}
        # Precondition: if rank-1 were VP, the survival check below would not
        # be exercising the non-VP path this test exists to guard.
        assert contract.get("visual_pending") is not True, (
            f"{section_id} rank-1 ({rank_1}) unexpectedly VP — guard precondition broken"
        )
        candidates = lookup_v4_candidates(v4, section_id, max_n=6)
        tids = [c.template_id for c in candidates]
        # ``tids and ...`` makes an empty live list a readable failure.
        assert tids and tids[0] == rank_1, (
            f"{section_id}: expected rank-1 ({rank_1}) live, got {tids}"
        )
|