Files
C.E.L_Slide_test2/tests/test_phase_z2_v4_fallback.py
kyeongmin 1efbf672bd feat(#39): IMP-30 first-render invariant + abort bypass (2 paths)
Restore first-render invariant: final.html + Step 20 slide_status MUST be
written for every input where Step 0~5 succeed. Two abort paths replaced
with provisional/empty-shell synthesis; MDX content preserved, AI-free.

- u1 V4Match.provisional + lookup_v4_match_with_fallback(allow_provisional)
  chain_exhausted -> synthesize rank-1 provisional (opt-in, default-off)
- u2 CompositionUnit.provisional propagation (single / parent_merged /
  parent_merged_inferred constructors)
- u3 select_composition_units(allow_provisional_fill=True) last-resort
  fill + _candidate_state="selected_provisional"
- u4 pipeline.py path-(a) abort guard replaced with provisional retry +
  terminal __empty__ shell (no sys.exit(1))
- u5 zones_data.provisional -> slide_base.html zone--provisional class +
  data-provisional + needs-adaptation badge (template-only)
- u6 compute_slide_status additive provisional_first_render_count/_units
  (overall enum unchanged per IMP-05 Codex #10 D4)
- u7 regression: tests/test_phase_z2_imp30_first_render.py (28 tests) +
  tests/test_phase_z2_v4_fallback.py (+5 cases)

Guardrails verified: MVP1_ALLOWED_STATUSES unchanged, no calculate_fit,
no LLM in fallback path, no MDX 03/04/05 hardcoding.

Anchor sync (Rule 13): tests/orchestrator_unit/test_imp17_comment_anchor.py
re-pinned 564/565 -> 570/571 to track V4Match.provisional shift at
src/phase_z2_pipeline.py:179-184.

Cross-ref: IMP-05 (#5) §5 defer + Codex #2 first-render invariant.
2026-05-21 00:40:58 +09:00

505 lines
20 KiB
Python

"""IMP-05 V4 fallback selector behavior tests — fully synthetic per Codex #10 E1 + Claude #13.
Lock per round 65~73 + Claude #13 §3 L4' :
- 6 explicit behavior cases (Codex #10 E4)
- fully synthetic MOCK_ IDs (Codex #7 generalization guardrail + Codex #10 E1 naming)
- monkeypatch `get_contract` + `compute_capacity_fit` (Codex #10 E3 — selector has no DI)
- NO real catalog template_id / frame_id
- NO `v4_full32_result.yaml` dependency
Synthetic naming convention :
- `MOCK_` prefix mandatory
- `_a` / `_b` / `_c` suffixes = enumeration only (NOT ordering / priority)
- rank/order expressed by `v4_full_rank` field, NEVER by ID suffix
Real-catalog integrity is verified separately in `tests/test_catalog_invariant.py`.
"""
from __future__ import annotations
from typing import Optional
import pytest
import inspect
from src import phase_z2_pipeline
from src.phase_z2_pipeline import lookup_v4_match_with_fallback
# ─── Synthetic catalog stub ──────────────────────────────────────
# Tests control which synthetic templates are catalog-registered + capacity-OK.
# Every listed template ID is "registered" and maps to its own opaque
# placeholder contract. "MOCK_template_missing_contract" is intentionally
# absent, so a `.get()` lookup for it yields None.
_MOCK_CATALOG: dict[str, object] = {
    registered_id: object()
    for registered_id in (
        "MOCK_template_direct_a",       # registered
        "MOCK_template_direct_b",       # registered (used for dedup case)
        "MOCK_template_reject_a",       # registered (but label=reject)
        "MOCK_template_restructure_a",  # registered (but label=restructure)
    )
}
def _mock_get_contract(template_id: str) -> Optional[object]:
    """Synthetic contract lookup against `_MOCK_CATALOG`.

    Args:
        template_id: Template ID to look up.

    Returns:
        The placeholder contract registered under *template_id*, or None when
        the ID is not in the synthetic catalog.
    """
    return _MOCK_CATALOG.get(template_id)
def _mock_capacity_fit_ok(template_id: str, raw_content: str) -> dict[str, str]:
    """Synthetic capacity precheck that always reports a fit.

    Both arguments are accepted so this stub can be installed in place of
    `compute_capacity_fit`; neither one is inspected.

    Returns:
        A fresh ``{"fit_status": "ok"}`` dict on every call.
    """
    return {"fit_status": "ok"}
@pytest.fixture
def patch_selector_deps(monkeypatch):
    """Swap the selector's module-level dependencies for the synthetic stubs.

    Codex #10 E3 + Claude #12 verification — `lookup_v4_match_with_fallback`
    has no dependency injection, so `get_contract` and `compute_capacity_fit`
    are replaced on the `src.phase_z2_pipeline` module itself. pytest's
    `monkeypatch` fixture restores the originals when the test finishes.
    """
    replacements = (
        ("src.phase_z2_pipeline.get_contract", _mock_get_contract),
        ("src.phase_z2_pipeline.compute_capacity_fit", _mock_capacity_fit_ok),
    )
    for dotted_target, stub in replacements:
        monkeypatch.setattr(dotted_target, stub)
def _make_v4(judgments: list[dict], section_id: str = "S1") -> dict[str, dict]:
    """Wrap synthetic judgments into the V4 input shape.

    Args:
        judgments: Judgment records to place under the section.
        section_id: Key of the single section to create; defaults to "S1".

    Returns:
        ``{"mdx_sections": {section_id: {"judgments_full32": judgments}}}``.
        The list is stored as passed in, not copied.
    """
    return {"mdx_sections": {section_id: {"judgments_full32": judgments}}}
def _j(rank: int, template_id: str, frame_id: str, label: str,
       confidence: float = 0.9) -> dict[str, object]:
    """Build one synthetic V4 judgment record.

    Args:
        rank: Stored under both ``frame_number`` and ``v4_full_rank``.
        template_id: Synthetic template ID (``MOCK_`` prefix by convention).
        frame_id: Synthetic frame ID.
        label: V4 label, e.g. "use_as_is", "restructure" or "reject".
        confidence: Confidence score; defaults to 0.9.

    Returns:
        A dict with the keys ``frame_id``, ``frame_number``, ``template_id``,
        ``confidence``, ``label`` and ``v4_full_rank``.
    """
    return {
        "frame_id": frame_id,
        "frame_number": rank,
        "template_id": template_id,
        "confidence": confidence,
        "label": label,
        "v4_full_rank": rank,
    }
# ─── Case 1 : rank-1 direct eligible retention (no fallback used) ───────────
def test_rank_1_direct_eligible_is_retained(patch_selector_deps):
    """Codex #10 E4 case 1 — rank-1 use_as_is + registered is kept, no fallback.

    A second use_as_is candidate sits at rank-2; the test checks that rank-1
    is still the one returned and that the trace reports no fallback.
    """
    judgments = [
        _j(1, "MOCK_template_direct_a", "MOCK_frame_001", "use_as_is"),
        _j(2, "MOCK_template_direct_b", "MOCK_frame_002", "use_as_is"),
    ]
    selected, trace = lookup_v4_match_with_fallback(
        _make_v4(judgments), "S1", raw_content="- a\n- b\n- c\n"
    )

    # Returned match is the rank-1 candidate.
    assert selected is not None
    assert selected.template_id == "MOCK_template_direct_a"
    assert selected.v4_rank == 1
    assert selected.selection_path == "rank_1"

    # Trace agrees: rank-1 path, fallback not used.
    assert trace["fallback_used"] is False
    assert trace["selection_path"] == "rank_1"
    assert trace["selected_rank"] == 1
# ─── Case 2 : rank-1 non-direct → rank-2/3 direct selected (fallback used) ───
def test_rank_1_non_direct_promotes_rank_2(patch_selector_deps):
    """Codex #10 E4 case 2 — rank-1 reject + rank-2 use_as_is promotes rank-2.

    The trace must flag the fallback and carry a reason that mentions
    ``phase_z_status_not_allowed``.
    """
    judgments = [
        _j(1, "MOCK_template_reject_a", "MOCK_frame_001", "reject"),
        _j(2, "MOCK_template_direct_a", "MOCK_frame_002", "use_as_is"),
    ]
    promoted, trace = lookup_v4_match_with_fallback(
        _make_v4(judgments), "S1", raw_content="- a\n- b\n- c\n"
    )

    # The rank-2 candidate is the one returned.
    assert promoted is not None
    assert promoted.template_id == "MOCK_template_direct_a"
    assert promoted.v4_rank == 2
    assert promoted.selection_path == "rank_2_fallback"

    # Trace records that, and why, the fallback was taken.
    assert trace["fallback_used"] is True
    assert trace["selected_rank"] == 2
    assert "phase_z_status_not_allowed" in trace["fallback_reason"]
# ─── Case 3 : duplicate template_id is skipped / deduped ────────────────────
def test_duplicate_template_id_is_skipped_rank_3_wins(patch_selector_deps):
    """Codex #14 dedup precision lock — a template_id belongs to its first rank.

    The first occurrence of a template_id reserves it for the whole chain,
    whatever the decision on that occurrence. A later rank with the same
    template_id MUST be skipped as a duplicate, regardless of its V4 label.

    The fixture simulates a V4 anomaly: rank-1 and rank-2 share one
    template_id (and one frame_id, per the Codex #6 1:1 catalog terminology,
    consistent with the real catalog). rank-1 is a reject (non-direct, first
    occurrence); rank-2 is use_as_is (executable by label, but MUST be skipped
    as a duplicate per the Codex #14 intended rule); rank-3 is a distinct
    executable template and wins.

    Per the Codex #14 example:
        rank 1: A reject     -> skipped (non-direct), template A claimed
        rank 2: A use_as_is  -> skipped as duplicate_template_id (must NOT win)
        rank 3: B use_as_is  -> selected (distinct template, eligible)
    """
    judgments = [
        # rank-1: non-direct (reject); reserves the template_id for the chain.
        _j(1, "MOCK_template_dup_a", "MOCK_frame_dup_001", "reject"),
        # rank-2: same template_id + frame_id (1:1 catalog); would be
        # executable but MUST be skipped as a duplicate.
        _j(2, "MOCK_template_dup_a", "MOCK_frame_dup_001", "use_as_is"),
        # rank-3: distinct executable template; the expected winner.
        _j(3, "MOCK_template_direct_a", "MOCK_frame_003", "use_as_is"),
    ]
    winner, trace = lookup_v4_match_with_fallback(
        _make_v4(judgments), "S1", raw_content="- a\n- b\n- c\n"
    )

    # rank-3 must be selected, after the rank-1/rank-2 duplicate pair.
    assert winner is not None
    assert winner.template_id == "MOCK_template_direct_a"
    assert winner.v4_rank == 3
    assert winner.selection_path == "rank_3_fallback"
    assert trace["fallback_used"] is True
    assert trace["selected_rank"] == 3

    # The trace must keep all three candidate entries, with precise reasons.
    by_rank = {entry["rank"]: entry for entry in trace["candidates"]}
    assert set(by_rank) == {1, 2, 3}

    expected_by_rank = {
        # Non-direct first occurrence: the status_not_allowed reason is kept.
        1: {
            "decision": "skipped",
            "reason": "phase_z_status_not_allowed:fallback_candidate",
            "template_id": "MOCK_template_dup_a",
            "v4_label": "reject",
        },
        # Duplicate of rank-1's template: skipped, with audit fields intact.
        2: {
            "decision": "skipped",
            "reason": "duplicate_template_id",
            "template_id": "MOCK_template_dup_a",
            "v4_label": "use_as_is",
            "frame_id": "MOCK_frame_dup_001",
        },
        # Distinct executable template: selected.
        3: {
            "decision": "selected",
            "template_id": "MOCK_template_direct_a",
        },
    }
    for rank, expected_fields in expected_by_rank.items():
        for field, value in expected_fields.items():
            assert by_rank[rank][field] == value, (rank, field)
# ─── Case 4 : missing contract → skipped / chain-exhausted trace ────────────
def test_missing_contract_yields_chain_exhausted_trace(patch_selector_deps):
    """Codex #10 E4 case 4 — every rank lacks a catalog contract, so the chain exhausts.

    The only judgment uses a template ID that is absent from the synthetic
    catalog; no match is returned and the trace names the no-contract skip.
    """
    judgments = [
        _j(1, "MOCK_template_missing_contract", "MOCK_frame_001", "use_as_is"),
    ]
    selected, trace = lookup_v4_match_with_fallback(
        _make_v4(judgments), "S1", raw_content="- a\n- b\n- c\n"
    )

    assert selected is None
    assert trace["selection_path"] == "chain_exhausted"

    # At least one candidate must record the no-contract skip reason.
    skip_reasons = [entry.get("reason") for entry in trace["candidates"]]
    assert "skipped_no_contract" in skip_reasons
# ─── Case 5 : restructure / reject preserved as non-direct candidate evidence
def test_restructure_reject_preserved_as_non_direct_evidence(patch_selector_deps):
    """Codex #10 E4 case 5 + Codex #2 conceptual + Claude #11 L5.

    restructure / reject candidates must stay visible in the candidate
    evidence with route hints; they must not be silently discarded.
    """
    judgments = [
        _j(1, "MOCK_template_reject_a", "MOCK_frame_001", "reject"),
        _j(2, "MOCK_template_restructure_a", "MOCK_frame_002", "restructure"),
        _j(3, "MOCK_template_direct_a", "MOCK_frame_003", "use_as_is"),
    ]
    selected, trace = lookup_v4_match_with_fallback(
        _make_v4(judgments), "S1", raw_content="- a\n- b\n- c\n"
    )

    assert selected is not None
    assert selected.template_id == "MOCK_template_direct_a"

    # All three candidates must appear with the informative L2 fields.
    by_rank = {entry["rank"]: entry for entry in trace["candidates"]}
    assert set(by_rank) == {1, 2, 3}

    # (rank, v4_label, filtered_for_direct_execution, route_hint)
    expectations = (
        (1, "reject", True, "design_reference_only"),        # non-direct
        (2, "restructure", True, "ai_adaptation_required"),  # non-direct
        (3, "use_as_is", False, "direct_render"),            # direct
    )
    for rank, v4_label, filtered, route_hint in expectations:
        entry = by_rank[rank]
        assert entry["v4_label"] == v4_label
        assert entry["filtered_for_direct_execution"] is filtered
        assert entry["route_hint"] == route_hint
# ─── Case 6 : additive fields do not regress existing trace shape ───────────
def test_existing_trace_shape_does_not_regress(patch_selector_deps):
    """Codex #10 E4 case 6 + Claude #11 L9 — additive fields keep the old trace shape.

    The additive L2/L3 fields must not break existing trace consumers:
    existing fields (`label`, `fallback_used`, `selection_path`,
    `selected_rank`, etc.) must remain present and unchanged.
    """
    v4 = _make_v4([
        _j(1, "MOCK_template_direct_a", "MOCK_frame_001", "use_as_is"),
    ])
    match, trace = lookup_v4_match_with_fallback(
        v4, "S1", raw_content="- a\n- b\n- c\n"
    )
    # The shape checks below describe a successful rank-1 selection, so make
    # sure a match really was returned (previously `match` went unchecked).
    assert match is not None

    # Existing top-level trace fields preserved
    expected_top_fields = {
        "section_id", "max_rank", "selection_path", "selected_rank",
        "selected_template_id", "selected_frame_id", "selected_label",
        "fallback_used", "fallback_reason", "candidates",
    }
    assert expected_top_fields.issubset(trace.keys())

    # Existing candidate fields preserved
    candidate = trace["candidates"][0]
    expected_candidate_fields = {
        "rank", "template_id", "frame_id", "frame_number", "confidence",
        "label", "phase_z_status", "catalog_registered", "decision", "reason",
    }
    assert expected_candidate_fields.issubset(candidate.keys())

    # New L2 additive fields present
    # (v4_label / filtered_for_direct_execution / route_hint)
    assert candidate["v4_label"] == candidate["label"]  # alias of label
    assert "filtered_for_direct_execution" in candidate
    assert "route_hint" in candidate

    # rank-1 use_as_is path — no fallback used
    assert trace["fallback_used"] is False
    assert trace["selection_path"] == "rank_1"
# ─── Case 7 : Step 9 production-source guard (Codex #20 blocker fix) ───
def test_step9_production_emits_candidate_evidence_and_alias():
    """Temporary production-source guard for IMP-05 Step 9 evidence fields.

    Step 9 application-plan unit assembly is currently inline, so this test
    inspects the pipeline module's source text for the exact assignments.
    Once IMP-32 extracts a helper, replace this source-string guard with a
    direct helper-call test.
    """
    pipeline_source = inspect.getsource(phase_z2_pipeline)
    evidence_assignment = '"candidate_evidence": selection_trace.get("candidates", [])'
    compat_assignment = '"fallback_chain": selection_trace.get("candidates", [])'

    # Both assignments must exist in the production source...
    for required_snippet in (evidence_assignment, compat_assignment):
        assert required_snippet in pipeline_source

    # ...with the primary field appearing before its compat alias.
    evidence_pos = pipeline_source.index(evidence_assignment)
    compat_pos = pipeline_source.index(compat_assignment)
    assert evidence_pos < compat_pos

    assert "compat alias; prefer candidate_evidence" in pipeline_source
# ─── Case 8 : Step 20 slide-status qualifier fields presence + defensive default
def test_step20_slide_status_qualifier_fields_present_with_defensive_defaults():
    """Codex #10 D4 + Codex #17 idea F + Claude #21 idea J.

    Step 20 slide-status must expose `fallback_selection_count` and
    `selection_paths[]` derived from comp_debug["v4_fallback_summary"], with
    defensive defaults (0, []) when the summary is missing or empty. The
    top-level `overall` enum must remain stable.
    """
    from src.phase_z2_pipeline import MdxSection, compute_slide_status

    no_sections: list[MdxSection] = []
    no_units: list = []
    overflow_ok = {"passed": True, "fail_reasons": []}

    def status_for(comp_debug):
        # Only comp_debug varies between the three scenarios below.
        return compute_slide_status(
            no_sections, no_units, comp_debug, overflow_ok,
            adapter_needed_units=None, debug_zones=None,
        )

    # Case A — comp_debug with a populated v4_fallback_summary
    populated = status_for({
        "v4_fallback_summary": {
            "fallback_used_count": 1,
            "fallback_selection_count": 1,
            "selection_paths": [
                {"section_id": "S1", "selection_path": "rank_2_fallback",
                 "selected_rank": 2, "selected_template_id": "MOCK_T",
                 "fallback_trigger": "phase_z_status_not_allowed:fallback_candidate"},
            ],
        },
        "candidates_summary": [],
    })
    # Step 20 qualifier fields are present and carry the summary's values.
    for qualifier_key in ("fallback_selection_count", "selection_paths"):
        assert qualifier_key in populated
    assert populated["fallback_selection_count"] == 1
    assert len(populated["selection_paths"]) == 1
    assert populated["selection_paths"][0]["section_id"] == "S1"
    # Existing fields preserved (no regression)
    for existing_key in ("fallback_used", "fallback_selections", "overall"):
        assert existing_key in populated

    # Case B — comp_debug missing v4_fallback_summary (defensive defaults)
    summary_missing = status_for({"candidates_summary": []})
    assert summary_missing["fallback_selection_count"] == 0
    assert summary_missing["selection_paths"] == []
    # Top-level overall enum still stable
    assert "overall" in summary_missing

    # Case C — comp_debug with an empty v4_fallback_summary dict
    summary_empty = status_for(
        {"v4_fallback_summary": {}, "candidates_summary": []}
    )
    assert summary_empty["fallback_selection_count"] == 0
    assert summary_empty["selection_paths"] == []
# ─── Case 9 : IMP-30 u1 — opt-in provisional synthesis on chain_exhausted ───
def test_allow_provisional_default_off_preserves_imp05_behavior(patch_selector_deps):
    """IMP-30 u1 — the default ``allow_provisional=False`` keeps IMP-05 behavior.

    With the flag left at its default, an exhausted chain still returns
    ``(None, trace)`` exactly as IMP-05 specified. Regression guard for IMP-05
    close commit 23d1b25.
    """
    judgments = [
        _j(1, "MOCK_template_restructure_a", "MOCK_frame_001", "restructure"),
        _j(2, "MOCK_template_reject_a", "MOCK_frame_002", "reject"),
    ]
    selected, trace = lookup_v4_match_with_fallback(
        _make_v4(judgments), "S1", raw_content="- a\n- b\n- c\n"
    )

    assert selected is None
    assert trace["selection_path"] == "chain_exhausted"
    # No provisional marker and no selection mirrors on the trace.
    assert trace.get("provisional") is None
    for mirror_key in ("selected_rank", "selected_template_id"):
        assert trace[mirror_key] is None
def test_allow_provisional_synthesizes_rank_1_on_chain_exhausted(patch_selector_deps):
    """IMP-30 u1 — opt-in ``allow_provisional=True`` synthesizes a rank-1 match.

    When the rank-1..3 chain is exhausted (all restructure/reject), a
    provisional match built from the rank-1 judgment is returned. The
    downstream first-render invariant uses this to render a "needs
    adaptation" zone instead of aborting.
    """
    judgments = [
        _j(1, "MOCK_template_restructure_a", "MOCK_frame_001", "restructure"),
        _j(2, "MOCK_template_reject_a", "MOCK_frame_002", "reject"),
    ]
    provisional, trace = lookup_v4_match_with_fallback(
        _make_v4(judgments), "S1", raw_content="- a\n- b\n- c\n",
        allow_provisional=True,
    )

    # Provisional rank-1 synthesized from the rank-1 judgment
    assert provisional is not None
    assert provisional.provisional is True
    expected_match_attrs = {
        "template_id": "MOCK_template_restructure_a",
        "frame_id": "MOCK_frame_001",
        "label": "restructure",
        "v4_rank": 1,
        "selection_path": "provisional_rank_1",
    }
    for attr, value in expected_match_attrs.items():
        assert getattr(provisional, attr) == value, attr

    # fallback_reason mirrors the chain-exhaust reason
    assert provisional.fallback_reason is not None
    assert "phase_z_status_not_allowed" in provisional.fallback_reason

    # Top-level trace mirrors reflect the provisional selection
    expected_trace_mirrors = {
        "selection_path": "provisional_rank_1",
        "selected_rank": 1,
        "selected_template_id": "MOCK_template_restructure_a",
        "selected_frame_id": "MOCK_frame_001",
        "selected_label": "restructure",
    }
    for key, value in expected_trace_mirrors.items():
        assert trace[key] == value, key
    assert trace["fallback_used"] is True
    assert trace["provisional"] is True

    # Original candidate skip reasons are preserved (not rewritten by synthesis)
    by_rank = {entry["rank"]: entry for entry in trace["candidates"]}
    expected_skip_reasons = {
        1: "phase_z_status_not_allowed:extract_matched_zone",
        2: "phase_z_status_not_allowed:fallback_candidate",
    }
    for rank, reason in expected_skip_reasons.items():
        assert by_rank[rank]["decision"] == "skipped"
        assert by_rank[rank]["reason"] == reason
def test_allow_provisional_no_op_when_normal_selection_succeeds(patch_selector_deps):
    """IMP-30 u1 — ``allow_provisional=True`` is a no-op on a normal selection.

    When the rank-1 (or rank-N fallback) selection succeeds, the result MUST
    be non-provisional and the trace must carry no provisional marker.
    """
    judgments = [
        _j(1, "MOCK_template_direct_a", "MOCK_frame_001", "use_as_is"),
    ]
    selected, trace = lookup_v4_match_with_fallback(
        _make_v4(judgments), "S1", raw_content="- a\n- b\n- c\n",
        allow_provisional=True,
    )

    # Ordinary rank-1 match, explicitly flagged as not provisional.
    assert selected is not None
    assert selected.provisional is False
    assert selected.selection_path == "rank_1"

    assert trace["selection_path"] == "rank_1"
    assert trace.get("provisional") is None
def test_allow_provisional_no_op_when_no_v4_section(patch_selector_deps):
    """IMP-30 u1 — no V4 section means nothing to synthesize from.

    When no V4 section is resolved (so there is no rank-1 judgment),
    ``allow_provisional=True`` MUST still return ``(None, trace)``. u3/u4
    handle this case with a placeholder zone or empty-shell terminal slide.
    """
    v4_without_sections = {"mdx_sections": {}}  # no section at all
    selected, trace = lookup_v4_match_with_fallback(
        v4_without_sections, "S1", raw_content="- a\n- b\n- c\n",
        allow_provisional=True,
    )

    assert selected is None
    assert trace["fallback_reason"] == "no_v4_section"
def test_allow_provisional_no_op_when_empty_judgments(patch_selector_deps):
    """IMP-30 u1 — an empty ``judgments_full32`` list yields no provisional match.

    The V4 section exists but holds no judgments, so ``allow_provisional=True``
    MUST still return ``(None, trace)``: no synthetic rank-1 can be fabricated
    from nothing.
    """
    # Section "S1" is present, with an empty judgment list.
    v4_with_empty_section = _make_v4([], section_id="S1")
    selected, trace = lookup_v4_match_with_fallback(
        v4_with_empty_section, "S1", raw_content="- a\n- b\n- c\n",
        allow_provisional=True,
    )

    assert selected is None
    assert trace["fallback_reason"] == "empty_v4_judgments"