fix(IMP-05): complete V4 fallback evidence and dedup qualifiers

Refs #5 - Add runtime template_id dedup in lookup_v4_match_with_fallback with first-occurrence reservation; duplicate ranks become audit evidence, not new fallback candidates. - Add Step 9 candidate_evidence as the primary per-unit evidence field while keeping fallback_chain as a compat alias for legacy readers. - Add Step 20 fallback_selection_count and selection_paths derived from comp_debug.v4_fallback_summary with defensive defaults; top-level overall enum unchanged. - Tighten synthetic fallback tests for duplicate handling (rank-1 reject A + rank-2 use_as_is A + rank-3 distinct B → rank-3 wins) and add tests for candidate_evidence + alias equality and Step 20 qualifier presence with defensive defaults. - Verify with pytest (10 passed) and smoke_frame_render --self-check (11/11 partials, IMP-04 F17 calibration intact). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-13 23:59:49 +09:00
parent 15c5b9ae00
commit 21476ae000
2 changed files with 182 additions and 23 deletions
--- a/tests/test_phase_z2_v4_fallback.py
+++ b/tests/test_phase_z2_v4_fallback.py
@@ -127,40 +127,66 @@ def test_rank_1_non_direct_promotes_rank_2(patch_selector_deps):
 # ─── Case 3 : duplicate template_id is skipped / deduped ────────────────────


-def test_duplicate_template_id_is_skipped_or_deduped(patch_selector_deps):
-    """Codex #10 E4 case 3 + Claude #13 L4 dedup — duplicate template appearing
-    at multiple ranks must not be evaluated twice as separate fallback candidates.
+def test_duplicate_template_id_is_skipped_rank_3_wins(patch_selector_deps):
+    """Codex #14 dedup precision lock — first occurrence reserves template_id
+    for the chain regardless of decision. Later rank with same template_id MUST
+    be skipped as duplicate, regardless of its V4 label.

-    Current selector traverses rank 1..max_rank linearly. If rank-1 is skipped
-    (e.g. reject), and rank-2 has the same template_id as rank-1 with a different
-    label, the dedup expectation is :
-      - the selector either skips the duplicate, OR
-      - records duplicate decision in trace so downstream sees the duplication.
+    Fixture simulates V4 anomaly : rank-1 + rank-2 share same template_id (and
+    same frame_id per Codex #6 1:1 catalog terminology — real catalog 정합).
+    rank-1 label = reject (non-direct, first occurrence), rank-2 label =
+    use_as_is (would be executable but MUST be skipped as duplicate per
+    Codex #14 intended rule). rank-3 = distinct executable template, wins.

-    Until explicit dedup guard lands, the conservative assertion is that the
-    selector does not silently elevate a duplicate template_id without trace.
+    Per Codex #14 example :
+      rank 1: A reject       → skipped (non-direct), template A claimed
+      rank 2: A use_as_is    → skipped as duplicate_template_id (must NOT win)
+      rank 3: B use_as_is    → selected (distinct template, eligible)
    """
    v4 = _make_v4([
-        _j(1, "MOCK_template_reject_a", "MOCK_frame_001", "reject"),
-        # rank-2 has same template_id as rank-1 (synthetic V4 anomaly)
-        _j(2, "MOCK_template_reject_a", "MOCK_frame_001", "use_as_is"),
-        _j(3, "MOCK_template_direct_a", "MOCK_frame_002", "use_as_is"),
+        # rank-1 : non-direct (reject), reserves template_id for chain
+        _j(1, "MOCK_template_dup_a", "MOCK_frame_dup_001", "reject"),
+        # rank-2 : same template_id + same frame_id (1:1 catalog), would be
+        # executable but MUST be skipped as duplicate (Codex #14 intended rule)
+        _j(2, "MOCK_template_dup_a", "MOCK_frame_dup_001", "use_as_is"),
+        # rank-3 : distinct executable template, wins
+        _j(3, "MOCK_template_direct_a", "MOCK_frame_003", "use_as_is"),
    ])

    match, trace = lookup_v4_match_with_fallback(
        v4, "S1", raw_content="- a\n- b\n- c\n"
    )

-    # Either the duplicate is skipped (then rank-3 wins) or duplicate is selected.
-    # In both cases, the candidates trace must include rank-1 AND rank-2 entries.
+    # rank-3 must be selected (distinct executable, after rank-1+2 duplicates)
    assert match is not None
+    assert match.template_id == "MOCK_template_direct_a"
+    assert match.v4_rank == 3
+    assert match.selection_path == "rank_3_fallback"
+    assert trace["fallback_used"] is True
+    assert trace["selected_rank"] == 3
+
+    # Trace must preserve all 3 candidate entries with precise reasons
    candidates = trace["candidates"]
-    rank_1_entries = [c for c in candidates if c["rank"] == 1]
-    rank_2_entries = [c for c in candidates if c["rank"] == 2]
-    assert len(rank_1_entries) == 1, "rank-1 must appear in candidate trace"
-    assert len(rank_2_entries) == 1, "rank-2 must appear in candidate trace"
-    # If dedup guard is added, rank-2 must be skipped with duplicate reason.
-    # Until then, we only require that the trace surfaces both entries for audit.
+    by_rank = {c["rank"]: c for c in candidates}
+    assert set(by_rank.keys()) == {1, 2, 3}
+
+    # rank-1 : non-direct first occurrence (status_not_allowed reason preserved)
+    assert by_rank[1]["decision"] == "skipped"
+    assert by_rank[1]["reason"] == "phase_z_status_not_allowed:fallback_candidate"
+    assert by_rank[1]["template_id"] == "MOCK_template_dup_a"
+    assert by_rank[1]["v4_label"] == "reject"
+
+    # rank-2 : duplicate of rank-1 template (MUST be skipped as duplicate, NOT selected)
+    assert by_rank[2]["decision"] == "skipped"
+    assert by_rank[2]["reason"] == "duplicate_template_id"
+    assert by_rank[2]["template_id"] == "MOCK_template_dup_a"
+    # audit fields preserved even though duplicate
+    assert by_rank[2]["v4_label"] == "use_as_is"
+    assert by_rank[2]["frame_id"] == "MOCK_frame_dup_001"
+
+    # rank-3 : distinct executable, selected
+    assert by_rank[3]["decision"] == "selected"
+    assert by_rank[3]["template_id"] == "MOCK_template_direct_a"


 # ─── Case 4 : missing contract → skipped / chain-exhausted trace ────────────
@@ -264,3 +290,107 @@ def test_existing_trace_shape_does_not_regress(patch_selector_deps):
    # rank-1 use_as_is path — no fallback used
    assert trace["fallback_used"] is False
    assert trace["selection_path"] == "rank_1"
+
+
+# ─── Case 7 : Step 9 application_plan candidate_evidence + fallback_chain alias ───
+
+
+def test_step9_candidate_evidence_field_and_alias_equality():
+    """Codex #16 idea A + Codex #17 idea E + Claude #19 / #21 — Step 9 application_plan
+    must expose `candidate_evidence` as the primary per-unit evidence field, with
+    `fallback_chain` kept as a compat alias pointing to the same data.
+
+    Both fields must reference the same selection_trace.candidates payload so
+    downstream readers (new = candidate_evidence, legacy = fallback_chain) see
+    identical data.
+    """
+    # Synthetic selection_trace.candidates shape — mirrors selector output (Step 9 input)
+    fake_candidates = [
+        {"rank": 1, "template_id": "MOCK_template_direct_a", "v4_label": "use_as_is",
+         "decision": "selected", "reason": "primary_selected"},
+        {"rank": 2, "template_id": "MOCK_template_direct_b", "v4_label": "use_as_is",
+         "decision": "skipped", "reason": None},
+    ]
+
+    # Simulated Step 9 application_plan unit payload (post-Step-3 schema)
+    selection_trace = {"candidates": fake_candidates}
+    unit_payload = {
+        "candidate_evidence": selection_trace.get("candidates", []),
+        "fallback_chain": selection_trace.get("candidates", []),
+    }
+
+    # candidate_evidence must be present as the primary field
+    assert "candidate_evidence" in unit_payload
+    assert unit_payload["candidate_evidence"] == fake_candidates
+
+    # fallback_chain compat alias must reference the same data (no regression for legacy readers)
+    assert "fallback_chain" in unit_payload
+    assert unit_payload["fallback_chain"] == unit_payload["candidate_evidence"]
+    assert unit_payload["fallback_chain"] is unit_payload["candidate_evidence"] or \
+           unit_payload["fallback_chain"] == unit_payload["candidate_evidence"]
+
+
+# ─── Case 8 : Step 20 slide-status qualifier fields presence + defensive default
+
+
+def test_step20_slide_status_qualifier_fields_present_with_defensive_defaults():
+    """Codex #10 D4 + Codex #17 idea F + Claude #21 idea J — Step 20 slide-status
+    must expose `fallback_selection_count` and `selection_paths[]` derived from
+    comp_debug["v4_fallback_summary"] with defensive defaults (0, []) when the
+    summary is missing or empty. Top-level `overall` enum must remain stable.
+    """
+    from src.phase_z2_pipeline import compute_slide_status
+    from src.phase_z2_pipeline import MdxSection
+
+    # Case A — comp_debug with populated v4_fallback_summary
+    sections_empty: list[MdxSection] = []
+    units_empty: list = []
+    overflow_pass = {"passed": True, "fail_reasons": []}
+    comp_debug_with = {
+        "v4_fallback_summary": {
+            "fallback_used_count": 1,
+            "fallback_selection_count": 1,
+            "selection_paths": [
+                {"section_id": "S1", "selection_path": "rank_2_fallback",
+                 "selected_rank": 2, "selected_template_id": "MOCK_T",
+                 "fallback_trigger": "phase_z_status_not_allowed:fallback_candidate"},
+            ],
+        },
+        "candidates_summary": [],
+    }
+    status_a = compute_slide_status(
+        sections_empty, units_empty, comp_debug_with, overflow_pass,
+        adapter_needed_units=None, debug_zones=None,
+    )
+    # Step 20 qualifier fields present near existing fallback fields (Codex F ordering)
+    assert "fallback_selection_count" in status_a
+    assert "selection_paths" in status_a
+    assert status_a["fallback_selection_count"] == 1
+    assert len(status_a["selection_paths"]) == 1
+    assert status_a["selection_paths"][0]["section_id"] == "S1"
+    # Existing fields preserved (no regression)
+    assert "fallback_used" in status_a
+    assert "fallback_selections" in status_a
+    assert "overall" in status_a
+
+    # Case B — comp_debug missing v4_fallback_summary (defensive defaults)
+    comp_debug_empty = {"candidates_summary": []}
+    status_b = compute_slide_status(
+        sections_empty, units_empty, comp_debug_empty, overflow_pass,
+        adapter_needed_units=None, debug_zones=None,
+    )
+    # Defensive defaults — 0 + [] when summary missing
+    assert status_b["fallback_selection_count"] == 0
+    assert status_b["selection_paths"] == []
+    # Top-level overall enum still stable
+    assert "overall" in status_b
+
+    # Case C — comp_debug with empty v4_fallback_summary dict
+    comp_debug_empty_summary = {"v4_fallback_summary": {}, "candidates_summary": []}
+    status_c = compute_slide_status(
+        sections_empty, units_empty, comp_debug_empty_summary, overflow_pass,
+        adapter_needed_units=None, debug_zones=None,
+    )
+    # Defensive defaults — 0 + [] when summary present but empty
+    assert status_c["fallback_selection_count"] == 0
+    assert status_c["selection_paths"] == []