EMPTY_SHELL_NO_CONTENT overall enum + 3-marker detection (frame_template_id="__empty__" OR label="empty_shell" OR merge_type="empty_shell") routes empty-placeholder-only slides to BLOCKED CLI exit 1 + red final_status.html, blocking fake PASS reports (feedback_artifact_status_naming). Coverage accounting split: legacy covered_section_ids preserved + new content_rendered_section_ids / empty_shell_section_ids. mdx05 Case B (zero V4 evidence) honestly classified instead of synthesizing fabricated rank-1 reject frames. IMP-30 u6/u7 stale empty-shell PASS assertions inverted (29 tests). IMP-85 smoke parametrize: mdx05 removed from exit-0 list + dedicated BLOCKED exit test added (4 tests). No production behavior change for chain_exhausted Case A; no AI route activation; no mdx-id hardcoding. 53 targeted + 76 adjacent Phase Z tests PASS. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1566 lines
63 KiB
Python
1566 lines
63 KiB
Python
"""IMP-30 first-render invariant tests (per-unit slice).

This file is the shared regression home for IMP-30 units u2~u7. Each
implementation unit adds its own focused tests; u7 (regression coverage)
will broaden the surface (synthetic V4 fixtures for chain_exhausted
provisional, zero-V4 empty-shell, normal-path unchanged).

u3 scope (this slice) — select_composition_units last-resort provisional
fill for uncovered sections + _candidate_state "selected_provisional":
  1. default-off behavior is byte-identical to pre-u3 (IMP-05 guard).
  2. opt-in fills uncovered sections with provisional candidates whose
     selection_state would otherwise be "filtered_status".
  3. opt-in never displaces normal greedy selections.
  4. opt-in respects coverage non-overlap (no section selected twice).
  5. plan_composition._candidate_state returns "selected_provisional"
     for fills and "selected" for normal greedy picks.

Synthetic naming convention (Codex #10 E1):
  - MOCK_ prefix mandatory
  - _a / _b suffixes = enumeration only (NOT ordering / priority)
  - rank/order expressed by V4 rank field, NEVER ID suffix
"""
|
|
from __future__ import annotations
|
|
|
|
from dataclasses import dataclass, field
|
|
from typing import Optional
|
|
|
|
from src.phase_z2_composition import (
|
|
CompositionUnit,
|
|
plan_composition,
|
|
select_composition_units,
|
|
)
|
|
|
|
|
|
# ─── Synthetic match shape (duck-typed V4Match-like) ───────────────────
|
|
|
|
@dataclass
class _StubV4Match:
    """Duck-typed V4Match surface used by collect_candidates / score path.

    Mirrors src.phase_z2_pipeline.V4Match fields touched by composition:
    template_id / frame_id / frame_number / confidence / label / v4_rank /
    selection_path / fallback_reason / provisional. Composition module
    intentionally does not import V4Match (circular dep avoidance), so a
    plain stub object with the same attributes is the contract.
    """

    # Required identification / scoring fields (no defaults).
    template_id: str
    frame_id: str
    frame_number: int
    confidence: float
    label: str
    # Optional audit fields; defaults describe an ordinary rank-1 match.
    v4_rank: Optional[int] = None
    selection_path: str = "rank_1"
    fallback_reason: Optional[str] = None
    # Opt-in flag; tests set it True to model a provisional match.
    provisional: bool = False
|
|
|
|
|
|
@dataclass
class _StubSection:
    """Minimal section surface used by collect_candidates (section_id /
    raw_content / title). Matches MdxSection's attribute names without
    importing pipeline (keeps test isolated to composition module)."""

    section_id: str
    # Both text fields default to empty so tests can pass only the id.
    title: str = ""
    raw_content: str = ""
|
|
|
|
|
|
# Phase Z status mapping fixture — only the keys exercised here are listed.
# Real mapping (V4_LABEL_TO_PHASE_Z_STATUS in pipeline) is broader; this
# stub deliberately mirrors only what the tests touch.
_LABEL_TO_STATUS = {
    "use_as_is": "matched_zone",
    "light_edit": "adapt_matched_zone",
    "restructure": "extract_matched_zone",
    "reject": "fallback_candidate",
}

# Statuses the tests pass as the "allowed" set. "restructure" and "reject"
# map to statuses outside this set.
_ALLOWED_STATUSES = {"matched_zone", "adapt_matched_zone"}
|
|
|
|
|
|
# ─── Helpers ────────────────────────────────────────────────────────────
|
|
|
|
def _make_lookup(matches_by_section: dict[str, _StubV4Match]):
    """Return v4_lookup_fn (section_id -> _StubV4Match | None).

    The returned callable looks the id up in ``matches_by_section`` and
    yields ``None`` for ids that have no entry.
    """
    def _fn(section_id: str):
        return matches_by_section.get(section_id)

    return _fn
|
|
|
|
|
|
def _make_candidates_lookup_empty():
    """v4_candidates_lookup_fn that always returns [] (no Step 6-A axis here)."""
    def _fn(section_id: str):
        # A fresh list per call, regardless of the section id.
        return []

    return _fn
|
|
|
|
|
|
# ─── u3 case 1 : default-off behavior byte-identical to pre-u3 ─────────
|
|
|
|
def test_u3_default_off_preserves_imp05_behavior():
    """IMP-05 regression guard. With allow_provisional_fill=False (default),
    selection (exercised here through plan_composition) must yield the same
    units as pre-u3 even when provisional candidates exist in the pool.

    Setup:
      - S1: use_as_is + provisional=False (normal selection)
      - S2: restructure + provisional=True (would be fill-eligible)
    Expected (default-off):
      - units = [S1 unit] only. S2 stays uncovered.
    """
    sections = [_StubSection("S1"), _StubSection("S2")]
    matches = {
        "S1": _StubV4Match(
            template_id="MOCK_template_direct_a",
            frame_id="MOCK_frame_001", frame_number=1,
            confidence=0.9, label="use_as_is", v4_rank=1,
        ),
        "S2": _StubV4Match(
            template_id="MOCK_template_restructure_a",
            frame_id="MOCK_frame_002", frame_number=2,
            confidence=0.65, label="restructure", v4_rank=1,
            selection_path="provisional_rank_1",
            fallback_reason="phase_z_status_not_allowed:extract_matched_zone",
            provisional=True,
        ),
    }
    units, preset, debug = plan_composition(
        sections,
        _make_lookup(matches),
        _LABEL_TO_STATUS,
        _ALLOWED_STATUSES,
        v4_candidates_lookup_fn=_make_candidates_lookup_empty(),
        # allow_provisional_fill omitted → default False
    )
    assert len(units) == 1
    assert units[0].source_section_ids == ["S1"]
    assert units[0].provisional is False
    assert preset == "single"
    # S2 candidate must still appear in debug summary as filtered_status
    summary_by_section = {
        tuple(c["source_section_ids"]): c for c in debug["candidates_summary"]
    }
    assert summary_by_section[("S2",)]["selection_state"] == "filtered_status"
|
|
|
|
|
|
# ─── u3 case 2 : opt-in fills uncovered sections with provisional ──────
|
|
|
|
def test_u3_opt_in_fills_uncovered_with_provisional():
    """IMP-30 u3 — opt-in path.

    Setup mirrors case 1 (S1 use_as_is + S2 provisional restructure) but
    with allow_provisional_fill=True. S2 must be filled as
    selected_provisional unit; greedy S1 selection unchanged.
    """
    sections = [_StubSection("S1"), _StubSection("S2")]
    matches = {
        "S1": _StubV4Match(
            template_id="MOCK_template_direct_a",
            frame_id="MOCK_frame_001", frame_number=1,
            confidence=0.9, label="use_as_is", v4_rank=1,
        ),
        "S2": _StubV4Match(
            template_id="MOCK_template_restructure_a",
            frame_id="MOCK_frame_002", frame_number=2,
            confidence=0.65, label="restructure", v4_rank=1,
            selection_path="provisional_rank_1",
            fallback_reason="phase_z_status_not_allowed:extract_matched_zone",
            provisional=True,
        ),
    }
    # The debug payload is not inspected in this case.
    units, preset, _debug = plan_composition(
        sections,
        _make_lookup(matches),
        _LABEL_TO_STATUS,
        _ALLOWED_STATUSES,
        v4_candidates_lookup_fn=_make_candidates_lookup_empty(),
        allow_provisional_fill=True,
    )
    # Both sections must be covered now
    section_ids = {sid for u in units for sid in u.source_section_ids}
    assert section_ids == {"S1", "S2"}
    # Identify which unit covers which section
    by_section = {tuple(u.source_section_ids): u for u in units}
    s1_unit = by_section[("S1",)]
    s2_unit = by_section[("S2",)]
    # Normal greedy pick — provisional flag stays False
    assert s1_unit.provisional is False
    # Provisional fill — provisional flag carried from V4Match (u1) via u2
    assert s2_unit.provisional is True
    assert s2_unit.label == "restructure"
    # Layout preset reflects 2-unit count
    assert preset == "horizontal-2"
|
|
|
|
|
|
# ─── u3 case 3 : _candidate_state distinguishes selected vs provisional ─
|
|
|
|
def test_u3_candidate_state_marks_selected_provisional():
    """plan_composition._candidate_state must return:
      - "selected" for normal greedy picks
      - "selected_provisional" for last-resort fills

    The state is read back from debug["candidates_summary"], keyed by each
    candidate's source_section_ids.
    """
    sections = [_StubSection("S1"), _StubSection("S2")]
    matches = {
        "S1": _StubV4Match(
            template_id="MOCK_template_direct_a",
            frame_id="MOCK_frame_001", frame_number=1,
            confidence=0.9, label="use_as_is", v4_rank=1,
        ),
        "S2": _StubV4Match(
            template_id="MOCK_template_restructure_a",
            frame_id="MOCK_frame_002", frame_number=2,
            confidence=0.65, label="restructure", v4_rank=1,
            selection_path="provisional_rank_1",
            provisional=True,
        ),
    }
    # Only the debug payload matters here; units / preset are not asserted.
    _units, _preset, debug = plan_composition(
        sections,
        _make_lookup(matches),
        _LABEL_TO_STATUS,
        _ALLOWED_STATUSES,
        v4_candidates_lookup_fn=_make_candidates_lookup_empty(),
        allow_provisional_fill=True,
    )
    summary_by_section = {
        tuple(c["source_section_ids"]): c for c in debug["candidates_summary"]
    }
    assert summary_by_section[("S1",)]["selection_state"] == "selected"
    assert summary_by_section[("S2",)]["selection_state"] == "selected_provisional"
|
|
|
|
|
|
# ─── u3 case 4 : opt-in preserves non-overlap (no double coverage) ─────
|
|
|
|
def test_u3_opt_in_respects_coverage_non_overlap():
    """Provisional fill must not produce overlapping coverage: with
    allow_provisional_fill=True no section id may appear in more than one
    selected unit.

    Setup:
      - P-sub-1: use_as_is, non-provisional (normal selection)
      - P-sub-2: restructure, provisional=True (eligible for fill)
      - no match is supplied for a parent id; any merged candidate over
        [P-sub-1, P-sub-2] would have to be synthesized by the planner.

    Asserted:
      - no section id is covered twice
      - the covered set is exactly {P-sub-1, P-sub-2}
    """
    # Section ids use the "<base>-sub-<n>" form. Per the original author's
    # note, derive_parent_id only triggers on "<base>-sub-<n>" or
    # "<id>-<suffix>.<sub>", so P-sub-1 / P-sub-2 share parent P and are
    # auto-merge eligible.
    # NOTE(review): derive_parent_id is not visible from this file — confirm.
    sections = [
        _StubSection("P-sub-1", raw_content="alpha"),
        _StubSection("P-sub-2", raw_content="beta"),
    ]
    # Author's note: with no parent V4 match, the planner may synthesize a
    # parent_merged_inferred candidate if the representative child is
    # auto-renderable. The representative would be P-sub-1 (higher
    # confidence), whose provisional flag is False, so such a merge would
    # NOT be provisional.
    # NOTE(review): which candidate the greedy pass prefers is decided by
    # the planner; this test deliberately pins only non-overlap and full
    # coverage, not the winning candidate.
    matches = {
        "P-sub-1": _StubV4Match(
            template_id="MOCK_template_direct_a",
            frame_id="MOCK_frame_001", frame_number=1,
            confidence=0.9, label="use_as_is", v4_rank=1,
        ),
        "P-sub-2": _StubV4Match(
            template_id="MOCK_template_restructure_a",
            frame_id="MOCK_frame_002", frame_number=2,
            confidence=0.65, label="restructure", v4_rank=1,
            provisional=True,
        ),
    }
    units, _preset, _debug = plan_composition(
        sections,
        _make_lookup(matches),
        _LABEL_TO_STATUS,
        _ALLOWED_STATUSES,
        v4_candidates_lookup_fn=_make_candidates_lookup_empty(),
        allow_provisional_fill=True,
    )
    covered = [sid for u in units for sid in u.source_section_ids]
    # No section appears twice — non-overlap invariant
    assert len(covered) == len(set(covered))
    # Both sections covered exactly once
    assert set(covered) == {"P-sub-1", "P-sub-2"}
|
|
|
|
|
|
# ─── u3 case 5 : opt-in with no provisional candidates is a no-op ──────
|
|
|
|
def test_u3_opt_in_noop_when_no_provisional_candidates():
    """allow_provisional_fill=True with zero provisional candidates must
    behave identically to default-off. No fill is forced; uncovered sections
    simply remain uncovered (u4 owns the zero-unit empty-shell terminal).
    """
    sections = [_StubSection("S1"), _StubSection("S2")]
    matches = {
        "S1": _StubV4Match(
            template_id="MOCK_template_direct_a",
            frame_id="MOCK_frame_001", frame_number=1,
            confidence=0.9, label="use_as_is", v4_rank=1,
        ),
        # S2: restructure but NOT provisional (e.g., pipeline did not opt
        # into u1 allow_provisional, or section had real rank-1 restructure)
        "S2": _StubV4Match(
            template_id="MOCK_template_restructure_a",
            frame_id="MOCK_frame_002", frame_number=2,
            confidence=0.65, label="restructure", v4_rank=1,
            provisional=False,
        ),
    }
    units, preset, debug = plan_composition(
        sections,
        _make_lookup(matches),
        _LABEL_TO_STATUS,
        _ALLOWED_STATUSES,
        v4_candidates_lookup_fn=_make_candidates_lookup_empty(),
        allow_provisional_fill=True,
    )
    assert len(units) == 1
    assert units[0].source_section_ids == ["S1"]
    assert preset == "single"
    # S2 remains filtered_status — not provisional, so u3 fill ignores it
    summary_by_section = {
        tuple(c["source_section_ids"]): c for c in debug["candidates_summary"]
    }
    assert summary_by_section[("S2",)]["selection_state"] == "filtered_status"
|
|
|
|
|
|
# ─── u3 case 6 : select_composition_units direct invocation parity ─────
|
|
|
|
def test_u3_select_composition_units_default_off_signature():
    """Direct invocation without keyword-only u3 args must remain valid
    (backward-compat for existing callers that import the function directly).
    """
    # Build a minimal CompositionUnit by hand — bypass collect_candidates.
    c1 = CompositionUnit(
        source_section_ids=["S1"],
        merge_type="single",
        frame_template_id="MOCK_template_direct_a",
        frame_id="MOCK_frame_001",
        frame_number=1,
        confidence=0.9,
        label="use_as_is",
        phase_z_status="matched_zone",
        raw_content="alpha",
        title="S1",
    )
    # Two positional arguments only: candidate list + allowed statuses.
    units = select_composition_units([c1], _ALLOWED_STATUSES)
    assert len(units) == 1
    assert units[0].source_section_ids == ["S1"]
|
|
|
|
|
|
def test_u3_select_composition_units_opt_in_direct():
    """Direct invocation with u3 opt-in must fill uncovered section from
    provisional candidate pool, leaving greedy pick untouched.
    """
    # S1: status inside _ALLOWED_STATUSES → ordinary greedy candidate.
    c_greedy = CompositionUnit(
        source_section_ids=["S1"],
        merge_type="single",
        frame_template_id="MOCK_template_direct_a",
        frame_id="MOCK_frame_001",
        frame_number=1,
        confidence=0.9,
        label="use_as_is",
        phase_z_status="matched_zone",
        raw_content="alpha",
        title="S1",
    )
    # S2: status outside _ALLOWED_STATUSES but flagged provisional.
    c_provisional = CompositionUnit(
        source_section_ids=["S2"],
        merge_type="single",
        frame_template_id="MOCK_template_restructure_a",
        frame_id="MOCK_frame_002",
        frame_number=2,
        confidence=0.65,
        label="restructure",
        phase_z_status="extract_matched_zone",
        raw_content="beta",
        title="S2",
        provisional=True,
    )
    units = select_composition_units(
        [c_greedy, c_provisional],
        _ALLOWED_STATUSES,
        all_section_ids=["S1", "S2"],
        allow_provisional_fill=True,
    )
    assert len(units) == 2
    by_section = {tuple(u.source_section_ids): u for u in units}
    assert by_section[("S1",)].provisional is False
    assert by_section[("S2",)].provisional is True
|
|
|
|
|
|
# ════════════════════════════════════════════════════════════════════════
# u4 — pipeline abort guard empty-shell synthesis
# ════════════════════════════════════════════════════════════════════════
#
# u4 replaces the pre-IMP-30 `sys.exit(1)` at the composition_planner abort
# guard with two-phase recovery: provisional retry (Phase A, opt-in u1+u3)
# then terminal empty-shell (Phase B). The shell is a single CompositionUnit
# with frame_template_id="__empty__" and preset="single"; the per-unit
# for-loop's __empty__ branch bypasses mapper/contract and emits a
# placeholder zones_data/debug_zones record so final.html still writes.
#
# These tests verify the composition-side invariants that u4 relies on:
#   - CompositionUnit can be constructed in the empty-shell shape.
#   - The shell shape carries the data needed for u5 (provisional flag) /
#     u6 (status qualifier) / render_slide __empty__ branch (template_id).
# The pipeline-level integration (provisional retry / empty-shell synthesis
# at the abort guard, plus the per-unit __empty__ bypass) is covered by
# u7 (regression coverage) with synthetic V4 fixtures.
|
|
|
|
|
|
def test_u4_empty_shell_unit_shape_matches_pipeline_synthesis():
    """The empty-shell CompositionUnit synthesized at the IMP-30 u4 abort
    guard must carry the field shape downstream consumers (per-unit
    __empty__ branch, compute_slide_status, slide_base template) rely on.

    Required invariants (per src/phase_z2_pipeline.py:3203~ u4 block):
      - frame_template_id == "__empty__" → render_slide short-circuits
        partial_html to "" (existing __empty__ branch at line 2106).
      - phase_z_status == "empty_shell" → Step 20 distinguishes from
        matched_zone / adapt_matched_zone / extract_matched_zone /
        fallback_candidate (u6 surfaces this as additive qualifier).
      - provisional == True → u5 zone--provisional class + needs-adaptation
        badge (template-side wiring).
      - source_section_ids covers all aligned section ids → compute_slide_status
        treats every section as "covered by the shell" (u6 marks the count
        of provisional_first_render_units).
      - selection_path == "empty_shell" / fallback_reason set → audit trace
        survives in step06_composition_plan.json.

    The unit is built by hand in this test; the test pins that
    CompositionUnit accepts and stores this shape, not the pipeline's own
    synthesis code.
    """
    aligned_section_ids = ["S1", "S2", "S3"]
    raw_contents = ["alpha", "beta", "gamma"]
    titles = ["First", "Second", "Third"]

    shell = CompositionUnit(
        source_section_ids=list(aligned_section_ids),
        merge_type="empty_shell",
        frame_template_id="__empty__",
        frame_id="__empty__",
        frame_number=0,
        confidence=0.0,
        label="empty_shell",
        phase_z_status="empty_shell",
        raw_content="\n\n".join(raw_contents),
        title=" / ".join(titles),
        v4_rank=None,
        selection_path="empty_shell",
        fallback_reason="no_v4_rank_1_for_any_section",
        score=0.0,
        rationale={
            "imp30_u4": "terminal_first_render_empty_shell",
            "reason": "no_rank_1_V4_evidence_in_any_section",
            "aligned_section_ids": aligned_section_ids,
        },
        provisional=True,
    )

    assert shell.frame_template_id == "__empty__"
    assert shell.frame_id == "__empty__"
    assert shell.label == "empty_shell"
    assert shell.phase_z_status == "empty_shell"
    assert shell.provisional is True
    assert shell.selection_path == "empty_shell"
    assert shell.fallback_reason == "no_v4_rank_1_for_any_section"
    assert shell.source_section_ids == aligned_section_ids
    assert shell.v4_rank is None
    assert shell.confidence == 0.0
    assert shell.score == 0.0
    # MDX content preserved (no rewrite) — full raw content kept in the unit
    # even though no V4 mapping is applied. Adaptation deferred to IMP-31.
    assert shell.raw_content == "alpha\n\nbeta\n\ngamma"
    # Rationale carries the audit trail consumed by Step 6 artifact + u6.
    assert shell.rationale["imp30_u4"] == "terminal_first_render_empty_shell"
    assert shell.rationale["aligned_section_ids"] == aligned_section_ids
|
|
|
|
|
|
def test_u4_empty_shell_unit_default_provisional_is_false():
    """Smoke test — provisional flag is opt-in. A plain CompositionUnit
    (no explicit provisional=True) does NOT mark itself as empty-shell.
    Guards against accidental positive on normal units when u5 / u6 read
    unit.provisional.
    """
    # No provisional= keyword: the dataclass default is what is under test.
    normal = CompositionUnit(
        source_section_ids=["S1"],
        merge_type="single",
        frame_template_id="MOCK_template_direct_a",
        frame_id="MOCK_frame_001",
        frame_number=1,
        confidence=0.9,
        label="use_as_is",
        phase_z_status="matched_zone",
        raw_content="alpha",
        title="S1",
    )
    assert normal.provisional is False
    assert normal.frame_template_id != "__empty__"
|
|
|
|
|
|
def test_u4_empty_shell_phase_z_status_outside_mvp1_allowed():
    """The empty-shell unit's phase_z_status ('empty_shell') must NOT be
    inside MVP1_ALLOWED_STATUSES. If it were, future code that loops over
    units filtered by allowed_statuses would treat the shell as a normal
    matched zone — defeating the "needs adaptation" signal.

    This test pins the contract at the composition-test level so a status
    rename in the pipeline cannot silently leak the shell into normal flows.

    NOTE(review): the assertion reads this module's own _ALLOWED_STATUSES
    fixture, not the pipeline constant, so it only guards the mirror.
    """
    # _ALLOWED_STATUSES mirrors the pipeline's MVP1_ALLOWED_STATUSES
    # ({"matched_zone", "adapt_matched_zone"}). The shell uses a distinct
    # status so downstream filters reject it.
    assert "empty_shell" not in _ALLOWED_STATUSES
|
|
|
|
|
|
# ════════════════════════════════════════════════════════════════════════
# u5 — zones_data carries provisional flag; slide_base.html zone div adds
#      zone--provisional class + inline needs-adaptation badge
# ════════════════════════════════════════════════════════════════════════
#
# u5 wires the unit.provisional signal (set by u2 from V4Match.provisional in
# u1, or directly by u4 empty-shell synthesis) through the zones_data payload
# into the slide_base.html template. Visual contract:
#   - zones_data[i]['provisional'] = bool (default False; True only for IMP-30
#     opt-in synthesized units).
#   - slide_base.html zone div gets `zone--provisional` class when True; an
#     inline `<span class="zone__needs-adaptation-badge">needs adaptation</span>`
#     element is rendered inside the zone (top-right corner via absolute pos).
#   - data-provisional="1" attribute set for downstream selectors / overflow
#     checker / e2e tooling.
#
# The composition / pipeline-level handoff is exercised by u3 / u4 already.
# u5 tests focus on:
#   - template-rendering output: class + badge HTML correctly emitted ONLY when
#     zones[i].provisional is truthy. (default-off path unchanged.)
#   - byte-equivalence: non-provisional zones render the same div shape as
#     pre-u5 (just no zone--provisional class / no badge element).
|
|
|
|
import re
|
|
from pathlib import Path
|
|
|
|
from jinja2 import Environment, FileSystemLoader, select_autoescape
|
|
|
|
|
|
# ─── u5 helpers ────────────────────────────────────────────────────────
|
|
|
|
def _render_slide_base(zones: list[dict], *, layout_preset: str = "single",
                       layout_css: dict | None = None) -> str:
    """Render templates/phase_z2/slide_base.html directly via Jinja2 with a
    minimal zones list. Bypasses render_slide() so u5 can exercise the
    template-only contract without spinning up the full pipeline (no mapper,
    no contracts, no token CSS loader). slot_payload / partial_html are
    stubbed to fixed strings so the test focuses on zone div attributes.

    Args:
        zones: zone dicts handed to the template. They are NOT mutated; a
            shallow copy of each dict is rendered instead.
        layout_preset: value passed through as the template's layout_preset.
        layout_css: dict with "cols" / "rows" / "areas"; a single-cell
            layout is used when None.

    Returns:
        The rendered slide HTML as a string.
    """
    # Template directory is resolved relative to this test file's parent dir.
    template_dir = Path(__file__).resolve().parents[1] / "templates" / "phase_z2"
    env = Environment(
        loader=FileSystemLoader(str(template_dir)),
        autoescape=select_autoescape(["html"]),
    )
    if layout_css is None:
        layout_css = {
            "cols": "1fr",
            "rows": "1fr",
            "areas": '"single"',
        }
    # Each zone needs a partial_html (render_slide normally populates this).
    # Use a stable placeholder per zone so the assertion can target zone-level
    # attributes without coupling to frame template internals. A zone that
    # already carries partial_html keeps its own value (the later **zone
    # entries win), and the caller's dicts are left untouched.
    stub_partial = "<div class=\"_stub_partial\">stub</div>"
    render_zones = [{"partial_html": stub_partial, **zone} for zone in zones]
    base = env.get_template("slide_base.html")
    return base.render(
        slide_title="IMP-30 u5 test slide",
        slide_footer=None,
        zones=render_zones,
        layout_preset=layout_preset,
        layout_css=layout_css,
        gap_px=12,
        token_css="",  # empty token CSS — not under test here
        embedded_mode="standalone",
    )
|
|
|
|
|
|
def _zone_div_for_position(html: str, position: str) -> str:
    """Return the opening tag + immediate inner content (up to but not
    including partial_html) for the zone div at a given `data-zone-position`
    value. Tight enough for class/attribute assertions, lenient enough not
    to depend on partial_html internals.

    The match covers the zone div's opening tag and, when it directly
    follows, the needs-adaptation badge span. Attributes may appear between
    `class` and `data-zone-position`, matching what _all_zone_div_openings
    accepts.

    Returns:
        The matched text, or "" when no zone div has that position. Callers
        making negative ("not in") assertions should be aware that "" makes
        them pass trivially.
    """
    pattern = re.compile(
        # `[^>]*?\s+` keeps the search inside one tag while allowing other
        # attributes before data-zone-position.
        r'<div class="zone[^"]*"[^>]*?\s+data-zone-position="'
        + re.escape(position)
        + r'"[^>]*>'
        r'(?:\s*<span class="zone__needs-adaptation-badge"[^>]*>[^<]*</span>)?',
        re.DOTALL,
    )
    match = pattern.search(html)
    if not match:
        return ""
    return match.group(0)
|
|
|
|
|
|
def _all_zone_div_openings(html: str) -> list[str]:
    """Return every zone-div opening tag in the layout body. Used to scope
    class / attribute assertions away from the CSS <style> block (which
    contains `.zone--provisional` / `.zone__needs-adaptation-badge` as
    selectors — must not be mistaken for zone-div class emissions).

    A tag qualifies when it is a `<div` whose class attribute starts with
    "zone" and which carries a data-zone-position attribute. Tags are
    returned in document order.
    """
    return re.findall(
        r'<div class="zone[^"]*"[^>]*data-zone-position="[^"]*"[^>]*>',
        html,
    )
|
|
|
|
|
|
def _all_badge_spans(html: str) -> list[str]:
    """Return every actual badge `<span>` element in the rendered body
    (NOT the `.zone__needs-adaptation-badge` selector in the <style> block).
    Used to count badge emission accurately.

    Only spans whose first attribute is exactly
    class="zone__needs-adaptation-badge" and whose content has no nested
    tags are matched; each full `<span ...>text</span>` is returned.
    """
    return re.findall(
        r'<span class="zone__needs-adaptation-badge"[^>]*>[^<]*</span>',
        html,
    )
|
|
|
|
|
|
# ─── u5 case 1 : non-provisional zone renders pre-u5 div shape ────────
|
|
|
|
|
|
def test_u5_non_provisional_zone_renders_without_class_or_badge():
    """Default-off path. zones[i].provisional=False (or absent) must render
    the zone div as `<div class="zone" ...>` with no zone--provisional class
    and no needs-adaptation badge — byte-equivalent to pre-u5.

    Assertions are scoped to actual zone div emissions (not the CSS
    selectors in the <style> block, which always contain the strings
    `.zone--provisional` and `.zone__needs-adaptation-badge`)."""
    zones = [
        {
            "position": "single",
            "template_id": "MOCK_template_direct_a",
            "slot_payload": {},
            "content_weight": {"score": 1},
            "min_height_px": 100,
            "provisional": False,
        }
    ]
    html = _render_slide_base(zones)
    # Scope: zone div openings only.
    zone_divs = _all_zone_div_openings(html)
    assert len(zone_divs) == 1
    assert "zone--provisional" not in zone_divs[0]
    assert 'data-provisional="1"' not in zone_divs[0]
    # No actual badge <span> element (CSS selector in style block excluded).
    assert _all_badge_spans(html) == []
    # Sanity: the zone div carries the canonical class.
    assert 'class="zone"' in zone_divs[0]
|
|
|
|
|
|
def test_u5_zone_without_provisional_key_treated_as_non_provisional():
    """Belt-and-suspenders: a zones dict that omits the `provisional` key
    entirely (Jinja2 truthy check on missing attr → falsy) must render the
    same as provisional=False. Pre-u5 callers that haven't been updated
    still produce valid output without crashing the template."""
    zones = [
        {
            "position": "single",
            "template_id": "MOCK_template_direct_a",
            "slot_payload": {},
            "content_weight": {"score": 1},
            "min_height_px": 100,
            # provisional key intentionally absent
        }
    ]
    html = _render_slide_base(zones)
    zone_divs = _all_zone_div_openings(html)
    assert len(zone_divs) == 1
    assert "zone--provisional" not in zone_divs[0]
    assert _all_badge_spans(html) == []
|
|
|
|
|
|
# ─── u5 case 2 : provisional zone renders class + badge + data attr ───
|
|
|
|
|
|
def test_u5_provisional_zone_renders_class_and_badge():
    """Opt-in path. zones[i].provisional=True must:
      1. Append `zone--provisional` class to the zone div.
      2. Set `data-provisional="1"` data attribute (for downstream selectors).
      3. Render a `<span class="zone__needs-adaptation-badge">` element with
         the literal text "needs adaptation" (aria-label included for a11y).

    Class / attribute / badge checks are scoped to the emitted zone div and
    badge span. A plain `in html` check would pass even with no class on
    the div, because the <style> block always contains the
    `.zone--provisional` and `.zone__needs-adaptation-badge` selectors.
    """
    zones = [
        {
            "position": "single",
            "template_id": "MOCK_template_restructure_a",
            "slot_payload": {},
            "content_weight": {"score": 1},
            "min_height_px": 100,
            "provisional": True,
        }
    ]
    html = _render_slide_base(zones)
    zone_divs = _all_zone_div_openings(html)
    assert len(zone_divs) == 1
    # zone--provisional class must appear on the zone div for position=single.
    assert "zone--provisional" in zone_divs[0]
    # data-provisional="1" attribute must be present on that same div.
    assert 'data-provisional="1"' in zone_divs[0]
    # Exactly one real badge element, carrying the required label text.
    badges = _all_badge_spans(html)
    assert len(badges) == 1
    assert "needs adaptation" in badges[0]
    # Kept document-wide: this file does not pin which element hosts the
    # aria-label, only that it is emitted.
    assert 'aria-label="needs user or AI adaptation"' in html
|
|
|
|
|
|
def test_u5_provisional_badge_appears_inside_provisional_zone_only():
    """Mixed-zone slide: one provisional zone + one normal zone. The badge
    + class must appear ONLY in the provisional zone, not bleed into the
    normal one (CSS-level isolation should already prevent this, but the
    template must not emit the badge for both)."""
    zones = [
        {
            "position": "top",
            "template_id": "MOCK_template_direct_a",
            "slot_payload": {},
            "content_weight": {"score": 1},
            "min_height_px": 100,
            "provisional": False,
        },
        {
            "position": "bottom",
            "template_id": "MOCK_template_restructure_a",
            "slot_payload": {},
            "content_weight": {"score": 1},
            "min_height_px": 100,
            "provisional": True,
        },
    ]
    layout_css = {
        "cols": "1fr",
        "rows": "1fr 1fr",
        "areas": '"top" "bottom"',
    }
    html = _render_slide_base(
        zones, layout_preset="vertical-2", layout_css=layout_css
    )
    # Exactly one badge span element should be present in the rendered body
    # (CSS selector in <style> excluded by the helper).
    assert len(_all_badge_spans(html)) == 1
    # zone--provisional must appear on exactly one zone div (CSS selector
    # in <style> excluded by the helper).
    zone_divs = _all_zone_div_openings(html)
    assert len(zone_divs) == 2
    provisional_zone_divs = [d for d in zone_divs if "zone--provisional" in d]
    assert len(provisional_zone_divs) == 1
    # The provisional class must be associated with the bottom zone.
    bottom_zone_open = _zone_div_for_position(html, "bottom")
    assert "zone--provisional" in bottom_zone_open
    assert "zone__needs-adaptation-badge" in bottom_zone_open
    # The top zone must NOT carry the provisional class.
    top_zone_open = _zone_div_for_position(html, "top")
    assert "zone--provisional" not in top_zone_open
    assert "zone__needs-adaptation-badge" not in top_zone_open
|
|
|
|
|
|
# ─── u5 case 3 : zones_data data shape contract ────────────────────────
|
|
|
|
|
|
def test_u5_zones_data_provisional_field_defaults_false_in_template():
    """Template-level fallback: even if a future zones_data builder forgets
    to set provisional explicitly, the template's truthy check must not
    falsely emit zone--provisional. Pin this so a template refactor cannot
    silently invert the default."""
    zones = [
        {
            "position": "single",
            "template_id": "MOCK_template_direct_a",
            "slot_payload": {},
            "content_weight": {"score": 1},
            "min_height_px": 100,
            "provisional": None,  # explicit falsy but not False
        }
    ]
    html = _render_slide_base(zones)
    zone_divs = _all_zone_div_openings(html)
    assert len(zone_divs) == 1
    assert "zone--provisional" not in zone_divs[0]
    assert _all_badge_spans(html) == []
|
|
|
|
|
|
def test_u5_slide_base_css_carries_provisional_marker_styles():
    """The rendered slide must still contain the provisional CSS selectors.

    Renders a single provisional zone and checks that both class selectors
    (``.zone--provisional`` and ``.zone__needs-adaptation-badge``) occur in
    the output, so a refactor that drops them fails here instead of silently
    shipping an unstyled badge.

    Only the presence of the selectors is checked. Colours and dash patterns
    are a design decision and are intentionally not pinned.
    """
    provisional_zone = {
        "position": "single",
        "template_id": "MOCK_template_restructure_a",
        "slot_payload": {},
        "content_weight": {"score": 1},
        "min_height_px": 100,
        "provisional": True,
    }
    rendered = _render_slide_base([provisional_zone])

    for selector in (".zone--provisional", ".zone__needs-adaptation-badge"):
        assert selector in rendered
|
|
|
|
|
|
# ════════════════════════════════════════════════════════════════════════
|
|
# u6 — compute_slide_status additive qualifiers
|
|
# provisional_first_render_count + provisional_first_render_units
|
|
# ════════════════════════════════════════════════════════════════════════
|
|
#
|
|
# u6 surfaces the IMP-30 first-render invariant in Step 20 slide_status.
|
|
# Contract :
|
|
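# - Additive only. u6 itself does NOT extend the top-level `overall` enum
#   (PASS / RENDERED_WITH_VISUAL_REGRESSION / PARTIAL_COVERAGE /
#   PARTIAL_COVERAGE_WITH_VISUAL_REGRESSION).
#   Stage 1 Q3 lock + Codex #10 D4 (IMP-05) preservation.
#   Exception asserted further down in this file: a slide whose only unit is
#   an empty shell reports `overall` == EMPTY_SHELL_NO_CONTENT (IMP-87).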
|
|
# - `provisional_first_render_count` = int >= 0 — number of selected units with
|
|
# unit.provisional == True (set by u1 V4Match synthesis → u2 propagation,
|
|
# u3 last-resort fill, or u4 empty-shell synthesis).
|
|
# - `provisional_first_render_units` = list[dict] — per-unit entries mirroring
|
|
# the shape of `fallback_selections` / `adapter_needed_units` so downstream
|
|
# consumers can branch uniformly without re-deriving intent from labels.
|
|
# - Defensive `getattr` keeps the function safe when units come from legacy
|
|
# code paths that predate u2 (no .provisional attribute) — those units are
|
|
# treated as non-provisional.
|
|
|
|
from src.phase_z2_pipeline import MdxSection, compute_slide_status
|
|
|
|
|
|
def _mk_unit(*, section_ids: list[str], provisional: bool, **overrides):
    """Build a real ``CompositionUnit`` with happy-path defaults.

    The production class is used (not a stub) so that attribute access on
    the unit inside ``compute_slide_status`` is exercised for real.

    Args:
        section_ids: Section ids the unit covers; copied into a new list.
        provisional: Value stored in the unit's ``provisional`` field.
        **overrides: Field values that replace the defaults below.

    Returns:
        The constructed ``CompositionUnit``.
    """
    fields = {
        "source_section_ids": list(section_ids),
        "merge_type": "single",
        "frame_template_id": "MOCK_template_direct_a",
        "frame_id": "MOCK_frame_001",
        "frame_number": 1,
        "confidence": 0.9,
        "label": "use_as_is",
        "phase_z_status": "matched_zone",
        "raw_content": "alpha",
        "title": "MOCK section",
        "v4_rank": 1,
        "selection_path": "rank_1",
        "fallback_reason": None,
        "score": 1.0,
        "provisional": provisional,
    }
    # Caller-supplied overrides take precedence over the defaults.
    return CompositionUnit(**{**fields, **overrides})
|
|
|
|
|
|
def _mk_section(section_id: str) -> MdxSection:
    """Build an ``MdxSection`` whose fields are all derived from ``section_id``.

    ``section_num`` is the integer left after stripping leading ``"S"``
    characters from the id (``0`` when nothing remains). ``title`` and
    ``raw_content`` embed the id verbatim. Any other ``MdxSection`` fields
    are left to the class defaults.
    """
    digits = section_id.lstrip("S")
    return MdxSection(
        section_id=section_id,
        section_num=int(digits or "0"),
        title=f"Section {section_id}",
        raw_content=f"raw {section_id}",
    )
|
|
|
|
|
|
# ─── u6 case 1 : no provisional units — defensive default 0 / [] ──────
|
|
|
|
|
|
def test_u6_no_provisional_units_returns_zero_and_empty_list():
    """With no provisional units, both u6 qualifier fields are empty.

    Two sections are each covered by a non-provisional unit built with the
    helper defaults. The status dict must report a provisional count of 0
    and an empty provisional unit list, while the overall value and the
    pre-existing qualifier fields keep their expected values.
    """
    covered_ids = ("S1", "S2")
    sections = [_mk_section(sid) for sid in covered_ids]
    units = [
        _mk_unit(section_ids=[sid], provisional=False) for sid in covered_ids
    ]

    status = compute_slide_status(
        sections,
        units,
        {"candidates_summary": []},
        {"passed": True, "fail_reasons": []},
        adapter_needed_units=None,
        debug_zones=None,
    )

    assert status["provisional_first_render_count"] == 0
    assert status["provisional_first_render_units"] == []
    # Full coverage plus a passing visual check keeps the overall value PASS.
    assert status["overall"] == "PASS"
    # Regression guard: the earlier qualifier fields are still reported.
    assert status["fallback_selection_count"] == 0
    assert status["selection_paths"] == []
|
|
|
|
|
|
def test_u6_provisional_field_absent_is_treated_as_false():
    """A unit whose instance lacks ``provisional`` is not counted as provisional.

    The attribute is deleted from an otherwise normal unit to imitate a
    legacy, duck-typed unit. The status dict must still report a provisional
    count of 0 and an empty provisional unit list.
    """
    legacy_unit = _mk_unit(section_ids=["S1"], provisional=False)
    # Drop the instance attribute so the status code cannot read it directly.
    del legacy_unit.provisional

    status = compute_slide_status(
        [_mk_section("S1")],
        [legacy_unit],
        {"candidates_summary": []},
        {"passed": True, "fail_reasons": []},
        adapter_needed_units=None,
        debug_zones=None,
    )

    assert status["provisional_first_render_count"] == 0
    assert status["provisional_first_render_units"] == []
|
|
|
|
|
|
# ─── u6 case 2 : provisional unit synthesized via u1 (chain_exhausted) ─
|
|
|
|
|
|
def test_u6_chain_exhausted_provisional_unit_listed_with_full_shape():
    """A provisional rank-1 unit is listed with every expected entry field.

    The unit carries ``provisional=True`` together with the restructure
    label, ``provisional_rank_1`` selection path and a fallback reason. Its
    entry in ``provisional_first_render_units`` must expose
    source_section_ids, phase_z_status, frame_template_id, frame_id, label,
    selection_path, fallback_reason and v4_rank, so consumers can audit the
    unit without re-reading the units list.
    """
    provisional_unit = _mk_unit(
        section_ids=["S1"],
        provisional=True,
        label="restructure",
        phase_z_status="extract_matched_zone",
        frame_template_id="MOCK_template_restructure_a",
        frame_id="MOCK_frame_002",
        selection_path="provisional_rank_1",
        fallback_reason="phase_z_status_not_allowed:extract_matched_zone",
        v4_rank=1,
    )

    status = compute_slide_status(
        [_mk_section("S1")],
        [provisional_unit],
        {"candidates_summary": []},
        {"passed": True, "fail_reasons": []},
        adapter_needed_units=None,
        debug_zones=None,
    )

    assert status["provisional_first_render_count"] == 1
    entries = status["provisional_first_render_units"]
    assert len(entries) == 1

    expected_entry = {
        "source_section_ids": ["S1"],
        "phase_z_status": "extract_matched_zone",
        "frame_template_id": "MOCK_template_restructure_a",
        "frame_id": "MOCK_frame_002",
        "label": "restructure",
        "selection_path": "provisional_rank_1",
        "fallback_reason": "phase_z_status_not_allowed:extract_matched_zone",
        "v4_rank": 1,
    }
    for key, expected in expected_entry.items():
        assert entries[0][key] == expected

    # Provisional is a qualifier, not a failure class: with full coverage and
    # a passing visual check the overall value stays PASS.
    assert status["overall"] == "PASS"
|
|
|
|
|
|
# ─── u6 case 3 : empty-shell unit (u4) listed with __empty__ identifiers ─
|
|
|
|
|
|
def test_u6_empty_shell_unit_listed_with_empty_identifiers():
    """An empty-shell unit is listed with ``__empty__`` ids and blocks PASS.

    A single shell unit covers both sections. Its entry in
    ``provisional_first_render_units`` must carry the ``__empty__`` frame
    identifiers and the ``empty_shell`` status, label and selection path, so
    a terminal shell can be told apart from other provisional units.

    Because the slide's only unit is an empty shell, ``full_mdx_coverage``
    must be ``False`` and ``overall`` must be ``EMPTY_SHELL_NO_CONTENT``
    rather than ``PASS``.
    """
    shell_fields = {
        "merge_type": "empty_shell",
        "frame_template_id": "__empty__",
        "frame_id": "__empty__",
        "frame_number": 0,
        "confidence": 0.0,
        "label": "empty_shell",
        "phase_z_status": "empty_shell",
        "raw_content": "raw S1\n\nraw S2",
        "title": "Section S1 / Section S2",
        "v4_rank": None,
        "selection_path": "empty_shell",
        "fallback_reason": "no_v4_rank_1_for_any_section",
        "score": 0.0,
    }
    shell = _mk_unit(section_ids=["S1", "S2"], provisional=True, **shell_fields)

    status = compute_slide_status(
        [_mk_section("S1"), _mk_section("S2")],
        [shell],
        {"candidates_summary": []},
        {"passed": True, "fail_reasons": []},
        adapter_needed_units=None,
        debug_zones=None,
    )

    assert status["provisional_first_render_count"] == 1
    entry = status["provisional_first_render_units"][0]
    expected_entry = {
        "frame_template_id": "__empty__",
        "frame_id": "__empty__",
        "phase_z_status": "empty_shell",
        "label": "empty_shell",
        "selection_path": "empty_shell",
        "fallback_reason": "no_v4_rank_1_for_any_section",
    }
    for key, expected in expected_entry.items():
        assert entry[key] == expected
    assert entry["v4_rank"] is None

    # IMP-87 u4: a slide whose sole rendered unit is __empty__ must not be
    # reported as PASS. Content coverage is False and the overall value is
    # elevated to EMPTY_SHELL_NO_CONTENT.
    assert status["full_mdx_coverage"] is False
    assert status["overall"] == "EMPTY_SHELL_NO_CONTENT"
|
|
|
|
|
|
# ─── u6 case 4 : mixed selection — provisional + normal units coexist ──
|
|
|
|
|
|
def test_u6_mixed_selection_counts_only_provisional_units():
    """Only provisional units are counted when normal units are also present.

    Three sections: S1 and S3 are covered by normal units, S2 by a
    provisional unit labelled ``reject``. The provisional count must be 1,
    the single listed entry must belong to S2, and neither S1 nor S3 may
    appear in the provisional list.
    """
    sections = [_mk_section(sid) for sid in ("S1", "S2", "S3")]
    provisional_s2 = _mk_unit(
        section_ids=["S2"],
        provisional=True,
        label="reject",
        phase_z_status="fallback_candidate",
        selection_path="provisional_rank_1",
        fallback_reason="phase_z_status_not_allowed:fallback_candidate",
    )
    units = [
        _mk_unit(section_ids=["S1"], provisional=False),
        provisional_s2,
        _mk_unit(section_ids=["S3"], provisional=False),
    ]

    status = compute_slide_status(
        sections,
        units,
        {"candidates_summary": []},
        {"passed": True, "fail_reasons": []},
        adapter_needed_units=None,
        debug_zones=None,
    )

    assert status["provisional_first_render_count"] == 1
    entries = status["provisional_first_render_units"]
    assert len(entries) == 1
    assert entries[0]["source_section_ids"] == ["S2"]
    assert entries[0]["label"] == "reject"

    # Sections covered by normal units must not leak into the provisional list.
    listed_ids = []
    for entry in entries:
        listed_ids.extend(entry["source_section_ids"])
    for normal_id in ("S1", "S3"):
        assert normal_id not in listed_ids
|
|
|
|
|
|
# ─── u6 case 5 : overall enum stability under provisional units ───────
|
|
|
|
|
|
def test_u6_overall_enum_unchanged_when_provisional_present_with_visual_pass():
    """A provisional unit with full coverage and a visual pass still yields PASS.

    Provisional is reported through ``provisional_first_render_count``; it
    does not change ``overall``. Code that wants to gate on provisional
    units must read the count, not the overall value.
    """
    provisional_unit = _mk_unit(
        section_ids=["S1"],
        provisional=True,
        label="restructure",
        phase_z_status="extract_matched_zone",
        selection_path="provisional_rank_1",
        fallback_reason="phase_z_status_not_allowed:extract_matched_zone",
    )

    status = compute_slide_status(
        [_mk_section("S1")],
        [provisional_unit],
        {"candidates_summary": []},
        {"passed": True, "fail_reasons": []},
        adapter_needed_units=None,
        debug_zones=None,
    )

    assert status["overall"] == "PASS"
    assert status["provisional_first_render_count"] == 1
|
|
|
|
|
|
def test_u6_overall_enum_visual_regression_independent_of_provisional():
    """A failing visual check maps to the existing regression value.

    With one provisional unit, full coverage and a failed overflow check,
    ``overall`` must be ``RENDERED_WITH_VISUAL_REGRESSION`` and the
    provisional count must still be 1. The visual outcome drives the overall
    value; the provisional flag does not alter it.
    """
    provisional_unit = _mk_unit(
        section_ids=["S1"],
        provisional=True,
        selection_path="provisional_rank_1",
    )
    failed_overflow = {"passed": False, "fail_reasons": ["zone overflow"]}

    status = compute_slide_status(
        [_mk_section("S1")],
        [provisional_unit],
        {"candidates_summary": []},
        failed_overflow,
        adapter_needed_units=None,
        debug_zones=None,
    )

    assert status["overall"] == "RENDERED_WITH_VISUAL_REGRESSION"
    assert status["provisional_first_render_count"] == 1
|
|
|
|
|
|
# ─── u6 case 6 : note field documents the new qualifier ───────────────
|
|
|
|
|
|
def test_u6_note_field_mentions_provisional_first_render_count():
    """The human-readable ``note`` names the provisional qualifier.

    The slide_status ``note`` is a summary meant for operators. It must
    mention ``provisional_first_render_count`` (a value above 0 meaning the
    IMP-30 first-render invariant was in effect) and must keep mentioning
    the earlier ``adapter_needed_count`` and ``content_truncated_count``
    qualifiers.
    """
    status = compute_slide_status(
        [_mk_section("S1")],
        [_mk_unit(section_ids=["S1"], provisional=False)],
        {"candidates_summary": []},
        {"passed": True, "fail_reasons": []},
        adapter_needed_units=None,
        debug_zones=None,
    )

    note = status["note"]
    assert "provisional_first_render_count" in note
    # Regression guard: guidance for the earlier qualifiers must remain.
    assert "adapter_needed_count" in note
    assert "content_truncated_count" in note
|
|
|
|
|
|
# ════════════════════════════════════════════════════════════════════════
|
|
# u7 — broader pipeline fixture coverage for the empty-shell + provisional
|
|
# retry end-to-end (deferred from u4 verification per mid-stage
|
|
# compaction). Threads real production functions together with
|
|
# synthetic V4 fixtures so the cross-layer data flow (u1 → u2 → u3 →
|
|
# u4 → u6) is exercised in one pass, instead of relying on per-unit
|
|
# tests that each touch only one layer.
|
|
# ════════════════════════════════════════════════════════════════════════
|
|
#
|
|
# Each u7 case runs the real chain:
|
|
# synthetic V4 dict
|
|
# → lookup_v4_match_with_fallback (u1)
|
|
# → plan_composition (u2 + u3 propagation / fill)
|
|
# → compute_slide_status (u6 qualifier surface)
|
|
#
|
|
# Monkeypatched dependencies mirror tests/test_phase_z2_v4_fallback.py
|
|
# (get_contract + compute_capacity_fit). MOCK_ naming + rank-by-field
|
|
# convention preserved (Codex #10 E1).
|
|
|
|
import pytest
|
|
|
|
from src import phase_z2_pipeline as _pz_pipeline
|
|
from src.phase_z2_pipeline import (
|
|
V4_LABEL_TO_PHASE_Z_STATUS as _PROD_LABEL_TO_STATUS,
|
|
compute_slide_status as _compute_slide_status,
|
|
lookup_v4_match_with_fallback as _real_lookup,
|
|
)
|
|
|
|
|
|
# Synthetic catalog stub — only MOCK_ templates considered registered.
|
|
# Mirrors test_phase_z2_v4_fallback.py shape so the two suites stay in sync.
|
|
_U7_MOCK_CATALOG: dict[str, object] = {
|
|
"MOCK_template_direct_a": object(),
|
|
"MOCK_template_restructure_a": object(),
|
|
"MOCK_template_reject_a": object(),
|
|
}
|
|
|
|
|
|
def _u7_get_contract(template_id: str):
    """Return the mock catalog entry for ``template_id``, or ``None`` if absent."""
    try:
        return _U7_MOCK_CATALOG[template_id]
    except KeyError:
        return None
|
|
|
|
|
|
def _u7_capacity_fit_ok(template_id: str, raw_content: str) -> dict:
|
|
return {"fit_status": "ok"}
|
|
|
|
|
|
@pytest.fixture
def u7_patch_selector_deps(monkeypatch):
    """Swap the pipeline module's catalog and capacity helpers for u7 stubs.

    ``get_contract`` and ``compute_capacity_fit`` are replaced on the
    pipeline module itself because, per the original note (Codex #10 E3),
    the selector takes no injected dependencies.
    """
    stubs = {
        "get_contract": _u7_get_contract,
        "compute_capacity_fit": _u7_capacity_fit_ok,
    }
    for attr_name, stub in stubs.items():
        monkeypatch.setattr(_pz_pipeline, attr_name, stub)
|
|
|
|
|
|
def _u7_v4_section(judgments: list[dict]) -> dict:
|
|
return {"judgments_full32": judgments}
|
|
|
|
|
|
def _u7_j(rank: int, template_id: str, frame_id: str, label: str,
|
|
confidence: float = 0.9) -> dict:
|
|
return {
|
|
"frame_id": frame_id,
|
|
"frame_number": rank,
|
|
"template_id": template_id,
|
|
"confidence": confidence,
|
|
"label": label,
|
|
"v4_full_rank": rank,
|
|
}
|
|
|
|
|
|
def _u7_section(section_id: str) -> _pz_pipeline.MdxSection:
    """Build an ``MdxSection`` with a one-bullet body derived from the id.

    ``section_num`` is the integer left after stripping leading ``"S"``
    characters from ``section_id`` (``0`` when nothing remains).
    """
    digits = section_id.lstrip("S")
    return _pz_pipeline.MdxSection(
        section_id=section_id,
        section_num=int(digits or "0"),
        title=f"Section {section_id}",
        raw_content=f"- bullet for {section_id}\n",
    )
|
|
|
|
|
|
# ─── u7 case 1 : e2e chain_exhausted → provisional retry → slide_status ──
|
|
|
|
|
|
def test_u7_e2e_chain_exhausted_provisional_flows_through_layers(
    u7_patch_selector_deps,
):
    """End-to-end: an exhausted V4 chain surfaces as one provisional unit.

    S1 has an auto-renderable rank-1 judgment (``use_as_is``). S2 only has
    ``restructure`` / ``reject`` judgments. With ``allow_provisional=True``
    on the lookup and ``allow_provisional_fill=True`` on
    ``plan_composition``, the test expects:

    - exactly two units, S1 normal and S2 provisional
      (``selection_path == "provisional_rank_1"``);
    - ``compute_slide_status`` counting only S2 as provisional;
    - ``overall`` staying ``PASS``.

    The original author notes this mirrors the production recovery path
    (pipeline.py:3262 ``_lookup_fn_provisional`` plus
    ``plan_composition(allow_provisional_fill=True)``, u4 Phase A); that
    reference cannot be verified from this file.
    """
    v4 = {
        "mdx_sections": {
            # S1: auto-renderable, normal rank-1 selection.
            "S1": _u7_v4_section([
                _u7_j(1, "MOCK_template_direct_a", "MOCK_frame_001", "use_as_is"),
            ]),
            # S2: chain exhausted (restructure / reject only).
            "S2": _u7_v4_section([
                _u7_j(1, "MOCK_template_restructure_a", "MOCK_frame_011", "restructure"),
                _u7_j(2, "MOCK_template_reject_a", "MOCK_frame_012", "reject"),
            ]),
        }
    }
    sections = [_u7_section("S1"), _u7_section("S2")]

    def lookup_fn(sid: str):
        match, _trace = _real_lookup(
            v4, sid, raw_content="- a\n- b\n",
            allow_provisional=True,
        )
        return match

    units, layout_preset, comp_debug = plan_composition(
        sections,
        v4_lookup_fn=lookup_fn,
        v4_label_to_status=_PROD_LABEL_TO_STATUS,
        allowed_statuses=_ALLOWED_STATUSES,
        capacity_fit_fn=None,
        v4_candidates_lookup_fn=None,
        allow_provisional_fill=True,
    )

    # Recovery succeeded: 2 units (S1 normal + S2 provisional fill). The
    # explicit length check is required because the dict below is keyed by
    # the first section id and would silently collapse duplicate units.
    assert len(units) == 2
    by_section = {u.source_section_ids[0]: u for u in units}
    assert set(by_section) == {"S1", "S2"}
    assert by_section["S1"].provisional is False
    assert by_section["S1"].label == "use_as_is"
    assert by_section["S2"].provisional is True
    assert by_section["S2"].label == "restructure"
    assert by_section["S2"].selection_path == "provisional_rank_1"
    assert layout_preset == "horizontal-2"

    # u6 qualifier surface: only S2 is counted.
    status = _compute_slide_status(
        sections, units, comp_debug,
        overflow={"passed": True, "fail_reasons": []},
        adapter_needed_units=None, debug_zones=None,
    )
    assert status["provisional_first_render_count"] == 1
    entry = status["provisional_first_render_units"][0]
    assert entry["source_section_ids"] == ["S2"]
    assert entry["selection_path"] == "provisional_rank_1"
    assert entry["frame_template_id"] == "MOCK_template_restructure_a"
    # Overall value unchanged: full coverage + visual pass = PASS.
    assert status["overall"] == "PASS"
|
|
|
|
|
|
# ─── u7 case 2 : e2e zero-V4 → u4 empty-shell synthesis → slide_status ───
|
|
|
|
|
|
def test_u7_e2e_zero_v4_empty_shell_status_surface(u7_patch_selector_deps):
    """End-to-end zero-V4 path: an empty shell is reported honestly, not as PASS.

    When V4 holds no judgments for any section, ``plan_composition`` must
    return no units and no layout preset, even with both opt-in flags on.
    The test then builds the empty-shell unit by hand (simulating the
    production u4 Phase B synthesis) and feeds it to
    ``compute_slide_status``.

    The shell must be listed as a provisional unit with
    ``phase_z_status == "empty_shell"``. ``full_mdx_coverage`` must be
    ``False`` and ``overall`` must be ``EMPTY_SHELL_NO_CONTENT`` (IMP-87), so
    a slide that rendered only a placeholder cannot be reported as ``PASS``.
    """
    v4 = {"mdx_sections": {}}  # nothing matches any section
    sections = [_u7_section("S1"), _u7_section("S2")]

    def lookup_fn(sid: str):
        match, _trace = _real_lookup(
            v4, sid, raw_content="- a\n",
            allow_provisional=True,
        )
        return match

    units_first, preset_first, _ = plan_composition(
        sections,
        v4_lookup_fn=lookup_fn,
        v4_label_to_status=_PROD_LABEL_TO_STATUS,
        allowed_statuses=_ALLOWED_STATUSES,
        capacity_fit_fn=None,
        v4_candidates_lookup_fn=None,
        allow_provisional_fill=True,
    )
    # No V4 evidence anywhere: recovery cannot fabricate a match, so units
    # stays empty. This is the trigger condition for the empty-shell route.
    assert units_first == []
    assert preset_first is None

    # Simulate the production u4 Phase B synthesis (original note:
    # pipeline.py:3325~; not verifiable from this file).
    empty_shell = CompositionUnit(
        source_section_ids=[s.section_id for s in sections],
        merge_type="empty_shell",
        frame_template_id="__empty__",
        frame_id="__empty__",
        frame_number=0,
        confidence=0.0,
        label="empty_shell",
        phase_z_status="empty_shell",
        raw_content="\n\n".join(s.raw_content for s in sections),
        title=" / ".join(s.title for s in sections),
        v4_rank=None,
        selection_path="empty_shell",
        fallback_reason="no_v4_rank_1_for_any_section",
        score=0.0,
        provisional=True,
    )

    status = _compute_slide_status(
        sections, [empty_shell], comp_debug={"candidates_summary": []},
        overflow={"passed": True, "fail_reasons": []},
        adapter_needed_units=None, debug_zones=None,
    )

    assert status["provisional_first_render_count"] == 1
    shell_entry = status["provisional_first_render_units"][0]
    assert shell_entry["phase_z_status"] == "empty_shell"
    assert shell_entry["frame_template_id"] == "__empty__"
    assert shell_entry["source_section_ids"] == ["S1", "S2"]
    # IMP-87 u4: the shell still names both sections, but a slide whose only
    # unit is an empty shell has no rendered content. Coverage must therefore
    # be False and the overall value EMPTY_SHELL_NO_CONTENT, so a zero-V4
    # slide cannot pass on the visual-overflow check alone.
    assert status["full_mdx_coverage"] is False
    assert status["overall"] == "EMPTY_SHELL_NO_CONTENT"
|
|
|
|
|
|
# ─── u7 case 3 : e2e normal path unchanged when opt-in flags both on ─────
|
|
|
|
|
|
def test_u7_e2e_normal_path_unchanged_with_opt_in_flags(u7_patch_selector_deps):
    """Opt-in flags must not create provisional units on the normal path.

    Both sections have an auto-renderable rank-1 judgment. Even with
    ``allow_provisional=True`` on the lookup and
    ``allow_provisional_fill=True`` on ``plan_composition``, every unit must
    be non-provisional with ``selection_path == "rank_1"``, and the status
    must report zero provisional units with ``overall == "PASS"``.
    """
    judgments_by_section = {
        "S1": [_u7_j(1, "MOCK_template_direct_a", "MOCK_frame_001", "use_as_is")],
        "S2": [_u7_j(1, "MOCK_template_direct_a", "MOCK_frame_002", "use_as_is")],
    }
    v4 = {
        "mdx_sections": {
            sid: _u7_v4_section(judgments)
            for sid, judgments in judgments_by_section.items()
        }
    }
    sections = [_u7_section("S1"), _u7_section("S2")]

    def _lookup(sid: str):
        found, _ = _real_lookup(
            v4, sid, raw_content="- a\n", allow_provisional=True,
        )
        return found

    plan_kwargs = dict(
        v4_lookup_fn=_lookup,
        v4_label_to_status=_PROD_LABEL_TO_STATUS,
        allowed_statuses=_ALLOWED_STATUSES,
        capacity_fit_fn=None,
        v4_candidates_lookup_fn=None,
        allow_provisional_fill=True,
    )
    units, layout_preset, comp_debug = plan_composition(sections, **plan_kwargs)

    first_ids = {unit.source_section_ids[0] for unit in units}
    assert first_ids == {"S1", "S2"}
    assert all(unit.provisional is False for unit in units)
    assert all(unit.selection_path == "rank_1" for unit in units)
    assert layout_preset == "horizontal-2"

    status = _compute_slide_status(
        sections,
        units,
        comp_debug,
        overflow={"passed": True, "fail_reasons": []},
        adapter_needed_units=None,
        debug_zones=None,
    )
    assert status["provisional_first_render_count"] == 0
    assert status["provisional_first_render_units"] == []
    assert status["overall"] == "PASS"
|
|
|
|
|
|
# ─── u7 case 4 : IMP-30 invariants — MDX content preserved + audit trail ──
|
|
|
|
|
|
def test_u7_imp30_invariants_mdx_preserved_and_audit_trail_surfaced(
    u7_patch_selector_deps,
):
    """Provisional recovery keeps MDX text intact and leaves an audit entry.

    Two invariants are checked for the restructure-only section S2:

    - its provisional unit's ``raw_content`` equals the section's source
      text exactly (no rewrite);
    - ``comp_debug["candidates_summary"]`` holds exactly one S2 entry whose
      ``selection_state`` is ``"selected_provisional"``, so the recovery is
      observable rather than silent.
    """
    raw_s2 = "- restructure-only bullet alpha\n- restructure-only bullet beta\n"
    sections = [
        _u7_section("S1"),
        _pz_pipeline.MdxSection(
            section_id="S2",
            section_num=2,
            title="Section S2",
            raw_content=raw_s2,
        ),
    ]
    v4 = {
        "mdx_sections": {
            "S1": _u7_v4_section([
                _u7_j(1, "MOCK_template_direct_a", "MOCK_frame_001", "use_as_is"),
            ]),
            "S2": _u7_v4_section([
                _u7_j(1, "MOCK_template_restructure_a", "MOCK_frame_011", "restructure"),
            ]),
        }
    }

    def _lookup(sid: str):
        # Pass each section's real raw_content through the production lookup.
        if sid == "S1":
            raw = sections[0].raw_content
        else:
            raw = raw_s2
        found, _ = _real_lookup(
            v4, sid, raw_content=raw, allow_provisional=True,
        )
        return found

    units, _preset, comp_debug = plan_composition(
        sections,
        v4_lookup_fn=_lookup,
        v4_label_to_status=_PROD_LABEL_TO_STATUS,
        allowed_statuses=_ALLOWED_STATUSES,
        capacity_fit_fn=None,
        v4_candidates_lookup_fn=None,
        allow_provisional_fill=True,
    )

    unit_for = {unit.source_section_ids[0]: unit for unit in units}
    s2_unit = unit_for["S2"]
    assert s2_unit.provisional is True
    # IMP-30 contract: MDX content is carried through unchanged.
    assert s2_unit.raw_content == raw_s2

    # Audit trail: S2's provisional pick must be recorded in the summary as
    # 'selected_provisional' (original note: selection_state is set in
    # src/phase_z2_composition.py:862 _candidate_state).
    s2_selected = []
    for candidate in comp_debug.get("candidates_summary", []):
        if candidate["source_section_ids"] != ["S2"]:
            continue
        if candidate["selection_state"] == "selected_provisional":
            s2_selected.append(candidate)

    assert len(s2_selected) == 1
    audit_entry = s2_selected[0]
    assert audit_entry["template_id"] == "MOCK_template_restructure_a"
    assert audit_entry["selection_path"] == "provisional_rank_1"
|
|
|
|
|
|
# ─── u7 case 5 : all-restructure/reject → every section gets placeholder ──
|
|
|
|
|
|
def test_u7_imp30_all_restructure_only_each_section_gets_provisional_unit(
    u7_patch_selector_deps,
):
    """Every restructure/reject-only section gets its own provisional unit.

    Issue-body invariant: even a section that only has restructure / reject
    judgments gets a placeholder zone plus a trace. When EVERY section is in
    that state, recovery must yield exactly one provisional unit per section
    (no zero-unit abort, no swallowed chain_exhausted), each one must be
    recorded as ``selected_provisional`` in the audit summary, and the
    status must count all three while ``overall`` stays ``PASS``.
    """
    sections = [_u7_section(sid) for sid in ("S1", "S2", "S3")]
    v4 = {
        "mdx_sections": {
            "S1": _u7_v4_section([
                _u7_j(1, "MOCK_template_restructure_a", "MOCK_frame_011", "restructure"),
            ]),
            "S2": _u7_v4_section([
                _u7_j(1, "MOCK_template_reject_a", "MOCK_frame_012", "reject"),
            ]),
            "S3": _u7_v4_section([
                _u7_j(1, "MOCK_template_restructure_a", "MOCK_frame_013", "restructure"),
            ]),
        }
    }

    def lookup_fn(sid: str):
        match, _trace = _real_lookup(
            v4, sid, raw_content="- bullet\n", allow_provisional=True,
        )
        return match

    units, layout_preset, comp_debug = plan_composition(
        sections,
        v4_lookup_fn=lookup_fn,
        v4_label_to_status=_PROD_LABEL_TO_STATUS,
        allowed_statuses=_ALLOWED_STATUSES,
        capacity_fit_fn=None,
        v4_candidates_lookup_fn=None,
        allow_provisional_fill=True,
    )

    # Every section must be covered by exactly one provisional unit. The
    # length check enforces "exactly one": the dict below is keyed by the
    # first section id and would silently collapse duplicate units.
    assert len(units) == 3
    by_section = {u.source_section_ids[0]: u for u in units}
    assert set(by_section) == {"S1", "S2", "S3"}
    assert all(u.provisional is True for u in units)
    assert layout_preset is not None  # zero-unit abort path was bypassed
    # All three audit entries must report selected_provisional.
    summary = comp_debug.get("candidates_summary", [])
    selected_provisional_sids = {
        e["source_section_ids"][0]
        for e in summary
        if e["selection_state"] == "selected_provisional"
    }
    assert selected_provisional_sids == {"S1", "S2", "S3"}

    # Step 20 surface: all three sections are counted as provisional and the
    # overall value is unchanged (provisional is a qualifier, not an enum
    # value, per IMP-05 Codex #10 D4).
    status = _compute_slide_status(
        sections, units, comp_debug,
        overflow={"passed": True, "fail_reasons": []},
        adapter_needed_units=None, debug_zones=None,
    )
    assert status["provisional_first_render_count"] == 3
    assert {
        e["source_section_ids"][0]
        for e in status["provisional_first_render_units"]
    } == {"S1", "S2", "S3"}
    assert status["overall"] == "PASS"
|