Files
C.E.L_Slide_test2/tests/test_phase_z2_imp30_first_render.py
kyeongmin 842a46144c feat(#87): IMP-87 u1~u5 empty_shell honesty gate + BLOCKED exit
EMPTY_SHELL_NO_CONTENT overall enum + 3-marker detection (frame_template_id="__empty__"
OR label="empty_shell" OR merge_type="empty_shell") routes empty-placeholder-only
slides to BLOCKED CLI exit 1 + red final_status.html, blocking fake PASS reports
(feedback_artifact_status_naming). Coverage accounting split: legacy covered_section_ids
preserved + new content_rendered_section_ids / empty_shell_section_ids. mdx05 Case B
(zero V4 evidence) honestly classified instead of synthesizing fabricated rank-1 reject
frames. IMP-30 u6/u7 stale empty-shell PASS assertions inverted (29 tests). IMP-85 smoke
parametrize: mdx05 removed from exit-0 list + dedicated BLOCKED exit test added (4 tests).
No production behavior change for chain_exhausted Case A; no AI route activation; no
mdx-id hardcoding. 53 targeted + 76 adjacent Phase Z tests PASS.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-23 20:40:54 +09:00

1566 lines
63 KiB
Python

"""IMP-30 first-render invariant tests (per-unit slice).
This file is the shared regression home for IMP-30 units u2~u7. Each
implementation unit adds its own focused tests; u7 (regression coverage)
will broaden the surface (synthetic V4 fixtures for chain_exhausted
provisional, zero-V4 empty-shell, normal-path unchanged).
u3 scope (this slice) — select_composition_units last-resort provisional
fill for uncovered sections + _candidate_state "selected_provisional":
1. default-off behavior is byte-identical to pre-u3 (IMP-05 guard).
2. opt-in fills uncovered sections with provisional candidates whose
selection_state would otherwise be "filtered_status".
3. opt-in never displaces normal greedy selections.
4. opt-in respects coverage non-overlap (no section selected twice).
5. plan_composition._candidate_state returns "selected_provisional"
for fills and "selected" for normal greedy picks.
Synthetic naming convention (Codex #10 E1):
- MOCK_ prefix mandatory
- _a / _b suffixes = enumeration only (NOT ordering / priority)
- rank/order expressed by V4 rank field, NEVER ID suffix
"""
from __future__ import annotations
from dataclasses import dataclass, field
from typing import Optional
from src.phase_z2_composition import (
CompositionUnit,
plan_composition,
select_composition_units,
)
# ─── Synthetic match shape (duck-typed V4Match-like) ───────────────────
@dataclass
class _StubV4Match:
    """Duck-typed stand-in for a V4 match, used by the composition tests.

    Per the original author's note, the attribute set mirrors the fields of
    src.phase_z2_pipeline.V4Match that the composition module touches
    (collect_candidates / score path): template_id / frame_id / frame_number /
    confidence / label / v4_rank / selection_path / fallback_reason /
    provisional. The composition module intentionally does not import V4Match
    (circular dep avoidance), so a plain object exposing the same attributes
    is the contract.

    The first five fields are required and positional; the remaining four
    carry defaults so tests only spell out what they exercise.
    """

    template_id: str
    frame_id: str
    frame_number: int
    confidence: float
    # Tests in this file use labels that are keys of _LABEL_TO_STATUS.
    label: str
    v4_rank: Optional[int] = None
    selection_path: str = "rank_1"
    fallback_reason: Optional[str] = None
    # Set to True by tests that model an IMP-30 provisional match.
    provisional: bool = False
@dataclass
class _StubSection:
    """Minimal section object handed to plan_composition in these tests.

    Per the original author's note, collect_candidates reads section_id /
    raw_content / title, and the attribute names match MdxSection's. The stub
    exists so the u3 tests do not have to import the pipeline module.
    """

    # Only section_id is required; tests usually construct _StubSection("S1").
    section_id: str
    title: str = ""
    raw_content: str = ""
# Label -> Phase Z status fixture. Only the labels used by the tests in this
# file are present; per the original note, the production mapping
# (V4_LABEL_TO_PHASE_Z_STATUS in the pipeline) is broader.
_LABEL_TO_STATUS = dict(
    use_as_is="matched_zone",
    light_edit="adapt_matched_zone",
    restructure="extract_matched_zone",
    reject="fallback_candidate",
)

# Statuses the tests pass to the planner as "allowed".
_ALLOWED_STATUSES = set(("matched_zone", "adapt_matched_zone"))
# ─── Helpers ────────────────────────────────────────────────────────────
def _make_lookup(matches_by_section: dict[str, _StubV4Match]):
    """Build a v4_lookup_fn backed by a plain dict.

    The returned callable takes a ``section_id`` and yields the stub match
    registered for it, or ``None`` when the section has no entry.
    """
    return lambda section_id: matches_by_section.get(section_id)
def _make_candidates_lookup_empty():
    """Build a v4_candidates_lookup_fn that reports no candidates.

    Every call of the returned callable yields a fresh empty list, whatever
    ``section_id`` is passed (the Step 6-A axis is not under test here).
    """
    return lambda section_id: []
# ─── u3 case 1 : default-off behavior byte-identical to pre-u3 ─────────
def test_u3_default_off_preserves_imp05_behavior():
    """IMP-05 regression guard for the default (no provisional fill) path.

    Fixture:
      - S1: use_as_is, provisional=False -> ordinary selection.
      - S2: restructure, provisional=True -> only a fill could pick it.

    ``allow_provisional_fill`` is not passed, so the plan must hold the S1
    unit alone with preset "single", while S2 still shows up in the debug
    summary with selection_state "filtered_status".
    """
    direct_match = _StubV4Match(
        template_id="MOCK_template_direct_a",
        frame_id="MOCK_frame_001",
        frame_number=1,
        confidence=0.9,
        label="use_as_is",
        v4_rank=1,
    )
    provisional_match = _StubV4Match(
        template_id="MOCK_template_restructure_a",
        frame_id="MOCK_frame_002",
        frame_number=2,
        confidence=0.65,
        label="restructure",
        v4_rank=1,
        selection_path="provisional_rank_1",
        fallback_reason="phase_z_status_not_allowed:extract_matched_zone",
        provisional=True,
    )
    section_stubs = [_StubSection(sid) for sid in ("S1", "S2")]
    lookup = _make_lookup({"S1": direct_match, "S2": provisional_match})

    # allow_provisional_fill is deliberately omitted: default path under test.
    units, preset, debug = plan_composition(
        section_stubs,
        lookup,
        _LABEL_TO_STATUS,
        _ALLOWED_STATUSES,
        v4_candidates_lookup_fn=_make_candidates_lookup_empty(),
    )

    assert len(units) == 1
    sole_unit = units[0]
    assert sole_unit.source_section_ids == ["S1"]
    assert sole_unit.provisional is False
    assert preset == "single"

    # The S2 candidate is still reported, but as filtered out by status.
    entry_by_sections = {}
    for entry in debug["candidates_summary"]:
        entry_by_sections[tuple(entry["source_section_ids"])] = entry
    assert entry_by_sections[("S2",)]["selection_state"] == "filtered_status"
# ─── u3 case 2 : opt-in fills uncovered sections with provisional ──────
def test_u3_opt_in_fills_uncovered_with_provisional():
    """IMP-30 u3 opt-in path: the provisional candidate fills the gap.

    Same fixture as the default-off case (S1 use_as_is, S2 provisional
    restructure) but with ``allow_provisional_fill=True``. Both sections must
    end up covered: S1 by a non-provisional unit, S2 by a provisional unit
    labelled "restructure", and the preset must be "horizontal-2".
    """
    direct_match = _StubV4Match(
        template_id="MOCK_template_direct_a",
        frame_id="MOCK_frame_001",
        frame_number=1,
        confidence=0.9,
        label="use_as_is",
        v4_rank=1,
    )
    provisional_match = _StubV4Match(
        template_id="MOCK_template_restructure_a",
        frame_id="MOCK_frame_002",
        frame_number=2,
        confidence=0.65,
        label="restructure",
        v4_rank=1,
        selection_path="provisional_rank_1",
        fallback_reason="phase_z_status_not_allowed:extract_matched_zone",
        provisional=True,
    )
    section_stubs = [_StubSection(sid) for sid in ("S1", "S2")]
    lookup = _make_lookup({"S1": direct_match, "S2": provisional_match})

    units, preset, debug = plan_composition(
        section_stubs,
        lookup,
        _LABEL_TO_STATUS,
        _ALLOWED_STATUSES,
        v4_candidates_lookup_fn=_make_candidates_lookup_empty(),
        allow_provisional_fill=True,
    )

    # Coverage: every section is now owned by some unit.
    covered_ids = set().union(*(u.source_section_ids for u in units))
    assert covered_ids == {"S1", "S2"}

    # Index the units by the sections they cover.
    unit_for = {}
    for unit in units:
        unit_for[tuple(unit.source_section_ids)] = unit
    greedy_unit = unit_for[("S1",)]
    fill_unit = unit_for[("S2",)]

    # The ordinary pick is not flagged provisional.
    assert greedy_unit.provisional is False
    # The fill keeps the provisional flag and the original label.
    assert fill_unit.provisional is True
    assert fill_unit.label == "restructure"
    # Two units -> two-zone preset.
    assert preset == "horizontal-2"
# ─── u3 case 3 : _candidate_state distinguishes selected vs provisional ─
def test_u3_candidate_state_marks_selected_provisional():
    """The debug summary must tell normal picks apart from fills.

    With ``allow_provisional_fill=True`` the candidate summary produced by
    plan_composition must report selection_state "selected" for the S1
    pick and "selected_provisional" for the S2 fill.
    """
    direct_match = _StubV4Match(
        template_id="MOCK_template_direct_a",
        frame_id="MOCK_frame_001",
        frame_number=1,
        confidence=0.9,
        label="use_as_is",
        v4_rank=1,
    )
    # Unlike the sibling tests, no fallback_reason is set on this match.
    provisional_match = _StubV4Match(
        template_id="MOCK_template_restructure_a",
        frame_id="MOCK_frame_002",
        frame_number=2,
        confidence=0.65,
        label="restructure",
        v4_rank=1,
        selection_path="provisional_rank_1",
        provisional=True,
    )
    section_stubs = [_StubSection(sid) for sid in ("S1", "S2")]
    lookup = _make_lookup({"S1": direct_match, "S2": provisional_match})

    units, preset, debug = plan_composition(
        section_stubs,
        lookup,
        _LABEL_TO_STATUS,
        _ALLOWED_STATUSES,
        v4_candidates_lookup_fn=_make_candidates_lookup_empty(),
        allow_provisional_fill=True,
    )

    entry_by_sections = {}
    for entry in debug["candidates_summary"]:
        entry_by_sections[tuple(entry["source_section_ids"])] = entry
    assert entry_by_sections[("S1",)]["selection_state"] == "selected"
    assert entry_by_sections[("S2",)]["selection_state"] == "selected_provisional"
# ─── u3 case 4 : opt-in preserves non-overlap (no double coverage) ─────
def test_u3_opt_in_respects_coverage_non_overlap():
    """With the fill enabled, no section may be covered by two units.

    Fixture: two sibling sections ``P-sub-1`` (use_as_is, non-provisional)
    and ``P-sub-2`` (restructure, provisional), and no match registered for
    a parent section. The "-sub-<n>" ids are chosen because, per the
    original author's note, derive_parent_id recognises that form and may
    make the pair eligible for an inferred parent merge.

    Whatever combination of single / merged units the planner picks, the
    flattened list of covered section ids must contain each of the two ids
    exactly once.
    """
    section_stubs = [
        _StubSection("P-sub-1", raw_content="alpha"),
        _StubSection("P-sub-2", raw_content="beta"),
    ]
    first_child = _StubV4Match(
        template_id="MOCK_template_direct_a",
        frame_id="MOCK_frame_001",
        frame_number=1,
        confidence=0.9,
        label="use_as_is",
        v4_rank=1,
    )
    second_child = _StubV4Match(
        template_id="MOCK_template_restructure_a",
        frame_id="MOCK_frame_002",
        frame_number=2,
        confidence=0.65,
        label="restructure",
        v4_rank=1,
        provisional=True,
    )
    # No entry for a parent id. Author's note (not verifiable from this
    # file): the planner may then synthesize a parent_merged_inferred
    # candidate from the higher-confidence child (P-sub-1), which would be
    # non-provisional. The assertions below hold for either outcome.
    lookup = _make_lookup({"P-sub-1": first_child, "P-sub-2": second_child})

    units, preset, debug = plan_composition(
        section_stubs,
        lookup,
        _LABEL_TO_STATUS,
        _ALLOWED_STATUSES,
        v4_candidates_lookup_fn=_make_candidates_lookup_empty(),
        allow_provisional_fill=True,
    )

    covered = [sid for unit in units for sid in unit.source_section_ids]
    # Non-overlap invariant: no id is listed twice.
    assert len(covered) == len(set(covered))
    # Full coverage: both sections are present.
    assert set(covered) == {"P-sub-1", "P-sub-2"}
# ─── u3 case 5 : opt-in with no provisional candidates is a no-op ──────
def test_u3_opt_in_noop_when_no_provisional_candidates():
    """Enabling the fill changes nothing when no candidate is provisional.

    S2 carries a "restructure" match with ``provisional=False``, so even with
    ``allow_provisional_fill=True`` the plan must equal the default-off
    result: one S1 unit, preset "single", and S2 reported as
    "filtered_status" in the debug summary.
    """
    direct_match = _StubV4Match(
        template_id="MOCK_template_direct_a",
        frame_id="MOCK_frame_001",
        frame_number=1,
        confidence=0.9,
        label="use_as_is",
        v4_rank=1,
    )
    # Restructure match that is explicitly NOT provisional.
    plain_restructure = _StubV4Match(
        template_id="MOCK_template_restructure_a",
        frame_id="MOCK_frame_002",
        frame_number=2,
        confidence=0.65,
        label="restructure",
        v4_rank=1,
        provisional=False,
    )
    section_stubs = [_StubSection(sid) for sid in ("S1", "S2")]
    lookup = _make_lookup({"S1": direct_match, "S2": plain_restructure})

    units, preset, debug = plan_composition(
        section_stubs,
        lookup,
        _LABEL_TO_STATUS,
        _ALLOWED_STATUSES,
        v4_candidates_lookup_fn=_make_candidates_lookup_empty(),
        allow_provisional_fill=True,
    )

    assert len(units) == 1
    assert units[0].source_section_ids == ["S1"]
    assert preset == "single"

    # S2 stays filtered: it is not provisional, so the fill skips it.
    entry_by_sections = {}
    for entry in debug["candidates_summary"]:
        entry_by_sections[tuple(entry["source_section_ids"])] = entry
    assert entry_by_sections[("S2",)]["selection_state"] == "filtered_status"
# ─── u3 case 6 : select_composition_units direct invocation parity ─────
def test_u3_select_composition_units_default_off_signature():
    """select_composition_units stays callable with two positional args.

    Backward-compatibility check for callers that import the function
    directly and never pass the keyword-only u3 arguments.
    """
    # Hand-built candidate: collect_candidates is bypassed on purpose.
    candidate_fields = {
        "source_section_ids": ["S1"],
        "merge_type": "single",
        "frame_template_id": "MOCK_template_direct_a",
        "frame_id": "MOCK_frame_001",
        "frame_number": 1,
        "confidence": 0.9,
        "label": "use_as_is",
        "phase_z_status": "matched_zone",
        "raw_content": "alpha",
        "title": "S1",
    }
    candidate = CompositionUnit(**candidate_fields)

    selected = select_composition_units([candidate], _ALLOWED_STATUSES)

    assert len(selected) == 1
    assert selected[0].source_section_ids == ["S1"]
def test_u3_select_composition_units_opt_in_direct():
    """Direct call with the u3 opt-in fills the uncovered section.

    Two hand-built candidates are passed: an allowed-status one for S1 and
    a provisional, non-allowed-status one for S2. With
    ``allow_provisional_fill=True`` both must be selected, and each keeps
    its own provisional flag.
    """
    greedy_fields = {
        "source_section_ids": ["S1"],
        "merge_type": "single",
        "frame_template_id": "MOCK_template_direct_a",
        "frame_id": "MOCK_frame_001",
        "frame_number": 1,
        "confidence": 0.9,
        "label": "use_as_is",
        "phase_z_status": "matched_zone",
        "raw_content": "alpha",
        "title": "S1",
    }
    provisional_fields = {
        "source_section_ids": ["S2"],
        "merge_type": "single",
        "frame_template_id": "MOCK_template_restructure_a",
        "frame_id": "MOCK_frame_002",
        "frame_number": 2,
        "confidence": 0.65,
        "label": "restructure",
        "phase_z_status": "extract_matched_zone",
        "raw_content": "beta",
        "title": "S2",
        "provisional": True,
    }
    pool = [CompositionUnit(**greedy_fields), CompositionUnit(**provisional_fields)]

    selected = select_composition_units(
        pool,
        _ALLOWED_STATUSES,
        all_section_ids=["S1", "S2"],
        allow_provisional_fill=True,
    )

    assert len(selected) == 2
    unit_for = {}
    for unit in selected:
        unit_for[tuple(unit.source_section_ids)] = unit
    assert unit_for[("S1",)].provisional is False
    assert unit_for[("S2",)].provisional is True
# ════════════════════════════════════════════════════════════════════════
# u4 — pipeline abort guard empty-shell synthesis
# ════════════════════════════════════════════════════════════════════════
#
# u4 replaces the pre-IMP-30 `sys.exit(1)` at the composition_planner abort
# guard with two-phase recovery: provisional retry (Phase A, opt-in u1+u3)
# then terminal empty-shell (Phase B). The shell is a single CompositionUnit
# with frame_template_id="__empty__" and preset="single"; the per-unit
# for-loop's __empty__ branch bypasses mapper/contract and emits a
# placeholder zones_data/debug_zones record so final.html still writes.
#
# These tests verify the composition-side invariants that u4 relies on:
# - CompositionUnit can be constructed in the empty-shell shape.
# - The shell shape carries the data needed for u5 (provisional flag) /
# u6 (status qualifier) / render_slide __empty__ branch (template_id).
# The pipeline-level integration (provisional retry / empty-shell synthesis
# at the abort guard, plus the per-unit __empty__ bypass) is covered by
# u7 (regression coverage) with synthetic V4 fixtures.
def test_u4_empty_shell_unit_shape_matches_pipeline_synthesis():
    """CompositionUnit accepts, and preserves, the empty-shell field shape.

    The unit is built here exactly as the original author describes the
    IMP-30 u4 abort-guard synthesis in the pipeline (that code is not
    visible from this file). The fields pinned below are the ones the
    author lists as relied upon downstream:

      - frame_template_id / frame_id == "__empty__" (render short-circuit).
      - label / phase_z_status == "empty_shell" (distinct from the normal
        matched / adapt / extract / fallback statuses).
      - provisional is True (u5 badge wiring).
      - source_section_ids spans every aligned section id.
      - selection_path == "empty_shell" plus a fallback_reason (audit trace).
      - raw_content is the untouched concatenation of the section bodies.
    """
    section_ids = ["S1", "S2", "S3"]
    bodies = ("alpha", "beta", "gamma")
    headings = ("First", "Second", "Third")

    shell_fields = {
        "source_section_ids": list(section_ids),
        "merge_type": "empty_shell",
        "frame_template_id": "__empty__",
        "frame_id": "__empty__",
        "frame_number": 0,
        "confidence": 0.0,
        "label": "empty_shell",
        "phase_z_status": "empty_shell",
        "raw_content": "\n\n".join(bodies),
        "title": " / ".join(headings),
        "v4_rank": None,
        "selection_path": "empty_shell",
        "fallback_reason": "no_v4_rank_1_for_any_section",
        "score": 0.0,
        "rationale": {
            "imp30_u4": "terminal_first_render_empty_shell",
            "reason": "no_rank_1_V4_evidence_in_any_section",
            "aligned_section_ids": section_ids,
        },
        "provisional": True,
    }
    shell = CompositionUnit(**shell_fields)

    # Identity / routing markers.
    assert shell.frame_template_id == "__empty__"
    assert shell.frame_id == "__empty__"
    assert shell.label == "empty_shell"
    assert shell.phase_z_status == "empty_shell"
    assert shell.provisional is True
    assert shell.selection_path == "empty_shell"
    assert shell.fallback_reason == "no_v4_rank_1_for_any_section"
    assert shell.source_section_ids == section_ids
    assert shell.v4_rank is None
    assert shell.confidence == 0.0
    assert shell.score == 0.0

    # Section bodies are kept verbatim, joined by blank lines (no rewrite).
    assert shell.raw_content == "alpha\n\nbeta\n\ngamma"

    # Audit trail stored in the rationale.
    assert shell.rationale["imp30_u4"] == "terminal_first_render_empty_shell"
    assert shell.rationale["aligned_section_ids"] == section_ids
def test_u4_empty_shell_unit_default_provisional_is_false():
    """A CompositionUnit built without ``provisional`` is not provisional.

    Smoke test for the opt-in nature of the flag: an ordinary unit must
    report ``provisional is False`` and must not carry the "__empty__"
    template id, so readers of unit.provisional never get a false positive.
    """
    ordinary_fields = {
        "source_section_ids": ["S1"],
        "merge_type": "single",
        "frame_template_id": "MOCK_template_direct_a",
        "frame_id": "MOCK_frame_001",
        "frame_number": 1,
        "confidence": 0.9,
        "label": "use_as_is",
        "phase_z_status": "matched_zone",
        "raw_content": "alpha",
        "title": "S1",
    }
    ordinary = CompositionUnit(**ordinary_fields)

    assert ordinary.provisional is False
    assert ordinary.frame_template_id != "__empty__"
def test_u4_empty_shell_phase_z_status_outside_mvp1_allowed():
    """The "empty_shell" status must stay outside the allowed-status set.

    If it were allowed, code that filters units by allowed statuses would
    treat the shell like a normal matched zone and the "needs adaptation"
    signal would be lost. Pinned at the composition-test level.

    Per the original note, _ALLOWED_STATUSES mirrors the pipeline's
    MVP1_ALLOWED_STATUSES ({"matched_zone", "adapt_matched_zone"}).
    """
    assert _ALLOWED_STATUSES.isdisjoint({"empty_shell"})
# ════════════════════════════════════════════════════════════════════════
# u5 — zones_data carries provisional flag; slide_base.html zone div adds
# zone--provisional class + inline needs-adaptation badge
# ════════════════════════════════════════════════════════════════════════
#
# u5 wires the unit.provisional signal (set by u2 from V4Match.provisional in
# u1, or directly by u4 empty-shell synthesis) through the zones_data payload
# into the slide_base.html template. Visual contract:
# - zones_data[i]['provisional'] = bool (default False; True only for IMP-30
# opt-in synthesized units).
# - slide_base.html zone div gets `zone--provisional` class when True; an
# inline `<span class="zone__needs-adaptation-badge">needs adaptation</span>`
# element is rendered inside the zone (top-right corner via absolute pos).
# - data-provisional="1" attribute set for downstream selectors / overflow
# checker / e2e tooling.
#
# The composition / pipeline-level handoff is exercised by u3 / u4 already.
# u5 tests focus on:
# - template-rendering output: class + badge HTML correctly emitted ONLY when
# zones[i].provisional is truthy. (default-off path unchanged.)
# - byte-equivalence: non-provisional zones render the same div shape as
# pre-u5 (just no zone--provisional class / no badge element).
import re
from pathlib import Path
from jinja2 import Environment, FileSystemLoader, select_autoescape
# ─── u5 helpers ────────────────────────────────────────────────────────
def _render_slide_base(zones: list[dict], *, layout_preset: str = "single",
                       layout_css: dict | None = None) -> str:
    """Render templates/phase_z2/slide_base.html with a minimal zones list.

    The template is loaded straight through Jinja2 (render_slide() is
    bypassed), so the u5 tests can check zone-div attributes without the
    mapper, the contracts or the token CSS loader.

    Args:
        zones: One dict per zone, passed to the template as ``zones``. A zone
            without a ``partial_html`` key gets a fixed stub partial. The
            caller's dicts are NOT modified: rendering works on shallow
            copies, so a fixture can be reused or inspected afterwards.
        layout_preset: Value passed to the template as ``layout_preset``.
        layout_css: Grid description passed as ``layout_css``. ``None``
            selects a one-cell grid (cols "1fr", rows "1fr", areas
            '"single"').

    Returns:
        The rendered HTML document as a string.
    """
    # tests/<this file> -> project root -> templates/phase_z2
    template_dir = Path(__file__).resolve().parents[1] / "templates" / "phase_z2"
    env = Environment(
        loader=FileSystemLoader(str(template_dir)),
        autoescape=select_autoescape(["html"]),
    )
    if layout_css is None:
        layout_css = {
            "cols": "1fr",
            "rows": "1fr",
            "areas": '"single"',
        }
    # render_slide normally fills partial_html. Use a stable placeholder so
    # assertions target zone-level attributes only. Keys already present in
    # the zone win over the stub (same precedence as dict.setdefault), but
    # the merge happens on a copy instead of mutating the caller's dict.
    stub_partial = '<div class="_stub_partial">stub</div>'
    render_zones = [{"partial_html": stub_partial, **zone} for zone in zones]
    base = env.get_template("slide_base.html")
    return base.render(
        slide_title="IMP-30 u5 test slide",
        slide_footer=None,
        zones=render_zones,
        layout_preset=layout_preset,
        layout_css=layout_css,
        gap_px=12,
        token_css="",  # empty token CSS — not under test here
        embedded_mode="standalone",
    )
def _zone_div_for_position(html: str, position: str) -> str:
    """Extract the zone div opening for one ``data-zone-position`` value.

    The match covers the opening ``<div class="zone...">`` tag whose
    ``data-zone-position`` attribute directly follows the ``class``
    attribute, plus the needs-adaptation badge ``<span>`` when it is the
    first thing inside the div. The partial HTML after that is excluded.

    Returns the matched text, or "" when no such zone div is found.
    """
    opening_tag = (
        r'<div class="zone[^"]*"\s+data-zone-position="'
        + re.escape(position)
        + r'"[^>]*>'
    )
    optional_badge = r'(?:\s*<span class="zone__needs-adaptation-badge"[^>]*>[^<]*</span>)?'
    found = re.search(opening_tag + optional_badge, html, re.DOTALL)
    return found.group(0) if found else ""
def _all_zone_div_openings(html: str) -> list[str]:
    """Collect the opening tag of every zone div in the rendered HTML.

    Only ``<div class="zone...">`` tags that also carry a
    ``data-zone-position`` attribute are matched, so assertions on the
    result are not fooled by the ``.zone--provisional`` /
    ``.zone__needs-adaptation-badge`` selectors in the <style> block.
    """
    zone_opening = re.compile(
        r'<div class="zone[^"]*"[^>]*data-zone-position="[^"]*"[^>]*>'
    )
    return [hit.group(0) for hit in zone_opening.finditer(html)]
def _all_badge_spans(html: str) -> list[str]:
    """Collect every needs-adaptation badge ``<span>`` element in the HTML.

    Only real elements are matched (``<span class="...">text</span>``), so
    the ``.zone__needs-adaptation-badge`` selector in the <style> block is
    never counted as a badge emission.
    """
    badge_span = re.compile(
        r'<span class="zone__needs-adaptation-badge"[^>]*>[^<]*</span>'
    )
    return [hit.group(0) for hit in badge_span.finditer(html)]
# ─── u5 case 1 : non-provisional zone renders pre-u5 div shape ────────
def test_u5_non_provisional_zone_renders_without_class_or_badge():
    """Default-off path: ``provisional=False`` leaves the zone div plain.

    The zone must render as ``<div class="zone" ...>`` with no
    zone--provisional class, no data-provisional attribute and no badge
    element. All checks are scoped to real zone divs / badge spans, because
    the <style> block always contains the matching CSS selectors.
    """
    plain_zone = dict(
        position="single",
        template_id="MOCK_template_direct_a",
        slot_payload={},
        content_weight={"score": 1},
        min_height_px=100,
        provisional=False,
    )
    html = _render_slide_base([plain_zone])

    openings = _all_zone_div_openings(html)
    assert len(openings) == 1
    only_opening = openings[0]
    assert "zone--provisional" not in only_opening
    assert 'data-provisional="1"' not in only_opening
    # No badge element anywhere in the body.
    assert _all_badge_spans(html) == []
    # The canonical class attribute is emitted unchanged.
    assert 'class="zone"' in only_opening
def test_u5_zone_without_provisional_key_treated_as_non_provisional():
    """A zone dict with no ``provisional`` key renders like provisional=False.

    Guards callers that predate u5 and never set the key: the template
    must neither crash nor emit the provisional class / badge.
    """
    # "provisional" is intentionally left out of this zone.
    legacy_zone = dict(
        position="single",
        template_id="MOCK_template_direct_a",
        slot_payload={},
        content_weight={"score": 1},
        min_height_px=100,
    )
    html = _render_slide_base([legacy_zone])

    openings = _all_zone_div_openings(html)
    assert len(openings) == 1
    assert "zone--provisional" not in openings[0]
    assert _all_badge_spans(html) == []
# ─── u5 case 2 : provisional zone renders class + badge + data attr ───
def test_u5_provisional_zone_renders_class_and_badge():
    """Opt-in path: ``provisional=True`` decorates the zone div.

    Expected output:
      1. ``zone--provisional`` is part of the zone div's class attribute.
      2. ``data-provisional="1"`` is present in the document.
      3. Exactly one ``<span class="zone__needs-adaptation-badge">`` element
         is rendered, with the text "needs adaptation" and the a11y
         aria-label.

    The class and badge checks are scoped to the real zone div / badge span.
    A bare ``"zone--provisional" in html`` cannot fail, because the <style>
    block of slide_base.html always contains that selector (see the sibling
    non-provisional test). The original document-wide assertions are kept
    as well, so nothing previously checked is dropped.
    """
    zones = [
        {
            "position": "single",
            "template_id": "MOCK_template_restructure_a",
            "slot_payload": {},
            "content_weight": {"score": 1},
            "min_height_px": 100,
            "provisional": True,
        }
    ]
    html = _render_slide_base(zones)

    # Scoped checks: these fail if the template stops decorating the zone.
    zone_divs = _all_zone_div_openings(html)
    assert len(zone_divs) == 1
    assert "zone--provisional" in zone_divs[0]
    badges = _all_badge_spans(html)
    assert len(badges) == 1
    assert "needs adaptation" in badges[0]

    # Original document-wide checks.
    assert "zone--provisional" in html
    # data-provisional="1" attribute must be present.
    assert 'data-provisional="1"' in html
    # Badge element with the required label text.
    assert 'class="zone__needs-adaptation-badge"' in html
    assert "needs adaptation" in html
    assert 'aria-label="needs user or AI adaptation"' in html
def test_u5_provisional_badge_appears_inside_provisional_zone_only():
    """Mixed slide: only the provisional zone gets the class and the badge.

    One normal zone ("top") and one provisional zone ("bottom") are
    rendered. The badge span and the zone--provisional class must each
    appear exactly once, on the bottom zone, and never on the top zone.

    ``_zone_div_for_position`` returns "" when it finds no matching div, so
    each per-position lookup is first asserted to be non-empty. Without that
    guard the "not in" checks on the top zone would pass vacuously if the
    zone markup ever stopped matching the helper's pattern.
    """
    zones = [
        {
            "position": "top",
            "template_id": "MOCK_template_direct_a",
            "slot_payload": {},
            "content_weight": {"score": 1},
            "min_height_px": 100,
            "provisional": False,
        },
        {
            "position": "bottom",
            "template_id": "MOCK_template_restructure_a",
            "slot_payload": {},
            "content_weight": {"score": 1},
            "min_height_px": 100,
            "provisional": True,
        },
    ]
    layout_css = {
        "cols": "1fr",
        "rows": "1fr 1fr",
        "areas": '"top" "bottom"',
    }
    html = _render_slide_base(
        zones, layout_preset="vertical-2", layout_css=layout_css
    )

    # Exactly one badge span element in the rendered body (the CSS selector
    # in <style> is excluded by the helper).
    assert len(_all_badge_spans(html)) == 1

    # zone--provisional on exactly one of the two zone divs.
    zone_divs = _all_zone_div_openings(html)
    assert len(zone_divs) == 2
    provisional_zone_divs = [d for d in zone_divs if "zone--provisional" in d]
    assert len(provisional_zone_divs) == 1

    # The provisional class and badge belong to the bottom zone.
    bottom_zone_open = _zone_div_for_position(html, "bottom")
    assert bottom_zone_open, "bottom zone div not found in rendered HTML"
    assert "zone--provisional" in bottom_zone_open
    assert "zone__needs-adaptation-badge" in bottom_zone_open

    # The top zone must exist and must NOT carry the class or the badge.
    top_zone_open = _zone_div_for_position(html, "top")
    assert top_zone_open, "top zone div not found in rendered HTML"
    assert "zone--provisional" not in top_zone_open
    assert "zone__needs-adaptation-badge" not in top_zone_open
# ─── u5 case 3 : zones_data data shape contract ────────────────────────
def test_u5_zones_data_provisional_field_defaults_false_in_template():
    """``provisional=None`` must be treated as falsy by the template.

    Pins the template's truthiness check: a falsy-but-not-False value must
    not produce the zone--provisional class or a badge, so a template
    refactor cannot silently invert the default.
    """
    none_flag_zone = dict(
        position="single",
        template_id="MOCK_template_direct_a",
        slot_payload={},
        content_weight={"score": 1},
        min_height_px=100,
        provisional=None,  # falsy, yet not the literal False
    )
    html = _render_slide_base([none_flag_zone])

    openings = _all_zone_div_openings(html)
    assert len(openings) == 1
    assert "zone--provisional" not in openings[0]
    assert _all_badge_spans(html) == []
def test_u5_slide_base_css_carries_provisional_marker_styles():
    """The rendered page must still define the provisional CSS selectors.

    Existence check only: the dotted selector strings ``.zone--provisional``
    and ``.zone__needs-adaptation-badge`` must occur in the output, so that
    removing the styles breaks this test instead of silently producing an
    unstyled badge. Colors and dash patterns are deliberately not checked.
    """
    provisional_zone = dict(
        position="single",
        template_id="MOCK_template_restructure_a",
        slot_payload={},
        content_weight={"score": 1},
        min_height_px=100,
        provisional=True,
    )
    html = _render_slide_base([provisional_zone])

    # Both selectors (with their leading dot) must be defined.
    for selector in (".zone--provisional", ".zone__needs-adaptation-badge"):
        assert selector in html
# ════════════════════════════════════════════════════════════════════════
# u6 — compute_slide_status additive qualifiers
# provisional_first_render_count + provisional_first_render_units
# ════════════════════════════════════════════════════════════════════════
#
# u6 surfaces the IMP-30 first-render invariant in Step 20 slide_status.
# Contract :
# - Additive only. Top-level `overall` enum (PASS / RENDERED_WITH_VISUAL_REGRESSION /
# PARTIAL_COVERAGE / PARTIAL_COVERAGE_WITH_VISUAL_REGRESSION) is NOT extended.
# Stage 1 Q3 lock + Codex #10 D4 (IMP-05) preservation.
# - `provisional_first_render_count` = int >= 0 — number of selected units with
# unit.provisional == True (set by u1 V4Match synthesis → u2 propagation,
# u3 last-resort fill, or u4 empty-shell synthesis).
# - `provisional_first_render_units` = list[dict] — per-unit entries mirroring
# the shape of `fallback_selections` / `adapter_needed_units` so downstream
# consumers can branch uniformly without re-deriving intent from labels.
# - Defensive `getattr` keeps the function safe when units come from legacy
# code paths that predate u2 (no .provisional attribute) — those units are
# treated as non-provisional.
from src.phase_z2_pipeline import MdxSection, compute_slide_status
def _mk_unit(*, section_ids: list[str], provisional: bool, **overrides):
    """Build a real CompositionUnit for the compute_slide_status tests.

    Args:
        section_ids: Copied into ``source_section_ids``.
        provisional: Value of the unit's ``provisional`` field.
        **overrides: Any CompositionUnit field; each one replaces the
            corresponding default below.

    The defaults describe an ordinary rank-1 "use_as_is" unit. The
    production dataclass is used (not a stub) so the ``.provisional``
    attribute path is exercised for real.
    """
    defaults = {
        "source_section_ids": list(section_ids),
        "merge_type": "single",
        "frame_template_id": "MOCK_template_direct_a",
        "frame_id": "MOCK_frame_001",
        "frame_number": 1,
        "confidence": 0.9,
        "label": "use_as_is",
        "phase_z_status": "matched_zone",
        "raw_content": "alpha",
        "title": "MOCK section",
        "v4_rank": 1,
        "selection_path": "rank_1",
        "fallback_reason": None,
        "score": 1.0,
        "provisional": provisional,
    }
    # Overrides win over the defaults, key by key.
    return CompositionUnit(**{**defaults, **overrides})
def _mk_section(section_id: str) -> MdxSection:
    """Build a minimal MdxSection for the compute_slide_status tests.

    ``section_num`` is the integer left after stripping leading "S"
    characters from ``section_id`` (0 when nothing is left); the title and
    raw content are derived from the id. All other MdxSection fields keep
    their dataclass defaults.
    """
    digits = section_id.lstrip("S")
    number = int(digits or "0")
    return MdxSection(
        section_id=section_id,
        section_num=number,
        title=f"Section {section_id}",
        raw_content=f"raw {section_id}",
    )
# ─── u6 case 1 : no provisional units — defensive default 0 / [] ──────
def test_u6_no_provisional_units_returns_zero_and_empty_list():
    """A slide with only non-provisional rank_1 units reports zero / [].

    Both u6 keys must be present with their empty defaults, while the
    overall value and the IMP-05 qualifier fields keep their usual values.
    """
    section_ids = ("S1", "S2")
    status = compute_slide_status(
        [_mk_section(sid) for sid in section_ids],
        [_mk_unit(section_ids=[sid], provisional=False) for sid in section_ids],
        {"candidates_summary": []},
        {"passed": True, "fail_reasons": []},
        adapter_needed_units=None,
        debug_zones=None,
    )

    assert status["provisional_first_render_count"] == 0
    assert status["provisional_first_render_units"] == []
    # Full coverage + visual pass keeps the overall value at PASS.
    assert status["overall"] == "PASS"
    # IMP-05 qualifier fields are still reported (regression guard).
    assert status["fallback_selection_count"] == 0
    assert status["selection_paths"] == []
def test_u6_provisional_field_absent_is_treated_as_false():
    """A unit whose ``provisional`` attribute was removed is not counted.

    Simulates a legacy duck-typed unit by deleting the attribute from a
    real CompositionUnit before handing it to compute_slide_status.
    """
    legacy_unit = _mk_unit(section_ids=["S1"], provisional=False)
    # NOTE(review): if CompositionUnit declares a class-level default for
    # `provisional`, attribute lookup after this deletion falls back to that
    # default instead of a getattr fallback — confirm which path is hit.
    delattr(legacy_unit, "provisional")

    status = compute_slide_status(
        [_mk_section("S1")],
        [legacy_unit],
        {"candidates_summary": []},
        {"passed": True, "fail_reasons": []},
        adapter_needed_units=None,
        debug_zones=None,
    )

    assert status["provisional_first_render_count"] == 0
    assert status["provisional_first_render_units"] == []
# ─── u6 case 2 : provisional unit synthesized via u1 (chain_exhausted) ─
def test_u6_chain_exhausted_provisional_unit_listed_with_full_shape():
    """A provisional rank-1 unit is listed with every audit field.

    The unit models the chain-exhausted recovery outcome (provisional=True,
    selection_path "provisional_rank_1"). The listed entry must expose
    source_section_ids / phase_z_status / frame_template_id / frame_id /
    label / selection_path / fallback_reason / v4_rank so debug consumers do
    not have to re-parse the units list.
    """
    provisional_unit = _mk_unit(
        section_ids=["S1"],
        provisional=True,
        label="restructure",
        phase_z_status="extract_matched_zone",
        frame_template_id="MOCK_template_restructure_a",
        frame_id="MOCK_frame_002",
        selection_path="provisional_rank_1",
        fallback_reason="phase_z_status_not_allowed:extract_matched_zone",
        v4_rank=1,
    )
    status = compute_slide_status(
        [_mk_section("S1")],
        [provisional_unit],
        {"candidates_summary": []},
        {"passed": True, "fail_reasons": []},
        adapter_needed_units=None,
        debug_zones=None,
    )

    assert status["provisional_first_render_count"] == 1
    listed = status["provisional_first_render_units"]
    assert len(listed) == 1

    expected_fields = {
        "source_section_ids": ["S1"],
        "phase_z_status": "extract_matched_zone",
        "frame_template_id": "MOCK_template_restructure_a",
        "frame_id": "MOCK_frame_002",
        "label": "restructure",
        "selection_path": "provisional_rank_1",
        "fallback_reason": "phase_z_status_not_allowed:extract_matched_zone",
        "v4_rank": 1,
    }
    for field_name, expected in expected_fields.items():
        assert listed[0][field_name] == expected

    # Provisional is a qualifier, not a failure class: full coverage plus a
    # visual pass still reports PASS.
    assert status["overall"] == "PASS"
# ─── u6 case 3 : empty-shell unit (u4) listed with __empty__ identifiers ─
def test_u6_empty_shell_unit_listed_with_empty_identifiers():
    """An empty-shell unit is listed with ``__empty__`` identifiers.

    The shell spans both sections and carries frame_template_id / frame_id
    "__empty__" with phase_z_status "empty_shell", which lets consumers tell
    a terminal shell apart from a non-shell provisional unit. The slide must
    also be reported as EMPTY_SHELL_NO_CONTENT with full_mdx_coverage False.
    """
    shell_fields = dict(
        merge_type="empty_shell",
        frame_template_id="__empty__",
        frame_id="__empty__",
        frame_number=0,
        confidence=0.0,
        label="empty_shell",
        phase_z_status="empty_shell",
        raw_content="raw S1\n\nraw S2",
        title="Section S1 / Section S2",
        v4_rank=None,
        selection_path="empty_shell",
        fallback_reason="no_v4_rank_1_for_any_section",
        score=0.0,
    )
    shell = _mk_unit(section_ids=["S1", "S2"], provisional=True, **shell_fields)

    status = compute_slide_status(
        [_mk_section("S1"), _mk_section("S2")],
        [shell],
        {"candidates_summary": []},
        {"passed": True, "fail_reasons": []},
        adapter_needed_units=None,
        debug_zones=None,
    )

    assert status["provisional_first_render_count"] == 1
    listed_shell = status["provisional_first_render_units"][0]
    expected_fields = {
        "frame_template_id": "__empty__",
        "frame_id": "__empty__",
        "phase_z_status": "empty_shell",
        "label": "empty_shell",
        "selection_path": "empty_shell",
        "fallback_reason": "no_v4_rank_1_for_any_section",
    }
    for field_name, expected in expected_fields.items():
        assert listed_shell[field_name] == expected
    assert listed_shell["v4_rank"] is None

    # IMP-87 u4 — honesty defect inversion. A slide whose sole rendered unit
    # is __empty__ must not be reported as PASS: content coverage is False
    # and overall is elevated to EMPTY_SHELL_NO_CONTENT.
    assert status["full_mdx_coverage"] is False
    assert status["overall"] == "EMPTY_SHELL_NO_CONTENT"
# ─── u6 case 4 : mixed selection — provisional + normal units coexist ──
def test_u6_mixed_selection_counts_only_provisional_units():
    """Only the provisional unit of a mixed selection is counted and listed.

    S1 and S3 are covered by normal rank_1 units and S2 by a provisional
    fill; the provisional list must contain S2 alone.
    """
    provisional_s2 = _mk_unit(
        section_ids=["S2"],
        provisional=True,
        label="reject",
        phase_z_status="fallback_candidate",
        selection_path="provisional_rank_1",
        fallback_reason="phase_z_status_not_allowed:fallback_candidate",
    )
    mixed_units = [
        _mk_unit(section_ids=["S1"], provisional=False),
        provisional_s2,
        _mk_unit(section_ids=["S3"], provisional=False),
    ]

    status = compute_slide_status(
        [_mk_section(sid) for sid in ("S1", "S2", "S3")],
        mixed_units,
        {"candidates_summary": []},
        {"passed": True, "fail_reasons": []},
        adapter_needed_units=None,
        debug_zones=None,
    )

    assert status["provisional_first_render_count"] == 1
    listed = status["provisional_first_render_units"]
    assert len(listed) == 1
    assert listed[0]["source_section_ids"] == ["S2"]
    assert listed[0]["label"] == "reject"

    # Sections covered by normal units must not leak into the list.
    listed_section_ids = set()
    for listed_entry in listed:
        listed_section_ids.update(listed_entry["source_section_ids"])
    assert "S1" not in listed_section_ids
    assert "S3" not in listed_section_ids
# ─── u6 case 5 : overall enum stability under provisional units ───────
def test_u6_overall_enum_unchanged_when_provisional_present_with_visual_pass():
    """Provisional unit + full coverage + visual pass still reports PASS.

    Provisional is an additive qualifier: code that wants to gate on it has
    to read provisional_first_render_count rather than overall.
    """
    provisional_unit = _mk_unit(
        section_ids=["S1"],
        provisional=True,
        label="restructure",
        phase_z_status="extract_matched_zone",
        selection_path="provisional_rank_1",
        fallback_reason="phase_z_status_not_allowed:extract_matched_zone",
    )

    status = compute_slide_status(
        [_mk_section("S1")],
        [provisional_unit],
        {"candidates_summary": []},
        {"passed": True, "fail_reasons": []},
        adapter_needed_units=None,
        debug_zones=None,
    )

    assert status["overall"] == "PASS"
    assert status["provisional_first_render_count"] == 1
def test_u6_overall_enum_visual_regression_independent_of_provisional():
    """Provisional unit + full coverage + visual FAIL reports the existing
    RENDERED_WITH_VISUAL_REGRESSION value.

    Pins that the failed overflow check drives the overall value here while
    the provisional count is reported alongside it.
    """
    provisional_unit = _mk_unit(
        section_ids=["S1"],
        provisional=True,
        selection_path="provisional_rank_1",
    )
    failed_overflow = {"passed": False, "fail_reasons": ["zone overflow"]}

    status = compute_slide_status(
        [_mk_section("S1")],
        [provisional_unit],
        {"candidates_summary": []},
        failed_overflow,
        adapter_needed_units=None,
        debug_zones=None,
    )

    assert status["overall"] == "RENDERED_WITH_VISUAL_REGRESSION"
    assert status["provisional_first_render_count"] == 1
# ─── u6 case 6 : note field documents the new qualifier ───────────────
def test_u6_note_field_mentions_provisional_first_render_count():
    """The human-readable ``note`` names the provisional qualifier.

    The note must mention ``provisional_first_render_count`` and keep the
    pre-existing mentions of ``adapter_needed_count`` and
    ``content_truncated_count``.
    """
    status = compute_slide_status(
        [_mk_section("S1")],
        [_mk_unit(section_ids=["S1"], provisional=False)],
        {"candidates_summary": []},
        {"passed": True, "fail_reasons": []},
        adapter_needed_units=None,
        debug_zones=None,
    )
    note_text = status["note"]

    assert "provisional_first_render_count" in note_text
    # Earlier qualifier guidance must survive (regression guard).
    assert "adapter_needed_count" in note_text
    assert "content_truncated_count" in note_text
# ════════════════════════════════════════════════════════════════════════
# u7 — broader pipeline fixture coverage for the empty-shell + provisional
# retry end-to-end (deferred from u4 verification per mid-stage
# compaction). Threads real production functions together with
# synthetic V4 fixtures so the cross-layer data flow (u1 → u2 → u3 →
# u4 → u6) is exercised in one pass, instead of relying on per-unit
# tests that each touch only one layer.
# ════════════════════════════════════════════════════════════════════════
#
# Each u7 case runs the real chain:
# synthetic V4 dict
# → lookup_v4_match_with_fallback (u1)
# → plan_composition (u2 + u3 propagation / fill)
# → compute_slide_status (u6 qualifier surface)
#
# Monkeypatched dependencies mirror tests/test_phase_z2_v4_fallback.py
# (get_contract + compute_capacity_fit). MOCK_ naming + rank-by-field
# convention preserved (Codex #10 E1).
import pytest
from src import phase_z2_pipeline as _pz_pipeline
from src.phase_z2_pipeline import (
V4_LABEL_TO_PHASE_Z_STATUS as _PROD_LABEL_TO_STATUS,
compute_slide_status as _compute_slide_status,
lookup_v4_match_with_fallback as _real_lookup,
)
# Synthetic catalog stub — only MOCK_ templates considered registered.
# Mirrors test_phase_z2_v4_fallback.py shape so the two suites stay in sync.
_U7_MOCK_CATALOG: dict[str, object] = {
"MOCK_template_direct_a": object(),
"MOCK_template_restructure_a": object(),
"MOCK_template_reject_a": object(),
}
def _u7_get_contract(template_id: str):
    """Return the mock catalog entry for ``template_id``, or None if the
    template is not in ``_U7_MOCK_CATALOG``."""
    contract = _U7_MOCK_CATALOG.get(template_id)
    return contract
def _u7_capacity_fit_ok(template_id: str, raw_content: str) -> dict:
return {"fit_status": "ok"}
@pytest.fixture
def u7_patch_selector_deps(monkeypatch):
    """Swap the pipeline module's ``get_contract`` and
    ``compute_capacity_fit`` for the u7 stubs.

    The attributes are replaced on the pipeline module itself via
    ``monkeypatch``, which restores the originals when the test finishes.
    """
    stubs = {
        "get_contract": _u7_get_contract,
        "compute_capacity_fit": _u7_capacity_fit_ok,
    }
    for attr_name, stub in stubs.items():
        monkeypatch.setattr(_pz_pipeline, attr_name, stub)
def _u7_v4_section(judgments: list[dict]) -> dict:
return {"judgments_full32": judgments}
def _u7_j(rank: int, template_id: str, frame_id: str, label: str,
confidence: float = 0.9) -> dict:
return {
"frame_id": frame_id,
"frame_number": rank,
"template_id": template_id,
"confidence": confidence,
"label": label,
"v4_full_rank": rank,
}
def _u7_section(section_id: str) -> _pz_pipeline.MdxSection:
    """Build an MdxSection with a one-bullet body for the u7 e2e tests.

    The text left after stripping leading ``"S"`` characters from
    ``section_id`` is parsed as the section number; an empty remainder maps
    to 0.
    """
    numeric_part = section_id.lstrip("S")
    section_num = int(numeric_part) if numeric_part else 0
    return _pz_pipeline.MdxSection(
        section_id=section_id,
        section_num=section_num,
        title=f"Section {section_id}",
        raw_content=f"- bullet for {section_id}\n",
    )
# ─── u7 case 1 : e2e chain_exhausted → provisional retry → slide_status ──
def test_u7_e2e_chain_exhausted_provisional_flows_through_layers(
    u7_patch_selector_deps,
):
    """End-to-end: a chain-exhausted section surfaces as a provisional unit.

    S1 has an auto-renderable rank-1 judgment; S2 only has restructure /
    reject judgments. With both opt-in flags on (``allow_provisional`` at
    the lookup and ``allow_provisional_fill`` at plan_composition), the
    provisional flag must travel from the lookup result through the
    CompositionUnit into the compute_slide_status qualifier fields.

    Per the original author's note this mirrors the production recovery
    path (_lookup_fn_provisional + plan_composition with
    allow_provisional_fill=True, u4 Phase A).
    """
    v4 = {
        "mdx_sections": {
            # S1 — auto-renderable, normal rank-1 selection.
            "S1": _u7_v4_section([
                _u7_j(1, "MOCK_template_direct_a", "MOCK_frame_001", "use_as_is"),
            ]),
            # S2 — chain exhausted (all restructure / reject).
            "S2": _u7_v4_section([
                _u7_j(1, "MOCK_template_restructure_a", "MOCK_frame_011", "restructure"),
                _u7_j(2, "MOCK_template_reject_a", "MOCK_frame_012", "reject"),
            ]),
        }
    }
    sections = [_u7_section("S1"), _u7_section("S2")]

    def lookup_fn(sid: str):
        match, _trace = _real_lookup(
            v4, sid, raw_content="- a\n- b\n",
            allow_provisional=True,
        )
        return match

    units, layout_preset, comp_debug = plan_composition(
        sections,
        v4_lookup_fn=lookup_fn,
        v4_label_to_status=_PROD_LABEL_TO_STATUS,
        allowed_statuses=_ALLOWED_STATUSES,
        capacity_fit_fn=None,
        v4_candidates_lookup_fn=None,
        allow_provisional_fill=True,
    )

    # Recovery succeeded: exactly 2 units (S1 normal + S2 provisional fill).
    # The count is asserted on the list itself because the dict below would
    # silently collapse two units that start with the same section id.
    assert len(units) == 2
    by_section = {u.source_section_ids[0]: u for u in units}
    assert set(by_section) == {"S1", "S2"}
    assert by_section["S1"].provisional is False
    assert by_section["S1"].label == "use_as_is"
    assert by_section["S2"].provisional is True
    assert by_section["S2"].label == "restructure"
    assert by_section["S2"].selection_path == "provisional_rank_1"
    assert layout_preset == "horizontal-2"

    # u6 qualifier surface — only S2 counted.
    status = _compute_slide_status(
        sections, units, comp_debug,
        overflow={"passed": True, "fail_reasons": []},
        adapter_needed_units=None, debug_zones=None,
    )
    assert status["provisional_first_render_count"] == 1
    entry = status["provisional_first_render_units"][0]
    assert entry["source_section_ids"] == ["S2"]
    assert entry["selection_path"] == "provisional_rank_1"
    assert entry["frame_template_id"] == "MOCK_template_restructure_a"
    # Full coverage + visual pass = PASS; provisional is only a qualifier.
    assert status["overall"] == "PASS"
# ─── u7 case 2 : e2e zero-V4 → u4 empty-shell synthesis → slide_status ───
def test_u7_e2e_zero_v4_empty_shell_status_surface(u7_patch_selector_deps):
    """End-to-end zero-V4 path: no judgments for any section.

    With an empty V4 payload, plan_composition yields no units and no
    layout preset even with both opt-in flags on. The test then builds the
    empty-shell unit by hand (simulating the production u4 Phase B
    synthesis) and checks that compute_slide_status lists it as provisional
    and reports the slide as EMPTY_SHELL_NO_CONTENT with full_mdx_coverage
    False.
    """
    empty_v4 = {"mdx_sections": {}}  # nothing matches any section
    sections = [_u7_section(sid) for sid in ("S1", "S2")]

    def lookup_fn(sid: str):
        found, _ignored_trace = _real_lookup(
            empty_v4, sid, raw_content="- a\n",
            allow_provisional=True,
        )
        return found

    planned_units, planned_preset, _ = plan_composition(
        sections,
        v4_lookup_fn=lookup_fn,
        v4_label_to_status=_PROD_LABEL_TO_STATUS,
        allowed_statuses=_ALLOWED_STATUSES,
        capacity_fit_fn=None,
        v4_candidates_lookup_fn=None,
        allow_provisional_fill=True,
    )
    # No V4 evidence anywhere — recovery cannot fabricate a match, so the
    # plan stays empty. This is the trigger condition for the empty shell.
    assert planned_units == []
    assert planned_preset is None

    # Simulate the production u4 Phase B synthesis (pipeline.py:3325~).
    joined_raw = "\n\n".join(section.raw_content for section in sections)
    joined_title = " / ".join(section.title for section in sections)
    empty_shell = CompositionUnit(
        source_section_ids=[section.section_id for section in sections],
        merge_type="empty_shell",
        frame_template_id="__empty__",
        frame_id="__empty__",
        frame_number=0,
        confidence=0.0,
        label="empty_shell",
        phase_z_status="empty_shell",
        raw_content=joined_raw,
        title=joined_title,
        v4_rank=None,
        selection_path="empty_shell",
        fallback_reason="no_v4_rank_1_for_any_section",
        score=0.0,
        provisional=True,
    )

    status = _compute_slide_status(
        sections, [empty_shell], comp_debug={"candidates_summary": []},
        overflow={"passed": True, "fail_reasons": []},
        adapter_needed_units=None, debug_zones=None,
    )
    assert status["provisional_first_render_count"] == 1
    listed_shell = status["provisional_first_render_units"][0]
    assert listed_shell["phase_z_status"] == "empty_shell"
    assert listed_shell["frame_template_id"] == "__empty__"
    assert listed_shell["source_section_ids"] == ["S1", "S2"]

    # IMP-87 u4 — honesty defect inversion. A zero-V4 slide must not pass
    # as PASS on the strength of a clean overflow check alone: content
    # coverage is False and overall is EMPTY_SHELL_NO_CONTENT.
    assert status["full_mdx_coverage"] is False
    assert status["overall"] == "EMPTY_SHELL_NO_CONTENT"
# ─── u7 case 3 : e2e normal path unchanged when opt-in flags both on ─────
def test_u7_e2e_normal_path_unchanged_with_opt_in_flags(u7_patch_selector_deps):
    """IMP-05 regression guard at e2e level.

    Every section has an auto-renderable rank-1 judgment, so enabling both
    opt-in flags (``allow_provisional`` at the lookup and
    ``allow_provisional_fill`` at plan_composition) must not produce any
    provisional unit.
    """
    frame_by_section = {"S1": "MOCK_frame_001", "S2": "MOCK_frame_002"}
    v4 = {
        "mdx_sections": {
            sid: _u7_v4_section([
                _u7_j(1, "MOCK_template_direct_a", frame_id, "use_as_is"),
            ])
            for sid, frame_id in frame_by_section.items()
        }
    }
    sections = [_u7_section(sid) for sid in frame_by_section]

    def lookup_fn(sid: str):
        found, _ignored_trace = _real_lookup(
            v4, sid, raw_content="- a\n",
            allow_provisional=True,
        )
        return found

    units, layout_preset, comp_debug = plan_composition(
        sections,
        v4_lookup_fn=lookup_fn,
        v4_label_to_status=_PROD_LABEL_TO_STATUS,
        allowed_statuses=_ALLOWED_STATUSES,
        capacity_fit_fn=None,
        v4_candidates_lookup_fn=None,
        allow_provisional_fill=True,
    )

    first_section_ids = {unit.source_section_ids[0] for unit in units}
    assert first_section_ids == {"S1", "S2"}
    assert all(unit.provisional is False for unit in units)
    assert all(unit.selection_path == "rank_1" for unit in units)
    assert layout_preset == "horizontal-2"

    status = _compute_slide_status(
        sections, units, comp_debug,
        overflow={"passed": True, "fail_reasons": []},
        adapter_needed_units=None, debug_zones=None,
    )
    assert status["provisional_first_render_count"] == 0
    assert status["provisional_first_render_units"] == []
    assert status["overall"] == "PASS"
# ─── u7 case 4 : IMP-30 invariants — MDX content preserved + audit trail ──
def test_u7_imp30_invariants_mdx_preserved_and_audit_trail_surfaced(
    u7_patch_selector_deps,
):
    """Consolidated IMP-30 invariants for a provisional pick.

    - MDX content preserved (no rewrite): the provisional unit's
      raw_content equals the source section's raw_content exactly.
    - Audit trail: comp_debug's candidates_summary records the pick with
      selection_state "selected_provisional", so the recovery is observable
      rather than silent.
    """
    raw_s2 = "- restructure-only bullet alpha\n- restructure-only bullet beta\n"
    sections = [
        _u7_section("S1"),
        _pz_pipeline.MdxSection(
            section_id="S2", section_num=2, title="Section S2",
            raw_content=raw_s2,
        ),
    ]
    v4 = {
        "mdx_sections": {
            "S1": _u7_v4_section([
                _u7_j(1, "MOCK_template_direct_a", "MOCK_frame_001", "use_as_is"),
            ]),
            "S2": _u7_v4_section([
                _u7_j(1, "MOCK_template_restructure_a", "MOCK_frame_011", "restructure"),
            ]),
        }
    }

    def lookup_fn(sid: str):
        # Feed each section's own raw_content into the real lookup.
        if sid == "S1":
            section_raw = sections[0].raw_content
        else:
            section_raw = raw_s2
        found, _ignored_trace = _real_lookup(
            v4, sid, raw_content=section_raw, allow_provisional=True,
        )
        return found

    units, _preset, comp_debug = plan_composition(
        sections,
        v4_lookup_fn=lookup_fn,
        v4_label_to_status=_PROD_LABEL_TO_STATUS,
        allowed_statuses=_ALLOWED_STATUSES,
        capacity_fit_fn=None,
        v4_candidates_lookup_fn=None,
        allow_provisional_fill=True,
    )

    unit_by_first_section = {unit.source_section_ids[0]: unit for unit in units}
    provisional_s2 = unit_by_first_section["S2"]
    assert provisional_s2.provisional is True
    # IMP-30 contract: MDX content preserved through the whole path.
    assert provisional_s2.raw_content == raw_s2

    # Audit-trail surface: exactly one summary entry for S2 must carry the
    # 'selected_provisional' state. The state is only read for S2 entries.
    s2_provisional_entries = []
    for summary_entry in comp_debug.get("candidates_summary", []):
        if summary_entry["source_section_ids"] != ["S2"]:
            continue
        if summary_entry["selection_state"] == "selected_provisional":
            s2_provisional_entries.append(summary_entry)
    assert len(s2_provisional_entries) == 1
    assert s2_provisional_entries[0]["template_id"] == "MOCK_template_restructure_a"
    assert s2_provisional_entries[0]["selection_path"] == "provisional_rank_1"
# ─── u7 case 5 : all-restructure/reject → every section gets placeholder ──
def test_u7_imp30_all_restructure_only_each_section_gets_provisional_unit(
    u7_patch_selector_deps,
):
    """Every restructure/reject-only section gets its own provisional unit.

    Issue-body invariant: sections that only have restructure / reject
    judgments still get a placeholder zone + trace. When EVERY section is
    restructure/reject, recovery must yield one provisional unit per section
    rather than aborting with zero units or swallowing the exhausted chain.
    """
    sections = [_u7_section(sid) for sid in ("S1", "S2", "S3")]
    v4 = {
        "mdx_sections": {
            "S1": _u7_v4_section([
                _u7_j(1, "MOCK_template_restructure_a", "MOCK_frame_011", "restructure"),
            ]),
            "S2": _u7_v4_section([
                _u7_j(1, "MOCK_template_reject_a", "MOCK_frame_012", "reject"),
            ]),
            "S3": _u7_v4_section([
                _u7_j(1, "MOCK_template_restructure_a", "MOCK_frame_013", "restructure"),
            ]),
        }
    }

    def lookup_fn(sid: str):
        match, _trace = _real_lookup(
            v4, sid, raw_content="- bullet\n", allow_provisional=True,
        )
        return match

    units, layout_preset, comp_debug = plan_composition(
        sections,
        v4_lookup_fn=lookup_fn,
        v4_label_to_status=_PROD_LABEL_TO_STATUS,
        allowed_statuses=_ALLOWED_STATUSES,
        capacity_fit_fn=None,
        v4_candidates_lookup_fn=None,
        allow_provisional_fill=True,
    )

    # Every section must be covered by exactly one provisional unit. The
    # count is asserted on the list itself: the dict below is keyed by first
    # section id and would hide a duplicate unit for the same section.
    assert len(units) == len(sections)
    by_section = {u.source_section_ids[0]: u for u in units}
    assert set(by_section) == {"S1", "S2", "S3"}
    assert all(u.provisional is True for u in units)
    assert layout_preset is not None  # zero-unit abort path not taken

    # All three audit entries must report selected_provisional.
    summary = comp_debug.get("candidates_summary", [])
    selected_provisional_sids = {
        e["source_section_ids"][0]
        for e in summary
        if e["selection_state"] == "selected_provisional"
    }
    assert selected_provisional_sids == {"S1", "S2", "S3"}

    # Step 20 surface: all three sections counted as provisional; overall
    # stays PASS because provisional is a qualifier, not a failure class.
    status = _compute_slide_status(
        sections, units, comp_debug,
        overflow={"passed": True, "fail_reasons": []},
        adapter_needed_units=None, debug_zones=None,
    )
    assert status["provisional_first_render_count"] == 3
    assert {
        e["source_section_ids"][0]
        for e in status["provisional_first_render_units"]
    } == {"S1", "S2", "S3"}
    assert status["overall"] == "PASS"