Files
C.E.L_Slide_test2/tests/test_phase_z2_imp30_first_render.py
kyeongmin b9747c2f4a
Some checks failed
Multi-MDX Regression (IMP-91) / multi-mdx-regression (push) Failing after 21s
feat(#84): IMP-84 u1~u3 silent automation policy enforcement (FramePanel reject confirm + slide_base provisional badge/outline + IMP-30 visual assertions inverted)
- u1 FramePanel.tsx: extract `applyFrameSelection(candidate, onFrameSelect)`
  pure helper; collapse `handleFrameSelect` to direct onFrameSelect for every
  V4 label; drop `window.confirm` reject popup (IMP-47B u11 regression noise
  per `feedback_auto_pipeline_first`). New vitest pin `imp84_framepanel_reject_silent.test.ts`
  covers helper invocation across all 4 V4 labels + source-presence pins.
- u2 templates/phase_z2/slide_base.html: delete `.zone--provisional` CSS,
  `.zone__needs-adaptation-badge` CSS, the zone--provisional class fragment
  in the zone div, and the badge `<span>` render at the provisional zone.
  Preserve `data-provisional="1"` attribute as silent telemetry. New pytest
  `tests/phase_z2/test_imp84_provisional_silent_render.py` pins the silent
  contract independently of the IMP-30 first-render file.
- u3 tests/test_phase_z2_imp30_first_render.py: invert the three IMP-30 u5
  positive provisional-visual assertions to IMP-84 silent-contract negatives
  (no class, no badge, no CSS selectors); preserve positive `data-provisional`
  telemetry assertions. Docstrings updated to IMP-84 silent contract.

Out of scope (Round #4 + #92 contract): Home.tsx `toast.error(aiReviewMsg)`
call line, designAgentApi.ts `api_error_kinds`/`api_error_kind` schema and
operational-only formatter, FramePanel reject badge/tooltip read-only labels
(L102/L147/L156), and backend `zone.provisional` flag emission.

Stage 4 PASS: u1 vitest 10/10, u2 pytest 5/5, u3 pytest 29/29 (incl. 3
IMP-84 inverted assertions: `test_imp84_provisional_zone_silent_no_class_no_badge`,
`test_imp84_provisional_badge_never_rendered_in_mixed_zones`,
`test_imp84_slide_base_css_strips_provisional_visual_selectors`).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-26 14:15:02 +09:00

1583 lines
64 KiB
Python

"""IMP-30 first-render invariant tests (per-unit slice).
This file is the shared regression home for IMP-30 units u2~u7. Each
implementation unit adds its own focused tests; u7 (regression coverage)
will broaden the surface (synthetic V4 fixtures for chain_exhausted
provisional, zero-V4 empty-shell, normal-path unchanged).
u3 scope (this slice) — select_composition_units last-resort provisional
fill for uncovered sections + _candidate_state "selected_provisional":
1. default-off behavior is byte-identical to pre-u3 (IMP-05 guard).
2. opt-in fills uncovered sections with provisional candidates whose
phase_z_status would otherwise be filter_status.
3. opt-in never displaces normal greedy selections.
4. opt-in respects coverage non-overlap (no section selected twice).
5. plan_composition._candidate_state returns "selected_provisional"
for fills and "selected" for normal greedy picks.
Synthetic naming convention (Codex #10 E1):
- MOCK_ prefix mandatory
- _a / _b suffixes = enumeration only (NOT ordering / priority)
- rank/order expressed by V4 rank field, NEVER ID suffix
"""
from __future__ import annotations
from dataclasses import dataclass, field
from typing import Optional
from src.phase_z2_composition import (
CompositionUnit,
plan_composition,
select_composition_units,
)
# ─── Synthetic match shape (duck-typed V4Match-like) ───────────────────
@dataclass
class _StubV4Match:
"""Duck-typed V4Match surface used by collect_candidates / score path.
Mirrors src.phase_z2_pipeline.V4Match fields touched by composition:
template_id / frame_id / frame_number / confidence / label / v4_rank /
selection_path / fallback_reason / provisional. Composition module
intentionally does not import V4Match (circular dep avoidance), so a
plain stub object with the same attributes is the contract.
"""
template_id: str
frame_id: str
frame_number: int
confidence: float
label: str
v4_rank: Optional[int] = None
selection_path: str = "rank_1"
fallback_reason: Optional[str] = None
provisional: bool = False
@dataclass
class _StubSection:
"""Minimal section surface used by collect_candidates (section_id /
raw_content / title). Matches MdxSection's attribute names without
importing pipeline (keeps test isolated to composition module)."""
section_id: str
title: str = ""
raw_content: str = ""
# Phase Z status mapping fixture — only the keys exercised here are listed.
# Real mapping (V4_LABEL_TO_PHASE_Z_STATUS in pipeline) is broader; this
# stub deliberately mirrors only what the tests touch.
_LABEL_TO_STATUS = {
"use_as_is": "matched_zone",
"light_edit": "adapt_matched_zone",
"restructure": "extract_matched_zone",
"reject": "fallback_candidate",
}
_ALLOWED_STATUSES = {"matched_zone", "adapt_matched_zone"}
# ─── Helpers ────────────────────────────────────────────────────────────
def _make_lookup(matches_by_section: dict[str, _StubV4Match]):
"""Return v4_lookup_fn (section_id -> _StubV4Match | None)."""
def _fn(section_id: str):
return matches_by_section.get(section_id)
return _fn
def _make_candidates_lookup_empty():
"""v4_candidates_lookup_fn that always returns [] (no Step 6-A axis here)."""
def _fn(section_id: str):
return []
return _fn
# ─── u3 case 1 : default-off behavior byte-identical to pre-u3 ─────────
def test_u3_default_off_preserves_imp05_behavior():
    """IMP-05 regression guard for the default (fill-disabled) path.

    When allow_provisional_fill is left at its default (False), the plan
    must contain exactly the units normal selection yields, even though a
    provisional candidate is sitting in the pool.

    Setup:
      - S1: use_as_is, provisional=False (normal selection)
      - S2: restructure, provisional=True (would be fill-eligible)
    Expected (default-off):
      - a single unit covering S1; S2 stays uncovered and is reported as
        filtered_status in the debug summary.
    """
    direct_match = _StubV4Match(
        template_id="MOCK_template_direct_a",
        frame_id="MOCK_frame_001",
        frame_number=1,
        confidence=0.9,
        label="use_as_is",
        v4_rank=1,
    )
    provisional_match = _StubV4Match(
        template_id="MOCK_template_restructure_a",
        frame_id="MOCK_frame_002",
        frame_number=2,
        confidence=0.65,
        label="restructure",
        v4_rank=1,
        selection_path="provisional_rank_1",
        fallback_reason="phase_z_status_not_allowed:extract_matched_zone",
        provisional=True,
    )

    # allow_provisional_fill is deliberately not passed -> default False.
    units, preset, debug = plan_composition(
        [_StubSection("S1"), _StubSection("S2")],
        _make_lookup({"S1": direct_match, "S2": provisional_match}),
        _LABEL_TO_STATUS,
        _ALLOWED_STATUSES,
        v4_candidates_lookup_fn=_make_candidates_lookup_empty(),
    )

    assert len(units) == 1
    only_unit = units[0]
    assert only_unit.source_section_ids == ["S1"]
    assert only_unit.provisional is False
    assert preset == "single"

    # The S2 candidate must still be visible in the debug summary, marked
    # as filtered by status rather than selected.
    summary_index = {}
    for entry in debug["candidates_summary"]:
        summary_index[tuple(entry["source_section_ids"])] = entry
    assert summary_index[("S2",)]["selection_state"] == "filtered_status"
# ─── u3 case 2 : opt-in fills uncovered sections with provisional ──────
def test_u3_opt_in_fills_uncovered_with_provisional():
    """IMP-30 u3 opt-in path.

    Same fixture as the default-off case (S1 use_as_is + S2 provisional
    restructure), but with allow_provisional_fill=True. S2 must now be
    filled by a provisional unit while the greedy S1 pick is unchanged.
    """
    direct_match = _StubV4Match(
        template_id="MOCK_template_direct_a",
        frame_id="MOCK_frame_001",
        frame_number=1,
        confidence=0.9,
        label="use_as_is",
        v4_rank=1,
    )
    provisional_match = _StubV4Match(
        template_id="MOCK_template_restructure_a",
        frame_id="MOCK_frame_002",
        frame_number=2,
        confidence=0.65,
        label="restructure",
        v4_rank=1,
        selection_path="provisional_rank_1",
        fallback_reason="phase_z_status_not_allowed:extract_matched_zone",
        provisional=True,
    )

    units, preset, _ = plan_composition(
        [_StubSection("S1"), _StubSection("S2")],
        _make_lookup({"S1": direct_match, "S2": provisional_match}),
        _LABEL_TO_STATUS,
        _ALLOWED_STATUSES,
        v4_candidates_lookup_fn=_make_candidates_lookup_empty(),
        allow_provisional_fill=True,
    )

    # Every section is covered once the fill is enabled.
    covered_ids = set()
    for unit in units:
        covered_ids.update(unit.source_section_ids)
    assert covered_ids == {"S1", "S2"}

    # Index the units by the sections they cover.
    unit_for = {}
    for unit in units:
        unit_for[tuple(unit.source_section_ids)] = unit
    greedy_unit = unit_for[("S1",)]
    filled_unit = unit_for[("S2",)]

    # Normal greedy pick keeps provisional=False.
    assert greedy_unit.provisional is False
    # The fill carries the provisional flag through from the match.
    assert filled_unit.provisional is True
    assert filled_unit.label == "restructure"
    # Two units -> two-zone layout preset.
    assert preset == "horizontal-2"
# ─── u3 case 3 : _candidate_state distinguishes selected vs provisional ─
def test_u3_candidate_state_marks_selected_provisional():
    """The debug summary's selection_state must distinguish the two kinds
    of selected candidate:
      - "selected" for normal greedy picks
      - "selected_provisional" for last-resort fills
    """
    match_table = {
        "S1": _StubV4Match(
            template_id="MOCK_template_direct_a",
            frame_id="MOCK_frame_001",
            frame_number=1,
            confidence=0.9,
            label="use_as_is",
            v4_rank=1,
        ),
        "S2": _StubV4Match(
            template_id="MOCK_template_restructure_a",
            frame_id="MOCK_frame_002",
            frame_number=2,
            confidence=0.65,
            label="restructure",
            v4_rank=1,
            selection_path="provisional_rank_1",
            provisional=True,
        ),
    }

    # Only the debug payload matters for this test.
    _, _, debug = plan_composition(
        [_StubSection(sid) for sid in ("S1", "S2")],
        _make_lookup(match_table),
        _LABEL_TO_STATUS,
        _ALLOWED_STATUSES,
        v4_candidates_lookup_fn=_make_candidates_lookup_empty(),
        allow_provisional_fill=True,
    )

    state_of = {}
    for entry in debug["candidates_summary"]:
        state_of[tuple(entry["source_section_ids"])] = entry
    assert state_of[("S1",)]["selection_state"] == "selected"
    assert state_of[("S2",)]["selection_state"] == "selected_provisional"
# ─── u3 case 4 : opt-in preserves non-overlap (no double coverage) ─────
def test_u3_opt_in_respects_coverage_non_overlap():
    """Provisional fill must never cover a section that is already covered.

    Setup:
      - P-sub-1: use_as_is (normal selection)
      - P-sub-2: restructure, provisional=True (eligible for fill)
      - no V4 match registered for a parent section
    The "<base>-sub-<n>" ids are used because, per the fixture author's
    note, that is the form derive_parent_id recognises, which makes the two
    children auto-merge eligible and so stresses the overlap check.

    With allow_provisional_fill=True the selected units must cover both
    sections exactly once each.
    """
    sections = [
        _StubSection("P-sub-1", raw_content="alpha"),
        _StubSection("P-sub-2", raw_content="beta"),
    ]
    # NOTE(review): with no parent match present, the planner may synthesize
    # an inferred parent merge from the representative child (presumably
    # P-sub-1, the higher-confidence, non-provisional one). Whichever
    # candidate wins, the assertions below only pin the coverage invariant.
    match_table = {
        "P-sub-1": _StubV4Match(
            template_id="MOCK_template_direct_a",
            frame_id="MOCK_frame_001",
            frame_number=1,
            confidence=0.9,
            label="use_as_is",
            v4_rank=1,
        ),
        "P-sub-2": _StubV4Match(
            template_id="MOCK_template_restructure_a",
            frame_id="MOCK_frame_002",
            frame_number=2,
            confidence=0.65,
            label="restructure",
            v4_rank=1,
            provisional=True,
        ),
    }

    units, _, _ = plan_composition(
        sections,
        _make_lookup(match_table),
        _LABEL_TO_STATUS,
        _ALLOWED_STATUSES,
        v4_candidates_lookup_fn=_make_candidates_lookup_empty(),
        allow_provisional_fill=True,
    )

    covered = [sid for unit in units for sid in unit.source_section_ids]
    # Non-overlap invariant: no section id is repeated.
    assert len(covered) == len(set(covered))
    # Both sections are covered exactly once.
    assert set(covered) == {"P-sub-1", "P-sub-2"}
# ─── u3 case 5 : opt-in with no provisional candidates is a no-op ──────
def test_u3_opt_in_noop_when_no_provisional_candidates():
    """Opting in changes nothing when the pool has no provisional candidate.

    With allow_provisional_fill=True but zero provisional candidates, the
    result must match the default-off behavior: no fill is forced and the
    uncovered section simply stays uncovered (u4 owns the zero-unit
    empty-shell terminal).
    """
    direct_match = _StubV4Match(
        template_id="MOCK_template_direct_a",
        frame_id="MOCK_frame_001",
        frame_number=1,
        confidence=0.9,
        label="use_as_is",
        v4_rank=1,
    )
    # S2 is restructure but NOT provisional (e.g. the pipeline did not opt
    # into u1 allow_provisional, or the section had a real rank-1
    # restructure match).
    plain_restructure = _StubV4Match(
        template_id="MOCK_template_restructure_a",
        frame_id="MOCK_frame_002",
        frame_number=2,
        confidence=0.65,
        label="restructure",
        v4_rank=1,
        provisional=False,
    )

    units, preset, debug = plan_composition(
        [_StubSection("S1"), _StubSection("S2")],
        _make_lookup({"S1": direct_match, "S2": plain_restructure}),
        _LABEL_TO_STATUS,
        _ALLOWED_STATUSES,
        v4_candidates_lookup_fn=_make_candidates_lookup_empty(),
        allow_provisional_fill=True,
    )

    assert len(units) == 1
    assert units[0].source_section_ids == ["S1"]
    assert preset == "single"

    # S2 is not provisional, so the u3 fill ignores it and it stays
    # filtered by status.
    summary_index = {}
    for entry in debug["candidates_summary"]:
        summary_index[tuple(entry["source_section_ids"])] = entry
    assert summary_index[("S2",)]["selection_state"] == "filtered_status"
# ─── u3 case 6 : select_composition_units direct invocation parity ─────
def test_u3_select_composition_units_default_off_signature():
    """Backward-compat pin: select_composition_units must still be callable
    with only its two positional arguments (no keyword-only u3 options), as
    existing callers that import the function directly do.
    """
    # Hand-built candidate — collect_candidates is bypassed on purpose.
    candidate = CompositionUnit(
        source_section_ids=["S1"],
        merge_type="single",
        frame_template_id="MOCK_template_direct_a",
        frame_id="MOCK_frame_001",
        frame_number=1,
        confidence=0.9,
        label="use_as_is",
        phase_z_status="matched_zone",
        raw_content="alpha",
        title="S1",
    )

    selected = select_composition_units([candidate], _ALLOWED_STATUSES)

    assert len(selected) == 1
    assert selected[0].source_section_ids == ["S1"]
def test_u3_select_composition_units_opt_in_direct():
    """Direct call with the u3 opt-in: the uncovered section is filled from
    the provisional candidate pool and the greedy pick is left untouched.
    """
    greedy_candidate = CompositionUnit(
        source_section_ids=["S1"],
        merge_type="single",
        frame_template_id="MOCK_template_direct_a",
        frame_id="MOCK_frame_001",
        frame_number=1,
        confidence=0.9,
        label="use_as_is",
        phase_z_status="matched_zone",
        raw_content="alpha",
        title="S1",
    )
    # Status is outside _ALLOWED_STATUSES; only the provisional flag makes
    # this candidate usable as a fill.
    provisional_candidate = CompositionUnit(
        source_section_ids=["S2"],
        merge_type="single",
        frame_template_id="MOCK_template_restructure_a",
        frame_id="MOCK_frame_002",
        frame_number=2,
        confidence=0.65,
        label="restructure",
        phase_z_status="extract_matched_zone",
        raw_content="beta",
        title="S2",
        provisional=True,
    )

    selected = select_composition_units(
        [greedy_candidate, provisional_candidate],
        _ALLOWED_STATUSES,
        all_section_ids=["S1", "S2"],
        allow_provisional_fill=True,
    )

    assert len(selected) == 2
    unit_for = {}
    for unit in selected:
        unit_for[tuple(unit.source_section_ids)] = unit
    assert unit_for[("S1",)].provisional is False
    assert unit_for[("S2",)].provisional is True
# ════════════════════════════════════════════════════════════════════════
# u4 — pipeline abort guard empty-shell synthesis
# ════════════════════════════════════════════════════════════════════════
#
# u4 replaces the pre-IMP-30 `sys.exit(1)` at the composition_planner abort
# guard with two-phase recovery: provisional retry (Phase A, opt-in u1+u3)
# then terminal empty-shell (Phase B). The shell is a single CompositionUnit
# with frame_template_id="__empty__" and preset="single"; the per-unit
# for-loop's __empty__ branch bypasses mapper/contract and emits a
# placeholder zones_data/debug_zones record so final.html still writes.
#
# These tests verify the composition-side invariants that u4 relies on:
# - CompositionUnit can be constructed in the empty-shell shape.
# - The shell shape carries the data needed for u5 (provisional flag) /
# u6 (status qualifier) / render_slide __empty__ branch (template_id).
# The pipeline-level integration (provisional retry / empty-shell synthesis
# at the abort guard, plus the per-unit __empty__ bypass) is covered by
# u7 (regression coverage) with synthetic V4 fixtures.
def test_u4_empty_shell_unit_shape_matches_pipeline_synthesis():
    """Pin the field shape of the empty-shell CompositionUnit.

    The unit synthesized at the IMP-30 u4 abort guard is consumed by the
    per-unit __empty__ branch, compute_slide_status and the slide_base
    template. Invariants (per the u4 block in src/phase_z2_pipeline.py):
      - frame_template_id == "__empty__" so render_slide takes its existing
        __empty__ branch and short-circuits partial_html to "".
      - phase_z_status == "empty_shell", distinct from matched_zone /
        adapt_matched_zone / extract_matched_zone / fallback_candidate
        (u6 surfaces this as an additive qualifier).
      - provisional == True, the flag u5 forwards into zones_data. Under the
        IMP-84 silent contract the template exposes it only as the
        data-provisional="1" attribute, with no class and no badge.
      - source_section_ids lists every aligned section id, so
        compute_slide_status treats each section as covered by the shell.
      - selection_path == "empty_shell" and fallback_reason set, so the
        audit trace survives in step06_composition_plan.json.
    """
    aligned_section_ids = ["S1", "S2", "S3"]
    body_parts = ("alpha", "beta", "gamma")
    heading_parts = ("First", "Second", "Third")

    shell = CompositionUnit(
        # Copy, so the unit does not alias the fixture list.
        source_section_ids=list(aligned_section_ids),
        merge_type="empty_shell",
        frame_template_id="__empty__",
        frame_id="__empty__",
        frame_number=0,
        confidence=0.0,
        label="empty_shell",
        phase_z_status="empty_shell",
        raw_content="\n\n".join(body_parts),
        title=" / ".join(heading_parts),
        v4_rank=None,
        selection_path="empty_shell",
        fallback_reason="no_v4_rank_1_for_any_section",
        score=0.0,
        rationale={
            "imp30_u4": "terminal_first_render_empty_shell",
            "reason": "no_rank_1_V4_evidence_in_any_section",
            "aligned_section_ids": aligned_section_ids,
        },
        provisional=True,
    )

    # Plain equality checks, grouped; the attribute name is the failure
    # message so a mismatch still points at the offending field.
    expected_values = {
        "frame_template_id": "__empty__",
        "frame_id": "__empty__",
        "label": "empty_shell",
        "phase_z_status": "empty_shell",
        "selection_path": "empty_shell",
        "fallback_reason": "no_v4_rank_1_for_any_section",
        "source_section_ids": aligned_section_ids,
        "confidence": 0.0,
        "score": 0.0,
    }
    for attr, expected in expected_values.items():
        assert getattr(shell, attr) == expected, attr

    # Identity checks stay explicit (True / None, not merely truthy/falsy).
    assert shell.provisional is True
    assert shell.v4_rank is None

    # MDX content is preserved verbatim (no rewrite) even though no V4
    # mapping is applied; adaptation is deferred to IMP-31.
    assert shell.raw_content == "alpha\n\nbeta\n\ngamma"

    # The rationale carries the audit trail read by the Step 6 artifact + u6.
    assert shell.rationale["imp30_u4"] == "terminal_first_render_empty_shell"
    assert shell.rationale["aligned_section_ids"] == aligned_section_ids
def test_u4_empty_shell_unit_default_provisional_is_false():
    """Smoke test: the provisional flag is strictly opt-in.

    A CompositionUnit built without provisional=True must not look like an
    empty shell, so u5 / u6 (which read unit.provisional) never report a
    false positive on ordinary units.
    """
    ordinary_fields = dict(
        source_section_ids=["S1"],
        merge_type="single",
        frame_template_id="MOCK_template_direct_a",
        frame_id="MOCK_frame_001",
        frame_number=1,
        confidence=0.9,
        label="use_as_is",
        phase_z_status="matched_zone",
        raw_content="alpha",
        title="S1",
    )
    ordinary = CompositionUnit(**ordinary_fields)

    assert ordinary.provisional is False
    assert ordinary.frame_template_id != "__empty__"
def test_u4_empty_shell_phase_z_status_outside_mvp1_allowed():
    """The shell's phase_z_status ('empty_shell') must stay outside the
    allowed-status set.

    Were it inside, any code that filters units by allowed_statuses would
    treat the shell as a normal matched zone and the "needs adaptation"
    signal would be lost. Pinning this at the composition-test level means
    a status rename in the pipeline cannot silently leak the shell into
    normal flows.
    """
    # _ALLOWED_STATUSES mirrors the pipeline's MVP1_ALLOWED_STATUSES
    # ({"matched_zone", "adapt_matched_zone"}); the shell uses a distinct
    # status so downstream filters reject it.
    shell_status = "empty_shell"
    assert shell_status not in _ALLOWED_STATUSES
# ════════════════════════════════════════════════════════════════════════
# u5 — zones_data carries provisional flag; slide_base.html zone div exposes
#      it as silent `data-provisional="1"` telemetry (IMP-84 silent contract)
# ════════════════════════════════════════════════════════════════════════
#
# u5 wires the unit.provisional signal (set by u2 from V4Match.provisional in
# u1, or directly by u4 empty-shell synthesis) through the zones_data payload
# into the slide_base.html template. Contract after IMP-84 u2:
# - zones_data[i]['provisional'] = bool (default False; True only for IMP-30
#   opt-in synthesized units).
# - data-provisional="1" attribute is set on the zone div when True, for
#   downstream selectors / overflow checker / e2e tooling.
# - The original IMP-30 u5 visual treatment — the `zone--provisional` class on
#   the zone div and the inline
#   `<span class="zone__needs-adaptation-badge">needs adaptation</span>`
#   element (top-right corner via absolute pos) — was removed by IMP-84 and
#   must NOT be emitted, in the body or in the <style> block.
#
# The composition / pipeline-level handoff is exercised by u3 / u4 already.
# u5 tests focus on:
# - template-rendering output: data-provisional="1" emitted ONLY when
#   zones[i].provisional is truthy; class + badge HTML never emitted.
#   (default-off path unchanged.)
# - byte-equivalence: non-provisional zones render the same div shape as
#   pre-u5 (no zone--provisional class / no badge element / no data attr).
import re
from pathlib import Path
from jinja2 import Environment, FileSystemLoader, select_autoescape
# ─── u5 helpers ────────────────────────────────────────────────────────
def _render_slide_base(zones: list[dict], *, layout_preset: str = "single",
                       layout_css: dict | None = None) -> str:
    """Render templates/phase_z2/slide_base.html directly through Jinja2.

    Bypasses render_slide() so the template-only contract can be exercised
    without the full pipeline (no mapper, no contracts, no token CSS
    loader).

    Args:
        zones: zone dicts handed to the template. A zone without a
            "partial_html" key is rendered with a fixed stub partial. The
            dicts are NOT modified; rendering works on shallow copies so a
            fixture can be reused or inspected after the call.
        layout_preset: value passed to the template as ``layout_preset``.
        layout_css: dict with "cols" / "rows" / "areas"; defaults to a
            one-cell "single" grid when omitted.

    Returns:
        The rendered HTML document as a string.
    """
    template_dir = Path(__file__).resolve().parents[1] / "templates" / "phase_z2"
    env = Environment(
        loader=FileSystemLoader(str(template_dir)),
        autoescape=select_autoescape(["html"]),
    )
    if layout_css is None:
        layout_css = {
            "cols": "1fr",
            "rows": "1fr",
            "areas": '"single"',
        }
    # render_slide normally populates partial_html for each zone. Supply a
    # stable placeholder where it is missing so assertions can target
    # zone-level attributes without coupling to frame template internals.
    # The default is listed first so a caller-provided partial_html (even an
    # explicit None) wins, matching setdefault semantics — but on a copy, so
    # the caller's dicts are left untouched.
    render_zones = [
        {"partial_html": "<div class=\"_stub_partial\">stub</div>", **zone}
        for zone in zones
    ]
    base = env.get_template("slide_base.html")
    return base.render(
        slide_title="IMP-30 u5 test slide",
        slide_footer=None,
        zones=render_zones,
        layout_preset=layout_preset,
        layout_css=layout_css,
        gap_px=12,
        token_css="",  # empty token CSS — not under test here
        embedded_mode="standalone",
    )
def _zone_div_for_position(html: str, position: str) -> str:
"""Return the opening tag + immediate inner content (up to but not
including partial_html) for the zone div at a given `data-zone-position`
value. Tight enough for class/attribute assertions, lenient enough not
to depend on partial_html internals."""
pattern = re.compile(
r'<div class="zone[^"]*"\s+data-zone-position="' + re.escape(position) + r'"[^>]*>'
r'(?:\s*<span class="zone__needs-adaptation-badge"[^>]*>[^<]*</span>)?',
re.DOTALL,
)
match = pattern.search(html)
if not match:
return ""
return match.group(0)
def _all_zone_div_openings(html: str) -> list[str]:
"""Return every zone-div opening tag in the layout body. Used to scope
class / attribute assertions away from the CSS <style> block (which
contains `.zone--provisional` / `.zone__needs-adaptation-badge` as
selectors — must not be mistaken for zone-div class emissions)."""
return re.findall(
r'<div class="zone[^"]*"[^>]*data-zone-position="[^"]*"[^>]*>',
html,
)
def _all_badge_spans(html: str) -> list[str]:
"""Return every actual badge `<span>` element in the rendered body
(NOT the `.zone__needs-adaptation-badge` selector in the <style> block).
Used to count badge emission accurately."""
return re.findall(
r'<span class="zone__needs-adaptation-badge"[^>]*>[^<]*</span>',
html,
)
# ─── u5 case 1 : non-provisional zone renders pre-u5 div shape ────────
def test_u5_non_provisional_zone_renders_without_class_or_badge():
    """Default-off path: a zone with provisional=False renders as a plain
    `<div class="zone" ...>` — no zone--provisional class, no
    data-provisional attribute and no needs-adaptation badge, i.e. the
    pre-u5 div shape.

    Class / attribute assertions are scoped to the actual zone div opening
    tag rather than the whole document.
    """
    plain_zone = dict(
        position="single",
        template_id="MOCK_template_direct_a",
        slot_payload={},
        content_weight={"score": 1},
        min_height_px=100,
        provisional=False,
    )
    html = _render_slide_base([plain_zone])

    # Scope: zone div opening tags only.
    zone_divs = _all_zone_div_openings(html)
    assert len(zone_divs) == 1
    opening = zone_divs[0]
    assert "zone--provisional" not in opening
    assert 'data-provisional="1"' not in opening
    # No real badge <span> element anywhere in the output.
    assert _all_badge_spans(html) == []
    # Sanity: the zone div carries exactly the canonical class.
    assert 'class="zone"' in opening
def test_u5_zone_without_provisional_key_treated_as_non_provisional():
    """Belt-and-suspenders: a zone dict with no `provisional` key at all
    must render like provisional=False (a missing attribute is falsy in the
    Jinja2 truthiness check). Pre-u5 callers that were never updated still
    get valid output instead of a template crash.
    """
    # The `provisional` key is intentionally left out.
    legacy_zone = dict(
        position="single",
        template_id="MOCK_template_direct_a",
        slot_payload={},
        content_weight={"score": 1},
        min_height_px=100,
    )
    html = _render_slide_base([legacy_zone])

    zone_divs = _all_zone_div_openings(html)
    assert len(zone_divs) == 1
    assert "zone--provisional" not in zone_divs[0]
    assert _all_badge_spans(html) == []
# ─── u5 case 2 (IMP-84) : provisional zone renders data attr only — no class, no badge ───
def test_imp84_provisional_zone_silent_no_class_no_badge():
    """IMP-84 silent-automation inversion of the earlier IMP-30 u5 contract.

    For a zone with provisional=True the template must:
      1. NOT add the `zone--provisional` class to the zone div (no
         user-visible outline / striped wash).
      2. Still emit `data-provisional="1"` as silent telemetry for
         downstream selectors / inspection.
      3. NOT render any `<span class="zone__needs-adaptation-badge">`
         element, nor the literal text "needs adaptation", nor its
         aria-label (no user-facing badge).

    Scope: class / attribute checks target the zone div opening tag. The
    absence of the removed selectors from the <style> block is pinned by
    `test_imp84_slide_base_css_strips_provisional_visual_selectors`.
    """
    provisional_zone = dict(
        position="single",
        template_id="MOCK_template_restructure_a",
        slot_payload={},
        content_weight={"score": 1},
        min_height_px=100,
        provisional=True,
    )
    html = _render_slide_base([provisional_zone])

    zone_divs = _all_zone_div_openings(html)
    assert len(zone_divs) == 1
    opening = zone_divs[0]
    # Visual class removed.
    assert "zone--provisional" not in opening
    # Silent telemetry attribute kept.
    assert 'data-provisional="1"' in opening
    # No badge element, label text or aria-label anywhere in the output.
    assert _all_badge_spans(html) == []
    assert "needs adaptation" not in html
    assert 'aria-label="needs user or AI adaptation"' not in html
def test_imp84_provisional_badge_never_rendered_in_mixed_zones():
    """IMP-84 silent-automation inversion of the earlier IMP-30 u5
    mixed-zone contract.

    Slide with one normal zone ("top") and one provisional zone ("bottom").
    The silent contract requires that NO badge span and NO
    `zone--provisional` class be emitted on either zone div. The
    provisional zone is identifiable only through the silent
    `data-provisional="1"` telemetry attribute, which must appear on the
    provisional zone alone (no bleed onto the normal zone).

    Both per-position lookups are asserted non-empty first:
    `_zone_div_for_position` returns "" when it finds nothing, and every
    `... not in ""` check would otherwise pass without testing anything.
    """
    zones = [
        {
            "position": "top",
            "template_id": "MOCK_template_direct_a",
            "slot_payload": {},
            "content_weight": {"score": 1},
            "min_height_px": 100,
            "provisional": False,
        },
        {
            "position": "bottom",
            "template_id": "MOCK_template_restructure_a",
            "slot_payload": {},
            "content_weight": {"score": 1},
            "min_height_px": 100,
            "provisional": True,
        },
    ]
    layout_css = {
        "cols": "1fr",
        "rows": "1fr 1fr",
        "areas": '"top" "bottom"',
    }
    html = _render_slide_base(
        zones, layout_preset="vertical-2", layout_css=layout_css
    )

    # No badge <span> element anywhere (silent-automation policy).
    assert _all_badge_spans(html) == []

    # No zone div carries the zone--provisional class (visual removed).
    zone_divs = _all_zone_div_openings(html)
    assert len(zone_divs) == 2
    assert all("zone--provisional" not in d for d in zone_divs)

    # data-provisional="1" telemetry must be on the bottom (provisional)
    # zone only — never on the top (non-provisional) zone.
    bottom_zone_open = _zone_div_for_position(html, "bottom")
    assert bottom_zone_open, "bottom zone div was not located in the rendered HTML"
    assert 'data-provisional="1"' in bottom_zone_open
    assert "zone--provisional" not in bottom_zone_open
    assert "zone__needs-adaptation-badge" not in bottom_zone_open

    top_zone_open = _zone_div_for_position(html, "top")
    # Guard against a vacuous pass: every check below is a negative one.
    assert top_zone_open, "top zone div was not located in the rendered HTML"
    assert 'data-provisional="1"' not in top_zone_open
    assert "zone--provisional" not in top_zone_open
    assert "zone__needs-adaptation-badge" not in top_zone_open
# ─── u5 case 3 : zones_data data shape contract ────────────────────────
def test_u5_zones_data_provisional_field_defaults_false_in_template():
    """Template-level fallback: a falsy-but-not-False `provisional` value
    (None here, as a forgetful zones_data builder might produce) must not
    cause zone--provisional to be emitted. Pinned so a template refactor
    cannot silently invert the default.
    """
    none_flag_zone = dict(
        position="single",
        template_id="MOCK_template_direct_a",
        slot_payload={},
        content_weight={"score": 1},
        min_height_px=100,
        provisional=None,  # explicit falsy, but not the bool False
    )
    html = _render_slide_base([none_flag_zone])

    zone_divs = _all_zone_div_openings(html)
    assert len(zone_divs) == 1
    assert "zone--provisional" not in zone_divs[0]
    assert _all_badge_spans(html) == []
def test_imp84_slide_base_css_strips_provisional_visual_selectors():
    """IMP-84 silent-automation inversion of the earlier IMP-30 u5
    CSS-presence contract.

    IMP-84 u2 deleted the provisional visual treatment (dashed outline,
    striped wash, badge) from the `slide_base.html` <style> block. This
    pins that the selectors `.zone--provisional` and
    `.zone__needs-adaptation-badge` are absent from the rendered HTML, so a
    refactor that re-introduces them fails here instead of silently
    re-surfacing the removed visual signal.

    Scope: the whole rendered document (style block plus body). The body
    badge span is covered by separate tests, so a hit on these
    dot-prefixed strings would come from a regressed style block.
    """
    provisional_zone = dict(
        position="single",
        template_id="MOCK_template_restructure_a",
        slot_payload={},
        content_weight={"score": 1},
        min_height_px=100,
        provisional=True,
    )
    html = _render_slide_base([provisional_zone])

    # Neither removed selector may be defined anywhere in the output.
    for removed_selector in (".zone--provisional", ".zone__needs-adaptation-badge"):
        assert removed_selector not in html
# ════════════════════════════════════════════════════════════════════════
# u6 — compute_slide_status additive qualifiers
# provisional_first_render_count + provisional_first_render_units
# ════════════════════════════════════════════════════════════════════════
#
# u6 surfaces the IMP-30 first-render invariant in Step 20 slide_status.
# Contract :
# - Additive only. Top-level `overall` enum (PASS / RENDERED_WITH_VISUAL_REGRESSION /
# PARTIAL_COVERAGE / PARTIAL_COVERAGE_WITH_VISUAL_REGRESSION) is NOT extended.
# Stage 1 Q3 lock + Codex #10 D4 (IMP-05) preservation.
# - `provisional_first_render_count` = int >= 0 — number of selected units with
# unit.provisional == True (set by u1 V4Match synthesis → u2 propagation,
# u3 last-resort fill, or u4 empty-shell synthesis).
# - `provisional_first_render_units` = list[dict] — per-unit entries mirroring
# the shape of `fallback_selections` / `adapter_needed_units` so downstream
# consumers can branch uniformly without re-deriving intent from labels.
# - Defensive `getattr` keeps the function safe when units come from legacy
# code paths that predate u2 (no .provisional attribute) — those units are
# treated as non-provisional.
from src.phase_z2_pipeline import MdxSection, compute_slide_status
def _mk_unit(*, section_ids: list[str], provisional: bool, **overrides):
    """Build a ``CompositionUnit`` for the compute_slide_status tests.

    The real ``CompositionUnit`` type is instantiated (not a stand-in), so
    the status function reads ``provisional`` from the same kind of object
    the rest of this file constructs.

    Args:
        section_ids: Section ids; copied into ``source_section_ids``.
        provisional: Value stored in the unit's ``provisional`` field.
        **overrides: Field values that replace the defaults listed below.

    Returns:
        The constructed ``CompositionUnit``.
    """
    fields = {
        "source_section_ids": list(section_ids),
        "merge_type": "single",
        "frame_template_id": "MOCK_template_direct_a",
        "frame_id": "MOCK_frame_001",
        "frame_number": 1,
        "confidence": 0.9,
        "label": "use_as_is",
        "phase_z_status": "matched_zone",
        "raw_content": "alpha",
        "title": "MOCK section",
        "v4_rank": 1,
        "selection_path": "rank_1",
        "fallback_reason": None,
        "score": 1.0,
        "provisional": provisional,
        # Caller-supplied values win over every default above.
        **overrides,
    }
    return CompositionUnit(**fields)
def _mk_section(section_id: str) -> MdxSection:
    """Return an ``MdxSection`` whose fields are derived from ``section_id``.

    ``section_num`` is the id with leading ``"S"`` characters stripped and
    parsed as an int (0 when nothing remains). Only ``section_id``,
    ``section_num``, ``title`` and ``raw_content`` are passed explicitly.
    """
    digits = section_id.lstrip("S")
    number = int(digits) if digits else int("0")
    return MdxSection(
        section_id=section_id,
        section_num=number,
        title=f"Section {section_id}",
        raw_content=f"raw {section_id}",
    )
# ─── u6 case 1 : no provisional units — defensive default 0 / [] ──────
def test_u6_no_provisional_units_returns_zero_and_empty_list():
    """Two non-provisional rank_1 units yield count 0 and an empty list.

    Also pins that ``overall`` is ``PASS`` and that the pre-existing
    ``fallback_selection_count`` / ``selection_paths`` keys keep their
    zero / empty values for this input.
    """
    section_ids = ("S1", "S2")
    sections = [_mk_section(sid) for sid in section_ids]
    units = [
        _mk_unit(section_ids=[sid], provisional=False) for sid in section_ids
    ]

    status = compute_slide_status(
        sections,
        units,
        {"candidates_summary": []},
        {"passed": True, "fail_reasons": []},
        adapter_needed_units=None,
        debug_zones=None,
    )

    # The two qualifier keys under test.
    assert status["provisional_first_render_count"] == 0
    assert status["provisional_first_render_units"] == []
    # Full coverage + visual pass keeps the overall value at PASS.
    assert status["overall"] == "PASS"
    # Regression guard on the older qualifier fields.
    assert status["fallback_selection_count"] == 0
    assert status["selection_paths"] == []
def test_u6_provisional_field_absent_is_treated_as_false():
    """A unit whose ``provisional`` attribute was deleted is not counted.

    Simulates a legacy, duck-typed unit by removing the attribute from a
    freshly built instance, then expects count 0 and an empty list.
    """
    legacy_unit = _mk_unit(section_ids=["S1"], provisional=False)
    # Drop the instance attribute to mimic a unit created before the field
    # existed.
    # NOTE(review): if CompositionUnit declares a class-level default for
    # `provisional`, lookups fall back to that default — confirm this still
    # reaches the intended "attribute missing" path.
    del legacy_unit.provisional

    status = compute_slide_status(
        [_mk_section("S1")],
        [legacy_unit],
        {"candidates_summary": []},
        {"passed": True, "fail_reasons": []},
        adapter_needed_units=None,
        debug_zones=None,
    )

    assert status["provisional_first_render_count"] == 0
    assert status["provisional_first_render_units"] == []
# ─── u6 case 2 : provisional unit synthesized via u1 (chain_exhausted) ─
def test_u6_chain_exhausted_provisional_unit_listed_with_full_shape():
    """A provisional unit is reported once, with every audit field intact.

    The unit mirrors the chain-exhausted recovery shape (``restructure``
    label, ``provisional_rank_1`` selection path). The status dict must
    expose source_section_ids / phase_z_status / frame_template_id /
    frame_id / label / selection_path / fallback_reason / v4_rank so debug
    consumers can audit the unit without re-parsing the units list.
    """
    reason = "phase_z_status_not_allowed:extract_matched_zone"
    provisional_unit = _mk_unit(
        section_ids=["S1"],
        provisional=True,
        label="restructure",
        phase_z_status="extract_matched_zone",
        frame_template_id="MOCK_template_restructure_a",
        frame_id="MOCK_frame_002",
        selection_path="provisional_rank_1",
        fallback_reason=reason,
        v4_rank=1,
    )

    status = compute_slide_status(
        [_mk_section("S1")],
        [provisional_unit],
        {"candidates_summary": []},
        {"passed": True, "fail_reasons": []},
        adapter_needed_units=None,
        debug_zones=None,
    )

    assert status["provisional_first_render_count"] == 1
    reported = status["provisional_first_render_units"]
    assert len(reported) == 1

    expected_fields = {
        "source_section_ids": ["S1"],
        "phase_z_status": "extract_matched_zone",
        "frame_template_id": "MOCK_template_restructure_a",
        "frame_id": "MOCK_frame_002",
        "label": "restructure",
        "selection_path": "provisional_rank_1",
        "fallback_reason": reason,
        "v4_rank": 1,
    }
    for field, wanted in expected_fields.items():
        assert reported[0][field] == wanted, field

    # Provisional is a qualifier here, not a failure class: overall stays PASS.
    assert status["overall"] == "PASS"
# ─── u6 case 3 : empty-shell unit (u4) listed with __empty__ identifiers ─
def test_u6_empty_shell_unit_listed_with_empty_identifiers():
    """A lone empty-shell unit is listed with ``__empty__`` identifiers.

    The shell spans both sections and carries ``provisional=True``. The
    status dict must list it with ``frame_template_id`` / ``frame_id`` equal
    to ``"__empty__"`` and ``phase_z_status == "empty_shell"``, report
    ``full_mdx_coverage`` as False, and set ``overall`` to
    ``EMPTY_SHELL_NO_CONTENT`` (IMP-87 u4 honesty-defect inversion).
    """
    shell_fields = dict(
        merge_type="empty_shell",
        frame_template_id="__empty__",
        frame_id="__empty__",
        frame_number=0,
        confidence=0.0,
        label="empty_shell",
        phase_z_status="empty_shell",
        raw_content="raw S1\n\nraw S2",
        title="Section S1 / Section S2",
        v4_rank=None,
        selection_path="empty_shell",
        fallback_reason="no_v4_rank_1_for_any_section",
        score=0.0,
    )
    shell = _mk_unit(section_ids=["S1", "S2"], provisional=True, **shell_fields)
    sections = [_mk_section(sid) for sid in ("S1", "S2")]

    status = compute_slide_status(
        sections,
        [shell],
        {"candidates_summary": []},
        {"passed": True, "fail_reasons": []},
        adapter_needed_units=None,
        debug_zones=None,
    )

    assert status["provisional_first_render_count"] == 1
    reported = status["provisional_first_render_units"][0]
    for field, wanted in (
        ("frame_template_id", "__empty__"),
        ("frame_id", "__empty__"),
        ("phase_z_status", "empty_shell"),
        ("label", "empty_shell"),
        ("selection_path", "empty_shell"),
        ("fallback_reason", "no_v4_rank_1_for_any_section"),
    ):
        assert reported[field] == wanted, field
    assert reported["v4_rank"] is None

    # IMP-87 u4: a slide whose only rendered unit is the empty shell must
    # not be reported as fully covered or as PASS.
    assert status["full_mdx_coverage"] is False
    assert status["overall"] == "EMPTY_SHELL_NO_CONTENT"
# ─── u6 case 4 : mixed selection — provisional + normal units coexist ──
def test_u6_mixed_selection_counts_only_provisional_units():
    """Only the provisional unit is counted when normal units coexist.

    Three units are passed (S1 normal, S2 provisional ``reject``, S3
    normal). The provisional list must contain exactly the S2 entry and
    must not mention S1 or S3.
    """
    normal_first = _mk_unit(section_ids=["S1"], provisional=False)
    provisional_middle = _mk_unit(
        section_ids=["S2"],
        provisional=True,
        label="reject",
        phase_z_status="fallback_candidate",
        selection_path="provisional_rank_1",
        fallback_reason="phase_z_status_not_allowed:fallback_candidate",
    )
    normal_last = _mk_unit(section_ids=["S3"], provisional=False)
    sections = [_mk_section(sid) for sid in ("S1", "S2", "S3")]

    status = compute_slide_status(
        sections,
        [normal_first, provisional_middle, normal_last],
        {"candidates_summary": []},
        {"passed": True, "fail_reasons": []},
        adapter_needed_units=None,
        debug_zones=None,
    )

    assert status["provisional_first_render_count"] == 1
    reported = status["provisional_first_render_units"]
    assert len(reported) == 1
    assert reported[0]["source_section_ids"] == ["S2"]
    assert reported[0]["label"] == "reject"

    # Sections covered by normal units must be absent from the list.
    listed_ids = []
    for item in reported:
        listed_ids.extend(item["source_section_ids"])
    for normal_id in ("S1", "S3"):
        assert normal_id not in listed_ids
# ─── u6 case 5 : overall enum stability under provisional units ───────
def test_u6_overall_enum_unchanged_when_provisional_present_with_visual_pass():
    """One provisional unit + passing overflow still reports ``PASS``.

    Pins that the provisional signal is exposed through
    ``provisional_first_render_count`` rather than through ``overall``.
    """
    provisional_unit = _mk_unit(
        section_ids=["S1"],
        provisional=True,
        label="restructure",
        phase_z_status="extract_matched_zone",
        selection_path="provisional_rank_1",
        fallback_reason="phase_z_status_not_allowed:extract_matched_zone",
    )

    status = compute_slide_status(
        [_mk_section("S1")],
        [provisional_unit],
        {"candidates_summary": []},
        {"passed": True, "fail_reasons": []},
        adapter_needed_units=None,
        debug_zones=None,
    )

    assert status["overall"] == "PASS"
    assert status["provisional_first_render_count"] == 1
def test_u6_overall_enum_visual_regression_independent_of_provisional():
    """A failing overflow result drives ``overall``; provisional does not.

    With one provisional unit and ``passed=False`` overflow input, the
    status must be ``RENDERED_WITH_VISUAL_REGRESSION`` and the provisional
    count must still be 1.
    """
    failing_overflow = {"passed": False, "fail_reasons": ["zone overflow"]}
    provisional_unit = _mk_unit(
        section_ids=["S1"],
        provisional=True,
        selection_path="provisional_rank_1",
    )

    status = compute_slide_status(
        [_mk_section("S1")],
        [provisional_unit],
        {"candidates_summary": []},
        failing_overflow,
        adapter_needed_units=None,
        debug_zones=None,
    )

    assert status["overall"] == "RENDERED_WITH_VISUAL_REGRESSION"
    assert status["provisional_first_render_count"] == 1
# ─── u6 case 6 : note field documents the new qualifier ───────────────
def test_u6_note_field_mentions_provisional_first_render_count():
    """The human-readable ``note`` names the provisional qualifier key.

    The note is expected to explain that ``provisional_first_render_count
    > 0`` means the IMP-30 first-render invariant was triggered, so an
    operator reading Step 20 sees the qualifier without grepping JSON keys.
    The older ``adapter_needed_count`` / ``content_truncated_count``
    mentions must remain as well.
    """
    status = compute_slide_status(
        [_mk_section("S1")],
        [_mk_unit(section_ids=["S1"], provisional=False)],
        {"candidates_summary": []},
        {"passed": True, "fail_reasons": []},
        adapter_needed_units=None,
        debug_zones=None,
    )

    required_mentions = (
        "provisional_first_render_count",
        # Regression guard for the earlier qualifier guidance.
        "adapter_needed_count",
        "content_truncated_count",
    )
    for key_name in required_mentions:
        assert key_name in status["note"]
# ════════════════════════════════════════════════════════════════════════
# u7 — broader pipeline fixture coverage for the empty-shell + provisional
# retry end-to-end (deferred from u4 verification per mid-stage
# compaction). Threads real production functions together with
# synthetic V4 fixtures so the cross-layer data flow (u1 → u2 → u3 →
# u4 → u6) is exercised in one pass, instead of relying on per-unit
# tests that each touch only one layer.
# ════════════════════════════════════════════════════════════════════════
#
# Each u7 case runs the real chain:
# synthetic V4 dict
# → lookup_v4_match_with_fallback (u1)
# → plan_composition (u2 + u3 propagation / fill)
# → compute_slide_status (u6 qualifier surface)
#
# Monkeypatched dependencies mirror tests/test_phase_z2_v4_fallback.py
# (get_contract + compute_capacity_fit). MOCK_ naming + rank-by-field
# convention preserved (Codex #10 E1).
import pytest
from src import phase_z2_pipeline as _pz_pipeline
from src.phase_z2_pipeline import (
V4_LABEL_TO_PHASE_Z_STATUS as _PROD_LABEL_TO_STATUS,
compute_slide_status as _compute_slide_status,
lookup_v4_match_with_fallback as _real_lookup,
)
# Synthetic catalog stub — only MOCK_ templates considered registered.
# Mirrors test_phase_z2_v4_fallback.py shape so the two suites stay in sync.
_U7_MOCK_CATALOG: dict[str, object] = {
"MOCK_template_direct_a": object(),
"MOCK_template_restructure_a": object(),
"MOCK_template_reject_a": object(),
}
def _u7_get_contract(template_id: str):
    """Return the mock catalog entry for ``template_id``, or None if absent."""
    contract = _U7_MOCK_CATALOG.get(template_id, None)
    return contract
def _u7_capacity_fit_ok(template_id: str, raw_content: str) -> dict:
return {"fit_status": "ok"}
@pytest.fixture
def u7_patch_selector_deps(monkeypatch):
    """Swap the pipeline module's selector dependencies for the u7 stubs.

    ``get_contract`` and ``compute_capacity_fit`` are replaced on the
    pipeline module itself (the selector takes no injected dependencies,
    per Codex #10 E3), so the stubs are what the module-level names resolve
    to for the duration of the test.
    """
    stubs = (
        ("get_contract", _u7_get_contract),
        ("compute_capacity_fit", _u7_capacity_fit_ok),
    )
    for attribute, replacement in stubs:
        monkeypatch.setattr(_pz_pipeline, attribute, replacement)
def _u7_v4_section(judgments: list[dict]) -> dict:
return {"judgments_full32": judgments}
def _u7_j(rank: int, template_id: str, frame_id: str, label: str,
confidence: float = 0.9) -> dict:
return {
"frame_id": frame_id,
"frame_number": rank,
"template_id": template_id,
"confidence": confidence,
"label": label,
"v4_full_rank": rank,
}
def _u7_section(section_id: str) -> _pz_pipeline.MdxSection:
    """Return an ``MdxSection`` with a one-bullet body for ``section_id``.

    ``section_num`` is the id with leading ``"S"`` characters stripped and
    parsed as an int (0 when nothing remains).
    """
    digits = section_id.lstrip("S")
    number = int(digits) if digits else int("0")
    return _pz_pipeline.MdxSection(
        section_id=section_id,
        section_num=number,
        title=f"Section {section_id}",
        raw_content=f"- bullet for {section_id}\n",
    )
# ─── u7 case 1 : e2e chain_exhausted → provisional retry → slide_status ──
def test_u7_e2e_chain_exhausted_provisional_flows_through_layers(
    u7_patch_selector_deps,
):
    """End-to-end: an exhausted V4 chain becomes a provisional unit.

    S1 has an auto-renderable rank-1 judgment; S2 has only restructure /
    reject judgments. With ``allow_provisional=True`` on the lookup and
    ``allow_provisional_fill=True`` on ``plan_composition``, S2 must come
    back as a provisional ``provisional_rank_1`` unit and be the only entry
    in the status dict's provisional list, while ``overall`` stays ``PASS``.

    Mirrors the production ``_lookup_fn_provisional`` +
    ``plan_composition(allow_provisional_fill=True)`` recovery path
    (u4 Phase A).
    """
    normal_rank_1 = _u7_j(
        1, "MOCK_template_direct_a", "MOCK_frame_001", "use_as_is",
    )
    exhausted_chain = [
        _u7_j(1, "MOCK_template_restructure_a", "MOCK_frame_011", "restructure"),
        _u7_j(2, "MOCK_template_reject_a", "MOCK_frame_012", "reject"),
    ]
    v4 = {
        "mdx_sections": {
            "S1": _u7_v4_section([normal_rank_1]),
            "S2": _u7_v4_section(exhausted_chain),
        }
    }
    sections = [_u7_section(sid) for sid in ("S1", "S2")]

    def lookup_fn(sid: str):
        # The real selector returns (match, trace); only the match is used.
        found, _ = _real_lookup(
            v4, sid, raw_content="- a\n- b\n", allow_provisional=True,
        )
        return found

    plan_options = dict(
        v4_lookup_fn=lookup_fn,
        v4_label_to_status=_PROD_LABEL_TO_STATUS,
        allowed_statuses=_ALLOWED_STATUSES,
        capacity_fit_fn=None,
        v4_candidates_lookup_fn=None,
        allow_provisional_fill=True,
    )
    units, layout_preset, comp_debug = plan_composition(sections, **plan_options)

    # Index units by the first section they cover.
    unit_for = {}
    for unit in units:
        unit_for[unit.source_section_ids[0]] = unit
    assert set(unit_for) == {"S1", "S2"}

    normal, recovered = unit_for["S1"], unit_for["S2"]
    assert normal.provisional is False
    assert normal.label == "use_as_is"
    assert recovered.provisional is True
    assert recovered.label == "restructure"
    assert recovered.selection_path == "provisional_rank_1"
    assert layout_preset == "horizontal-2"

    status = _compute_slide_status(
        sections, units, comp_debug,
        overflow={"passed": True, "fail_reasons": []},
        adapter_needed_units=None, debug_zones=None,
    )

    # Only S2 is surfaced as provisional.
    assert status["provisional_first_render_count"] == 1
    reported = status["provisional_first_render_units"][0]
    assert reported["source_section_ids"] == ["S2"]
    assert reported["selection_path"] == "provisional_rank_1"
    assert reported["frame_template_id"] == "MOCK_template_restructure_a"
    # Full coverage + visual pass keeps overall at PASS.
    assert status["overall"] == "PASS"
# ─── u7 case 2 : e2e zero-V4 → u4 empty-shell synthesis → slide_status ───
def test_u7_e2e_zero_v4_empty_shell_status_surface(u7_patch_selector_deps):
    """End-to-end zero-V4 path: an empty shell reaches the status surface.

    With no V4 judgments for any section, ``plan_composition`` (both opt-in
    flags on) must return no units and no layout preset. The test then
    builds the empty-shell unit by hand, simulating the production u4
    Phase B synthesis, and expects the status dict to list it as
    provisional, report ``full_mdx_coverage`` as False, and set ``overall``
    to ``EMPTY_SHELL_NO_CONTENT`` (IMP-87 u4 honesty-defect inversion).
    """
    v4 = {"mdx_sections": {}}  # no section has any judgment
    sections = [_u7_section(sid) for sid in ("S1", "S2")]

    def lookup_fn(sid: str):
        found, _ = _real_lookup(
            v4, sid, raw_content="- a\n", allow_provisional=True,
        )
        return found

    units_first, preset_first, _ = plan_composition(
        sections,
        v4_lookup_fn=lookup_fn,
        v4_label_to_status=_PROD_LABEL_TO_STATUS,
        allowed_statuses=_ALLOWED_STATUSES,
        capacity_fit_fn=None,
        v4_candidates_lookup_fn=None,
        allow_provisional_fill=True,
    )

    # Without V4 evidence the recovery cannot fabricate a match; this empty
    # result is the trigger condition for the empty-shell synthesis.
    assert units_first == []
    assert preset_first is None

    # Hand-built stand-in for the production empty-shell unit.
    covered_ids = [section.section_id for section in sections]
    joined_raw = "\n\n".join([section.raw_content for section in sections])
    joined_title = " / ".join([section.title for section in sections])
    empty_shell = CompositionUnit(
        source_section_ids=covered_ids,
        merge_type="empty_shell",
        frame_template_id="__empty__",
        frame_id="__empty__",
        frame_number=0,
        confidence=0.0,
        label="empty_shell",
        phase_z_status="empty_shell",
        raw_content=joined_raw,
        title=joined_title,
        v4_rank=None,
        selection_path="empty_shell",
        fallback_reason="no_v4_rank_1_for_any_section",
        score=0.0,
        provisional=True,
    )

    status = _compute_slide_status(
        sections, [empty_shell], comp_debug={"candidates_summary": []},
        overflow={"passed": True, "fail_reasons": []},
        adapter_needed_units=None, debug_zones=None,
    )

    assert status["provisional_first_render_count"] == 1
    reported = status["provisional_first_render_units"][0]
    assert reported["phase_z_status"] == "empty_shell"
    assert reported["frame_template_id"] == "__empty__"
    assert reported["source_section_ids"] == ["S1", "S2"]

    # IMP-87 u4: a zero-V4 slide must not pass itself off as PASS merely
    # because the overflow check passed.
    assert status["full_mdx_coverage"] is False
    assert status["overall"] == "EMPTY_SHELL_NO_CONTENT"
# ─── u7 case 3 : e2e normal path unchanged when opt-in flags both on ─────
def test_u7_e2e_normal_path_unchanged_with_opt_in_flags(u7_patch_selector_deps):
    """IMP-05 regression guard: opt-in flags do not disturb the normal path.

    Both sections have an auto-renderable rank-1 judgment. Even with
    ``allow_provisional=True`` on the lookup and
    ``allow_provisional_fill=True`` on ``plan_composition``, every unit must
    be a non-provisional ``rank_1`` selection and the status dict must
    report zero provisional units with ``overall == "PASS"``.
    """
    frame_for = {"S1": "MOCK_frame_001", "S2": "MOCK_frame_002"}
    v4 = {
        "mdx_sections": {
            sid: _u7_v4_section([
                _u7_j(1, "MOCK_template_direct_a", frame_id, "use_as_is"),
            ])
            for sid, frame_id in frame_for.items()
        }
    }
    sections = [_u7_section(sid) for sid in ("S1", "S2")]

    def lookup_fn(sid: str):
        found, _ = _real_lookup(
            v4, sid, raw_content="- a\n", allow_provisional=True,
        )
        return found

    units, layout_preset, comp_debug = plan_composition(
        sections,
        v4_lookup_fn=lookup_fn,
        v4_label_to_status=_PROD_LABEL_TO_STATUS,
        allowed_statuses=_ALLOWED_STATUSES,
        capacity_fit_fn=None,
        v4_candidates_lookup_fn=None,
        allow_provisional_fill=True,
    )

    first_ids = {unit.source_section_ids[0] for unit in units}
    assert first_ids == {"S1", "S2"}
    assert all(unit.provisional is False for unit in units)
    assert all(unit.selection_path == "rank_1" for unit in units)
    assert layout_preset == "horizontal-2"

    status = _compute_slide_status(
        sections, units, comp_debug,
        overflow={"passed": True, "fail_reasons": []},
        adapter_needed_units=None, debug_zones=None,
    )

    assert status["provisional_first_render_count"] == 0
    assert status["provisional_first_render_units"] == []
    assert status["overall"] == "PASS"
# ─── u7 case 4 : IMP-30 invariants — MDX content preserved + audit trail ──
def test_u7_imp30_invariants_mdx_preserved_and_audit_trail_surfaced(
    u7_patch_selector_deps,
):
    """Consolidates two IMP-30 issue-body invariants.

    - MDX content preserved (no rewrite): the provisional unit's
      ``raw_content`` equals the source section's ``raw_content`` exactly.
    - Audit trail: ``comp_debug["candidates_summary"]`` records S2's pick
      with ``selection_state == "selected_provisional"``, so the recovery
      is observable rather than silent.
    """
    raw_s2 = "- restructure-only bullet alpha\n- restructure-only bullet beta\n"
    section_s2 = _pz_pipeline.MdxSection(
        section_id="S2",
        section_num=2,
        title="Section S2",
        raw_content=raw_s2,
    )
    sections = [_u7_section("S1"), section_s2]
    v4 = {
        "mdx_sections": {
            "S1": _u7_v4_section([
                _u7_j(1, "MOCK_template_direct_a", "MOCK_frame_001", "use_as_is"),
            ]),
            "S2": _u7_v4_section([
                _u7_j(
                    1, "MOCK_template_restructure_a", "MOCK_frame_011",
                    "restructure",
                ),
            ]),
        }
    }

    def lookup_fn(sid: str):
        # Hand each section's own raw_content to the real lookup.
        if sid == "S1":
            raw = sections[0].raw_content
        else:
            raw = raw_s2
        found, _ = _real_lookup(
            v4, sid, raw_content=raw, allow_provisional=True,
        )
        return found

    units, _preset, comp_debug = plan_composition(
        sections,
        v4_lookup_fn=lookup_fn,
        v4_label_to_status=_PROD_LABEL_TO_STATUS,
        allowed_statuses=_ALLOWED_STATUSES,
        capacity_fit_fn=None,
        v4_candidates_lookup_fn=None,
        allow_provisional_fill=True,
    )

    unit_for = {}
    for unit in units:
        unit_for[unit.source_section_ids[0]] = unit
    recovered = unit_for["S2"]
    assert recovered.provisional is True
    # Content must arrive unmodified.
    assert recovered.raw_content == raw_s2

    # Collect the audit rows that mark S2 as the provisional selection.
    s2_selected = []
    for row in comp_debug.get("candidates_summary", []):
        if row["source_section_ids"] == ["S2"]:
            if row["selection_state"] == "selected_provisional":
                s2_selected.append(row)
    assert len(s2_selected) == 1
    assert s2_selected[0]["template_id"] == "MOCK_template_restructure_a"
    assert s2_selected[0]["selection_path"] == "provisional_rank_1"
# ─── u7 case 5 : all-restructure/reject → every section gets placeholder ──
def test_u7_imp30_all_restructure_only_each_section_gets_provisional_unit(
    u7_patch_selector_deps,
):
    """Every restructure/reject-only section still gets a provisional unit.

    Issue-body invariant: a section that has only restructure / reject
    judgments must also receive a placeholder zone + trace. With all three
    sections in that state, the plan must cover S1, S2 and S3 with
    provisional units, return a layout preset, record all three as
    ``selected_provisional`` in the audit summary, and the status dict must
    count three provisional units while ``overall`` stays ``PASS``.
    """
    all_ids = {"S1", "S2", "S3"}
    rank_1_for = {
        "S1": ("MOCK_template_restructure_a", "MOCK_frame_011", "restructure"),
        "S2": ("MOCK_template_reject_a", "MOCK_frame_012", "reject"),
        "S3": ("MOCK_template_restructure_a", "MOCK_frame_013", "restructure"),
    }
    sections = [_u7_section(sid) for sid in ("S1", "S2", "S3")]
    v4 = {
        "mdx_sections": {
            sid: _u7_v4_section([_u7_j(1, template_id, frame_id, label)])
            for sid, (template_id, frame_id, label) in rank_1_for.items()
        }
    }

    def lookup_fn(sid: str):
        found, _ = _real_lookup(
            v4, sid, raw_content="- bullet\n", allow_provisional=True,
        )
        return found

    units, layout_preset, comp_debug = plan_composition(
        sections,
        v4_lookup_fn=lookup_fn,
        v4_label_to_status=_PROD_LABEL_TO_STATUS,
        allowed_statuses=_ALLOWED_STATUSES,
        capacity_fit_fn=None,
        v4_candidates_lookup_fn=None,
        allow_provisional_fill=True,
    )

    # Each section is covered, and only by provisional units.
    unit_for = {}
    for unit in units:
        unit_for[unit.source_section_ids[0]] = unit
    assert set(unit_for) == all_ids
    assert all(unit.provisional is True for unit in units)
    assert layout_preset is not None  # the zero-unit abort was not taken

    # The audit summary marks all three sections as provisional picks.
    selected_provisional_sids = set()
    for row in comp_debug.get("candidates_summary", []):
        if row["selection_state"] == "selected_provisional":
            selected_provisional_sids.add(row["source_section_ids"][0])
    assert selected_provisional_sids == all_ids

    status = _compute_slide_status(
        sections, units, comp_debug,
        overflow={"passed": True, "fail_reasons": []},
        adapter_needed_units=None, debug_zones=None,
    )

    # Qualifier, not enum: three provisional units, overall still PASS.
    assert status["provisional_first_render_count"] == 3
    reported_ids = set()
    for item in status["provisional_first_render_units"]:
        reported_ids.add(item["source_section_ids"][0])
    assert reported_ids == all_ids
    assert status["overall"] == "PASS"