C.E.L_Slide_test2/tests/test_phase_z2_imp30_first_render.py

"""IMP-30 first-render invariant tests (per-unit slice).

This file is the shared regression home for IMP-30 units u2~u7. Each
implementation unit adds its own focused tests; u7 (regression coverage)
will broaden the surface (synthetic V4 fixtures for chain_exhausted
provisional, zero-V4 empty-shell, normal-path unchanged).

u3 scope (this slice) — select_composition_units last-resort provisional
fill for uncovered sections + _candidate_state "selected_provisional":
  1. default-off behavior is byte-identical to pre-u3 (IMP-05 guard).
  2. opt-in fills uncovered sections with provisional candidates whose
     selection_state would otherwise be "filtered_status".
  3. opt-in never displaces normal greedy selections.
  4. opt-in respects coverage non-overlap (no section selected twice).
  5. plan_composition._candidate_state returns "selected_provisional"
     for fills and "selected" for normal greedy picks.

Synthetic naming convention (Codex #10 E1):
  - MOCK_ prefix mandatory
  - _a / _b suffixes = enumeration only (NOT ordering / priority)
  - rank/order expressed by V4 rank field, NEVER ID suffix
"""
from __future__ import annotations

from dataclasses import dataclass, field
from typing import Optional

from src.phase_z2_composition import (
    CompositionUnit,
    plan_composition,
    select_composition_units,
)


# ─── Synthetic match shape (duck-typed V4Match-like) ───────────────────

@dataclass
class _StubV4Match:
    """Duck-typed V4Match surface used by collect_candidates / score path.

    Mirrors src.phase_z2_pipeline.V4Match fields touched by composition:
    template_id / frame_id / frame_number / confidence / label / v4_rank /
    selection_path / fallback_reason / provisional. Composition module
    intentionally does not import V4Match (circular dep avoidance), so a
    plain stub object with the same attributes is the contract.
    """
    template_id: str
    frame_id: str
    frame_number: int
    confidence: float
    label: str
    v4_rank: Optional[int] = None
    selection_path: str = "rank_1"
    fallback_reason: Optional[str] = None
    provisional: bool = False


@dataclass
class _StubSection:
    """Minimal section surface used by collect_candidates (section_id /
    raw_content / title). Matches MdxSection's attribute names without
    importing pipeline (keeps test isolated to composition module)."""
    section_id: str
    title: str = ""
    raw_content: str = ""


# Phase Z status mapping fixture — only the keys exercised here are listed.
# Real mapping (V4_LABEL_TO_PHASE_Z_STATUS in pipeline) is broader; this
# stub deliberately mirrors only what the tests touch.
_LABEL_TO_STATUS = {
    "use_as_is": "matched_zone",
    "light_edit": "adapt_matched_zone",
    "restructure": "extract_matched_zone",
    "reject": "fallback_candidate",
}

_ALLOWED_STATUSES = {"matched_zone", "adapt_matched_zone"}


# ─── Helpers ────────────────────────────────────────────────────────────

def _make_lookup(matches_by_section: dict[str, _StubV4Match]):
    """Return v4_lookup_fn (section_id -> _StubV4Match | None)."""
    def _fn(section_id: str):
        return matches_by_section.get(section_id)
    return _fn


def _make_candidates_lookup_empty():
    """v4_candidates_lookup_fn that always returns [] (no Step 6-A axis here)."""
    def _fn(section_id: str):
        return []
    return _fn


# ─── u3 case 1 : default-off behavior byte-identical to pre-u3 ─────────

def test_u3_default_off_preserves_imp05_behavior():
    """IMP-05 regression guard: provisional fill stays off unless requested.

    Fixture:
      - S1 -> use_as_is, provisional=False (ordinary selection)
      - S2 -> restructure, provisional=True (a would-be fill candidate)

    plan_composition is called WITHOUT allow_provisional_fill, so the test
    expects exactly one unit (S1), the "single" preset, and the S2
    candidate reported in the debug summary as "filtered_status".
    """
    direct_match = _StubV4Match(
        template_id="MOCK_template_direct_a",
        frame_id="MOCK_frame_001",
        frame_number=1,
        confidence=0.9,
        label="use_as_is",
        v4_rank=1,
    )
    provisional_match = _StubV4Match(
        template_id="MOCK_template_restructure_a",
        frame_id="MOCK_frame_002",
        frame_number=2,
        confidence=0.65,
        label="restructure",
        v4_rank=1,
        selection_path="provisional_rank_1",
        fallback_reason="phase_z_status_not_allowed:extract_matched_zone",
        provisional=True,
    )
    lookup = _make_lookup({"S1": direct_match, "S2": provisional_match})

    # allow_provisional_fill is deliberately not passed (default path).
    units, preset, debug = plan_composition(
        [_StubSection("S1"), _StubSection("S2")],
        lookup,
        _LABEL_TO_STATUS,
        _ALLOWED_STATUSES,
        v4_candidates_lookup_fn=_make_candidates_lookup_empty(),
    )

    assert len(units) == 1
    only_unit = units[0]
    assert only_unit.source_section_ids == ["S1"]
    assert only_unit.provisional is False
    assert preset == "single"

    # The S2 candidate is still listed in the debug summary, but filtered.
    entries_by_sections = {}
    for entry in debug["candidates_summary"]:
        entries_by_sections[tuple(entry["source_section_ids"])] = entry
    assert entries_by_sections[("S2",)]["selection_state"] == "filtered_status"


# ─── u3 case 2 : opt-in fills uncovered sections with provisional ──────

def test_u3_opt_in_fills_uncovered_with_provisional():
    """IMP-30 u3 opt-in path: the provisional candidate fills section S2.

    Same fixture as the default-off test (S1 use_as_is, S2 provisional
    restructure) but with allow_provisional_fill=True. Expected: both
    sections covered, the S1 unit non-provisional, the S2 unit provisional
    with label "restructure", and the two-unit "horizontal-2" preset.
    """
    direct_match = _StubV4Match(
        template_id="MOCK_template_direct_a",
        frame_id="MOCK_frame_001",
        frame_number=1,
        confidence=0.9,
        label="use_as_is",
        v4_rank=1,
    )
    provisional_match = _StubV4Match(
        template_id="MOCK_template_restructure_a",
        frame_id="MOCK_frame_002",
        frame_number=2,
        confidence=0.65,
        label="restructure",
        v4_rank=1,
        selection_path="provisional_rank_1",
        fallback_reason="phase_z_status_not_allowed:extract_matched_zone",
        provisional=True,
    )

    units, preset, _debug = plan_composition(
        [_StubSection("S1"), _StubSection("S2")],
        _make_lookup({"S1": direct_match, "S2": provisional_match}),
        _LABEL_TO_STATUS,
        _ALLOWED_STATUSES,
        v4_candidates_lookup_fn=_make_candidates_lookup_empty(),
        allow_provisional_fill=True,
    )

    # Every section is covered once the fill is enabled.
    covered_ids = set()
    for unit in units:
        covered_ids.update(unit.source_section_ids)
    assert covered_ids == {"S1", "S2"}

    # Index units by the sections they cover.
    unit_for = {}
    for unit in units:
        unit_for[tuple(unit.source_section_ids)] = unit
    greedy_unit = unit_for[("S1",)]
    filled_unit = unit_for[("S2",)]

    # The ordinary pick keeps provisional=False ...
    assert greedy_unit.provisional is False
    # ... while the fill carries the provisional flag and original label.
    assert filled_unit.provisional is True
    assert filled_unit.label == "restructure"
    # Two units -> two-zone layout preset.
    assert preset == "horizontal-2"


# ─── u3 case 3 : _candidate_state distinguishes selected vs provisional ─

def test_u3_candidate_state_marks_selected_provisional():
    """The debug summary distinguishes ordinary picks from provisional fills.

    With allow_provisional_fill=True the candidates_summary entry for S1
    must report selection_state "selected" and the entry for S2 must report
    "selected_provisional".
    """
    stub_matches = {
        "S1": _StubV4Match(
            template_id="MOCK_template_direct_a",
            frame_id="MOCK_frame_001",
            frame_number=1,
            confidence=0.9,
            label="use_as_is",
            v4_rank=1,
        ),
        "S2": _StubV4Match(
            template_id="MOCK_template_restructure_a",
            frame_id="MOCK_frame_002",
            frame_number=2,
            confidence=0.65,
            label="restructure",
            v4_rank=1,
            selection_path="provisional_rank_1",
            provisional=True,
        ),
    }

    # Only the debug payload is inspected here.
    _units, _preset, debug = plan_composition(
        [_StubSection("S1"), _StubSection("S2")],
        _make_lookup(stub_matches),
        _LABEL_TO_STATUS,
        _ALLOWED_STATUSES,
        v4_candidates_lookup_fn=_make_candidates_lookup_empty(),
        allow_provisional_fill=True,
    )

    entries_by_sections = {}
    for entry in debug["candidates_summary"]:
        entries_by_sections[tuple(entry["source_section_ids"])] = entry

    assert entries_by_sections[("S1",)]["selection_state"] == "selected"
    assert (
        entries_by_sections[("S2",)]["selection_state"]
        == "selected_provisional"
    )


# ─── u3 case 4 : opt-in preserves non-overlap (no double coverage) ─────

def test_u3_opt_in_respects_coverage_non_overlap():
    """Provisional fill must never cover a section twice.

    Fixture:
      - P-sub-1 -> use_as_is, non-provisional
      - P-sub-2 -> restructure, provisional=True
      - no match registered for a parent section

    The "<base>-sub-<n>" ids were chosen by the original author so that the
    composition module may additionally synthesize a merged candidate over
    both children (NOTE(review): that parent-derivation behavior lives in
    src.phase_z2_composition and is not visible from this file).

    Whatever mix of single / merged / fill units is selected, the test only
    pins the invariant: each of the two sections is covered exactly once.
    """
    child_sections = [
        _StubSection("P-sub-1", raw_content="alpha"),
        _StubSection("P-sub-2", raw_content="beta"),
    ]
    child_matches = {
        "P-sub-1": _StubV4Match(
            template_id="MOCK_template_direct_a",
            frame_id="MOCK_frame_001",
            frame_number=1,
            confidence=0.9,
            label="use_as_is",
            v4_rank=1,
        ),
        "P-sub-2": _StubV4Match(
            template_id="MOCK_template_restructure_a",
            frame_id="MOCK_frame_002",
            frame_number=2,
            confidence=0.65,
            label="restructure",
            v4_rank=1,
            provisional=True,
        ),
        # Intentionally no entry for the parent id.
    }

    units, _preset, _debug = plan_composition(
        child_sections,
        _make_lookup(child_matches),
        _LABEL_TO_STATUS,
        _ALLOWED_STATUSES,
        v4_candidates_lookup_fn=_make_candidates_lookup_empty(),
        allow_provisional_fill=True,
    )

    covered_ids = [sid for unit in units for sid in unit.source_section_ids]
    # Non-overlap invariant: no section id repeats across units.
    assert len(covered_ids) == len(set(covered_ids))
    # Full coverage: both children are present.
    assert set(covered_ids) == {"P-sub-1", "P-sub-2"}


# ─── u3 case 5 : opt-in with no provisional candidates is a no-op ──────

def test_u3_opt_in_noop_when_no_provisional_candidates():
    """Enabling the fill changes nothing when no candidate is provisional.

    S2 carries a restructure match with provisional=False, so even with
    allow_provisional_fill=True the test expects the default-off outcome:
    one unit (S1), the "single" preset, and S2 reported as
    "filtered_status" in the debug summary.
    """
    direct_match = _StubV4Match(
        template_id="MOCK_template_direct_a",
        frame_id="MOCK_frame_001",
        frame_number=1,
        confidence=0.9,
        label="use_as_is",
        v4_rank=1,
    )
    # Restructure label, but explicitly NOT marked provisional.
    plain_restructure_match = _StubV4Match(
        template_id="MOCK_template_restructure_a",
        frame_id="MOCK_frame_002",
        frame_number=2,
        confidence=0.65,
        label="restructure",
        v4_rank=1,
        provisional=False,
    )

    units, preset, debug = plan_composition(
        [_StubSection("S1"), _StubSection("S2")],
        _make_lookup({"S1": direct_match, "S2": plain_restructure_match}),
        _LABEL_TO_STATUS,
        _ALLOWED_STATUSES,
        v4_candidates_lookup_fn=_make_candidates_lookup_empty(),
        allow_provisional_fill=True,
    )

    assert len(units) == 1
    assert units[0].source_section_ids == ["S1"]
    assert preset == "single"

    # S2 is not provisional, so the fill pass leaves it filtered.
    entries_by_sections = {}
    for entry in debug["candidates_summary"]:
        entries_by_sections[tuple(entry["source_section_ids"])] = entry
    assert entries_by_sections[("S2",)]["selection_state"] == "filtered_status"


# ─── u3 case 6 : select_composition_units direct invocation parity ─────

def test_u3_select_composition_units_default_off_signature():
    """select_composition_units still accepts the two-positional-arg call.

    Backward-compat guard for callers that import the function directly and
    pass only (candidates, allowed_statuses) without any u3 keyword.
    """
    # Hand-built candidate; collect_candidates is not involved.
    candidate_fields = dict(
        source_section_ids=["S1"],
        merge_type="single",
        frame_template_id="MOCK_template_direct_a",
        frame_id="MOCK_frame_001",
        frame_number=1,
        confidence=0.9,
        label="use_as_is",
        phase_z_status="matched_zone",
        raw_content="alpha",
        title="S1",
    )
    candidate = CompositionUnit(**candidate_fields)

    selected = select_composition_units([candidate], _ALLOWED_STATUSES)

    assert len(selected) == 1
    assert selected[0].source_section_ids == ["S1"]


def test_u3_select_composition_units_opt_in_direct():
    """Direct opt-in call fills S2 from the provisional candidate.

    Two hand-built candidates are passed straight to
    select_composition_units with all_section_ids and
    allow_provisional_fill=True. Expected: two units, S1 non-provisional
    and S2 provisional.
    """
    greedy_candidate = CompositionUnit(
        source_section_ids=["S1"],
        merge_type="single",
        frame_template_id="MOCK_template_direct_a",
        frame_id="MOCK_frame_001",
        frame_number=1,
        confidence=0.9,
        label="use_as_is",
        phase_z_status="matched_zone",
        raw_content="alpha",
        title="S1",
    )
    provisional_candidate = CompositionUnit(
        source_section_ids=["S2"],
        merge_type="single",
        frame_template_id="MOCK_template_restructure_a",
        frame_id="MOCK_frame_002",
        frame_number=2,
        confidence=0.65,
        label="restructure",
        phase_z_status="extract_matched_zone",
        raw_content="beta",
        title="S2",
        provisional=True,
    )
    pool = [greedy_candidate, provisional_candidate]

    selected = select_composition_units(
        pool,
        _ALLOWED_STATUSES,
        all_section_ids=["S1", "S2"],
        allow_provisional_fill=True,
    )

    assert len(selected) == 2
    unit_for = {}
    for unit in selected:
        unit_for[tuple(unit.source_section_ids)] = unit
    assert unit_for[("S1",)].provisional is False
    assert unit_for[("S2",)].provisional is True


# ════════════════════════════════════════════════════════════════════════
# u4 — pipeline abort guard empty-shell synthesis
# ════════════════════════════════════════════════════════════════════════
#
# u4 replaces the pre-IMP-30 `sys.exit(1)` at the composition_planner abort
# guard with two-phase recovery: provisional retry (Phase A, opt-in u1+u3)
# then terminal empty-shell (Phase B). The shell is a single CompositionUnit
# with frame_template_id="__empty__" and preset="single"; the per-unit
# for-loop's __empty__ branch bypasses mapper/contract and emits a
# placeholder zones_data/debug_zones record so final.html still writes.
#
# These tests verify the composition-side invariants that u4 relies on:
#   - CompositionUnit can be constructed in the empty-shell shape.
#   - The shell shape carries the data needed for u5 (provisional flag) /
#     u6 (status qualifier) / render_slide __empty__ branch (template_id).
# The pipeline-level integration (provisional retry / empty-shell synthesis
# at the abort guard, plus the per-unit __empty__ bypass) is covered by
# u7 (regression coverage) with synthetic V4 fixtures.


def test_u4_empty_shell_unit_shape_matches_pipeline_synthesis():
    """CompositionUnit accepts, and round-trips, the u4 empty-shell shape.

    The test constructs a unit the way the u4 abort guard is described as
    doing (see the u4 notes above in this module) and checks every field
    reads back unchanged. Intended downstream meaning of each field, as
    recorded by the original author (NOTE(review): the pipeline / template
    consumers are not visible from this file):
      - frame_template_id == "__empty__": render path short-circuits the
        partial HTML.
      - phase_z_status == "empty_shell": distinct from the matched /
        adapt / extract / fallback statuses.
      - provisional == True: drives the u5 provisional marker.
      - source_section_ids: lists every aligned section id.
      - selection_path / fallback_reason / rationale: audit trail.
    """
    section_ids = ["S1", "S2", "S3"]
    section_bodies = ["alpha", "beta", "gamma"]
    section_titles = ["First", "Second", "Third"]
    audit_rationale = {
        "imp30_u4": "terminal_first_render_empty_shell",
        "reason": "no_rank_1_V4_evidence_in_any_section",
        "aligned_section_ids": section_ids,
    }

    shell = CompositionUnit(
        source_section_ids=list(section_ids),
        merge_type="empty_shell",
        frame_template_id="__empty__",
        frame_id="__empty__",
        frame_number=0,
        confidence=0.0,
        label="empty_shell",
        phase_z_status="empty_shell",
        raw_content="\n\n".join(section_bodies),
        title=" / ".join(section_titles),
        v4_rank=None,
        selection_path="empty_shell",
        fallback_reason="no_v4_rank_1_for_any_section",
        score=0.0,
        rationale=audit_rationale,
        provisional=True,
    )

    # Template / frame identity marks the unit as the empty shell.
    assert shell.frame_template_id == "__empty__"
    assert shell.frame_id == "__empty__"
    assert shell.label == "empty_shell"
    assert shell.phase_z_status == "empty_shell"
    assert shell.provisional is True
    # Audit trail fields.
    assert shell.selection_path == "empty_shell"
    assert shell.fallback_reason == "no_v4_rank_1_for_any_section"
    assert shell.source_section_ids == section_ids
    # No V4 evidence: rank absent, scores zeroed.
    assert shell.v4_rank is None
    assert shell.confidence == 0.0
    assert shell.score == 0.0
    # Raw section content is kept verbatim, joined by blank lines.
    assert shell.raw_content == "alpha\n\nbeta\n\ngamma"
    # Rationale payload is stored as given.
    assert shell.rationale["imp30_u4"] == "terminal_first_render_empty_shell"
    assert shell.rationale["aligned_section_ids"] == section_ids


def test_u4_empty_shell_unit_default_provisional_is_false():
    """A plain CompositionUnit is not provisional and not an empty shell.

    Smoke test that ``provisional`` defaults to False when not passed, so
    consumers reading unit.provisional get no false positive on normal
    units.
    """
    ordinary_fields = dict(
        source_section_ids=["S1"],
        merge_type="single",
        frame_template_id="MOCK_template_direct_a",
        frame_id="MOCK_frame_001",
        frame_number=1,
        confidence=0.9,
        label="use_as_is",
        phase_z_status="matched_zone",
        raw_content="alpha",
        title="S1",
    )
    ordinary_unit = CompositionUnit(**ordinary_fields)

    assert ordinary_unit.provisional is False
    assert ordinary_unit.frame_template_id != "__empty__"


def test_u4_empty_shell_phase_z_status_outside_mvp1_allowed():
    """'empty_shell' must not be one of the allowed statuses.

    If it were, code filtering units by allowed statuses would treat the
    shell as an ordinary matched zone and lose the "needs adaptation"
    signal.

    NOTE(review): this checks the module-local _ALLOWED_STATUSES fixture,
    which the original author states mirrors the pipeline's
    MVP1_ALLOWED_STATUSES; the pipeline constant itself is not imported
    here.
    """
    shell_status = "empty_shell"
    assert shell_status not in _ALLOWED_STATUSES


# ════════════════════════════════════════════════════════════════════════
# u5 — zones_data carries provisional flag; slide_base.html zone div adds
#       zone--provisional class + inline needs-adaptation badge
# ════════════════════════════════════════════════════════════════════════
#
# u5 wires the unit.provisional signal (set by u2 from V4Match.provisional in
# u1, or directly by u4 empty-shell synthesis) through the zones_data payload
# into the slide_base.html template. Visual contract:
#   - zones_data[i]['provisional'] = bool (default False; True only for IMP-30
#     opt-in synthesized units).
#   - slide_base.html zone div gets `zone--provisional` class when True; an
#     inline `<span class="zone__needs-adaptation-badge">needs adaptation</span>`
#     element is rendered inside the zone (top-right corner via absolute pos).
#   - data-provisional="1" attribute set for downstream selectors / overflow
#     checker / e2e tooling.
#
# The composition / pipeline-level handoff is exercised by u3 / u4 already.
# u5 tests focus on:
#   - template-rendering output: class + badge HTML correctly emitted ONLY when
#     zones[i].provisional is truthy. (default-off path unchanged.)
#   - byte-equivalence: non-provisional zones render the same div shape as
#     pre-u5 (just no zone--provisional class / no badge element).

import re
from pathlib import Path

from jinja2 import Environment, FileSystemLoader, select_autoescape


# ─── u5 helpers ────────────────────────────────────────────────────────

def _render_slide_base(zones: list[dict], *, layout_preset: str = "single",
                       layout_css: dict | None = None) -> str:
    """Render templates/phase_z2/slide_base.html with a minimal zones list.

    The template is loaded straight through Jinja2 from the ``templates``
    directory that sits next to this file's parent directory, so the u5
    tests can check zone-level attributes without running render_slide().

    Args:
        zones: Zone dicts handed to the template. A zone without a
            ``partial_html`` key is rendered with a fixed stub partial.
            The caller's dicts are NOT modified; shallow copies are
            rendered instead.
        layout_preset: Value passed to the template as ``layout_preset``.
        layout_css: Grid description with ``cols`` / ``rows`` / ``areas``
            keys; defaults to a one-cell "single" grid when omitted.

    Returns:
        The rendered HTML document as a string.
    """
    template_dir = Path(__file__).resolve().parents[1] / "templates" / "phase_z2"
    env = Environment(
        loader=FileSystemLoader(str(template_dir)),
        autoescape=select_autoescape(["html"]),
    )
    if layout_css is None:
        layout_css = {
            "cols": "1fr",
            "rows": "1fr",
            "areas": '"single"',
        }
    # render_slide normally populates partial_html; use a stable placeholder
    # so assertions can target zone-level attributes without depending on
    # frame template internals. Build copies rather than calling setdefault
    # on the caller's dicts, so a fixture reused across renders is never
    # silently altered. An existing partial_html in the zone wins over the
    # stub because it is unpacked last.
    stub_partial = "<div class=\"_stub_partial\">stub</div>"
    render_zones = [{"partial_html": stub_partial, **zone} for zone in zones]
    base = env.get_template("slide_base.html")
    return base.render(
        slide_title="IMP-30 u5 test slide",
        slide_footer=None,
        zones=render_zones,
        layout_preset=layout_preset,
        layout_css=layout_css,
        gap_px=12,
        token_css="",  # empty token CSS — not under test here
        embedded_mode="standalone",
    )


def _zone_div_for_position(html: str, position: str) -> str:
    """Return the opening tag + immediate inner content (up to but not
    including partial_html) for the zone div at a given `data-zone-position`
    value. Tight enough for class/attribute assertions, lenient enough not
    to depend on partial_html internals."""
    pattern = re.compile(
        r'<div class="zone[^"]*"\s+data-zone-position="' + re.escape(position) + r'"[^>]*>'
        r'(?:\s*<span class="zone__needs-adaptation-badge"[^>]*>[^<]*</span>)?',
        re.DOTALL,
    )
    match = pattern.search(html)
    if not match:
        return ""
    return match.group(0)


def _all_zone_div_openings(html: str) -> list[str]:
    """Return every zone-div opening tag in the layout body. Used to scope
    class / attribute assertions away from the CSS <style> block (which
    contains `.zone--provisional` / `.zone__needs-adaptation-badge` as
    selectors — must not be mistaken for zone-div class emissions)."""
    return re.findall(
        r'<div class="zone[^"]*"[^>]*data-zone-position="[^"]*"[^>]*>',
        html,
    )


def _all_badge_spans(html: str) -> list[str]:
    """Return every actual badge `<span>` element in the rendered body
    (NOT the `.zone__needs-adaptation-badge` selector in the <style> block).
    Used to count badge emission accurately."""
    return re.findall(
        r'<span class="zone__needs-adaptation-badge"[^>]*>[^<]*</span>',
        html,
    )


# ─── u5 case 1 : non-provisional zone renders pre-u5 div shape ────────


def test_u5_non_provisional_zone_renders_without_class_or_badge():
    """A zone with provisional=False renders as a plain ``class="zone"`` div.

    Expected: exactly one zone div, no ``zone--provisional`` class, no
    ``data-provisional="1"`` attribute, and no badge element.

    All checks go through the tag-scoped helpers, because the ``<style>``
    block of the rendered page always contains the provisional selectors.
    """
    plain_zone = {
        "position": "single",
        "template_id": "MOCK_template_direct_a",
        "slot_payload": {},
        "content_weight": {"score": 1},
        "min_height_px": 100,
        "provisional": False,
    }
    html = _render_slide_base([plain_zone])

    zone_divs = _all_zone_div_openings(html)
    assert len(zone_divs) == 1
    opening_tag = zone_divs[0]
    assert "zone--provisional" not in opening_tag
    assert 'data-provisional="1"' not in opening_tag
    # No badge element anywhere in the body.
    assert _all_badge_spans(html) == []
    # The div carries exactly the canonical class.
    assert 'class="zone"' in opening_tag


def test_u5_zone_without_provisional_key_treated_as_non_provisional():
    """A zone dict with no ``provisional`` key renders as non-provisional.

    Covers callers that build zones without the key: the template must
    still render one zone div without the provisional class or a badge.
    """
    zone_without_flag = {
        "position": "single",
        "template_id": "MOCK_template_direct_a",
        "slot_payload": {},
        "content_weight": {"score": 1},
        "min_height_px": 100,
        # "provisional" deliberately left out
    }
    html = _render_slide_base([zone_without_flag])

    zone_divs = _all_zone_div_openings(html)
    assert len(zone_divs) == 1
    assert "zone--provisional" not in zone_divs[0]
    assert _all_badge_spans(html) == []


# ─── u5 case 2 : provisional zone renders class + badge + data attr ───


def test_u5_provisional_zone_renders_class_and_badge():
    """Opt-in path. zones[i].provisional=True must:
      1. Append `zone--provisional` class to the zone div.
      2. Set `data-provisional="1"` data attribute (for downstream selectors).
      3. Render a `<span class="zone__needs-adaptation-badge">` element with
         the literal text "needs adaptation" (aria-label included for a11y).

    The class / attribute / badge checks are scoped to the zone div opening
    tag and the badge element. A whole-document substring check would pass
    even if the template emitted nothing, because the page's <style> block
    always contains the `.zone--provisional` selector.
    """
    zones = [
        {
            "position": "single",
            "template_id": "MOCK_template_restructure_a",
            "slot_payload": {},
            "content_weight": {"score": 1},
            "min_height_px": 100,
            "provisional": True,
        }
    ]
    html = _render_slide_base(zones)

    # Exactly one zone div, and it carries the class + data attribute.
    zone_divs = _all_zone_div_openings(html)
    assert len(zone_divs) == 1
    assert "zone--provisional" in zone_divs[0]
    assert 'data-provisional="1"' in zone_divs[0]

    # Exactly one real badge element with the required label text.
    badges = _all_badge_spans(html)
    assert len(badges) == 1
    assert "needs adaptation" in badges[0]
    # aria-label is checked document-wide, as before: its literal value is
    # not a CSS selector, so the <style> block cannot satisfy it.
    assert 'aria-label="needs user or AI adaptation"' in html


def test_u5_provisional_badge_appears_inside_provisional_zone_only():
    """Mixed-zone slide: one provisional zone + one normal zone. The badge
    + class must appear ONLY in the provisional zone, not bleed into the
    normal one (CSS-level isolation should already prevent this, but the
    template must not emit the badge for both).

    Both per-position lookups are first asserted to be non-empty. The
    lookup helper returns "" when it finds no zone, and the negative
    (`not in`) checks on the top zone would pass vacuously on "".
    """
    zones = [
        {
            "position": "top",
            "template_id": "MOCK_template_direct_a",
            "slot_payload": {},
            "content_weight": {"score": 1},
            "min_height_px": 100,
            "provisional": False,
        },
        {
            "position": "bottom",
            "template_id": "MOCK_template_restructure_a",
            "slot_payload": {},
            "content_weight": {"score": 1},
            "min_height_px": 100,
            "provisional": True,
        },
    ]
    layout_css = {
        "cols": "1fr",
        "rows": "1fr 1fr",
        "areas": '"top" "bottom"',
    }
    html = _render_slide_base(
        zones, layout_preset="vertical-2", layout_css=layout_css
    )
    # Exactly one badge span element should be present in the rendered body
    # (CSS selector in <style> excluded by the helper).
    assert len(_all_badge_spans(html)) == 1
    # zone--provisional must appear on exactly one zone div (CSS selector
    # in <style> excluded by the helper).
    zone_divs = _all_zone_div_openings(html)
    assert len(zone_divs) == 2
    provisional_zone_divs = [d for d in zone_divs if "zone--provisional" in d]
    assert len(provisional_zone_divs) == 1
    # The provisional class must be associated with the bottom zone.
    bottom_zone_open = _zone_div_for_position(html, "bottom")
    assert bottom_zone_open, "bottom zone div not found in rendered HTML"
    assert "zone--provisional" in bottom_zone_open
    assert "zone__needs-adaptation-badge" in bottom_zone_open
    # The top zone must exist and must NOT carry the provisional class.
    top_zone_open = _zone_div_for_position(html, "top")
    assert top_zone_open, "top zone div not found in rendered HTML"
    assert "zone--provisional" not in top_zone_open
    assert "zone__needs-adaptation-badge" not in top_zone_open


# ─── u5 case 3 : zones_data data shape contract ────────────────────────


def test_u5_zones_data_provisional_field_defaults_false_in_template():
    """provisional=None renders the same as a non-provisional zone.

    Pins the template's truthiness check: a falsy value that is not the
    literal False must not produce the provisional class or a badge, so a
    template refactor cannot silently invert the default.
    """
    zone_with_none_flag = {
        "position": "single",
        "template_id": "MOCK_template_direct_a",
        "slot_payload": {},
        "content_weight": {"score": 1},
        "min_height_px": 100,
        "provisional": None,  # falsy, but not False
    }
    html = _render_slide_base([zone_with_none_flag])

    zone_divs = _all_zone_div_openings(html)
    assert len(zone_divs) == 1
    assert "zone--provisional" not in zone_divs[0]
    assert _all_badge_spans(html) == []


def test_u5_slide_base_css_carries_provisional_marker_styles():
    """Pin that the provisional CSS class selectors exist in rendered HTML.

    Renders a single provisional zone and checks that both the
    ``.zone--provisional`` and ``.zone__needs-adaptation-badge`` selector
    strings occur in the output, so removing them breaks this test instead
    of silently producing an unstyled badge.

    Only selector presence is checked; concrete colors / dash patterns are
    deliberately left free to change (e.g., a palette swap for a theme).
    """
    provisional_zone = {
        "position": "single",
        "template_id": "MOCK_template_restructure_a",
        "slot_payload": {},
        "content_weight": {"score": 1},
        "min_height_px": 100,
        "provisional": True,
    }
    markup = _render_slide_base([provisional_zone])

    # Both the zone marker and the badge selector must be present.
    for selector in (".zone--provisional", ".zone__needs-adaptation-badge"):
        assert selector in markup


# ════════════════════════════════════════════════════════════════════════
# u6 — compute_slide_status additive qualifiers
#       provisional_first_render_count + provisional_first_render_units
# ════════════════════════════════════════════════════════════════════════
#
# u6 surfaces the IMP-30 first-render invariant in Step 20 slide_status.
# Contract :
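#   - Additive only. IMP-30 / u6 does NOT extend the top-level `overall` enum
#     (PASS / RENDERED_WITH_VISUAL_REGRESSION / PARTIAL_COVERAGE /
#     PARTIAL_COVERAGE_WITH_VISUAL_REGRESSION).
#     Stage 1 Q3 lock + Codex #10 D4 (IMP-05) preservation.
#     The one value outside this list that the empty-shell cases below assert,
#     EMPTY_SHELL_NO_CONTENT, was introduced by IMP-87 (empty-shell honesty
#     fix), not by u6.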
#   - `provisional_first_render_count` = int >= 0 — number of selected units with
#     unit.provisional == True (set by u1 V4Match synthesis → u2 propagation,
#     u3 last-resort fill, or u4 empty-shell synthesis).
#   - `provisional_first_render_units` = list[dict] — per-unit entries mirroring
#     the shape of `fallback_selections` / `adapter_needed_units` so downstream
#     consumers can branch uniformly without re-deriving intent from labels.
#   - Defensive `getattr` keeps the function safe when units come from legacy
#     code paths that predate u2 (no .provisional attribute) — those units are
#     treated as non-provisional.

from src.phase_z2_pipeline import MdxSection, compute_slide_status


def _mk_unit(*, section_ids: list[str], provisional: bool, **overrides):
    """Build a real ``CompositionUnit`` for the compute_slide_status tests.

    Args:
        section_ids: Section ids the unit covers; copied into a new list.
        provisional: Value for the unit's ``provisional`` field.
        **overrides: Any CompositionUnit field to replace in the defaults
            below (an override wins over the default of the same name).

    Returns:
        A ``CompositionUnit`` constructed from the merged field values.
    """
    defaults = {
        "source_section_ids": list(section_ids),
        "merge_type": "single",
        "frame_template_id": "MOCK_template_direct_a",
        "frame_id": "MOCK_frame_001",
        "frame_number": 1,
        "confidence": 0.9,
        "label": "use_as_is",
        "phase_z_status": "matched_zone",
        "raw_content": "alpha",
        "title": "MOCK section",
        "v4_rank": 1,
        "selection_path": "rank_1",
        "fallback_reason": None,
        "score": 1.0,
        "provisional": provisional,
    }
    # Later mapping wins, so caller overrides replace the defaults.
    return CompositionUnit(**{**defaults, **overrides})


def _mk_section(section_id: str) -> MdxSection:
    """Build a minimal ``MdxSection`` for the given id.

    ``section_num`` is the id with its leading ``"S"`` characters stripped
    and parsed as an int (0 when nothing is left); title and raw_content
    are derived from the id. All other fields are left unset here.
    """
    digits = section_id.lstrip("S")
    number = int(digits) if digits else 0
    return MdxSection(
        section_id=section_id,
        section_num=number,
        title=f"Section {section_id}",
        raw_content=f"raw {section_id}",
    )


# ─── u6 case 1 : no provisional units — defensive default 0 / [] ──────


def test_u6_no_provisional_units_returns_zero_and_empty_list():
    """Happy path: every unit is a plain rank_1 pick (no IMP-30 recovery).

    Both u6 qualifiers must come back as their defaults (0 and an empty
    list), ``overall`` must be PASS, and the fallback qualifier fields
    checked at the end must still be present with empty values.
    """
    section_ids = ("S1", "S2")
    sections = [_mk_section(sid) for sid in section_ids]
    units = [
        _mk_unit(section_ids=[sid], provisional=False) for sid in section_ids
    ]

    status = compute_slide_status(
        sections,
        units,
        {"candidates_summary": []},
        {"passed": True, "fail_reasons": []},
        adapter_needed_units=None,
        debug_zones=None,
    )

    # u6 qualifiers: defensive defaults.
    assert status["provisional_first_render_count"] == 0
    assert status["provisional_first_render_units"] == []
    # Full coverage + visual pass keeps the overall value at PASS.
    assert status["overall"] == "PASS"
    # Regression guard for the existing IMP-05 qualifier fields.
    assert status["fallback_selection_count"] == 0
    assert status["selection_paths"] == []


def test_u6_provisional_field_absent_is_treated_as_false():
    """A unit whose ``provisional`` attribute was deleted is not counted.

    Simulates a legacy duck-typed unit by removing the attribute from a
    real CompositionUnit instance, then checks that compute_slide_status
    reports zero provisional units and an empty list.
    """
    legacy_unit = _mk_unit(section_ids=["S1"], provisional=False)
    # Remove the instance attribute to imitate a pre-u2 unit.
    # NOTE(review): if CompositionUnit declares a class-level default for
    # ``provisional``, lookup falls back to that default rather than to a
    # getattr fallback inside compute_slide_status — confirm which path
    # this actually exercises.
    del legacy_unit.provisional

    status = compute_slide_status(
        [_mk_section("S1")],
        [legacy_unit],
        {"candidates_summary": []},
        {"passed": True, "fail_reasons": []},
        adapter_needed_units=None,
        debug_zones=None,
    )

    assert status["provisional_first_render_count"] == 0
    assert status["provisional_first_render_units"] == []


# ─── u6 case 2 : provisional unit synthesized via u1 (chain_exhausted) ─


def test_u6_chain_exhausted_provisional_unit_listed_with_full_shape():
    """A provisional unit appears in the u6 list with every audited field.

    Builds a unit shaped like a chain-exhausted provisional pick
    (restructure label, ``provisional_rank_1`` selection path) and checks
    that its ``provisional_first_render_units`` entry exposes
    source_section_ids / phase_z_status / frame_template_id / frame_id /
    label / selection_path / fallback_reason / v4_rank with the unit's own
    values, so debug consumers need not re-parse the units list.
    """
    unit_fields = {
        "phase_z_status": "extract_matched_zone",
        "frame_template_id": "MOCK_template_restructure_a",
        "frame_id": "MOCK_frame_002",
        "label": "restructure",
        "selection_path": "provisional_rank_1",
        "fallback_reason": "phase_z_status_not_allowed:extract_matched_zone",
        "v4_rank": 1,
    }
    provisional_unit = _mk_unit(
        section_ids=["S1"], provisional=True, **unit_fields
    )

    status = compute_slide_status(
        [_mk_section("S1")],
        [provisional_unit],
        {"candidates_summary": []},
        {"passed": True, "fail_reasons": []},
        adapter_needed_units=None,
        debug_zones=None,
    )

    assert status["provisional_first_render_count"] == 1
    listed = status["provisional_first_render_units"]
    assert len(listed) == 1
    entry = listed[0]
    # The entry must mirror the unit field-for-field.
    expected_entry = {"source_section_ids": ["S1"], **unit_fields}
    for field_name, wanted in expected_entry.items():
        assert entry[field_name] == wanted
    # Provisional is a qualifier, not a failure class: with full coverage
    # and a visual pass the overall value is still PASS.
    assert status["overall"] == "PASS"


# ─── u6 case 3 : empty-shell unit (u4) listed with __empty__ identifiers ─


def test_u6_empty_shell_unit_listed_with_empty_identifiers():
    """An empty-shell unit is listed with its ``__empty__`` identifiers.

    Builds a single shell unit covering both sections (the shape described
    for the terminal first-render route) and checks that its u6 entry
    carries frame_template_id / frame_id == "__empty__" and
    phase_z_status == "empty_shell", which lets Step 20 tell a terminal
    shell apart from other provisional units. Also pins the IMP-87
    outcome: coverage is reported False and overall is
    EMPTY_SHELL_NO_CONTENT.
    """
    sections = [_mk_section(sid) for sid in ("S1", "S2")]
    shell_fields = dict(
        merge_type="empty_shell",
        frame_template_id="__empty__",
        frame_id="__empty__",
        frame_number=0,
        confidence=0.0,
        label="empty_shell",
        phase_z_status="empty_shell",
        raw_content="raw S1\n\nraw S2",
        title="Section S1 / Section S2",
        v4_rank=None,
        selection_path="empty_shell",
        fallback_reason="no_v4_rank_1_for_any_section",
        score=0.0,
    )
    shell = _mk_unit(section_ids=["S1", "S2"], provisional=True, **shell_fields)

    status = compute_slide_status(
        sections,
        [shell],
        {"candidates_summary": []},
        {"passed": True, "fail_reasons": []},
        adapter_needed_units=None,
        debug_zones=None,
    )

    assert status["provisional_first_render_count"] == 1
    entry = status["provisional_first_render_units"][0]
    for field_name, wanted in (
        ("frame_template_id", "__empty__"),
        ("frame_id", "__empty__"),
        ("phase_z_status", "empty_shell"),
        ("label", "empty_shell"),
        ("selection_path", "empty_shell"),
        ("fallback_reason", "no_v4_rank_1_for_any_section"),
    ):
        assert entry[field_name] == wanted
    assert entry["v4_rank"] is None
    # IMP-87 u4 — honesty defect inversion. The shell's source_section_ids
    # still feed the legacy covered_section_ids for display, but the
    # content-rendered axis (u1) excludes empty-shell units, so
    # full_mdx_coverage MUST be False. Overall (u2) MUST be elevated to
    # EMPTY_SHELL_NO_CONTENT ahead of the legacy ladder; otherwise a slide
    # whose only rendered unit is __empty__ would be reported as PASS —
    # the Stage 1 mdx05 honesty defect that issue exists to fix.
    assert status["full_mdx_coverage"] is False
    assert status["overall"] == "EMPTY_SHELL_NO_CONTENT"


# ─── u6 case 4 : mixed selection — provisional + normal units coexist ──


def test_u6_mixed_selection_counts_only_provisional_units():
    """Only provisional units are counted when normal units coexist.

    Three sections: S1 and S3 covered by normal rank_1 units, S2 by a
    provisional unit. The u6 count must be 1 and the list must contain
    only the S2 entry. (Entry ordering is not exercised here because only
    one unit is provisional.)
    """
    sections = [_mk_section(sid) for sid in ("S1", "S2", "S3")]
    normal_s1 = _mk_unit(section_ids=["S1"], provisional=False)
    provisional_s2 = _mk_unit(
        section_ids=["S2"],
        provisional=True,
        label="reject",
        phase_z_status="fallback_candidate",
        selection_path="provisional_rank_1",
        fallback_reason="phase_z_status_not_allowed:fallback_candidate",
    )
    normal_s3 = _mk_unit(section_ids=["S3"], provisional=False)

    status = compute_slide_status(
        sections,
        [normal_s1, provisional_s2, normal_s3],
        {"candidates_summary": []},
        {"passed": True, "fail_reasons": []},
        adapter_needed_units=None,
        debug_zones=None,
    )

    assert status["provisional_first_render_count"] == 1
    listed = status["provisional_first_render_units"]
    assert len(listed) == 1
    assert listed[0]["source_section_ids"] == ["S2"]
    assert listed[0]["label"] == "reject"
    # Sections covered by normal units must be absent from the list.
    listed_section_ids = [
        sid for entry in listed for sid in entry["source_section_ids"]
    ]
    for normal_sid in ("S1", "S3"):
        assert normal_sid not in listed_section_ids


# ─── u6 case 5 : overall enum stability under provisional units ───────


def test_u6_overall_enum_unchanged_when_provisional_present_with_visual_pass():
    """Provisional unit + full coverage + visual pass still yields PASS.

    Provisional is an additive qualifier (Stage 1 Q3 + Codex #10 D4 lock),
    not a new failure class: code that wants to gate on it must read
    ``provisional_first_render_count`` rather than ``overall``.
    """
    provisional_unit = _mk_unit(
        section_ids=["S1"],
        provisional=True,
        label="restructure",
        phase_z_status="extract_matched_zone",
        selection_path="provisional_rank_1",
        fallback_reason="phase_z_status_not_allowed:extract_matched_zone",
    )

    status = compute_slide_status(
        [_mk_section("S1")],
        [provisional_unit],
        {"candidates_summary": []},
        {"passed": True, "fail_reasons": []},
        adapter_needed_units=None,
        debug_zones=None,
    )

    assert status["overall"] == "PASS"
    assert status["provisional_first_render_count"] == 1


def test_u6_overall_enum_visual_regression_independent_of_provisional():
    """Provisional unit + full coverage + visual FAIL keeps the old value.

    With a failing overflow result the overall value must be the existing
    RENDERED_WITH_VISUAL_REGRESSION, while the provisional count is still
    reported as 1 — the two axes are orthogonal.
    """
    provisional_unit = _mk_unit(
        section_ids=["S1"],
        provisional=True,
        selection_path="provisional_rank_1",
    )
    failing_overflow = {"passed": False, "fail_reasons": ["zone overflow"]}

    status = compute_slide_status(
        [_mk_section("S1")],
        [provisional_unit],
        {"candidates_summary": []},
        failing_overflow,
        adapter_needed_units=None,
        debug_zones=None,
    )

    assert status["overall"] == "RENDERED_WITH_VISUAL_REGRESSION"
    assert status["provisional_first_render_count"] == 1


# ─── u6 case 6 : note field documents the new qualifier ───────────────


def test_u6_note_field_mentions_provisional_first_render_count():
    """The human-readable ``note`` names the new qualifier key.

    ``note`` is the summary text embedded in Step 20 output. It must
    mention ``provisional_first_render_count`` (documented there as:
    a value > 0 means the IMP-30 first-render invariant kicked in) so
    operators see the qualifier without grepping JSON keys, and it must
    keep mentioning the earlier qualifier keys.
    """
    status = compute_slide_status(
        [_mk_section("S1")],
        [_mk_unit(section_ids=["S1"], provisional=False)],
        {"candidates_summary": []},
        {"passed": True, "fail_reasons": []},
        adapter_needed_units=None,
        debug_zones=None,
    )
    note_text = status["note"]

    assert "provisional_first_render_count" in note_text
    # Regression guard: guidance for the earlier qualifiers must remain.
    assert "adapter_needed_count" in note_text
    assert "content_truncated_count" in note_text


# ════════════════════════════════════════════════════════════════════════
# u7 — broader pipeline fixture coverage for the empty-shell + provisional
#       retry end-to-end (deferred from u4 verification per mid-stage
#       compaction). Threads real production functions together with
#       synthetic V4 fixtures so the cross-layer data flow (u1 → u2 → u3 →
#       u4 → u6) is exercised in one pass, instead of relying on per-unit
#       tests that each touch only one layer.
# ════════════════════════════════════════════════════════════════════════
#
# Each u7 case runs the real chain:
#   synthetic V4 dict
#     → lookup_v4_match_with_fallback (u1)
#     → plan_composition (u2 + u3 propagation / fill)
#     → compute_slide_status (u6 qualifier surface)
#
# Monkeypatched dependencies mirror tests/test_phase_z2_v4_fallback.py
# (get_contract + compute_capacity_fit). MOCK_ naming + rank-by-field
# convention preserved (Codex #10 E1).

import pytest

from src import phase_z2_pipeline as _pz_pipeline
from src.phase_z2_pipeline import (
    V4_LABEL_TO_PHASE_Z_STATUS as _PROD_LABEL_TO_STATUS,
    compute_slide_status as _compute_slide_status,
    lookup_v4_match_with_fallback as _real_lookup,
)


# Synthetic catalog stub — only MOCK_ templates considered registered.
# Mirrors test_phase_z2_v4_fallback.py shape so the two suites stay in sync.
# One opaque sentinel object per registered MOCK_ template id; any id not
# listed here is treated as unregistered by the stubbed contract lookup.
_U7_MOCK_CATALOG: dict[str, object] = {
    mock_template_id: object()
    for mock_template_id in (
        "MOCK_template_direct_a",
        "MOCK_template_restructure_a",
        "MOCK_template_reject_a",
    )
}


def _u7_get_contract(template_id: str):
    """Stub contract lookup: catalog entry for *template_id*, else None."""
    try:
        return _U7_MOCK_CATALOG[template_id]
    except KeyError:
        # Unregistered template ids resolve to None.
        return None


def _u7_capacity_fit_ok(template_id: str, raw_content: str) -> dict:
    """Stub capacity check: ignore both arguments and report an "ok" fit."""
    return dict(fit_status="ok")


@pytest.fixture
def u7_patch_selector_deps(monkeypatch):
    """Swap the pipeline module's selector dependencies for u7 stubs.

    Replaces ``get_contract`` and ``compute_capacity_fit`` on the pipeline
    module itself. Per the original note (Codex #10 E3) the selector has
    no dependency-injection hook, so patching the module-level names is
    the way to steer lookup_v4_match_with_fallback.
    """
    replacements = (
        ("get_contract", _u7_get_contract),
        ("compute_capacity_fit", _u7_capacity_fit_ok),
    )
    for attr_name, stub in replacements:
        monkeypatch.setattr(_pz_pipeline, attr_name, stub)


def _u7_v4_section(judgments: list[dict]) -> dict:
    """Wrap *judgments* (not copied) under the ``judgments_full32`` key."""
    return dict(judgments_full32=judgments)


def _u7_j(rank: int, template_id: str, frame_id: str, label: str,
          confidence: float = 0.9) -> dict:
    """Build one synthetic V4 judgment dict.

    *rank* is written to both ``frame_number`` and ``v4_full_rank``; the
    remaining arguments are stored under keys of the same name.
    """
    judgment = dict(
        frame_id=frame_id,
        frame_number=rank,
        template_id=template_id,
        confidence=confidence,
        label=label,
        v4_full_rank=rank,
    )
    return judgment


def _u7_section(section_id: str) -> _pz_pipeline.MdxSection:
    """Build an ``MdxSection`` with a one-bullet body for the u7 cases.

    ``section_num`` is the id with leading ``"S"`` characters stripped and
    parsed as an int (0 when nothing is left).
    """
    digits = section_id.lstrip("S")
    number = int(digits) if digits else 0
    return _pz_pipeline.MdxSection(
        section_id=section_id,
        section_num=number,
        title=f"Section {section_id}",
        raw_content=f"- bullet for {section_id}\n",
    )


# ─── u7 case 1 : e2e chain_exhausted → provisional retry → slide_status ──


def test_u7_e2e_chain_exhausted_provisional_flows_through_layers(
    u7_patch_selector_deps,
):
    """End-to-end: a chain-exhausted section surfaces as a provisional unit.

    S1 has an auto-renderable rank-1 judgment; S2 has only restructure /
    reject judgments. With both opt-in flags on (``allow_provisional`` at
    the lookup, ``allow_provisional_fill`` at plan_composition) the
    provisional flag must travel V4Match (u1) → CompositionUnit (u2) →
    provisional fill (u3) → compute_slide_status qualifier (u6).

    Per the original note this mirrors the production recovery path
    (pipeline ``_lookup_fn_provisional`` +
    ``plan_composition(allow_provisional_fill=True)``, u4 Phase A).
    """
    v4 = {
        "mdx_sections": {
            # S1 — auto-renderable, normal rank-1 selection.
            "S1": _u7_v4_section([
                _u7_j(1, "MOCK_template_direct_a", "MOCK_frame_001", "use_as_is"),
            ]),
            # S2 — chain exhausted (all restructure / reject).
            "S2": _u7_v4_section([
                _u7_j(1, "MOCK_template_restructure_a", "MOCK_frame_011", "restructure"),
                _u7_j(2, "MOCK_template_reject_a", "MOCK_frame_012", "reject"),
            ]),
        }
    }
    sections = [_u7_section("S1"), _u7_section("S2")]

    def lookup_fn(sid: str):
        match, _trace = _real_lookup(
            v4, sid, raw_content="- a\n- b\n",
            allow_provisional=True,
        )
        return match

    units, layout_preset, comp_debug = plan_composition(
        sections,
        v4_lookup_fn=lookup_fn,
        v4_label_to_status=_PROD_LABEL_TO_STATUS,
        allowed_statuses=_ALLOWED_STATUSES,
        capacity_fit_fn=None,
        v4_candidates_lookup_fn=None,
        allow_provisional_fill=True,
    )

    # Recovery succeeded: exactly 2 units (S1 normal + S2 provisional fill).
    # Assert the count first — the dict below would silently collapse two
    # units that share a first section id, hiding a double selection.
    assert len(units) == 2
    by_section = {u.source_section_ids[0]: u for u in units}
    assert set(by_section) == {"S1", "S2"}
    assert by_section["S1"].provisional is False
    assert by_section["S1"].label == "use_as_is"
    assert by_section["S2"].provisional is True
    assert by_section["S2"].label == "restructure"
    assert by_section["S2"].selection_path == "provisional_rank_1"
    assert layout_preset == "horizontal-2"

    # u6 qualifier surface — only S2 counted.
    status = _compute_slide_status(
        sections, units, comp_debug,
        overflow={"passed": True, "fail_reasons": []},
        adapter_needed_units=None, debug_zones=None,
    )
    assert status["provisional_first_render_count"] == 1
    entry = status["provisional_first_render_units"][0]
    assert entry["source_section_ids"] == ["S2"]
    assert entry["selection_path"] == "provisional_rank_1"
    assert entry["frame_template_id"] == "MOCK_template_restructure_a"
    # Full coverage + visual pass: overall stays PASS despite the
    # provisional unit.
    assert status["overall"] == "PASS"


# ─── u7 case 2 : e2e zero-V4 → u4 empty-shell synthesis → slide_status ───


def test_u7_e2e_zero_v4_empty_shell_status_surface(u7_patch_selector_deps):
    """End-to-end zero-V4 path: empty shell reaches compute_slide_status.

    With no V4 judgments for any section, plan_composition yields no units
    and no layout preset even with both opt-in flags on. The test then
    hand-builds the empty-shell unit (simulating the production u4 Phase B
    synthesis) and checks that Step 20 lists it as a provisional unit with
    phase_z_status == "empty_shell", reports full_mdx_coverage False, and
    sets overall to EMPTY_SHELL_NO_CONTENT (IMP-87).
    """
    empty_v4 = {"mdx_sections": {}}  # nothing matches any section
    sections = [_u7_section(sid) for sid in ("S1", "S2")]

    def lookup_fn(sid: str):
        found, _trace = _real_lookup(
            empty_v4, sid, raw_content="- a\n",
            allow_provisional=True,
        )
        return found

    planned_units, planned_preset, _ = plan_composition(
        sections,
        v4_lookup_fn=lookup_fn,
        v4_label_to_status=_PROD_LABEL_TO_STATUS,
        allowed_statuses=_ALLOWED_STATUSES,
        capacity_fit_fn=None,
        v4_candidates_lookup_fn=None,
        allow_provisional_fill=True,
    )
    # Without any V4 evidence the recovery cannot fabricate a match, so
    # nothing is planned. This is the trigger for u4 Phase B (empty-shell).
    assert planned_units == []
    assert planned_preset is None

    # Simulate the production u4 Phase B synthesis.
    covered_ids = [section.section_id for section in sections]
    joined_raw = "\n\n".join(section.raw_content for section in sections)
    joined_title = " / ".join(section.title for section in sections)
    empty_shell = CompositionUnit(
        source_section_ids=covered_ids,
        merge_type="empty_shell",
        frame_template_id="__empty__",
        frame_id="__empty__",
        frame_number=0,
        confidence=0.0,
        label="empty_shell",
        phase_z_status="empty_shell",
        raw_content=joined_raw,
        title=joined_title,
        v4_rank=None,
        selection_path="empty_shell",
        fallback_reason="no_v4_rank_1_for_any_section",
        score=0.0,
        provisional=True,
    )

    status = _compute_slide_status(
        sections, [empty_shell], comp_debug={"candidates_summary": []},
        overflow={"passed": True, "fail_reasons": []},
        adapter_needed_units=None, debug_zones=None,
    )

    assert status["provisional_first_render_count"] == 1
    shell_entry = status["provisional_first_render_units"][0]
    assert shell_entry["phase_z_status"] == "empty_shell"
    assert shell_entry["frame_template_id"] == "__empty__"
    assert shell_entry["source_section_ids"] == ["S1", "S2"]
    # IMP-87 u4 — honesty defect inversion. The shell unit still attaches
    # both sections to the legacy covered_section_ids (display preserved),
    # but the content-rendered axis (u1) excludes empty-shell units, so
    # full_mdx_coverage MUST be False. Overall (u2) MUST be elevated to
    # EMPTY_SHELL_NO_CONTENT ahead of the legacy 4-way ladder, so a zero-V4
    # slide cannot pass itself off as PASS on visual-overflow alone.
    assert status["full_mdx_coverage"] is False
    assert status["overall"] == "EMPTY_SHELL_NO_CONTENT"


# ─── u7 case 3 : e2e normal path unchanged when opt-in flags both on ─────


def test_u7_e2e_normal_path_unchanged_with_opt_in_flags(u7_patch_selector_deps):
    """IMP-05 regression guard at e2e level.

    When every section has an auto-renderable rank-1 match, turning BOTH
    opt-in flags on (``allow_provisional`` at the lookup function and
    ``allow_provisional_fill`` at plan_composition) must not produce any
    provisional unit: each section is covered once by a ``rank_1`` unit
    and the u6 qualifiers stay at their defaults.
    """
    v4 = {
        "mdx_sections": {
            "S1": _u7_v4_section([
                _u7_j(1, "MOCK_template_direct_a", "MOCK_frame_001", "use_as_is"),
            ]),
            "S2": _u7_v4_section([
                _u7_j(1, "MOCK_template_direct_a", "MOCK_frame_002", "use_as_is"),
            ]),
        }
    }
    sections = [_u7_section("S1"), _u7_section("S2")]

    def lookup_fn(sid: str):
        match, _trace = _real_lookup(
            v4, sid, raw_content="- a\n",
            allow_provisional=True,
        )
        return match

    units, layout_preset, comp_debug = plan_composition(
        sections,
        v4_lookup_fn=lookup_fn,
        v4_label_to_status=_PROD_LABEL_TO_STATUS,
        allowed_statuses=_ALLOWED_STATUSES,
        capacity_fit_fn=None,
        v4_candidates_lookup_fn=None,
        allow_provisional_fill=True,
    )

    # One unit per section. The set comparison below cannot detect a
    # section selected twice, so the count is pinned explicitly.
    assert len(units) == len(sections)
    assert {u.source_section_ids[0] for u in units} == {"S1", "S2"}
    assert all(u.provisional is False for u in units)
    assert all(u.selection_path == "rank_1" for u in units)
    assert layout_preset == "horizontal-2"

    status = _compute_slide_status(
        sections, units, comp_debug,
        overflow={"passed": True, "fail_reasons": []},
        adapter_needed_units=None, debug_zones=None,
    )
    assert status["provisional_first_render_count"] == 0
    assert status["provisional_first_render_units"] == []
    assert status["overall"] == "PASS"


# ─── u7 case 4 : IMP-30 invariants — MDX content preserved + audit trail ──


def test_u7_imp30_invariants_mdx_preserved_and_audit_trail_surfaced(
    u7_patch_selector_deps,
):
    """Consolidated check of two issue-body invariants.

    - "MDX content preserved (no rewrite)": the provisional unit's
      ``raw_content`` equals the source section's raw_content exactly.
    - The recovery is observable, not silent: comp_debug's
      ``candidates_summary`` holds exactly one S2 row whose
      ``selection_state`` is "selected_provisional".
    """
    raw_s2 = "- restructure-only bullet alpha\n- restructure-only bullet beta\n"
    section_s1 = _u7_section("S1")
    section_s2 = _pz_pipeline.MdxSection(
        section_id="S2", section_num=2, title="Section S2",
        raw_content=raw_s2,
    )
    sections = [section_s1, section_s2]
    v4 = {
        "mdx_sections": {
            "S1": _u7_v4_section([
                _u7_j(1, "MOCK_template_direct_a", "MOCK_frame_001", "use_as_is"),
            ]),
            "S2": _u7_v4_section([
                _u7_j(1, "MOCK_template_restructure_a", "MOCK_frame_011", "restructure"),
            ]),
        }
    }

    def lookup_fn(sid: str):
        # Hand the real lookup each section's actual raw_content so the
        # content-preservation check below is meaningful.
        if sid == "S1":
            section_raw = sections[0].raw_content
        else:
            section_raw = raw_s2
        found, _trace = _real_lookup(
            v4, sid, raw_content=section_raw, allow_provisional=True,
        )
        return found

    units, _preset, comp_debug = plan_composition(
        sections,
        v4_lookup_fn=lookup_fn,
        v4_label_to_status=_PROD_LABEL_TO_STATUS,
        allowed_statuses=_ALLOWED_STATUSES,
        capacity_fit_fn=None,
        v4_candidates_lookup_fn=None,
        allow_provisional_fill=True,
    )

    unit_by_first_sid = {u.source_section_ids[0]: u for u in units}
    s2_unit = unit_by_first_sid["S2"]
    assert s2_unit.provisional is True
    # IMP-30 contract: MDX content survives the u1→u2→u3 path unchanged.
    assert s2_unit.raw_content == raw_s2

    # Audit trail: collect the S2 rows recorded as 'selected_provisional'.
    s2_selected = []
    for row in comp_debug.get("candidates_summary", []):
        if row["source_section_ids"] != ["S2"]:
            continue
        if row["selection_state"] == "selected_provisional":
            s2_selected.append(row)
    assert len(s2_selected) == 1
    audit_row = s2_selected[0]
    assert audit_row["template_id"] == "MOCK_template_restructure_a"
    assert audit_row["selection_path"] == "provisional_rank_1"


# ─── u7 case 5 : all-restructure/reject → every section gets placeholder ──


def test_u7_imp30_all_restructure_only_each_section_gets_provisional_unit(
    u7_patch_selector_deps,
):
    """Every restructure/reject-only section still gets a provisional unit.

    Issue-body invariant: a section that has only restructure / reject
    judgments must still receive a placeholder zone plus a trace. When
    EVERY section is like that, the recovery must produce provisional
    units covering all of them (no zero-unit abort, no chain_exhausted
    swallow), record each as "selected_provisional" in the audit summary,
    and report all three in the Step 20 qualifier with overall PASS.
    """
    all_sids = ("S1", "S2", "S3")
    sections = [_u7_section(sid) for sid in all_sids]
    v4 = {
        "mdx_sections": {
            "S1": _u7_v4_section([
                _u7_j(1, "MOCK_template_restructure_a", "MOCK_frame_011", "restructure"),
            ]),
            "S2": _u7_v4_section([
                _u7_j(1, "MOCK_template_reject_a", "MOCK_frame_012", "reject"),
            ]),
            "S3": _u7_v4_section([
                _u7_j(1, "MOCK_template_restructure_a", "MOCK_frame_013", "restructure"),
            ]),
        }
    }

    def lookup_fn(sid: str):
        found, _trace = _real_lookup(
            v4, sid, raw_content="- bullet\n", allow_provisional=True,
        )
        return found

    units, layout_preset, comp_debug = plan_composition(
        sections,
        v4_lookup_fn=lookup_fn,
        v4_label_to_status=_PROD_LABEL_TO_STATUS,
        allowed_statuses=_ALLOWED_STATUSES,
        capacity_fit_fn=None,
        v4_candidates_lookup_fn=None,
        allow_provisional_fill=True,
    )

    # All sections covered, and every unit is provisional — the
    # 'placeholder zone + trace' contract.
    first_sid_to_unit = {u.source_section_ids[0]: u for u in units}
    assert set(first_sid_to_unit) == {"S1", "S2", "S3"}
    assert all(u.provisional is True for u in units)
    assert layout_preset is not None  # path (a) abort guard bypassed

    # Audit summary: each section has a selected_provisional row.
    audited_sids = set()
    for row in comp_debug.get("candidates_summary", []):
        if row["selection_state"] == "selected_provisional":
            audited_sids.add(row["source_section_ids"][0])
    assert audited_sids == {"S1", "S2", "S3"}

    # Step 20 surface: all three counted as provisional; overall stays
    # PASS (qualifier-not-enum per IMP-05 Codex #10 D4).
    status = _compute_slide_status(
        sections, units, comp_debug,
        overflow={"passed": True, "fail_reasons": []},
        adapter_needed_units=None, debug_zones=None,
    )
    assert status["provisional_first_render_count"] == 3
    reported_sids = {
        entry["source_section_ids"][0]
        for entry in status["provisional_first_render_units"]
    }
    assert reported_sids == {"S1", "S2", "S3"}
    assert status["overall"] == "PASS"