"""IMP-30 first-render invariant tests (per-unit slice).

This file is the shared regression home for IMP-30 units u2~u7. Each
implementation unit adds its own focused tests; u7 (regression coverage)
will broaden the surface (synthetic V4 fixtures for chain_exhausted
provisional, zero-V4 empty-shell, normal-path unchanged).

u3 scope (this slice) — select_composition_units last-resort provisional
fill for uncovered sections + _candidate_state "selected_provisional":
  1. default-off behavior is byte-identical to pre-u3 (IMP-05 guard).
  2. opt-in fills uncovered sections with provisional candidates that
     would otherwise be reported with selection_state "filtered_status".
  3. opt-in never displaces normal greedy selections.
  4. opt-in respects coverage non-overlap (no section selected twice).
  5. plan_composition._candidate_state returns "selected_provisional" for
     fills and "selected" for normal greedy picks.

Synthetic naming convention (Codex #10 E1):
  - MOCK_ prefix mandatory
  - _a / _b suffixes = enumeration only (NOT ordering / priority)
  - rank/order expressed by V4 rank field, NEVER ID suffix
"""

from __future__ import annotations

from dataclasses import dataclass, field
from typing import Optional

from src.phase_z2_composition import (
    CompositionUnit,
    plan_composition,
    select_composition_units,
)


# ─── Synthetic match shape (duck-typed V4Match-like) ───────────────────


@dataclass
class _StubV4Match:
    """Duck-typed V4Match surface used by collect_candidates / score path.

    Mirrors src.phase_z2_pipeline.V4Match fields touched by composition:
    template_id / frame_id / frame_number / confidence / label / v4_rank /
    selection_path / fallback_reason / provisional.

    Composition module intentionally does not import V4Match (circular dep
    avoidance), so a plain stub object with the same attributes is the
    contract.
    """

    template_id: str
    frame_id: str
    frame_number: int
    confidence: float
    label: str
    v4_rank: Optional[int] = None
    selection_path: str = "rank_1"
    fallback_reason: Optional[str] = None
    provisional: bool = False


@dataclass
class _StubSection:
    """Minimal section surface used by collect_candidates.

    Exposes section_id / raw_content / title. Matches MdxSection's attribute
    names without importing pipeline (keeps test isolated to composition
    module).
    """

    section_id: str
    title: str = ""
    raw_content: str = ""


# Phase Z status mapping fixture — only the keys exercised here are listed.
# Real mapping (V4_LABEL_TO_PHASE_Z_STATUS in pipeline) is broader; this
# stub deliberately mirrors only what the tests touch.
_LABEL_TO_STATUS = {
    "use_as_is": "matched_zone",
    "light_edit": "adapt_matched_zone",
    "restructure": "extract_matched_zone",
    "reject": "fallback_candidate",
}
_ALLOWED_STATUSES = {"matched_zone", "adapt_matched_zone"}


# ─── Helpers ────────────────────────────────────────────────────────────


def _make_lookup(matches_by_section: dict[str, _StubV4Match]):
    """Return v4_lookup_fn (section_id -> _StubV4Match | None)."""

    def _fn(section_id: str):
        return matches_by_section.get(section_id)

    return _fn


def _make_candidates_lookup_empty():
    """v4_candidates_lookup_fn that always returns [] (no Step 6-A axis here)."""

    def _fn(section_id: str):
        return []

    return _fn


def _u3_direct_match() -> _StubV4Match:
    """Build the rank-1 ``use_as_is`` match used as the normal greedy pick."""
    return _StubV4Match(
        template_id="MOCK_template_direct_a",
        frame_id="MOCK_frame_001",
        frame_number=1,
        confidence=0.9,
        label="use_as_is",
        v4_rank=1,
    )


def _u3_restructure_match(**overrides) -> _StubV4Match:
    """Build the rank-1 ``restructure`` match.

    ``overrides`` are forwarded verbatim to ``_StubV4Match`` so each test
    states only the fields it varies (selection_path / fallback_reason /
    provisional); everything not overridden keeps the stub's defaults.
    """
    return _StubV4Match(
        template_id="MOCK_template_restructure_a",
        frame_id="MOCK_frame_002",
        frame_number=2,
        confidence=0.65,
        label="restructure",
        v4_rank=1,
        **overrides,
    )


def _u3_greedy_unit() -> CompositionUnit:
    """Hand-built matched_zone unit for S1 (bypasses collect_candidates)."""
    return CompositionUnit(
        source_section_ids=["S1"],
        merge_type="single",
        frame_template_id="MOCK_template_direct_a",
        frame_id="MOCK_frame_001",
        frame_number=1,
        confidence=0.9,
        label="use_as_is",
        phase_z_status="matched_zone",
        raw_content="alpha",
        title="S1",
    )


def _u3_plan(sections, matches, **plan_kwargs):
    """Call plan_composition with the shared stub fixtures.

    ``plan_kwargs`` is forwarded untouched. Passing nothing leaves
    ``allow_provisional_fill`` unset, so the callee's own default applies
    (this is what the default-off test relies on).

    Returns the ``(units, preset, debug)`` triple from plan_composition.
    """
    return plan_composition(
        sections,
        _make_lookup(matches),
        _LABEL_TO_STATUS,
        _ALLOWED_STATUSES,
        v4_candidates_lookup_fn=_make_candidates_lookup_empty(),
        **plan_kwargs,
    )


def _u3_summary_by_section(debug) -> dict:
    """Index debug["candidates_summary"] by tuple(source_section_ids)."""
    return {
        tuple(entry["source_section_ids"]): entry
        for entry in debug["candidates_summary"]
    }


# ─── u3 case 1 : default-off behavior byte-identical to pre-u3 ─────────


def test_u3_default_off_preserves_imp05_behavior():
    """IMP-05 regression guard.

    With allow_provisional_fill=False (default), select_composition_units
    must yield the same units as pre-u3 even when provisional candidates
    exist in the pool.

    Setup:
      - S1: use_as_is + provisional=False (normal selection)
      - S2: restructure + provisional=True (would be fill-eligible)

    Expected (default-off):
      - units = [S1 unit] only. S2 stays uncovered.
    """
    sections = [_StubSection("S1"), _StubSection("S2")]
    matches = {
        "S1": _u3_direct_match(),
        "S2": _u3_restructure_match(
            selection_path="provisional_rank_1",
            fallback_reason="phase_z_status_not_allowed:extract_matched_zone",
            provisional=True,
        ),
    }

    # allow_provisional_fill omitted → default False
    units, preset, debug = _u3_plan(sections, matches)

    assert len(units) == 1
    assert units[0].source_section_ids == ["S1"]
    assert units[0].provisional is False
    assert preset == "single"

    # S2 candidate must still appear in debug summary as filtered_status
    summary_by_section = _u3_summary_by_section(debug)
    assert summary_by_section[("S2",)]["selection_state"] == "filtered_status"


# ─── u3 case 2 : opt-in fills uncovered sections with provisional ──────


def test_u3_opt_in_fills_uncovered_with_provisional():
    """IMP-30 u3 — opt-in path.

    Setup mirrors case 1 (S1 use_as_is + S2 provisional restructure) but
    with allow_provisional_fill=True. S2 must be filled as
    selected_provisional unit; greedy S1 selection unchanged.
    """
    sections = [_StubSection("S1"), _StubSection("S2")]
    matches = {
        "S1": _u3_direct_match(),
        "S2": _u3_restructure_match(
            selection_path="provisional_rank_1",
            fallback_reason="phase_z_status_not_allowed:extract_matched_zone",
            provisional=True,
        ),
    }

    units, preset, _ = _u3_plan(sections, matches, allow_provisional_fill=True)

    # Both sections must be covered now
    section_ids = {sid for u in units for sid in u.source_section_ids}
    assert section_ids == {"S1", "S2"}

    # Identify which unit covers which section
    by_section = {tuple(u.source_section_ids): u for u in units}
    s1_unit = by_section[("S1",)]
    s2_unit = by_section[("S2",)]

    # Normal greedy pick — provisional flag stays False
    assert s1_unit.provisional is False
    # Provisional fill — provisional flag carried from V4Match (u1) via u2
    assert s2_unit.provisional is True
    assert s2_unit.label == "restructure"

    # Layout preset reflects 2-unit count
    assert preset == "horizontal-2"


# ─── u3 case 3 : _candidate_state distinguishes selected vs provisional ─


def test_u3_candidate_state_marks_selected_provisional():
    """plan_composition._candidate_state must return:

    - "selected" for normal greedy picks
    - "selected_provisional" for last-resort fills
    """
    sections = [_StubSection("S1"), _StubSection("S2")]
    matches = {
        "S1": _u3_direct_match(),
        "S2": _u3_restructure_match(
            selection_path="provisional_rank_1",
            provisional=True,
        ),
    }

    # Only the debug payload is inspected in this case.
    _, _, debug = _u3_plan(sections, matches, allow_provisional_fill=True)

    summary_by_section = _u3_summary_by_section(debug)
    assert summary_by_section[("S1",)]["selection_state"] == "selected"
    assert (
        summary_by_section[("S2",)]["selection_state"] == "selected_provisional"
    )


# ─── u3 case 4 : opt-in preserves non-overlap (no double coverage) ─────


def test_u3_opt_in_respects_coverage_non_overlap():
    """Provisional fill must never cover a section twice.

    Setup (two sibling sub-sections of one parent):
      - P-sub-1: use_as_is, non-provisional (normal selection)
      - P-sub-2: restructure, provisional=True (eligible for fill)
      - no V4 match is registered for the parent itself

    Author's note (behavior of the composition module, not verifiable from
    this file): the "-sub-" id form is used so that derive_parent_id maps
    both children to parent "P", which may make the planner synthesize a
    parent_merged_inferred candidate spanning both sections. Its
    representative child would be P-sub-1 (higher confidence, not
    provisional), so that merge is not itself provisional.

    Whatever candidate wins, the test pins only the invariant: with
    allow_provisional_fill=True every section is covered exactly once.
    """
    sections = [
        _StubSection("P-sub-1", raw_content="alpha"),
        _StubSection("P-sub-2", raw_content="beta"),
    ]
    matches = {
        "P-sub-1": _u3_direct_match(),
        "P-sub-2": _u3_restructure_match(provisional=True),
    }

    units, _, _ = _u3_plan(sections, matches, allow_provisional_fill=True)

    covered = [sid for u in units for sid in u.source_section_ids]

    # No section appears twice — non-overlap invariant
    assert len(covered) == len(set(covered))
    # Both sections covered exactly once
    assert set(covered) == {"P-sub-1", "P-sub-2"}


# ─── u3 case 5 : opt-in with no provisional candidates is a no-op ──────


def test_u3_opt_in_noop_when_no_provisional_candidates():
    """allow_provisional_fill=True with zero provisional candidates must
    behave identically to default-off.

    No fill is forced; uncovered sections simply remain uncovered (u4 owns
    the zero-unit empty-shell terminal).
    """
    sections = [_StubSection("S1"), _StubSection("S2")]
    matches = {
        "S1": _u3_direct_match(),
        # S2: restructure but NOT provisional (e.g., pipeline did not opt
        # into u1 allow_provisional, or section had real rank-1 restructure)
        "S2": _u3_restructure_match(provisional=False),
    }

    units, preset, debug = _u3_plan(
        sections, matches, allow_provisional_fill=True
    )

    assert len(units) == 1
    assert units[0].source_section_ids == ["S1"]
    assert preset == "single"

    # S2 remains filtered_status — not provisional, so u3 fill ignores it
    summary_by_section = _u3_summary_by_section(debug)
    assert summary_by_section[("S2",)]["selection_state"] == "filtered_status"


# ─── u3 case 6 : select_composition_units direct invocation parity ─────


def test_u3_select_composition_units_default_off_signature():
    """Direct invocation without keyword-only u3 args must remain valid
    (backward-compat for existing callers that import the function
    directly).
    """
    # Build a minimal CompositionUnit by hand — bypass collect_candidates.
    c1 = _u3_greedy_unit()

    units = select_composition_units([c1], _ALLOWED_STATUSES)

    assert len(units) == 1
    assert units[0].source_section_ids == ["S1"]


def test_u3_select_composition_units_opt_in_direct():
    """Direct invocation with u3 opt-in must fill uncovered section from
    provisional candidate pool, leaving greedy pick untouched.
    """
    c_greedy = _u3_greedy_unit()
    c_provisional = CompositionUnit(
        source_section_ids=["S2"],
        merge_type="single",
        frame_template_id="MOCK_template_restructure_a",
        frame_id="MOCK_frame_002",
        frame_number=2,
        confidence=0.65,
        label="restructure",
        phase_z_status="extract_matched_zone",
        raw_content="beta",
        title="S2",
        provisional=True,
    )

    units = select_composition_units(
        [c_greedy, c_provisional],
        _ALLOWED_STATUSES,
        all_section_ids=["S1", "S2"],
        allow_provisional_fill=True,
    )

    assert len(units) == 2
    by_section = {tuple(u.source_section_ids): u for u in units}
    assert by_section[("S1",)].provisional is False
    assert by_section[("S2",)].provisional is True


# ════════════════════════════════════════════════════════════════════════
# u4 — pipeline abort guard empty-shell
# synthesis
# ════════════════════════════════════════════════════════════════════════
#
# u4 replaces the pre-IMP-30 `sys.exit(1)` at the composition_planner abort
# guard with two-phase recovery: provisional retry (Phase A, opt-in u1+u3)
# then terminal empty-shell (Phase B). The shell is a single CompositionUnit
# with frame_template_id="__empty__" and preset="single"; the per-unit
# for-loop's __empty__ branch bypasses mapper/contract and emits a
# placeholder zones_data/debug_zones record so final.html still writes.
#
# These tests verify the composition-side invariants that u4 relies on:
#   - CompositionUnit can be constructed in the empty-shell shape.
#   - The shell shape carries the data needed for u5 (provisional flag) /
#     u6 (status qualifier) / render_slide __empty__ branch (template_id).
# The pipeline-level integration (provisional retry / empty-shell synthesis
# at the abort guard, plus the per-unit __empty__ bypass) is covered by
# u7 (regression coverage) with synthetic V4 fixtures.


def test_u4_empty_shell_unit_shape_matches_pipeline_synthesis():
    """Pin the field shape of the u4 empty-shell CompositionUnit.

    The unit is built here exactly as the IMP-30 u4 abort guard is described
    to synthesize it (src/phase_z2_pipeline.py, u4 block around line 3203),
    and every field that downstream consumers are said to rely on is
    checked:

    - frame_template_id == "__empty__" → render_slide short-circuits
      partial_html to "" (existing __empty__ branch, around line 2106).
    - phase_z_status == "empty_shell" → Step 20 can tell the shell apart
      from matched_zone / adapt_matched_zone / extract_matched_zone /
      fallback_candidate (u6 surfaces this as an additive qualifier).
    - provisional is True → u5 zone--provisional class + needs-adaptation
      badge (template-side wiring).
    - source_section_ids lists every aligned section id →
      compute_slide_status treats each section as covered by the shell
      (u6 counts provisional_first_render_units).
    - selection_path == "empty_shell" and fallback_reason set → the audit
      trace survives in step06_composition_plan.json.
    """
    section_ids = ["S1", "S2", "S3"]
    bodies = ["alpha", "beta", "gamma"]
    headings = ["First", "Second", "Third"]

    audit_trail = {
        "imp30_u4": "terminal_first_render_empty_shell",
        "reason": "no_rank_1_V4_evidence_in_any_section",
        "aligned_section_ids": section_ids,
    }
    shell_kwargs = dict(
        # Copy so the unit does not alias the local fixture list.
        source_section_ids=list(section_ids),
        merge_type="empty_shell",
        frame_template_id="__empty__",
        frame_id="__empty__",
        frame_number=0,
        confidence=0.0,
        label="empty_shell",
        phase_z_status="empty_shell",
        raw_content="\n\n".join(bodies),
        title=" / ".join(headings),
        v4_rank=None,
        selection_path="empty_shell",
        fallback_reason="no_v4_rank_1_for_any_section",
        score=0.0,
        rationale=audit_trail,
        provisional=True,
    )
    shell = CompositionUnit(**shell_kwargs)

    # Equality-checked fields, keyed by attribute name.
    expected_by_attr = {
        "frame_template_id": "__empty__",
        "frame_id": "__empty__",
        "label": "empty_shell",
        "phase_z_status": "empty_shell",
        "selection_path": "empty_shell",
        "fallback_reason": "no_v4_rank_1_for_any_section",
        "source_section_ids": section_ids,
        "confidence": 0.0,
        "score": 0.0,
    }
    for attr_name, wanted in expected_by_attr.items():
        assert getattr(shell, attr_name) == wanted, attr_name

    # Identity-checked fields.
    assert shell.provisional is True
    assert shell.v4_rank is None

    # MDX content preserved (no rewrite) — full raw content kept in the unit
    # even though no V4 mapping is applied. Adaptation deferred to IMP-31.
    assert shell.raw_content == "alpha\n\nbeta\n\ngamma"

    # Rationale carries the audit trail consumed by Step 6 artifact + u6.
    assert shell.rationale["imp30_u4"] == "terminal_first_render_empty_shell"
    assert shell.rationale["aligned_section_ids"] == section_ids


def test_u4_empty_shell_unit_default_provisional_is_false():
    """Smoke test — the provisional flag is opt-in.

    A plain CompositionUnit built without provisional=True must not look
    like an empty shell. Guards against accidental positives on normal
    units when u5 / u6 read unit.provisional.
    """
    plain_kwargs = dict(
        source_section_ids=["S1"],
        merge_type="single",
        frame_template_id="MOCK_template_direct_a",
        frame_id="MOCK_frame_001",
        frame_number=1,
        confidence=0.9,
        label="use_as_is",
        phase_z_status="matched_zone",
        raw_content="alpha",
        title="S1",
    )
    normal = CompositionUnit(**plain_kwargs)

    assert normal.provisional is False
    assert normal.frame_template_id != "__empty__"


def test_u4_empty_shell_phase_z_status_outside_mvp1_allowed():
    """The shell status 'empty_shell' must stay outside the allowed set.

    If it were inside MVP1_ALLOWED_STATUSES, future code that loops over
    units filtered by allowed_statuses would treat the shell as a normal
    matched zone — defeating the "needs adaptation" signal. Pinning the
    contract at the composition-test level means a status rename in the
    pipeline cannot silently leak the shell into normal flows.
    """
    # _ALLOWED_STATUSES mirrors the pipeline's MVP1_ALLOWED_STATUSES
    # ({"matched_zone", "adapt_matched_zone"}). The shell uses a distinct
    # status so downstream filters reject it.
    shell_status = "empty_shell"
    assert shell_status not in _ALLOWED_STATUSES


# ════════════════════════════════════════════════════════════════════════
# u5 — zones_data carries provisional flag; slide_base.html zone div adds
#      zone--provisional class + inline needs-adaptation badge
# ════════════════════════════════════════════════════════════════════════
#
# u5 wires the unit.provisional signal (set by u2 from V4Match.provisional in
# u1, or directly by u4 empty-shell synthesis) through the zones_data payload
# into the slide_base.html template. Visual contract:
#   - zones_data[i]['provisional'] = bool (default False; True only for IMP-30
#     opt-in synthesized units).
#   - slide_base.html zone div gets `zone--provisional` class when True; an
#     inline `needs adaptation` badge element is rendered inside the zone
#     (top-right corner via absolute pos).
#   - data-provisional="1" attribute set for downstream selectors / overflow
#     checker / e2e tooling.
#
# The composition / pipeline-level handoff is exercised by u3 / u4 already.
# u5 tests focus on: # - template-rendering output: class + badge HTML correctly emitted ONLY when # zones[i].provisional is truthy. (default-off path unchanged.) # - byte-equivalence: non-provisional zones render the same div shape as # pre-u5 (just no zone--provisional class / no badge element). import re from pathlib import Path from jinja2 import Environment, FileSystemLoader, select_autoescape # ─── u5 helpers ──────────────────────────────────────────────────────── def _render_slide_base(zones: list[dict], *, layout_preset: str = "single", layout_css: dict | None = None) -> str: """Render templates/phase_z2/slide_base.html directly via Jinja2 with a minimal zones list. Bypasses render_slide() so u5 can exercise the template-only contract without spinning up the full pipeline (no mapper, no contracts, no token CSS loader). slot_payload / partial_html are stubbed to fixed strings so the test focuses on zone div attributes.""" template_dir = Path(__file__).resolve().parents[1] / "templates" / "phase_z2" env = Environment( loader=FileSystemLoader(str(template_dir)), autoescape=select_autoescape(["html"]), ) if layout_css is None: layout_css = { "cols": "1fr", "rows": "1fr", "areas": '"single"', } # Each zone needs a partial_html (render_slide normally populates this). # Use a stable placeholder per zone so the assertion can target zone-level # attributes without coupling to frame template internals. for z in zones: z.setdefault("partial_html", "
stub
") base = env.get_template("slide_base.html") return base.render( slide_title="IMP-30 u5 test slide", slide_footer=None, zones=zones, layout_preset=layout_preset, layout_css=layout_css, gap_px=12, token_css="", # empty token CSS — not under test here embedded_mode="standalone", ) def _zone_div_for_position(html: str, position: str) -> str: """Return the opening tag + immediate inner content (up to but not including partial_html) for the zone div at a given `data-zone-position` value. Tight enough for class/attribute assertions, lenient enough not to depend on partial_html internals.""" pattern = re.compile( r'
]*>' r'(?:\s*]*>[^<]*)?', re.DOTALL, ) match = pattern.search(html) if not match: return "" return match.group(0) def _all_zone_div_openings(html: str) -> list[str]: """Return every zone-div opening tag in the layout body. Used to scope class / attribute assertions away from the CSS