From 5484077a53134474d48094eac73fc15a665b9bfb Mon Sep 17 00:00:00 2001 From: kyeongmin Date: Wed, 27 May 2026 08:15:08 +0900 Subject: [PATCH] feat(#94): IMP-94 u1~u6 Layer A region/content marker injection (stamper + render_slide chain + 4 zones_data.append placement_markers + 35 parity tests) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit u1 (src/region_marker_stamper.py): deterministic root-div stamper injecting data-region-id + data-content-unit-id onto each family-partial root div anchored by data-template-id. Idempotent (re-stamp = no-op), AI=0, additive only, empty/None markers no-op, F9/F29 frame-slot axis preserved. u2 (src/phase_z2_pipeline.py render_slide chain): _stamp_region_markers chained after IMP-56 u9 _stamp_zone_html. Marker source = zone.get("placement_markers") or [] — Codex #16 P4b crash risk closed via the or-[] call-site fallback. u3 (_derive_placement_markers helper): projects PlacementPlan.slot_assignments[] → list[dict] carrying region_id + content_unit_id + frame_slot_id (frame_slot_id reserved for #96 89-d). Live B4 path emits at primary zones_data.append. u4 (3 non-live zones_data.append defaults): placement_markers: [] at IMP-30 u4 empty-shell, IMP-86 u1 adapter_needed, post-loop unrenderable plan-record paths — uniform zone shape, stamper no-op surface. u5/u6 (tests/test_phase_z2_imp94_marker_parity.py): 33 hard tests + 2 cross-axis skip-if-anchor-absent (Emergency P4/P4b future axis). Coverage: 13 family-partial root anchors, F29 + F9 frame-slot preservation, idempotence, live render_slide stamping, P4b empty-marker no-crash, MDX 01 strip-attr parity, trace-to-DOM parity. Disjoint from #96 (data-frame-slot-id) by attribute name. SPEC anchor: docs/architecture/PHASE-Z-CONTENT-OBJECT-SUBZONE-SPEC.md §6.4 + §7.2 (Layer A read targets + render-path activation). 
Co-Authored-By: Claude Opus 4.7 (1M context) --- src/phase_z2_pipeline.py | 76 ++ src/region_marker_stamper.py | 137 +++ tests/test_phase_z2_imp94_marker_parity.py | 1148 ++++++++++++++++++++ 3 files changed, 1361 insertions(+) create mode 100644 src/region_marker_stamper.py create mode 100644 tests/test_phase_z2_imp94_marker_parity.py diff --git a/src/phase_z2_pipeline.py b/src/phase_z2_pipeline.py index a37450c..abe4d1e 100644 --- a/src/phase_z2_pipeline.py +++ b/src/phase_z2_pipeline.py @@ -134,6 +134,16 @@ from src.structure_override_resolver import ( # per-line edits back to the ``text_overrides`` axis (u1 schema, u4 resolver, # u5 Step-12 apply). Pure deterministic; no AI / HTTP / subprocess. from src.text_path_stamper import stamp_zone_html as _stamp_zone_html +# IMP-94 (#94) u2 — Step 11/13/21 region/content marker stamping wired into +# render_slide. u1 stamper injects ``data-region-id`` + ``data-content-unit-id`` +# onto each family-partial root ``
`` so Layer A telemetry (placement_trace +# ↔ DOM parity, Step 21 self-report, fit_classifier read targets §6.4) can +# resolve a rendered zone back to its PlacementPlan ``slot_assignments[]`` +# entry. Pure deterministic; AI=0. Marker values flow via the per-zone +# ``placement_markers`` list (u3 projects live B4 PlacementPlan; u4 ensures +# non-live append paths default to ``placement_markers=[]``). Disjoint from +# #96 (``data-frame-slot-id``) by attribute name. +from src.region_marker_stamper import stamp_zone_html as _stamp_region_markers # ─── Constants ────────────────────────────────────────────────── @@ -428,6 +438,42 @@ def _b4_mapper_source_blocked_exit( sys.exit(1) +def _derive_placement_markers(placement_plan) -> list[dict]: + """IMP-94 (#94) u3 — project ``PlacementPlan.slot_assignments[]`` → list of + marker dicts consumed by the u1/u2 ``_stamp_region_markers`` chain. + + Each marker carries ``region_id`` + ``content_unit_id`` (consumed by u1 + stamper) + ``frame_slot_id`` (reserved for #96 89-d per-slot axis — u1 + stamper silently ignores extras and excess markers). Pure deterministic + projection; AI=0; no mutation of ``placement_plan``. + + Args: + placement_plan: ``src.phase_z2_placement_planner.PlacementPlan`` instance + produced by ``plan_placement(...)`` at the runtime call site + (L6581-6585). ``slot_assignments`` empty/None → return ``[]`` so the + u2 call-site fallback ``or []`` is the deterministic no-op path. + + Returns: + list[dict] — one entry per ``SlotAssignment``. Empty when the plan has + no assignments (rejection path / no covering frame / empty section). + + Disjoint from #96 by attribute name — this helper only emits the marker + dict shape; the partial root-div ``data-region-id`` + ``data-content-unit-id`` + stamping happens in u1, not here. 
+ """ + if placement_plan is None: + return [] + assignments = getattr(placement_plan, "slot_assignments", None) or [] + markers: list[dict] = [] + for sa in assignments: + markers.append({ + "region_id": getattr(sa, "region_id", "") or "", + "content_unit_id": getattr(sa, "content_unit_id", "") or "", + "frame_slot_id": getattr(sa, "frame_slot_id", "") or "", + }) + return markers + + # ─── MDX parsing ──────────────────────────────────────────────── def parse_mdx(mdx_path: Path) -> tuple[str, list[MdxSection], Optional[str]]: @@ -3261,6 +3307,14 @@ def render_slide(slide_title: str, slide_footer: Optional[str], # skipped, excess text-lines pass through unstamped, and an # already-stamped element is left unchanged. zone["partial_html"] = _stamp_zone_html(rendered_partial, slot_payload) + # IMP-94 (#94) u2 — Step 11/13/21 region/content marker stamp + # chained after IMP-56 u9. Marker source = per-zone + # ``placement_markers`` (u3 live B4 path, u4 non-live defaults). + # Missing / None → empty list fallback keeps Codex #16 P4b + # crash risk closed and the stamper deterministically no-ops. + zone["partial_html"] = _stamp_region_markers( + zone["partial_html"], zone.get("placement_markers") or [] + ) base = env.get_template("slide_base.html") rendered_base = base.render( @@ -6041,6 +6095,11 @@ def run_phase_z2_mvp1( "assignment_source": "imp30_u4_empty_shell", "section_assignment_override": False, "provisional": bool(getattr(unit, "provisional", False)), + # IMP-94 (#94) u4 — non-live empty-shell path. No + # PlacementPlan exists for this branch (empty unit), so + # default to ``[]`` matching the u2 call-site ``or []`` + # fallback. Keeps zone shape uniform; u1 stamper no-ops. + "placement_markers": [], **_popup_payload, }) debug_zones.append({ @@ -6257,6 +6316,11 @@ def run_phase_z2_mvp1( "provisional": _unit_provisional, "adapter_needed": True, "mapper_fit_error": _fit_error_str, + # IMP-94 (#94) u4 — non-live IMP-86 u1 adapter_needed + # placeholder path. 
Unit skipped render (FitError → + # adapter contract), so no PlacementPlan exists. Default + # to ``[]`` matching the u2 call-site ``or []`` fallback. + "placement_markers": [], **_placeholder_popup, }) debug_zones.append({ @@ -6355,6 +6419,12 @@ def run_phase_z2_mvp1( "assignment_source": plan_assignment_source, "section_assignment_override": plan_section_override, "provisional": bool(getattr(unit, "provisional", False)), + # IMP-94 (#94) u3 — live B4 PlacementPlan → marker dicts. + # u1 stamper consumes only the first marker's region_id / + # content_unit_id; frame_slot_id is reserved for #96 89-d. + # Empty/None slot_assignments → [], so the u2 chain ``or []`` + # call-site fallback remains the deterministic no-op surface. + "placement_markers": _derive_placement_markers(placement_plan), **_popup_payload, }) debug_zones.append({ @@ -6432,6 +6502,12 @@ def run_phase_z2_mvp1( "popup_html": None, "preview_text": None, "popup_binding": None, + # IMP-94 (#94) u4 — non-live post-loop unrenderable + # plan-record path. No CompositionUnit / PlacementPlan + # for this branch (section-assignment plan produced no + # unit). Default to ``[]`` matching the u2 call-site + # ``or []`` fallback. u1 stamper no-ops. + "placement_markers": [], }) debug_zones.append({ "position": pos, diff --git a/src/region_marker_stamper.py b/src/region_marker_stamper.py new file mode 100644 index 0000000..8846d95 --- /dev/null +++ b/src/region_marker_stamper.py @@ -0,0 +1,137 @@ +"""IMP-94 (#94) u1 — region/content marker stamper for Phase Z final.html. + +Annotates each rendered family-partial root ``
`` with stable +``data-region-id="..."`` and ``data-content-unit-id="..."`` attributes so +downstream Layer A telemetry (placement_trace ↔ DOM parity, Step 21 self- +report, fit_classifier read targets §6.4) can resolve a rendered zone +back to its PlacementPlan ``slot_assignments[]`` entry. + +DOM contract (single point of truth — mirrored verbatim across the axis) :: + +
+ +The anchor is the uniform root-div emitted by every Phase Z family +partial under ``templates/phase_z2/families/`` (13 partials, evidence +confirmed via ``grep -l data-template-id`` = 13/13). All 13 partials +carry the pattern:: + +
+ +The stamper finds the FIRST such opening tag with a permissive regex +and injects ``data-region-id`` + ``data-content-unit-id`` as new +attributes. Existing attributes (class, data-frame-id, data-template-id, +etc.) are preserved verbatim. The injection is idempotent — a zone that +already carries ``data-region-id`` on its root div is left alone. + +Source of marker values : ``PlacementPlan.slot_assignments[].region_id`` +and ``.content_unit_id`` (see ``src/phase_z2_placement_planner.py`` +L253-258). u3 wires the live B4 path; u4 ensures non-live append paths +default to ``placement_markers=[]`` so this stamper safely no-ops. + +Forward-compat / safety : +- Empty / None ``markers`` → passthrough (returns ``zone_html`` unchanged). +- Non-str / empty ``zone_html`` → passthrough. +- Re-stamping (idempotent) preserves the first stamp. +- Only the FIRST data-template-id root div is stamped (one per zone). +- Markers with empty / missing ``region_id`` AND ``content_unit_id`` → + passthrough (no attribute injection). + +Guardrails (refs : Stage 1 binding contract, Stage 2 unit u1) : +- AI-isolation : pure deterministic Python; no LLM calls. +- Additive only : never edits / removes existing attributes. +- Idempotent : ``data-region-id`` probe short-circuits before re-inject. +- Disjoint from #96 (``data-frame-slot-id`` is a separate axis / attr). +""" +from __future__ import annotations + +import re +from typing import Any, Iterable, Mapping + +REGION_ID_ATTR: str = "data-region-id" +CONTENT_UNIT_ID_ATTR: str = "data-content-unit-id" + +# Matches the FIRST ``
`` opening tag. +# Group 1 captures the inner attribute string verbatim (incl. leading +# whitespace) so the rewriter can re-emit it unchanged after injection. +_ROOT_DIV_TAG_RE = re.compile( + r'<div\b((?:[^>]*\bdata-template-id\s*=\s*"[^"]+")[^>]*?)>', + flags=re.IGNORECASE | re.DOTALL, +) +# Probe for an existing ``data-region-id`` attribute (any value, any +# quote) so re-stamping is idempotent. +_HAS_REGION_ID_RE = re.compile(r"""\bdata-region-id\s*=""", flags=re.IGNORECASE) + + +def _coerce_marker_value(value: Any) -> str: + """Return a safe attribute-value string for ``value``. + + Non-str / None → ''. Strings are returned verbatim (caller responsible + for not embedding ``"`` since marker ids derive from + PlacementPlan.slot_assignments which are deterministic identifiers). + """ + if value is None: + return "" + if not isinstance(value, str): + return "" + return value + + +def stamp_zone_html( + zone_html: str, + markers: Iterable[Mapping[str, Any]] | None, +) -> str: + """Stamp the root family-partial ``
`` with region / content-unit ids. + + ``markers`` is an iterable of mapping objects shaped as :: + + { + "region_id": "", + "content_unit_id": "", + # optional, ignored here — reserved for #96 (89-d): + "frame_slot_id": "", + } + + Only ``markers[0]`` is consumed (one root div per zone). Excess + markers are reserved for a future per-slot stamper (#96) and are + silently ignored by this module. + + Returns ``zone_html`` unchanged when: + - ``zone_html`` is not a non-empty string, + - ``markers`` is None / empty, + - no ``data-template-id`` root div is found, + - the root div already carries ``data-region-id`` (idempotent), + - the first marker carries neither ``region_id`` nor ``content_unit_id``. + """ + if not isinstance(zone_html, str) or not zone_html: + return zone_html + if markers is None: + return zone_html + marker_list = list(markers) + if not marker_list: + return zone_html + first = marker_list[0] + if not isinstance(first, Mapping): + return zone_html + region_id = _coerce_marker_value(first.get("region_id")) + content_unit_id = _coerce_marker_value(first.get("content_unit_id")) + if not region_id and not content_unit_id: + return zone_html + + stamped = {"done": False} + + def _replace(match: re.Match[str]) -> str: + if stamped["done"]: + return match.group(0) + attrs = match.group(1) or "" + if _HAS_REGION_ID_RE.search(attrs): + stamped["done"] = True + return match.group(0) + stamped["done"] = True + injected = ( + f' {REGION_ID_ATTR}="{region_id}"' + f' {CONTENT_UNIT_ID_ATTR}="{content_unit_id}"' + ) + return f"<div{attrs}{injected}>" + + return _ROOT_DIV_TAG_RE.sub(_replace, zone_html, count=1) diff --git a/tests/test_phase_z2_imp94_marker_parity.py b/tests/test_phase_z2_imp94_marker_parity.py new file mode 100644 index 0000000..666b7b0 --- /dev/null +++ b/tests/test_phase_z2_imp94_marker_parity.py @@ -0,0 +1,1148 @@ +"""IMP-94 (#94) u1 — scoped tests for src.region_marker_stamper. 
+ +u1 coverage : passthrough (empty / None markers + non-str / empty html), +basic root-div injection, idempotence (re-stamp leaves zone unchanged), +and additive guarantee (existing attributes preserved verbatim). + +u5 / u6 expand to all 13 family root anchors, F29 injection, live +stamping, P4b no-crash, MDX 01 strip-attr parity, and trace-to-DOM +parity. +""" +from __future__ import annotations + +import json +import re +import subprocess +import sys +import uuid +from pathlib import Path + +import pytest + +from src.region_marker_stamper import ( + CONTENT_UNIT_ID_ATTR, + REGION_ID_ATTR, + stamp_zone_html, +) + + +_BASE_ROOT_DIV = ( + '
\n' + '
{{ slot_payload.title }}
\n' + '
' +) + + +# ─── passthrough ───────────────────────────────────────────────────────── + + +def test_stamp_passthrough_empty_html(): + assert stamp_zone_html("", [{"region_id": "r1", "content_unit_id": "c1"}]) == "" + + +def test_stamp_passthrough_non_string_html(): + # Non-str is returned verbatim without raising. + assert stamp_zone_html(None, [{"region_id": "r1"}]) is None # type: ignore[arg-type] + + +def test_stamp_passthrough_none_markers(): + assert stamp_zone_html(_BASE_ROOT_DIV, None) == _BASE_ROOT_DIV + + +def test_stamp_passthrough_empty_marker_list(): + assert stamp_zone_html(_BASE_ROOT_DIV, []) == _BASE_ROOT_DIV + + +def test_stamp_passthrough_empty_marker_values(): + markers = [{"region_id": "", "content_unit_id": None}] + assert stamp_zone_html(_BASE_ROOT_DIV, markers) == _BASE_ROOT_DIV + + +def test_stamp_passthrough_non_mapping_marker(): + # Defensive: scalar entry instead of a dict-like. + assert stamp_zone_html(_BASE_ROOT_DIV, ["bogus"]) == _BASE_ROOT_DIV # type: ignore[list-item] + + +def test_stamp_passthrough_no_template_id_anchor(): + # A div without data-template-id must not be touched. + plain = '
x
' + out = stamp_zone_html(plain, [{"region_id": "r1", "content_unit_id": "c1"}]) + assert out == plain + + +# ─── injection ─────────────────────────────────────────────────────────── + + +def test_stamp_injects_both_attrs_into_root_div(): + out = stamp_zone_html( + _BASE_ROOT_DIV, + [{"region_id": "zone_top__region_0", "content_unit_id": "cu_42"}], + ) + assert f'{REGION_ID_ATTR}="zone_top__region_0"' in out + assert f'{CONTENT_UNIT_ID_ATTR}="cu_42"' in out + # Existing root-div attributes preserved verbatim. + assert 'class="f29b"' in out + assert 'data-frame-id="1171281210"' in out + assert 'data-template-id="process_product_two_way"' in out + # Inner content untouched. + assert '{{ slot_payload.title }}' in out + + +def test_stamp_injects_region_only_when_content_missing(): + out = stamp_zone_html( + _BASE_ROOT_DIV, + [{"region_id": "zone_top__region_0", "content_unit_id": None}], + ) + assert f'{REGION_ID_ATTR}="zone_top__region_0"' in out + # Content-unit-id attr still emitted but with empty value (caller + # contract: marker emitted iff at least one of the two is non-empty). + assert f'{CONTENT_UNIT_ID_ATTR}=""' in out + + +def test_stamp_injects_content_only_when_region_missing(): + out = stamp_zone_html( + _BASE_ROOT_DIV, + [{"region_id": "", "content_unit_id": "cu_solo"}], + ) + assert f'{REGION_ID_ATTR}=""' in out + assert f'{CONTENT_UNIT_ID_ATTR}="cu_solo"' in out + + +def test_stamp_only_first_marker_consumed(): + out = stamp_zone_html( + _BASE_ROOT_DIV, + [ + {"region_id": "r1", "content_unit_id": "c1"}, + {"region_id": "r2", "content_unit_id": "c2"}, + ], + ) + assert f'{REGION_ID_ATTR}="r1"' in out + assert 'r2' not in out + assert f'{CONTENT_UNIT_ID_ATTR}="c1"' in out + assert 'c2' not in out + + +def test_stamp_only_first_root_div_consumed(): + # Two data-template-id divs back to back — only the first is stamped. 
+ twin = _BASE_ROOT_DIV + "\n" + _BASE_ROOT_DIV + out = stamp_zone_html(twin, [{"region_id": "r1", "content_unit_id": "c1"}]) + assert out.count(f'{REGION_ID_ATTR}="r1"') == 1 + + +# ─── idempotence ───────────────────────────────────────────────────────── + + +def test_stamp_idempotent_on_already_stamped_zone(): + once = stamp_zone_html( + _BASE_ROOT_DIV, + [{"region_id": "r1", "content_unit_id": "c1"}], + ) + twice = stamp_zone_html( + once, + [{"region_id": "r_OVERWRITE", "content_unit_id": "c_OVERWRITE"}], + ) + # Re-stamp does NOT overwrite; original markers preserved. + assert twice == once + assert 'r_OVERWRITE' not in twice + assert 'c_OVERWRITE' not in twice + + +# ─── u2 — render_slide chain wiring (smoke) ────────────────────────────── + + +def test_pipeline_imports_region_marker_stamper_as_expected_alias(): + """u2 wiring smoke — render_slide must chain the u1 stamper under the + ``_stamp_region_markers`` alias right after the IMP-56 u9 + ``_stamp_zone_html`` call site. Full live-stamping / P4b / parity + coverage lands in u6. + """ + from src import phase_z2_pipeline + from src.region_marker_stamper import stamp_zone_html as _u1_stamper + + assert hasattr(phase_z2_pipeline, "_stamp_region_markers"), ( + "render_slide chain expects ``_stamp_region_markers`` alias bound to " + "``src.region_marker_stamper.stamp_zone_html`` (IMP-94 u2)." + ) + assert phase_z2_pipeline._stamp_region_markers is _u1_stamper, ( + "``_stamp_region_markers`` must be the same callable exported by the " + "u1 module — additive-only chain after IMP-56 u9." + ) + + +# ─── u3 — live B4 PlacementPlan → marker dicts projection ──────────────── + + +def test_derive_placement_markers_empty_plan_returns_empty_list(): + """u3 projection helper — empty / None plan / empty assignments must + yield ``[]`` so the u2 call-site ``or []`` fallback stays the + deterministic no-op surface. 
+ """ + from src.phase_z2_pipeline import _derive_placement_markers + + assert _derive_placement_markers(None) == [] + + class _StubPlanNoAssignments: + slot_assignments = [] + + assert _derive_placement_markers(_StubPlanNoAssignments()) == [] + + class _StubPlanNoneAssignments: + slot_assignments = None + + assert _derive_placement_markers(_StubPlanNoneAssignments()) == [] + + +def test_derive_placement_markers_projects_slot_assignments(): + """u3 projection helper — each ``SlotAssignment`` → marker dict carrying + ``region_id`` + ``content_unit_id`` (consumed by u1) + ``frame_slot_id`` + (reserved for #96 89-d, silently passed through). Order preserved. + """ + from src.phase_z2_pipeline import _derive_placement_markers + from phase_z2_placement_planner import PlacementPlan, SlotAssignment + + plan = PlacementPlan( + section_id="zone--top", + selected_frame_id="1171281210", + selected_template_id="process_product_two_way", + slot_assignments=[ + SlotAssignment( + region_id="zone--top__region_0", + content_unit_id="cu_0", + frame_slot_id="slot_left", + partial_target_path=".f29b__left", + display_strategy="inline_full", + ), + SlotAssignment( + region_id="zone--top__region_1", + content_unit_id="cu_1", + frame_slot_id="slot_right", + partial_target_path=".f29b__right", + display_strategy="inline_full", + ), + ], + ) + markers = _derive_placement_markers(plan) + assert markers == [ + { + "region_id": "zone--top__region_0", + "content_unit_id": "cu_0", + "frame_slot_id": "slot_left", + }, + { + "region_id": "zone--top__region_1", + "content_unit_id": "cu_1", + "frame_slot_id": "slot_right", + }, + ] + + +def test_derive_placement_markers_into_u1_stamper_roundtrip(): + """u3 → u2 → u1 chain smoke — projection feeds the u1 stamper end-to-end. + Only the first marker's ``region_id`` / ``content_unit_id`` is consumed + (u1 contract); ``frame_slot_id`` is silently ignored here (reserved for + #96 89-d per-slot stamper). 
+ """ + from src.phase_z2_pipeline import _derive_placement_markers + from phase_z2_placement_planner import PlacementPlan, SlotAssignment + + plan = PlacementPlan( + section_id="zone--top", + slot_assignments=[ + SlotAssignment( + region_id="zone--top__region_0", + content_unit_id="cu_first", + frame_slot_id="slot_left", + partial_target_path="", + display_strategy="inline_full", + ), + SlotAssignment( + region_id="zone--top__region_1", + content_unit_id="cu_second", + frame_slot_id="slot_right", + partial_target_path="", + display_strategy="inline_full", + ), + ], + ) + out = stamp_zone_html(_BASE_ROOT_DIV, _derive_placement_markers(plan)) + assert f'{REGION_ID_ATTR}="zone--top__region_0"' in out + assert f'{CONTENT_UNIT_ID_ATTR}="cu_first"' in out + # Excess markers silently dropped by u1. + assert "cu_second" not in out + assert "zone--top__region_1" not in out + + +# ─── u4 — non-live zones_data.append paths emit placement_markers=[] ────── + + +def _read_pipeline_source() -> str: + """Load src/phase_z2_pipeline.py text for non-live append site audit. + + Source-text inspection is the lightest credible audit: the 5 non-live + branches are guarded by upstream conditions (empty unit / FitError / + adapter_needed / unrenderable plan record) that are expensive to + reproduce in a unit test. Asserting the per-branch literal + ``"placement_markers": []`` keyed off each branch's unique + ``assignment_source`` / verbatim_builder anchor gives byte-equivalent + confidence with zero runtime coupling. + """ + from pathlib import Path + + return Path("src/phase_z2_pipeline.py").read_text(encoding="utf-8") + + +def test_u4_non_live_empty_shell_carries_placement_markers_default(): + """u4 site 1 — IMP-30 u4 empty-shell append must default to ``[]``. + + Upstream: ``unit.frame_template_id == "__empty__"`` branch + (`src/phase_z2_pipeline.py:6534`). No PlacementPlan exists. 
+ """ + src = _read_pipeline_source() + anchor = '"assignment_source": "imp30_u4_empty_shell"' + idx = src.find(anchor) + assert idx != -1, "imp30_u4_empty_shell append site missing" + # Slice forward to the next zones_data.append top-level boundary or + # the next 1200 chars; default literal must appear inside the dict. + window = src[idx : idx + 1200] + assert '"placement_markers": []' in window, ( + "imp30_u4_empty_shell append must carry placement_markers=[]" + ) + + +def test_u4_non_live_emergency_p4b_verbatim_carries_placement_markers_default(): + """u4 site 2 — Emergency P4b verbatim code recovery append default ``[]``. + + Upstream: ``_verbatim_payload is not None`` branch + (`src/phase_z2_pipeline.py:6741`). FitError-recovered by code; no plan. + """ + src = _read_pipeline_source() + anchor = '"assignment_source": "emergency_p4b_verbatim_code"' + idx = src.find(anchor) + if idx == -1: + pytest.skip( + "Emergency P4b verbatim append site not present in this commit — " + "cross-axis test activates when Emergency P4/P4b commit lands." + ) + window = src[idx : idx + 1200] + assert '"placement_markers": []' in window, ( + "emergency_p4b_verbatim_code append must carry placement_markers=[]" + ) + + +def test_u4_non_live_emergency_p4_ai_inline_carries_placement_markers_default(): + """u4 site 3 — Emergency P4 inline-AI recovery append default ``[]``. + + Upstream: ``_ai_result is not None`` branch + (`src/phase_z2_pipeline.py:6777`). FitError-recovered by inline AI; + no PlacementPlan reaches this branch. + """ + src = _read_pipeline_source() + anchor = '"assignment_source": "emergency_p4_ai_inline"' + idx = src.find(anchor) + if idx == -1: + pytest.skip( + "Emergency P4 inline-AI append site not present in this commit — " + "cross-axis test activates when Emergency P4/P4b commit lands." 
+ ) + window = src[idx : idx + 1400] + assert '"placement_markers": []' in window, ( + "emergency_p4_ai_inline append must carry placement_markers=[]" + ) + + +def test_u4_non_live_imp86_adapter_needed_carries_placement_markers_default(): + """u4 site 4 — IMP-86 u1 adapter_needed placeholder append default ``[]``. + + Upstream: post-`emergency_p4` adapter_needed branch + (`src/phase_z2_pipeline.py:6843`). Unit skipped render entirely + (FitError → adapter contract surface); no plan. + """ + src = _read_pipeline_source() + anchor = '"assignment_source": "imp86_u1_adapter_needed"' + idx = src.find(anchor) + assert idx != -1, "imp86_u1_adapter_needed append site missing" + window = src[idx : idx + 1200] + assert '"placement_markers": []' in window, ( + "imp86_u1_adapter_needed append must carry placement_markers=[]" + ) + + +def test_u4_non_live_post_loop_unrenderable_carries_placement_markers_default(): + """u4 site 5 — post-loop unrenderable plan-record append default ``[]``. + + Upstream: ``pos not in renderable_positions`` branch + (`src/phase_z2_pipeline.py:7021`). Section-assignment plan produced no + CompositionUnit; no PlacementPlan exists. Anchored on the + ``"popup_binding": None,`` literal, not ``"position": pos,``: the other + 4 zones_data.append sites use ``"position": position,`` (full word), but + the parallel debug_zones.append for this branch also uses ``"position": pos,`` + and is distinguished only by having no ``popup_binding`` key. + """ + src = _read_pipeline_source() + # Anchor on the zones_data.append-only key tuple: "popup_binding": None + # appears only in the site-5 zones_data.append (debug_zones.append for + # the same branch has no popup_binding key). + anchor = '"popup_binding": None,' + idx = src.find(anchor) + assert idx != -1, "post-loop unrenderable append site missing" + # placement_markers is emitted on the line directly after popup_binding. 
+ window = src[idx : idx + 500] + assert '"placement_markers": []' in window, ( + "post-loop unrenderable append must carry placement_markers=[]" + ) + + +def test_u4_non_live_default_is_empty_list_no_op_through_u2_chain(): + """u4 contract closure — ``placement_markers=[]`` flowing through the + u2 call-site chain (`zone.get("placement_markers") or []`) must + deterministically no-op via the u1 stamper. Closes Codex #16 P4b + crash risk on every non-live branch. + """ + # Simulate the u2 chain shape: zone dict with explicit [] default. + zone = { + "partial_html": _BASE_ROOT_DIV, + "placement_markers": [], + } + out = stamp_zone_html(zone["partial_html"], zone.get("placement_markers") or []) + # u1 stamper deterministic no-op on empty markers. + assert out == _BASE_ROOT_DIV + # Specifically: no data-region-id / data-content-unit-id leaked in. + assert REGION_ID_ATTR not in out + assert CONTENT_UNIT_ID_ATTR not in out + + +# ─── u5 — all 13 family root anchors + F29 real-partial injection ──────── +# +# u5 contract (Stage 2 plan): "Test passthrough, idempotence, F29 injection, +# and all 13 root anchors." Passthrough + idempotence are already covered by +# the u1 synthetic-shape tests above. u5 adds two new axes: +# +# 1. Real-partial injection against each of the 13 family root divs — +# proves the u1 regex anchor (``
``) +# matches the actual rendered shape, not just the synthetic F29 stub. +# 2. F29 real-partial injection — proves the u1 stamper additively layers +# ``data-region-id`` + ``data-content-unit-id`` on the F29 root WITHOUT +# perturbing the inner ``data-frame-slot-id`` instances (#96 89-d axis, +# disjoint by attribute name). Same property is asserted for F9. +# +# Source-file inspection (read-only) is the lightest credible evidence; no +# Jinja render or full pipeline run is needed for this contract. + + +_FAMILY_PARTIALS_DIR = "templates/phase_z2/families" + +# All 13 family partials. Each tuple = (file_basename, expected_template_id). +# Sourced from `grep -n data-template-id templates/phase_z2/families/*.html` +# on branch main (fresh validation 2026-05-26, Stage 1 EXIT REPORT evidence). +_ALL_13_FAMILY_ANCHORS: tuple[tuple[str, str], ...] = ( + ("app_sw_package_vs_solution.html", "app_sw_package_vs_solution"), + ("bim_current_problems_paired.html", "bim_current_problems_paired"), + ("bim_dx_comparison_table.html", "bim_dx_comparison_table"), + ("bim_issues_quadrant_four.html", "bim_issues_quadrant_four"), + ("construction_bim_three_usage.html", "construction_bim_three_usage"), + ( + "construction_goals_three_circle_intersection.html", + "construction_goals_three_circle_intersection", + ), + ("dx_sw_necessity_three_perspectives.html", "dx_sw_necessity_three_perspectives"), + ("info_management_what_how_when.html", "info_management_what_how_when"), + ("pre_construction_model_info_stacked.html", "pre_construction_model_info_stacked"), + ("process_product_two_way.html", "process_product_two_way"), + ("sw_reality_three_emphasis.html", "sw_reality_three_emphasis"), + ("three_parallel_requirements.html", "three_parallel_requirements"), + ("three_persona_benefits.html", "three_persona_benefits"), +) + + +def _read_family_partial(basename: str) -> str: + from pathlib import Path + + return Path(_FAMILY_PARTIALS_DIR, basename).read_text(encoding="utf-8") + + +def 
test_u5_family_partials_count_is_13(): + """u5 fixture guard — exactly 13 family partials carry the + ``data-template-id`` root anchor on branch main. Catches accidental + inventory drift before the per-partial tests run. + """ + from pathlib import Path + + files = sorted(Path(_FAMILY_PARTIALS_DIR).glob("*.html")) + assert len(files) == 13, ( + f"family partial count drift: expected 13, found {len(files)}: " + f"{[f.name for f in files]}" + ) + assert len(_ALL_13_FAMILY_ANCHORS) == 13 + + +def test_u5_stamper_injects_into_every_family_partial_root(): + """u5 core — the u1 stamper must successfully inject + ``data-region-id`` + ``data-content-unit-id`` into the root + ``
`` of EACH of the 13 family + partials. Proves the regex anchor matches the real rendered shape + (not just the synthetic F29 stub used in u1 tests). + """ + markers = [ + {"region_id": "zone--top__region_0", "content_unit_id": "cu_test"}, + ] + for basename, template_id in _ALL_13_FAMILY_ANCHORS: + partial_src = _read_family_partial(basename) + # Sanity: the anchor exists in the source. + anchor = f'data-template-id="{template_id}"' + assert anchor in partial_src, ( + f"{basename}: data-template-id={template_id!r} anchor missing in source" + ) + # Stamp. + out = stamp_zone_html(partial_src, markers) + # Both markers must land. + assert f'{REGION_ID_ATTR}="zone--top__region_0"' in out, ( + f"{basename}: region_id marker not injected" + ) + assert f'{CONTENT_UNIT_ID_ATTR}="cu_test"' in out, ( + f"{basename}: content_unit_id marker not injected" + ) + # Anchor preserved verbatim (additive only — root attrs untouched). + assert anchor in out, ( + f"{basename}: data-template-id anchor lost after stamping" + ) + # Exactly ONE stamp on the root (u1 contract: only first root div). + assert out.count(f'{REGION_ID_ATTR}="zone--top__region_0"') == 1, ( + f"{basename}: region_id stamped more than once" + ) + + +def test_u5_stamper_idempotent_on_every_family_partial(): + """u5 idempotence — re-stamping any of the 13 family partials with + different markers must NOT overwrite (first stamp wins). Mirrors the + u1 synthetic-shape idempotence guarantee against real partials. 
+ """ + first = [{"region_id": "r_first", "content_unit_id": "c_first"}] + second = [{"region_id": "r_OVERWRITE", "content_unit_id": "c_OVERWRITE"}] + for basename, _template_id in _ALL_13_FAMILY_ANCHORS: + partial_src = _read_family_partial(basename) + once = stamp_zone_html(partial_src, first) + twice = stamp_zone_html(once, second) + assert once == twice, f"{basename}: re-stamp mutated output" + assert "r_OVERWRITE" not in twice, ( + f"{basename}: re-stamp overwrote region_id" + ) + assert "c_OVERWRITE" not in twice, ( + f"{basename}: re-stamp overwrote content_unit_id" + ) + + +def test_u5_stamper_passthrough_on_every_family_partial_with_empty_markers(): + """u5 passthrough — every family partial with ``markers=[]`` must + return byte-equivalent source (deterministic no-op on the u2 fallback + surface). Confirms no non-live branch can leak markers via the chain. + """ + for basename, _template_id in _ALL_13_FAMILY_ANCHORS: + partial_src = _read_family_partial(basename) + out = stamp_zone_html(partial_src, []) + assert out == partial_src, ( + f"{basename}: passthrough on empty markers mutated source" + ) + assert REGION_ID_ATTR not in out, ( + f"{basename}: REGION_ID_ATTR leaked into passthrough output" + ) + assert CONTENT_UNIT_ID_ATTR not in out, ( + f"{basename}: CONTENT_UNIT_ID_ATTR leaked into passthrough output" + ) + + +def test_u5_f29_real_partial_injection_preserves_frame_slot_axis(): + """u5 F29 real-partial — stamping the actual + ``process_product_two_way.html`` (F29) root with u1 markers must NOT + perturb the inner ``data-frame-slot-id`` instances (#96 89-d axis, + disjoint by attribute name). Six F29 inner slots + (process_column × 3 + product_column × 3) must remain identical + pre / post stamp. + """ + f29 = _read_family_partial("process_product_two_way.html") + + # Baseline counts on the unmodified partial (fresh-validation evidence). 
+ baseline_process_col = f29.count('data-frame-slot-id="process_column"') + baseline_product_col = f29.count('data-frame-slot-id="product_column"') + assert baseline_process_col == 3, ( + f"F29 baseline drift: expected 3 process_column, found {baseline_process_col}" + ) + assert baseline_product_col == 3, ( + f"F29 baseline drift: expected 3 product_column, found {baseline_product_col}" + ) + + out = stamp_zone_html( + f29, + [{"region_id": "zone--bottom__region_0", "content_unit_id": "cu_f29_root"}], + ) + + # u1 markers injected on the root. + assert f'{REGION_ID_ATTR}="zone--bottom__region_0"' in out + assert f'{CONTENT_UNIT_ID_ATTR}="cu_f29_root"' in out + # Root anchor still present verbatim. + assert 'data-template-id="process_product_two_way"' in out + # #96 axis (data-frame-slot-id) untouched — counts unchanged. + assert out.count('data-frame-slot-id="process_column"') == baseline_process_col + assert out.count('data-frame-slot-id="product_column"') == baseline_product_col + # u1 attribute name disjoint from #96 attribute name (defensive). + assert REGION_ID_ATTR != "data-frame-slot-id" + assert CONTENT_UNIT_ID_ATTR != "data-frame-slot-id" + + +def test_u5_f9_real_partial_injection_preserves_frame_slot_axis(): + """u5 F9 real-partial — same axis-isolation guarantee for the second + pre-existing ``data-frame-slot-id`` carrier + (``pre_construction_model_info_stacked.html``). Inner pill_dynamic + slot must remain identical pre / post stamp. 
+ """ + f9 = _read_family_partial("pre_construction_model_info_stacked.html") + + baseline_pill = f9.count('data-frame-slot-id="pill_dynamic"') + assert baseline_pill >= 1, ( + f"F9 baseline drift: expected ≥1 pill_dynamic, found {baseline_pill}" + ) + + out = stamp_zone_html( + f9, + [{"region_id": "zone--top__region_0", "content_unit_id": "cu_f9_root"}], + ) + + assert f'{REGION_ID_ATTR}="zone--top__region_0"' in out + assert f'{CONTENT_UNIT_ID_ATTR}="cu_f9_root"' in out + assert 'data-template-id="pre_construction_model_info_stacked"' in out + # #96 axis untouched. + assert out.count('data-frame-slot-id="pill_dynamic"') == baseline_pill + + +# ─── u6 — integration / parity through render_slide ────────────────────── +# +# u6 contract (Stage 2 plan): "Test live stamping, P4b no-crash, MDX 01 +# strip-attr parity, and trace-to-DOM parity." +# +# 1. Live stamping — synthetic PlacementPlan + slot_assignments fed through +# _derive_placement_markers (u3) → zones_data placement_markers → the +# u2 render_slide chain → final HTML carries data-region-id + +# data-content-unit-id on the family-partial root. +# 2. P4b no-crash — a zone shape matching the u4 non-live P4b verbatim +# recovery surface (placement_markers=[]) renders without crash and +# without marker leak. Closes Codex #16 P4b crash-risk on the live +# pipeline (in addition to the call-site ``or []`` fallback). +# 3. Strip-attr parity — rendering the same zone WITH markers vs WITHOUT +# markers, then stripping the two new IMP-94 attrs from the marked +# output, must produce byte-equivalent HTML. Locks the issue body's +# validation contract: "mdx 01-05 의 final.html SHA = byte-equivalent +# except for new data-* attrs" at the render_slide layer (no MDX 01 +# pipeline run needed — synthetic zone exercises the same code path). +# 4. 
#    Trace-to-DOM parity — placement_trace (asdict of PlacementPlan) ↔ DOM
#    ``[data-region-id]`` set, scoped to the u1 single-root-stamp contract:
#    the root region_id is exactly trace.slot_assignments[0].region_id
#    (only the first marker is consumed per zone in u1).


_RENDER_TEMPLATE_ID = "bim_current_problems_paired"


def _u6_layout_css() -> dict:
    """Return the layout_css mapping used for the single-zone u6 renders.

    One grid area named ``primary`` with a single ``1fr`` column and row.
    Follows the shape used in tests/test_phase_z2_text_path_stamper.py so
    the u6 tests do not depend on real Phase Z layout planning.
    """
    return dict(areas='"primary"', cols="1fr", rows="1fr")


def _u6_paired_slot_payload() -> dict:
    """Build a synthetic slot_payload for ``bim_current_problems_paired``.

    Row 1 carries a label and a one-item body on both sides. Rows 2-4
    carry labels only; their bodies are empty lists. A fresh dict is
    returned on every call.
    """
    payload: dict = {"title": "u6 synthetic title"}
    # Row 1: the only row with body content.
    for side, letter in (("left", "A"), ("right", "A")):
        payload[f"row_1_{side}_label"] = f"u6 {side} pill"
        payload[f"row_1_{side}_body"] = [
            {"text": f"u6 {side} {letter}", "indent": 0}
        ]
    # Rows 2-4: labelled, body left empty. Key insertion order matches
    # left_label, left_body, right_label, right_body per row.
    for row in (2, 3, 4):
        for side in ("left", "right"):
            payload[f"row_{row}_{side}_label"] = f"u6 {side} {row}"
            payload[f"row_{row}_{side}_body"] = []
    return payload


def _u6_render_zone(zone: dict) -> str:
    """Render a one-zone slide through render_slide in embedded mode.

    Args:
        zone: the single zones_data entry to render.

    Returns:
        Whatever ``render_slide`` returns for that zone (the rendered HTML).
    """
    from src.phase_z2_pipeline import render_slide

    render_kwargs = {
        "slide_title": "u6_title",
        "slide_footer": None,
        "zones_data": [zone],
        "layout_preset": "single",
        "layout_css": _u6_layout_css(),
        "gap_px": 14,
        "embedded_mode": "embedded",
    }
    return render_slide(**render_kwargs)


def _u6_synthetic_plan():
    """Create a PlacementPlan with two slot assignments.

    Used by the live-stamping and trace-parity tests: assignment 0 is the
    one expected on the root, assignment 1 is the "excess" one.
    """
    from phase_z2_placement_planner import PlacementPlan, SlotAssignment

    # (region_id, content_unit_id, frame_slot_id, partial_target_path)
    assignment_specs = (
        (
            "zone--primary__region_0",
            "cu_u6_root",
            "row_1_left_body",
            ".f17b__row1_left",
        ),
        (
            "zone--primary__region_1",
            "cu_u6_excess",
            "row_1_right_body",
            ".f17b__row1_right",
        ),
    )
    assignments = [
        SlotAssignment(
            region_id=region_id,
            content_unit_id=content_unit_id,
            frame_slot_id=frame_slot_id,
            partial_target_path=target_path,
            display_strategy="inline_full",
        )
        for region_id, content_unit_id, frame_slot_id, target_path in assignment_specs
    ]
    return PlacementPlan(
        section_id="zone--primary",
        selected_frame_id="1171281194",
        selected_template_id=_RENDER_TEMPLATE_ID,
        slot_assignments=assignments,
    )


def test_u6_live_stamping_through_render_slide():
    """u6 axis 1 — plan → u3 projection → u2 chain → u1 stamper.

    The rendered HTML must carry the first marker's data-region-id and
    data-content-unit-id exactly once, and must not contain anything from
    the second (excess) slot assignment.
    """
    from src.phase_z2_pipeline import _derive_placement_markers

    projected = _derive_placement_markers(_u6_synthetic_plan())
    # u3 contract: one marker dict per SlotAssignment.
    assert len(projected) == 2
    head_marker = projected[0]
    assert head_marker["region_id"] == "zone--primary__region_0"
    assert head_marker["content_unit_id"] == "cu_u6_root"

    rendered = _u6_render_zone(
        {
            "position": "primary",
            "template_id": _RENDER_TEMPLATE_ID,
            "slot_payload": _u6_paired_slot_payload(),
            "placement_markers": projected,
        }
    )

    root_region_token = f'{REGION_ID_ATTR}="zone--primary__region_0"'
    # First marker lands on the root.
    assert root_region_token in rendered
    assert f'{CONTENT_UNIT_ID_ATTR}="cu_u6_root"' in rendered
    # Nothing from the excess marker may reach the output.
    for excess_value in ("cu_u6_excess", "zone--primary__region_1"):
        assert excess_value not in rendered
    # The family-partial root anchor survives stamping.
    assert f'data-template-id="{_RENDER_TEMPLATE_ID}"' in rendered
    # One zone, one root, one stamp.
    assert rendered.count(root_region_token) == 1


def test_u6_p4b_path_no_crash_with_empty_placement_markers():
    """u6 axis 2 — a zone with ``placement_markers=[]`` renders cleanly.

    The render must not raise, must not emit either IMP-94 attribute, and
    must still contain the family-partial root anchor.
    """
    empty_marker_zone = dict(
        position="primary",
        template_id=_RENDER_TEMPLATE_ID,
        slot_payload=_u6_paired_slot_payload(),
        placement_markers=[],
    )
    # A raise here is the failure this test exists to catch.
    rendered = _u6_render_zone(empty_marker_zone)

    assert REGION_ID_ATTR not in rendered, (
        "empty placement_markers must not leak data-region-id into final.html"
    )
    assert CONTENT_UNIT_ID_ATTR not in rendered, (
        "empty placement_markers must not leak data-content-unit-id into final.html"
    )
    # Render path sanity: the root is still there.
    assert f'data-template-id="{_RENDER_TEMPLATE_ID}"' in rendered


def test_u6_strip_attr_parity_baseline_vs_marked():
    """u6 axis 3 — marked output minus the two new attrs equals the baseline.

    The same zone is rendered once with ``placement_markers=[]`` and once
    with a single marker. Removing the two IMP-94 attribute tokens from
    the marked render must give back the unmarked render byte-for-byte,
    i.e. the issue-body contract "final.html SHA for mdx 01-05 is
    byte-equivalent except for new data-* attrs", checked at the
    render_slide layer with a synthetic zone.
    """

    def _zone_with(markers: list) -> dict:
        # Each zone gets its own freshly built payload.
        return {
            "position": "primary",
            "template_id": _RENDER_TEMPLATE_ID,
            "slot_payload": _u6_paired_slot_payload(),
            "placement_markers": markers,
        }

    baseline_zone = _zone_with([])
    stamped_zone = _zone_with(
        [
            {
                "region_id": "zone--primary__region_0",
                "content_unit_id": "cu_u6_strip",
            }
        ]
    )
    baseline_html = _u6_render_zone(baseline_zone)
    stamped_html = _u6_render_zone(stamped_zone)

    region_token = f'{REGION_ID_ATTR}="zone--primary__region_0"'
    content_token = f'{CONTENT_UNIT_ID_ATTR}="cu_u6_strip"'

    # Sanity: the marked render really carries each attr exactly once.
    assert stamped_html.count(region_token) == 1
    assert stamped_html.count(content_token) == 1

    # Each attr is emitted as a single leading space followed by the attr
    # token, so removing " " + token (first occurrence only) undoes it.
    restored = stamped_html
    for token in (region_token, content_token):
        restored = restored.replace(f" {token}", "", 1)

    assert restored == baseline_html, (
        "strip-attr parity violation: marked output minus the two new IMP-94 "
        "attrs is NOT byte-equivalent to the unmarked baseline"
    )


def test_u6_trace_to_dom_parity():
    """u6 axis 4 — placement_trace ↔ DOM region-id parity (synthetic plan).

    ``asdict`` of the PlacementPlan (per the original note, the value kept
    in ``debug_zones[i].placement_trace`` at
    `src/phase_z2_pipeline.py:6640-6645`) holds the same slot_assignments
    that the u3 projection passes to the stamper. Only the first
    assignment's region_id may appear in the DOM, exactly once.
    """
    import re
    from dataclasses import asdict

    from src.phase_z2_pipeline import _derive_placement_markers

    plan = _u6_synthetic_plan()
    traced_ids = [
        entry["region_id"] for entry in asdict(plan)["slot_assignments"]
    ]
    assert traced_ids == [
        "zone--primary__region_0",
        "zone--primary__region_1",
    ]

    rendered = _u6_render_zone(
        {
            "position": "primary",
            "template_id": _RENDER_TEMPLATE_ID,
            "slot_payload": _u6_paired_slot_payload(),
            "placement_markers": _derive_placement_markers(plan),
        }
    )

    # Every data-region-id value present in the rendered DOM, in order.
    stamped_ids = re.findall(r'data-region-id="([^"]+)"', rendered)
    # u1 contract: a single stamp per zone, taken from the first assignment.
    assert stamped_ids == ["zone--primary__region_0"], (
        f"DOM region-id set drift vs u1 single-root contract: {stamped_ids}"
    )
    # Parity direction: DOM ids ⊆ trace ids.
    assert set(stamped_ids) <= set(traced_ids), (
        "DOM region-id not present in placement_trace slot_assignments"
    )
    # The second assignment's region_id must not appear anywhere.
    assert "zone--primary__region_1" not in rendered


# ─── u6 axes 3' + 4' — REAL MDX 01 pipeline subprocess parity ──────────
#
# Round #6 rewind (Stage 3 code-edit): Codex flagged the original u6 axis-3
# (strip-attr parity) and axis-4 (trace-to-DOM parity) as a scope gap — they
# exercised a synthetic single-zone render rather than the binding contract's
# ``MDX 01 strip-attr parity vs current rendered output``. The synthetic
# variants above remain (they pin the render_slide unit invariants), while
# the axes below run the actual ``samples/mdx_batch/01.mdx`` end-to-end
# through ``python -m src.phase_z2_pipeline`` (same subprocess shape as
# IMP-91 u2 ``multi_mdx_runs``) and assert the strip-parity + trace-to-DOM
# invariants on the live ``data/runs/{run_id}/phase_z2/final.html`` and
# ``debug.json``.
#
# Subprocess pattern mirrors ``tests/integration/test_multi_mdx_regression.py``
# L44-71. The fixture is module-scoped so both axes share one MDX 01 run.
#
# Marked ``@pytest.mark.integration`` (defined in pyproject.toml) so the
# heavy Selenium-invoking run is opt-in skippable via ``-m "not integration"``
# but included in the default ``pytest -q tests`` sweep.


_REPO_ROOT = Path(__file__).resolve().parents[1]
_SAMPLES_BATCH_DIR = _REPO_ROOT.joinpath("samples", "mdx_batch")
_DATA_RUNS_DIR = _REPO_ROOT.joinpath("data", "runs")
_MDX01_FIXTURE_PATH = _SAMPLES_BATCH_DIR.joinpath("01.mdx")

# Strip patterns — mirror the u1 stamper injection shape exactly
# (leading single space + attr token). The stamper at
# ``src/region_marker_stamper.py:131-135`` emits::
#
#     f' {REGION_ID_ATTR}="{region_id}" {CONTENT_UNIT_ID_ATTR}="{content_unit_id}"'
#
# so the strip regex anchors on that ``(space + attr + ="value")`` shape.
_STRIP_REGION_RE = re.compile(r' data-region-id="[^"]*"')
_STRIP_CONTENT_UNIT_RE = re.compile(r' data-content-unit-id="[^"]*"')


@pytest.fixture(scope="module")
def mdx01_actual_pipeline_run() -> dict:
    """Run ``samples/mdx_batch/01.mdx`` through the actual Phase Z pipeline.

    Module-scoped so the two MDX-01 axes share a single subprocess
    invocation. Skips when the MDX 01 fixture file is absent; fails when
    the run does not leave ``final.html`` and ``debug.json`` behind.

    Mirrors the IMP-91 u2 ``multi_mdx_runs`` subprocess shape
    (``tests/integration/test_multi_mdx_regression.py:44-71``). Each test
    session performs a fresh run under a unique ``run_id`` — no frozen
    artifact dependency ([[feedback_validation_first_for_closed_issues]]).

    Returns:
        dict with keys ``run_id`` (str), ``run_dir`` (Path), ``returncode``
        (int, surfaced for diagnostics only — it is not asserted on),
        ``final_html`` (str) and ``debug`` (parsed debug.json).
    """
    if not _MDX01_FIXTURE_PATH.is_file():
        pytest.skip(
            f"MDX 01 fixture missing at {_MDX01_FIXTURE_PATH!s}; "
            f"cannot run real-pipeline parity axes."
        )
    # Unique run id so concurrent / repeated sessions never share a run dir.
    run_id = f"imp94_u6_mdx01_{uuid.uuid4().hex[:8]}"
    cp = subprocess.run(
        [
            sys.executable,
            "-m",
            "src.phase_z2_pipeline",
            str(_MDX01_FIXTURE_PATH),
            run_id,
        ],
        capture_output=True,
        text=True,
        timeout=360,
        cwd=str(_REPO_ROOT),
    )
    run_dir = _DATA_RUNS_DIR / run_id / "phase_z2"
    final_html_path = run_dir / "final.html"
    debug_json_path = run_dir / "debug.json"
    assert final_html_path.is_file(), (
        f"MDX 01 pipeline subprocess did not produce {final_html_path!s} "
        f"(returncode={cp.returncode}); stderr tail: {cp.stderr[-1200:]}"
    )
    assert debug_json_path.is_file(), (
        f"MDX 01 pipeline subprocess did not produce {debug_json_path!s} "
        f"(returncode={cp.returncode}); stderr tail: {cp.stderr[-1200:]}"
    )
    return {
        "run_id": run_id,
        "run_dir": run_dir,
        "returncode": cp.returncode,
        "final_html": final_html_path.read_text(encoding="utf-8"),
        "debug": json.loads(debug_json_path.read_text(encoding="utf-8")),
    }


@pytest.mark.integration
def test_u6_mdx01_strip_attr_parity_real_pipeline(mdx01_actual_pipeline_run):
    """u6 axis 3' (REAL MDX 01) — Round #6 rewind fix.

    Binding contract (Stage 2 plan + issue body guardrail) ::

        final.html SHA for mdx 01-05 = byte-equivalent except for new
        ``data-*`` attrs

    Checked on the LIVE MDX 01 pipeline output (no synthetic shim):

      (a) final.html carries ≥ 1 ``data-region-id`` AND ≥ 1
          ``data-content-unit-id``.
      (b) The two IMP-94 attrs appear in equal numbers, and no more
          often than ``data-template-id`` roots.
      (c) After removing every `` data-region-id="..."`` and
          `` data-content-unit-id="..."`` token (leading space included):
            - no IMP-94 attribute name remains,
            - the counts of ``data-template-id``, ``data-frame-id`` and
              ``data-frame-slot-id`` are unchanged,
            - the length shrinks by exactly the removed token bytes.

    There is no pre-IMP-94 baseline file to diff against, so (c) checks
    that the strip is clean and touches nothing else.
    """
    html = mdx01_actual_pipeline_run["final_html"]

    # (a) IMP-94 attrs present on live MDX 01 output.
    region_attrs = _STRIP_REGION_RE.findall(html)
    content_attrs = _STRIP_CONTENT_UNIT_RE.findall(html)
    assert len(region_attrs) >= 1, (
        f"MDX 01 final.html ({mdx01_actual_pipeline_run['run_id']}) carries no "
        f"data-region-id — u2 stamper chain failed to execute end-to-end."
    )
    assert len(content_attrs) >= 1, (
        f"MDX 01 final.html ({mdx01_actual_pipeline_run['run_id']}) carries no "
        f"data-content-unit-id — u2 stamper chain failed end-to-end."
    )

    # (b) Paired stamps — one region_id + one content_unit_id per root.
    assert len(region_attrs) == len(content_attrs), (
        f"MDX 01 stamp pairing violation: {len(region_attrs)} region_id vs "
        f"{len(content_attrs)} content_unit_id"
    )

    # Baseline counts of other attrs on the unstripped HTML.
    baseline_template_id_count = html.count('data-template-id="')
    baseline_frame_id_count = html.count('data-frame-id="')
    baseline_frame_slot_id_count = html.count('data-frame-slot-id="')
    # u1 single-root-per-zone contract: stamps ≤ family-partial roots.
    assert len(region_attrs) <= baseline_template_id_count, (
        f"MDX 01 stamp count {len(region_attrs)} exceeds family-partial root "
        f"count {baseline_template_id_count} — single-root contract violated."
    )

    # (c) Strip exactly the IMP-94 attrs (leading space + attr token).
    stripped = _STRIP_REGION_RE.sub("", html)
    stripped = _STRIP_CONTENT_UNIT_RE.sub("", stripped)

    # (c-1) No IMP-94 markers remain. This also catches an attr that was
    # emitted without the leading space the strip regexes require.
    assert REGION_ID_ATTR not in stripped, (
        "data-region-id leaked through strip regex"
    )
    assert CONTENT_UNIT_ID_ATTR not in stripped, (
        "data-content-unit-id leaked through strip regex"
    )

    # (c-2) Other attrs preserved verbatim (additive-only invariant).
    assert stripped.count('data-template-id="') == baseline_template_id_count, (
        "data-template-id count drift after strip — strip regex over-matched"
    )
    assert stripped.count('data-frame-id="') == baseline_frame_id_count, (
        "data-frame-id count drift after strip — strip regex over-matched"
    )
    assert stripped.count('data-frame-slot-id="') == baseline_frame_slot_id_count, (
        "data-frame-slot-id count drift after strip — #96 axis must be disjoint"
    )

    # (c-3) Byte-length bookkeeping.
    # NOTE(review): this largely holds by construction (the same patterns
    # drive findall and sub), so it guards the strip regexes rather than
    # the stamper itself.
    stamp_bytes = sum(len(s) for s in region_attrs) + sum(len(s) for s in content_attrs)
    assert len(html) - stamp_bytes == len(stripped), (
        f"byte-length parity violation: len(original)={len(html)}, "
        f"stamp_bytes={stamp_bytes}, expected stripped len={len(html) - stamp_bytes}, "
        f"actual stripped len={len(stripped)}"
    )


@pytest.mark.integration
def test_u6_mdx01_trace_to_dom_parity_real_pipeline(mdx01_actual_pipeline_run):
    """u6 axis 4' (REAL MDX 01) — Round #6 rewind fix.

    Binding contract (issue body validation guardrail) ::

        verify that placement_trace and the DOM ``[data-region-id]`` set agree

    Checked on the LIVE MDX 01 pipeline output:

    For each zone in ``debug.json["zones"]`` with a non-empty
    ``placement_trace.slot_assignments`` list, the FIRST assignment's
    ``region_id`` MUST appear as a ``data-region-id`` value in final.html
    exactly once. Every later assignment's ``region_id`` MUST NOT appear
    anywhere in final.html (the u1 stamper consumes only ``markers[0]``
    per zone).

    If no zone carries a populated trace, the DOM must contain no
    ``data-region-id`` at all.
    """
    html = mdx01_actual_pipeline_run["final_html"]
    debug = mdx01_actual_pipeline_run["debug"]
    zones = debug.get("zones", []) or []
    assert len(zones) >= 1, (
        f"MDX 01 debug.json ({mdx01_actual_pipeline_run['run_id']}) carries no "
        f"zones — pipeline produced an empty zones list."
    )

    # Keep the raw hit list as well as the set: the set drives the parity
    # comparison, the list is needed to detect a region_id stamped twice.
    dom_region_id_hits = re.findall(r'data-region-id="([^"]+)"', html)
    dom_region_ids = set(dom_region_id_hits)

    # Build expected DOM = first slot_assignment.region_id per zone with trace.
    expected_first_slot_region_ids: set[str] = set()
    excess_slot_region_ids: set[str] = set()
    saw_any_trace = False
    for zone in zones:
        trace = zone.get("placement_trace")
        if not isinstance(trace, dict):
            continue
        slot_assignments = trace.get("slot_assignments") or []
        if not slot_assignments:
            continue
        saw_any_trace = True
        for idx, sa in enumerate(slot_assignments):
            if not isinstance(sa, dict):
                continue
            rid = sa.get("region_id") or ""
            if not rid:
                continue
            if idx == 0:
                expected_first_slot_region_ids.add(rid)
            else:
                excess_slot_region_ids.add(rid)

    # No populated placement_trace anywhere means no zone supplied markers,
    # so the DOM must carry no IMP-94 region markers either.
    if not saw_any_trace:
        assert dom_region_ids == set(), (
            f"MDX 01 has no placement_trace zones but DOM still carries "
            f"data-region-id markers: {sorted(dom_region_ids)} — u4 non-live "
            f"default contract violated."
        )
        return

    # Parity: DOM region_id set equals the first-slot region_id set.
    assert dom_region_ids == expected_first_slot_region_ids, (
        f"MDX 01 trace ↔ DOM parity drift:\n"
        f"  DOM region-ids: {sorted(dom_region_ids)}\n"
        f"  expected (first slot_assignment per zone): "
        f"{sorted(expected_first_slot_region_ids)}\n"
        f"  excess slot region-ids (must NOT leak): "
        f"{sorted(excess_slot_region_ids)}"
    )

    # "Exactly once": set equality alone would let a region_id stamped on
    # two elements slip through, so check the raw hit list for repeats.
    repeated_region_ids = sorted(
        rid for rid in dom_region_ids if dom_region_id_hits.count(rid) > 1
    )
    assert not repeated_region_ids, (
        f"MDX 01 final.html stamps these region_ids more than once: "
        f"{repeated_region_ids} — u1 single-root contract violated."
    )

    # Single-root contract: no excess slot_assignment region_id leaks to DOM.
    for rid in excess_slot_region_ids:
        assert rid not in html, (
            f"excess slot_assignment region_id {rid!r} leaked into MDX 01 "
            f"final.html — u1 single-root contract (markers[0] only) violated."
        )