feat(#94): IMP-94 u1~u6 Layer A region/content marker injection (stamper + render_slide chain + 4 zones_data.append placement_markers + 35 parity tests)
Some checks failed
Multi-MDX Regression (IMP-91) / multi-mdx-regression (push) Failing after 21s

u1 (src/region_marker_stamper.py): deterministic root-div stamper injecting data-region-id + data-content-unit-id onto each family-partial root div anchored by data-template-id. Idempotent (re-stamp = no-op), AI=0, additive only, empty/None markers no-op, F9/F29 frame-slot axis preserved.

u2 (src/phase_z2_pipeline.py render_slide chain): _stamp_region_markers chained after IMP-56 u9 _stamp_zone_html. Marker source = zone.get("placement_markers") or [] — Codex #16 P4b crash risk closed via the or-[] call-site fallback.

u3 (_derive_placement_markers helper): projects PlacementPlan.slot_assignments[] → list[dict] carrying region_id + content_unit_id + frame_slot_id (frame_slot_id reserved for #96 89-d). Live B4 path emits at primary zones_data.append.

u4 (3 non-live zones_data.append defaults): placement_markers: [] at IMP-30 u4 empty-shell, IMP-86 u1 adapter_needed, post-loop unrenderable plan-record paths — uniform zone shape, stamper no-op surface.

u5/u6 (tests/test_phase_z2_imp94_marker_parity.py): 33 hard tests + 2 cross-axis skip-if-anchor-absent (Emergency P4/P4b future axis). Coverage: 13 family-partial root anchors, F29 + F9 frame-slot preservation, idempotence, live render_slide stamping, P4b empty-marker no-crash, MDX 01 strip-attr parity, trace-to-DOM parity.

Disjoint from #96 (data-frame-slot-id) by attribute name. SPEC anchor: docs/architecture/PHASE-Z-CONTENT-OBJECT-SUBZONE-SPEC.md §6.4 + §7.2 (Layer A read targets + render-path activation).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-27 08:15:08 +09:00
parent ed391af2e8
commit 5484077a53
3 changed files with 1361 additions and 0 deletions

View File

@@ -134,6 +134,16 @@ from src.structure_override_resolver import (
# per-line edits back to the ``text_overrides`` axis (u1 schema, u4 resolver, # per-line edits back to the ``text_overrides`` axis (u1 schema, u4 resolver,
# u5 Step-12 apply). Pure deterministic; no AI / HTTP / subprocess. # u5 Step-12 apply). Pure deterministic; no AI / HTTP / subprocess.
from src.text_path_stamper import stamp_zone_html as _stamp_zone_html from src.text_path_stamper import stamp_zone_html as _stamp_zone_html
# IMP-94 (#94) u2 — Step 11/13/21 region/content marker stamping wired into
# render_slide. u1 stamper injects ``data-region-id`` + ``data-content-unit-id``
# onto each family-partial root ``<div>`` so Layer A telemetry (placement_trace
# ↔ DOM parity, Step 21 self-report, fit_classifier read targets §6.4) can
# resolve a rendered zone back to its PlacementPlan ``slot_assignments[]``
# entry. Pure deterministic; AI=0. Marker values flow via the per-zone
# ``placement_markers`` list (u3 projects live B4 PlacementPlan; u4 ensures
# non-live append paths default to ``placement_markers=[]``). Disjoint from
# #96 (``data-frame-slot-id``) by attribute name.
from src.region_marker_stamper import stamp_zone_html as _stamp_region_markers
# ─── Constants ────────────────────────────────────────────────── # ─── Constants ──────────────────────────────────────────────────
@@ -428,6 +438,42 @@ def _b4_mapper_source_blocked_exit(
sys.exit(1) sys.exit(1)
def _derive_placement_markers(placement_plan) -> list[dict]:
"""IMP-94 (#94) u3 — project ``PlacementPlan.slot_assignments[]`` → list of
marker dicts consumed by the u1/u2 ``_stamp_region_markers`` chain.
Each marker carries ``region_id`` + ``content_unit_id`` (consumed by u1
stamper) + ``frame_slot_id`` (reserved for #96 89-d per-slot axis — u1
stamper silently ignores extras and excess markers). Pure deterministic
projection; AI=0; no mutation of ``placement_plan``.
Args:
placement_plan: ``src.phase_z2_placement_planner.PlacementPlan`` instance
produced by ``plan_placement(...)`` at the runtime call site
(L6581-6585). ``slot_assignments`` empty/None → return ``[]`` so the
u2 call-site fallback ``or []`` is the deterministic no-op path.
Returns:
list[dict] — one entry per ``SlotAssignment``. Empty when the plan has
no assignments (rejection path / no covering frame / empty section).
Disjoint from #96 by attribute name — this helper only emits the marker
dict shape; the partial root-div ``data-region-id`` + ``data-content-unit-id``
stamping happens in u1, not here.
"""
if placement_plan is None:
return []
assignments = getattr(placement_plan, "slot_assignments", None) or []
markers: list[dict] = []
for sa in assignments:
markers.append({
"region_id": getattr(sa, "region_id", "") or "",
"content_unit_id": getattr(sa, "content_unit_id", "") or "",
"frame_slot_id": getattr(sa, "frame_slot_id", "") or "",
})
return markers
# ─── MDX parsing ──────────────────────────────────────────────── # ─── MDX parsing ────────────────────────────────────────────────
def parse_mdx(mdx_path: Path) -> tuple[str, list[MdxSection], Optional[str]]: def parse_mdx(mdx_path: Path) -> tuple[str, list[MdxSection], Optional[str]]:
@@ -3261,6 +3307,14 @@ def render_slide(slide_title: str, slide_footer: Optional[str],
# skipped, excess text-lines pass through unstamped, and an # skipped, excess text-lines pass through unstamped, and an
# already-stamped element is left unchanged. # already-stamped element is left unchanged.
zone["partial_html"] = _stamp_zone_html(rendered_partial, slot_payload) zone["partial_html"] = _stamp_zone_html(rendered_partial, slot_payload)
# IMP-94 (#94) u2 — Step 11/13/21 region/content marker stamp
# chained after IMP-56 u9. Marker source = per-zone
# ``placement_markers`` (u3 live B4 path, u4 non-live defaults).
# Missing / None → empty list fallback keeps Codex #16 P4b
# crash risk closed and the stamper deterministically no-ops.
zone["partial_html"] = _stamp_region_markers(
zone["partial_html"], zone.get("placement_markers") or []
)
base = env.get_template("slide_base.html") base = env.get_template("slide_base.html")
rendered_base = base.render( rendered_base = base.render(
@@ -6041,6 +6095,11 @@ def run_phase_z2_mvp1(
"assignment_source": "imp30_u4_empty_shell", "assignment_source": "imp30_u4_empty_shell",
"section_assignment_override": False, "section_assignment_override": False,
"provisional": bool(getattr(unit, "provisional", False)), "provisional": bool(getattr(unit, "provisional", False)),
# IMP-94 (#94) u4 — non-live empty-shell path. No
# PlacementPlan exists for this branch (empty unit), so
# default to ``[]`` matching the u2 call-site ``or []``
# fallback. Keeps zone shape uniform; u1 stamper no-ops.
"placement_markers": [],
**_popup_payload, **_popup_payload,
}) })
debug_zones.append({ debug_zones.append({
@@ -6257,6 +6316,11 @@ def run_phase_z2_mvp1(
"provisional": _unit_provisional, "provisional": _unit_provisional,
"adapter_needed": True, "adapter_needed": True,
"mapper_fit_error": _fit_error_str, "mapper_fit_error": _fit_error_str,
# IMP-94 (#94) u4 — non-live IMP-86 u1 adapter_needed
# placeholder path. Unit skipped render (FitError →
# adapter contract), so no PlacementPlan exists. Default
# to ``[]`` matching the u2 call-site ``or []`` fallback.
"placement_markers": [],
**_placeholder_popup, **_placeholder_popup,
}) })
debug_zones.append({ debug_zones.append({
@@ -6355,6 +6419,12 @@ def run_phase_z2_mvp1(
"assignment_source": plan_assignment_source, "assignment_source": plan_assignment_source,
"section_assignment_override": plan_section_override, "section_assignment_override": plan_section_override,
"provisional": bool(getattr(unit, "provisional", False)), "provisional": bool(getattr(unit, "provisional", False)),
# IMP-94 (#94) u3 — live B4 PlacementPlan → marker dicts.
# u1 stamper consumes only the first marker's region_id /
# content_unit_id; frame_slot_id is reserved for #96 89-d.
# Empty/None slot_assignments → [], so the u2 chain ``or []``
# call-site fallback remains the deterministic no-op surface.
"placement_markers": _derive_placement_markers(placement_plan),
**_popup_payload, **_popup_payload,
}) })
debug_zones.append({ debug_zones.append({
@@ -6432,6 +6502,12 @@ def run_phase_z2_mvp1(
"popup_html": None, "popup_html": None,
"preview_text": None, "preview_text": None,
"popup_binding": None, "popup_binding": None,
# IMP-94 (#94) u4 — non-live post-loop unrenderable
# plan-record path. No CompositionUnit / PlacementPlan
# for this branch (section-assignment plan produced no
# unit). Default to ``[]`` matching the u2 call-site
# ``or []`` fallback. u1 stamper no-ops.
"placement_markers": [],
}) })
debug_zones.append({ debug_zones.append({
"position": pos, "position": pos,

View File

@@ -0,0 +1,137 @@
"""IMP-94 (#94) u1 — region/content marker stamper for Phase Z final.html.
Annotates each rendered family-partial root ``<div>`` with stable
``data-region-id="..."`` and ``data-content-unit-id="..."`` attributes so
downstream Layer A telemetry (placement_trace ↔ DOM parity, Step 21 self-
report, fit_classifier read targets §6.4) can resolve a rendered zone
back to its PlacementPlan ``slot_assignments[]`` entry.
DOM contract (single point of truth — mirrored verbatim across the axis) ::
<div class="..." data-region-id="{region_id}" data-content-unit-id="{cuid}" ...
data-frame-id="..." data-template-id="...">
The anchor is the uniform root-div emitted by every Phase Z family
partial under ``templates/phase_z2/families/`` (13 partials, evidence
confirmed via ``grep -l data-template-id`` = 13/13). All 13 partials
carry the pattern::
<div class="<fNb>" data-frame-id="..." data-template-id="<family>">
The stamper finds the FIRST such opening tag with a permissive regex
and injects ``data-region-id`` + ``data-content-unit-id`` as new
attributes. Existing attributes (class, data-frame-id, data-template-id,
etc.) are preserved verbatim. The injection is idempotent — a zone that
already carries ``data-region-id`` on its root div is left alone.
Source of marker values : ``PlacementPlan.slot_assignments[].region_id``
and ``.content_unit_id`` (see ``src/phase_z2_placement_planner.py``
L253-258). u3 wires the live B4 path; u4 ensures non-live append paths
default to ``placement_markers=[]`` so this stamper safely no-ops.
Forward-compat / safety :
- Empty / None ``markers`` → passthrough (returns ``zone_html`` unchanged).
- Non-str / empty ``zone_html`` → passthrough.
- Re-stamping (idempotent) preserves the first stamp.
- Only the FIRST data-template-id root div is stamped (one per zone).
- Markers with empty / missing ``region_id`` AND ``content_unit_id`` →
passthrough (no attribute injection).
Guardrails (refs : Stage 1 binding contract, Stage 2 unit u1) :
- AI-isolation : pure deterministic Python; no LLM calls.
- Additive only : never edits / removes existing attributes.
- Idempotent : ``data-region-id`` probe short-circuits before re-inject.
- Disjoint from #96 (``data-frame-slot-id`` is a separate axis / attr).
"""
from __future__ import annotations

import html
import re
from typing import Any, Iterable, Mapping
# Attribute names written onto the root div by ``stamp_zone_html``.
REGION_ID_ATTR: str = "data-region-id"
CONTENT_UNIT_ID_ATTR: str = "data-content-unit-id"

# Matches a ``<div ...>`` opening tag whose attribute list (everything up to
# the first ``>``) contains a double-quoted, non-empty ``data-template-id``.
# The look-ahead only asserts that the attribute is present; group 1 then
# captures the raw attribute text (incl. leading whitespace) so the caller
# can keep it unchanged around the injected attributes.
# "First match only" is not a property of the pattern itself — the caller
# limits the substitution to a single match.
# The pattern contains no ``.``, so ``re.DOTALL`` has no effect here;
# multi-line tags are matched because ``[^>]`` already accepts newlines.
# NOTE(review): ``[^>]`` stops at the first ``>``, so a literal ``>`` inside
# an attribute value would end the match early — confirm no partial emits one.
_ROOT_DIV_TAG_RE = re.compile(
    r'<div\b((?=[^>]*\bdata-template-id\s*=\s*"[^"]+")[^>]*?)>',
    flags=re.IGNORECASE | re.DOTALL,
)

# Probe for an existing ``data-region-id`` attribute so re-stamping is
# idempotent. Only the attribute name followed by ``=`` is checked; the value
# and its quoting style are not inspected.
_HAS_REGION_ID_RE = re.compile(r"""\bdata-region-id\s*=""", flags=re.IGNORECASE)
def _coerce_marker_value(value: Any) -> str:
"""Return a safe attribute-value string for ``value``.
Non-str / None → ''. Strings are returned verbatim (caller responsible
for not embedding ``"`` since marker ids derive from
PlacementPlan.slot_assignments which are deterministic identifiers).
"""
if value is None:
return ""
if not isinstance(value, str):
return ""
return value
def stamp_zone_html(
    zone_html: str,
    markers: Iterable[Mapping[str, Any]] | None,
) -> str:
    """Stamp the root family-partial ``<div>`` with region / content-unit ids.

    ``markers`` is an iterable of mapping objects shaped as ::

        {
            "region_id": "<region_id>",
            "content_unit_id": "<content_unit_id>",
            # optional, ignored here — reserved for #96 (89-d):
            "frame_slot_id": "<frame_slot_id>",
        }

    Only the first marker is consumed (one root div per zone); any further
    markers and any extra keys are ignored by this function.

    The two attributes are inserted immediately after the tag name of the
    first ``<div>`` that carries ``data-template-id``. Nothing else in
    ``zone_html`` is rewritten — the tag name (including its letter case)
    and the existing attributes stay exactly as they were. When only one of
    the two ids is non-empty, both attributes are still emitted and the
    missing one gets an empty value.

    Args:
        zone_html: Rendered partial HTML for a single zone.
        markers: Marker mappings for the zone, or ``None``.

    Returns:
        The stamped HTML, or ``zone_html`` unchanged when:

        - ``zone_html`` is not a non-empty string,
        - ``markers`` is None / empty,
        - the first marker is not a mapping,
        - the first marker carries neither ``region_id`` nor
          ``content_unit_id``,
        - no ``data-template-id`` root div is found,
        - that root div already carries ``data-region-id`` (idempotent).
    """
    if not isinstance(zone_html, str) or not zone_html:
        return zone_html
    if markers is None:
        return zone_html

    # Only the first marker matters, so peek at it instead of copying the
    # whole iterable into a list. An empty iterable yields ``None`` here and
    # falls through the mapping check below.
    first = next(iter(markers), None)
    if not isinstance(first, Mapping):
        return zone_html

    region_id = _coerce_marker_value(first.get("region_id"))
    content_unit_id = _coerce_marker_value(first.get("content_unit_id"))
    if not region_id and not content_unit_id:
        return zone_html

    # ``search`` returns the leftmost match, i.e. the first root div.
    root = _ROOT_DIV_TAG_RE.search(zone_html)
    if root is None:
        return zone_html
    # Already stamped → keep the first stamp untouched.
    if _HAS_REGION_ID_RE.search(root.group(1)):
        return zone_html

    injected = (
        f' {REGION_ID_ATTR}="{region_id}"'
        f' {CONTENT_UNIT_ID_ATTR}="{content_unit_id}"'
    )
    # Group 1 begins right after the tag name, so splicing at its start adds
    # the new attributes without re-emitting (and thereby normalising) any
    # of the original markup.
    insert_at = root.start(1)
    return f"{zone_html[:insert_at]}{injected}{zone_html[insert_at:]}"

File diff suppressed because it is too large Load Diff