feat(#89): IMP-89 89-a u1~u5 Layer A render path activation (B4→mapper source-of-truth switch, default-OFF flag)

PHASE_Z_B4_MAPPER_SOURCE env flag (default OFF) switches slot_payload
source-of-truth from legacy mapper-only / V4 rank-1 to B4 PlacementPlan
.selected_template_id at the single switch site in the runtime loop.
OFF preserves final.html SHA byte-equivalence (u4 parity guard, mdx 01-05).
ON requires Layer A render-active path; BLOCKED exits on B4 no-cover
and on B4-selected FitError (IMP-87 honesty gate pattern — NO silent
fallback). Distinct from PHASE_Z_B4_GATEKEEPER (mismatch render-skip).

Units (1 commit = 1 axis per Stage 1 scope_lock):
  u1 — _b4_mapper_source_enabled() flag reader (default OFF)
  u2 — _select_mapper_template_id() selector wired at the switch site
  u3 — _b4_mapper_source_blocked_exit() for b4_no_cover / b4_selected_fit_error
  u4 — render SHA parity regression (tests/regression/ baseline mdx 01-05)
  u5 — slot_payload byte-equivalence (matches_mapper=True axis, mdx 01-05)

Targeted 89-a suite 63 PASS; Phase Z regression 323 PASS; IMP-87 mirror
20 PASS. Demo activation via .env only (no vite.config hardcoding).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-24 00:33:28 +09:00
parent 896f273ffa
commit b1bbe27c38
9 changed files with 1434 additions and 1 deletions

View File

@@ -204,6 +204,82 @@ def to_phase_z_status(match: V4Match) -> str:
return V4_LABEL_TO_PHASE_Z_STATUS.get(match.label, "unknown")
def _b4_mapper_source_enabled() -> bool:
    """IMP-89 89-a u1 — report whether PHASE_Z_B4_MAPPER_SOURCE is ON.

    The variable is looked up in ``os.environ`` on every call (nothing is
    cached), so a change to the environment is seen by the next call.  The
    raw value is stripped of surrounding whitespace and lower-cased before
    it is compared; only ``'1'``, ``'true'`` and ``'yes'`` count as ON.  An
    unset variable, an empty string, or any other value means OFF.

    ``_select_mapper_template_id`` consults this reader to decide which
    template id to hand back.

    Returns:
        True when the flag is ON, False otherwise (the default).
    """
    raw_value = os.environ.get("PHASE_Z_B4_MAPPER_SOURCE", "")
    normalized = raw_value.strip().lower()
    return normalized in ("1", "true", "yes")
def _select_mapper_template_id(
    placement_plan, unit_frame_template_id: str
) -> Optional[str]:
    """IMP-89 89-a u2 — choose the template id that feeds the mapper.

    Args:
        placement_plan: object exposing ``selected_template_id``; that
            attribute is only read when the flag is ON.
        unit_frame_template_id: the legacy template id, returned untouched
            when the flag is OFF.

    Returns:
        ``placement_plan.selected_template_id`` when
        ``_b4_mapper_source_enabled()`` is true — this may be ``None`` —
        otherwise ``unit_frame_template_id``.  This helper never substitutes
        one source for the other; handling a ``None`` result is left to the
        caller.
    """
    use_b4_pick = _b4_mapper_source_enabled()
    return (
        placement_plan.selected_template_id
        if use_b4_pick
        else unit_frame_template_id
    )
def _b4_mapper_source_blocked_exit(
    reason: str, *, position: str, context: dict
) -> "NoReturn":  # type: ignore[name-defined]
    """IMP-89 89-a u3 — print a BLOCKED report to stderr and exit with 1.

    The helper is unconditional: it does not look at the
    PHASE_Z_B4_MAPPER_SOURCE flag itself, so callers are expected to check
    the flag before invoking it.

    Args:
        reason: short tag printed in the header line.  The call sites in
            this file pass ``"b4_no_cover"`` (the plan selected no template)
            or ``"b4_selected_fit_error"`` (the mapper raised FitError for
            the selected template).
        position: zone position, rendered in the header as
            ``zone--{position}``.
        context: label → value pairs; each pair is printed on its own line
            with the label padded to a minimum width of 9 characters.

    Raises:
        SystemExit: always, with code 1 (via ``sys.exit``).

    NOTE(review): the return annotation is the *string* "NoReturn" with a
    ``name-defined`` ignore, which suggests ``NoReturn`` is not imported in
    this module — confirm before relying on it for type-checker flow
    analysis.
    """
    banner_lines = (
        f"\n[Phase Z-2 IMP-89 89-a u3] BLOCKED @ {reason} (zone--{position})",
        " policy : PHASE_Z_B4_MAPPER_SOURCE=ON requires B4-driven render "
        "(NO silent fallback — IMP-87 honesty gate pattern)",
    )
    for banner_line in banner_lines:
        print(banner_line, file=sys.stderr)
    # One diagnostic line per context entry, in the dict's own order.
    for label, detail in context.items():
        print(f" {label:9}: {detail}", file=sys.stderr)
    sys.exit(1)
# ─── MDX parsing ────────────────────────────────────────────────
def parse_mdx(mdx_path: Path) -> tuple[str, list[MdxSection], Optional[str]]:
@@ -4681,11 +4757,64 @@ def run_phase_z2_mvp1(
continue
# ─── end B4 gatekeeper ───
# ─── IMP-89 89-a u2 — slot_payload source-of-truth switch ───
# PHASE_Z_B4_MAPPER_SOURCE (u1 flag, default OFF):
# ON → mapper input = B4 PlacementPlan.selected_template_id
# (Layer A render-active; B4 drives slot_payload)
# OFF → mapper input = unit.frame_template_id (legacy mapper-only /
# V4 rank-1; byte-equivalent default — final.html SHA parity
# guarded by u4)
# u3 layers BLOCKED exits for (selected_template_id is None OR
# FitError on B4-selected template) under flag ON — NO silent
# fallback (IMP-87 honesty gate pattern). Under flag OFF semantics
# preserved verbatim.
mapper_template_id = _select_mapper_template_id(
placement_plan, unit.frame_template_id
)
# IMP-89 89-a u3 — BLOCKED exit on B4 no-cover under flag ON.
# When PHASE_Z_B4_MAPPER_SOURCE=ON and PlacementPlan produced no
# covering frame, refuse to fall back to the legacy V4 rank-1 mapper
# input. NO silent fallback (Stage 1 Q2 lock; IMP-87 honesty gate
# pattern). Under flag OFF this branch is never entered, so the
# default render path remains byte-identical.
if _b4_mapper_source_enabled() and mapper_template_id is None:
_b4_mapper_source_blocked_exit(
"b4_no_cover",
position=position,
context={
"unit": (
f"source_section_ids={list(unit.source_section_ids)} "
f"merge_type={unit.merge_type}"
),
"v4_rank1": unit.frame_template_id,
"b4_pick": placement_plan.selected_template_id,
},
)
# Try the mapper — on failure (FitError), mark the zone as adapter_needed and skip
try:
slot_payload = map_mdx_to_slots(synth_section, unit.frame_template_id)
slot_payload = map_mdx_to_slots(synth_section, mapper_template_id)
except FitError as e:
_fit_error_str = str(e)
# IMP-89 89-a u3 — BLOCKED exit on B4-selected FitError under flag
# ON. When PHASE_Z_B4_MAPPER_SOURCE=ON the mapper rejecting the
# B4-selected template is a Layer A honesty failure — adapter_needed
# would mask it (Stage 1 Q2 lock). Under flag OFF the legacy
# adapter_needed silent-fallback path executes unchanged.
if _b4_mapper_source_enabled():
_b4_mapper_source_blocked_exit(
"b4_selected_fit_error",
position=position,
context={
"template": f"{mapper_template_id} (B4 selected)",
"unit": (
f"source_section_ids={list(unit.source_section_ids)}"
),
"v4_rank1": unit.frame_template_id,
"fit_error": _fit_error_str,
},
)
_unit_provisional = bool(getattr(unit, "provisional", False))
adapter_record = {
"position": position,

View File

@@ -0,0 +1,157 @@
"""IMP-89 89-a u3 — BLOCKED exit unit tests for Layer A render path.
Stage 2 plan (u3): when PHASE_Z_B4_MAPPER_SOURCE=ON and the Layer A render
path cannot resolve a covering frame, the runtime MUST sys.exit(1) instead of
silently degrading to adapter_needed or to the legacy V4 rank-1 mapper input.
Locked semantics (Stage 1 Q2 lock; IMP-87 honesty gate pattern):
flag OFF → legacy adapter_needed path
(silent fallback preserved)
flag ON + B4 no-cover → BLOCKED (sys.exit 1)
flag ON + FitError on B4-selected → BLOCKED (sys.exit 1)
flag ON + matches_mapper + FitError → BLOCKED (explicit no-silent
fallback even when V4 rank-1
equals B4 pick)
These tests target the `_b4_mapper_source_blocked_exit()` helper directly
plus contract-level assertions of its stderr output. The runtime call-sites
inside `run_phase_z2_mvp1` are guarded by `_b4_mapper_source_enabled()`
checks; u3 changes ZERO behavior under the default-OFF path.
"""
from __future__ import annotations
import pytest
from src.phase_z2_pipeline import (
_b4_mapper_source_blocked_exit,
_b4_mapper_source_enabled,
)
# Name of the environment variable under test; the tests below clear it via
# ``monkeypatch`` before exercising the flag reader / exit helper.
FLAG = "PHASE_Z_B4_MAPPER_SOURCE"
def test_blocked_exit_no_cover_exits_with_code_1(
    capsys: pytest.CaptureFixture[str],
) -> None:
    """A ``b4_no_cover`` report must end in SystemExit with code 1."""
    no_cover_context = {
        "unit": "source_section_ids=['01-1'] merge_type=raw",
        "v4_rank1": "F13",
        "b4_pick": None,
    }
    with pytest.raises(SystemExit) as raised:
        _b4_mapper_source_blocked_exit(
            "b4_no_cover", position="top", context=no_cover_context
        )
    assert raised.value.code == 1
def test_blocked_exit_fit_error_exits_with_code_1(
    capsys: pytest.CaptureFixture[str],
) -> None:
    """A ``b4_selected_fit_error`` report must end in SystemExit(1)."""
    fit_error_context = {
        "template": "F29 (B4 selected)",
        "unit": "source_section_ids=['02-2']",
        "v4_rank1": "F13",
        "fit_error": "slot 'title' missing",
    }
    with pytest.raises(SystemExit) as raised:
        _b4_mapper_source_blocked_exit(
            "b4_selected_fit_error",
            position="bottom_l",
            context=fit_error_context,
        )
    assert raised.value.code == 1
def test_blocked_exit_stderr_carries_reason_and_position(
    capsys: pytest.CaptureFixture[str],
) -> None:
    """The stderr report names the BLOCKED banner, the reason and the zone."""
    with pytest.raises(SystemExit):
        _b4_mapper_source_blocked_exit(
            "b4_no_cover",
            position="bottom_r",
            context={"v4_rank1": "F13"},
        )
    captured_err = capsys.readouterr().err
    expected_fragments = (
        "[Phase Z-2 IMP-89 89-a u3] BLOCKED",
        "b4_no_cover",
        "zone--bottom_r",
    )
    for fragment in expected_fragments:
        assert fragment in captured_err
def test_blocked_exit_stderr_carries_honesty_policy_line(
    capsys: pytest.CaptureFixture[str],
) -> None:
    """The policy line mentions the flag, the no-fallback rule and IMP-87."""
    with pytest.raises(SystemExit):
        _b4_mapper_source_blocked_exit(
            "b4_selected_fit_error",
            position="top",
            context={"fit_error": "x"},
        )
    captured_err = capsys.readouterr().err
    expected_fragments = (
        "PHASE_Z_B4_MAPPER_SOURCE=ON",
        "NO silent fallback",
        "IMP-87 honesty gate pattern",
    )
    for fragment in expected_fragments:
        assert fragment in captured_err
def test_blocked_exit_stderr_carries_all_context_fields(
    capsys: pytest.CaptureFixture[str],
) -> None:
    """Each context dict entry surfaces on its own stderr line.

    The check is made per line: for every ``key: value`` pair passed as
    context, some single stderr line must contain both the key and the
    value.  A plain "substring anywhere in stderr" check would also pass if
    a key and its value were printed on unrelated lines, which would not
    match the contract stated above.
    """
    context = {
        "template": "F29 (B4 selected)",
        "unit": "source_section_ids=['02-2']",
        "v4_rank1": "F13",
        "fit_error": "slot 'title' missing",
    }
    with pytest.raises(SystemExit):
        _b4_mapper_source_blocked_exit(
            "b4_selected_fit_error",
            position="top",
            context=context,
        )
    err_lines = capsys.readouterr().err.splitlines()
    for key, value in context.items():
        assert any(key in line and value in line for line in err_lines), (
            f"context entry {key!r}={value!r} was not reported on a single "
            f"stderr line"
        )
def test_blocked_exit_ignores_flag_state(
    monkeypatch: pytest.MonkeyPatch,
    capsys: pytest.CaptureFixture[str],
) -> None:
    """The exit helper terminates even when the env flag is not set.

    The helper performs no flag check of its own, so with the variable
    removed from the environment a direct call must still raise
    SystemExit(1).  Deciding whether to call it at all is left to the
    call sites.
    """
    monkeypatch.delenv(FLAG, raising=False)
    minimal_context = {"v4_rank1": "F13"}
    with pytest.raises(SystemExit) as raised:
        _b4_mapper_source_blocked_exit(
            "b4_no_cover", position="top", context=minimal_context
        )
    assert raised.value.code == 1
def test_default_off_flag_state_does_not_invoke_blocked_helper(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """With the env var removed, the flag reader must report False.

    Only the reader is exercised here: a False result is the condition
    under which flag-gated callers do not reach the BLOCKED exit helper.
    """
    monkeypatch.delenv(FLAG, raising=False)
    flag_state = _b4_mapper_source_enabled()
    assert flag_state is False

View File

@@ -0,0 +1,426 @@
"""IMP-89 89-a u5 — slot_payload byte-equivalence when B4 matches mapper.
Stage 2 u5 contract (verbatim)::
slot_payload byte-equivalent (PHASE_Z_B4_MAPPER_SOURCE ON + matches_mapper=True)
vs OFF, across mdx 01-05
Why this is load-bearing
========================
u4 freezes the FULL pipeline ``final.html`` SHA under flag OFF. u5 isolates
the *mapper-input* axis: when B4 ``PlacementPlan.selected_template_id``
equals the legacy mapper input (``unit.frame_template_id`` — V4 rank-1),
the selector at ``src/phase_z2_pipeline.py:223-242`` returns the same
template id under either flag state. The mapper is a pure function of
``(MdxSection, template_id)`` (deterministic dispatch via
``map_with_contract`` → named ``PAYLOAD_BUILDERS`` — verified at
``src/phase_z2_mapper.py:894-919``), so identical inputs → identical
``slot_payload`` dicts → identical JSON-canonical bytes.
This is the *cross-axis* proof complementing u4:
* u4 = on-disk ``final.html`` SHA parity, default-OFF only (legacy
preservation guard).
* u5 = ``slot_payload`` byte equivalence, *flag ON ↔ flag OFF* (Layer A
render-active behavior-preserving proof under matches_mapper).
The negative case (``test_slot_payload_diverges_when_b4_mismatches_under_flag_on``)
locks the fact that ``slot_payload`` actually *depends* on the
``template_id`` selector output — without it, the equivalence test could
trivially pass even if the selector were a no-op.
"""
from __future__ import annotations
import json
from dataclasses import asdict, dataclass
from pathlib import Path
from typing import Optional
import pytest
from src.phase_z2_mapper import (
FitError,
get_contract,
load_frame_contracts,
map_with_contract,
)
from src.phase_z2_pipeline import (
_b4_mapper_source_enabled,
_select_mapper_template_id,
extract_content_objects,
parse_mdx,
)
from src.phase_z2_placement_planner import plan_placement
@dataclass
class _StubPlan:
    """Minimal placement-plan stand-in for selector unit checks.

    ``_select_mapper_template_id`` reads only the ``selected_template_id``
    attribute of the plan it receives, so this stub carries just that one
    field.  Using it instead of the real ``PlacementPlan`` keeps these tests
    independent of whatever other fields that class has.
    """

    # Template id the stubbed plan reports as selected; None models the
    # "no template selected" case.
    selected_template_id: Optional[str]
# Name of the environment variable the tests set / unset via monkeypatch.
FLAG = "PHASE_Z_B4_MAPPER_SOURCE"
# Ancestor at index 2 of this file's resolved path (presumably the repository
# root, going by the name — confirm against where this test file lives).
_REPO_ROOT = Path(__file__).resolve().parents[2]
# Directory the sample .mdx inputs are read from.
_SAMPLES_DIR = _REPO_ROOT / "samples" / "mdx_batch"
# File names (relative to _SAMPLES_DIR) iterated by the corpus-driven tests.
_MDX_BATCH = ("01.mdx", "02.mdx", "03.mdx", "04.mdx", "05.mdx")
def _canonical_bytes(payload: dict) -> bytes:
    """Serialize *payload* to a deterministic UTF-8 JSON byte string.

    Keys are sorted so that dict insertion order does not influence the
    result, and non-ASCII characters are written literally rather than as
    ``\\uXXXX`` escapes.

    Args:
        payload: JSON-serializable dict.

    Returns:
        The UTF-8 encoding of the canonical JSON text.
    """
    canonical_text = json.dumps(payload, ensure_ascii=False, sort_keys=True)
    return canonical_text.encode("utf-8")
def _matches_mapper_cases() -> list[tuple[str, str, object, str]]:
    """Collect the sections usable for the byte-equivalence test.

    Every section of every file in ``_MDX_BATCH`` is planned with
    ``plan_placement``.  A section is kept only when all of these hold:

    * the plan's ``selected_template_id`` is not ``None``;
    * ``get_contract`` returns a contract for that id;
    * ``map_with_contract`` accepts the section without raising
      ``FitError``.

    The selected template id doubles as the simulated legacy mapper input in
    the tests, i.e. the "plan pick equals legacy input" situation is created
    by construction rather than by running the full pipeline.

    Returns:
        ``(mdx_file, section_id, section, template_id)`` tuples in corpus
        order.
    """
    contracts = list(load_frame_contracts().values())
    eligible: list[tuple[str, str, object, str]] = []
    for file_name in _MDX_BATCH:
        _title, sections, _footer = parse_mdx(_SAMPLES_DIR / file_name)
        for sec in sections:
            picked_id = plan_placement(
                content_objects=extract_content_objects(
                    sec, source_shape=None
                ),
                frame_contracts=contracts,
                section_id=sec.section_id,
            ).selected_template_id
            if picked_id is None:
                # The plan selected nothing for this section.
                continue
            picked_contract = get_contract(picked_id)
            if picked_contract is None:
                continue
            try:
                # Dry run: only acceptance matters, the payload is discarded.
                map_with_contract(sec, picked_contract)
            except FitError:
                continue
            eligible.append((file_name, sec.section_id, sec, picked_id))
    return eligible
# Built once when this module is imported (i.e. at pytest collection time).
# The same list feeds the parametrize decorator of the equivalence test and
# the separate non-empty coverage check, so an empty corpus cannot let the
# equivalence test "pass" with zero parametrized cases unnoticed.
_MATCHES_CASES = _matches_mapper_cases()
def _slot_payload_via_selector(
    section, plan, mapper_input: str
) -> tuple[dict, str]:
    """Run selector → contract lookup → mapper once and return the result.

    The template id is obtained from ``_select_mapper_template_id(plan,
    mapper_input)`` (so it depends on the current env flag), resolved to a
    contract with ``get_contract`` and fed to ``map_with_contract``.  No
    BLOCKED-exit handling is performed here.

    Args:
        section: the section object handed to the mapper.
        plan: object exposing ``selected_template_id``.
        mapper_input: the legacy template id the selector falls back to when
            the flag is OFF.

    Returns:
        ``(slot_payload, resolved_template_id)``.

    Raises:
        AssertionError: if the selector yields ``None`` or no contract exists
            for the resolved id (both indicate a broken test fixture).
    """
    resolved = _select_mapper_template_id(plan, mapper_input)
    assert resolved is not None, (
        "u5 fixture invariant violated: resolved template_id is None even "
        "though the case was pre-filtered for B4 cover. Re-check "
        "_matches_mapper_cases()."
    )
    resolved_contract = get_contract(resolved)
    assert resolved_contract is not None, (
        f"u5 fixture invariant violated: no contract for resolved="
        f"{resolved!r} (case was pre-filtered for catalog membership)."
    )
    slot_payload = map_with_contract(section, resolved_contract)
    return slot_payload, resolved
# ─── algebraic precondition (no pipeline / no mapper run) ──────────────
def test_selector_returns_same_value_under_flag_flip_when_matches_mapper(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """When the plan pick equals the legacy input, the flag is irrelevant.

    The selector is called once with the flag ON and once with it removed;
    both calls must return the shared template id.  A failure here points
    at the selector itself rather than at the mapper.
    """
    shared_id = "F13"  # plan pick == legacy input by construction
    stub = _StubPlan(selected_template_id=shared_id)

    monkeypatch.setenv(FLAG, "1")
    assert _b4_mapper_source_enabled() is True
    result_flag_on = _select_mapper_template_id(stub, shared_id)

    monkeypatch.delenv(FLAG, raising=False)
    assert _b4_mapper_source_enabled() is False
    result_flag_off = _select_mapper_template_id(stub, shared_id)

    assert result_flag_on == result_flag_off == "F13"
# ─── end-to-end byte equivalence (parametrized over real mdx data) ────
@pytest.mark.integration
@pytest.mark.parametrize(
    ("mdx_file", "section_id", "section", "template_id"),
    _MATCHES_CASES,
    ids=lambda arg: (
        arg if isinstance(arg, str) else getattr(arg, "section_id", "_")
    ),
)
def test_slot_payload_byte_equivalent_when_matches_mapper(
    monkeypatch: pytest.MonkeyPatch,
    mdx_file: str,
    section_id: str,
    section,
    template_id: str,
) -> None:
    """Flag ON and flag OFF must yield byte-identical mapper output.

    The placement plan is recomputed inside the test and must still select
    the enumerated ``template_id``.  With the legacy mapper input set to
    that same id, the selector + mapper path is run once with the flag
    removed and once with it set; the resolved ids, the canonical JSON bytes
    of both payloads and the plan snapshots taken around the calls must all
    agree.
    """
    contracts = list(load_frame_contracts().values())
    plan = plan_placement(
        content_objects=extract_content_objects(section, source_shape=None),
        frame_contracts=contracts,
        section_id=section.section_id,
    )
    assert plan.selected_template_id == template_id, (
        f"u5 invariant: B4 selection drifted between enumeration and "
        f"test execution for {mdx_file} {section_id}: enumerated="
        f"{template_id!r} live={plan.selected_template_id!r}"
    )

    # The legacy mapper input is forced to equal the plan pick.
    legacy_mapper_input = template_id

    # --- flag OFF run, bracketed by plan snapshots ---
    monkeypatch.delenv(FLAG, raising=False)
    snapshot_before = asdict(plan)  # type: ignore[call-overload]
    payload_off, resolved_off = _slot_payload_via_selector(
        section, plan, legacy_mapper_input
    )
    snapshot_after_off = asdict(plan)  # type: ignore[call-overload]

    # --- flag ON run ---
    monkeypatch.setenv(FLAG, "1")
    payload_on, resolved_on = _slot_payload_via_selector(
        section, plan, legacy_mapper_input
    )
    snapshot_after_on = asdict(plan)  # type: ignore[call-overload]

    assert resolved_off == resolved_on == template_id, (
        f"selector returned different template_id under matches_mapper for "
        f"{mdx_file} {section_id}: off={resolved_off!r} on={resolved_on!r}"
    )
    bytes_off = _canonical_bytes(payload_off)
    bytes_on = _canonical_bytes(payload_on)
    assert bytes_off == bytes_on, (
        f"slot_payload byte equivalence broken for {mdx_file} {section_id} "
        f"(template_id={template_id}): mapper output diverged between "
        f"flag OFF and flag ON despite identical mapper input. This means "
        f"either map_with_contract gained nondeterminism or a hidden "
        f"selector-side effect crept in."
    )
    assert snapshot_before == snapshot_after_off == snapshot_after_on, (
        f"PlacementPlan mutated by selector / mapper call for {mdx_file} "
        f"{section_id} — u5 byte equivalence relies on the selector being "
        f"a pure read of plan.selected_template_id."
    )
@pytest.mark.integration
def test_matches_mapper_corpus_coverage_is_non_empty() -> None:
    """Guard against the parametrized equivalence test running zero cases.

    An empty ``_MATCHES_CASES`` would make the parametrized test collect
    nothing, so it could never fail.  This test fails loudly in that
    situation instead.
    """
    assert _MATCHES_CASES, (
        "u5 byte equivalence had zero matches_mapper cases — every section "
        "across mdx 01-05 was either B4-uncovered or raised FitError. "
        "Either the corpus shrank, B4 algorithm regressed, or the mapper "
        "now rejects every B4 pick. Investigate before re-locking."
    )
    # Already implied by the non-empty check above (a non-empty case list has
    # at least one file); kept as an explicit statement of the coverage floor.
    seen_files = set()
    for case in _MATCHES_CASES:
        seen_files.add(case[0])
    assert len(seen_files) >= 1, (
        f"u5 coverage too narrow: {seen_files} — at least one mdx file "
        f"must yield a matches_mapper case for the equivalence proof to "
        f"be load-bearing."
    )
# ─── negative case — bytes MUST diverge when B4 mismatches ─────────────
@pytest.mark.integration
def test_slot_payload_diverges_when_b4_mismatches_under_flag_on(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Negative control: a different template id must change the payload.

    Step 1 searches the sample corpus for the first section where the mapper
    accepts both the plan's pick and some *other* catalog template, and the
    two payloads differ byte-wise.

    Step 2 replays that section through the selector helper with a stub plan
    that picks the first template while the legacy input is the other one:
    flag ON must resolve to the plan pick, flag OFF to the legacy input, and
    the two payloads must differ.  This shows the equivalence test is not
    trivially true for a selector that ignores the flag.
    """
    catalog = list(load_frame_contracts().values())

    def _first_divergence():
        # Returns the first (file, section_id, pick, alt_id, pick_payload,
        # alt_payload) whose payloads differ, or None when there is none.
        for file_name in _MDX_BATCH:
            _t, secs, _f = parse_mdx(_SAMPLES_DIR / file_name)
            for sec in secs:
                pick = plan_placement(
                    content_objects=extract_content_objects(
                        sec, source_shape=None
                    ),
                    frame_contracts=catalog,
                    section_id=sec.section_id,
                ).selected_template_id
                if pick is None:
                    continue
                pick_contract = get_contract(pick)
                if pick_contract is None:
                    continue
                try:
                    pick_payload = map_with_contract(sec, pick_contract)
                except FitError:
                    continue
                # Walk the catalog in its own order so the search is
                # deterministic; skip entries without an id and the pick.
                for candidate in catalog:
                    candidate_id = candidate.get("template_id")
                    if not candidate_id or candidate_id == pick:
                        continue
                    try:
                        candidate_payload = map_with_contract(sec, candidate)
                    except FitError:
                        continue
                    if _canonical_bytes(pick_payload) != _canonical_bytes(
                        candidate_payload
                    ):
                        return (
                            file_name,
                            sec.section_id,
                            pick,
                            candidate_id,
                            pick_payload,
                            candidate_payload,
                        )
        return None

    diverging_case = _first_divergence()
    assert diverging_case is not None, (
        "Could not find a section across mdx 01-05 where the mapper "
        "accepts two distinct templates with divergent slot_payload. "
        "Without such a case the equivalence test above is tautological."
    )
    (
        mdx_file,
        section_id,
        b4_pick,
        alt_id,
        _b4_payload,
        _alt_payload,
    ) = diverging_case

    # Stub plan picks ``b4_pick`` while the legacy input is ``alt_id``:
    # flag ON → selector yields b4_pick, flag OFF → selector yields alt_id.
    stub_plan = _StubPlan(selected_template_id=b4_pick)
    _title, sections, _footer = parse_mdx(_SAMPLES_DIR / mdx_file)
    section = next(
        candidate
        for candidate in sections
        if candidate.section_id == section_id
    )

    monkeypatch.setenv(FLAG, "1")
    on_payload, on_resolved = _slot_payload_via_selector(
        section, stub_plan, alt_id
    )
    monkeypatch.delenv(FLAG, raising=False)
    off_payload, off_resolved = _slot_payload_via_selector(
        section, stub_plan, alt_id
    )

    assert on_resolved == b4_pick
    assert off_resolved == alt_id
    assert _canonical_bytes(on_payload) != _canonical_bytes(off_payload), (
        f"Negative case failed: selector flip from {alt_id} (OFF) to "
        f"{b4_pick} (ON) produced byte-identical slot_payload for "
        f"{mdx_file} {section_id}. The mapper appears to ignore "
        f"template_id, which would make the equivalence test tautological."
    )
# ─── selector default-state lock (mirror of u4 sanity check) ───────────
def test_selector_default_state_returns_legacy_under_b4_mismatch(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Flag unset: the legacy input wins even if the plan picks otherwise.

    The stub plan selects "F29" while the legacy input is "F13"; with the
    env var removed the selector must hand back "F13" unchanged.
    """
    mismatching_plan = _StubPlan(selected_template_id="F29")
    monkeypatch.delenv(FLAG, raising=False)
    assert _b4_mapper_source_enabled() is False
    selected = _select_mapper_template_id(mismatching_plan, "F13")
    assert selected == "F13"

View File

@@ -0,0 +1,54 @@
"""IMP-89 89-a u1 — PHASE_Z_B4_MAPPER_SOURCE flag reader unit tests.
Stage 2 plan (u1): adds an env flag reader helper (default OFF) distinct
from PHASE_Z_B4_GATEKEEPER. u1 only locks reader semantics — u2 wires it
into the slot_payload source-of-truth switch and u3 layers BLOCKED exits
for B4 no-cover and B4-selected FitError under flag ON.
Truthy contract (mirrors PHASE_Z_B4_GATEKEEPER /
PHASE_Z_B4_SOURCE_SHAPE_ENABLED at src/phase_z2_pipeline.py:4625,4662):
case-insensitive + leading/trailing whitespace stripped; truthy set
= {'1', 'true', 'yes'}. Everything else (including '0', '', 'no',
'false', missing env var) is OFF.
"""
from __future__ import annotations
import pytest
from src.phase_z2_pipeline import _b4_mapper_source_enabled
# Name of the environment variable whose reader is under test; every test
# below sets or removes it through ``monkeypatch``.
FLAG = "PHASE_Z_B4_MAPPER_SOURCE"
def test_default_off_when_env_unset(monkeypatch: pytest.MonkeyPatch) -> None:
    """With the variable absent from the environment the flag reads OFF."""
    monkeypatch.delenv(FLAG, raising=False)
    flag_state = _b4_mapper_source_enabled()
    assert flag_state is False
@pytest.mark.parametrize(
    "value",
    [
        "1",
        "true",
        "yes",
        "TRUE",
        "Yes",
        " true ",
        " 1\t",
    ],
)
def test_truthy_values_enable_flag(
    monkeypatch: pytest.MonkeyPatch, value: str
) -> None:
    """Accepted spellings (any case, padded with whitespace) switch it ON."""
    monkeypatch.setenv(FLAG, value)
    flag_state = _b4_mapper_source_enabled()
    assert flag_state is True
@pytest.mark.parametrize(
    "value",
    [
        "",
        "0",
        "no",
        "false",
        "off",
        "2",
        "on",
        "y",
    ],
)
def test_non_truthy_values_keep_flag_off(
    monkeypatch: pytest.MonkeyPatch, value: str
) -> None:
    """Anything outside the accepted set — including 'on' and 'y' — is OFF."""
    monkeypatch.setenv(FLAG, value)
    flag_state = _b4_mapper_source_enabled()
    assert flag_state is False
def test_flag_distinct_from_gatekeeper(monkeypatch: pytest.MonkeyPatch) -> None:
    """Setting PHASE_Z_B4_GATEKEEPER must not turn the mapper-source flag ON.

    The gatekeeper variable is set to "1" while the mapper-source variable
    is removed; the reader must still report False, i.e. the two variables
    are read independently.
    """
    monkeypatch.setenv("PHASE_Z_B4_GATEKEEPER", "1")
    monkeypatch.delenv(FLAG, raising=False)
    flag_state = _b4_mapper_source_enabled()
    assert flag_state is False

View File

@@ -0,0 +1,96 @@
"""IMP-89 89-a u2 — slot_payload source-of-truth switch unit tests.
Stage 2 plan (u2): wires the u1 PHASE_Z_B4_MAPPER_SOURCE flag into the
single slot_payload construction site at src/phase_z2_pipeline.py:4702
via the _select_mapper_template_id() selector helper.
Locked semantics (Stage 1 Q1 / Stage 2 u2):
flag ON → mapper input = placement_plan.selected_template_id (B4)
flag OFF → mapper input = unit.frame_template_id (legacy mapper-only)
u3 will add BLOCKED exits for (selected_template_id is None OR FitError
on B4-selected) under flag ON — NO silent fallback. u4 guards default-OFF
final.html SHA parity for mdx 01-05.
"""
from __future__ import annotations
from dataclasses import dataclass
from typing import Optional
import pytest
from src.phase_z2_pipeline import _select_mapper_template_id
# Name of the environment variable that drives the selector; every test
# below sets or removes it through ``monkeypatch``.
FLAG = "PHASE_Z_B4_MAPPER_SOURCE"
@dataclass
class _StubPlan:
    """Minimal PlacementPlan stand-in — only selected_template_id is read.

    The selector under test accesses nothing else on the plan object, so a
    single-field dataclass is sufficient for these unit tests.
    """

    # Template id the stubbed plan reports as selected; None models the
    # "no template selected" case.
    selected_template_id: Optional[str]
def test_flag_off_returns_unit_frame_template_id(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """With the flag unset the selector returns the legacy template id."""
    monkeypatch.delenv(FLAG, raising=False)
    stub = _StubPlan(selected_template_id="B4_PICK")
    chosen = _select_mapper_template_id(stub, "V4_PICK")
    assert chosen == "V4_PICK"
def test_flag_on_returns_placement_plan_selected_template_id(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """With the flag set to "1" the selector returns the plan's pick."""
    monkeypatch.setenv(FLAG, "1")
    stub = _StubPlan(selected_template_id="B4_PICK")
    chosen = _select_mapper_template_id(stub, "V4_PICK")
    assert chosen == "B4_PICK"
def test_flag_on_with_matching_b4_returns_same_value(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """If the plan pick equals the legacy id, flag ON returns that same id."""
    monkeypatch.setenv(FLAG, "true")
    stub = _StubPlan(selected_template_id="F13")
    chosen = _select_mapper_template_id(stub, "F13")
    assert chosen == "F13"
def test_flag_on_with_no_b4_cover_returns_none(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Flag ON and a plan without a pick: the selector passes None through."""
    monkeypatch.setenv(FLAG, "yes")
    stub = _StubPlan(selected_template_id=None)
    chosen = _select_mapper_template_id(stub, "V4_PICK")
    assert chosen is None
def test_flag_off_with_no_b4_cover_still_returns_legacy(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Flag unset: a plan without a pick does not affect the legacy result."""
    monkeypatch.delenv(FLAG, raising=False)
    stub = _StubPlan(selected_template_id=None)
    chosen = _select_mapper_template_id(stub, "V4_PICK")
    assert chosen == "V4_PICK"
@pytest.mark.parametrize(
    "non_truthy",
    [
        "",
        "0",
        "no",
        "false",
        "off",
        "2",
    ],
)
def test_non_truthy_env_values_keep_legacy_source(
    monkeypatch: pytest.MonkeyPatch, non_truthy: str
) -> None:
    """Values outside the accepted ON set leave the legacy id in charge."""
    monkeypatch.setenv(FLAG, non_truthy)
    stub = _StubPlan(selected_template_id="B4_PICK")
    chosen = _select_mapper_template_id(stub, "V4_PICK")
    assert chosen == "V4_PICK"
def test_gatekeeper_flag_does_not_flip_mapper_source(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """PHASE_Z_B4_GATEKEEPER alone must not route the selector to the plan.

    The gatekeeper variable is set to "1" while the mapper-source variable
    is removed; the selector must still return the legacy template id.
    """
    monkeypatch.setenv("PHASE_Z_B4_GATEKEEPER", "1")
    monkeypatch.delenv(FLAG, raising=False)
    stub = _StubPlan(selected_template_id="B4_PICK")
    chosen = _select_mapper_template_id(stub, "V4_PICK")
    assert chosen == "V4_PICK"

View File

View File

@@ -0,0 +1,56 @@
{
"schema_version": 2,
"axis": "IMP-89 89-a u4 — final.html SHA baseline captured via FULL run_phase_z2_mvp1 pipeline (flag OFF / default)",
"description": "Frozen SHA-256 of `final.html` bytes (the artifact written to disk at src/phase_z2_pipeline.py:5994-5996) captured by running the full Phase Z pipeline end-to-end for each mdx 01-05 under PHASE_Z_B4_MAPPER_SOURCE=OFF. Under flag OFF the 89-a selector `_select_mapper_template_id(plan, T)` returns `T` verbatim, so the mapper input is byte-identical to the pre-89-a legacy call shape `map_mdx_to_slots(section, unit.frame_template_id)` — the rendered HTML and therefore the final.html SHA match the pre-89-a baseline. The u4 regression test runs the same pipeline shape under flag OFF and asserts SHA equality. Regenerate only when an upstream mapper/render/template delta is deliberately reviewed and accepted.",
"captured_at_utc": "2026-05-23T15:03:40Z",
"renderer": {
"entrypoint": "src.phase_z2_pipeline.run_phase_z2_mvp1",
"write_site": "src/phase_z2_pipeline.py:5994-5996",
"artifact_relpath": "<RUNS_DIR>/<run_id>/phase_z2/final.html"
},
"mdx_batch": [
"01.mdx",
"02.mdx",
"03.mdx",
"04.mdx",
"05.mdx"
],
"mdx_files": {
"01.mdx": {
"mdx_file": "01.mdx",
"run_id": "89a_baseline_01",
"final_html_size_bytes": 29089,
"sha256": "ad6f16a3068b5a55bd900122688f691ffef0716b91de52151551242773595487",
"pipeline_exit_code": null
},
"02.mdx": {
"mdx_file": "02.mdx",
"run_id": "89a_baseline_02",
"final_html_size_bytes": 25249,
"sha256": "4832e3e45660eb95201425e975a4c285fb78493cf8d09d6395897f7897b1794b",
"pipeline_exit_code": null
},
"03.mdx": {
"mdx_file": "03.mdx",
"run_id": "89a_baseline_03",
"final_html_size_bytes": 39804,
"sha256": "7dae47b3d51aa15a6752e4543a746abb4c4da71e7e95895eee8ef08c2eabc948",
"pipeline_exit_code": null
},
"04.mdx": {
"mdx_file": "04.mdx",
"run_id": "89a_baseline_04",
"final_html_size_bytes": 27707,
"sha256": "2bce45041cdcca6518cd92586c1be9e051a5c98f5a0ad61fdde02604618a1d80",
"pipeline_exit_code": null
},
"05.mdx": {
"mdx_file": "05.mdx",
"run_id": "89a_baseline_05",
"final_html_size_bytes": 21187,
"sha256": "3ded2fff8cc45301675d2a8917af945b4003631688cad6f088976fd57ab9b12c",
"pipeline_exit_code": 1
}
},
"total_files": 5
}

View File

@@ -0,0 +1,168 @@
"""IMP-89 89-a u4 — capture final.html SHA baseline via the FULL Phase Z pipeline.
Runs ``src.phase_z2_pipeline.run_phase_z2_mvp1`` end-to-end for every mdx file
in ``samples/mdx_batch/`` (01-05) under PHASE_Z_B4_MAPPER_SOURCE=OFF (default).
Each run writes a real ``final.html`` to disk at
``<RUNS_DIR>/<run_id>/phase_z2/final.html`` — exactly the production write
site at ``src/phase_z2_pipeline.py:5994-5996``. The bytes of that on-disk
artifact are SHA-256 hashed and stored in
``tests/regression/fixtures/89a_pre_baseline_sha.json``.
The u4 regression test in ``tests/regression/test_b4_mapper_source_sha_parity.py``
runs the same pipeline shape under flag OFF, reads the on-disk ``final.html``,
hashes its bytes, and asserts SHA equality with each frozen value. The
mathematical chain that makes this a genuine "pre-89-a baseline" guard:
* Under flag OFF, ``_select_mapper_template_id(plan, T) == T`` for every
``(plan, T)`` pair (locked by u2 + u4 algebraic precondition tests).
* Therefore the mapper input is byte-identical to the legacy pre-89-a call
shape ``map_mdx_to_slots(section, unit.frame_template_id)``.
* Therefore the rendered HTML is byte-identical to pre-89-a output.
* Therefore the on-disk ``final.html`` is byte-identical → SHA matches.
Any future drift — in the selector, mapper, render_slide, slide_base.html,
or any upstream code path — produces a divergent SHA and breaks the test.
Run from repo root::
python tests/regression/scripts/capture_89a_pre_baseline.py
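The capture script is safe to re-run (only ``captured_at_utc`` changes when
the pipeline output is unchanged) but is meant to be re-run only when an
upstream mapper/render/template delta is reviewed and accepted. It refuses
to run when PHASE_Z_B4_MAPPER_SOURCE is set to any non-empty value — even a
non-truthy one such as "0" — because the post-89-a flag-ON state is NOT the
baseline axis.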
"""
from __future__ import annotations
import hashlib
import json
import os
import sys
import tempfile
from datetime import datetime, timezone
from pathlib import Path
# Repository root: this script lives at tests/regression/scripts/, so the
# root is parents[3] of the resolved file path.
_REPO_ROOT = Path(__file__).resolve().parents[3]
# Put the repo root and its src/ directory at the front of sys.path BEFORE
# the pipeline import below, so the script works when launched directly
# from the repo root (hence the E402 suppression on that import).
sys.path.insert(0, str(_REPO_ROOT))
sys.path.insert(0, str(_REPO_ROOT / "src"))
import src.phase_z2_pipeline as pz2 # noqa: E402
# Directory holding the sample mdx inputs.
_SAMPLES_DIR = _REPO_ROOT / "samples" / "mdx_batch"
# The fixed batch of sample files whose final.html is hashed, in capture order.
_MDX_BATCH = ("01.mdx", "02.mdx", "03.mdx", "04.mdx", "05.mdx")
# Destination of the frozen baseline JSON written by main().
_OUT_PATH = (
_REPO_ROOT / "tests" / "regression" / "fixtures" / "89a_pre_baseline_sha.json"
)
def _capture_one(mdx_file: str, runs_root: Path) -> dict:
    """Run the full pipeline once and hash the on-disk final.html.

    ``pz2.RUNS_DIR`` MUST be pinned to ``runs_root`` by the caller before
    invocation; this function reads the artifact back from
    ``<runs_root>/<run_id>/phase_z2/final.html``.

    ``SystemExit`` raised by the pipeline is caught so that a terminal exit
    does not prevent hashing an artifact that was already written. The exit
    code is recorded on the returned entry: an ``int`` code is kept as-is,
    any other code (a message string or ``None``) is recorded as ``1``.
    ``None`` in the entry means the pipeline returned without exiting.

    Validation uses explicit raises rather than ``assert`` so the checks
    still run when the script is started with ``python -O``.

    Args:
        mdx_file: File name inside the samples directory (e.g. ``"01.mdx"``).
        runs_root: Directory the pipeline writes its run folders into.

    Returns:
        A dict with ``mdx_file``, ``run_id``, ``final_html_size_bytes``,
        ``sha256`` and ``pipeline_exit_code``.

    Raises:
        FileNotFoundError: The sample mdx file does not exist.
        RuntimeError: final.html is missing or empty after the run.
    """
    mdx_path = _SAMPLES_DIR / mdx_file
    if not mdx_path.exists():
        raise FileNotFoundError(f"sample missing: {mdx_path}")

    run_id = f"89a_baseline_{mdx_path.stem}"
    pipeline_exit_code: int | None = None
    try:
        pz2.run_phase_z2_mvp1(mdx_path, run_id=run_id)
    except SystemExit as exc:
        # Non-int codes (message strings, None) are normalised to 1 so the
        # recorded value is always JSON-serialisable as an int.
        pipeline_exit_code = (
            int(exc.code) if isinstance(exc.code, int) else 1
        )

    final_html_path = runs_root / run_id / "phase_z2" / "final.html"
    if not final_html_path.exists():
        raise RuntimeError(
            f"final.html not written by pipeline: {final_html_path} "
            f"(pipeline_exit_code={pipeline_exit_code})"
        )

    raw_bytes = final_html_path.read_bytes()
    if not raw_bytes:
        # An empty artifact would freeze the SHA of b"" as the baseline.
        raise RuntimeError(f"final.html is empty: {final_html_path}")

    return {
        "mdx_file": mdx_file,
        "run_id": run_id,
        "final_html_size_bytes": len(raw_bytes),
        "sha256": hashlib.sha256(raw_bytes).hexdigest(),
        "pipeline_exit_code": pipeline_exit_code,
    }
def capture() -> dict:
    """Capture the flag-OFF final.html SHA baseline for every sample in the batch.

    Refuses to run when ``PHASE_Z_B4_MAPPER_SOURCE`` is set to any non-empty
    value. The guard is an explicit raise (not ``assert``) so it cannot be
    stripped by ``python -O`` and let a flag-ON run be frozen as the baseline.

    ``pz2.RUNS_DIR`` is temporarily repointed at a fresh temporary directory
    for the duration of the capture and restored in a ``finally`` block, so
    the pipeline's run folders never touch the real runs directory.

    Returns:
        The baseline document (schema version 2) ready to be serialised as
        JSON, keyed per mdx file under ``"mdx_files"``.

    Raises:
        RuntimeError: ``PHASE_Z_B4_MAPPER_SOURCE`` is set in the environment.
    """
    if os.environ.get("PHASE_Z_B4_MAPPER_SOURCE", "") != "":
        raise RuntimeError(
            "PHASE_Z_B4_MAPPER_SOURCE must be unset when capturing baseline "
            "(default-OFF state is the production-equivalent axis for u4). "
            "Refusing to run with the flag enabled."
        )
    _OUT_PATH.parent.mkdir(parents=True, exist_ok=True)
    with tempfile.TemporaryDirectory(prefix="89a_baseline_") as tmp:
        runs_root = Path(tmp)
        original_runs_dir = pz2.RUNS_DIR
        pz2.RUNS_DIR = runs_root
        try:
            # Hashing happens inside _capture_one, while the temp dir exists.
            entries = [_capture_one(mf, runs_root) for mf in _MDX_BATCH]
        finally:
            pz2.RUNS_DIR = original_runs_dir
    return {
        "schema_version": 2,
        "axis": (
            "IMP-89 89-a u4 — final.html SHA baseline captured via FULL "
            "run_phase_z2_mvp1 pipeline (flag OFF / default)"
        ),
        "description": (
            "Frozen SHA-256 of `final.html` bytes (the artifact written to "
            "disk at src/phase_z2_pipeline.py:5994-5996) captured by running "
            "the full Phase Z pipeline end-to-end for each mdx 01-05 under "
            "PHASE_Z_B4_MAPPER_SOURCE=OFF. Under flag OFF the 89-a selector "
            "`_select_mapper_template_id(plan, T)` returns `T` verbatim, so "
            "the mapper input is byte-identical to the pre-89-a legacy call "
            "shape `map_mdx_to_slots(section, unit.frame_template_id)` — "
            "the rendered HTML and therefore the final.html SHA match the "
            "pre-89-a baseline. The u4 regression test runs the same "
            "pipeline shape under flag OFF and asserts SHA equality. "
            "Regenerate only when an upstream mapper/render/template delta "
            "is deliberately reviewed and accepted."
        ),
        "captured_at_utc": (
            datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
        ),
        "renderer": {
            "entrypoint": "src.phase_z2_pipeline.run_phase_z2_mvp1",
            "write_site": "src/phase_z2_pipeline.py:5994-5996",
            "artifact_relpath": "<RUNS_DIR>/<run_id>/phase_z2/final.html",
        },
        "mdx_batch": list(_MDX_BATCH),
        "mdx_files": {entry["mdx_file"]: entry for entry in entries},
        "total_files": len(entries),
    }
def main() -> None:
    """Capture the baseline and write it to ``_OUT_PATH`` as UTF-8 JSON.

    The document is pretty-printed with a two-space indent, non-ASCII
    characters are kept verbatim, and a trailing newline is appended.
    A one-line summary is printed afterwards.
    """
    data = capture()
    serialized = json.dumps(data, indent=2, ensure_ascii=False) + "\n"
    _OUT_PATH.write_text(serialized, encoding="utf-8")
    captured_names = ", ".join(data["mdx_files"].keys())
    print(
        f"wrote {_OUT_PATH} ({data['total_files']} files: "
        f"{captured_names})"
    )


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,347 @@
"""IMP-89 89-a u4 — final.html SHA parity guard for mdx 01-05 under flag OFF.
Stage 2 u4 contract (verbatim)::
mdx 01-05 final.html SHA == pre-89-a baseline under
PHASE_Z_B4_MAPPER_SOURCE=OFF (default)
How this guard is wired
=======================
The baseline is captured ONCE by
``tests/regression/scripts/capture_89a_pre_baseline.py``, which runs the
FULL Phase Z pipeline (``src.phase_z2_pipeline.run_phase_z2_mvp1``) for
every mdx file in ``samples/mdx_batch/`` (01-05) under flag OFF, reads
the on-disk ``final.html`` artifact from
``<RUNS_DIR>/<run_id>/phase_z2/final.html``
(the production write site at ``src/phase_z2_pipeline.py:5994-5996``),
and SHA-256 hashes its bytes. The hash is frozen at::
tests/regression/fixtures/89a_pre_baseline_sha.json
This test re-runs the SAME pipeline shape (full ``run_phase_z2_mvp1``,
flag OFF, isolated tmp RUNS_DIR), reads the live on-disk ``final.html``,
hashes its bytes, and asserts SHA equality with the frozen value.
Why this is a genuine "pre-89-a baseline" guard
================================================
Under flag OFF the 89-a switch at ``src/phase_z2_pipeline.py:4771-4773``
calls ``_select_mapper_template_id(plan, unit.frame_template_id)``, which
returns ``unit.frame_template_id`` verbatim (locked by u2 unit tests +
``test_flag_off_selector_returns_legacy_input_per_section`` below). The
mapper input is therefore byte-identical to the pre-89-a call shape
``map_mdx_to_slots(section, unit.frame_template_id)`` (no selector
indirection), so:
* mapper output (slot_payload) is byte-identical
* render_slide output is byte-identical
* on-disk ``final.html`` is byte-identical
* SHA-256 matches the frozen baseline
Any future drift in ``_select_mapper_template_id``, ``map_mdx_to_slots``,
``render_slide``, ``slide_base.html``, family partials, or any upstream
pipeline step produces a divergent SHA and breaks this test.
Crucially the baseline is NOT recomputed in this process — it is read
from the on-disk JSON fixture. This makes the test a genuine regression
guard rather than a paired identity check.
"""
from __future__ import annotations
import hashlib
import json
import os
from pathlib import Path
import pytest
import src.phase_z2_pipeline as pz2
from src.phase_z2_pipeline import (
_b4_mapper_source_enabled,
_select_mapper_template_id,
extract_content_objects,
load_frame_contracts,
parse_mdx,
)
from src.phase_z2_placement_planner import plan_placement
# Name of the environment variable that switches the mapper source to B4.
FLAG = "PHASE_Z_B4_MAPPER_SOURCE"
# Repository root: this test lives at tests/regression/, so the root is
# parents[2] of the resolved file path.
_REPO_ROOT = Path(__file__).resolve().parents[2]
# Directory holding the sample mdx inputs.
_SAMPLES_DIR = _REPO_ROOT / "samples" / "mdx_batch"
# The fixed batch of sample files covered by the parity guard.
_MDX_BATCH = ("01.mdx", "02.mdx", "03.mdx", "04.mdx", "05.mdx")
# Frozen SHA baseline, stored in the fixtures/ directory next to this test file.
_BASELINE_PATH = (
Path(__file__).parent / "fixtures" / "89a_pre_baseline_sha.json"
)
@pytest.fixture(scope="module")
def baseline() -> dict:
    """Return the frozen final.html SHA baseline parsed from the JSON fixture.

    The fixture is never regenerated here: a missing file, an unexpected
    ``schema_version`` or too few recorded files each fail immediately,
    with a hint pointing at the capture script where relevant.
    """
    fixture_present = _BASELINE_PATH.exists()
    assert fixture_present, (
        f"baseline fixture missing: {_BASELINE_PATH}\n"
        f"regenerate with: python tests/regression/scripts/"
        f"capture_89a_pre_baseline.py"
    )

    data = json.loads(_BASELINE_PATH.read_text(encoding="utf-8"))

    schema_matches = data.get("schema_version") == 2
    assert schema_matches, (
        f"unexpected baseline schema_version: {data.get('schema_version')} "
        f"(expected 2 — full-pipeline final.html SHA capture). Regenerate "
        f"the fixture via capture_89a_pre_baseline.py."
    )

    covers_batch = data.get("total_files", 0) >= len(_MDX_BATCH)
    assert covers_batch, (
        f"baseline has {data.get('total_files')} files — expected "
        f">= {len(_MDX_BATCH)} (one per mdx 01-05)"
    )
    return data
def _live_pipeline_final_html_sha(
    mdx_file: str, runs_root: Path
) -> tuple[str, int, int | None]:
    """Execute the full pipeline for one sample and hash its final.html.

    The caller is responsible for pointing ``pz2.RUNS_DIR`` at
    ``runs_root`` (via monkeypatch) beforehand; the artifact is then read
    back from ``<runs_root>/<run_id>/phase_z2/final.html``.

    A ``SystemExit`` from the pipeline is swallowed, mirroring the capture
    script, so an artifact written before the exit can still be hashed.
    Integer exit codes are kept; any other code is reported as ``1``; a
    run that returns normally reports ``None``.

    Returns ``(sha256_hex, size_bytes, pipeline_exit_code)``.
    """
    mdx_path = _SAMPLES_DIR / mdx_file
    assert mdx_path.exists(), f"sample missing: {mdx_path}"

    run_id = f"89a_live_{mdx_path.stem}"
    pipeline_exit_code: int | None = None
    try:
        pz2.run_phase_z2_mvp1(mdx_path, run_id=run_id)
    except SystemExit as exc:
        raw_code = exc.code
        if isinstance(raw_code, int):
            pipeline_exit_code = int(raw_code)
        else:
            pipeline_exit_code = 1

    final_html_path = runs_root / run_id / "phase_z2" / "final.html"
    assert final_html_path.exists(), (
        f"final.html not written by pipeline: {final_html_path} "
        f"(pipeline_exit_code={pipeline_exit_code})"
    )

    payload = final_html_path.read_bytes()
    digest = hashlib.sha256(payload).hexdigest()
    return digest, len(payload), pipeline_exit_code
@pytest.mark.integration
@pytest.mark.parametrize("mdx_file", _MDX_BATCH)
def test_post_89a_flag_off_final_html_sha_matches_frozen_baseline(
    monkeypatch: pytest.MonkeyPatch,
    tmp_path: Path,
    mdx_file: str,
    baseline: dict,
) -> None:
    """Full-pipeline SHA parity check for a single sample file.

    With ``PHASE_Z_B4_MAPPER_SOURCE`` removed from the environment and
    ``pz2.RUNS_DIR`` redirected into ``tmp_path``, the pipeline is run
    end-to-end and the SHA-256 of the resulting ``final.html`` is compared
    with the frozen baseline entry. The recorded pipeline exit code must
    match as well.
    """
    monkeypatch.delenv(FLAG, raising=False)
    assert _b4_mapper_source_enabled() is False

    # Isolate pipeline output inside the per-test temporary directory.
    runs_root = tmp_path / "runs"
    runs_root.mkdir(parents=True, exist_ok=True)
    monkeypatch.setattr(pz2, "RUNS_DIR", runs_root)

    frozen_entry = baseline["mdx_files"][mdx_file]
    expected_sha = frozen_entry["sha256"]
    expected_size = frozen_entry["final_html_size_bytes"]
    expected_exit = frozen_entry.get("pipeline_exit_code")

    live_result = _live_pipeline_final_html_sha(mdx_file, runs_root)
    live_sha, live_size, live_exit = live_result

    assert live_sha == expected_sha, (
        f"final.html SHA parity broken for {mdx_file}:\n"
        f" frozen pre-89-a baseline : {expected_sha} ({expected_size} bytes)\n"
        f" live post-89-a flag-OFF : {live_sha} ({live_size} bytes)\n"
        f" -> upstream drift in selector / mapper / render_slide / "
        f"slide_base.html / family partials / pipeline step. Re-capture "
        f"the baseline ONLY if the delta is deliberate and reviewed."
    )
    assert live_exit == expected_exit, (
        f"pipeline exit-code parity broken for {mdx_file}: "
        f"baseline={expected_exit} live={live_exit} — SHA matches but "
        f"the pipeline terminal state diverged (IMP-87 BLOCKED axis)."
    )
@pytest.mark.integration
def test_post_89a_flag_off_final_html_sha_holistic_sweep(
    monkeypatch: pytest.MonkeyPatch,
    tmp_path: Path,
    baseline: dict,
) -> None:
    """Run every sample in one test and require full SHA parity.

    This complements the parametrized per-file test: the loop lives inside
    the test body, and the final count check fails if fewer than
    ``len(_MDX_BATCH)`` files were compared. The pipeline exit code is not
    compared here, only the SHA.
    """
    monkeypatch.delenv(FLAG, raising=False)
    assert _b4_mapper_source_enabled() is False

    runs_root = tmp_path / "runs"
    runs_root.mkdir(parents=True, exist_ok=True)
    monkeypatch.setattr(pz2, "RUNS_DIR", runs_root)

    matched = 0
    for matched, mdx_file in enumerate(_MDX_BATCH, start=1):
        expected = baseline["mdx_files"][mdx_file]
        live_sha, live_size, _live_exit = _live_pipeline_final_html_sha(
            mdx_file, runs_root
        )
        assert live_sha == expected["sha256"], (
            f"sweep mismatch on {mdx_file}: baseline {expected['sha256']} "
            f"≠ live {live_sha} (size baseline={expected['final_html_size_bytes']} "
            f"live={live_size})"
        )

    assert matched == len(_MDX_BATCH), (
        f"sweep covered only {matched}/{len(_MDX_BATCH)} mdx files — "
        f"coverage too shallow to guard final.html SHA parity."
    )
@pytest.mark.parametrize("mdx_file", _MDX_BATCH)
def test_flag_off_selector_returns_legacy_input_per_section(
    monkeypatch: pytest.MonkeyPatch, mdx_file: str
) -> None:
    """Flag OFF: the selector echoes its legacy argument for every section.

    Each section parsed from the sample is planned with ``plan_placement``
    and the resulting plan is passed to ``_select_mapper_template_id``
    together with a sentinel legacy id. The sentinel must come back
    unchanged regardless of what the plan selected. The full pipeline is
    not run, so a failure here points at the selector itself.
    """
    monkeypatch.delenv(FLAG, raising=False)
    assert _b4_mapper_source_enabled() is False

    legacy_pick = "LEGACY_V4_RANK_1_SENTINEL"
    _title, sections, _footer = parse_mdx(_SAMPLES_DIR / mdx_file)
    frame_contracts = list(load_frame_contracts().values())

    for section in sections:
        plan = plan_placement(
            content_objects=extract_content_objects(section, source_shape=None),
            frame_contracts=frame_contracts,
            section_id=section.section_id,
        )
        resolved = _select_mapper_template_id(plan, legacy_pick)
        assert resolved == legacy_pick, (
            f"flag-OFF selector contract broken in {mdx_file} section "
            f"{section.section_id}: B4 picked "
            f"'{plan.selected_template_id}' but selector returned "
            f"'{resolved}' (expected '{legacy_pick}')"
        )
@pytest.mark.parametrize("mdx_file", _MDX_BATCH)
def test_flag_off_holds_when_b4_mismatches_legacy(
    monkeypatch: pytest.MonkeyPatch, mdx_file: str
) -> None:
    """Flag OFF: the legacy id wins even when B4 selected something else.

    A sentinel id is used as the legacy input and each plan is first
    checked to have selected a different value, so every section exercises
    the mismatch case. The selector must still return the sentinel.
    """
    monkeypatch.delenv(FLAG, raising=False)

    legacy_distinct = "__89A_U4_SENTINEL_LEGACY_DISTINCT_FROM_B4__"
    _title, sections, _footer = parse_mdx(_SAMPLES_DIR / mdx_file)
    frame_contracts = list(load_frame_contracts().values())

    for section in sections:
        plan = plan_placement(
            content_objects=extract_content_objects(section, source_shape=None),
            frame_contracts=frame_contracts,
            section_id=section.section_id,
        )
        # Precondition: the plan's pick really differs from the sentinel.
        assert plan.selected_template_id != legacy_distinct
        resolved = _select_mapper_template_id(plan, legacy_distinct)
        assert resolved == legacy_distinct, (
            f"flag-OFF selector must return legacy input even when B4 "
            f"would pick differently ({mdx_file} {section.section_id}: "
            f"b4_pick={plan.selected_template_id} resolved={resolved})"
        )
def test_flag_off_default_state_locks_gate_to_false() -> None:
    """Check that the ambient environment leaves the gate closed.

    No monkeypatching is used: the test reads the real process
    environment, requires the flag to be absent or empty, and then
    requires ``_b4_mapper_source_enabled()`` to report ``False``.
    """
    ambient_value = os.environ.get(FLAG, "")
    assert ambient_value == ""
    gate_open = _b4_mapper_source_enabled()
    assert gate_open is False
def test_baseline_fixture_is_load_bearing(baseline: dict) -> None:
    """Validate the shape of the frozen baseline fixture.

    Guards against a corrupted or half-written fixture: the axis label,
    the exact set of covered mdx files, the per-file ``sha256`` and
    ``final_html_size_bytes`` values, and the recorded renderer entrypoint
    are all checked.

    The ``sha256`` check verifies the hex alphabet as well as the length,
    so a 64-character non-hex value is rejected, as the failure message
    claims. Only lowercase digits are accepted because the live value it
    is compared against comes from ``hashlib``'s ``hexdigest()``.
    """
    assert baseline["axis"].startswith("IMP-89 89-a u4")
    assert set(baseline["mdx_files"].keys()) == set(_MDX_BATCH), (
        f"baseline mdx coverage drift: {baseline['mdx_files'].keys()} "
        f"vs expected {_MDX_BATCH}"
    )
    lowercase_hex = set("0123456789abcdef")
    for mdx_file, entry in baseline["mdx_files"].items():
        assert "sha256" in entry, f"baseline {mdx_file} missing sha256 key"
        assert "final_html_size_bytes" in entry, (
            f"baseline {mdx_file} missing final_html_size_bytes key"
        )
        sha = entry["sha256"]
        assert (
            isinstance(sha, str)
            and len(sha) == 64
            and set(sha) <= lowercase_hex
        ), (
            f"baseline {mdx_file} sha256 is not a 64-char hex string: {sha!r}"
        )
        size = entry["final_html_size_bytes"]
        assert isinstance(size, int) and size > 0, (
            f"baseline {mdx_file} final_html_size_bytes is not positive int: "
            f"{size!r}"
        )
    assert baseline["renderer"]["entrypoint"] == (
        "src.phase_z2_pipeline.run_phase_z2_mvp1"
    ), (
        "baseline renderer.entrypoint must be the full pipeline entry — "
        f"got {baseline['renderer'].get('entrypoint')!r}. The previous "
        "synthetic render_slide shape produced a fragment, not final.html."
    )