From b1bbe27c38571549af4014f89aca0a8ba0ca1010 Mon Sep 17 00:00:00 2001 From: kyeongmin Date: Sun, 24 May 2026 00:33:28 +0900 Subject: [PATCH] =?UTF-8?q?feat(#89):=20IMP-89=2089-a=20u1~u5=20Layer=20A?= =?UTF-8?q?=20render=20path=20activation=20(B4=E2=86=92mapper=20source-of-?= =?UTF-8?q?truth=20switch,=20default-OFF=20flag)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PHASE_Z_B4_MAPPER_SOURCE env flag (default OFF) switches slot_payload source-of-truth from legacy mapper-only / V4 rank-1 to B4 PlacementPlan .selected_template_id at the single switch site in the runtime loop. OFF preserves final.html SHA byte-equivalence (u4 parity guard, mdx 01-05). ON requires Layer A render-active path; BLOCKED exits on B4 no-cover and on B4-selected FitError (IMP-87 honesty gate pattern — NO silent fallback). Distinct from PHASE_Z_B4_GATEKEEPER (mismatch render-skip). Units (1 commit = 1 axis per Stage 1 scope_lock): u1 — _b4_mapper_source_enabled() flag reader (default OFF) u2 — _select_mapper_template_id() selector wired at the switch site u3 — _b4_mapper_source_blocked_exit() for b4_no_cover / b4_selected_fit_error u4 — render SHA parity regression (tests/regression/ baseline mdx 01-05) u5 — slot_payload byte-equivalence (matches_mapper=True axis, mdx 01-05) Targeted 89-a suite 63 PASS; Phase Z regression 323 PASS; IMP-87 mirror 20 PASS. Demo activation via .env only (no vite.config hardcoding). 
Co-Authored-By: Claude Opus 4.7 (1M context) --- src/phase_z2_pipeline.py | 131 +++++- .../phase_z2/test_b4_mapper_source_blocked.py | 157 +++++++ .../test_b4_mapper_source_equivalence.py | 426 ++++++++++++++++++ tests/phase_z2/test_b4_mapper_source_flag.py | 54 +++ .../phase_z2/test_b4_mapper_source_switch.py | 96 ++++ tests/regression/__init__.py | 0 .../fixtures/89a_pre_baseline_sha.json | 56 +++ .../scripts/capture_89a_pre_baseline.py | 168 +++++++ .../test_b4_mapper_source_sha_parity.py | 347 ++++++++++++++ 9 files changed, 1434 insertions(+), 1 deletion(-) create mode 100644 tests/phase_z2/test_b4_mapper_source_blocked.py create mode 100644 tests/phase_z2/test_b4_mapper_source_equivalence.py create mode 100644 tests/phase_z2/test_b4_mapper_source_flag.py create mode 100644 tests/phase_z2/test_b4_mapper_source_switch.py create mode 100644 tests/regression/__init__.py create mode 100644 tests/regression/fixtures/89a_pre_baseline_sha.json create mode 100644 tests/regression/scripts/capture_89a_pre_baseline.py create mode 100644 tests/regression/test_b4_mapper_source_sha_parity.py diff --git a/src/phase_z2_pipeline.py b/src/phase_z2_pipeline.py index be46bc4..73e77f0 100644 --- a/src/phase_z2_pipeline.py +++ b/src/phase_z2_pipeline.py @@ -204,6 +204,82 @@ def to_phase_z_status(match: V4Match) -> str: return V4_LABEL_TO_PHASE_Z_STATUS.get(match.label, "unknown") +def _b4_mapper_source_enabled() -> bool: + """IMP-89 89-a u1 — PHASE_Z_B4_MAPPER_SOURCE env flag reader (default OFF). + + Switches slot_payload source-of-truth from mapper-only (legacy) to B4 + PlacementPlan.selected_template_id. Distinct from PHASE_Z_B4_GATEKEEPER + (mismatch render-skip semantics). u2 wires this into the slot_payload + construction site; u3 adds BLOCKED exits for no-cover and FitError under + flag ON. Truthy values: '1', 'true', 'yes' (case-insensitive, trimmed). 
+ """ + return os.environ.get("PHASE_Z_B4_MAPPER_SOURCE", "").strip().lower() in { + "1", + "true", + "yes", + } + + +def _select_mapper_template_id( + placement_plan, unit_frame_template_id: str +) -> Optional[str]: + """IMP-89 89-a u2 — slot_payload source-of-truth selector. + + Returns the template_id that drives slot_payload construction at the + single switch site in the runtime loop: + flag ON → placement_plan.selected_template_id (B4 PlacementPlan, + Layer A render-active path) + flag OFF → unit_frame_template_id (legacy mapper-only / V4 rank-1; + byte-equivalent default; final.html SHA parity guarded + by u4) + + Under flag ON the returned value may be None when B4 found no covering + frame. u3 adds the BLOCKED exit for None and for FitError on the + B4-selected template — NO silent fallback (IMP-87 honesty gate pattern). + """ + if _b4_mapper_source_enabled(): + return placement_plan.selected_template_id + return unit_frame_template_id + + +def _b4_mapper_source_blocked_exit( + reason: str, *, position: str, context: dict +) -> "NoReturn": # type: ignore[name-defined] + """IMP-89 89-a u3 — BLOCKED exit (sys.exit(1)) when PHASE_Z_B4_MAPPER_SOURCE + is ON and the Layer A render path cannot resolve a covering frame. + + Stage 1 Q2 lock: when the user explicitly opts into the B4-driven render + path, a content-bearing zone MUST NOT silently degrade to adapter_needed + or to the legacy V4 rank-1 mapper input. Mirrors IMP-87 u3 honesty-gate + pattern (`_is_blocked_overall` → `sys.exit(1)`): the BLOCKED signal + preempts the silent adapter_needed fallback so the operator sees the + Layer A failure immediately on stderr instead of inheriting a + pseudo-rendered partial. + + Reasons (locked enum): + b4_no_cover — PlacementPlan.selected_template_id is None + (B4 found no covering frame on the unit) + b4_selected_fit_error — map_mdx_to_slots raised FitError against the + B4-selected template (frame contract reject) + + Always raises SystemExit(1) via sys.exit. 
The `NoReturn` annotation makes + the call-site control flow explicit for type-checkers; behavior is the + same as IMP-87 u3's sys.exit(1) at L6387. + """ + print( + f"\n[Phase Z-2 IMP-89 89-a u3] BLOCKED @ {reason} (zone--{position})", + file=sys.stderr, + ) + print( + " policy : PHASE_Z_B4_MAPPER_SOURCE=ON requires B4-driven render " + "(NO silent fallback — IMP-87 honesty gate pattern)", + file=sys.stderr, + ) + for key, value in context.items(): + print(f" {key:9}: {value}", file=sys.stderr) + sys.exit(1) + + # ─── MDX parsing ──────────────────────────────────────────────── def parse_mdx(mdx_path: Path) -> tuple[str, list[MdxSection], Optional[str]]: @@ -4681,11 +4757,64 @@ def run_phase_z2_mvp1( continue # ─── end B4 gatekeeper ─── + # ─── IMP-89 89-a u2 — slot_payload source-of-truth switch ─── + # PHASE_Z_B4_MAPPER_SOURCE (u1 flag, default OFF): + # ON → mapper input = B4 PlacementPlan.selected_template_id + # (Layer A render-active; B4 drives slot_payload) + # OFF → mapper input = unit.frame_template_id (legacy mapper-only / + # V4 rank-1; byte-equivalent default — final.html SHA parity + # guarded by u4) + # u3 layers BLOCKED exits for (selected_template_id is None OR + # FitError on B4-selected template) under flag ON — NO silent + # fallback (IMP-87 honesty gate pattern). Under flag OFF semantics + # preserved verbatim. + mapper_template_id = _select_mapper_template_id( + placement_plan, unit.frame_template_id + ) + + # IMP-89 89-a u3 — BLOCKED exit on B4 no-cover under flag ON. + # When PHASE_Z_B4_MAPPER_SOURCE=ON and PlacementPlan produced no + # covering frame, refuse to fall back to the legacy V4 rank-1 mapper + # input. NO silent fallback (Stage 1 Q2 lock; IMP-87 honesty gate + # pattern). Under flag OFF this branch is never entered, so the + # default render path remains byte-identical. 
+ if _b4_mapper_source_enabled() and mapper_template_id is None: + _b4_mapper_source_blocked_exit( + "b4_no_cover", + position=position, + context={ + "unit": ( + f"source_section_ids={list(unit.source_section_ids)} " + f"merge_type={unit.merge_type}" + ), + "v4_rank1": unit.frame_template_id, + "b4_pick": placement_plan.selected_template_id, + }, + ) + # mapper 시도 — 실패 (FitError) 시 zone 을 adapter_needed 로 표시하고 skip try: - slot_payload = map_mdx_to_slots(synth_section, unit.frame_template_id) + slot_payload = map_mdx_to_slots(synth_section, mapper_template_id) except FitError as e: _fit_error_str = str(e) + # IMP-89 89-a u3 — BLOCKED exit on B4-selected FitError under flag + # ON. When PHASE_Z_B4_MAPPER_SOURCE=ON the mapper rejecting the + # B4-selected template is a Layer A honesty failure — adapter_needed + # would mask it (Stage 1 Q2 lock). Under flag OFF the legacy + # adapter_needed silent-fallback path executes unchanged. + if _b4_mapper_source_enabled(): + _b4_mapper_source_blocked_exit( + "b4_selected_fit_error", + position=position, + context={ + "template": f"{mapper_template_id} (B4 selected)", + "unit": ( + f"source_section_ids={list(unit.source_section_ids)}" + ), + "v4_rank1": unit.frame_template_id, + "fit_error": _fit_error_str, + }, + ) _unit_provisional = bool(getattr(unit, "provisional", False)) adapter_record = { "position": position, diff --git a/tests/phase_z2/test_b4_mapper_source_blocked.py b/tests/phase_z2/test_b4_mapper_source_blocked.py new file mode 100644 index 0000000..5b1fe56 --- /dev/null +++ b/tests/phase_z2/test_b4_mapper_source_blocked.py @@ -0,0 +1,157 @@ +"""IMP-89 89-a u3 — BLOCKED exit unit tests for Layer A render path. + +Stage 2 plan (u3): when PHASE_Z_B4_MAPPER_SOURCE=ON and the Layer A render +path cannot resolve a covering frame, the runtime MUST sys.exit(1) instead of +silently degrading to adapter_needed or to the legacy V4 rank-1 mapper input. 
+ +Locked semantics (Stage 1 Q2 lock; IMP-87 honesty gate pattern): + flag OFF → legacy adapter_needed path + (silent fallback preserved) + flag ON + B4 no-cover → BLOCKED (sys.exit 1) + flag ON + FitError on B4-selected → BLOCKED (sys.exit 1) + flag ON + matches_mapper + FitError → BLOCKED (explicit no-silent + fallback even when V4 rank-1 + equals B4 pick) + +These tests target the `_b4_mapper_source_blocked_exit()` helper directly +plus contract-level assertions of its stderr output. The runtime call-sites +inside `run_phase_z2_mvp1` are guarded by `_b4_mapper_source_enabled()` +checks; u3 changes ZERO behavior under the default-OFF path. +""" +from __future__ import annotations + +import pytest + +from src.phase_z2_pipeline import ( + _b4_mapper_source_blocked_exit, + _b4_mapper_source_enabled, +) + +FLAG = "PHASE_Z_B4_MAPPER_SOURCE" + + +def test_blocked_exit_no_cover_exits_with_code_1( + capsys: pytest.CaptureFixture[str], +) -> None: + """b4_no_cover reason → SystemExit(1), no silent fallback.""" + with pytest.raises(SystemExit) as exc: + _b4_mapper_source_blocked_exit( + "b4_no_cover", + position="top", + context={ + "unit": "source_section_ids=['01-1'] merge_type=raw", + "v4_rank1": "F13", + "b4_pick": None, + }, + ) + assert exc.value.code == 1 + + +def test_blocked_exit_fit_error_exits_with_code_1( + capsys: pytest.CaptureFixture[str], +) -> None: + """b4_selected_fit_error reason → SystemExit(1).""" + with pytest.raises(SystemExit) as exc: + _b4_mapper_source_blocked_exit( + "b4_selected_fit_error", + position="bottom_l", + context={ + "template": "F29 (B4 selected)", + "unit": "source_section_ids=['02-2']", + "v4_rank1": "F13", + "fit_error": "slot 'title' missing", + }, + ) + assert exc.value.code == 1 + + +def test_blocked_exit_stderr_carries_reason_and_position( + capsys: pytest.CaptureFixture[str], +) -> None: + """Header line surfaces the locked reason enum + zone position.""" + with pytest.raises(SystemExit): + _b4_mapper_source_blocked_exit( + 
"b4_no_cover", + position="bottom_r", + context={"v4_rank1": "F13"}, + ) + err = capsys.readouterr().err + assert "[Phase Z-2 IMP-89 89-a u3] BLOCKED" in err + assert "b4_no_cover" in err + assert "zone--bottom_r" in err + + +def test_blocked_exit_stderr_carries_honesty_policy_line( + capsys: pytest.CaptureFixture[str], +) -> None: + """Policy banner names PHASE_Z_B4_MAPPER_SOURCE + IMP-87 honesty pattern.""" + with pytest.raises(SystemExit): + _b4_mapper_source_blocked_exit( + "b4_selected_fit_error", + position="top", + context={"fit_error": "x"}, + ) + err = capsys.readouterr().err + assert "PHASE_Z_B4_MAPPER_SOURCE=ON" in err + assert "NO silent fallback" in err + assert "IMP-87 honesty gate pattern" in err + + +def test_blocked_exit_stderr_carries_all_context_fields( + capsys: pytest.CaptureFixture[str], +) -> None: + """Each context dict entry surfaces on its own stderr line.""" + with pytest.raises(SystemExit): + _b4_mapper_source_blocked_exit( + "b4_selected_fit_error", + position="top", + context={ + "template": "F29 (B4 selected)", + "unit": "source_section_ids=['02-2']", + "v4_rank1": "F13", + "fit_error": "slot 'title' missing", + }, + ) + err = capsys.readouterr().err + assert "template" in err + assert "F29 (B4 selected)" in err + assert "unit" in err + assert "source_section_ids=['02-2']" in err + assert "v4_rank1" in err + assert "F13" in err + assert "fit_error" in err + assert "slot 'title' missing" in err + + +def test_blocked_exit_ignores_flag_state( + monkeypatch: pytest.MonkeyPatch, + capsys: pytest.CaptureFixture[str], +) -> None: + """Helper is unconditional — flag-gating is the call-site's responsibility. + + The runtime checks `_b4_mapper_source_enabled()` BEFORE invoking this + helper, so once invoked the helper always exits. This keeps the helper + behavior orthogonal to env state and makes the call-sites the + single-source-of-truth for ON/OFF policy. 
+ """ + monkeypatch.delenv(FLAG, raising=False) + with pytest.raises(SystemExit) as exc: + _b4_mapper_source_blocked_exit( + "b4_no_cover", + position="top", + context={"v4_rank1": "F13"}, + ) + assert exc.value.code == 1 + + +def test_default_off_flag_state_does_not_invoke_blocked_helper( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Under default-OFF, `_b4_mapper_source_enabled()` is False, which is + the precondition the runtime checks before calling the helper. This test + locks the contract that the flag reader returns False by default — any + accidental flip would break the byte-identity guarantee of the legacy + adapter_needed path. + """ + monkeypatch.delenv(FLAG, raising=False) + assert _b4_mapper_source_enabled() is False diff --git a/tests/phase_z2/test_b4_mapper_source_equivalence.py b/tests/phase_z2/test_b4_mapper_source_equivalence.py new file mode 100644 index 0000000..4b53c53 --- /dev/null +++ b/tests/phase_z2/test_b4_mapper_source_equivalence.py @@ -0,0 +1,426 @@ +"""IMP-89 89-a u5 — slot_payload byte-equivalence when B4 matches mapper. + +Stage 2 u5 contract (verbatim):: + + slot_payload byte-equivalent (PHASE_Z_B4_MAPPER_SOURCE ON + matches_mapper=True) + vs OFF, across mdx 01-05 + +Why this is load-bearing +======================== + +u4 freezes the FULL pipeline ``final.html`` SHA under flag OFF. u5 isolates +the *mapper-input* axis: when B4 ``PlacementPlan.selected_template_id`` +equals the legacy mapper input (``unit.frame_template_id`` — V4 rank-1), +the selector at ``src/phase_z2_pipeline.py:223-242`` returns the same +template id under either flag state. The mapper is a pure function of +``(MdxSection, template_id)`` (deterministic dispatch via +``map_with_contract`` → named ``PAYLOAD_BUILDERS`` — verified at +``src/phase_z2_mapper.py:894-919``), so identical inputs → identical +``slot_payload`` dicts → identical JSON-canonical bytes. 
+ +This is the *cross-axis* proof complementing u4: + +* u4 = on-disk ``final.html`` SHA parity, default-OFF only (legacy + preservation guard). +* u5 = ``slot_payload`` byte equivalence, *flag ON ↔ flag OFF* (Layer A + render-active behavior-preserving proof under matches_mapper). + +The negative case (``test_slot_payload_diverges_when_b4_mismatches_under_flag_on``) +locks the fact that ``slot_payload`` actually *depends* on the +``template_id`` selector output — without it, the equivalence test could +trivially pass even if the selector were a no-op. +""" + +from __future__ import annotations + +import json +from dataclasses import asdict, dataclass +from pathlib import Path +from typing import Optional + +import pytest + +from src.phase_z2_mapper import ( + FitError, + get_contract, + load_frame_contracts, + map_with_contract, +) +from src.phase_z2_pipeline import ( + _b4_mapper_source_enabled, + _select_mapper_template_id, + extract_content_objects, + parse_mdx, +) +from src.phase_z2_placement_planner import plan_placement + + +@dataclass +class _StubPlan: + """Minimal placement-plan stand-in for selector unit checks. + + ``_select_mapper_template_id`` reads ONLY ``selected_template_id`` + (verified at ``src/phase_z2_pipeline.py:240-242``). Constructing the + real ``PlacementPlan`` with placeholder slot/region lists would force + the test to track schema drift on fields the selector never touches. + """ + + selected_template_id: Optional[str] + +FLAG = "PHASE_Z_B4_MAPPER_SOURCE" +_REPO_ROOT = Path(__file__).resolve().parents[2] +_SAMPLES_DIR = _REPO_ROOT / "samples" / "mdx_batch" +_MDX_BATCH = ("01.mdx", "02.mdx", "03.mdx", "04.mdx", "05.mdx") + + +def _canonical_bytes(payload: dict) -> bytes: + """Stable JSON canonical encoding for byte-level dict comparison. 
+ + ``sort_keys`` removes dict-ordering noise; ``ensure_ascii=False`` keeps + Korean text from being mangled into ``\\uXXXX`` escapes (which would + still compare equal but would silently mask any encoding regression in + the mapper). + """ + return json.dumps(payload, sort_keys=True, ensure_ascii=False).encode( + "utf-8" + ) + + +def _matches_mapper_cases() -> list[tuple[str, str, object, str]]: + """Enumerate (mdx_file, section_id, section, template_id) tuples where + the matches_mapper scenario is reachable. + + "matches_mapper=True" in production is the predicate + ``placement_plan.selected_template_id == unit.frame_template_id``. To + cover it at the unit-test level without driving the full Type B + coordinator, we treat each B4-selected template as the *simulated* + legacy mapper input — i.e. we force matches_mapper=True by construction + via ``mapper_template_id := plan.selected_template_id``. + + Only sections where (a) B4 finds a covering frame AND (b) the mapper + accepts that frame (no FitError) are byte-equivalence-eligible. Under + flag ON the BLOCKED u3 path would otherwise fire — that axis is + covered by ``test_b4_mapper_source_blocked.py`` and is out of scope + here. 
+ """ + frame_contracts = list(load_frame_contracts().values()) + cases: list[tuple[str, str, object, str]] = [] + for mdx_file in _MDX_BATCH: + mdx_path = _SAMPLES_DIR / mdx_file + _title, sections, _footer = parse_mdx(mdx_path) + for section in sections: + content_objects = extract_content_objects( + section, source_shape=None + ) + plan = plan_placement( + content_objects=content_objects, + frame_contracts=frame_contracts, + section_id=section.section_id, + ) + template_id = plan.selected_template_id + if template_id is None: + continue + contract = get_contract(template_id) + if contract is None: + continue + try: + map_with_contract(section, contract) + except FitError: + continue + cases.append((mdx_file, section.section_id, section, template_id)) + return cases + + +# Frozen at collection time so a parametrize zero-iteration cannot silently +# pass the byte-equivalence assertion (additional coverage lock below). +_MATCHES_CASES = _matches_mapper_cases() + + +def _slot_payload_via_selector( + section, plan, mapper_input: str +) -> tuple[dict, str]: + """Compose ``_select_mapper_template_id → map_mdx_to_slots`` once. + + Mirrors the exact runtime path at + ``src/phase_z2_pipeline.py:4771-4797`` minus the BLOCKED u3 gate + (which is out of scope for u5 byte equivalence — covered by u3). + Returns ``(slot_payload, resolved_template_id)`` so per-case asserts + can verify *both* axes (input + output) match. + """ + resolved = _select_mapper_template_id(plan, mapper_input) + assert resolved is not None, ( + "u5 fixture invariant violated: resolved template_id is None even " + "though the case was pre-filtered for B4 cover. Re-check " + "_matches_mapper_cases()." + ) + contract = get_contract(resolved) + assert contract is not None, ( + f"u5 fixture invariant violated: no contract for resolved=" + f"{resolved!r} (case was pre-filtered for catalog membership)." 
+ ) + return map_with_contract(section, contract), resolved + + +# ─── algebraic precondition (no pipeline / no mapper run) ────────────── + + +def test_selector_returns_same_value_under_flag_flip_when_matches_mapper( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Pure-function property: when ``plan.selected_template_id == T`` the + selector returns ``T`` under either flag state. + + This is the algebra that makes the end-to-end byte equivalence below + hold mathematically. If this property breaks, every parametrized + equivalence assertion would also break — this test localizes the + failure to the selector helper itself. + """ + plan = _StubPlan(selected_template_id="F13") + legacy_input = "F13" # matches_mapper=True by construction + + monkeypatch.setenv(FLAG, "1") + assert _b4_mapper_source_enabled() is True + on_value = _select_mapper_template_id(plan, legacy_input) + + monkeypatch.delenv(FLAG, raising=False) + assert _b4_mapper_source_enabled() is False + off_value = _select_mapper_template_id(plan, legacy_input) + + assert on_value == off_value == "F13" + + +# ─── end-to-end byte equivalence (parametrized over real mdx data) ──── + + +@pytest.mark.integration +@pytest.mark.parametrize( + ("mdx_file", "section_id", "section", "template_id"), + _MATCHES_CASES, + ids=lambda case: ( + case if isinstance(case, str) else getattr(case, "section_id", "_") + ), +) +def test_slot_payload_byte_equivalent_when_matches_mapper( + monkeypatch: pytest.MonkeyPatch, + mdx_file: str, + section_id: str, + section, + template_id: str, +) -> None: + """Per-section byte equivalence proof under matches_mapper=True. + + Recomputes ``PlacementPlan`` from scratch inside the test (fixture + enumeration cached only the section + B4 pick) and asserts that the + mapper output is JSON-canonical-byte-identical between flag ON and + flag OFF, given the same mapper input. 
+ """ + frame_contracts = list(load_frame_contracts().values()) + content_objects = extract_content_objects(section, source_shape=None) + plan = plan_placement( + content_objects=content_objects, + frame_contracts=frame_contracts, + section_id=section.section_id, + ) + assert plan.selected_template_id == template_id, ( + f"u5 invariant: B4 selection drifted between enumeration and " + f"test execution for {mdx_file} {section_id}: enumerated=" + f"{template_id!r} live={plan.selected_template_id!r}" + ) + + # Under matches_mapper=True the legacy mapper input equals plan pick. + legacy_mapper_input = template_id + + monkeypatch.delenv(FLAG, raising=False) + plan_snapshot_off = asdict(plan) # type: ignore[call-overload] + payload_off, resolved_off = _slot_payload_via_selector( + section, plan, legacy_mapper_input + ) + plan_after_off = asdict(plan) # type: ignore[call-overload] + + monkeypatch.setenv(FLAG, "1") + payload_on, resolved_on = _slot_payload_via_selector( + section, plan, legacy_mapper_input + ) + plan_after_on = asdict(plan) # type: ignore[call-overload] + + assert resolved_off == resolved_on == template_id, ( + f"selector returned different template_id under matches_mapper for " + f"{mdx_file} {section_id}: off={resolved_off!r} on={resolved_on!r}" + ) + assert _canonical_bytes(payload_off) == _canonical_bytes(payload_on), ( + f"slot_payload byte equivalence broken for {mdx_file} {section_id} " + f"(template_id={template_id}): mapper output diverged between " + f"flag OFF and flag ON despite identical mapper input. This means " + f"either map_with_contract gained nondeterminism or a hidden " + f"selector-side effect crept in." + ) + assert plan_snapshot_off == plan_after_off == plan_after_on, ( + f"PlacementPlan mutated by selector / mapper call for {mdx_file} " + f"{section_id} — u5 byte equivalence relies on the selector being " + f"a pure read of plan.selected_template_id." 
+ ) + + +@pytest.mark.integration +def test_matches_mapper_corpus_coverage_is_non_empty() -> None: + """Lock: the parametrized equivalence test above must have iterated at + least once. + + Without this guard a pytest parametrize zero-iteration (e.g. all + sections rejected by B4 or all FitError-raising) would let the byte + equivalence test silently pass with zero work. mdx 01-05 is rich + enough that at least one matches_mapper case is always reachable. + """ + assert _MATCHES_CASES, ( + "u5 byte equivalence had zero matches_mapper cases — every section " + "across mdx 01-05 was either B4-uncovered or raised FitError. " + "Either the corpus shrank, B4 algorithm regressed, or the mapper " + "now rejects every B4 pick. Investigate before re-locking." + ) + seen_files = {case[0] for case in _MATCHES_CASES} + assert len(seen_files) >= 1, ( + f"u5 coverage too narrow: {seen_files} — at least one mdx file " + f"must yield a matches_mapper case for the equivalence proof to " + f"be load-bearing." + ) + + +# ─── negative case — bytes MUST diverge when B4 mismatches ───────────── + + +@pytest.mark.integration +def test_slot_payload_diverges_when_b4_mismatches_under_flag_on( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Anti-vacuous proof: when B4 picks a template DIFFERENT from the + legacy mapper input AND flag ON, the resulting ``slot_payload`` + differs from the flag-OFF case. + + Without this assertion the equivalence test would pass even if the + selector were a no-op that always returned the legacy input — i.e. + the equivalence test would be load-bearing in the wrong direction. + This test proves the mapper output genuinely depends on the selector's + template_id choice, so equivalence under matches_mapper is a real + behavioral guarantee rather than a tautology. + + Strategy: find a section where the mapper accepts *both* the B4 pick + AND a distinct alternative template (a frame the mapper also covers + with a different builder/source_shape). 
Compare slot_payload bytes + across the two — they MUST differ. + """ + frame_contracts = list(load_frame_contracts().values()) + diverging_case: tuple | None = None + + for mdx_file in _MDX_BATCH: + mdx_path = _SAMPLES_DIR / mdx_file + _title, sections, _footer = parse_mdx(mdx_path) + for section in sections: + content_objects = extract_content_objects( + section, source_shape=None + ) + plan = plan_placement( + content_objects=content_objects, + frame_contracts=frame_contracts, + section_id=section.section_id, + ) + b4_pick = plan.selected_template_id + if b4_pick is None: + continue + b4_contract = get_contract(b4_pick) + if b4_contract is None: + continue + try: + b4_payload = map_with_contract(section, b4_contract) + except FitError: + continue + # Hunt for a *different* template the mapper also accepts on + # this same section. Iterate the catalog in declaration order + # so the search is deterministic. + for alt in frame_contracts: + alt_id = alt.get("template_id") + if not alt_id or alt_id == b4_pick: + continue + try: + alt_payload = map_with_contract(section, alt) + except FitError: + continue + if _canonical_bytes(b4_payload) != _canonical_bytes( + alt_payload + ): + diverging_case = ( + mdx_file, + section.section_id, + b4_pick, + alt_id, + b4_payload, + alt_payload, + ) + break + if diverging_case is not None: + break + if diverging_case is not None: + break + + assert diverging_case is not None, ( + "Could not find a section across mdx 01-05 where the mapper " + "accepts two distinct templates with divergent slot_payload. " + "Without such a case the equivalence test above is tautological." + ) + + ( + mdx_file, + section_id, + b4_pick, + alt_id, + b4_payload, + alt_payload, + ) = diverging_case + + # Now drive the selector path under flag ON with B4 picking ``b4_pick`` + # while the legacy mapper input is ``alt_id`` — i.e. B4 mismatches the + # legacy input. Flag ON → selector returns b4_pick → mapper produces + # b4_payload. 
Flag OFF → selector returns alt_id → mapper produces + # alt_payload. The two MUST differ. + plan = _StubPlan(selected_template_id=b4_pick) + + mdx_path = _SAMPLES_DIR / mdx_file + _title, sections, _footer = parse_mdx(mdx_path) + section = next(s for s in sections if s.section_id == section_id) + + monkeypatch.setenv(FLAG, "1") + on_payload, on_resolved = _slot_payload_via_selector( + section, plan, alt_id + ) + monkeypatch.delenv(FLAG, raising=False) + off_payload, off_resolved = _slot_payload_via_selector( + section, plan, alt_id + ) + + assert on_resolved == b4_pick + assert off_resolved == alt_id + assert _canonical_bytes(on_payload) != _canonical_bytes(off_payload), ( + f"Negative case failed: selector flip from {alt_id} (OFF) to " + f"{b4_pick} (ON) produced byte-identical slot_payload for " + f"{mdx_file} {section_id}. The mapper appears to ignore " + f"template_id, which would make the equivalence test tautological." + ) + + +# ─── selector default-state lock (mirror of u4 sanity check) ─────────── + + +def test_selector_default_state_returns_legacy_under_b4_mismatch( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """Final sanity: even when B4 would pick something different, the + flag-OFF default selector returns the legacy mapper input verbatim. + + This is the property that makes u4 SHA parity hold and the negative + test above meaningful. Repeated here at the u5 axis so a single test + file change cannot accidentally hide the regression signal across + both u4 and u5. 
+ """ + plan = _StubPlan(selected_template_id="F29") + monkeypatch.delenv(FLAG, raising=False) + assert _b4_mapper_source_enabled() is False + assert _select_mapper_template_id(plan, "F13") == "F13" diff --git a/tests/phase_z2/test_b4_mapper_source_flag.py b/tests/phase_z2/test_b4_mapper_source_flag.py new file mode 100644 index 0000000..4ded95d --- /dev/null +++ b/tests/phase_z2/test_b4_mapper_source_flag.py @@ -0,0 +1,54 @@ +"""IMP-89 89-a u1 — PHASE_Z_B4_MAPPER_SOURCE flag reader unit tests. + +Stage 2 plan (u1): adds an env flag reader helper (default OFF) distinct +from PHASE_Z_B4_GATEKEEPER. u1 only locks reader semantics — u2 wires it +into the slot_payload source-of-truth switch and u3 layers BLOCKED exits +for B4 no-cover and B4-selected FitError under flag ON. + +Truthy contract (mirrors PHASE_Z_B4_GATEKEEPER / +PHASE_Z_B4_SOURCE_SHAPE_ENABLED at src/phase_z2_pipeline.py:4625,4662): + case-insensitive + leading/trailing whitespace stripped; truthy set + = {'1', 'true', 'yes'}. Everything else (including '0', '', 'no', + 'false', missing env var) is OFF. 
+""" +from __future__ import annotations + +import pytest + +from src.phase_z2_pipeline import _b4_mapper_source_enabled + +FLAG = "PHASE_Z_B4_MAPPER_SOURCE" + + +def test_default_off_when_env_unset(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.delenv(FLAG, raising=False) + assert _b4_mapper_source_enabled() is False + + +@pytest.mark.parametrize("value", ["1", "true", "yes", "TRUE", "Yes", " true ", " 1\t"]) +def test_truthy_values_enable_flag( + monkeypatch: pytest.MonkeyPatch, value: str +) -> None: + monkeypatch.setenv(FLAG, value) + assert _b4_mapper_source_enabled() is True + + +@pytest.mark.parametrize("value", ["", "0", "no", "false", "off", "2", "on", "y"]) +def test_non_truthy_values_keep_flag_off( + monkeypatch: pytest.MonkeyPatch, value: str +) -> None: + monkeypatch.setenv(FLAG, value) + assert _b4_mapper_source_enabled() is False + + +def test_flag_distinct_from_gatekeeper(monkeypatch: pytest.MonkeyPatch) -> None: + """PHASE_Z_B4_GATEKEEPER ON must not flip the mapper-source flag. + + Locks Stage 2 design decision (Stage 1 Q1 resolution): the new flag + governs slot_payload source-of-truth; PHASE_Z_B4_GATEKEEPER retains + its mismatch render-skip semantics. They must be independently + toggleable. + """ + monkeypatch.setenv("PHASE_Z_B4_GATEKEEPER", "1") + monkeypatch.delenv(FLAG, raising=False) + assert _b4_mapper_source_enabled() is False diff --git a/tests/phase_z2/test_b4_mapper_source_switch.py b/tests/phase_z2/test_b4_mapper_source_switch.py new file mode 100644 index 0000000..4e76bd3 --- /dev/null +++ b/tests/phase_z2/test_b4_mapper_source_switch.py @@ -0,0 +1,96 @@ +"""IMP-89 89-a u2 — slot_payload source-of-truth switch unit tests. + +Stage 2 plan (u2): wires the u1 PHASE_Z_B4_MAPPER_SOURCE flag into the +single slot_payload construction site at src/phase_z2_pipeline.py:4702 +via the _select_mapper_template_id() selector helper. 
@dataclass
class _StubPlan:
    """Tiny PlacementPlan substitute exposing only the field the selector reads."""

    # B4's chosen template id; None models the "B4 no-cover" case.
    selected_template_id: Optional[str]


def test_flag_off_returns_unit_frame_template_id(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Flag unset: the legacy (V4 rank-1) mapper input is returned."""
    plan = _StubPlan(selected_template_id="B4_PICK")
    monkeypatch.delenv(FLAG, raising=False)
    resolved = _select_mapper_template_id(plan, "V4_PICK")
    assert resolved == "V4_PICK"


def test_flag_on_returns_placement_plan_selected_template_id(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Flag ON: the B4 PlacementPlan pick becomes the mapper input."""
    plan = _StubPlan(selected_template_id="B4_PICK")
    monkeypatch.setenv(FLAG, "1")
    resolved = _select_mapper_template_id(plan, "V4_PICK")
    assert resolved == "B4_PICK"


def test_flag_on_with_matching_b4_returns_same_value(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Flag ON with B4 pick equal to the legacy pick: result is unchanged."""
    plan = _StubPlan(selected_template_id="F13")
    monkeypatch.setenv(FLAG, "true")
    resolved = _select_mapper_template_id(plan, "F13")
    assert resolved == "F13"


def test_flag_on_with_no_b4_cover_returns_none(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Flag ON with no B4 pick: None is surfaced (the signal u3 blocks on)."""
    plan = _StubPlan(selected_template_id=None)
    monkeypatch.setenv(FLAG, "yes")
    resolved = _select_mapper_template_id(plan, "V4_PICK")
    assert resolved is None


def test_flag_off_with_no_b4_cover_still_returns_legacy(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Flag unset: a missing B4 pick is ignored and the legacy input wins."""
    plan = _StubPlan(selected_template_id=None)
    monkeypatch.delenv(FLAG, raising=False)
    resolved = _select_mapper_template_id(plan, "V4_PICK")
    assert resolved == "V4_PICK"


@pytest.mark.parametrize(
    "non_truthy",
    ["", "0", "no", "false", "off", "2"],
)
def test_non_truthy_env_values_keep_legacy_source(
    monkeypatch: pytest.MonkeyPatch, non_truthy: str
) -> None:
    """Non-truthy env values behave like an unset flag: legacy input is used."""
    plan = _StubPlan(selected_template_id="B4_PICK")
    monkeypatch.setenv(FLAG, non_truthy)
    resolved = _select_mapper_template_id(plan, "V4_PICK")
    assert resolved == "V4_PICK"


def test_gatekeeper_flag_does_not_flip_mapper_source(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """PHASE_Z_B4_GATEKEEPER alone must not route the mapper to the B4 pick."""
    plan = _StubPlan(selected_template_id="B4_PICK")
    # Independent env vars; only the mapper-source flag may change the result.
    monkeypatch.delenv(FLAG, raising=False)
    monkeypatch.setenv("PHASE_Z_B4_GATEKEEPER", "1")
    resolved = _select_mapper_template_id(plan, "V4_PICK")
    assert resolved == "V4_PICK"
Under flag OFF the 89-a selector `_select_mapper_template_id(plan, T)` returns `T` verbatim, so the mapper input is byte-identical to the pre-89-a legacy call shape `map_mdx_to_slots(section, unit.frame_template_id)` — the rendered HTML and therefore the final.html SHA match the pre-89-a baseline. The u4 regression test runs the same pipeline shape under flag OFF and asserts SHA equality. Regenerate only when an upstream mapper/render/template delta is deliberately reviewed and accepted.", + "captured_at_utc": "2026-05-23T15:03:40Z", + "renderer": { + "entrypoint": "src.phase_z2_pipeline.run_phase_z2_mvp1", + "write_site": "src/phase_z2_pipeline.py:5994-5996", + "artifact_relpath": "//phase_z2/final.html" + }, + "mdx_batch": [ + "01.mdx", + "02.mdx", + "03.mdx", + "04.mdx", + "05.mdx" + ], + "mdx_files": { + "01.mdx": { + "mdx_file": "01.mdx", + "run_id": "89a_baseline_01", + "final_html_size_bytes": 29089, + "sha256": "ad6f16a3068b5a55bd900122688f691ffef0716b91de52151551242773595487", + "pipeline_exit_code": null + }, + "02.mdx": { + "mdx_file": "02.mdx", + "run_id": "89a_baseline_02", + "final_html_size_bytes": 25249, + "sha256": "4832e3e45660eb95201425e975a4c285fb78493cf8d09d6395897f7897b1794b", + "pipeline_exit_code": null + }, + "03.mdx": { + "mdx_file": "03.mdx", + "run_id": "89a_baseline_03", + "final_html_size_bytes": 39804, + "sha256": "7dae47b3d51aa15a6752e4543a746abb4c4da71e7e95895eee8ef08c2eabc948", + "pipeline_exit_code": null + }, + "04.mdx": { + "mdx_file": "04.mdx", + "run_id": "89a_baseline_04", + "final_html_size_bytes": 27707, + "sha256": "2bce45041cdcca6518cd92586c1be9e051a5c98f5a0ad61fdde02604618a1d80", + "pipeline_exit_code": null + }, + "05.mdx": { + "mdx_file": "05.mdx", + "run_id": "89a_baseline_05", + "final_html_size_bytes": 21187, + "sha256": "3ded2fff8cc45301675d2a8917af945b4003631688cad6f088976fd57ab9b12c", + "pipeline_exit_code": 1 + } + }, + "total_files": 5 +} diff --git a/tests/regression/scripts/capture_89a_pre_baseline.py 
b/tests/regression/scripts/capture_89a_pre_baseline.py new file mode 100644 index 0000000..dc963b5 --- /dev/null +++ b/tests/regression/scripts/capture_89a_pre_baseline.py @@ -0,0 +1,168 @@ +"""IMP-89 89-a u4 — capture final.html SHA baseline via the FULL Phase Z pipeline. + +Runs ``src.phase_z2_pipeline.run_phase_z2_mvp1`` end-to-end for every mdx file +in ``samples/mdx_batch/`` (01-05) under PHASE_Z_B4_MAPPER_SOURCE=OFF (default). +Each run writes a real ``final.html`` to disk at +``<RUNS_DIR>/<run_id>/phase_z2/final.html`` — exactly the production write +site at ``src/phase_z2_pipeline.py:5994-5996``. The bytes of that on-disk +artifact are SHA-256 hashed and stored in +``tests/regression/fixtures/89a_pre_baseline_sha.json``. + +The u4 regression test in ``tests/regression/test_b4_mapper_source_sha_parity.py`` +runs the same pipeline shape under flag OFF, reads the on-disk ``final.html``, +hashes its bytes, and asserts SHA equality with each frozen value. The +mathematical chain that makes this a genuine "pre-89-a baseline" guard: + +* Under flag OFF, ``_select_mapper_template_id(plan, T) == T`` for every + ``(plan, T)`` pair (locked by u2 + u4 algebraic precondition tests). +* Therefore the mapper input is byte-identical to the legacy pre-89-a call + shape ``map_mdx_to_slots(section, unit.frame_template_id)``. +* Therefore the rendered HTML is byte-identical to pre-89-a output. +* Therefore the on-disk ``final.html`` is byte-identical → SHA matches. + +Any future drift — in the selector, mapper, render_slide, slide_base.html, +or any upstream code path — produces a divergent SHA and breaks the test. + +Run from repo root:: + + python tests/regression/scripts/capture_89a_pre_baseline.py + +The capture script is idempotent and meant to be re-run only when an +upstream mapper/render/template delta is reviewed and accepted. It refuses +to run with PHASE_Z_B4_MAPPER_SOURCE enabled (the post-89-a flag-ON state +is NOT the baseline axis). 
def _capture_one(mdx_file: str, runs_root: Path) -> dict:
    """Run the full pipeline once and hash the on-disk final.html.

    ``pz2.RUNS_DIR`` MUST be pinned to ``runs_root`` by the caller before
    invocation; this function reads the artifact back from
    ``<runs_root>/<run_id>/phase_z2/final.html``.

    ``SystemExit`` from the pipeline (e.g. the IMP-87 EMPTY_SHELL_NO_CONTENT
    BLOCKED exit on mdx 05) is caught: per the script's design notes that exit
    fires AFTER the final.html write, so the artifact still exists on disk and
    its SHA is captured. The exit code is recorded on the entry so the
    regression test can assert the same terminal state under flag OFF.

    Args:
        mdx_file: File name inside ``_SAMPLES_DIR`` (e.g. ``"01.mdx"``).
        runs_root: Directory the pipeline writes its run folders into.

    Returns:
        A dict with ``mdx_file``, ``run_id``, ``final_html_size_bytes``,
        ``sha256`` and ``pipeline_exit_code`` (``None`` when the pipeline
        returned normally).

    Raises:
        FileNotFoundError: the sample mdx is missing, or the pipeline did not
            leave a final.html behind.
        RuntimeError: final.html exists but is empty.
    """
    mdx_path = _SAMPLES_DIR / mdx_file
    # Explicit raises instead of ``assert``: asserts vanish under ``python -O``
    # and a baseline must never be frozen from an unchecked run.
    if not mdx_path.exists():
        raise FileNotFoundError(f"sample missing: {mdx_path}")

    run_id = f"89a_baseline_{mdx_path.stem}"
    pipeline_exit_code: int | None = None
    try:
        pz2.run_phase_z2_mvp1(mdx_path, run_id=run_id)
    except SystemExit as exc:
        # Normalisation is intentionally identical to the parity test helper
        # (`_live_pipeline_final_html_sha`): integer codes are kept, every
        # other payload is recorded as 1. Changing one side alone would break
        # the exit-code parity assertion.
        pipeline_exit_code = (
            int(exc.code) if isinstance(exc.code, int) else 1
        )

    final_html_path = runs_root / run_id / "phase_z2" / "final.html"
    if not final_html_path.exists():
        raise FileNotFoundError(
            f"final.html not written by pipeline: {final_html_path} "
            f"(pipeline_exit_code={pipeline_exit_code})"
        )
    raw_bytes = final_html_path.read_bytes()
    if not raw_bytes:
        raise RuntimeError(f"final.html is empty: {final_html_path}")

    return {
        "mdx_file": mdx_file,
        "run_id": run_id,
        "final_html_size_bytes": len(raw_bytes),
        "sha256": hashlib.sha256(raw_bytes).hexdigest(),
        "pipeline_exit_code": pipeline_exit_code,
    }


def capture() -> dict:
    """Run the pipeline for every file in ``_MDX_BATCH`` and build the fixture.

    The pipeline's ``RUNS_DIR`` is temporarily redirected to a throw-away
    temp directory and restored afterwards, even on failure.

    Returns:
        The JSON-serialisable baseline document (schema_version 2).

    Raises:
        RuntimeError: ``PHASE_Z_B4_MAPPER_SOURCE`` is set to any non-empty
            value. The baseline axis is the default (unset) state, so the
            script refuses to run rather than freeze a flag-ON artifact.
    """
    # Must be a real raise, not ``assert``: under ``python -O`` an assert
    # guard disappears and a flag-ON baseline would be captured silently.
    if os.environ.get("PHASE_Z_B4_MAPPER_SOURCE", "") != "":
        raise RuntimeError(
            "PHASE_Z_B4_MAPPER_SOURCE must be unset when capturing baseline "
            "(default-OFF state is the production-equivalent axis for u4). "
            "Refusing to run with the flag enabled."
        )

    _OUT_PATH.parent.mkdir(parents=True, exist_ok=True)

    with tempfile.TemporaryDirectory(prefix="89a_baseline_") as tmp:
        runs_root = Path(tmp)
        original_runs_dir = pz2.RUNS_DIR
        pz2.RUNS_DIR = runs_root
        try:
            entries = [_capture_one(mf, runs_root) for mf in _MDX_BATCH]
        finally:
            # Always undo the module-level override.
            pz2.RUNS_DIR = original_runs_dir

    return {
        "schema_version": 2,
        "axis": (
            "IMP-89 89-a u4 — final.html SHA baseline captured via FULL "
            "run_phase_z2_mvp1 pipeline (flag OFF / default)"
        ),
        "description": (
            "Frozen SHA-256 of `final.html` bytes (the artifact written to "
            "disk at src/phase_z2_pipeline.py:5994-5996) captured by running "
            "the full Phase Z pipeline end-to-end for each mdx 01-05 under "
            "PHASE_Z_B4_MAPPER_SOURCE=OFF. Under flag OFF the 89-a selector "
            "`_select_mapper_template_id(plan, T)` returns `T` verbatim, so "
            "the mapper input is byte-identical to the pre-89-a legacy call "
            "shape `map_mdx_to_slots(section, unit.frame_template_id)` — "
            "the rendered HTML and therefore the final.html SHA match the "
            "pre-89-a baseline. The u4 regression test runs the same "
            "pipeline shape under flag OFF and asserts SHA equality. "
            "Regenerate only when an upstream mapper/render/template delta "
            "is deliberately reviewed and accepted."
        ),
        "captured_at_utc": (
            datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
        ),
        "renderer": {
            "entrypoint": "src.phase_z2_pipeline.run_phase_z2_mvp1",
            "write_site": "src/phase_z2_pipeline.py:5994-5996",
            "artifact_relpath": "//phase_z2/final.html",
        },
        "mdx_batch": list(_MDX_BATCH),
        "mdx_files": {entry["mdx_file"]: entry for entry in entries},
        "total_files": len(entries),
    }


def main() -> None:
    """Capture the baseline and write it to ``_OUT_PATH`` as indented JSON."""
    data = capture()
    _OUT_PATH.write_text(
        json.dumps(data, indent=2, ensure_ascii=False) + "\n",
        encoding="utf-8",
    )
    print(
        f"wrote {_OUT_PATH} ({data['total_files']} files: "
        f"{', '.join(data['mdx_files'].keys())})"
    )


if __name__ == "__main__":
    main()
+ +Stage 2 u4 contract (verbatim):: + + mdx 01-05 final.html SHA == pre-89-a baseline under + PHASE_Z_B4_MAPPER_SOURCE=OFF (default) + +How this guard is wired +======================= + +The baseline is captured ONCE by +``tests/regression/scripts/capture_89a_pre_baseline.py``, which runs the +FULL Phase Z pipeline (``src.phase_z2_pipeline.run_phase_z2_mvp1``) for +every mdx file in ``samples/mdx_batch/`` (01-05) under flag OFF, reads +the on-disk ``final.html`` artifact from +``<RUNS_DIR>/<run_id>/phase_z2/final.html`` +(the production write site at ``src/phase_z2_pipeline.py:5994-5996``), +and SHA-256 hashes its bytes. The hash is frozen at:: + + tests/regression/fixtures/89a_pre_baseline_sha.json + +This test re-runs the SAME pipeline shape (full ``run_phase_z2_mvp1``, +flag OFF, isolated tmp RUNS_DIR), reads the live on-disk ``final.html``, +hashes its bytes, and asserts SHA equality with the frozen value. + +Why this is a genuine "pre-89-a baseline" guard +================================================ + +Under flag OFF the 89-a switch at ``src/phase_z2_pipeline.py:4771-4773`` +calls ``_select_mapper_template_id(plan, unit.frame_template_id)``, which +returns ``unit.frame_template_id`` verbatim (locked by u2 unit tests + +``test_flag_off_selector_returns_legacy_input_per_section`` below). The +mapper input is therefore byte-identical to the pre-89-a call shape +``map_mdx_to_slots(section, unit.frame_template_id)`` (no selector +indirection), so: + +* mapper output (slot_payload) is byte-identical +* render_slide output is byte-identical +* on-disk ``final.html`` is byte-identical +* SHA-256 matches the frozen baseline + +Any future drift in ``_select_mapper_template_id``, ``map_mdx_to_slots``, +``render_slide``, ``slide_base.html``, family partials, or any upstream +pipeline step produces a divergent SHA and breaks this test. + +Crucially the baseline is NOT recomputed in this process — it is read +from the on-disk JSON fixture. 
@pytest.fixture(scope="module")
def baseline() -> dict:
    """Load the frozen final.html SHA baseline from disk.

    A missing file is a hard failure with a one-shot regen hint — the
    baseline cannot be silently regenerated inside the test process
    (that would defeat the regression-guard purpose).

    Returns:
        The parsed fixture document (schema_version 2).
    """
    assert _BASELINE_PATH.exists(), (
        f"baseline fixture missing: {_BASELINE_PATH}\n"
        f"regenerate with: python tests/regression/scripts/"
        f"capture_89a_pre_baseline.py"
    )
    with _BASELINE_PATH.open("r", encoding="utf-8") as f:
        data = json.load(f)
    assert data.get("schema_version") == 2, (
        f"unexpected baseline schema_version: {data.get('schema_version')} "
        f"(expected 2 — full-pipeline final.html SHA capture). Regenerate "
        f"the fixture via capture_89a_pre_baseline.py."
    )
    assert data.get("total_files", 0) >= len(_MDX_BATCH), (
        f"baseline has {data.get('total_files')} files — expected "
        f">= {len(_MDX_BATCH)} (one per mdx 01-05)"
    )
    return data


def _live_pipeline_final_html_sha(
    mdx_file: str, runs_root: Path
) -> tuple[str, int, int | None]:
    """Run the full pipeline once under flag OFF and SHA the on-disk final.html.

    ``pz2.RUNS_DIR`` MUST be pinned to ``runs_root`` by the caller (via
    monkeypatch); the artifact is read back from
    ``<runs_root>/<run_id>/phase_z2/final.html``.

    ``SystemExit`` is caught (mirrors the capture script): per the design
    notes the IMP-87 EMPTY_SHELL_NO_CONTENT BLOCKED exit on mdx 05 fires
    AFTER the final.html write, so the artifact still exists on disk. The
    exit code is returned so the per-file test can assert the same terminal
    state under flag OFF.

    Returns ``(sha256_hex, size_bytes, pipeline_exit_code)``; the exit code
    is ``None`` when the pipeline returned normally.
    """
    mdx_path = _SAMPLES_DIR / mdx_file
    assert mdx_path.exists(), f"sample missing: {mdx_path}"

    run_id = f"89a_live_{mdx_path.stem}"
    pipeline_exit_code: int | None = None
    try:
        pz2.run_phase_z2_mvp1(mdx_path, run_id=run_id)
    except SystemExit as exc:
        # Same normalisation as the capture script so the recorded and live
        # exit codes stay comparable: ints are kept, anything else becomes 1.
        pipeline_exit_code = (
            int(exc.code) if isinstance(exc.code, int) else 1
        )

    final_html_path = runs_root / run_id / "phase_z2" / "final.html"
    assert final_html_path.exists(), (
        f"final.html not written by pipeline: {final_html_path} "
        f"(pipeline_exit_code={pipeline_exit_code})"
    )
    raw_bytes = final_html_path.read_bytes()
    return (
        hashlib.sha256(raw_bytes).hexdigest(),
        len(raw_bytes),
        pipeline_exit_code,
    )


@pytest.mark.integration
@pytest.mark.parametrize("mdx_file", _MDX_BATCH)
def test_post_89a_flag_off_final_html_sha_matches_frozen_baseline(
    monkeypatch: pytest.MonkeyPatch,
    tmp_path: Path,
    mdx_file: str,
    baseline: dict,
) -> None:
    """Per-mdx-file final.html SHA parity guard via the FULL pipeline.

    Runs ``run_phase_z2_mvp1`` end-to-end with ``PHASE_Z_B4_MAPPER_SOURCE``
    unset, reads the on-disk ``final.html``, hashes its bytes, and asserts
    SHA equality with the frozen baseline, then asserts the pipeline exit
    code matches the recorded one.
    """
    monkeypatch.delenv(FLAG, raising=False)
    assert _b4_mapper_source_enabled() is False

    # Isolate pipeline output in pytest's per-test temp directory.
    runs_root = tmp_path / "runs"
    runs_root.mkdir(parents=True, exist_ok=True)
    monkeypatch.setattr(pz2, "RUNS_DIR", runs_root)

    expected = baseline["mdx_files"][mdx_file]
    expected_sha = expected["sha256"]
    expected_size = expected["final_html_size_bytes"]
    expected_exit = expected.get("pipeline_exit_code")

    live_sha, live_size, live_exit = _live_pipeline_final_html_sha(
        mdx_file, runs_root
    )

    assert live_sha == expected_sha, (
        f"final.html SHA parity broken for {mdx_file}:\n"
        f" frozen pre-89-a baseline : {expected_sha} ({expected_size} bytes)\n"
        f" live post-89-a flag-OFF : {live_sha} ({live_size} bytes)\n"
        f" -> upstream drift in selector / mapper / render_slide / "
        f"slide_base.html / family partials / pipeline step. Re-capture "
        f"the baseline ONLY if the delta is deliberate and reviewed."
    )
    assert live_exit == expected_exit, (
        f"pipeline exit-code parity broken for {mdx_file}: "
        f"baseline={expected_exit} live={live_exit} — SHA matches but "
        f"the pipeline terminal state diverged (IMP-87 BLOCKED axis)."
    )


@pytest.mark.integration
def test_post_89a_flag_off_final_html_sha_holistic_sweep(
    monkeypatch: pytest.MonkeyPatch,
    tmp_path: Path,
    baseline: dict,
) -> None:
    """Single-pass holistic sweep across mdx 01-05.

    Aggregates the per-file SHA parity into one sweep so an accidental
    parametrize zero-iteration cannot silently pass. Asserts (a) env gate
    stays False throughout, (b) every baseline file's live SHA matches the
    frozen value, (c) sweep coverage equals the full _MDX_BATCH.
    """
    monkeypatch.delenv(FLAG, raising=False)
    assert _b4_mapper_source_enabled() is False

    runs_root = tmp_path / "runs"
    runs_root.mkdir(parents=True, exist_ok=True)
    monkeypatch.setattr(pz2, "RUNS_DIR", runs_root)

    matched = 0
    for mdx_file in _MDX_BATCH:
        expected = baseline["mdx_files"][mdx_file]
        live_sha, live_size, _live_exit = _live_pipeline_final_html_sha(
            mdx_file, runs_root
        )
        # (a) "throughout": re-check after every run so a pipeline step that
        # mutates the environment mid-sweep cannot go unnoticed.
        assert _b4_mapper_source_enabled() is False, (
            f"mapper-source flag flipped ON while running {mdx_file}"
        )
        assert live_sha == expected["sha256"], (
            f"sweep mismatch on {mdx_file}: baseline {expected['sha256']} "
            f"≠ live {live_sha} (size baseline={expected['final_html_size_bytes']} "
            f"live={live_size})"
        )
        matched += 1

    assert matched == len(_MDX_BATCH), (
        f"sweep covered only {matched}/{len(_MDX_BATCH)} mdx files — "
        f"coverage too shallow to guard final.html SHA parity."
    )


@pytest.mark.parametrize("mdx_file", _MDX_BATCH)
def test_flag_off_selector_returns_legacy_input_per_section(
    monkeypatch: pytest.MonkeyPatch, mdx_file: str
) -> None:
    """Algebraic precondition: ``_select_mapper_template_id(plan, T) == T``
    under flag OFF for every section parsed from each mdx 01-05 file.

    Builds a real placement plan per section and checks that the selector
    hands back the sentinel legacy value. No ``run_phase_z2_mvp1`` call is
    made, so this localizes the failure axis when SHA parity breaks.
    """
    monkeypatch.delenv(FLAG, raising=False)
    assert _b4_mapper_source_enabled() is False

    mdx_path = _SAMPLES_DIR / mdx_file
    _title, sections, _footer = parse_mdx(mdx_path)
    frame_contracts = list(load_frame_contracts().values())

    # Sentinel stands in for the legacy pick; only identity of the returned
    # value matters here, not whether it names a real template.
    legacy_pick = "LEGACY_V4_RANK_1_SENTINEL"
    for section in sections:
        content_objects = extract_content_objects(section, source_shape=None)
        plan = plan_placement(
            content_objects=content_objects,
            frame_contracts=frame_contracts,
            section_id=section.section_id,
        )
        resolved = _select_mapper_template_id(plan, legacy_pick)
        assert resolved == legacy_pick, (
            f"flag-OFF selector contract broken in {mdx_file} section "
            f"{section.section_id}: B4 picked "
            f"'{plan.selected_template_id}' but selector returned "
            f"'{resolved}' (expected '{legacy_pick}')"
        )


@pytest.mark.parametrize("mdx_file", _MDX_BATCH)
def test_flag_off_holds_when_b4_mismatches_legacy(
    monkeypatch: pytest.MonkeyPatch, mdx_file: str
) -> None:
    """Highest-risk divergence scenario: B4 picks a template DIFFERENT
    from the legacy V4 rank-1 input. Under flag OFF the selector MUST
    still return the legacy input — never the B4 pick.
    """
    monkeypatch.delenv(FLAG, raising=False)

    mdx_path = _SAMPLES_DIR / mdx_file
    _title, sections, _footer = parse_mdx(mdx_path)
    frame_contracts = list(load_frame_contracts().values())

    legacy_distinct = "__89A_U4_SENTINEL_LEGACY_DISTINCT_FROM_B4__"
    for section in sections:
        content_objects = extract_content_objects(section, source_shape=None)
        plan = plan_placement(
            content_objects=content_objects,
            frame_contracts=frame_contracts,
            section_id=section.section_id,
        )
        # Guarantees the mismatch premise: B4's pick differs from the sentinel.
        assert plan.selected_template_id != legacy_distinct
        resolved = _select_mapper_template_id(plan, legacy_distinct)
        assert resolved == legacy_distinct, (
            f"flag-OFF selector must return legacy input even when B4 "
            f"would pick differently ({mdx_file} {section.section_id}: "
            f"b4_pick={plan.selected_template_id} resolved={resolved})"
        )


def test_flag_off_default_state_locks_gate_to_false(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Pin the default-OFF contract at the gate level.

    With the env var absent, ``_b4_mapper_source_enabled()`` must return
    False — the precondition for SHA parity, since the flag-ON behavior is
    only reached when it returns True.

    The variable is removed via ``monkeypatch`` rather than trusting the
    ambient process environment, so a developer shell or ``.env`` that
    enables the flag for demos cannot make this test fail spuriously.
    """
    monkeypatch.delenv(FLAG, raising=False)
    assert os.environ.get(FLAG, "") == ""
    assert _b4_mapper_source_enabled() is False


def test_baseline_fixture_is_load_bearing(baseline: dict) -> None:
    """Sanity-check the frozen baseline file shape so a corrupted /
    half-written fixture does not silently pass the SHA tests with zero
    iterations.
    """
    assert baseline["axis"].startswith("IMP-89 89-a u4")
    assert set(baseline["mdx_files"].keys()) == set(_MDX_BATCH), (
        f"baseline mdx coverage drift: {baseline['mdx_files'].keys()} "
        f"vs expected {_MDX_BATCH}"
    )
    for mdx_file, entry in baseline["mdx_files"].items():
        assert "sha256" in entry, f"baseline {mdx_file} missing sha256 key"
        assert "final_html_size_bytes" in entry, (
            f"baseline {mdx_file} missing final_html_size_bytes key"
        )
        sha = entry["sha256"]
        # Length alone would accept 64 arbitrary characters. Live digests are
        # produced by hexdigest() and compared with ==, so only lowercase hex
        # can ever match; reject anything else here with a clear message.
        assert (
            isinstance(sha, str)
            and len(sha) == 64
            and all(ch in "0123456789abcdef" for ch in sha)
        ), (
            f"baseline {mdx_file} sha256 is not a 64-char hex string: {sha!r}"
        )
        size = entry["final_html_size_bytes"]
        assert isinstance(size, int) and size > 0, (
            f"baseline {mdx_file} final_html_size_bytes is not positive int: "
            f"{size!r}"
        )
    assert baseline["renderer"]["entrypoint"] == (
        "src.phase_z2_pipeline.run_phase_z2_mvp1"
    ), (
        "baseline renderer.entrypoint must be the full pipeline entry — "
        f"got {baseline['renderer'].get('entrypoint')!r}. The previous "
        "synthetic render_slide shape produced a fragment, not final.html."
    )