Some checks failed
Multi-MDX Regression (IMP-91) / multi-mdx-regression (push) Failing after 25s
u1 argparse --reuse-from PREV_RUN_ID + post-merge fail-closed guard (rejects
layout/zone_geometry/zone_section/image override axes by name; only
--override-frame is preserved).
u2 src/phase_z2_reuse_snapshot.py — JSON-only Step 6 snapshot with mdx_sha256
integrity key and {value, source_path, upstream_step} provenance per axis
(pickle forbidden per Stage 2 guardrail).
u3 _write_reuse_snapshot at the Step 6 boundary; soft-fails to stderr without
aborting the seed run.
u4 prev_run_dir RO copy of step00/01/02/05/06 + _reuse_snapshot.json into
new run_dir, state rehydration, reuse marker, frame-override application on
restored units, Step 7+ resume.
u4b fail-closed for missing prev_run_dir / missing/corrupt/invalid snapshot /
mdx_sha256 mismatch / accidental new==prev write, with value+path+upstream
diagnostics per axis.
u5 reuse_from Optional[str] threaded through run_phase_z2_mvp1 signature and
CLI dispatch; default None preserves byte-identical pre-IMP-43 behavior.
u6 Front /api/run optional reuseFromRunId forwarding (vite.config.ts +
designAgentApi.ts + run_pipeline_reuse_from.test.ts).
u7a fast CI equivalence (1 mdx × 1 layout × 2 frames); step13 whitelist =
run_id/timestamps/prev_run_id only. u7b 3 layouts × 3 mdx × 32 frames
sweep gated by pytest.mark.sweep (registered in pyproject.toml; default CI
must use -m 'not sweep').
u8 scripts/measure_reuse_savings.py argv-driven A/B/C harness with frame
pin self-discovery + seed-time exclusion; status board §8 TBD anchor
(issue-body 50-70% / 10-20s→3-8s claim explicitly unverified, not mirrored).
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
494 lines
16 KiB
Python
494 lines
16 KiB
Python
"""IMP-43 (#72) u2 — unit tests for ``src.phase_z2_reuse_snapshot``.

Scope mirror of the production module (Stage 2 u2):

  * ``build_snapshot`` shape, provenance, JSON round-trip, required keys.
  * ``serialize_section`` / ``serialize_unit`` field preservation, including
    the duck-typed ``v4_candidates`` shape (template_id / frame_id /
    frame_number / confidence / label).
  * ``validate_snapshot`` fail-closed paths: non-dict input, schema
    version mismatch, missing/empty/non-string ``mdx_sha256``, sha
    mismatch, missing required keys, unwrapped wrapper, wrapper missing
    a provenance field.
  * Module-level constants exposed for u3 / u4 / u4b consumers.

The tests use synthetic duck-typed dataclasses so the snapshot module's
external surface is exercised without coupling to the production
``MdxSection`` / ``CompositionUnit`` / ``V4Match`` dataclass layouts.
That mirrors the production module's intentional duck-typing (no
imports from ``phase_z2_pipeline`` / ``phase_z2_composition``).
"""
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
from dataclasses import dataclass, field
|
|
from typing import Any, Optional
|
|
|
|
import pytest
|
|
|
|
from src.phase_z2_reuse_snapshot import (
|
|
REQUIRED_TOP_LEVEL_KEYS,
|
|
SNAPSHOT_FILENAME,
|
|
SNAPSHOT_VERSION,
|
|
SnapshotValidationError,
|
|
build_snapshot,
|
|
serialize_section,
|
|
serialize_unit,
|
|
validate_snapshot,
|
|
)
|
|
|
|
|
|
# -- synthetic duck-typed inputs ------------------------------------------
|
|
|
|
|
|
@dataclass
class _Section:
    """Synthetic duck-typed section fed to the snapshot serializers.

    Used instead of the production section dataclass so these tests do
    not couple to its layout.  Only the first four fields are required;
    the remaining ones default to "absent" values.
    """

    section_id: str
    section_num: int
    title: str
    raw_content: str
    heading_number: Optional[str] = None
    # default_factory gives every instance its own list (no shared state).
    v4_alias_keys: list = field(default_factory=list)
    sub_sections: list = field(default_factory=list)
|
|
|
|
|
|
@dataclass
class _V4Candidate:
    """Synthetic duck-typed entry for a unit's ``v4_candidates`` list.

    ``v4_rank`` defaults to ``None`` so the same class covers both the
    unranked case and the ranked case exercised by the serializer tests.
    """

    template_id: str
    frame_id: str
    frame_number: int
    confidence: float
    label: str
    v4_rank: Optional[int] = None
|
|
|
|
|
|
@dataclass
class _Unit:
    """Synthetic duck-typed composition unit fed to ``serialize_unit``.

    Used instead of the production unit dataclass so these tests do not
    couple to its layout.  The first eleven fields are required; the rest
    carry defaults describing an auto-selected, non-provisional rank-1 unit.
    """

    source_section_ids: list
    merge_type: str
    frame_template_id: str
    frame_id: str
    frame_number: int
    confidence: float
    label: str
    phase_z_status: str
    raw_content: str
    title: str
    score: float
    v4_rank: Optional[int] = 1
    selection_path: str = "rank_1"
    fallback_reason: Optional[str] = None
    # default_factory gives every instance its own container (no shared state).
    rationale: dict = field(default_factory=dict)
    auto_selectable: bool = True
    filter_reasons: list = field(default_factory=list)
    notes: list = field(default_factory=list)
    v4_candidates: list = field(default_factory=list)
    provisional: bool = False
|
|
|
|
|
|
def _make_section(**overrides: Any) -> _Section:
    """Return a ``_Section`` with canned defaults, optionally overridden.

    Args:
        **overrides: ``_Section`` field names mapped to replacement values;
            they take precedence over the canned defaults.
    """
    base = dict(
        section_id="03-1",
        section_num=1,
        title="DX status",
        raw_content="- bullet one\n- bullet two",
    )
    base.update(overrides)
    return _Section(**base)
|
|
|
|
|
|
def _make_unit(**overrides: Any) -> _Unit:
    """Return a ``_Unit`` with canned defaults, optionally overridden.

    The default unit carries exactly one unranked ``_V4Candidate`` whose
    template / frame / confidence / label match the unit's own values.

    Args:
        **overrides: ``_Unit`` field names mapped to replacement values;
            they take precedence over the canned defaults (e.g.
            ``v4_candidates=[]`` drops the default candidate).
    """
    cand = _V4Candidate(
        template_id="tpl_a",
        frame_id="fid_a",
        frame_number=13,
        confidence=0.91,
        label="use_as_is",
    )
    base: dict[str, Any] = dict(
        source_section_ids=["03-1"],
        merge_type="single",
        frame_template_id="tpl_a",
        frame_id="fid_a",
        frame_number=13,
        confidence=0.91,
        label="use_as_is",
        phase_z_status="auto_renderable",
        raw_content="- bullet one\n- bullet two",
        title="DX status",
        score=0.91,
        v4_candidates=[cand],
    )
    base.update(overrides)
    return _Unit(**base)
|
|
|
|
|
|
def _make_build_kwargs(**overrides: Any) -> dict[str, Any]:
    """Return a complete keyword-argument dict for ``build_snapshot``.

    The defaults describe one section and one unit with a fixed
    ``mdx_sha256`` of 64 ``"a"`` characters, which the validation tests
    use as the matching ``expected_mdx_sha256``.

    Args:
        **overrides: keyword names mapped to replacement values; they take
            precedence over the canned defaults.
    """
    kwargs: dict[str, Any] = dict(
        mdx_sha256="a" * 64,
        slide_title="Title",
        slide_footer="Footer",
        sections=[_make_section()],
        stage0_adapter_diagnostics={"used": True, "fallback_reason": None},
        stage0_normalized_assets={"popups": [], "images": [], "tables": []},
        v4_evidence=[{"section_id": "03-1", "v4_candidates": []}],
        layout_preset_pre_override="horizontal-2",
        units=[_make_unit()],
        comp_debug={"v4_fallback_summary": {"fallback_used_count": 0}},
        v4_fallback_traces={"03-1": {"selection_path": "rank_1"}},
        ai_preflight={"enabled": False, "skipped": True},
    )
    kwargs.update(overrides)
    return kwargs
|
|
|
|
|
|
# -- module constants -----------------------------------------------------
|
|
|
|
|
|
def test_snapshot_filename_constant():
    """Pin the on-disk snapshot filename that consumers look up."""
    assert SNAPSHOT_FILENAME == "_reuse_snapshot.json"
|
|
|
|
|
|
def test_snapshot_version_is_positive_int():
    """``SNAPSHOT_VERSION`` must be an integer of at least 1."""
    assert isinstance(SNAPSHOT_VERSION, int)
    assert SNAPSHOT_VERSION >= 1
|
|
|
|
|
|
def test_required_keys_include_contract_and_payload():
    """``REQUIRED_TOP_LEVEL_KEYS`` lists both contract keys and every payload axis."""
    # Bare contract / integrity keys.
    assert "schema_version" in REQUIRED_TOP_LEVEL_KEYS
    assert "mdx_sha256" in REQUIRED_TOP_LEVEL_KEYS
    # Payload axes per Stage 2 plan.
    for k in (
        "slide_title",
        "slide_footer",
        "sections",
        "stage0_adapter_diagnostics",
        "stage0_normalized_assets",
        "v4_evidence",
        "layout_preset_pre_override",
        "units",
        "comp_debug",
        "v4_fallback_traces",
        "ai_preflight",
    ):
        assert k in REQUIRED_TOP_LEVEL_KEYS, f"missing from REQUIRED_TOP_LEVEL_KEYS: {k}"
|
|
|
|
|
|
# -- build_snapshot -------------------------------------------------------
|
|
|
|
|
|
def test_build_snapshot_round_trips_through_json():
    """A built snapshot survives ``json.dumps`` / ``json.loads`` with contract keys intact."""
    snap = build_snapshot(**_make_build_kwargs())
    payload = json.dumps(snap)
    loaded = json.loads(payload)
    assert loaded["schema_version"] == SNAPSHOT_VERSION
    assert loaded["mdx_sha256"] == "a" * 64
|
|
|
|
|
|
def test_build_snapshot_has_all_required_keys():
    """Every key in ``REQUIRED_TOP_LEVEL_KEYS`` is present in a built snapshot."""
    snap = build_snapshot(**_make_build_kwargs())
    for key in REQUIRED_TOP_LEVEL_KEYS:
        assert key in snap, f"build_snapshot missing required key: {key}"
|
|
|
|
|
|
def test_build_snapshot_bare_keys_are_unwrapped_scalars():
    """``schema_version`` and ``mdx_sha256`` are stored as plain values, not wrappers."""
    snap = build_snapshot(**_make_build_kwargs())
    assert snap["schema_version"] == SNAPSHOT_VERSION
    assert snap["mdx_sha256"] == "a" * 64
    # bare keys MUST NOT be wrapped — u4b mdx_sha256 check reads directly.
    assert not isinstance(snap["schema_version"], dict)
    assert not isinstance(snap["mdx_sha256"], dict)
|
|
|
|
|
|
def test_build_snapshot_provenance_wrapper_shape():
    """Every non-bare key is a ``{value, source_path, upstream_step}`` wrapper.

    ``source_path`` must be a non-empty string and ``upstream_step`` a
    string starting with ``"step"``.
    """
    snap = build_snapshot(**_make_build_kwargs())
    bare = {"schema_version", "mdx_sha256"}
    for key, entry in snap.items():
        if key in bare:
            continue
        assert isinstance(entry, dict), f"{key} is not wrapped"
        assert set(entry.keys()) == {"value", "source_path", "upstream_step"}, key
        assert isinstance(entry["source_path"], str) and entry["source_path"]
        assert isinstance(entry["upstream_step"], str)
        assert entry["upstream_step"].startswith("step"), entry["upstream_step"]
|
|
|
|
|
|
def test_build_snapshot_upstream_steps_stay_inside_reuse_boundary():
    """No ``upstream_step`` may point outside the Step 0/2/5/6 reuse
    boundary (Stage 1 root_cause). A drift to e.g. ``step09`` would
    silently invite work outside the reuse window — fail loudly.

    Step 01's contribution is the ``mdx_sha256`` integrity key (a bare
    contract scalar with no wrapper) so step01 does not need to appear
    in payload provenance.
    """
    snap = build_snapshot(**_make_build_kwargs())
    allowed = {"step00", "step02", "step05", "step06"}
    for key, entry in snap.items():
        # Bare contract keys carry no provenance wrapper to inspect.
        if key in {"schema_version", "mdx_sha256"}:
            continue
        assert entry["upstream_step"] in allowed, (
            f"key {key!r}: upstream_step {entry['upstream_step']!r} outside reuse boundary"
        )
|
|
|
|
|
|
def test_build_snapshot_units_carry_v4_candidates():
    """The serialized unit inside the snapshot keeps its ``v4_candidates`` fields."""
    snap = build_snapshot(**_make_build_kwargs())
    units = snap["units"]["value"]
    assert len(units) == 1
    assert units[0]["v4_candidates"][0]["template_id"] == "tpl_a"
    assert units[0]["v4_candidates"][0]["frame_number"] == 13
    assert units[0]["v4_candidates"][0]["confidence"] == pytest.approx(0.91)
|
|
|
|
|
|
def test_build_snapshot_sections_preserve_alias_keys_and_subsections():
    """Optional section fields (aliases, sub-sections, heading number) reach the snapshot."""
    sec = _make_section(
        section_id="04-2",
        v4_alias_keys=["04-2.1"],
        sub_sections=[{"id": "04-2-sub-1"}],
        heading_number="2.1",
    )
    snap = build_snapshot(**_make_build_kwargs(sections=[sec]))
    payload = snap["sections"]["value"]
    assert payload[0]["section_id"] == "04-2"
    assert payload[0]["v4_alias_keys"] == ["04-2.1"]
    assert payload[0]["sub_sections"] == [{"id": "04-2-sub-1"}]
    assert payload[0]["heading_number"] == "2.1"
|
|
|
|
|
|
def test_build_snapshot_units_provenance_points_at_step06():
    """``units`` provenance names the step06 composition-plan artifact."""
    snap = build_snapshot(**_make_build_kwargs())
    assert "step06_composition_plan.json" in snap["units"]["source_path"]
    assert snap["units"]["upstream_step"] == "step06"
|
|
|
|
|
|
def test_build_snapshot_v4_evidence_provenance_points_at_step05():
    """``v4_evidence`` provenance names the step05 evidence artifact."""
    snap = build_snapshot(**_make_build_kwargs())
    assert "step05_v4_evidence.json" in snap["v4_evidence"]["source_path"]
    assert snap["v4_evidence"]["upstream_step"] == "step05"
|
|
|
|
|
|
def test_build_snapshot_ai_preflight_provenance_points_at_step00():
    """``ai_preflight`` provenance names the step00 preconditions artifact."""
    snap = build_snapshot(**_make_build_kwargs())
    assert "step00_preconditions.json" in snap["ai_preflight"]["source_path"]
    assert snap["ai_preflight"]["upstream_step"] == "step00"
|
|
|
|
|
|
def test_build_snapshot_rejects_unjsonable_input():
    """``build_snapshot`` raises ``TypeError`` when a unit holds a non-JSON value."""
    bad_unit = _make_unit()
    bad_unit.notes.append(object())  # not JSON-safe
    with pytest.raises(TypeError):
        build_snapshot(**_make_build_kwargs(units=[bad_unit]))
|
|
|
|
|
|
def test_build_snapshot_handles_none_optional_fields():
    """``None`` for optional inputs is accepted and normalized.

    Title / footer stay ``None``; the dict-shaped axes collapse to ``{}``.
    """
    snap = build_snapshot(
        **_make_build_kwargs(
            slide_title=None,
            slide_footer=None,
            stage0_adapter_diagnostics=None,
            stage0_normalized_assets=None,
            comp_debug=None,
            v4_fallback_traces=None,
            ai_preflight=None,
        )
    )
    # None inputs land as None / {} consistently — never raise.
    assert snap["slide_title"]["value"] is None
    assert snap["slide_footer"]["value"] is None
    assert snap["stage0_adapter_diagnostics"]["value"] == {}
    assert snap["stage0_normalized_assets"]["value"] == {}
    assert snap["comp_debug"]["value"] == {}
    assert snap["v4_fallback_traces"]["value"] == {}
    assert snap["ai_preflight"]["value"] == {}
|
|
|
|
|
|
# -- serializer helpers ---------------------------------------------------
|
|
|
|
|
|
def test_serialize_section_preserves_all_documented_fields():
    """``serialize_section`` copies every section field into its output dict."""
    sec = _make_section(
        heading_number="1.1",
        v4_alias_keys=["03-1.x"],
        sub_sections=[{"id": "s"}],
    )
    out = serialize_section(sec)
    assert out["section_id"] == "03-1"
    assert out["section_num"] == 1
    assert out["title"] == "DX status"
    assert out["raw_content"].startswith("- bullet")
    assert out["heading_number"] == "1.1"
    assert out["v4_alias_keys"] == ["03-1.x"]
    assert out["sub_sections"] == [{"id": "s"}]
|
|
|
|
|
|
def test_serialize_section_works_with_missing_optional_attrs():
    """An object lacking the optional attributes serializes with ``None`` / ``[]`` defaults."""

    # Deliberately omits heading_number / v4_alias_keys / sub_sections.
    class _Minimal:
        section_id = "x"
        section_num = 0
        title = "t"
        raw_content = "r"

    out = serialize_section(_Minimal())
    assert out["heading_number"] is None
    assert out["v4_alias_keys"] == []
    assert out["sub_sections"] == []
|
|
|
|
|
|
def test_serialize_unit_v4_candidates_unwrap_to_named_attrs():
    """A candidate serializes to exactly the six named keys, with ``v4_rank`` ``None`` when unset."""
    unit = _make_unit()
    out = serialize_unit(unit)
    cand = out["v4_candidates"][0]
    assert cand == {
        "template_id": "tpl_a",
        "frame_id": "fid_a",
        "frame_number": 13,
        "confidence": pytest.approx(0.91),
        "label": "use_as_is",
        # u4 follow-up — Step 9 application-plan payload reads
        # ``c.v4_rank`` off each rehydrated candidate. Snapshot
        # serializer persists it via ``getattr(c, 'v4_rank', None)`` so
        # legacy duck types (no v4_rank attr) get None and modern V4Match
        # instances carry their rank (1/2/3/...).
        "v4_rank": None,
    }
|
|
|
|
|
|
def test_serialize_unit_v4_candidates_persist_v4_rank_when_present():
    """A v4_candidate with v4_rank=2 (V4Match-shape duck type) round-trips."""
    ranked_cand = _V4Candidate(
        template_id="tpl_b",
        frame_id="fid_b",
        frame_number=14,
        confidence=0.82,
        label="light_edit",
        v4_rank=2,
    )
    unit = _make_unit(v4_candidates=[ranked_cand])
    out = serialize_unit(unit)
    assert out["v4_candidates"][0]["v4_rank"] == 2
|
|
|
|
|
|
def test_serialize_unit_handles_empty_v4_candidates():
    """A unit with no candidates serializes ``v4_candidates`` as an empty list."""
    unit = _make_unit(v4_candidates=[])
    out = serialize_unit(unit)
    assert out["v4_candidates"] == []
|
|
|
|
|
|
def test_serialize_unit_provisional_default_false():
    """The default unit serializes with ``provisional`` exactly ``False``."""
    unit = _make_unit()
    assert serialize_unit(unit)["provisional"] is False
|
|
|
|
|
|
def test_serialize_unit_provisional_true_preserved():
    """``provisional=True`` on the unit is carried through serialization."""
    unit = _make_unit(provisional=True)
    assert serialize_unit(unit)["provisional"] is True
|
|
|
|
|
|
def test_serialize_unit_round_trips_through_json():
    """A serialized unit survives ``json.dumps`` / ``json.loads`` unchanged in key fields."""
    out = serialize_unit(_make_unit())
    reloaded = json.loads(json.dumps(out))
    assert reloaded["source_section_ids"] == ["03-1"]
    assert reloaded["frame_template_id"] == "tpl_a"
|
|
|
|
|
|
# -- validate_snapshot ----------------------------------------------------
|
|
|
|
|
|
def test_validate_snapshot_accepts_well_formed():
    """A freshly built snapshot validates against its own sha without raising."""
    snap = build_snapshot(**_make_build_kwargs())
    validate_snapshot(snap, expected_mdx_sha256="a" * 64)
|
|
|
|
|
|
def test_validate_snapshot_rejects_non_dict_input():
    """A non-dict snapshot raises ``SnapshotValidationError``."""
    with pytest.raises(SnapshotValidationError):
        validate_snapshot("not a dict", expected_mdx_sha256="a" * 64)
|
|
|
|
|
|
def test_validate_snapshot_rejects_version_mismatch():
    """A foreign ``schema_version`` is rejected and named in the error message."""
    snap = build_snapshot(**_make_build_kwargs())
    snap["schema_version"] = SNAPSHOT_VERSION + 999
    with pytest.raises(SnapshotValidationError) as exc:
        validate_snapshot(snap, expected_mdx_sha256="a" * 64)
    assert "schema_version" in str(exc.value)
|
|
|
|
|
|
def test_validate_snapshot_rejects_missing_sha():
    """A snapshot without ``mdx_sha256`` is rejected and the key is named in the error."""
    snap = build_snapshot(**_make_build_kwargs())
    del snap["mdx_sha256"]
    with pytest.raises(SnapshotValidationError) as exc:
        validate_snapshot(snap, expected_mdx_sha256="a" * 64)
    assert "mdx_sha256" in str(exc.value)
|
|
|
|
|
|
def test_validate_snapshot_rejects_empty_sha():
    """An empty-string ``mdx_sha256`` is rejected and the key is named in the error."""
    snap = build_snapshot(**_make_build_kwargs())
    snap["mdx_sha256"] = ""
    with pytest.raises(SnapshotValidationError) as exc:
        validate_snapshot(snap, expected_mdx_sha256="a" * 64)
    assert "mdx_sha256" in str(exc.value)
|
|
|
|
|
|
def test_validate_snapshot_rejects_non_string_sha():
    """A non-string ``mdx_sha256`` is rejected and the key is named in the error."""
    snap = build_snapshot(**_make_build_kwargs())
    snap["mdx_sha256"] = 12345
    with pytest.raises(SnapshotValidationError) as exc:
        validate_snapshot(snap, expected_mdx_sha256="a" * 64)
    assert "mdx_sha256" in str(exc.value)
|
|
|
|
|
|
def test_validate_snapshot_rejects_sha_mismatch():
    """An ``expected_mdx_sha256`` that differs from the stored sha is reported as a mismatch."""
    snap = build_snapshot(**_make_build_kwargs())
    with pytest.raises(SnapshotValidationError) as exc:
        validate_snapshot(snap, expected_mdx_sha256="b" * 64)
    assert "mdx_sha256 mismatch" in str(exc.value)
|
|
|
|
|
|
def test_validate_snapshot_rejects_missing_required_key():
    """Dropping a required payload key (``units``) is rejected and the key is named."""
    snap = build_snapshot(**_make_build_kwargs())
    del snap["units"]
    with pytest.raises(SnapshotValidationError) as exc:
        validate_snapshot(snap, expected_mdx_sha256="a" * 64)
    assert "units" in str(exc.value)
|
|
|
|
|
|
def test_validate_snapshot_rejects_unwrapped_payload_key():
    """A payload key holding a non-dict instead of a wrapper is rejected and named."""
    snap = build_snapshot(**_make_build_kwargs())
    snap["units"] = "not a dict"
    with pytest.raises(SnapshotValidationError) as exc:
        validate_snapshot(snap, expected_mdx_sha256="a" * 64)
    assert "units" in str(exc.value)
|
|
|
|
|
|
def test_validate_snapshot_rejects_wrapper_missing_value():
    """A wrapper without ``value`` is rejected and the missing field is named."""
    snap = build_snapshot(**_make_build_kwargs())
    snap["units"] = {"source_path": "x", "upstream_step": "step06"}
    with pytest.raises(SnapshotValidationError) as exc:
        validate_snapshot(snap, expected_mdx_sha256="a" * 64)
    assert "value" in str(exc.value)
|
|
|
|
|
|
def test_validate_snapshot_rejects_wrapper_missing_source_path():
    """A wrapper without ``source_path`` is rejected and the missing field is named."""
    snap = build_snapshot(**_make_build_kwargs())
    snap["units"] = {"value": [], "upstream_step": "step06"}
    with pytest.raises(SnapshotValidationError) as exc:
        validate_snapshot(snap, expected_mdx_sha256="a" * 64)
    assert "source_path" in str(exc.value)
|
|
|
|
|
|
def test_validate_snapshot_rejects_wrapper_missing_upstream_step():
    """A wrapper without ``upstream_step`` is rejected and the missing field is named."""
    snap = build_snapshot(**_make_build_kwargs())
    snap["units"] = {"value": [], "source_path": "x"}
    with pytest.raises(SnapshotValidationError) as exc:
        validate_snapshot(snap, expected_mdx_sha256="a" * 64)
    assert "upstream_step" in str(exc.value)
|
|
|
|
|
|
def test_validate_snapshot_error_subclasses_value_error():
    """A validation failure is catchable as a plain ``ValueError``."""
    snap = build_snapshot(**_make_build_kwargs())
    # Offset from the live constant so the version can never accidentally
    # match (a literal 999 would pass validation if the schema reached 999).
    snap["schema_version"] = SNAPSHOT_VERSION + 999
    # u4b will pre-catch SnapshotValidationError, but the broader
    # `except ValueError` net must still pick this up.
    with pytest.raises(ValueError):
        validate_snapshot(snap, expected_mdx_sha256="a" * 64)
|