Files
C.E.L_Slide_test2/tests/test_phase_z2_reuse_snapshot.py
kyeongmin b4be6c1cd0
Some checks failed
Multi-MDX Regression (IMP-91) / multi-mdx-regression (push) Failing after 25s
feat(#72): IMP-43 u1~u8 --reuse-from incremental rerun (Step 0/1/2/5/6 reuse + Step 7+ re-execute)
u1 argparse --reuse-from PREV_RUN_ID + post-merge fail-closed guard (rejects
layout/zone_geometry/zone_section/image override axes by name; only
--override-frame is preserved).
u2 src/phase_z2_reuse_snapshot.py — JSON-only Step 6 snapshot with mdx_sha256
integrity key and {value, source_path, upstream_step} provenance per axis
(pickle forbidden per Stage 2 guardrail).
u3 _write_reuse_snapshot at the Step 6 boundary; soft-fails to stderr without
aborting the seed run.
u4 prev_run_dir RO copy of step00/01/02/05/06 + _reuse_snapshot.json into
new run_dir, state rehydration, reuse marker, frame-override application on
restored units, Step 7+ resume.
u4b fail-closed for missing prev_run_dir / missing/corrupt/invalid snapshot /
mdx_sha256 mismatch / accidental new==prev write, with value+path+upstream
diagnostics per axis.
u5 reuse_from Optional[str] threaded through run_phase_z2_mvp1 signature and
CLI dispatch; default None preserves byte-identical pre-IMP-43 behavior.
u6 Front /api/run optional reuseFromRunId forwarding (vite.config.ts +
designAgentApi.ts + run_pipeline_reuse_from.test.ts).
u7a fast CI equivalence (1 mdx × 1 layout × 2 frames); step13 whitelist =
run_id/timestamps/prev_run_id only. u7b 3 layouts × 3 mdx × 32 frames
sweep gated by pytest.mark.sweep (registered in pyproject.toml; default CI
must use -m 'not sweep').
u8 scripts/measure_reuse_savings.py argv-driven A/B/C harness with frame
pin self-discovery + seed-time exclusion; status board §8 TBD anchor
(issue-body 50-70% / 10-20s→3-8s claim explicitly unverified, not mirrored).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-24 22:44:27 +09:00

494 lines
16 KiB
Python

"""IMP-43 (#72) u2 — unit tests for ``src.phase_z2_reuse_snapshot``.

Scope mirror of the production module (Stage 2 u2):

* ``build_snapshot`` shape, provenance, JSON round-trip, required keys.
* ``serialize_section`` / ``serialize_unit`` field preservation, including
  the duck-typed ``v4_candidates`` shape (template_id / frame_id /
  frame_number / confidence / label / v4_rank).
* ``validate_snapshot`` fail-closed paths: non-dict input, schema
  version mismatch, missing/empty/non-string ``mdx_sha256``, sha
  mismatch, missing required keys, unwrapped wrapper, wrapper missing
  a provenance field.
* Module-level constants exposed for u3 / u4 / u4b consumers.

The tests use synthetic duck-typed dataclasses so the snapshot module's
external surface is exercised without coupling to the production
``MdxSection`` / ``CompositionUnit`` / ``V4Match`` dataclass layouts.
That mirrors the production module's intentional duck-typing (no
imports from ``phase_z2_pipeline`` / ``phase_z2_composition``).
"""
from __future__ import annotations
import json
from dataclasses import dataclass, field
from typing import Any, Optional
import pytest
from src.phase_z2_reuse_snapshot import (
REQUIRED_TOP_LEVEL_KEYS,
SNAPSHOT_FILENAME,
SNAPSHOT_VERSION,
SnapshotValidationError,
build_snapshot,
serialize_section,
serialize_unit,
validate_snapshot,
)
# -- synthetic duck-typed inputs ------------------------------------------
@dataclass
class _Section:
    """Synthetic duck-typed section handed to the snapshot serializers.

    The four leading fields are required; the remaining three default to
    ``None`` / empty lists so a minimal section needs no extra arguments.
    """

    # Required identity / content fields.
    section_id: str
    section_num: int
    title: str
    raw_content: str

    # Optional fields; list defaults use ``default_factory`` so that
    # instances never share one mutable list.
    heading_number: Optional[str] = None
    v4_alias_keys: list = field(default_factory=list)
    sub_sections: list = field(default_factory=list)
@dataclass
class _V4Candidate:
    """Synthetic duck-typed entry for a unit's ``v4_candidates`` list.

    ``v4_rank`` is optional and defaults to ``None``; the other five fields
    must be supplied.
    """

    template_id: str
    frame_id: str
    frame_number: int
    confidence: float
    label: str

    # Left as None unless a test explicitly assigns a rank.
    v4_rank: Optional[int] = None
@dataclass
class _Unit:
    """Synthetic duck-typed composition unit handed to ``serialize_unit``.

    The first eleven fields are required. Everything after ``score`` has a
    default, so tests only spell out the attributes they care about.
    """

    # Required fields (no defaults).
    source_section_ids: list
    merge_type: str
    frame_template_id: str
    frame_id: str
    frame_number: int
    confidence: float
    label: str
    phase_z_status: str
    raw_content: str
    title: str
    score: float

    # Defaulted selection metadata.
    v4_rank: Optional[int] = 1
    selection_path: str = "rank_1"
    fallback_reason: Optional[str] = None

    # Mutable containers are created per instance through ``default_factory``.
    rationale: dict = field(default_factory=dict)
    auto_selectable: bool = True
    filter_reasons: list = field(default_factory=list)
    notes: list = field(default_factory=list)
    v4_candidates: list = field(default_factory=list)
    provisional: bool = False
def _make_section(**overrides: Any) -> _Section:
    """Return a ``_Section`` built from fixed defaults merged with ``overrides``.

    Any keyword given by the caller replaces the default of the same name.
    """
    defaults = {
        "section_id": "03-1",
        "section_num": 1,
        "title": "DX status",
        "raw_content": "- bullet one\n- bullet two",
    }
    # Later mapping wins, so caller-supplied values take precedence.
    return _Section(**{**defaults, **overrides})
def _make_unit(**overrides: Any) -> _Unit:
    """Return a ``_Unit`` with one default candidate, merged with ``overrides``.

    The default unit carries a single ``_V4Candidate`` whose template / frame /
    confidence / label values match the unit's own. Keyword overrides replace
    defaults by name (including ``v4_candidates``).
    """
    default_candidate = _V4Candidate(
        template_id="tpl_a",
        frame_id="fid_a",
        frame_number=13,
        confidence=0.91,
        label="use_as_is",
    )
    unit_fields: dict[str, Any] = {
        "source_section_ids": ["03-1"],
        "merge_type": "single",
        "frame_template_id": "tpl_a",
        "frame_id": "fid_a",
        "frame_number": 13,
        "confidence": 0.91,
        "label": "use_as_is",
        "phase_z_status": "auto_renderable",
        "raw_content": "- bullet one\n- bullet two",
        "title": "DX status",
        "score": 0.91,
        "v4_candidates": [default_candidate],
    }
    unit_fields.update(overrides)
    return _Unit(**unit_fields)
def _make_build_kwargs(**overrides: Any) -> dict[str, Any]:
    """Return the keyword arguments the tests pass to ``build_snapshot``.

    The defaults include one synthetic section and one synthetic unit.
    ``overrides`` replace individual keyword arguments by name.
    """
    defaults: dict[str, Any] = {
        "mdx_sha256": "a" * 64,
        "slide_title": "Title",
        "slide_footer": "Footer",
        "sections": [_make_section()],
        "stage0_adapter_diagnostics": {"used": True, "fallback_reason": None},
        "stage0_normalized_assets": {"popups": [], "images": [], "tables": []},
        "v4_evidence": [{"section_id": "03-1", "v4_candidates": []}],
        "layout_preset_pre_override": "horizontal-2",
        "units": [_make_unit()],
        "comp_debug": {"v4_fallback_summary": {"fallback_used_count": 0}},
        "v4_fallback_traces": {"03-1": {"selection_path": "rank_1"}},
        "ai_preflight": {"enabled": False, "skipped": True},
    }
    # Caller-supplied values win over the defaults.
    return {**defaults, **overrides}
# -- module constants -----------------------------------------------------
def test_snapshot_filename_constant():
    """``SNAPSHOT_FILENAME`` is pinned to ``_reuse_snapshot.json``."""
    expected_name = "_reuse_snapshot.json"
    assert SNAPSHOT_FILENAME == expected_name
def test_snapshot_version_is_positive_int():
    """``SNAPSHOT_VERSION`` is an ``int`` that is at least 1."""
    version = SNAPSHOT_VERSION
    assert isinstance(version, int)
    assert version >= 1
def test_required_keys_include_contract_and_payload():
    """``REQUIRED_TOP_LEVEL_KEYS`` lists both contract keys and every payload axis."""
    # Bare contract / integrity keys.
    for contract_key in ("schema_version", "mdx_sha256"):
        assert contract_key in REQUIRED_TOP_LEVEL_KEYS

    # Payload axes per Stage 2 plan.
    payload_axes = (
        "slide_title",
        "slide_footer",
        "sections",
        "stage0_adapter_diagnostics",
        "stage0_normalized_assets",
        "v4_evidence",
        "layout_preset_pre_override",
        "units",
        "comp_debug",
        "v4_fallback_traces",
        "ai_preflight",
    )
    for axis in payload_axes:
        assert axis in REQUIRED_TOP_LEVEL_KEYS, f"missing from REQUIRED_TOP_LEVEL_KEYS: {axis}"
# -- build_snapshot -------------------------------------------------------
def test_build_snapshot_round_trips_through_json():
    """A built snapshot serializes to JSON and reloads with its bare keys intact."""
    snapshot = build_snapshot(**_make_build_kwargs())
    restored = json.loads(json.dumps(snapshot))
    assert restored["schema_version"] == SNAPSHOT_VERSION
    assert restored["mdx_sha256"] == "a" * 64
def test_build_snapshot_has_all_required_keys():
    """Every entry of ``REQUIRED_TOP_LEVEL_KEYS`` is present in a built snapshot."""
    snapshot = build_snapshot(**_make_build_kwargs())
    for required in REQUIRED_TOP_LEVEL_KEYS:
        assert required in snapshot, f"build_snapshot missing required key: {required}"
def test_build_snapshot_bare_keys_are_unwrapped_scalars():
    """``schema_version`` and ``mdx_sha256`` are stored directly, not in a wrapper dict."""
    snapshot = build_snapshot(**_make_build_kwargs())
    expected_bare = {"schema_version": SNAPSHOT_VERSION, "mdx_sha256": "a" * 64}
    for name, value in expected_bare.items():
        assert snapshot[name] == value
    # bare keys MUST NOT be wrapped — u4b mdx_sha256 check reads directly.
    for name in expected_bare:
        assert not isinstance(snapshot[name], dict)
def test_build_snapshot_provenance_wrapper_shape():
    """Every non-bare key is a dict with exactly value / source_path / upstream_step."""
    snapshot = build_snapshot(**_make_build_kwargs())
    unwrapped = ("schema_version", "mdx_sha256")
    wrapper_fields = {"value", "source_path", "upstream_step"}
    for name, wrapper in snapshot.items():
        if name not in unwrapped:
            assert isinstance(wrapper, dict), f"{name} is not wrapped"
            assert set(wrapper.keys()) == wrapper_fields, name
            # source_path must be a non-empty string.
            source_path = wrapper["source_path"]
            assert isinstance(source_path, str) and source_path
            # upstream_step must be a string beginning with "step".
            step = wrapper["upstream_step"]
            assert isinstance(step, str)
            assert step.startswith("step"), step
def test_build_snapshot_upstream_steps_stay_inside_reuse_boundary():
    """Every wrapped key's ``upstream_step`` lies in the Step 0/2/5/6 reuse boundary.

    A drift to a later step (e.g. ``step09``) would silently invite work
    outside the reuse window (Stage 1 root_cause), so the test fails loudly.

    Step 01 contributes only the ``mdx_sha256`` integrity key, a bare
    contract scalar with no wrapper, so ``step01`` is not expected in the
    payload provenance.
    """
    snapshot = build_snapshot(**_make_build_kwargs())
    in_boundary = {"step00", "step02", "step05", "step06"}
    bare_keys = {"schema_version", "mdx_sha256"}
    for key, entry in snapshot.items():
        if key in bare_keys:
            continue
        step = entry["upstream_step"]
        assert step in in_boundary, (
            f"key {key!r}: upstream_step {step!r} outside reuse boundary"
        )
def test_build_snapshot_units_carry_v4_candidates():
    """The single default unit keeps its candidate's template id, frame number and confidence."""
    snapshot = build_snapshot(**_make_build_kwargs())
    serialized_units = snapshot["units"]["value"]
    assert len(serialized_units) == 1
    first_candidate = serialized_units[0]["v4_candidates"][0]
    assert first_candidate["template_id"] == "tpl_a"
    assert first_candidate["frame_number"] == 13
    assert first_candidate["confidence"] == pytest.approx(0.91)
def test_build_snapshot_sections_preserve_alias_keys_and_subsections():
    """Section id, alias keys, sub-sections and heading number survive ``build_snapshot``."""
    section = _make_section(
        section_id="04-2",
        v4_alias_keys=["04-2.1"],
        sub_sections=[{"id": "04-2-sub-1"}],
        heading_number="2.1",
    )
    snapshot = build_snapshot(**_make_build_kwargs(sections=[section]))
    first = snapshot["sections"]["value"][0]
    assert first["section_id"] == "04-2"
    assert first["v4_alias_keys"] == ["04-2.1"]
    assert first["sub_sections"] == [{"id": "04-2-sub-1"}]
    assert first["heading_number"] == "2.1"
def test_build_snapshot_units_provenance_points_at_step06():
    """``units`` provenance names ``step06_composition_plan.json`` and ``step06``."""
    units_entry = build_snapshot(**_make_build_kwargs())["units"]
    assert "step06_composition_plan.json" in units_entry["source_path"]
    assert units_entry["upstream_step"] == "step06"
def test_build_snapshot_v4_evidence_provenance_points_at_step05():
    """``v4_evidence`` provenance names ``step05_v4_evidence.json`` and ``step05``."""
    evidence_entry = build_snapshot(**_make_build_kwargs())["v4_evidence"]
    assert "step05_v4_evidence.json" in evidence_entry["source_path"]
    assert evidence_entry["upstream_step"] == "step05"
def test_build_snapshot_ai_preflight_provenance_points_at_step00():
    """``ai_preflight`` provenance names ``step00_preconditions.json`` and ``step00``."""
    preflight_entry = build_snapshot(**_make_build_kwargs())["ai_preflight"]
    assert "step00_preconditions.json" in preflight_entry["source_path"]
    assert preflight_entry["upstream_step"] == "step00"
def test_build_snapshot_rejects_unjsonable_input():
    """``build_snapshot`` raises ``TypeError`` when a unit carries a non-JSON-safe value."""
    # A bare ``object()`` in ``notes`` is not JSON-safe.
    unserializable_unit = _make_unit(notes=[object()])
    kwargs = _make_build_kwargs(units=[unserializable_unit])
    with pytest.raises(TypeError):
        build_snapshot(**kwargs)
def test_build_snapshot_handles_none_optional_fields():
    """``None`` inputs land as ``None`` (titles) or ``{}`` (dict axes) without raising."""
    none_stays_none = ("slide_title", "slide_footer")
    none_becomes_empty_dict = (
        "stage0_adapter_diagnostics",
        "stage0_normalized_assets",
        "comp_debug",
        "v4_fallback_traces",
        "ai_preflight",
    )
    none_overrides = dict.fromkeys(none_stays_none + none_becomes_empty_dict)
    snapshot = build_snapshot(**_make_build_kwargs(**none_overrides))

    # None inputs land as None / {} consistently — never raise.
    for key in none_stays_none:
        assert snapshot[key]["value"] is None
    for key in none_becomes_empty_dict:
        assert snapshot[key]["value"] == {}
# -- serializer helpers ---------------------------------------------------
def test_serialize_section_preserves_all_documented_fields():
    """``serialize_section`` carries all seven section attributes into its output."""
    serialized = serialize_section(
        _make_section(
            heading_number="1.1",
            v4_alias_keys=["03-1.x"],
            sub_sections=[{"id": "s"}],
        )
    )
    assert serialized["section_id"] == "03-1"
    assert serialized["section_num"] == 1
    assert serialized["title"] == "DX status"
    assert serialized["raw_content"].startswith("- bullet")
    assert serialized["heading_number"] == "1.1"
    assert serialized["v4_alias_keys"] == ["03-1.x"]
    assert serialized["sub_sections"] == [{"id": "s"}]
def test_serialize_section_works_with_missing_optional_attrs():
    """An object lacking the optional attributes serializes to ``None`` / ``[]`` / ``[]``."""

    class _Minimal:
        # Only the four mandatory attributes; no heading_number,
        # v4_alias_keys or sub_sections.
        section_id = "x"
        section_num = 0
        title = "t"
        raw_content = "r"

    serialized = serialize_section(_Minimal())
    assert serialized["heading_number"] is None
    assert serialized["v4_alias_keys"] == []
    assert serialized["sub_sections"] == []
def test_serialize_unit_v4_candidates_unwrap_to_named_attrs():
    """The default candidate serializes to a dict of its named attributes plus ``v4_rank``."""
    expected = {
        "template_id": "tpl_a",
        "frame_id": "fid_a",
        "frame_number": 13,
        "confidence": pytest.approx(0.91),
        "label": "use_as_is",
        # u4 follow-up — Step 9 application-plan payload reads
        # ``c.v4_rank`` off each rehydrated candidate. Snapshot
        # serializer persists it via ``getattr(c, 'v4_rank', None)`` so
        # legacy duck types (no v4_rank attr) get None and modern V4Match
        # instances carry their rank (1/2/3/...).
        "v4_rank": None,
    }
    serialized = serialize_unit(_make_unit())
    first_candidate = serialized["v4_candidates"][0]
    assert first_candidate == expected
def test_serialize_unit_v4_candidates_persist_v4_rank_when_present():
    """A v4_candidate with v4_rank=2 (V4Match-shape duck type) round-trips."""
    candidate_with_rank = _V4Candidate(
        template_id="tpl_b",
        frame_id="fid_b",
        frame_number=14,
        confidence=0.82,
        label="light_edit",
        v4_rank=2,
    )
    serialized = serialize_unit(_make_unit(v4_candidates=[candidate_with_rank]))
    first_candidate = serialized["v4_candidates"][0]
    assert first_candidate["v4_rank"] == 2
def test_serialize_unit_handles_empty_v4_candidates():
    """A unit with no candidates serializes ``v4_candidates`` as an empty list."""
    serialized = serialize_unit(_make_unit(v4_candidates=[]))
    assert serialized["v4_candidates"] == []
def test_serialize_unit_provisional_default_false():
    """A default unit serializes ``provisional`` as exactly ``False``."""
    serialized = serialize_unit(_make_unit())
    assert serialized["provisional"] is False
def test_serialize_unit_provisional_true_preserved():
    """A unit built with ``provisional=True`` serializes it as exactly ``True``."""
    serialized = serialize_unit(_make_unit(provisional=True))
    assert serialized["provisional"] is True
def test_serialize_unit_round_trips_through_json():
    """A serialized unit can be dumped to JSON and reloaded with key fields intact."""
    encoded = json.dumps(serialize_unit(_make_unit()))
    restored = json.loads(encoded)
    assert restored["source_section_ids"] == ["03-1"]
    assert restored["frame_template_id"] == "tpl_a"
# -- validate_snapshot ----------------------------------------------------
def test_validate_snapshot_accepts_well_formed():
    """A freshly built snapshot validates against its own sha without raising."""
    matching_sha = "a" * 64
    snapshot = build_snapshot(**_make_build_kwargs())
    validate_snapshot(snapshot, expected_mdx_sha256=matching_sha)
def test_validate_snapshot_rejects_non_dict_input():
    """Passing a string instead of a dict raises ``SnapshotValidationError``."""
    not_a_snapshot = "not a dict"
    with pytest.raises(SnapshotValidationError):
        validate_snapshot(not_a_snapshot, expected_mdx_sha256="a" * 64)
def test_validate_snapshot_rejects_version_mismatch():
    """A ``schema_version`` other than ``SNAPSHOT_VERSION`` is rejected and named in the error."""
    snapshot = build_snapshot(**_make_build_kwargs())
    snapshot["schema_version"] = SNAPSHOT_VERSION + 999
    with pytest.raises(SnapshotValidationError) as excinfo:
        validate_snapshot(snapshot, expected_mdx_sha256="a" * 64)
    message = str(excinfo.value)
    assert "schema_version" in message
def test_validate_snapshot_rejects_missing_sha():
    """A snapshot without the ``mdx_sha256`` key is rejected and the key is named."""
    snapshot = build_snapshot(**_make_build_kwargs())
    snapshot.pop("mdx_sha256")
    with pytest.raises(SnapshotValidationError) as excinfo:
        validate_snapshot(snapshot, expected_mdx_sha256="a" * 64)
    message = str(excinfo.value)
    assert "mdx_sha256" in message
def test_validate_snapshot_rejects_empty_sha():
    """An empty-string ``mdx_sha256`` is rejected and the key is named in the error."""
    snapshot = build_snapshot(**_make_build_kwargs())
    snapshot["mdx_sha256"] = ""
    with pytest.raises(SnapshotValidationError) as excinfo:
        validate_snapshot(snapshot, expected_mdx_sha256="a" * 64)
    message = str(excinfo.value)
    assert "mdx_sha256" in message
def test_validate_snapshot_rejects_non_string_sha():
    """An integer ``mdx_sha256`` is rejected and the key is named in the error."""
    snapshot = build_snapshot(**_make_build_kwargs())
    snapshot["mdx_sha256"] = 12345
    with pytest.raises(SnapshotValidationError) as excinfo:
        validate_snapshot(snapshot, expected_mdx_sha256="a" * 64)
    message = str(excinfo.value)
    assert "mdx_sha256" in message
def test_validate_snapshot_rejects_sha_mismatch():
    """An expected sha differing from the stored one raises with ``mdx_sha256 mismatch``."""
    snapshot = build_snapshot(**_make_build_kwargs())
    different_sha = "b" * 64
    with pytest.raises(SnapshotValidationError) as excinfo:
        validate_snapshot(snapshot, expected_mdx_sha256=different_sha)
    message = str(excinfo.value)
    assert "mdx_sha256 mismatch" in message
def test_validate_snapshot_rejects_missing_required_key():
    """Removing the ``units`` key makes validation fail and names ``units``."""
    snapshot = build_snapshot(**_make_build_kwargs())
    snapshot.pop("units")
    with pytest.raises(SnapshotValidationError) as excinfo:
        validate_snapshot(snapshot, expected_mdx_sha256="a" * 64)
    message = str(excinfo.value)
    assert "units" in message
def test_validate_snapshot_rejects_unwrapped_payload_key():
    """A payload key holding a plain string instead of a wrapper dict is rejected."""
    snapshot = build_snapshot(**_make_build_kwargs())
    snapshot["units"] = "not a dict"
    with pytest.raises(SnapshotValidationError) as excinfo:
        validate_snapshot(snapshot, expected_mdx_sha256="a" * 64)
    message = str(excinfo.value)
    assert "units" in message
def test_validate_snapshot_rejects_wrapper_missing_value():
    """A wrapper lacking ``value`` is rejected and the error mentions ``value``."""
    snapshot = build_snapshot(**_make_build_kwargs())
    wrapper_without_value = {"source_path": "x", "upstream_step": "step06"}
    snapshot["units"] = wrapper_without_value
    with pytest.raises(SnapshotValidationError) as excinfo:
        validate_snapshot(snapshot, expected_mdx_sha256="a" * 64)
    message = str(excinfo.value)
    assert "value" in message
def test_validate_snapshot_rejects_wrapper_missing_source_path():
    """A wrapper lacking ``source_path`` is rejected and the error mentions it."""
    snapshot = build_snapshot(**_make_build_kwargs())
    wrapper_without_path = {"value": [], "upstream_step": "step06"}
    snapshot["units"] = wrapper_without_path
    with pytest.raises(SnapshotValidationError) as excinfo:
        validate_snapshot(snapshot, expected_mdx_sha256="a" * 64)
    message = str(excinfo.value)
    assert "source_path" in message
def test_validate_snapshot_rejects_wrapper_missing_upstream_step():
    """A wrapper lacking ``upstream_step`` is rejected and the error mentions it."""
    snapshot = build_snapshot(**_make_build_kwargs())
    wrapper_without_step = {"value": [], "source_path": "x"}
    snapshot["units"] = wrapper_without_step
    with pytest.raises(SnapshotValidationError) as excinfo:
        validate_snapshot(snapshot, expected_mdx_sha256="a" * 64)
    message = str(excinfo.value)
    assert "upstream_step" in message
def test_validate_snapshot_error_subclasses_value_error():
    """``SnapshotValidationError`` is a ``ValueError`` and is caught as one.

    Checks the class relationship directly, then confirms that a
    schema-version mismatch raised by ``validate_snapshot`` is caught by a
    plain ``ValueError`` handler and is a ``SnapshotValidationError``.
    """
    # Pin the subclass relationship itself; ``pytest.raises(ValueError)``
    # alone would also be satisfied by an unrelated plain ValueError.
    assert issubclass(SnapshotValidationError, ValueError)

    snap = build_snapshot(**_make_build_kwargs())
    # Offset from the live constant so the value can never equal
    # SNAPSHOT_VERSION (a literal 999 would stop mismatching at v999).
    snap["schema_version"] = SNAPSHOT_VERSION + 999
    # u4b will pre-catch SnapshotValidationError, but the broader
    # `except ValueError` net must still pick this up.
    with pytest.raises(ValueError) as excinfo:
        validate_snapshot(snap, expected_mdx_sha256="a" * 64)
    assert isinstance(excinfo.value, SnapshotValidationError)