Files
C.E.L_Slide_test2/tests/test_phase_z2_reuse_from_entry.py
kyeongmin b4be6c1cd0
Some checks failed
Multi-MDX Regression (IMP-91) / multi-mdx-regression (push) Failing after 25s
feat(#72): IMP-43 u1~u8 --reuse-from incremental rerun (Step 0/1/2/5/6 reuse + Step 7+ re-execute)
u1 argparse --reuse-from PREV_RUN_ID + post-merge fail-closed guard (rejects
layout/zone_geometry/zone_section/image override axes by name; only
--override-frame is preserved).
u2 src/phase_z2_reuse_snapshot.py — JSON-only Step 6 snapshot with mdx_sha256
integrity key and {value, source_path, upstream_step} provenance per axis
(pickle forbidden per Stage 2 guardrail).
u3 _write_reuse_snapshot at the Step 6 boundary; soft-fails to stderr without
aborting the seed run.
u4 prev_run_dir RO copy of step00/01/02/05/06 + _reuse_snapshot.json into
new run_dir, state rehydration, reuse marker, frame-override application on
restored units, Step 7+ resume.
u4b fail-closed for missing prev_run_dir / missing/corrupt/invalid snapshot /
mdx_sha256 mismatch / accidental new==prev write, with value+path+upstream
diagnostics per axis.
u5 reuse_from Optional[str] threaded through run_phase_z2_mvp1 signature and
CLI dispatch; default None preserves byte-identical pre-IMP-43 behavior.
u6 Front /api/run optional reuseFromRunId forwarding (vite.config.ts +
designAgentApi.ts + run_pipeline_reuse_from.test.ts).
u7a fast CI equivalence (1 mdx × 1 layout × 2 frames); step13 whitelist =
run_id/timestamps/prev_run_id only. u7b 3 layouts × 3 mdx × 32 frames
sweep gated by pytest.mark.sweep (registered in pyproject.toml; default CI
must use -m 'not sweep').
u8 scripts/measure_reuse_savings.py argv-driven A/B/C harness with frame
pin self-discovery + seed-time exclusion; status board §8 TBD anchor
(issue-body 50-70% / 10-20s→3-8s claim explicitly unverified, not mirrored).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-24 22:44:27 +09:00

556 lines
19 KiB
Python

"""IMP-43 (#72) u4 — focused tests for the --reuse-from entry helpers.

u4 scope (per the Stage 2 Exit Report):
  - Pure path resolution, file copy, snapshot load+validate, MdxSection +
    CompositionUnit rehydration, and reuse-marker writing.
  - Helpers RAISE on missing artifacts / corrupt snapshot / mdx_sha256
    mismatch — u4b adds the stderr + sys.exit(2) translation and the
    prev_run_dir == new_run_dir accidental-write guard around them.
  - The kwarg threading + the in-``run_phase_z2_mvp1`` branch that
    invokes these helpers land in u5.

Tested helpers (``src/phase_z2_pipeline.py``):
  * ``_resolve_reuse_from_prev_run_dir``
  * ``_copy_reuse_artifacts_from_prev_run``
  * ``_load_and_validate_reuse_snapshot``
  * ``_rehydrate_mdx_sections_from_snapshot``
  * ``_rehydrate_composition_units_from_snapshot``
  * ``_write_reuse_marker``
  * ``_RehydratedV4Candidate`` (V4Match-shape duck type)
  * ``_REUSE_STEP_ARTIFACTS`` / ``REUSE_MARKER_FILENAME`` /
    ``REUSE_MARKER_SCHEMA_VERSION``
"""
from __future__ import annotations
import hashlib
import json
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Optional
import pytest
import src.phase_z2_pipeline as _pz2
from src.phase_z2_composition import CompositionUnit
from src.phase_z2_reuse_snapshot import (
SNAPSHOT_FILENAME,
SNAPSHOT_VERSION,
SnapshotValidationError,
build_snapshot,
)
# -- synthetic duck-typed inputs (mirror u3 test fixture) -----------------
@dataclass
class _Section:
section_id: str
section_num: int
title: str
raw_content: str
heading_number: Optional[str] = None
v4_alias_keys: list = field(default_factory=list)
sub_sections: list = field(default_factory=list)
@dataclass
class _V4Candidate:
template_id: str
frame_id: str
frame_number: int
confidence: float
label: str
@dataclass
class _Unit:
source_section_ids: list
merge_type: str
frame_template_id: str
frame_id: str
frame_number: int
confidence: float
label: str
phase_z_status: str
raw_content: str
title: str
score: float
v4_rank: Optional[int] = 1
selection_path: str = "rank_1"
fallback_reason: Optional[str] = None
rationale: dict = field(default_factory=dict)
auto_selectable: bool = True
filter_reasons: list = field(default_factory=list)
notes: list = field(default_factory=list)
v4_candidates: list = field(default_factory=list)
provisional: bool = False
def _mdx_text() -> str:
return "# Slide\n\n## 03-1 DX status\n\n- bullet one\n- bullet two\n"
def _build_canonical_snapshot(
    *,
    mdx_source_text: Optional[str] = None,
    layout_preset: str = "single",
) -> dict:
    """Build a one-section / one-unit snapshot through ``build_snapshot``.

    Args:
        mdx_source_text: Text whose SHA-256 becomes the snapshot's
            ``mdx_sha256``; falls back to ``_mdx_text()`` when ``None``.
        layout_preset: Forwarded as ``layout_preset_pre_override``.

    Returns:
        Whatever ``build_snapshot`` returns for the synthetic inputs.
    """
    if mdx_source_text is None:
        mdx_source_text = _mdx_text()
    digest = hashlib.sha256(mdx_source_text.encode("utf-8")).hexdigest()

    bullets = "- bullet one\n- bullet two"
    # The single candidate appears twice: as an attribute-style object on
    # the unit and as a plain dict inside the Step 5 evidence list.
    candidate_fields = {
        "template_id": "tpl_a",
        "frame_id": "fid_a",
        "frame_number": 13,
        "confidence": 0.91,
        "label": "use_as_is",
    }
    candidate = _V4Candidate(**candidate_fields)

    only_section = _Section(
        section_id="03-1",
        section_num=1,
        title="DX status",
        raw_content=bullets,
        heading_number="3.1",
        v4_alias_keys=["03-1.1"],
        sub_sections=[],
    )
    only_unit = _Unit(
        source_section_ids=["03-1"],
        merge_type="single",
        frame_template_id="tpl_a",
        frame_id="fid_a",
        frame_number=13,
        confidence=0.91,
        label="use_as_is",
        phase_z_status="auto_renderable",
        raw_content=bullets,
        title="DX status",
        score=0.91,
        v4_candidates=[candidate],
        provisional=False,
        auto_selectable=True,
        filter_reasons=[],
        notes=["a note"],
        rationale={"weight": 1.0},
    )
    evidence = [
        {
            "section_id": "03-1",
            "v4_candidates": [dict(candidate_fields)],
            "candidate_status": "ok",
        }
    ]

    return build_snapshot(
        mdx_sha256=digest,
        slide_title="Slide",
        slide_footer=None,
        sections=[only_section],
        stage0_adapter_diagnostics={"used": True, "fallback_reason": None},
        stage0_normalized_assets={"popups": [], "images": [], "tables": []},
        v4_evidence=evidence,
        layout_preset_pre_override=layout_preset,
        units=[only_unit],
        comp_debug={"v4_fallback_summary": {"fallback_used_count": 0}},
        v4_fallback_traces={"03-1": {"selection_path": "rank_1"}},
        ai_preflight={"enabled": False, "skipped": True},
    )
def _seed_prev_run_dir(prev_run_dir: Path, *, snapshot: dict) -> None:
    """Populate ``prev_run_dir`` with the Step 0/1/2/5/6 artifacts plus
    the reuse snapshot — minimal but valid surface for u4 helpers.

    Args:
        prev_run_dir: Directory to seed; ``steps/`` is created beneath it.
        snapshot: Snapshot dict serialised to ``SNAPSHOT_FILENAME`` at the
            root of ``prev_run_dir``.
    """
    steps_dir = prev_run_dir / "steps"
    steps_dir.mkdir(parents=True, exist_ok=True)
    for fname in _pz2._REUSE_STEP_ARTIFACTS:
        # JSON-shaped surface — exact shape doesn't matter for u4 (the
        # copy helper doesn't introspect contents); just must exist.
        # json.dumps (rather than string interpolation) keeps the
        # placeholder valid JSON whatever characters the name contains.
        if fname.endswith(".json"):
            payload = json.dumps({"name": fname})
        else:
            payload = "raw mdx body bytes"
        (steps_dir / fname).write_text(payload, encoding="utf-8")
    (prev_run_dir / SNAPSHOT_FILENAME).write_text(
        json.dumps(snapshot, ensure_ascii=False, indent=2),
        encoding="utf-8",
    )
# -- _REUSE_STEP_ARTIFACTS constant ---------------------------------------
def test_reuse_step_artifacts_locks_stage2_boundary():
    """Lock the Stage 2 reuse boundary to the Step 0/1/2/5/6 artifacts.

    Step 3/4 are intentionally not listed. Per the original note,
    step03 / step04 are written after Step 6 (around
    src/phase_z2_pipeline.py:5931 / 5964, before the Step 7 artifact at
    ~6294) but carry step_status='trace-only' /
    pipeline_path_connected=False: they are diagnostic projections of
    the Step 6 debug_zones rather than inputs Step 7+ rehydrates from.
    """
    expected_artifacts = (
        "step00_preconditions.json",
        "step01_mdx_upload.json",
        "step01_mdx_source.md",
        "step02_normalized.json",
        "step05_v4_evidence.json",
        "step06_composition_plan.json",
    )
    assert _pz2._REUSE_STEP_ARTIFACTS == expected_artifacts
def test_reuse_marker_filename_is_dotfile_at_run_dir_root():
    """Pin the reuse-marker file name to its exact literal.

    NOTE(review): the pinned name is underscore-prefixed, not a leading-dot
    "dotfile" as the test name suggests.
    """
    pinned_name = "_reuse_marker.json"
    assert _pz2.REUSE_MARKER_FILENAME == pinned_name
# -- _resolve_reuse_from_prev_run_dir -------------------------------------
def test_resolve_prev_run_dir_returns_runs_dir_phase_z2_path():
    """The resolver maps a run id to ``RUNS_DIR / <run_id> / "phase_z2"``."""
    resolved = _pz2._resolve_reuse_from_prev_run_dir("20260524_120000_phase_z2")
    assert resolved == _pz2.RUNS_DIR / "20260524_120000_phase_z2" / "phase_z2"
def test_resolve_prev_run_dir_does_not_check_existence(tmp_path: Path):
    """Resolving an unknown run id still returns a ``Path`` without raising.

    The helper is meant to be pure path computation; the missing-prev-run
    case is handled by u4b, not here.
    """
    resolved = _pz2._resolve_reuse_from_prev_run_dir("never_existed_run_id")
    assert isinstance(resolved, Path)
    # Nothing lives at that location, yet the call above returned cleanly.
    path_is_present = resolved.exists()
    assert not path_is_present
# -- _copy_reuse_artifacts_from_prev_run ----------------------------------
def test_copy_reuse_artifacts_copies_all_step_files(tmp_path: Path):
    """Every reuse step artifact lands under ``new/steps`` and is reported
    in the returned mapping as ``steps/<name>``."""
    src_dir = tmp_path / "prev" / "phase_z2"
    dst_dir = tmp_path / "new" / "phase_z2"
    _seed_prev_run_dir(src_dir, snapshot=_build_canonical_snapshot())

    manifest = _pz2._copy_reuse_artifacts_from_prev_run(src_dir, dst_dir)

    for fname in _pz2._REUSE_STEP_ARTIFACTS:
        copied_file = dst_dir / "steps" / fname
        assert copied_file.exists(), f"missing copy: {fname}"
        assert manifest[fname] == f"steps/{fname}"
def test_copy_reuse_artifacts_copies_snapshot_to_run_dir_root(tmp_path: Path):
    """The snapshot file is copied to the new run dir root and reported
    under its own bare file name."""
    src_dir = tmp_path / "prev" / "phase_z2"
    dst_dir = tmp_path / "new" / "phase_z2"
    _seed_prev_run_dir(src_dir, snapshot=_build_canonical_snapshot())

    manifest = _pz2._copy_reuse_artifacts_from_prev_run(src_dir, dst_dir)

    # Per the u3 contract the snapshot sits at the run_dir root, not in steps/.
    snapshot_copy = dst_dir / SNAPSHOT_FILENAME
    assert snapshot_copy.exists()
    assert manifest[SNAPSHOT_FILENAME] == SNAPSHOT_FILENAME
def test_copy_reuse_artifacts_creates_steps_subdir_if_absent(tmp_path: Path):
    """The copy helper creates ``new/steps`` when it does not exist yet."""
    src_dir = tmp_path / "prev" / "phase_z2"
    dst_dir = tmp_path / "new" / "phase_z2"
    _seed_prev_run_dir(src_dir, snapshot=_build_canonical_snapshot())

    dst_steps = dst_dir / "steps"
    # Precondition: the destination steps directory has not been created.
    assert not dst_steps.exists()

    _pz2._copy_reuse_artifacts_from_prev_run(src_dir, dst_dir)

    assert dst_steps.is_dir()
def test_copy_reuse_artifacts_missing_step_raises_filenotfound(
    tmp_path: Path,
):
    """A missing step artifact raises ``FileNotFoundError`` whose message
    names both the file and ``prev_run_dir``."""
    src_dir = tmp_path / "prev" / "phase_z2"
    dst_dir = tmp_path / "new" / "phase_z2"
    _seed_prev_run_dir(src_dir, snapshot=_build_canonical_snapshot())

    # Remove one required step artifact from the seeded previous run.
    missing_artifact = src_dir / "steps" / "step05_v4_evidence.json"
    missing_artifact.unlink()

    with pytest.raises(FileNotFoundError) as excinfo:
        _pz2._copy_reuse_artifacts_from_prev_run(src_dir, dst_dir)

    message = str(excinfo.value)
    assert "step05_v4_evidence.json" in message
    assert "prev_run_dir" in message
def test_copy_reuse_artifacts_missing_snapshot_raises_filenotfound(
    tmp_path: Path,
):
    """A missing snapshot file raises ``FileNotFoundError`` whose message
    names the snapshot file."""
    src_dir = tmp_path / "prev" / "phase_z2"
    dst_dir = tmp_path / "new" / "phase_z2"
    _seed_prev_run_dir(src_dir, snapshot=_build_canonical_snapshot())

    seeded_snapshot = src_dir / SNAPSHOT_FILENAME
    seeded_snapshot.unlink()

    with pytest.raises(FileNotFoundError) as excinfo:
        _pz2._copy_reuse_artifacts_from_prev_run(src_dir, dst_dir)

    assert SNAPSHOT_FILENAME in str(excinfo.value)
def test_copy_reuse_artifacts_byte_identical_copy(tmp_path: Path):
    """Copied files must be byte-for-byte equal to their sources."""
    src_dir = tmp_path / "prev" / "phase_z2"
    dst_dir = tmp_path / "new" / "phase_z2"
    _seed_prev_run_dir(src_dir, snapshot=_build_canonical_snapshot())

    _pz2._copy_reuse_artifacts_from_prev_run(src_dir, dst_dir)

    for fname in _pz2._REUSE_STEP_ARTIFACTS:
        source_bytes = (src_dir / "steps" / fname).read_bytes()
        copied_bytes = (dst_dir / "steps" / fname).read_bytes()
        assert source_bytes == copied_bytes

    source_snapshot = (src_dir / SNAPSHOT_FILENAME).read_bytes()
    copied_snapshot = (dst_dir / SNAPSHOT_FILENAME).read_bytes()
    assert source_snapshot == copied_snapshot
# -- _load_and_validate_reuse_snapshot ------------------------------------
def test_load_and_validate_returns_snapshot_dict(tmp_path: Path):
    """A valid snapshot with matching MDX text loads and exposes its
    schema version and slide title."""
    mdx = _mdx_text()
    serialized = json.dumps(
        _build_canonical_snapshot(mdx_source_text=mdx),
        ensure_ascii=False,
        indent=2,
    )
    (tmp_path / SNAPSHOT_FILENAME).write_text(serialized, encoding="utf-8")

    result = _pz2._load_and_validate_reuse_snapshot(
        tmp_path, mdx_source_text=mdx
    )

    assert result["schema_version"] == SNAPSHOT_VERSION
    assert result["slide_title"]["value"] == "Slide"
def test_load_and_validate_mdx_sha256_mismatch_raises(tmp_path: Path):
    """A snapshot built for one MDX text is rejected for a different text.

    Only the raise is asserted here: ``SnapshotValidationError`` with an
    "mdx_sha256 mismatch" message. Translating that into exit code 2 is
    u4b's job.
    """
    built_for = "# Slide A\n"
    passed_in = "# Slide B (different bytes)\n"
    serialized = json.dumps(
        _build_canonical_snapshot(mdx_source_text=built_for),
        ensure_ascii=False,
        indent=2,
    )
    (tmp_path / SNAPSHOT_FILENAME).write_text(serialized, encoding="utf-8")

    with pytest.raises(SnapshotValidationError) as excinfo:
        _pz2._load_and_validate_reuse_snapshot(
            tmp_path, mdx_source_text=passed_in
        )

    assert "mdx_sha256 mismatch" in str(excinfo.value)
def test_load_and_validate_corrupt_json_raises(tmp_path: Path):
    """Unparseable snapshot content surfaces as ``json.JSONDecodeError``."""
    snapshot_path = tmp_path / SNAPSHOT_FILENAME
    snapshot_path.write_text("{ not valid json", encoding="utf-8")
    mdx = _mdx_text()

    with pytest.raises(json.JSONDecodeError):
        _pz2._load_and_validate_reuse_snapshot(tmp_path, mdx_source_text=mdx)
def test_load_and_validate_missing_snapshot_file_raises(tmp_path: Path):
    """With no snapshot file in the directory the loader raises
    ``FileNotFoundError``; u4b turns that into exit 2 with a provenance
    message."""
    mdx = _mdx_text()

    with pytest.raises(FileNotFoundError):
        _pz2._load_and_validate_reuse_snapshot(tmp_path, mdx_source_text=mdx)
def test_load_and_validate_schema_version_mismatch_raises(tmp_path: Path):
    """A snapshot whose ``schema_version`` was bumped by one is rejected
    with a ``SnapshotValidationError`` mentioning ``schema_version``."""
    mdx = _mdx_text()
    tampered = _build_canonical_snapshot(mdx_source_text=mdx)
    # Deliberately diverge from the version the validator expects.
    tampered["schema_version"] = SNAPSHOT_VERSION + 1
    serialized = json.dumps(tampered, ensure_ascii=False, indent=2)
    (tmp_path / SNAPSHOT_FILENAME).write_text(serialized, encoding="utf-8")

    with pytest.raises(SnapshotValidationError) as excinfo:
        _pz2._load_and_validate_reuse_snapshot(tmp_path, mdx_source_text=mdx)

    assert "schema_version" in str(excinfo.value)
# -- _rehydrate_mdx_sections_from_snapshot --------------------------------
def test_rehydrate_sections_returns_mdxsection_instances():
    """Rehydration yields one ``MdxSection`` carrying the snapshot's id,
    title and raw content."""
    restored = _pz2._rehydrate_mdx_sections_from_snapshot(
        _build_canonical_snapshot()
    )

    assert len(restored) == 1
    first = restored[0]
    assert isinstance(first, _pz2.MdxSection)
    assert first.section_id == "03-1"
    assert first.title == "DX status"
    assert first.raw_content == "- bullet one\n- bullet two"
def test_rehydrate_sections_preserves_heading_number_and_aliases():
    """Heading number, V4 alias keys and (empty) sub-sections survive the
    snapshot round trip."""
    restored = _pz2._rehydrate_mdx_sections_from_snapshot(
        _build_canonical_snapshot()
    )
    first = restored[0]

    assert first.heading_number == "3.1"
    assert first.v4_alias_keys == ["03-1.1"]
    assert first.sub_sections == []
# -- _rehydrate_composition_units_from_snapshot ---------------------------
def test_rehydrate_units_returns_composition_unit_instances():
    """Rehydration yields exactly one real ``CompositionUnit``."""
    restored = _pz2._rehydrate_composition_units_from_snapshot(
        _build_canonical_snapshot()
    )

    assert len(restored) == 1
    assert isinstance(restored[0], CompositionUnit)
def test_rehydrate_units_preserves_core_fields():
    """The restored unit carries the same core field values that were fed
    into the snapshot (floats compared approximately)."""
    restored = _pz2._rehydrate_composition_units_from_snapshot(
        _build_canonical_snapshot()
    )
    unit = restored[0]

    # Attribute name -> expected value, in the order the checks should run.
    expected_fields = {
        "source_section_ids": ["03-1"],
        "merge_type": "single",
        "frame_template_id": "tpl_a",
        "frame_id": "fid_a",
        "frame_number": 13,
        "confidence": pytest.approx(0.91),
        "label": "use_as_is",
        "phase_z_status": "auto_renderable",
        "title": "DX status",
        "score": pytest.approx(0.91),
    }
    for attr_name, wanted in expected_fields.items():
        assert getattr(unit, attr_name) == wanted
def test_rehydrate_units_preserves_provisional_and_auto_selectable():
    """Flags and the auxiliary list/dict fields survive rehydration."""
    restored = _pz2._rehydrate_composition_units_from_snapshot(
        _build_canonical_snapshot()
    )
    unit = restored[0]

    # Identity checks: the flags must be real booleans, not truthy values.
    assert unit.provisional is False
    assert unit.auto_selectable is True
    assert unit.filter_reasons == []
    assert unit.notes == ["a note"]
    assert unit.rationale == {"weight": 1.0}
def test_rehydrate_units_v4_candidates_expose_attribute_access():
    """Restored V4 candidates must support attribute access.

    ``_apply_frame_override_to_unit`` reads ``cand.template_id`` /
    ``cand.frame_id`` / etc. from ``unit.v4_candidates``, so the entries
    have to be ``_RehydratedV4Candidate`` objects rather than raw dicts.
    """
    restored = _pz2._rehydrate_composition_units_from_snapshot(
        _build_canonical_snapshot()
    )
    candidates = restored[0].v4_candidates

    assert len(candidates) == 1
    candidate = candidates[0]
    assert isinstance(candidate, _pz2._RehydratedV4Candidate)
    assert candidate.template_id == "tpl_a"
    assert candidate.frame_id == "fid_a"
    assert candidate.frame_number == 13
    assert candidate.confidence == pytest.approx(0.91)
    assert candidate.label == "use_as_is"
def test_rehydrate_units_empty_v4_candidates_yields_empty_list():
    """An empty ``v4_candidates`` list in the snapshot stays an empty list."""
    snapshot = _build_canonical_snapshot()
    first_unit_payload = snapshot["units"]["value"][0]
    first_unit_payload["v4_candidates"] = []

    restored = _pz2._rehydrate_composition_units_from_snapshot(snapshot)

    assert restored[0].v4_candidates == []
# -- _write_reuse_marker --------------------------------------------------
def test_write_reuse_marker_writes_json_with_prev_run_id(tmp_path: Path):
    """The marker is written at the run dir root as JSON carrying the
    schema version, previous run id and snapshot file name."""
    copied_map = {
        "step00_preconditions.json": "steps/step00_preconditions.json",
        SNAPSHOT_FILENAME: SNAPSHOT_FILENAME,
    }

    marker_path = _pz2._write_reuse_marker(
        tmp_path,
        prev_run_id="20260524_010101_phase_z2",
        copied_artifacts=copied_map,
    )

    assert marker_path == tmp_path / _pz2.REUSE_MARKER_FILENAME
    payload = json.loads(marker_path.read_text(encoding="utf-8"))
    assert payload["schema_version"] == _pz2.REUSE_MARKER_SCHEMA_VERSION
    assert payload["reuse_from_prev_run_id"] == "20260524_010101_phase_z2"
    assert payload["snapshot_filename"] == SNAPSHOT_FILENAME
def test_write_reuse_marker_records_copied_artifacts_and_boundary(
    tmp_path: Path,
):
    """The marker echoes the copied-artifact mapping, lists the boundary
    step artifacts, and records Step 7 as the resume point."""
    copied_map = {}
    for fname in _pz2._REUSE_STEP_ARTIFACTS:
        copied_map[fname] = f"steps/{fname}"
    copied_map[SNAPSHOT_FILENAME] = SNAPSHOT_FILENAME

    _pz2._write_reuse_marker(
        tmp_path,
        prev_run_id="20260524_010101_phase_z2",
        copied_artifacts=copied_map,
    )

    marker_file = tmp_path / _pz2.REUSE_MARKER_FILENAME
    payload = json.loads(marker_file.read_text(encoding="utf-8"))
    assert payload["copied_artifacts"] == copied_map
    assert payload["boundary_steps"] == list(_pz2._REUSE_STEP_ARTIFACTS)
    assert payload["resume_at_step"] == 7
# -- module surface anchors -----------------------------------------------
def test_pipeline_exposes_all_u4_helpers():
    """Every u4 helper and constant must stay reachable as a module-level
    attribute of ``phase_z2_pipeline``, since u5 wires them into
    ``run_phase_z2_mvp1``."""
    required_surface = (
        "_resolve_reuse_from_prev_run_dir",
        "_copy_reuse_artifacts_from_prev_run",
        "_load_and_validate_reuse_snapshot",
        "_rehydrate_mdx_sections_from_snapshot",
        "_rehydrate_composition_units_from_snapshot",
        "_write_reuse_marker",
        "_RehydratedV4Candidate",
        "_REUSE_STEP_ARTIFACTS",
        "REUSE_MARKER_FILENAME",
        "REUSE_MARKER_SCHEMA_VERSION",
    )
    for name in required_surface:
        is_exposed = hasattr(_pz2, name)
        assert is_exposed, f"u4 surface missing: {name}"
def test_pipeline_run_signature_reuse_from_is_kw_only_optional_none():
    """Forward-direction lock on the ``reuse_from`` kwarg (u5).

    ``run_phase_z2_mvp1`` must accept ``reuse_from`` as a keyword-only
    parameter (after the ``*`` barrier) whose default is ``None``, so an
    absent flag keeps the pre-u5 behaviour. The earlier ``until_u5``
    regression has flipped; this assertion stays so that future signature
    drift (a positional promotion, a changed default) fails loudly.
    """
    import inspect

    params = inspect.signature(_pz2.run_phase_z2_mvp1).parameters
    assert "reuse_from" in params, (
        "u5 must thread reuse_from into run_phase_z2_mvp1 — kwarg missing. "
        f"current params: {list(params)}"
    )

    reuse_param = params["reuse_from"]
    assert reuse_param.kind is inspect.Parameter.KEYWORD_ONLY, (
        f"reuse_from must be keyword-only (after the ``*`` barrier); "
        f"got kind={reuse_param.kind}"
    )
    assert reuse_param.default is None, (
        f"reuse_from must default to None to preserve pre-u5 behaviour; "
        f"got default={reuse_param.default!r}"
    )