Files
C.E.L_Slide_test2/tests/test_phase_z2_reuse_snapshot_write.py
kyeongmin b4be6c1cd0
Some checks failed
Multi-MDX Regression (IMP-91) / multi-mdx-regression (push) Failing after 25s
feat(#72): IMP-43 u1~u8 --reuse-from incremental rerun (Step 0/1/2/5/6 reuse + Step 7+ re-execute)
u1 argparse --reuse-from PREV_RUN_ID + post-merge fail-closed guard (rejects
layout/zone_geometry/zone_section/image override axes by name; only
--override-frame is preserved).
u2 src/phase_z2_reuse_snapshot.py — JSON-only Step 6 snapshot with mdx_sha256
integrity key and {value, source_path, upstream_step} provenance per axis
(pickle forbidden per Stage 2 guardrail).
u3 _write_reuse_snapshot at the Step 6 boundary; soft-fails to stderr without
aborting the seed run.
u4 prev_run_dir RO copy of step00/01/02/05/06 + _reuse_snapshot.json into
new run_dir, state rehydration, reuse marker, frame-override application on
restored units, Step 7+ resume.
u4b fail-closed for missing prev_run_dir / missing/corrupt/invalid snapshot /
mdx_sha256 mismatch / accidental new==prev write, with value+path+upstream
diagnostics per axis.
u5 reuse_from Optional[str] threaded through run_phase_z2_mvp1 signature and
CLI dispatch; default None preserves byte-identical pre-IMP-43 behavior.
u6 Front /api/run optional reuseFromRunId forwarding (vite.config.ts +
designAgentApi.ts + run_pipeline_reuse_from.test.ts).
u7a fast CI equivalence (1 mdx × 1 layout × 2 frames); step13 whitelist =
run_id/timestamps/prev_run_id only. u7b 3 layouts × 3 mdx × 32 frames
sweep gated by pytest.mark.sweep (registered in pyproject.toml; default CI
must use -m 'not sweep').
u8 scripts/measure_reuse_savings.py argv-driven A/B/C harness with frame
pin self-discovery + seed-time exclusion; status board §8 TBD anchor
(issue-body 50-70% / 10-20s→3-8s claim explicitly unverified, not mirrored).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-24 22:44:27 +09:00

283 lines
9.8 KiB
Python

"""IMP-43 (#72) u3 — focused tests for the Step 6 reuse snapshot writer.
u3 scope (per the Stage 2 Exit Report):
- ``_write_reuse_snapshot`` writes ``run_dir/_reuse_snapshot.json`` *after*
the Step 6 artifact lands; failure WARNS and CONTINUES (the helper does
NOT raise out of the main pipeline run).
- The Step 6 artifact data dict records the run_dir-relative sidecar path
as ``data.reuse_snapshot_path`` (additive informational field, always
set to ``SNAPSHOT_FILENAME`` regardless of write success — u4 will
fail-closed on missing / invalid sidecar via u2's ``validate_snapshot``).
The helper is tested in isolation (no full pipeline run) — pipeline call
site presence is asserted structurally so we exercise behaviour without
re-running Step 0~6 inside the test process. End-to-end equivalence under
``--reuse-from`` is u7a / u7b scope.
"""
from __future__ import annotations
import json
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any, Optional
import pytest
import src.phase_z2_pipeline as _pz2
from src.phase_z2_reuse_snapshot import (
SNAPSHOT_FILENAME,
SNAPSHOT_VERSION,
SnapshotValidationError,
validate_snapshot,
)
# -- synthetic duck-typed inputs ------------------------------------------
@dataclass
class _Section:
    """Minimal duck-typed stand-in for a section object fed to the writer.

    Only the attribute surface is modelled; the test module builds one of
    these by hand instead of running the real parsing steps.  Field order
    is significant because the dataclass-generated ``__init__`` accepts
    positional arguments.
    """

    # Required identity / content attributes.
    section_id: str
    section_num: int
    title: str
    raw_content: str

    # Optional attributes; each instance gets its own fresh list.
    heading_number: Optional[str] = None
    v4_alias_keys: list[Any] = field(default_factory=list)
    sub_sections: list[Any] = field(default_factory=list)
@dataclass
class _V4Candidate:
    """Minimal duck-typed stand-in for one V4 frame candidate.

    All five attributes are required and have no defaults; positional
    construction follows the declaration order below.
    """

    template_id: str
    frame_id: str
    frame_number: int
    confidence: float
    label: str
@dataclass
class _Unit:
    """Minimal duck-typed stand-in for a composition unit passed to the writer.

    The first eleven attributes are required; the remainder carry the
    defaults the tests rely on.  Mutable defaults are produced through
    ``default_factory`` so instances never share a list or dict.
    """

    # -- required attributes (positional order matters) -------------------
    source_section_ids: list[Any]
    merge_type: str
    frame_template_id: str
    frame_id: str
    frame_number: int
    confidence: float
    label: str
    phase_z_status: str
    raw_content: str
    title: str
    score: float

    # -- defaulted scalar attributes ---------------------------------------
    v4_rank: Optional[int] = 1
    selection_path: str = "rank_1"
    fallback_reason: Optional[str] = None
    rationale: dict[Any, Any] = field(default_factory=dict)
    auto_selectable: bool = True

    # -- defaulted collection attributes -----------------------------------
    filter_reasons: list[Any] = field(default_factory=list)
    notes: list[Any] = field(default_factory=list)
    v4_candidates: list[Any] = field(default_factory=list)
    provisional: bool = False
def _make_kwargs(**overrides: Any) -> dict[str, Any]:
    """Build a complete keyword-argument set for ``_write_reuse_snapshot``.

    The baseline describes one section ("03-1"), one unit and one V4
    candidate.  Any keyword passed in ``overrides`` replaces the baseline
    entry of the same name (or is appended if the name is new).

    Returns:
        A fresh dict suitable for ``**``-expansion into the helper call.
    """
    section_id = "03-1"
    section_title = "DX status"
    bullet_body = "- bullet one\n- bullet two"

    # The same five values describe both the candidate object and the
    # plain-dict evidence record, so they are declared once.
    candidate_fields = {
        "template_id": "tpl_a",
        "frame_id": "fid_a",
        "frame_number": 13,
        "confidence": 0.91,
        "label": "use_as_is",
    }
    candidate = _V4Candidate(**candidate_fields)

    only_section = _Section(
        section_id=section_id,
        section_num=1,
        title=section_title,
        raw_content=bullet_body,
    )
    only_unit = _Unit(
        source_section_ids=[section_id],
        merge_type="single",
        frame_template_id=candidate_fields["template_id"],
        frame_id=candidate_fields["frame_id"],
        frame_number=candidate_fields["frame_number"],
        confidence=candidate_fields["confidence"],
        label=candidate_fields["label"],
        phase_z_status="auto_renderable",
        raw_content=bullet_body,
        title=section_title,
        score=0.91,
        v4_candidates=[candidate],
    )

    baseline: dict[str, Any] = {
        "mdx_source_text": "# Slide\n\n## 03-1 DX status\n\n- bullet one\n- bullet two\n",
        "slide_title": "Slide",
        "slide_footer": None,
        "sections": [only_section],
        "stage0_adapter_diagnostics": {"used": True, "fallback_reason": None},
        "stage0_normalized_assets": {"popups": [], "images": [], "tables": []},
        "v4_evidence": [
            {
                "section_id": section_id,
                "v4_candidates": [dict(candidate_fields)],
                "candidate_status": "ok",
            }
        ],
        "layout_preset_pre_override": "single",
        "units": [only_unit],
        "comp_debug": {"v4_fallback_summary": {"fallback_used_count": 0}},
        "v4_fallback_traces": {section_id: {"selection_path": "rank_1"}},
        "ai_preflight": {"enabled": False, "skipped": True},
    }
    # Overrides win over the baseline; baseline key positions are retained.
    return {**baseline, **overrides}
# -- success path ---------------------------------------------------------
def test_writes_snapshot_file_at_run_dir_root(tmp_path: Path):
    """The helper returns the sidecar filename and creates it under run_dir."""
    returned = _pz2._write_reuse_snapshot(tmp_path, **_make_kwargs())
    snapshot_file = tmp_path / SNAPSHOT_FILENAME

    assert returned == SNAPSHOT_FILENAME
    assert snapshot_file.exists(), f"snapshot not written at {snapshot_file}"
def test_written_snapshot_validates(tmp_path: Path):
    """A freshly written snapshot passes ``validate_snapshot`` with the
    SHA-256 of the MDX text that was handed to the helper."""
    import hashlib

    call_kwargs = _make_kwargs()
    returned = _pz2._write_reuse_snapshot(tmp_path, **call_kwargs)
    assert returned == SNAPSHOT_FILENAME

    snapshot_text = (tmp_path / SNAPSHOT_FILENAME).read_text(encoding="utf-8")
    snapshot = json.loads(snapshot_text)

    # Recompute the digest from the UTF-8 bytes of the exact source text
    # we passed in; validation must accept it as the expected key.
    source_bytes = call_kwargs["mdx_source_text"].encode("utf-8")
    digest = hashlib.sha256(source_bytes).hexdigest()
    validate_snapshot(snapshot, expected_mdx_sha256=digest)
def test_snapshot_has_correct_schema_version(tmp_path: Path):
    """The written snapshot carries the module's ``SNAPSHOT_VERSION``."""
    _pz2._write_reuse_snapshot(tmp_path, **_make_kwargs())

    snapshot_file = tmp_path / SNAPSHOT_FILENAME
    snapshot = json.loads(snapshot_file.read_text(encoding="utf-8"))

    assert snapshot["schema_version"] == SNAPSHOT_VERSION
def test_snapshot_records_layout_preset_pre_override(tmp_path: Path):
    """The ``layout_preset_pre_override`` argument is stored under the
    axis's ``value`` key in the snapshot."""
    call_kwargs = _make_kwargs(layout_preset_pre_override="horizontal-2")
    _pz2._write_reuse_snapshot(tmp_path, **call_kwargs)

    snapshot_file = tmp_path / SNAPSHOT_FILENAME
    snapshot = json.loads(snapshot_file.read_text(encoding="utf-8"))

    assert snapshot["layout_preset_pre_override"]["value"] == "horizontal-2"
def test_snapshot_is_utf8_encoded_with_non_ascii_content(tmp_path: Path):
    """Non-ASCII (Korean) text survives a write / UTF-8 read round trip."""
    korean_kwargs = _make_kwargs(
        slide_title="설계 방식의 왜곡",
        mdx_source_text="# 설계 방식\n\n- 한글 bullet\n",
    )
    _pz2._write_reuse_snapshot(tmp_path, **korean_kwargs)

    # This test deliberately does not pin the on-disk byte form (escaped
    # vs. literal characters): it only requires that the file decodes as
    # UTF-8 and that the parsed value equals what was passed in.
    decoded = (tmp_path / SNAPSHOT_FILENAME).read_text(encoding="utf-8")
    snapshot = json.loads(decoded)

    assert snapshot["slide_title"]["value"] == "설계 방식의 왜곡"
# -- failure path ---------------------------------------------------------
def test_failure_warns_and_returns_none(tmp_path: Path, monkeypatch, capsys):
    """When ``build_snapshot`` raises, the helper must NOT propagate the
    exception — it WARNS on stderr and returns ``None`` so the main
    pipeline run continues."""

    def _boom(*_args, **_kwargs):
        # Accept any call shape: the stub must raise the synthetic
        # RuntimeError regardless of whether the helper passes its
        # arguments positionally or by keyword.  A keyword-only stub would
        # raise TypeError on a positional call and make the
        # "RuntimeError" assertion below fail for an unrelated reason.
        raise RuntimeError("synthetic build failure")

    # Patch the name as looked up inside the pipeline module.
    monkeypatch.setattr(_pz2, "build_snapshot", _boom)

    rv = _pz2._write_reuse_snapshot(tmp_path, **_make_kwargs())
    assert rv is None

    captured = capsys.readouterr()
    assert "reuse-snapshot" in captured.err
    assert "WARN" in captured.err
    assert "RuntimeError" in captured.err
    # File MUST NOT exist on failure (no partial JSON on disk).
    assert not (tmp_path / SNAPSHOT_FILENAME).exists()
def test_failure_on_unwritable_run_dir_warns_and_returns_none(
    tmp_path: Path, monkeypatch, capsys
):
    """A write into a run_dir that does not exist must warn and yield
    ``None`` instead of raising to the caller (Stage 2 guardrail: the
    sidecar is optional)."""
    # None of these path components are created, so the target directory
    # is absent when the helper tries to write into it.
    missing_run_dir = tmp_path / "does" / "not" / "exist"

    result = _pz2._write_reuse_snapshot(missing_run_dir, **_make_kwargs())
    assert result is None

    stderr_text = capsys.readouterr().err
    assert "reuse-snapshot" in stderr_text
    assert "WARN" in stderr_text
    # The concrete exception type has to surface in the warning so that
    # whoever debugs a failed write can see what went wrong.
    assert "FileNotFoundError" in stderr_text
# -- pipeline integration anchors -----------------------------------------
def test_pipeline_imports_helper_and_constant():
    """The pipeline module exposes a callable ``_write_reuse_snapshot``
    and re-exports ``SNAPSHOT_FILENAME`` with the expected literal value."""
    helper_name = "_write_reuse_snapshot"

    assert hasattr(_pz2, helper_name)
    assert callable(getattr(_pz2, helper_name))
    assert _pz2.SNAPSHOT_FILENAME == "_reuse_snapshot.json"
def test_pipeline_call_site_follows_step06_artifact_write():
    """Structural guard: the helper must be invoked AFTER the Step 6
    artifact write in ``run_phase_z2_mvp1`` so the sidecar lands next
    to ``steps/step06_composition_plan.json`` (Stage 2 spec).

    The check is textual: it reads the pipeline module's source, finds the
    first occurrence of the Step 6 artifact marker, and requires a
    ``_write_reuse_snapshot(`` *call* somewhere after it.  The helper's own
    ``def _write_reuse_snapshot(`` line is excluded, because a plain
    substring search would accept the definition as if it were a call and
    let the guard pass with no call site present.
    """
    import re  # local import: only this structural guard needs it

    source = Path(_pz2.__file__).read_text(encoding="utf-8")

    # Locate the step06 artifact write call site by its locked name arg.
    step06_marker = '6, "composition_plan"'
    idx_step06 = source.find(step06_marker)
    assert idx_step06 != -1, "step06 artifact write call site missing"

    # The helper call must appear AFTER the step06 marker.  The negative
    # lookbehind rejects the ``def `` prefix of the function definition.
    helper_call = re.compile(r"(?<!def )_write_reuse_snapshot\(")
    match = helper_call.search(source, idx_step06)
    assert match is not None, "u3 helper call missing after step06 write"
def test_pipeline_step06_artifact_data_records_snapshot_path():
    """Structural guard: shortly after the Step 6 artifact marker, the
    pipeline source must mention both the ``"reuse_snapshot_path"`` key and
    ``SNAPSHOT_FILENAME``, so that a ``--reuse-from`` consumer can locate
    the expected sidecar through the canonical step artifact.

    This is informational only; a missing sidecar file is handled by u4's
    fail-closed logic, not by this test.
    """
    pipeline_source = Path(_pz2.__file__).read_text(encoding="utf-8")

    marker_pos = pipeline_source.find('6, "composition_plan"')
    assert marker_pos != -1

    # Inspect a generous 8000-character slice that starts at the marker.
    search_span = 8000
    after_marker = pipeline_source[marker_pos : marker_pos + search_span]

    assert '"reuse_snapshot_path"' in after_marker
    assert "SNAPSHOT_FILENAME" in after_marker