Some checks failed
Multi-MDX Regression (IMP-91) / multi-mdx-regression (push) Failing after 25s
u1 argparse --reuse-from PREV_RUN_ID + post-merge fail-closed guard (rejects
layout/zone_geometry/zone_section/image override axes by name; only
--override-frame is preserved).
u2 src/phase_z2_reuse_snapshot.py — JSON-only Step 6 snapshot with mdx_sha256
integrity key and {value, source_path, upstream_step} provenance per axis
(pickle forbidden per Stage 2 guardrail).
u3 _write_reuse_snapshot at the Step 6 boundary; soft-fails to stderr without
aborting the seed run.
u4 prev_run_dir RO copy of step00/01/02/05/06 + _reuse_snapshot.json into
new run_dir, state rehydration, reuse marker, frame-override application on
restored units, Step 7+ resume.
u4b fail-closed for missing prev_run_dir / missing/corrupt/invalid snapshot /
mdx_sha256 mismatch / accidental new==prev write, with value+path+upstream
diagnostics per axis.
u5 reuse_from Optional[str] threaded through run_phase_z2_mvp1 signature and
CLI dispatch; default None preserves byte-identical pre-IMP-43 behavior.
u6 Front /api/run optional reuseFromRunId forwarding (vite.config.ts +
designAgentApi.ts + run_pipeline_reuse_from.test.ts).
u7a fast CI equivalence (1 mdx × 1 layout × 2 frames); step13 whitelist =
run_id/timestamps/prev_run_id only. u7b 3 layouts × 3 mdx × 32 frames
sweep gated by pytest.mark.sweep (registered in pyproject.toml; default CI
must use -m 'not sweep').
u8 scripts/measure_reuse_savings.py argv-driven A/B/C harness with frame
pin self-discovery + seed-time exclusion; status board §8 TBD anchor
(issue-body 50-70% / 10-20s→3-8s claim explicitly unverified, not mirrored).
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
283 lines
9.8 KiB
Python
283 lines
9.8 KiB
Python
"""IMP-43 (#72) u3 — focused tests for the Step 6 reuse snapshot writer.
|
|
|
|
u3 scope (per the Stage 2 Exit Report):
|
|
|
|
- ``_write_reuse_snapshot`` writes ``run_dir/_reuse_snapshot.json`` *after*
|
|
the Step 6 artifact lands; failure WARNS and CONTINUES (the helper does
|
|
NOT raise out of the main pipeline run).
|
|
- The Step 6 artifact data dict records the run_dir-relative sidecar path
|
|
as ``data.reuse_snapshot_path`` (additive informational field, always
|
|
set to ``SNAPSHOT_FILENAME`` regardless of write success — u4 will
|
|
fail-closed on missing / invalid sidecar via u2's ``validate_snapshot``).
|
|
|
|
The helper is tested in isolation (no full pipeline run) — pipeline call
|
|
site presence is asserted structurally so we exercise behaviour without
|
|
re-running Step 0~6 inside the test process. End-to-end equivalence under
|
|
``--reuse-from`` is u7a / u7b scope.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
from dataclasses import dataclass, field
|
|
from pathlib import Path
|
|
from typing import Any, Optional
|
|
|
|
import pytest
|
|
|
|
import src.phase_z2_pipeline as _pz2
|
|
from src.phase_z2_reuse_snapshot import (
|
|
SNAPSHOT_FILENAME,
|
|
SNAPSHOT_VERSION,
|
|
SnapshotValidationError,
|
|
validate_snapshot,
|
|
)
|
|
|
|
|
|
# -- synthetic duck-typed inputs ------------------------------------------
|
|
|
|
|
|
@dataclass
class _Section:
    """Synthetic, duck-typed section record used as snapshot-writer input.

    Instances are handed to the helper under test through the ``sections``
    entry built by ``_make_kwargs``. Only the four leading fields are
    required; the remaining ones default to "absent" / empty values.
    """

    # Required identity and content fields.
    section_id: str
    section_num: int
    title: str
    raw_content: str
    # Optional extras; list defaults use a factory so instances never share
    # the same list object.
    heading_number: Optional[str] = None
    v4_alias_keys: list = field(default_factory=list)
    sub_sections: list = field(default_factory=list)
|
|
|
|
|
|
@dataclass
class _V4Candidate:
    """Synthetic, duck-typed V4 candidate record.

    ``_make_kwargs`` attaches one of these to a ``_Unit`` through its
    ``v4_candidates`` list. All five fields are required.
    """

    template_id: str
    frame_id: str
    frame_number: int
    confidence: float
    label: str
|
|
|
|
|
|
@dataclass
class _Unit:
    """Synthetic, duck-typed unit record used as snapshot-writer input.

    ``_make_kwargs`` passes a single instance through the ``units`` entry.
    The first eleven fields are required; everything after ``score`` has a
    default so tests only spell out what they care about.
    """

    # Required fields (positional order is part of the constructor contract).
    source_section_ids: list
    merge_type: str
    frame_template_id: str
    frame_id: str
    frame_number: int
    confidence: float
    label: str
    phase_z_status: str
    raw_content: str
    title: str
    score: float
    # Defaulted fields; mutable defaults go through ``default_factory`` so
    # each instance owns its own container.
    v4_rank: Optional[int] = 1
    selection_path: str = "rank_1"
    fallback_reason: Optional[str] = None
    rationale: dict = field(default_factory=dict)
    auto_selectable: bool = True
    filter_reasons: list = field(default_factory=list)
    notes: list = field(default_factory=list)
    v4_candidates: list = field(default_factory=list)
    provisional: bool = False
|
|
|
|
|
|
def _make_kwargs(**overrides: Any) -> dict[str, Any]:
    """Build the keyword arguments for one ``_write_reuse_snapshot`` call.

    The baseline describes a single section ("03-1") mapped to a single unit
    with one V4 candidate. Any keyword passed in ``overrides`` replaces the
    baseline entry of the same name (or is appended when the name is new).

    Returns:
        A fresh dict suitable for ``**``-expansion into the helper.
    """
    # One candidate description feeds both the dataclass instance attached
    # to the unit and the plain-dict mirror inside ``v4_evidence``.
    candidate_fields: dict[str, Any] = {
        "template_id": "tpl_a",
        "frame_id": "fid_a",
        "frame_number": 13,
        "confidence": 0.91,
        "label": "use_as_is",
    }
    candidate = _V4Candidate(**candidate_fields)

    the_section = _Section(
        section_id="03-1",
        section_num=1,
        title="DX status",
        raw_content="- bullet one\n- bullet two",
    )

    the_unit = _Unit(
        source_section_ids=["03-1"],
        merge_type="single",
        frame_template_id="tpl_a",
        frame_id="fid_a",
        frame_number=13,
        confidence=0.91,
        label="use_as_is",
        phase_z_status="auto_renderable",
        raw_content="- bullet one\n- bullet two",
        title="DX status",
        score=0.91,
        v4_candidates=[candidate],
    )

    evidence_entry = {
        "section_id": "03-1",
        # Independent copy so mutating the evidence never touches the
        # mapping used to build the dataclass above.
        "v4_candidates": [dict(candidate_fields)],
        "candidate_status": "ok",
    }

    baseline: dict[str, Any] = {
        "mdx_source_text": "# Slide\n\n## 03-1 DX status\n\n- bullet one\n- bullet two\n",
        "slide_title": "Slide",
        "slide_footer": None,
        "sections": [the_section],
        "stage0_adapter_diagnostics": {"used": True, "fallback_reason": None},
        "stage0_normalized_assets": {"popups": [], "images": [], "tables": []},
        "v4_evidence": [evidence_entry],
        "layout_preset_pre_override": "single",
        "units": [the_unit],
        "comp_debug": {"v4_fallback_summary": {"fallback_used_count": 0}},
        "v4_fallback_traces": {"03-1": {"selection_path": "rank_1"}},
        "ai_preflight": {"enabled": False, "skipped": True},
    }

    # Later keys win, which matches an in-place ``update`` with overrides.
    return {**baseline, **overrides}
|
|
|
|
|
|
# -- success path ---------------------------------------------------------
|
|
|
|
|
|
def test_writes_snapshot_file_at_run_dir_root(tmp_path: Path):
    """The helper returns the sidecar filename and creates it in ``run_dir``."""
    returned = _pz2._write_reuse_snapshot(tmp_path, **_make_kwargs())
    snapshot_file = tmp_path / SNAPSHOT_FILENAME

    assert returned == SNAPSHOT_FILENAME
    assert snapshot_file.exists(), f"snapshot not written at {snapshot_file}"
|
|
|
|
|
|
def test_written_snapshot_validates(tmp_path: Path):
    """A freshly written snapshot passes ``validate_snapshot``.

    The expected digest is recomputed here from the UTF-8 bytes of the same
    ``mdx_source_text`` handed to the helper, so validation only succeeds if
    the helper hashed that exact source.
    """
    import hashlib

    call_kwargs = _make_kwargs()
    returned = _pz2._write_reuse_snapshot(tmp_path, **call_kwargs)
    assert returned == SNAPSHOT_FILENAME

    snapshot_text = (tmp_path / SNAPSHOT_FILENAME).read_text(encoding="utf-8")
    snapshot = json.loads(snapshot_text)

    source_bytes = call_kwargs["mdx_source_text"].encode("utf-8")
    digest = hashlib.sha256(source_bytes).hexdigest()
    validate_snapshot(snapshot, expected_mdx_sha256=digest)
|
|
|
|
|
|
def test_snapshot_has_correct_schema_version(tmp_path: Path):
    """The written snapshot carries the current ``SNAPSHOT_VERSION``."""
    _pz2._write_reuse_snapshot(tmp_path, **_make_kwargs())

    snapshot_text = (tmp_path / SNAPSHOT_FILENAME).read_text(encoding="utf-8")
    snapshot = json.loads(snapshot_text)

    assert snapshot["schema_version"] == SNAPSHOT_VERSION
|
|
|
|
|
|
def test_snapshot_records_layout_preset_pre_override(tmp_path: Path):
    """The pre-override layout preset is stored under its ``value`` key."""
    call_kwargs = _make_kwargs(layout_preset_pre_override="horizontal-2")
    _pz2._write_reuse_snapshot(tmp_path, **call_kwargs)

    snapshot_text = (tmp_path / SNAPSHOT_FILENAME).read_text(encoding="utf-8")
    snapshot = json.loads(snapshot_text)

    recorded = snapshot["layout_preset_pre_override"]
    assert recorded["value"] == "horizontal-2"
|
|
|
|
|
|
def test_snapshot_is_utf8_encoded_with_non_ascii_content(tmp_path: Path):
    """Non-ASCII (Korean) text survives the write / read round trip.

    The check is deliberately about decodability and value equality, not
    about the exact bytes on disk: whether or not the writer escapes
    non-ASCII characters, the file must decode as UTF-8 and the parsed
    title must equal what was passed in.
    """
    call_kwargs = _make_kwargs(
        slide_title="설계 방식의 왜곡",
        mdx_source_text="# 설계 방식\n\n- 한글 bullet\n",
    )
    _pz2._write_reuse_snapshot(tmp_path, **call_kwargs)

    # Decoding with an explicit encoding fails loudly on invalid UTF-8.
    snapshot_text = (tmp_path / SNAPSHOT_FILENAME).read_text(encoding="utf-8")
    snapshot = json.loads(snapshot_text)

    assert snapshot["slide_title"]["value"] == "설계 방식의 왜곡"
|
|
|
|
|
|
# -- failure path ---------------------------------------------------------
|
|
|
|
|
|
def test_failure_warns_and_returns_none(tmp_path: Path, monkeypatch, capsys):
    """A raising ``build_snapshot`` is swallowed by the helper.

    The helper is expected to report the problem on stderr and hand back
    ``None`` instead of propagating, so the surrounding pipeline run can
    carry on. No snapshot file may be left behind.
    """

    def _boom(**_kwargs):
        raise RuntimeError("synthetic build failure")

    monkeypatch.setattr(_pz2, "build_snapshot", _boom)

    returned = _pz2._write_reuse_snapshot(tmp_path, **_make_kwargs())
    assert returned is None

    stderr_text = capsys.readouterr().err
    # Tag, severity and exception type must all surface in the warning.
    for expected_token in ("reuse-snapshot", "WARN", "RuntimeError"):
        assert expected_token in stderr_text

    # A failed build must not leave partial JSON on disk.
    leftover = tmp_path / SNAPSHOT_FILENAME
    assert not leftover.exists()
|
|
|
|
|
|
def test_failure_on_unwritable_run_dir_warns_and_returns_none(
    tmp_path: Path, monkeypatch, capsys
):
    """A disk write failure is reported, not raised.

    The target directory is never created, so writing the sidecar into it
    is expected to fail with ``FileNotFoundError``. The helper must warn on
    stderr and return ``None`` rather than raise to the caller.
    """
    missing_run_dir = tmp_path.joinpath("does", "not", "exist")

    returned = _pz2._write_reuse_snapshot(missing_run_dir, **_make_kwargs())
    assert returned is None

    stderr_text = capsys.readouterr().err
    # The exception type is part of the contract so the warning stays
    # useful when debugging.
    for expected_token in ("reuse-snapshot", "WARN", "FileNotFoundError"):
        assert expected_token in stderr_text
|
|
|
|
|
|
# -- pipeline integration anchors -----------------------------------------
|
|
|
|
|
|
def test_pipeline_imports_helper_and_constant():
    """The pipeline module exposes the helper and the filename constant.

    The constant is checked against its literal value so the pipeline and
    the snapshot module cannot silently drift apart.
    """
    helper = getattr(_pz2, "_write_reuse_snapshot", None)
    assert helper is not None
    assert callable(helper)

    assert _pz2.SNAPSHOT_FILENAME == "_reuse_snapshot.json"
|
|
|
|
|
|
def test_pipeline_call_site_follows_step06_artifact_write():
    """Structural guard: the helper is invoked AFTER the Step 6 write.

    The pipeline source is scanned as text. First the Step 6 artifact write
    is located through its name argument, then a ``_write_reuse_snapshot(``
    occurrence is required somewhere after it, so the sidecar lands next to
    ``steps/step06_composition_plan.json``.

    Occurrences that are the helper's own ``def`` line are skipped;
    otherwise a definition placed below the marker would satisfy the guard
    even when no call exists.
    """
    source = Path(_pz2.__file__).read_text(encoding="utf-8")

    # Locate the step06 artifact write call site by its name argument.
    step06_marker = '6, "composition_plan"'
    idx_step06 = source.find(step06_marker)
    assert idx_step06 != -1, "step06 artifact write call site missing"

    # Find the first occurrence after the marker that is not a definition.
    call_token = "_write_reuse_snapshot("
    def_prefix = "def "
    idx_helper = source.find(call_token, idx_step06)
    while idx_helper != -1:
        preceding = source[max(idx_helper - len(def_prefix), 0):idx_helper]
        if preceding != def_prefix:
            break
        idx_helper = source.find(call_token, idx_helper + 1)

    assert idx_helper != -1, "u3 helper call missing after step06 write"
|
|
|
|
|
|
def test_pipeline_step06_artifact_data_records_snapshot_path():
    """Structural guard: Step 6 artifact data names the snapshot sidecar.

    The pipeline source is scanned as text: within a fixed-size window
    following the Step 6 artifact write marker, both the
    ``"reuse_snapshot_path"`` key and the ``SNAPSHOT_FILENAME`` constant
    must appear. Whether the sidecar file actually exists is not checked
    here.
    """
    pipeline_source = Path(_pz2.__file__).read_text(encoding="utf-8")

    window_start = pipeline_source.find('6, "composition_plan"')
    assert window_start != -1

    # Generous look-ahead so formatting changes near the call do not
    # break the guard.
    window_stop = window_start + 8000
    window = pipeline_source[window_start:window_stop]

    for expected_token in ('"reuse_snapshot_path"', "SNAPSHOT_FILENAME"):
        assert expected_token in window
|