"""IMP-43 (#72) u3 — focused tests for the Step 6 reuse snapshot writer. u3 scope (per the Stage 2 Exit Report): - ``_write_reuse_snapshot`` writes ``run_dir/_reuse_snapshot.json`` *after* the Step 6 artifact lands; failure WARNS and CONTINUES (the helper does NOT raise out of the main pipeline run). - The Step 6 artifact data dict records the run_dir-relative sidecar path as ``data.reuse_snapshot_path`` (additive informational field, always set to ``SNAPSHOT_FILENAME`` regardless of write success — u4 will fail-closed on missing / invalid sidecar via u2's ``validate_snapshot``). The helper is tested in isolation (no full pipeline run) — pipeline call site presence is asserted structurally so we exercise behaviour without re-running Step 0~6 inside the test process. End-to-end equivalence under ``--reuse-from`` is u7a / u7b scope. """ from __future__ import annotations import json from dataclasses import dataclass, field from pathlib import Path from typing import Any, Optional import pytest import src.phase_z2_pipeline as _pz2 from src.phase_z2_reuse_snapshot import ( SNAPSHOT_FILENAME, SNAPSHOT_VERSION, SnapshotValidationError, validate_snapshot, ) # -- synthetic duck-typed inputs ------------------------------------------ @dataclass class _Section: section_id: str section_num: int title: str raw_content: str heading_number: Optional[str] = None v4_alias_keys: list = field(default_factory=list) sub_sections: list = field(default_factory=list) @dataclass class _V4Candidate: template_id: str frame_id: str frame_number: int confidence: float label: str @dataclass class _Unit: source_section_ids: list merge_type: str frame_template_id: str frame_id: str frame_number: int confidence: float label: str phase_z_status: str raw_content: str title: str score: float v4_rank: Optional[int] = 1 selection_path: str = "rank_1" fallback_reason: Optional[str] = None rationale: dict = field(default_factory=dict) auto_selectable: bool = True filter_reasons: list = field(default_factory=list) notes: list = field(default_factory=list) v4_candidates: list = field(default_factory=list) provisional: bool = False def _make_kwargs(**overrides: Any) -> dict[str, Any]: cand = _V4Candidate( template_id="tpl_a", frame_id="fid_a", frame_number=13, confidence=0.91, label="use_as_is", ) section = _Section( section_id="03-1", section_num=1, title="DX status", raw_content="- bullet one\n- bullet two", ) unit = _Unit( source_section_ids=["03-1"], merge_type="single", frame_template_id="tpl_a", frame_id="fid_a", frame_number=13, confidence=0.91, label="use_as_is", phase_z_status="auto_renderable", raw_content="- bullet one\n- bullet two", title="DX status", score=0.91, v4_candidates=[cand], ) kwargs: dict[str, Any] = dict( mdx_source_text="# Slide\n\n## 03-1 DX status\n\n- bullet one\n- bullet two\n", slide_title="Slide", slide_footer=None, sections=[section], stage0_adapter_diagnostics={"used": True, "fallback_reason": None}, stage0_normalized_assets={"popups": [], "images": [], "tables": []}, v4_evidence=[ { "section_id": "03-1", "v4_candidates": [ { "template_id": "tpl_a", "frame_id": "fid_a", "frame_number": 13, "confidence": 0.91, "label": "use_as_is", } ], "candidate_status": "ok", } ], layout_preset_pre_override="single", units=[unit], comp_debug={"v4_fallback_summary": {"fallback_used_count": 0}}, v4_fallback_traces={"03-1": {"selection_path": "rank_1"}}, ai_preflight={"enabled": False, "skipped": True}, ) kwargs.update(overrides) return kwargs # -- success path --------------------------------------------------------- def test_writes_snapshot_file_at_run_dir_root(tmp_path: Path): rv = _pz2._write_reuse_snapshot(tmp_path, **_make_kwargs()) assert rv == SNAPSHOT_FILENAME fpath = tmp_path / SNAPSHOT_FILENAME assert fpath.exists(), f"snapshot not written at {fpath}" def test_written_snapshot_validates(tmp_path: Path): kwargs = _make_kwargs() rv = _pz2._write_reuse_snapshot(tmp_path, **kwargs) assert rv == SNAPSHOT_FILENAME snap = json.loads((tmp_path / SNAPSHOT_FILENAME).read_text(encoding="utf-8")) # mdx_sha256 is derived from mdx_source_text — recompute to verify # the helper is hashing the UTF-8 bytes of the same source we passed. import hashlib as _hl expected_sha = _hl.sha256( kwargs["mdx_source_text"].encode("utf-8") ).hexdigest() validate_snapshot(snap, expected_mdx_sha256=expected_sha) def test_snapshot_has_correct_schema_version(tmp_path: Path): _pz2._write_reuse_snapshot(tmp_path, **_make_kwargs()) snap = json.loads((tmp_path / SNAPSHOT_FILENAME).read_text(encoding="utf-8")) assert snap["schema_version"] == SNAPSHOT_VERSION def test_snapshot_records_layout_preset_pre_override(tmp_path: Path): _pz2._write_reuse_snapshot( tmp_path, **_make_kwargs(layout_preset_pre_override="horizontal-2") ) snap = json.loads((tmp_path / SNAPSHOT_FILENAME).read_text(encoding="utf-8")) assert snap["layout_preset_pre_override"]["value"] == "horizontal-2" def test_snapshot_is_utf8_encoded_with_non_ascii_content(tmp_path: Path): _pz2._write_reuse_snapshot( tmp_path, **_make_kwargs( slide_title="설계 방식의 왜곡", mdx_source_text="# 설계 방식\n\n- 한글 bullet\n", ), ) # ensure_ascii=False is intentional so Korean text round-trips # readable; if a future refactor drops it the bytes change but the # JSON still parses — we assert the file is decodable AS utf-8 and # the value survives the round trip. raw = (tmp_path / SNAPSHOT_FILENAME).read_text(encoding="utf-8") snap = json.loads(raw) assert snap["slide_title"]["value"] == "설계 방식의 왜곡" # -- failure path --------------------------------------------------------- def test_failure_warns_and_returns_none(tmp_path: Path, monkeypatch, capsys): """When ``build_snapshot`` raises, the helper must NOT propagate the exception — it WARNS on stderr and returns ``None`` so the main pipeline run continues.""" def _boom(**_kwargs): raise RuntimeError("synthetic build failure") monkeypatch.setattr(_pz2, "build_snapshot", _boom) rv = _pz2._write_reuse_snapshot(tmp_path, **_make_kwargs()) assert rv is None captured = capsys.readouterr() assert "reuse-snapshot" in captured.err assert "WARN" in captured.err assert "RuntimeError" in captured.err # File MUST NOT exist on failure (no partial JSON on disk). assert not (tmp_path / SNAPSHOT_FILENAME).exists() def test_failure_on_unwritable_run_dir_warns_and_returns_none( tmp_path: Path, monkeypatch, capsys ): """Simulate disk write failure: helper warns + returns None, never raises out to the caller (Stage 2 guardrail: optional sidecar).""" nonexistent = tmp_path / "does" / "not" / "exist" # nonexistent.exists() is False — Path.write_text raises FileNotFoundError. rv = _pz2._write_reuse_snapshot(nonexistent, **_make_kwargs()) assert rv is None captured = capsys.readouterr() assert "reuse-snapshot" in captured.err assert "WARN" in captured.err # FileNotFoundError specifically — sanity-check the type surfaces in # the warning so debugging is not blind. assert "FileNotFoundError" in captured.err # -- pipeline integration anchors ----------------------------------------- def test_pipeline_imports_helper_and_constant(): """The pipeline module must expose the helper for the post-Step-6 call site, and the constant must round-trip from the snapshot module (single source of truth).""" assert hasattr(_pz2, "_write_reuse_snapshot") assert callable(_pz2._write_reuse_snapshot) assert _pz2.SNAPSHOT_FILENAME == "_reuse_snapshot.json" def test_pipeline_call_site_follows_step06_artifact_write(): """Structural guard: the helper must be invoked AFTER the Step 6 artifact write in ``run_phase_z2_mvp1`` so the sidecar lands next to ``steps/step06_composition_plan.json`` (Stage 2 spec).""" source = Path(_pz2.__file__).read_text(encoding="utf-8") # Locate the step06 artifact write call site by its locked name arg. step06_marker = '6, "composition_plan"' idx_step06 = source.find(step06_marker) assert idx_step06 != -1, "step06 artifact write call site missing" # The helper call must appear AFTER the step06 marker. idx_helper = source.find("_write_reuse_snapshot(", idx_step06) assert idx_helper != -1, "u3 helper call missing after step06 write" def test_pipeline_step06_artifact_data_records_snapshot_path(): """Structural guard: the Step 6 artifact data dict must include the ``reuse_snapshot_path`` field so a future ``--reuse-from`` consumer can locate the expected sidecar via the canonical step artifact (Stage 2 spec — informational; absence of the file is u4's fail-closed concern).""" source = Path(_pz2.__file__).read_text(encoding="utf-8") step06_marker = '6, "composition_plan"' idx_step06 = source.find(step06_marker) assert idx_step06 != -1 # Search a generous window after the marker for the field key. window = source[idx_step06 : idx_step06 + 8000] assert '"reuse_snapshot_path"' in window assert "SNAPSHOT_FILENAME" in window