"""IMP-43 (#72) u2 — unit tests for ``src.phase_z2_reuse_snapshot``. Scope mirror of the production module (Stage 2 u2): * ``build_snapshot`` shape, provenance, JSON round-trip, required keys. * ``serialize_section`` / ``serialize_unit`` field preservation, including the duck-typed ``v4_candidates`` shape (template_id / frame_id / frame_number / confidence / label). * ``validate_snapshot`` fail-closed paths: non-dict input, schema version mismatch, missing/empty/non-string ``mdx_sha256``, sha mismatch, missing required keys, unwrapped wrapper, wrapper missing a provenance field. * Module-level constants exposed for u3 / u4 / u4b consumers. The tests use synthetic duck-typed dataclasses so the snapshot module's external surface is exercised without coupling to the production ``MdxSection`` / ``CompositionUnit`` / ``V4Match`` dataclass layouts. That mirrors the production module's intentional duck-typing (no imports from ``phase_z2_pipeline`` / ``phase_z2_composition``). """ from __future__ import annotations import json from dataclasses import dataclass, field from typing import Any, Optional import pytest from src.phase_z2_reuse_snapshot import ( REQUIRED_TOP_LEVEL_KEYS, SNAPSHOT_FILENAME, SNAPSHOT_VERSION, SnapshotValidationError, build_snapshot, serialize_section, serialize_unit, validate_snapshot, ) # -- synthetic duck-typed inputs ------------------------------------------ @dataclass class _Section: section_id: str section_num: int title: str raw_content: str heading_number: Optional[str] = None v4_alias_keys: list = field(default_factory=list) sub_sections: list = field(default_factory=list) @dataclass class _V4Candidate: template_id: str frame_id: str frame_number: int confidence: float label: str v4_rank: Optional[int] = None @dataclass class _Unit: source_section_ids: list merge_type: str frame_template_id: str frame_id: str frame_number: int confidence: float label: str phase_z_status: str raw_content: str title: str score: float v4_rank: Optional[int] = 1 selection_path: str = "rank_1" fallback_reason: Optional[str] = None rationale: dict = field(default_factory=dict) auto_selectable: bool = True filter_reasons: list = field(default_factory=list) notes: list = field(default_factory=list) v4_candidates: list = field(default_factory=list) provisional: bool = False def _make_section(**overrides: Any) -> _Section: base = dict( section_id="03-1", section_num=1, title="DX status", raw_content="- bullet one\n- bullet two", ) base.update(overrides) return _Section(**base) def _make_unit(**overrides: Any) -> _Unit: cand = _V4Candidate( template_id="tpl_a", frame_id="fid_a", frame_number=13, confidence=0.91, label="use_as_is", ) base: dict[str, Any] = dict( source_section_ids=["03-1"], merge_type="single", frame_template_id="tpl_a", frame_id="fid_a", frame_number=13, confidence=0.91, label="use_as_is", phase_z_status="auto_renderable", raw_content="- bullet one\n- bullet two", title="DX status", score=0.91, v4_candidates=[cand], ) base.update(overrides) return _Unit(**base) def _make_build_kwargs(**overrides: Any) -> dict[str, Any]: kwargs: dict[str, Any] = dict( mdx_sha256="a" * 64, slide_title="Title", slide_footer="Footer", sections=[_make_section()], stage0_adapter_diagnostics={"used": True, "fallback_reason": None}, stage0_normalized_assets={"popups": [], "images": [], "tables": []}, v4_evidence=[{"section_id": "03-1", "v4_candidates": []}], layout_preset_pre_override="horizontal-2", units=[_make_unit()], comp_debug={"v4_fallback_summary": {"fallback_used_count": 0}}, v4_fallback_traces={"03-1": {"selection_path": "rank_1"}}, ai_preflight={"enabled": False, "skipped": True}, ) kwargs.update(overrides) return kwargs # -- module constants ----------------------------------------------------- def test_snapshot_filename_constant(): assert SNAPSHOT_FILENAME == "_reuse_snapshot.json" def test_snapshot_version_is_positive_int(): assert isinstance(SNAPSHOT_VERSION, int) assert SNAPSHOT_VERSION >= 1 def test_required_keys_include_contract_and_payload(): # Bare contract / integrity keys. assert "schema_version" in REQUIRED_TOP_LEVEL_KEYS assert "mdx_sha256" in REQUIRED_TOP_LEVEL_KEYS # Payload axes per Stage 2 plan. for k in ( "slide_title", "slide_footer", "sections", "stage0_adapter_diagnostics", "stage0_normalized_assets", "v4_evidence", "layout_preset_pre_override", "units", "comp_debug", "v4_fallback_traces", "ai_preflight", ): assert k in REQUIRED_TOP_LEVEL_KEYS, f"missing from REQUIRED_TOP_LEVEL_KEYS: {k}" # -- build_snapshot ------------------------------------------------------- def test_build_snapshot_round_trips_through_json(): snap = build_snapshot(**_make_build_kwargs()) payload = json.dumps(snap) loaded = json.loads(payload) assert loaded["schema_version"] == SNAPSHOT_VERSION assert loaded["mdx_sha256"] == "a" * 64 def test_build_snapshot_has_all_required_keys(): snap = build_snapshot(**_make_build_kwargs()) for key in REQUIRED_TOP_LEVEL_KEYS: assert key in snap, f"build_snapshot missing required key: {key}" def test_build_snapshot_bare_keys_are_unwrapped_scalars(): snap = build_snapshot(**_make_build_kwargs()) assert snap["schema_version"] == SNAPSHOT_VERSION assert snap["mdx_sha256"] == "a" * 64 # bare keys MUST NOT be wrapped — u4b mdx_sha256 check reads directly. assert not isinstance(snap["schema_version"], dict) assert not isinstance(snap["mdx_sha256"], dict) def test_build_snapshot_provenance_wrapper_shape(): snap = build_snapshot(**_make_build_kwargs()) bare = {"schema_version", "mdx_sha256"} for key, entry in snap.items(): if key in bare: continue assert isinstance(entry, dict), f"{key} is not wrapped" assert set(entry.keys()) == {"value", "source_path", "upstream_step"}, key assert isinstance(entry["source_path"], str) and entry["source_path"] assert isinstance(entry["upstream_step"], str) assert entry["upstream_step"].startswith("step"), entry["upstream_step"] def test_build_snapshot_upstream_steps_stay_inside_reuse_boundary(): """No ``upstream_step`` may point outside the Step 0/2/5/6 reuse boundary (Stage 1 root_cause). A drift to e.g. ``step09`` would silently invite work outside the reuse window — fail loudly. Step 01's contribution is the ``mdx_sha256`` integrity key (a bare contract scalar with no wrapper) so step01 does not need to appear in payload provenance. """ snap = build_snapshot(**_make_build_kwargs()) allowed = {"step00", "step02", "step05", "step06"} for key, entry in snap.items(): if key in {"schema_version", "mdx_sha256"}: continue assert entry["upstream_step"] in allowed, ( f"key {key!r}: upstream_step {entry['upstream_step']!r} outside reuse boundary" ) def test_build_snapshot_units_carry_v4_candidates(): snap = build_snapshot(**_make_build_kwargs()) units = snap["units"]["value"] assert len(units) == 1 assert units[0]["v4_candidates"][0]["template_id"] == "tpl_a" assert units[0]["v4_candidates"][0]["frame_number"] == 13 assert units[0]["v4_candidates"][0]["confidence"] == pytest.approx(0.91) def test_build_snapshot_sections_preserve_alias_keys_and_subsections(): sec = _make_section( section_id="04-2", v4_alias_keys=["04-2.1"], sub_sections=[{"id": "04-2-sub-1"}], heading_number="2.1", ) snap = build_snapshot(**_make_build_kwargs(sections=[sec])) payload = snap["sections"]["value"] assert payload[0]["section_id"] == "04-2" assert payload[0]["v4_alias_keys"] == ["04-2.1"] assert payload[0]["sub_sections"] == [{"id": "04-2-sub-1"}] assert payload[0]["heading_number"] == "2.1" def test_build_snapshot_units_provenance_points_at_step06(): snap = build_snapshot(**_make_build_kwargs()) assert "step06_composition_plan.json" in snap["units"]["source_path"] assert snap["units"]["upstream_step"] == "step06" def test_build_snapshot_v4_evidence_provenance_points_at_step05(): snap = build_snapshot(**_make_build_kwargs()) assert "step05_v4_evidence.json" in snap["v4_evidence"]["source_path"] assert snap["v4_evidence"]["upstream_step"] == "step05" def test_build_snapshot_ai_preflight_provenance_points_at_step00(): snap = build_snapshot(**_make_build_kwargs()) assert "step00_preconditions.json" in snap["ai_preflight"]["source_path"] assert snap["ai_preflight"]["upstream_step"] == "step00" def test_build_snapshot_rejects_unjsonable_input(): bad_unit = _make_unit() bad_unit.notes.append(object()) # not JSON-safe with pytest.raises(TypeError): build_snapshot(**_make_build_kwargs(units=[bad_unit])) def test_build_snapshot_handles_none_optional_fields(): snap = build_snapshot( **_make_build_kwargs( slide_title=None, slide_footer=None, stage0_adapter_diagnostics=None, stage0_normalized_assets=None, comp_debug=None, v4_fallback_traces=None, ai_preflight=None, ) ) # None inputs land as None / {} consistently — never raise. assert snap["slide_title"]["value"] is None assert snap["slide_footer"]["value"] is None assert snap["stage0_adapter_diagnostics"]["value"] == {} assert snap["stage0_normalized_assets"]["value"] == {} assert snap["comp_debug"]["value"] == {} assert snap["v4_fallback_traces"]["value"] == {} assert snap["ai_preflight"]["value"] == {} # -- serializer helpers --------------------------------------------------- def test_serialize_section_preserves_all_documented_fields(): sec = _make_section( heading_number="1.1", v4_alias_keys=["03-1.x"], sub_sections=[{"id": "s"}], ) out = serialize_section(sec) assert out["section_id"] == "03-1" assert out["section_num"] == 1 assert out["title"] == "DX status" assert out["raw_content"].startswith("- bullet") assert out["heading_number"] == "1.1" assert out["v4_alias_keys"] == ["03-1.x"] assert out["sub_sections"] == [{"id": "s"}] def test_serialize_section_works_with_missing_optional_attrs(): class _Minimal: section_id = "x" section_num = 0 title = "t" raw_content = "r" out = serialize_section(_Minimal()) assert out["heading_number"] is None assert out["v4_alias_keys"] == [] assert out["sub_sections"] == [] def test_serialize_unit_v4_candidates_unwrap_to_named_attrs(): unit = _make_unit() out = serialize_unit(unit) cand = out["v4_candidates"][0] assert cand == { "template_id": "tpl_a", "frame_id": "fid_a", "frame_number": 13, "confidence": pytest.approx(0.91), "label": "use_as_is", # u4 follow-up — Step 9 application-plan payload reads # ``c.v4_rank`` off each rehydrated candidate. Snapshot # serializer persists it via ``getattr(c, 'v4_rank', None)`` so # legacy duck types (no v4_rank attr) get None and modern V4Match # instances carry their rank (1/2/3/...). "v4_rank": None, } def test_serialize_unit_v4_candidates_persist_v4_rank_when_present(): """A v4_candidate with v4_rank=2 (V4Match-shape duck type) round-trips.""" ranked_cand = _V4Candidate( template_id="tpl_b", frame_id="fid_b", frame_number=14, confidence=0.82, label="light_edit", v4_rank=2, ) unit = _make_unit(v4_candidates=[ranked_cand]) out = serialize_unit(unit) assert out["v4_candidates"][0]["v4_rank"] == 2 def test_serialize_unit_handles_empty_v4_candidates(): unit = _make_unit(v4_candidates=[]) out = serialize_unit(unit) assert out["v4_candidates"] == [] def test_serialize_unit_provisional_default_false(): unit = _make_unit() assert serialize_unit(unit)["provisional"] is False def test_serialize_unit_provisional_true_preserved(): unit = _make_unit(provisional=True) assert serialize_unit(unit)["provisional"] is True def test_serialize_unit_round_trips_through_json(): out = serialize_unit(_make_unit()) reloaded = json.loads(json.dumps(out)) assert reloaded["source_section_ids"] == ["03-1"] assert reloaded["frame_template_id"] == "tpl_a" # -- validate_snapshot ---------------------------------------------------- def test_validate_snapshot_accepts_well_formed(): snap = build_snapshot(**_make_build_kwargs()) validate_snapshot(snap, expected_mdx_sha256="a" * 64) def test_validate_snapshot_rejects_non_dict_input(): with pytest.raises(SnapshotValidationError): validate_snapshot("not a dict", expected_mdx_sha256="a" * 64) def test_validate_snapshot_rejects_version_mismatch(): snap = build_snapshot(**_make_build_kwargs()) snap["schema_version"] = SNAPSHOT_VERSION + 999 with pytest.raises(SnapshotValidationError) as exc: validate_snapshot(snap, expected_mdx_sha256="a" * 64) assert "schema_version" in str(exc.value) def test_validate_snapshot_rejects_missing_sha(): snap = build_snapshot(**_make_build_kwargs()) del snap["mdx_sha256"] with pytest.raises(SnapshotValidationError) as exc: validate_snapshot(snap, expected_mdx_sha256="a" * 64) assert "mdx_sha256" in str(exc.value) def test_validate_snapshot_rejects_empty_sha(): snap = build_snapshot(**_make_build_kwargs()) snap["mdx_sha256"] = "" with pytest.raises(SnapshotValidationError) as exc: validate_snapshot(snap, expected_mdx_sha256="a" * 64) assert "mdx_sha256" in str(exc.value) def test_validate_snapshot_rejects_non_string_sha(): snap = build_snapshot(**_make_build_kwargs()) snap["mdx_sha256"] = 12345 with pytest.raises(SnapshotValidationError) as exc: validate_snapshot(snap, expected_mdx_sha256="a" * 64) assert "mdx_sha256" in str(exc.value) def test_validate_snapshot_rejects_sha_mismatch(): snap = build_snapshot(**_make_build_kwargs()) with pytest.raises(SnapshotValidationError) as exc: validate_snapshot(snap, expected_mdx_sha256="b" * 64) assert "mdx_sha256 mismatch" in str(exc.value) def test_validate_snapshot_rejects_missing_required_key(): snap = build_snapshot(**_make_build_kwargs()) del snap["units"] with pytest.raises(SnapshotValidationError) as exc: validate_snapshot(snap, expected_mdx_sha256="a" * 64) assert "units" in str(exc.value) def test_validate_snapshot_rejects_unwrapped_payload_key(): snap = build_snapshot(**_make_build_kwargs()) snap["units"] = "not a dict" with pytest.raises(SnapshotValidationError) as exc: validate_snapshot(snap, expected_mdx_sha256="a" * 64) assert "units" in str(exc.value) def test_validate_snapshot_rejects_wrapper_missing_value(): snap = build_snapshot(**_make_build_kwargs()) snap["units"] = {"source_path": "x", "upstream_step": "step06"} with pytest.raises(SnapshotValidationError) as exc: validate_snapshot(snap, expected_mdx_sha256="a" * 64) assert "value" in str(exc.value) def test_validate_snapshot_rejects_wrapper_missing_source_path(): snap = build_snapshot(**_make_build_kwargs()) snap["units"] = {"value": [], "upstream_step": "step06"} with pytest.raises(SnapshotValidationError) as exc: validate_snapshot(snap, expected_mdx_sha256="a" * 64) assert "source_path" in str(exc.value) def test_validate_snapshot_rejects_wrapper_missing_upstream_step(): snap = build_snapshot(**_make_build_kwargs()) snap["units"] = {"value": [], "source_path": "x"} with pytest.raises(SnapshotValidationError) as exc: validate_snapshot(snap, expected_mdx_sha256="a" * 64) assert "upstream_step" in str(exc.value) def test_validate_snapshot_error_subclasses_value_error(): snap = build_snapshot(**_make_build_kwargs()) snap["schema_version"] = 999 # u4b will pre-catch SnapshotValidationError, but the broader # `except ValueError` net must still pick this up. with pytest.raises(ValueError): validate_snapshot(snap, expected_mdx_sha256="a" * 64)