C.E.L_Slide_test2/tests/test_phase_z2_reuse_snapshot.py

"""IMP-43 (#72) u2 — unit tests for ``src.phase_z2_reuse_snapshot``.

Scope mirror of the production module (Stage 2 u2):

* ``build_snapshot`` shape, provenance, JSON round-trip, required keys.
* ``serialize_section`` / ``serialize_unit`` field preservation, including
  the duck-typed ``v4_candidates`` shape (template_id / frame_id /
  frame_number / confidence / label).
* ``validate_snapshot`` fail-closed paths: non-dict input, schema
  version mismatch, missing/empty/non-string ``mdx_sha256``, sha
  mismatch, missing required keys, unwrapped payload key, wrapper
  missing a provenance field.
* Module-level constants exposed for u3 / u4 / u4b consumers.

The tests use synthetic duck-typed dataclasses so the snapshot module's
external surface is exercised without coupling to the production
``MdxSection`` / ``CompositionUnit`` / ``V4Match`` dataclass layouts.
That mirrors the production module's intentional duck-typing (no
imports from ``phase_z2_pipeline`` / ``phase_z2_composition``).
"""
from __future__ import annotations

import json
from dataclasses import dataclass, field
from typing import Any, Optional

import pytest

from src.phase_z2_reuse_snapshot import (
    REQUIRED_TOP_LEVEL_KEYS,
    SNAPSHOT_FILENAME,
    SNAPSHOT_VERSION,
    SnapshotValidationError,
    build_snapshot,
    serialize_section,
    serialize_unit,
    validate_snapshot,
)


# -- synthetic duck-typed inputs ------------------------------------------


@dataclass
class _Section:
    """Synthetic duck-typed section fed to ``serialize_section`` / ``build_snapshot``.

    Test-only stand-in so the suite does not depend on the production
    section dataclass layout; it carries just the attributes the tests
    read back from the serialized output.
    """

    # Required attributes — ``_make_section`` supplies defaults for all four.
    section_id: str
    section_num: int
    title: str
    raw_content: str
    # Optional attributes; each instance gets its own fresh list.
    heading_number: Optional[str] = None
    v4_alias_keys: list = field(default_factory=list)
    sub_sections: list = field(default_factory=list)


@dataclass
class _V4Candidate:
    """Synthetic duck-typed entry for a unit's ``v4_candidates`` list.

    Exposes the named attributes the serializer tests expect to find in
    each serialized candidate (template_id / frame_id / frame_number /
    confidence / label / v4_rank).
    """

    template_id: str
    frame_id: str
    frame_number: int
    confidence: float
    label: str
    # Left as None unless a test sets a rank explicitly.
    v4_rank: Optional[int] = None


@dataclass
class _Unit:
    """Synthetic duck-typed composition unit fed to ``serialize_unit`` / ``build_snapshot``.

    Test-only stand-in so the suite does not depend on the production
    unit dataclass layout.
    """

    # Required attributes — ``_make_unit`` supplies defaults for all of them.
    source_section_ids: list
    merge_type: str
    frame_template_id: str
    frame_id: str
    frame_number: int
    confidence: float
    label: str
    phase_z_status: str
    raw_content: str
    title: str
    score: float
    # Optional attributes with defaults; mutable ones use ``default_factory``
    # so instances never share a list/dict.
    v4_rank: Optional[int] = 1
    selection_path: str = "rank_1"
    fallback_reason: Optional[str] = None
    rationale: dict = field(default_factory=dict)
    auto_selectable: bool = True
    filter_reasons: list = field(default_factory=list)
    notes: list = field(default_factory=list)
    v4_candidates: list = field(default_factory=list)
    provisional: bool = False


def _make_section(**overrides: Any) -> _Section:
    """Return a ``_Section`` with canned defaults, letting *overrides* win."""
    defaults = {
        "section_id": "03-1",
        "section_num": 1,
        "title": "DX status",
        "raw_content": "- bullet one\n- bullet two",
    }
    # Later mapping wins, so caller-supplied values replace the defaults.
    return _Section(**{**defaults, **overrides})


def _make_unit(**overrides: Any) -> _Unit:
    """Return a ``_Unit`` with canned defaults, letting *overrides* win.

    By default the unit carries a single ``_V4Candidate`` whose
    template/frame/confidence/label mirror the unit's own values.
    """
    default_candidate = _V4Candidate(
        template_id="tpl_a",
        frame_id="fid_a",
        frame_number=13,
        confidence=0.91,
        label="use_as_is",
    )
    defaults: dict[str, Any] = {
        "source_section_ids": ["03-1"],
        "merge_type": "single",
        "frame_template_id": "tpl_a",
        "frame_id": "fid_a",
        "frame_number": 13,
        "confidence": 0.91,
        "label": "use_as_is",
        "phase_z_status": "auto_renderable",
        "raw_content": "- bullet one\n- bullet two",
        "title": "DX status",
        "score": 0.91,
        "v4_candidates": [default_candidate],
    }
    # Later mapping wins, so caller-supplied values replace the defaults.
    return _Unit(**{**defaults, **overrides})


def _make_build_kwargs(**overrides: Any) -> dict[str, Any]:
    """Return the keyword arguments for a default ``build_snapshot`` call.

    Any keyword passed in *overrides* replaces the canned value of the
    same name.
    """
    defaults: dict[str, Any] = {
        "mdx_sha256": "a" * 64,
        "slide_title": "Title",
        "slide_footer": "Footer",
        "sections": [_make_section()],
        "stage0_adapter_diagnostics": {"used": True, "fallback_reason": None},
        "stage0_normalized_assets": {"popups": [], "images": [], "tables": []},
        "v4_evidence": [{"section_id": "03-1", "v4_candidates": []}],
        "layout_preset_pre_override": "horizontal-2",
        "units": [_make_unit()],
        "comp_debug": {"v4_fallback_summary": {"fallback_used_count": 0}},
        "v4_fallback_traces": {"03-1": {"selection_path": "rank_1"}},
        "ai_preflight": {"enabled": False, "skipped": True},
    }
    return {**defaults, **overrides}


# -- module constants -----------------------------------------------------


def test_snapshot_filename_constant():
    """The exported snapshot filename is pinned to its literal value."""
    expected_filename = "_reuse_snapshot.json"
    assert SNAPSHOT_FILENAME == expected_filename


def test_snapshot_version_is_positive_int():
    """``SNAPSHOT_VERSION`` is an ``int`` no smaller than 1."""
    version = SNAPSHOT_VERSION
    assert isinstance(version, int)
    assert 1 <= version


def test_required_keys_include_contract_and_payload():
    """``REQUIRED_TOP_LEVEL_KEYS`` lists both contract keys and every payload axis."""
    # Bare contract / integrity keys.
    contract_keys = ("schema_version", "mdx_sha256")
    for contract_key in contract_keys:
        assert contract_key in REQUIRED_TOP_LEVEL_KEYS

    # Payload axes per Stage 2 plan.
    payload_keys = (
        "slide_title",
        "slide_footer",
        "sections",
        "stage0_adapter_diagnostics",
        "stage0_normalized_assets",
        "v4_evidence",
        "layout_preset_pre_override",
        "units",
        "comp_debug",
        "v4_fallback_traces",
        "ai_preflight",
    )
    for payload_key in payload_keys:
        assert (
            payload_key in REQUIRED_TOP_LEVEL_KEYS
        ), f"missing from REQUIRED_TOP_LEVEL_KEYS: {payload_key}"


# -- build_snapshot -------------------------------------------------------


def test_build_snapshot_round_trips_through_json():
    """A built snapshot survives ``json.dumps`` / ``json.loads`` with its contract keys intact."""
    snapshot = build_snapshot(**_make_build_kwargs())
    reloaded = json.loads(json.dumps(snapshot))
    assert reloaded["schema_version"] == SNAPSHOT_VERSION
    assert reloaded["mdx_sha256"] == "a" * 64


def test_build_snapshot_has_all_required_keys():
    """Every entry of ``REQUIRED_TOP_LEVEL_KEYS`` appears in a built snapshot."""
    snapshot = build_snapshot(**_make_build_kwargs())
    for required in REQUIRED_TOP_LEVEL_KEYS:
        assert required in snapshot, f"build_snapshot missing required key: {required}"


def test_build_snapshot_bare_keys_are_unwrapped_scalars():
    """``schema_version`` and ``mdx_sha256`` are stored as plain values, not wrappers."""
    snapshot = build_snapshot(**_make_build_kwargs())
    expected_bare = {
        "schema_version": SNAPSHOT_VERSION,
        "mdx_sha256": "a" * 64,
    }
    for bare_key, want in expected_bare.items():
        assert snapshot[bare_key] == want
    # bare keys MUST NOT be wrapped — u4b mdx_sha256 check reads directly.
    for bare_key in expected_bare:
        assert not isinstance(snapshot[bare_key], dict)


def test_build_snapshot_provenance_wrapper_shape():
    """Every non-contract key is a ``{value, source_path, upstream_step}`` wrapper.

    ``source_path`` must be a non-empty string and ``upstream_step`` a
    string starting with ``"step"``.
    """
    contract_keys = ("schema_version", "mdx_sha256")
    wrapper_fields = {"value", "source_path", "upstream_step"}
    snapshot = build_snapshot(**_make_build_kwargs())
    wrapped = {k: v for k, v in snapshot.items() if k not in contract_keys}
    for name, wrapper in wrapped.items():
        assert isinstance(wrapper, dict), f"{name} is not wrapped"
        assert set(wrapper) == wrapper_fields, name
        source_path = wrapper["source_path"]
        assert isinstance(source_path, str)
        assert source_path
        upstream_step = wrapper["upstream_step"]
        assert isinstance(upstream_step, str)
        assert upstream_step.startswith("step"), upstream_step


def test_build_snapshot_upstream_steps_stay_inside_reuse_boundary():
    """Each wrapper's ``upstream_step`` must be one of step00/02/05/06.

    That set is the Step 0/2/5/6 reuse boundary (Stage 1 root_cause); a
    drift to e.g. ``step09`` would silently invite work outside the
    reuse window, so the test fails loudly instead.

    Step 01 contributes only the ``mdx_sha256`` integrity key, a bare
    contract scalar with no wrapper, so step01 never needs to show up in
    payload provenance.
    """
    reuse_boundary = {"step00", "step02", "step05", "step06"}
    contract_keys = {"schema_version", "mdx_sha256"}
    snapshot = build_snapshot(**_make_build_kwargs())
    for name, wrapper in snapshot.items():
        if name not in contract_keys:
            step = wrapper["upstream_step"]
            assert step in reuse_boundary, (
                f"key {name!r}: upstream_step {step!r} outside reuse boundary"
            )


def test_build_snapshot_units_carry_v4_candidates():
    """The single serialized unit keeps its first candidate's identifying fields."""
    serialized_units = build_snapshot(**_make_build_kwargs())["units"]["value"]
    assert len(serialized_units) == 1
    first_candidate = serialized_units[0]["v4_candidates"][0]
    assert first_candidate["template_id"] == "tpl_a"
    assert first_candidate["frame_number"] == 13
    assert first_candidate["confidence"] == pytest.approx(0.91)


def test_build_snapshot_sections_preserve_alias_keys_and_subsections():
    """Section id, alias keys, sub-sections and heading number reach the snapshot unchanged."""
    custom_section = _make_section(
        section_id="04-2",
        v4_alias_keys=["04-2.1"],
        sub_sections=[{"id": "04-2-sub-1"}],
        heading_number="2.1",
    )
    snapshot = build_snapshot(**_make_build_kwargs(sections=[custom_section]))
    first_section = snapshot["sections"]["value"][0]
    expected_fields = {
        "section_id": "04-2",
        "v4_alias_keys": ["04-2.1"],
        "sub_sections": [{"id": "04-2-sub-1"}],
        "heading_number": "2.1",
    }
    for field_name, want in expected_fields.items():
        assert first_section[field_name] == want


def test_build_snapshot_units_provenance_points_at_step06():
    """The ``units`` wrapper names the step06 composition plan as its source."""
    units_wrapper = build_snapshot(**_make_build_kwargs())["units"]
    assert "step06_composition_plan.json" in units_wrapper["source_path"]
    assert units_wrapper["upstream_step"] == "step06"


def test_build_snapshot_v4_evidence_provenance_points_at_step05():
    """The ``v4_evidence`` wrapper names the step05 evidence file as its source."""
    evidence_wrapper = build_snapshot(**_make_build_kwargs())["v4_evidence"]
    assert "step05_v4_evidence.json" in evidence_wrapper["source_path"]
    assert evidence_wrapper["upstream_step"] == "step05"


def test_build_snapshot_ai_preflight_provenance_points_at_step00():
    """The ``ai_preflight`` wrapper names the step00 preconditions file as its source."""
    preflight_wrapper = build_snapshot(**_make_build_kwargs())["ai_preflight"]
    assert "step00_preconditions.json" in preflight_wrapper["source_path"]
    assert preflight_wrapper["upstream_step"] == "step00"


def test_build_snapshot_rejects_unjsonable_input():
    """A unit whose ``notes`` hold a non-JSON-safe object makes ``build_snapshot`` raise ``TypeError``."""
    # A bare ``object()`` is not JSON-safe.
    unjsonable_unit = _make_unit(notes=[object()])
    with pytest.raises(TypeError):
        build_snapshot(**_make_build_kwargs(units=[unjsonable_unit]))


def test_build_snapshot_handles_none_optional_fields():
    """``None`` for optional inputs yields ``None`` (scalars) or ``{}`` (mappings), never an error."""
    scalar_keys = ("slide_title", "slide_footer")
    mapping_keys = (
        "stage0_adapter_diagnostics",
        "stage0_normalized_assets",
        "comp_debug",
        "v4_fallback_traces",
        "ai_preflight",
    )
    none_overrides = dict.fromkeys(scalar_keys + mapping_keys, None)
    snapshot = build_snapshot(**_make_build_kwargs(**none_overrides))

    # None inputs land as None / {} consistently — never raise.
    for scalar_key in scalar_keys:
        assert snapshot[scalar_key]["value"] is None
    for mapping_key in mapping_keys:
        assert snapshot[mapping_key]["value"] == {}


# -- serializer helpers ---------------------------------------------------


def test_serialize_section_preserves_all_documented_fields():
    """``serialize_section`` carries each section attribute into the output dict."""
    serialized = serialize_section(
        _make_section(
            heading_number="1.1",
            v4_alias_keys=["03-1.x"],
            sub_sections=[{"id": "s"}],
        )
    )
    required_exact = {"section_id": "03-1", "section_num": 1, "title": "DX status"}
    for field_name, want in required_exact.items():
        assert serialized[field_name] == want
    assert serialized["raw_content"].startswith("- bullet")
    optional_exact = {
        "heading_number": "1.1",
        "v4_alias_keys": ["03-1.x"],
        "sub_sections": [{"id": "s"}],
    }
    for field_name, want in optional_exact.items():
        assert serialized[field_name] == want


def test_serialize_section_works_with_missing_optional_attrs():
    """An object lacking the optional attributes serializes with ``None`` / ``[]`` fallbacks."""

    # Only the four required attributes exist; the optional ones are absent.
    class _Minimal:
        section_id = "x"
        section_num = 0
        title = "t"
        raw_content = "r"

    serialized = serialize_section(_Minimal())
    assert serialized["heading_number"] is None
    for list_field in ("v4_alias_keys", "sub_sections"):
        assert serialized[list_field] == []


def test_serialize_unit_v4_candidates_unwrap_to_named_attrs():
    """The default unit's candidate serializes to exactly the six named fields."""
    expected_candidate = {
        "template_id": "tpl_a",
        "frame_id": "fid_a",
        "frame_number": 13,
        "confidence": pytest.approx(0.91),
        "label": "use_as_is",
        # u4 follow-up — Step 9 application-plan payload reads
        # ``c.v4_rank`` off each rehydrated candidate. Snapshot
        # serializer persists it via ``getattr(c, 'v4_rank', None)`` so
        # legacy duck types (no v4_rank attr) get None and modern V4Match
        # instances carry their rank (1/2/3/...).
        "v4_rank": None,
    }
    serialized = serialize_unit(_make_unit())
    assert serialized["v4_candidates"][0] == expected_candidate


def test_serialize_unit_v4_candidates_persist_v4_rank_when_present():
    """A candidate built with ``v4_rank=2`` keeps that rank after ``serialize_unit``."""
    candidate_fields = {
        "template_id": "tpl_b",
        "frame_id": "fid_b",
        "frame_number": 14,
        "confidence": 0.82,
        "label": "light_edit",
        "v4_rank": 2,
    }
    ranked_unit = _make_unit(v4_candidates=[_V4Candidate(**candidate_fields)])
    serialized_candidate = serialize_unit(ranked_unit)["v4_candidates"][0]
    assert serialized_candidate["v4_rank"] == 2


def test_serialize_unit_handles_empty_v4_candidates():
    """A unit with no candidates serializes ``v4_candidates`` as an empty list."""
    serialized = serialize_unit(_make_unit(v4_candidates=[]))
    assert serialized["v4_candidates"] == []


def test_serialize_unit_provisional_default_false():
    """The default unit serializes ``provisional`` as the literal ``False``."""
    serialized = serialize_unit(_make_unit())
    assert serialized["provisional"] is False


def test_serialize_unit_provisional_true_preserved():
    """A unit flagged ``provisional=True`` serializes the literal ``True``."""
    serialized = serialize_unit(_make_unit(provisional=True))
    assert serialized["provisional"] is True


def test_serialize_unit_round_trips_through_json():
    """A serialized unit survives ``json.dumps`` / ``json.loads`` with key fields intact."""
    encoded = json.dumps(serialize_unit(_make_unit()))
    decoded = json.loads(encoded)
    assert decoded["source_section_ids"] == ["03-1"]
    assert decoded["frame_template_id"] == "tpl_a"


# -- validate_snapshot ----------------------------------------------------


def test_validate_snapshot_accepts_well_formed():
    """A freshly built snapshot validates against its own sha without raising."""
    expected_sha = "a" * 64
    well_formed = build_snapshot(**_make_build_kwargs())
    validate_snapshot(well_formed, expected_mdx_sha256=expected_sha)


def test_validate_snapshot_rejects_non_dict_input():
    """Passing a string instead of a dict raises ``SnapshotValidationError``."""
    not_a_snapshot = "not a dict"
    with pytest.raises(SnapshotValidationError):
        validate_snapshot(not_a_snapshot, expected_mdx_sha256="a" * 64)


def test_validate_snapshot_rejects_version_mismatch():
    """A ``schema_version`` different from ``SNAPSHOT_VERSION`` is rejected and named in the error."""
    tampered = build_snapshot(**_make_build_kwargs())
    tampered.update(schema_version=SNAPSHOT_VERSION + 999)
    with pytest.raises(SnapshotValidationError) as excinfo:
        validate_snapshot(tampered, expected_mdx_sha256="a" * 64)
    message = str(excinfo.value)
    assert "schema_version" in message


def test_validate_snapshot_rejects_missing_sha():
    """A snapshot without ``mdx_sha256`` is rejected and the key is named in the error."""
    tampered = build_snapshot(**_make_build_kwargs())
    tampered.pop("mdx_sha256")
    with pytest.raises(SnapshotValidationError) as excinfo:
        validate_snapshot(tampered, expected_mdx_sha256="a" * 64)
    message = str(excinfo.value)
    assert "mdx_sha256" in message


def test_validate_snapshot_rejects_empty_sha():
    """An empty-string ``mdx_sha256`` is rejected and the key is named in the error."""
    tampered = build_snapshot(**_make_build_kwargs())
    tampered.update(mdx_sha256="")
    with pytest.raises(SnapshotValidationError) as excinfo:
        validate_snapshot(tampered, expected_mdx_sha256="a" * 64)
    message = str(excinfo.value)
    assert "mdx_sha256" in message


def test_validate_snapshot_rejects_non_string_sha():
    """An integer ``mdx_sha256`` is rejected and the key is named in the error."""
    tampered = build_snapshot(**_make_build_kwargs())
    tampered.update(mdx_sha256=12345)
    with pytest.raises(SnapshotValidationError) as excinfo:
        validate_snapshot(tampered, expected_mdx_sha256="a" * 64)
    message = str(excinfo.value)
    assert "mdx_sha256" in message


def test_validate_snapshot_rejects_sha_mismatch():
    """Validating against a different expected sha raises with an ``mdx_sha256 mismatch`` message."""
    different_sha = "b" * 64
    untouched = build_snapshot(**_make_build_kwargs())
    with pytest.raises(SnapshotValidationError) as excinfo:
        validate_snapshot(untouched, expected_mdx_sha256=different_sha)
    message = str(excinfo.value)
    assert "mdx_sha256 mismatch" in message


def test_validate_snapshot_rejects_missing_required_key():
    """Dropping the required ``units`` key is rejected and the key is named in the error."""
    tampered = build_snapshot(**_make_build_kwargs())
    tampered.pop("units")
    with pytest.raises(SnapshotValidationError) as excinfo:
        validate_snapshot(tampered, expected_mdx_sha256="a" * 64)
    message = str(excinfo.value)
    assert "units" in message


def test_validate_snapshot_rejects_unwrapped_payload_key():
    """A payload key holding a non-dict value is rejected and the key is named in the error."""
    tampered = build_snapshot(**_make_build_kwargs())
    tampered.update(units="not a dict")
    with pytest.raises(SnapshotValidationError) as excinfo:
        validate_snapshot(tampered, expected_mdx_sha256="a" * 64)
    message = str(excinfo.value)
    assert "units" in message


def test_validate_snapshot_rejects_wrapper_missing_value():
    """A wrapper without its ``value`` field is rejected and the field is named in the error."""
    wrapper_without_value = {"source_path": "x", "upstream_step": "step06"}
    tampered = build_snapshot(**_make_build_kwargs())
    tampered.update(units=wrapper_without_value)
    with pytest.raises(SnapshotValidationError) as excinfo:
        validate_snapshot(tampered, expected_mdx_sha256="a" * 64)
    message = str(excinfo.value)
    assert "value" in message


def test_validate_snapshot_rejects_wrapper_missing_source_path():
    """A wrapper without ``source_path`` is rejected and the field is named in the error."""
    wrapper_without_path = {"value": [], "upstream_step": "step06"}
    tampered = build_snapshot(**_make_build_kwargs())
    tampered.update(units=wrapper_without_path)
    with pytest.raises(SnapshotValidationError) as excinfo:
        validate_snapshot(tampered, expected_mdx_sha256="a" * 64)
    message = str(excinfo.value)
    assert "source_path" in message


def test_validate_snapshot_rejects_wrapper_missing_upstream_step():
    """A wrapper without ``upstream_step`` is rejected and the field is named in the error."""
    wrapper_without_step = {"value": [], "source_path": "x"}
    tampered = build_snapshot(**_make_build_kwargs())
    tampered.update(units=wrapper_without_step)
    with pytest.raises(SnapshotValidationError) as excinfo:
        validate_snapshot(tampered, expected_mdx_sha256="a" * 64)
    message = str(excinfo.value)
    assert "upstream_step" in message


def test_validate_snapshot_error_subclasses_value_error():
    """A validation failure must be catchable through a plain ``except ValueError``.

    The snapshot is made invalid via a schema-version mismatch and the
    test only requires that the resulting exception is a ``ValueError``.
    """
    snap = build_snapshot(**_make_build_kwargs())
    # Derive the bad version from SNAPSHOT_VERSION instead of a literal: a
    # fixed 999 would stop being a mismatch if the schema version ever
    # reached that number, turning this test into a false failure.
    snap["schema_version"] = SNAPSHOT_VERSION + 999
    # u4b will pre-catch SnapshotValidationError, but the broader
    # `except ValueError` net must still pick this up.
    with pytest.raises(ValueError):
        validate_snapshot(snap, expected_mdx_sha256="a" * 64)