feat(#76): IMP-47B reject-as-AI-adaptation activation (u1~u13 backend + tests)

- u1~u9: AI fallback infrastructure (router/prompts/schema/validator) + Step 12 hook
- u10: e2e reject chain (writes final.html with AI-repaired slot, full coverage)
- u11: frontend wiring deferred to follow-up commit (split from IMP-41 hunks)
- u12: coverage_invariant guard
- u13: cache save gate (visual_check PASS + user_approved/auto_cache) — Codex #22 verified

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-05-22 00:17:46 +09:00
parent f358604fb3
commit 1186ad8ae2
23 changed files with 3901 additions and 111 deletions

View File

@@ -36,6 +36,7 @@ _ALLOWED_TOP_LEVEL: frozenset[str] = frozenset(
"ast",
"dataclasses",
"enum",
"hashlib",
"json",
"pathlib",
"random",

View File

@@ -1,32 +1,67 @@
"""IMP-33 u6 — AI fallback cache gate tests.
"""IMP-46 u2 — Persistent JSON cache backend tests.
Verifies the IMP-46 gate contract:
* ``read_proposal`` is a stub (returns None until IMP-46).
* ``save_proposal`` enforces both gates before any write attempt.
* Storage itself raises NotImplementedError (IMP-46 marker).
Scope (Stage 2 plan, u2):
* Replaced ``NotImplementedError`` marker with a real persistent backend
at ``data/frame_cache/{frame_id}/{signature_hash}.json``.
* Preserved IMP-33 u6 dual write gate: ``visual_check_passed`` AND
``user_approved`` BOTH required (loud :class:`AiFallbackCacheGateError`
before any filesystem touch).
* Round-trip every :class:`ProposalKind`; round-trip ``slide_css`` None
*and* set; missing or corrupt files miss silently.
* Fingerprint *comparison* is u3; here we only check that the field is
persisted.
All filesystem writes are scoped to ``tmp_path`` via
``monkeypatch.setattr`` on the module-level :data:`CACHE_ROOT`, so the
production directory is never touched by these tests.
"""
from __future__ import annotations
import json
import pathlib
import pytest
from src.phase_z2_ai_fallback import cache as cache_mod
from src.phase_z2_ai_fallback.cache import (
AiFallbackCacheGateError,
KEY_DELIMITER,
SCHEMA_VERSION,
read_proposal,
save_proposal,
)
from src.phase_z2_ai_fallback.schema import AiFallbackProposal, ProposalKind
def _proposal() -> AiFallbackProposal:
_FRAME_ID = "1171281190"
_SIG_HASH = "a" * 64 # SHA256-shaped placeholder; cache is shape-agnostic.
_KEY = f"{_FRAME_ID}{KEY_DELIMITER}{_SIG_HASH}"
def _proposal(
kind: ProposalKind = ProposalKind.BUILDER_OPTIONS_PATCH,
payload: dict | None = None,
) -> AiFallbackProposal:
return AiFallbackProposal(
proposal_kind=ProposalKind.BUILDER_OPTIONS_PATCH,
payload={"item_parser": "bullet_v2"},
rationale="u6-test",
proposal_kind=kind,
payload=payload if payload is not None else {"item_parser": "bullet_v2"},
rationale="u2-test",
)
def test_read_proposal_returns_none_for_any_key():
assert read_proposal("frame=foo|cardinality=3") is None
@pytest.fixture(autouse=True)
def _isolated_cache_root(tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch):
    """Point ``cache_mod.CACHE_ROOT`` at a per-test directory and yield that path.

    Applied automatically to every test in the module, so cache writes land
    under ``tmp_path / "frame_cache"`` instead of the configured root.
    """
    sandbox_root = tmp_path / "frame_cache"
    monkeypatch.setattr(cache_mod, "CACHE_ROOT", sandbox_root)
    yield sandbox_root
# -- read_proposal --------------------------------------------------------
def test_read_proposal_returns_none_for_missing_file():
    """Reading a key that was never saved yields ``None``."""
    lookup = read_proposal(_KEY)
    assert lookup is None
def test_read_proposal_rejects_empty_key():
@@ -34,10 +69,65 @@ def test_read_proposal_rejects_empty_key():
read_proposal("")
def test_read_proposal_rejects_non_string_key():
    """Passing ``None`` as the cache key raises ``ValueError``."""
    not_a_key = None
    with pytest.raises(ValueError):
        read_proposal(not_a_key)  # type: ignore[arg-type]
def test_read_proposal_returns_none_for_legacy_key_format():
    """Router back-compat: a pre-u4 cache_key (no '::') is a silent miss."""
    legacy_key = "frame:1171281190:cardinality:many"
    assert read_proposal(legacy_key) is None
def test_read_proposal_returns_none_for_corrupt_json(_isolated_cache_root: pathlib.Path):
    """An entry file that is not parseable JSON reads back as ``None``."""
    frame_dir = _isolated_cache_root / _FRAME_ID
    frame_dir.mkdir(parents=True, exist_ok=True)
    entry_file = frame_dir / f"{_SIG_HASH}.json"
    entry_file.write_text("{not valid json", encoding="utf-8")

    assert read_proposal(_KEY) is None
def test_read_proposal_returns_none_for_non_dict_root(_isolated_cache_root: pathlib.Path):
    """Valid JSON whose top-level value is a list (not an object) is a miss."""
    frame_dir = _isolated_cache_root / _FRAME_ID
    frame_dir.mkdir(parents=True, exist_ok=True)
    entry_file = frame_dir / f"{_SIG_HASH}.json"
    entry_file.write_text("[]", encoding="utf-8")

    assert read_proposal(_KEY) is None
def test_read_proposal_returns_none_when_payload_proposal_missing(
    _isolated_cache_root: pathlib.Path,
):
    """A stored object that lacks the ``proposal`` field is a miss."""
    frame_dir = _isolated_cache_root / _FRAME_ID
    frame_dir.mkdir(parents=True, exist_ok=True)
    serialized = json.dumps({"schema_version": 1})
    (frame_dir / f"{_SIG_HASH}.json").write_text(serialized, encoding="utf-8")

    assert read_proposal(_KEY) is None
def test_read_proposal_returns_none_for_forbidden_proposal_kind(
    _isolated_cache_root: pathlib.Path,
):
    """An entry whose ``proposal_kind`` is ``"mdx_text"`` reads back as ``None``."""
    entry = {
        "schema_version": 1,
        "proposal": {"proposal_kind": "mdx_text", "payload": {}, "rationale": ""},
        "slide_css": None,
        "fingerprints": {},
    }
    frame_dir = _isolated_cache_root / _FRAME_ID
    frame_dir.mkdir(parents=True, exist_ok=True)
    (frame_dir / f"{_SIG_HASH}.json").write_text(json.dumps(entry), encoding="utf-8")

    assert read_proposal(_KEY) is None
# -- save_proposal: write gates -------------------------------------------
def test_save_rejects_when_visual_check_failed():
with pytest.raises(AiFallbackCacheGateError) as exc:
save_proposal(
"k", _proposal(), visual_check_passed=False, user_approved=True
_KEY, _proposal(), visual_check_passed=False, user_approved=True
)
assert "visual_check_passed" in str(exc.value)
@@ -45,7 +135,7 @@ def test_save_rejects_when_visual_check_failed():
def test_save_rejects_when_user_not_approved():
with pytest.raises(AiFallbackCacheGateError) as exc:
save_proposal(
"k", _proposal(), visual_check_passed=True, user_approved=False
_KEY, _proposal(), visual_check_passed=True, user_approved=False
)
assert "user_approved" in str(exc.value)
@@ -53,16 +143,20 @@ def test_save_rejects_when_user_not_approved():
def test_save_rejects_when_both_gates_false():
with pytest.raises(AiFallbackCacheGateError):
save_proposal(
"k", _proposal(), visual_check_passed=False, user_approved=False
_KEY, _proposal(), visual_check_passed=False, user_approved=False
)
def test_save_raises_not_implemented_when_both_gates_pass():
with pytest.raises(NotImplementedError) as exc:
def test_save_gate_violation_does_not_touch_filesystem(
_isolated_cache_root: pathlib.Path,
):
with pytest.raises(AiFallbackCacheGateError):
save_proposal(
"k", _proposal(), visual_check_passed=True, user_approved=True
_KEY, _proposal(), visual_check_passed=False, user_approved=True
)
assert "IMP-46" in str(exc.value)
# Cache root may or may not exist depending on fixture order, but the
# frame_id directory must NOT exist when the gate rejects the write.
assert not (_isolated_cache_root / _FRAME_ID).exists()
def test_save_rejects_empty_key():
@@ -75,16 +169,340 @@ def test_save_rejects_empty_key():
def test_save_rejects_non_proposal_object():
with pytest.raises(TypeError):
save_proposal(
"k",
_KEY,
{"proposal_kind": "builder_options_patch"}, # type: ignore[arg-type]
visual_check_passed=True,
user_approved=True,
)
def test_gate_error_is_not_notimplementederror():
with pytest.raises(AiFallbackCacheGateError):
def test_save_rejects_legacy_key_format():
"""Writes must use the structural ``frame_id::signature_hash`` form."""
with pytest.raises(ValueError):
save_proposal(
"k", _proposal(), visual_check_passed=False, user_approved=True
"frame:1171281190:cardinality:many",
_proposal(),
visual_check_passed=True,
user_approved=True,
)
def test_save_rejects_slide_css_non_string():
    """An integer ``slide_css`` raises ``TypeError`` even with both gates open."""
    not_css = 123
    with pytest.raises(TypeError):
        save_proposal(
            _KEY,
            _proposal(),
            visual_check_passed=True,
            user_approved=True,
            slide_css=not_css,  # type: ignore[arg-type]
        )
def test_save_rejects_fingerprints_non_dict():
    """A list passed as ``fingerprints`` raises ``TypeError``."""
    not_a_mapping = ["contract_sha", "abc"]
    with pytest.raises(TypeError):
        save_proposal(
            _KEY,
            _proposal(),
            visual_check_passed=True,
            user_approved=True,
            fingerprints=not_a_mapping,  # type: ignore[arg-type]
        )
def test_gate_error_is_not_notimplementederror():
    """The gate error type is not a ``NotImplementedError`` subclass.

    Callers therefore can tell a gate violation apart from absent
    persistence by exception type.
    """
    gate_is_marker = issubclass(AiFallbackCacheGateError, NotImplementedError)
    assert not gate_is_marker
# -- save_proposal: persistence + round-trip ------------------------------
def test_save_creates_parent_directories(_isolated_cache_root: pathlib.Path):
    """A successful save creates the ``frame_id`` directory and the entry file."""
    frame_dir = _isolated_cache_root / _FRAME_ID
    assert not frame_dir.exists()

    open_gates = {"visual_check_passed": True, "user_approved": True}
    save_proposal(_KEY, _proposal(), **open_gates)

    assert (frame_dir / f"{_SIG_HASH}.json").is_file()
def test_save_returns_resolved_path(_isolated_cache_root: pathlib.Path):
    """``save_proposal`` returns ``<root>/<frame_id>/<signature_hash>.json``."""
    open_gates = {"visual_check_passed": True, "user_approved": True}
    written = save_proposal(_KEY, _proposal(), **open_gates)

    expected = _isolated_cache_root / _FRAME_ID / f"{_SIG_HASH}.json"
    assert written == expected
def test_save_payload_includes_schema_version(_isolated_cache_root: pathlib.Path):
    """The written JSON carries ``schema_version`` equal to ``SCHEMA_VERSION``."""
    open_gates = {"visual_check_passed": True, "user_approved": True}
    written = save_proposal(_KEY, _proposal(), **open_gates)

    on_disk = json.loads(written.read_text(encoding="utf-8"))
    assert on_disk["schema_version"] == SCHEMA_VERSION
def test_save_payload_includes_proposal_dump(_isolated_cache_root: pathlib.Path):
    """The ``proposal`` field equals the proposal's ``model_dump(mode="json")``."""
    candidate = _proposal(payload={"item_parser": "pillar_item"})
    open_gates = {"visual_check_passed": True, "user_approved": True}
    written = save_proposal(_KEY, candidate, **open_gates)

    on_disk = json.loads(written.read_text(encoding="utf-8"))
    assert on_disk["proposal"] == candidate.model_dump(mode="json")
def test_round_trip_default_slide_css_is_none(_isolated_cache_root: pathlib.Path):
    """Without optional kwargs, ``slide_css`` is stored as null and ``fingerprints`` as ``{}``."""
    open_gates = {"visual_check_passed": True, "user_approved": True}
    written = save_proposal(_KEY, _proposal(), **open_gates)

    on_disk = json.loads(written.read_text(encoding="utf-8"))
    assert on_disk["slide_css"] is None
    assert on_disk["fingerprints"] == {}
def test_round_trip_with_slide_css_set(_isolated_cache_root: pathlib.Path):
    """A supplied ``slide_css`` string is written to the entry unchanged."""
    stylesheet = ".slide { padding: 40px; }"
    open_gates = {"visual_check_passed": True, "user_approved": True}
    written = save_proposal(_KEY, _proposal(), slide_css=stylesheet, **open_gates)

    on_disk = json.loads(written.read_text(encoding="utf-8"))
    assert on_disk["slide_css"] == stylesheet
def test_round_trip_with_fingerprints(_isolated_cache_root: pathlib.Path):
    """A supplied ``fingerprints`` dict is written to the entry unchanged."""
    shas = {
        "contract_sha": "c" * 64,
        "partial_sha": "p" * 64,
        "catalog_sha": "x" * 64,
    }
    open_gates = {"visual_check_passed": True, "user_approved": True}
    written = save_proposal(_KEY, _proposal(), fingerprints=shas, **open_gates)

    on_disk = json.loads(written.read_text(encoding="utf-8"))
    assert on_disk["fingerprints"] == shas
def test_read_returns_proposal_after_save(_isolated_cache_root: pathlib.Path):
    """A saved proposal reads back with the same kind, payload and rationale."""
    saved = _proposal(payload={"key": "value"})
    open_gates = {"visual_check_passed": True, "user_approved": True}
    save_proposal(_KEY, saved, **open_gates)

    restored = read_proposal(_KEY)
    assert restored is not None
    assert restored.proposal_kind == saved.proposal_kind
    assert restored.payload == saved.payload
    assert restored.rationale == saved.rationale
@pytest.mark.parametrize("kind", list(ProposalKind))
def test_round_trip_all_proposal_kinds(
    kind: ProposalKind, _isolated_cache_root: pathlib.Path
):
    """Each ``ProposalKind`` member survives save → read with kind and payload intact."""
    # Kind-specific sample payloads; any other kind uses the generic one.
    special_payloads = {
        ProposalKind.PARTIAL_OVERRIDES: {"slots": {"pillar_1": "alpha"}},
        ProposalKind.SLOT_MAPPING_PROPOSAL: {"mapping": [{"from": "a", "to": "b"}]},
    }
    payload = special_payloads.get(kind, {"item_parser": "bullet_v2"})

    save_proposal(
        _KEY,
        _proposal(kind=kind, payload=payload),
        visual_check_passed=True,
        user_approved=True,
    )

    restored = read_proposal(_KEY)
    assert restored is not None
    assert restored.proposal_kind is kind
    assert restored.payload == payload
def test_save_overwrites_existing_entry(_isolated_cache_root: pathlib.Path):
    """Saving twice under one key leaves the second payload readable."""
    open_gates = {"visual_check_passed": True, "user_approved": True}
    for version in (1, 2):
        save_proposal(_KEY, _proposal(payload={"v": version}), **open_gates)

    restored = read_proposal(_KEY)
    assert restored is not None
    assert restored.payload == {"v": 2}
def test_file_layout_uses_frame_id_directory(_isolated_cache_root: pathlib.Path):
    """Entries land at ``<frame_id>/<signature_hash>.json`` for each frame id."""
    sibling_frame = f"{_FRAME_ID}_other"
    sibling_key = f"{sibling_frame}{KEY_DELIMITER}{_SIG_HASH}"
    open_gates = {"visual_check_passed": True, "user_approved": True}

    save_proposal(_KEY, _proposal(), **open_gates)
    save_proposal(sibling_key, _proposal(), **open_gates)

    entry_name = f"{_SIG_HASH}.json"
    assert (_isolated_cache_root / _FRAME_ID / entry_name).is_file()
    assert (_isolated_cache_root / sibling_frame / entry_name).is_file()
def test_different_signature_hashes_isolated(_isolated_cache_root: pathlib.Path):
    """Two signature hashes under one frame id each keep their own payload."""
    key_a = f"{_FRAME_ID}{KEY_DELIMITER}{'a' * 64}"
    key_b = f"{_FRAME_ID}{KEY_DELIMITER}{'b' * 64}"
    open_gates = {"visual_check_passed": True, "user_approved": True}

    save_proposal(key_a, _proposal(payload={"sig": "a"}), **open_gates)
    save_proposal(key_b, _proposal(payload={"sig": "b"}), **open_gates)

    from_a = read_proposal(key_a)
    from_b = read_proposal(key_b)
    assert from_a is not None and from_a.payload == {"sig": "a"}
    assert from_b is not None and from_b.payload == {"sig": "b"}
def test_parse_key_rejects_triple_delimiter():
    """A key with a second ``::`` (an extra segment after the signature) is a miss."""
    overlong_key = f"{_FRAME_ID}{KEY_DELIMITER}{_SIG_HASH}{KEY_DELIMITER}extra"
    lookup = read_proposal(overlong_key)
    assert lookup is None
# -- IMP-46 u5: auto_cache gate (2^3 truth table) -------------------------
#
# Three booleans: visual_check_passed (V), user_approved (U), auto_cache (A).
# Contract: V=True AND (U=True OR A=True) -> persist; else gate-raise.
# V is never bypassable; A=True only relaxes U=False.
_GATE_TRUTH_TABLE = [
# (V, U, A, expect_persist)
(False, False, False, False),
(False, False, True, False),
(False, True, False, False),
(False, True, True, False),
(True, False, False, False),
(True, False, True, True),
(True, True, False, True),
(True, True, True, True),
]
@pytest.mark.parametrize("v,u,a,expect_persist", _GATE_TRUTH_TABLE)
def test_save_gate_truth_table(
    v: bool,
    u: bool,
    a: bool,
    expect_persist: bool,
    _isolated_cache_root: pathlib.Path,
) -> None:
    """IMP-46 u5 — one case per (V, U, A) row: the save either persists or gate-raises."""
    gates = {"visual_check_passed": v, "user_approved": u, "auto_cache": a}

    if not expect_persist:
        with pytest.raises(AiFallbackCacheGateError):
            save_proposal(_KEY, _proposal(), **gates)
        # A rejected write must leave no frame directory behind.
        frame_dir = _isolated_cache_root / _FRAME_ID
        assert not frame_dir.exists(), (
            f"truth row (V={v}, U={u}, A={a}) leaked a directory"
        )
        return

    written = save_proposal(
        _KEY,
        _proposal(payload={"v": int(v), "u": int(u), "a": int(a)}),
        **gates,
    )
    assert written.is_file(), f"truth row (V={v}, U={u}, A={a}) must persist"
def test_auto_cache_default_off_preserves_dual_gate_semantics(
    _isolated_cache_root: pathlib.Path,
) -> None:
    """Omitting ``auto_cache`` still rejects ``user_approved=False`` and writes nothing."""
    with pytest.raises(AiFallbackCacheGateError) as caught:
        save_proposal(
            _KEY, _proposal(), visual_check_passed=True, user_approved=False
        )

    message = str(caught.value)
    assert "user_approved" in message
    frame_dir = _isolated_cache_root / _FRAME_ID
    assert not frame_dir.exists()
def test_auto_cache_cannot_bypass_visual_check() -> None:
    """``visual_check_passed=False`` gate-raises even when ``auto_cache=True``."""
    gates = {"visual_check_passed": False, "user_approved": True, "auto_cache": True}
    with pytest.raises(AiFallbackCacheGateError) as caught:
        save_proposal(_KEY, _proposal(), **gates)

    assert "visual_check_passed" in str(caught.value)
def test_auto_cache_bypass_user_approved_persists(
    _isolated_cache_root: pathlib.Path,
) -> None:
    """With ``auto_cache=True`` an unapproved proposal is written and reads back."""
    gates = {"visual_check_passed": True, "user_approved": False, "auto_cache": True}
    written = save_proposal(_KEY, _proposal(payload={"bypass": "user"}), **gates)
    assert written.is_file()

    restored = read_proposal(_KEY)
    assert restored is not None
    assert restored.payload == {"bypass": "user"}
def test_auto_cache_rejects_non_bool() -> None:
    """A string passed as ``auto_cache`` raises ``TypeError``."""
    not_a_flag = "yes"
    with pytest.raises(TypeError):
        save_proposal(
            _KEY,
            _proposal(),
            visual_check_passed=True,
            user_approved=True,
            auto_cache=not_a_flag,  # type: ignore[arg-type]
        )
def test_auto_cache_is_keyword_only() -> None:
    """``auto_cache`` is declared keyword-only and defaults to ``False``."""
    import inspect

    auto_cache_param = inspect.signature(save_proposal).parameters["auto_cache"]

    assert auto_cache_param.kind is inspect.Parameter.KEYWORD_ONLY
    assert auto_cache_param.default is False

View File

@@ -0,0 +1,347 @@
"""IMP-46 u3 — Fingerprint-based cache invalidation tests.
Scope (Stage 2 plan, u3):
* ``save_proposal`` persists ``fingerprints`` verbatim (u2 already covers
the round-trip; this suite re-asserts the read-side comparator).
* ``read_proposal`` accepts an optional ``fingerprints`` kwarg. When
supplied, the stored dict must equal the supplied dict EXACTLY (strict
equality). Mismatch — including missing keys, extra keys, or value
drift — returns ``None``.
* Default ``fingerprints=None`` performs no comparison (back-compat for
legacy callers).
* Fingerprint *computation* stays outside ``cache.py`` — these tests
treat the three declared shas (``contract_sha`` / ``partial_sha`` /
``catalog_sha``) as opaque hex strings, never recomputing them. The
cache layer is a content-addressed *comparator*, not a content
*hasher*.
All filesystem writes are scoped to ``tmp_path`` via
``monkeypatch.setattr`` on the module-level :data:`CACHE_ROOT`.
"""
from __future__ import annotations
import json
import pathlib
import pytest
from src.phase_z2_ai_fallback import cache as cache_mod
from src.phase_z2_ai_fallback.cache import (
KEY_DELIMITER,
read_proposal,
save_proposal,
)
from src.phase_z2_ai_fallback.schema import AiFallbackProposal, ProposalKind
# Sample frame id and signature hash shared by the tests in this module.
_FRAME_ID = "1171281190"
_SIG_HASH = "f" * 64  # 64-character placeholder value
# Cache key built as frame id + KEY_DELIMITER + signature hash.
_KEY = f"{_FRAME_ID}{KEY_DELIMITER}{_SIG_HASH}"
_FINGERPRINTS_BASELINE: dict[str, str] = {
"contract_sha": "c" * 64,
"partial_sha": "p" * 64,
"catalog_sha": "x" * 64,
}
def _proposal(payload: dict | None = None) -> AiFallbackProposal:
    """Build a ``BUILDER_OPTIONS_PATCH`` proposal for the tests.

    ``payload`` defaults to ``{"item_parser": "bullet_v2"}`` when omitted.
    """
    if payload is None:
        payload = {"item_parser": "bullet_v2"}
    return AiFallbackProposal(
        proposal_kind=ProposalKind.BUILDER_OPTIONS_PATCH,
        payload=payload,
        rationale="u3-test",
    )
@pytest.fixture(autouse=True)
def _isolated_cache_root(tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch):
    """Point ``cache_mod.CACHE_ROOT`` at a per-test directory and yield that path."""
    sandbox_root = tmp_path / "frame_cache"
    monkeypatch.setattr(cache_mod, "CACHE_ROOT", sandbox_root)
    yield sandbox_root
# -- save side: fingerprints persisted verbatim ---------------------------
def test_save_persists_fingerprints_verbatim(
    _isolated_cache_root: pathlib.Path,
):
    """The ``fingerprints`` field on disk equals the dict passed to save."""
    open_gates = {"visual_check_passed": True, "user_approved": True}
    written = save_proposal(
        _KEY, _proposal(), fingerprints=_FINGERPRINTS_BASELINE, **open_gates
    )

    on_disk = json.loads(written.read_text(encoding="utf-8"))
    assert on_disk["fingerprints"] == _FINGERPRINTS_BASELINE
# -- read side: back-compat (no fingerprints kwarg) -----------------------
def test_read_without_fingerprints_kwarg_returns_proposal(
    _isolated_cache_root: pathlib.Path,
):
    """Reading without the ``fingerprints`` kwarg returns the saved proposal."""
    open_gates = {"visual_check_passed": True, "user_approved": True}
    save_proposal(
        _KEY, _proposal(), fingerprints=_FINGERPRINTS_BASELINE, **open_gates
    )

    restored = read_proposal(_KEY)
    assert restored is not None
    assert restored.payload == {"item_parser": "bullet_v2"}
def test_read_without_fingerprints_kwarg_ignores_stored_mismatch(
    _isolated_cache_root: pathlib.Path,
):
    """A read that passes no fingerprints still hits, whatever fingerprints
    were stored with the entry."""
    stale = {"contract_sha": "old"}
    open_gates = {"visual_check_passed": True, "user_approved": True}
    save_proposal(_KEY, _proposal(), fingerprints=stale, **open_gates)

    restored = read_proposal(_KEY)
    assert restored is not None
# -- read side: matching fingerprints -------------------------------------
def test_read_with_matching_fingerprints_returns_proposal(
    _isolated_cache_root: pathlib.Path,
):
    """An equal (but distinct) fingerprints dict on read yields a hit."""
    open_gates = {"visual_check_passed": True, "user_approved": True}
    save_proposal(
        _KEY, _proposal(), fingerprints=_FINGERPRINTS_BASELINE, **open_gates
    )

    supplied = _FINGERPRINTS_BASELINE.copy()
    restored = read_proposal(_KEY, fingerprints=supplied)
    assert restored is not None
    assert restored.proposal_kind is ProposalKind.BUILDER_OPTIONS_PATCH
def test_read_with_empty_fingerprints_matches_empty_stored(
    _isolated_cache_root: pathlib.Path,
):
    """An entry saved without fingerprints is a hit for ``fingerprints={}``."""
    # ``fingerprints`` is deliberately not passed to the save call.
    open_gates = {"visual_check_passed": True, "user_approved": True}
    save_proposal(_KEY, _proposal(), **open_gates)

    restored = read_proposal(_KEY, fingerprints={})
    assert restored is not None
# -- read side: invalidation on mismatch ----------------------------------
@pytest.mark.parametrize(
    "drifted_axis",
    ["contract_sha", "partial_sha", "catalog_sha"],
)
def test_read_invalidates_on_single_axis_drift(
    drifted_axis: str, _isolated_cache_root: pathlib.Path
):
    """Changing any one fingerprint value on the read side turns the hit into a miss."""
    open_gates = {"visual_check_passed": True, "user_approved": True}
    save_proposal(
        _KEY, _proposal(), fingerprints=_FINGERPRINTS_BASELINE, **open_gates
    )

    # Same keys as the baseline, with one axis replaced by a different
    # 64-character value.
    drifted = {**_FINGERPRINTS_BASELINE, drifted_axis: "deadbeef" * 8}
    assert read_proposal(_KEY, fingerprints=drifted) is None
def test_read_invalidates_when_caller_supplies_extra_key(
    _isolated_cache_root: pathlib.Path,
):
    """Strict equality: a read-side dict with one extra key is a miss."""
    open_gates = {"visual_check_passed": True, "user_approved": True}
    save_proposal(
        _KEY, _proposal(), fingerprints=_FINGERPRINTS_BASELINE, **open_gates
    )

    widened = {**_FINGERPRINTS_BASELINE, "future_axis_sha": "z" * 64}
    assert read_proposal(_KEY, fingerprints=widened) is None
def test_read_invalidates_when_caller_supplies_subset(
    _isolated_cache_root: pathlib.Path,
):
    """Strict equality: a read-side dict holding only one stored key is a miss."""
    open_gates = {"visual_check_passed": True, "user_approved": True}
    save_proposal(
        _KEY, _proposal(), fingerprints=_FINGERPRINTS_BASELINE, **open_gates
    )

    narrowed = {"contract_sha": _FINGERPRINTS_BASELINE["contract_sha"]}
    assert read_proposal(_KEY, fingerprints=narrowed) is None
def test_read_invalidates_when_entry_saved_without_fingerprints(
    _isolated_cache_root: pathlib.Path,
):
    """An entry saved without fingerprints does not satisfy a lookup that
    supplies the baseline fingerprints."""
    # ``fingerprints`` is deliberately not passed to the save call.
    open_gates = {"visual_check_passed": True, "user_approved": True}
    save_proposal(_KEY, _proposal(), **open_gates)

    lookup = read_proposal(_KEY, fingerprints=_FINGERPRINTS_BASELINE)
    assert lookup is None
def test_read_invalidates_when_stored_fingerprints_not_dict(
    _isolated_cache_root: pathlib.Path,
):
    """A hand-written entry whose ``fingerprints`` is a list → ``None``."""
    entry = {
        "schema_version": 1,
        "proposal": _proposal().model_dump(mode="json"),
        "slide_css": None,
        "fingerprints": ["contract_sha", "c" * 64],
    }
    frame_dir = _isolated_cache_root / _FRAME_ID
    frame_dir.mkdir(parents=True, exist_ok=True)
    (frame_dir / f"{_SIG_HASH}.json").write_text(json.dumps(entry), encoding="utf-8")

    assert read_proposal(_KEY, fingerprints=_FINGERPRINTS_BASELINE) is None
def test_read_invalidates_when_stored_fingerprints_field_missing(
    _isolated_cache_root: pathlib.Path,
):
    """A hand-written entry with no ``fingerprints`` field at all → ``None``
    when the read supplies fingerprints."""
    # The "fingerprints" key is intentionally absent from this entry.
    entry = {
        "schema_version": 1,
        "proposal": _proposal().model_dump(mode="json"),
        "slide_css": None,
    }
    frame_dir = _isolated_cache_root / _FRAME_ID
    frame_dir.mkdir(parents=True, exist_ok=True)
    (frame_dir / f"{_SIG_HASH}.json").write_text(json.dumps(entry), encoding="utf-8")

    lookup = read_proposal(_KEY, fingerprints={"contract_sha": "c" * 64})
    assert lookup is None
def test_read_with_matching_fingerprints_still_loses_to_missing_file():
    """With nothing saved, a fingerprint-aware read is still a miss."""
    lookup = read_proposal(_KEY, fingerprints=_FINGERPRINTS_BASELINE)
    assert lookup is None
def test_read_with_matching_fingerprints_still_loses_to_corrupt_json(
    _isolated_cache_root: pathlib.Path,
):
    """An unparseable entry file is a miss even when fingerprints are supplied."""
    frame_dir = _isolated_cache_root / _FRAME_ID
    frame_dir.mkdir(parents=True, exist_ok=True)
    (frame_dir / f"{_SIG_HASH}.json").write_text("{not valid json", encoding="utf-8")

    assert read_proposal(_KEY, fingerprints=_FINGERPRINTS_BASELINE) is None
# -- read side: input validation symmetry with save -----------------------
def test_read_rejects_non_dict_fingerprints():
    """A list passed as ``fingerprints`` to the read raises ``TypeError``."""
    not_a_mapping = ["contract_sha", "c" * 64]
    with pytest.raises(TypeError):
        read_proposal(_KEY, fingerprints=not_a_mapping)  # type: ignore[arg-type]
def test_read_rejects_non_dict_fingerprints_string():
    """A plain string passed as ``fingerprints`` raises ``TypeError``."""
    # The literal is repeated eight times; only its type matters here.
    not_a_mapping = "contract_sha=c" * 8
    with pytest.raises(TypeError):
        read_proposal(_KEY, fingerprints=not_a_mapping)  # type: ignore[arg-type]
def test_read_rejects_non_dict_fingerprints_int():
    """An integer passed as ``fingerprints`` raises ``TypeError``."""
    not_a_mapping = 42
    with pytest.raises(TypeError):
        read_proposal(_KEY, fingerprints=not_a_mapping)  # type: ignore[arg-type]
# -- isolation: cache.py never computes fingerprints ----------------------
def test_cache_module_has_no_fingerprint_computer():
    """Guardrail: no public callable on ``cache_mod`` has a name containing
    "hash", "sha" or "fingerprint".

    The cache module is meant to compare fingerprints, not compute them;
    per the IMP-46 u3 contract the computation lives outside cache.py.
    """
    forbidden_substrings = ("hash", "sha", "fingerprint")

    # Public callables only: skip underscore-prefixed names and plain data.
    public_callables = []
    for attr_name in dir(cache_mod):
        if attr_name.startswith("_"):
            continue
        if callable(getattr(cache_mod, attr_name)):
            public_callables.append(attr_name)

    leaks = []
    for attr_name in public_callables:
        lowered = attr_name.lower()
        if any(fragment in lowered for fragment in forbidden_substrings):
            leaks.append(attr_name)

    assert leaks == [], (
        f"cache.py public surface leaks fingerprint computation: {leaks}; "
        "computation must live outside cache.py per IMP-46 u3 contract."
    )
# -- isolation across distinct fingerprint sets ---------------------------
def test_distinct_fingerprint_sets_isolated_per_signature(
    _isolated_cache_root: pathlib.Path,
):
    """Entries under different signature hashes keep separate fingerprints:
    a read with the other entry's fingerprints misses, its own hits."""
    key_a = f"{_FRAME_ID}{KEY_DELIMITER}{'a' * 64}"
    key_b = f"{_FRAME_ID}{KEY_DELIMITER}{'b' * 64}"
    fps_a = {"contract_sha": "a" * 64}
    fps_b = {"contract_sha": "b" * 64}
    open_gates = {"visual_check_passed": True, "user_approved": True}

    save_proposal(key_a, _proposal(payload={"sig": "a"}), fingerprints=fps_a, **open_gates)
    save_proposal(key_b, _proposal(payload={"sig": "b"}), fingerprints=fps_b, **open_gates)

    # Swapped fingerprints: both lookups miss.
    assert read_proposal(key_a, fingerprints=fps_b) is None
    assert read_proposal(key_b, fingerprints=fps_a) is None

    # Own fingerprints: both lookups hit with the right payload.
    hit_a = read_proposal(key_a, fingerprints=fps_a)
    hit_b = read_proposal(key_b, fingerprints=fps_b)
    assert hit_a is not None and hit_a.payload == {"sig": "a"}
    assert hit_b is not None and hit_b.payload == {"sig": "b"}

View File

@@ -0,0 +1,93 @@
"""IMP-46 u6 — repository layout coverage for the persistent frame cache.
This module is a *layout* contract test, not a runtime test. It asserts the
files committed to source control that make ``data/frame_cache/`` exist on a
fresh checkout while keeping cached JSON payloads ignored by git:
* ``data/frame_cache/.gitkeep`` is tracked (so the cache root exists for a
fresh clone before any AI fallback run materialises payloads).
* ``.gitignore`` ignores ``data/*`` broadly, re-includes the
``data/frame_cache/`` directory, ignores its contents, and re-includes
``data/frame_cache/.gitkeep`` so cache payloads under
``data/frame_cache/{frame_id}/{signature_hash}.json`` remain ignored.
If somebody removes the ``.gitkeep`` marker, drops the negation lines from
``.gitignore``, or commits a real cache payload, this test fails. The cache
module surface (cache.py) is exercised by ``test_cache.py`` /
``test_cache_invalidation.py`` and is intentionally *not* re-asserted here —
this file is the layout-only lock that Stage 2 u6 declared.
"""
from __future__ import annotations
from pathlib import Path
import pytest
# Repository root, taken as ``parents[2]`` of this file's resolved path
# (the directory two levels above the one containing this file).
REPO_ROOT = Path(__file__).resolve().parents[2]
# Ignore-rule file inspected by the layout tests.
GITIGNORE_PATH = REPO_ROOT / ".gitignore"
# Frame cache root directory and the marker file expected inside it.
CACHE_ROOT = REPO_ROOT / "data" / "frame_cache"
GITKEEP_PATH = CACHE_ROOT / ".gitkeep"
def _gitignore_lines() -> list[str]:
    """Return every line of the repository ``.gitignore``, whitespace-stripped.

    Fails with an assertion if the file does not exist.
    """
    assert GITIGNORE_PATH.is_file(), f".gitignore missing at {GITIGNORE_PATH}"
    stripped: list[str] = []
    for raw_line in GITIGNORE_PATH.read_text(encoding="utf-8").splitlines():
        stripped.append(raw_line.strip())
    return stripped
def test_frame_cache_root_directory_exists() -> None:
    """The cache root ``data/frame_cache/`` is present on disk as a directory."""
    root_present = CACHE_ROOT.is_dir()
    assert root_present, (
        f"frame cache root missing: {CACHE_ROOT}. The directory must exist "
        "for save_proposal to write JSON payloads without first conjuring a "
        "parent on demand from outside the cache module."
    )
def test_gitkeep_marker_is_tracked_file() -> None:
    """The ``data/frame_cache/.gitkeep`` marker exists as a regular file."""
    marker_present = GITKEEP_PATH.is_file()
    assert marker_present, (
        f".gitkeep marker missing: {GITKEEP_PATH}. Without it the cache root "
        "would disappear on a fresh clone (everything under data/ is "
        "ignored by default)."
    )
@pytest.mark.parametrize(
    "rule",
    [
        # 1. Blanket ignore of everything directly under data/.
        "data/*",
        # 2. Un-ignore the frame_cache directory so rules beneath it can apply.
        "!data/frame_cache/",
        # 3. Ignore whatever sits inside frame_cache/ (the cached payloads).
        "data/frame_cache/*",
        # 4. Un-ignore just the .gitkeep marker.
        "!data/frame_cache/.gitkeep",
    ],
)
def test_gitignore_contains_frame_cache_exception(rule: str) -> None:
    """Each of the four frame-cache rules appears as its own ``.gitignore`` line."""
    assert rule in _gitignore_lines(), (
        f".gitignore missing IMP-46 u6 rule: {rule!r}. The four-line block "
        "(data/*, !data/frame_cache/, data/frame_cache/*, "
        "!data/frame_cache/.gitkeep) together ensure the cache root is "
        "tracked while cached payloads remain ignored."
    )
def test_gitignore_rule_order_keeps_payloads_ignored() -> None:
    """The four frame-cache rules appear in the order that makes them work.

    In a ``.gitignore`` the last matching pattern decides, and a file cannot
    be re-included while its parent directory is excluded. The block must
    therefore read, top to bottom:

    1. ``data/*``                      – broad ignore
    2. ``!data/frame_cache/``          – re-include the directory
    3. ``data/frame_cache/*``          – re-ignore its contents
    4. ``!data/frame_cache/.gitkeep``  – re-include only the marker

    If ``data/*`` came after rule 2 it would re-ignore the directory; if
    rule 3 came before rule 2 the re-include would shadow it. A missing rule
    is reported as an assertion failure naming the rule, not as a bare
    ``ValueError`` from ``list.index``. Positions use each rule's first
    occurrence.
    """
    ordered_rules = (
        "data/*",
        "!data/frame_cache/",
        "data/frame_cache/*",
        "!data/frame_cache/.gitkeep",
    )
    lines = _gitignore_lines()

    missing = [rule for rule in ordered_rules if rule not in lines]
    assert not missing, (
        f".gitignore missing IMP-46 u6 rule(s): {missing!r}; cannot check order."
    )

    positions = [lines.index(rule) for rule in ordered_rules]
    assert positions == sorted(positions), (
        "gitignore IMP-46 u6 block out of order: expected "
        "'data/*' < '!data/frame_cache/' < 'data/frame_cache/*' < "
        "'!data/frame_cache/.gitkeep' so cached payloads stay ignored while "
        f"only the marker is tracked; found line indices {positions!r}."
    )

View File

@@ -0,0 +1,184 @@
"""IMP-46 u1 — Frame cache signature builder tests.
Verifies:
* Determinism — identical inputs yield the same SHA256 digest.
* Axis-change sensitivity — every one of the 8 declared axes mutates the
digest when changed in isolation.
* Public surface — only the 8 declared axes are accepted (no
sample/section identifier leakage).
* char_count bucket boundaries (0-50, 51-150, 151-400, 401-1000, 1001+).
* source_shape enum equivalence (string and SourceShape inputs match).
* schema_version is part of the hashed payload (digest stable for fixture).
"""
from __future__ import annotations
import inspect
import pytest
from src.phase_z2_ai_fallback.signature import (
CHAR_COUNT_BUCKET_LABELS,
SCHEMA_VERSION,
SourceShape,
bucket_char_count,
build_signature,
)
def _base_kwargs() -> dict:
    """Return a fresh dict of baseline keyword arguments for ``build_signature``."""
    return {
        "frame_id": "frame_03",
        "v4_label": "light_edit",
        "cardinality": 3,
        "source_shape": SourceShape.BULLET,
        "h3_count": 2,
        "char_count_bucket": "51-150",
        "layout_preset": "sidebar-right",
        "zone_position": "top",
    }
def test_schema_version_is_one() -> None:
    """The signature module's ``SCHEMA_VERSION`` equals 1."""
    expected_version = 1
    assert SCHEMA_VERSION == expected_version
def test_bucket_labels_match_spec() -> None:
    """``CHAR_COUNT_BUCKET_LABELS`` is exactly the five-label tuple, in order."""
    expected_labels = ("0-50", "51-150", "151-400", "401-1000", "1001+")
    assert CHAR_COUNT_BUCKET_LABELS == expected_labels
def test_signature_is_deterministic() -> None:
    """Two calls with equal inputs give the same 64-character digest."""
    first = build_signature(**_base_kwargs())
    second = build_signature(**_base_kwargs())

    assert first == second
    assert len(first) == 64
@pytest.mark.parametrize(
    "axis, new_value",
    [
        ("frame_id", "frame_04"),
        ("v4_label", "restructure"),
        ("cardinality", 5),
        ("source_shape", SourceShape.PARAGRAPH),
        ("h3_count", 3),
        ("char_count_bucket", "151-400"),
        ("layout_preset", "two-column"),
        ("zone_position", "bottom_l"),
    ],
)
def test_signature_changes_for_each_axis(axis: str, new_value: object) -> None:
    """Changing a single axis away from the baseline changes the digest."""
    baseline_digest = build_signature(**_base_kwargs())
    mutated_kwargs = {**_base_kwargs(), axis: new_value}
    mutated_digest = build_signature(**mutated_kwargs)

    assert mutated_digest != baseline_digest
def test_signature_accepts_string_source_shape() -> None:
    """The plain string ``"bullet"`` hashes the same as ``SourceShape.BULLET``."""
    digest_from_enum = build_signature(**_base_kwargs())
    digest_from_string = build_signature(**{**_base_kwargs(), "source_shape": "bullet"})
    assert digest_from_string == digest_from_enum
def test_signature_rejects_unknown_source_shape() -> None:
    """An unrecognised ``source_shape`` string raises ``ValueError``."""
    bad_kwargs = {**_base_kwargs(), "source_shape": "nonsense"}
    with pytest.raises(ValueError):
        build_signature(**bad_kwargs)
def test_signature_rejects_unknown_char_count_bucket() -> None:
    """A bucket label outside the declared set raises ``ValueError``."""
    bad_kwargs = {**_base_kwargs(), "char_count_bucket": "999-1234"}
    with pytest.raises(ValueError):
        build_signature(**bad_kwargs)
def test_signature_handles_none_cardinality() -> None:
    """``cardinality=None`` is accepted and hashes differently from ``0``."""
    none_digest = build_signature(**{**_base_kwargs(), "cardinality": None})
    assert len(none_digest) == 64
    zero_digest = build_signature(**{**_base_kwargs(), "cardinality": 0})
    assert zero_digest != none_digest
def test_signature_surface_only_8_declared_axes() -> None:
    """``build_signature`` exposes exactly the eight declared axis parameters."""
    declared_axes = {
        "frame_id",
        "v4_label",
        "cardinality",
        "source_shape",
        "h3_count",
        "char_count_bucket",
        "layout_preset",
        "zone_position",
    }
    accepted_parameters = set(inspect.signature(build_signature).parameters)
    assert accepted_parameters == declared_axes
def test_bucket_boundaries() -> None:
    """Both edges of every bucket map to the expected label."""
    boundary_cases = (
        (0, "0-50"),
        (50, "0-50"),
        (51, "51-150"),
        (150, "51-150"),
        (151, "151-400"),
        (400, "151-400"),
        (401, "401-1000"),
        (1000, "401-1000"),
        (1001, "1001+"),
        (10_000, "1001+"),
    )
    for char_count, expected_label in boundary_cases:
        assert bucket_char_count(char_count) == expected_label
def test_bucket_rejects_negative() -> None:
    """A negative character count raises ``ValueError``."""
    negative_count = -1
    with pytest.raises(ValueError):
        bucket_char_count(negative_count)
def test_bucket_rejects_non_int() -> None:
    """A float and a bool are both rejected with ``TypeError``."""
    for not_an_int in (3.14, True):
        with pytest.raises(TypeError):
            bucket_char_count(not_an_int)  # type: ignore[arg-type]
def test_signature_stable_known_fixture() -> None:
    """Pin the digest of a known fixture.

    The expected value is recomputed here from the canonical payload
    (``schema_version`` plus the eight axes, JSON-serialised with sorted
    keys), so a silent payload-shape change — a new axis sneaking in, or
    ``schema_version`` drifting — breaks this test.
    """
    import hashlib
    import json

    # Axes whose hashed value equals the value passed to build_signature.
    plain_axes = {
        "frame_id": "frame_03",
        "v4_label": "light_edit",
        "cardinality": 3,
        "h3_count": 2,
        "char_count_bucket": "51-150",
        "layout_preset": "sidebar-right",
        "zone_position": "top",
    }
    actual_digest = build_signature(source_shape=SourceShape.BULLET, **plain_axes)

    # source_shape is hashed as its string form; key order is irrelevant
    # because the payload is dumped with sort_keys=True.
    canonical_payload = {"schema_version": 1, "source_shape": "bullet", **plain_axes}
    serialized = json.dumps(canonical_payload, sort_keys=True, ensure_ascii=False)
    expected_digest = hashlib.sha256(serialized.encode("utf-8")).hexdigest()
    assert actual_digest == expected_digest

View File

@@ -1,12 +1,18 @@
"""IMP-33 u8 — Step 12 AI repair wiring tests.
"""IMP-33 u8 + IMP-46 u4 + IMP-47B u2 — Step 12 AI repair wiring tests.
Covers the two structural gates layered on top of the u7 router:
Covers the structural gates layered on top of the u7 router:
* IMP-30 provisional gate (only provisional units may invoke AI repair)
* Reject gate (route_hint=design_reference_only NEVER calls AI)
Plus the record-shape contract returned for downstream Step 12 artifacts.
* Catch-all ``route_not_ai_adaptation:<hint>`` skip — every route_hint
other than ``ai_adaptation_required`` (including the legacy
``design_reference_only`` hint) falls through to a single uniform skip
after the IMP-47B u2 removal of the bespoke reject gate.
Plus the record-shape contract returned for downstream Step 12 artifacts
and the IMP-46 u4 structural cache key + fingerprints contract.
"""
from __future__ import annotations
import hashlib
import json
from dataclasses import dataclass, field
from typing import Any
from unittest.mock import MagicMock
@@ -24,6 +30,12 @@ class FakeUnit:
source_section_ids: list[str] = field(default_factory=lambda: ["s1"])
raw_content: str = "raw"
v4_rank: int | None = 1
cardinality: int | None = None
layout_preset: str = ""
zone_position: str = ""
source_shape: str = "paragraph"
h3_count: int = 0
char_count: int = 0
_ROUTE_HINTS: dict[str | None, str | None] = {
@@ -64,6 +76,25 @@ def _call(
return step12_mod.gather_step12_ai_repair_proposals(units, **kwargs)
def _ai_unit(**overrides: Any) -> FakeUnit:
    """Build a provisional ``restructure`` FakeUnit, with per-test overrides.

    Keyword arguments replace the matching default field before the unit
    is constructed.
    """
    defaults: dict[str, Any] = {
        "label": "restructure",
        "provisional": True,
        "frame_template_id": "tmpl_x",
        "frame_id": "fid_123",
        "source_section_ids": ["02-1"],
        "layout_preset": "single_column",
        "zone_position": "zone_a",
        "source_shape": "bullet",
        "h3_count": 3,
        "char_count": 200,
        "cardinality": 5,
    }
    return FakeUnit(**{**defaults, **overrides})
def test_non_provisional_unit_is_skipped_without_ai_call(monkeypatch):
router = MagicMock()
monkeypatch.setattr(step12_mod, "route_ai_fallback", router)
@@ -75,13 +106,20 @@ def test_non_provisional_unit_is_skipped_without_ai_call(monkeypatch):
router.assert_not_called()
def test_reject_route_is_skipped_without_ai_call(monkeypatch):
def test_design_reference_route_falls_through_to_route_not_ai_adaptation(monkeypatch):
"""IMP-47B u2 — the bespoke 'design_reference_only_no_ai' skip is gone.
Any non-AI-adaptation route_hint (including the legacy
``design_reference_only`` hint exercised here via the local test mapping
of ``reject``) now flows into the single ``route_not_ai_adaptation:<hint>``
catch-all. Production reject routing is exercised by u9.
"""
router = MagicMock()
monkeypatch.setattr(step12_mod, "route_ai_fallback", router)
units = [FakeUnit(label="reject", provisional=True)]
records = _call(units)
assert records[0]["ai_called"] is False
assert records[0]["skip_reason"] == "design_reference_only_no_ai"
assert records[0]["skip_reason"] == "route_not_ai_adaptation:design_reference_only"
assert records[0]["route_hint"] == "design_reference_only"
router.assert_not_called()
@@ -153,29 +191,206 @@ def test_mixed_units_each_independently_classified(monkeypatch):
records = _call(units)
assert [r["skip_reason"] for r in records] == [
"not_provisional",
"design_reference_only_no_ai",
"route_not_ai_adaptation:design_reference_only",
"router_short_circuit",
"not_provisional",
]
assert router.call_count == 1
def test_cache_key_includes_template_and_section_ids(monkeypatch):
# ---------------------------------------------------------------------------
# IMP-46 u4 — structural cache key + fingerprints
# ---------------------------------------------------------------------------
def test_cache_key_format_is_frame_id_plus_sha256(monkeypatch):
    """cache_key is '{frame_id}::{64-hex-sha256}', NOT template_id + section_ids."""
    router = MagicMock(return_value=None)
    monkeypatch.setattr(step12_mod, "route_ai_fallback", router)

    _call([_ai_unit()])
    observed_key = router.call_args.kwargs["cache_key"]

    assert "::" in observed_key
    frame_part, signature_part = observed_key.split("::", 1)
    assert frame_part == "fid_123"
    assert len(signature_part) == 64
    assert set(signature_part) <= set("0123456789abcdef")
    # The legacy "template_id::sorted(section_ids)" form is gone.
    for legacy_fragment in ("tmpl_x", "02-1"):
        assert legacy_fragment not in observed_key
def test_cache_key_invariant_to_section_id_changes(monkeypatch):
    """Same structural axes → same cache_key regardless of source_section_ids."""
    router = MagicMock(return_value=None)
    monkeypatch.setattr(step12_mod, "route_ai_fallback", router)

    observed_keys = []
    for section_ids in (["02-1"], ["05-2", "07-3"]):
        router.reset_mock()  # no-op on the first pass; clears call_args afterwards
        _call([_ai_unit(source_section_ids=section_ids)])
        observed_keys.append(router.call_args.kwargs["cache_key"])

    assert observed_keys[0] == observed_keys[1]
def test_cache_key_invariant_to_template_id_changes(monkeypatch):
    """frame_template_id is NOT part of the structural signature (frame_id is)."""
    router = MagicMock(return_value=None)
    monkeypatch.setattr(step12_mod, "route_ai_fallback", router)

    observed_keys = []
    for template_id in ("tmpl_x", "tmpl_OTHER"):
        router.reset_mock()  # no-op on the first pass; clears call_args afterwards
        _call([_ai_unit(frame_template_id=template_id)])
        observed_keys.append(router.call_args.kwargs["cache_key"])

    assert observed_keys[0] == observed_keys[1]
def test_cache_key_changes_when_any_signature_axis_changes(monkeypatch):
    """Flipping any of the 7 unit-derived signature axes mutates cache_key.

    The ``label`` (v4_label) axis is deliberately not perturbed here:
    changing the label can change the route hint (for example
    ``use_as_is`` maps to ``direct_render`` and would be skipped), in which
    case the router is never called and no cache_key can be observed. Only
    axes that leave routing untouched are exercised.
    """
    router = MagicMock(return_value=None)
    monkeypatch.setattr(step12_mod, "route_ai_fallback", router)
    _call([_ai_unit()])
    base_key = router.call_args.kwargs["cache_key"]

    perturbations: dict[str, Any] = {
        "frame_id": "fid_OTHER",
        "layout_preset": "two_column",
        "zone_position": "zone_b",
        "source_shape": "paragraph",
        "h3_count": 7,
        "char_count": 500,  # bucket boundary crossing (151-400 → 401-1000)
        "cardinality": 4,
    }
    for axis, value in perturbations.items():
        router.reset_mock()
        _call([_ai_unit(**{axis: value})])
        new_key = router.call_args.kwargs["cache_key"]
        assert new_key != base_key, f"signature axis {axis!r} did not mutate cache_key"
def test_char_count_bucket_collapses_within_bucket(monkeypatch):
    """Different char_counts in the SAME bucket → identical cache_key."""
    router = MagicMock(return_value=None)
    monkeypatch.setattr(step12_mod, "route_ai_fallback", router)

    def _key_for_char_count(char_count):
        # Clear the previous call so call_args reflects only this gather.
        router.reset_mock()
        _call([_ai_unit(char_count=char_count)])
        return router.call_args.kwargs["cache_key"]

    key_at_160 = _key_for_char_count(160)
    key_at_399 = _key_for_char_count(399)
    assert key_at_160 == key_at_399  # both fall in "151-400"

    key_at_401 = _key_for_char_count(401)
    assert key_at_401 != key_at_160  # crossed into "401-1000"
def test_fingerprints_attached_to_ai_record(monkeypatch):
    """AI-called records expose contract_sha + partial_sha + catalog_sha."""

    def _sha256_of_sorted_json(value):
        # Deterministic SHA256 over the JSON-sorted payload.
        serialized = json.dumps(value, sort_keys=True, ensure_ascii=False)
        return hashlib.sha256(serialized.encode("utf-8")).hexdigest()

    router = MagicMock(return_value=None)
    monkeypatch.setattr(step12_mod, "route_ai_fallback", router)
    contract = {"frame_id": "fid", "payload": {"x": 1}, "sub_zones": []}
    partial = {"some": "partial", "deeper": [1, 2, 3]}
    catalog_value = "deadbeef" * 8

    records = _call(
        [_ai_unit()],
        get_contract_fn=lambda _t: contract,
        figma_partial_loader=lambda _t: partial,
        catalog_sha_loader=lambda: catalog_value,
    )
    fingerprints = records[0]["fingerprints"]

    assert isinstance(fingerprints, dict)
    assert set(fingerprints.keys()) == {"contract_sha", "partial_sha", "catalog_sha"}
    assert all(isinstance(sha, str) for sha in fingerprints.values())
    assert fingerprints["catalog_sha"] == catalog_value
    assert fingerprints["contract_sha"] == _sha256_of_sorted_json(contract)
    assert fingerprints["partial_sha"] == _sha256_of_sorted_json(partial)
def test_fingerprints_default_catalog_sha_is_empty_string(monkeypatch):
    """No catalog_sha_loader → catalog_sha defaults to '' (sentinel, not missing key)."""
    router = MagicMock(return_value=None)
    monkeypatch.setattr(step12_mod, "route_ai_fallback", router)

    fingerprints = _call([_ai_unit()])[0]["fingerprints"]

    assert fingerprints["catalog_sha"] == ""
    # contract_sha + partial_sha keys are still present (always 3 keys).
    expected_keys = {"contract_sha", "partial_sha", "catalog_sha"}
    assert set(fingerprints.keys()) == expected_keys
def test_fingerprints_change_when_contract_changes(monkeypatch):
    """Different frame_contract → different contract_sha, partial_sha unchanged."""
    router = MagicMock(return_value=None)
    monkeypatch.setattr(step12_mod, "route_ai_fallback", router)

    def _fingerprints_for(contract):
        records = _call([_ai_unit()], get_contract_fn=lambda _t: contract)
        return records[0]["fingerprints"]

    first = _fingerprints_for({"a": 1})
    second = _fingerprints_for({"a": 2})
    assert first["contract_sha"] != second["contract_sha"]
    assert first["partial_sha"] == second["partial_sha"]
def test_fingerprints_change_when_partial_changes(monkeypatch):
    """Different figma_partial_json → different partial_sha, contract_sha unchanged."""
    router = MagicMock(return_value=None)
    monkeypatch.setattr(step12_mod, "route_ai_fallback", router)

    def _fingerprints_for(partial):
        records = _call([_ai_unit()], figma_partial_loader=lambda _t: partial)
        return records[0]["fingerprints"]

    first = _fingerprints_for({"p": 1})
    second = _fingerprints_for({"p": 2})
    assert first["partial_sha"] != second["partial_sha"]
    assert first["contract_sha"] == second["contract_sha"]
def test_v4_result_cardinality_uses_unit_value(monkeypatch):
    """v4_result['cardinality'] mirrors the unit's cardinality (no longer hardcoded None)."""
    router = MagicMock(return_value=None)
    monkeypatch.setattr(step12_mod, "route_ai_fallback", router)

    def _forwarded_cardinality(unit_cardinality):
        # Clear the previous call so call_args reflects only this gather.
        router.reset_mock()
        _call([_ai_unit(cardinality=unit_cardinality)])
        return router.call_args.kwargs["v4_result"]["cardinality"]

    assert _forwarded_cardinality(7) == 7
    assert _forwarded_cardinality(None) is None
def test_skipped_records_have_no_cache_key_or_fingerprints(monkeypatch):
"""Non-AI-eligible records keep cache_key and fingerprints as None."""
monkeypatch.setattr(step12_mod, "route_ai_fallback", MagicMock(return_value=None))
units = [
FakeUnit(
label="restructure",
provisional=True,
frame_template_id="tmpl_abc",
source_section_ids=["02-1", "02-2"],
)
FakeUnit(label="restructure", provisional=False),
FakeUnit(label="reject", provisional=True),
FakeUnit(label="light_edit", provisional=True),
]
_call(units)
assert router.call_args.kwargs["cache_key"] == "tmpl_abc::02-1,02-2"
recs = _call(units)
for rec in recs:
assert rec["cache_key"] is None
assert rec["fingerprints"] is None
def test_record_shape_contract_is_stable(monkeypatch):
def test_catalog_sha_loader_called_once_per_gather(monkeypatch):
    """catalog_sha is computed once per gather call, not per unit."""
    router = MagicMock(return_value=None)
    monkeypatch.setattr(step12_mod, "route_ai_fallback", router)
    catalog_loader = MagicMock(return_value="cafefeed" * 8)

    # Three AI-eligible units with distinct frame ids in a single gather.
    three_units = [
        _ai_unit(),
        _ai_unit(frame_id="fid_other"),
        _ai_unit(frame_id="fid_third"),
    ]
    _call(three_units, catalog_sha_loader=catalog_loader)

    catalog_loader.assert_called_once()
def test_record_shape_contract_is_stable_with_u4_fields(monkeypatch):
"""Record schema includes the IMP-46 u4 cache_key + fingerprints fields."""
monkeypatch.setattr(step12_mod, "route_ai_fallback", MagicMock(return_value=None))
units = [FakeUnit(label="reject", provisional=True)]
rec = _call(units)[0]
@@ -190,4 +405,98 @@ def test_record_shape_contract_is_stable(monkeypatch):
"skip_reason",
"proposal",
"error",
"cache_key",
"fingerprints",
}
def test_cache_key_is_compatible_with_cache_parse_key(monkeypatch):
    """cache_key produced here must round-trip through cache.py's _parse_key."""
    from src.phase_z2_ai_fallback.cache import KEY_DELIMITER, _parse_key

    router = MagicMock(return_value=None)
    monkeypatch.setattr(step12_mod, "route_ai_fallback", router)
    _call([_ai_unit()])

    parsed_key = _parse_key(router.call_args.kwargs["cache_key"])
    assert parsed_key is not None
    parsed_frame_id, parsed_signature = parsed_key
    assert parsed_frame_id == "fid_123"
    assert len(parsed_signature) == 64
    assert KEY_DELIMITER not in parsed_signature
# ---------------------------------------------------------------------------
# IMP-47B u9 — Step 12 reject eligibility + normal-path AI=0 regression
# ---------------------------------------------------------------------------
# Locks the end-to-end Step 12 contract against the production route helper
# `_imp05_route_hint`. The local `_ROUTE_HINTS` mapping above intentionally
# preserves the legacy ``reject -> design_reference_only`` form to exercise
# the catch-all fall-through branch; u9 instead drives gather with the real
# production map (post-u1 flip) so reject provisional units reach the router
# and normal-path labels stay AI=0.
def test_production_reject_route_reaches_router_when_provisional(monkeypatch):
    """Post-u1, provisional reject units must reach ``route_ai_fallback``."""
    from src.phase_z2_pipeline import _imp05_route_hint

    router = MagicMock(return_value=None)
    monkeypatch.setattr(step12_mod, "route_ai_fallback", router)
    provisional_reject = FakeUnit(label="reject", provisional=True)

    records = step12_mod.gather_step12_ai_repair_proposals(
        [provisional_reject],
        route_for_label=_imp05_route_hint,
        get_contract_fn=_get_contract,
        frame_visual_loader=_frame_visual,
    )

    record = records[0]
    assert record["route_hint"] == "ai_adaptation_required"
    # The mocked router returns None, which the record reports as a short circuit.
    assert record["skip_reason"] == "router_short_circuit"
    assert record["ai_called"] is False
    router.assert_called_once()
def test_production_normal_route_labels_never_reach_router(monkeypatch):
    """Normal-path labels stay AI=0 even when the unit is provisional."""
    from src.phase_z2_pipeline import _imp05_route_hint

    router = MagicMock(return_value=None)
    monkeypatch.setattr(step12_mod, "route_ai_fallback", router)
    normal_path_units = [
        FakeUnit(label=label, provisional=True)
        for label in ("use_as_is", "light_edit", None)
    ]

    records = step12_mod.gather_step12_ai_repair_proposals(
        normal_path_units,
        route_for_label=_imp05_route_hint,
        get_contract_fn=_get_contract,
        frame_visual_loader=_frame_visual,
    )

    expected_skip_reasons = [
        "route_not_ai_adaptation:direct_render",
        "route_not_ai_adaptation:deterministic_minor_adjustment",
        "route_not_ai_adaptation:None",
    ]
    observed_skip_reasons = [records[i]["skip_reason"] for i in range(3)]
    assert observed_skip_reasons == expected_skip_reasons
    router.assert_not_called()
def test_production_non_provisional_reject_skipped_before_route_gate(monkeypatch):
    """The provisional gate fires before the route gate (production routing).

    Even with reject routed to ``ai_adaptation_required`` (post-u1), a
    non-provisional reject unit must be skipped with ``not_provisional``
    and must never reach the AI router.
    """
    from src.phase_z2_pipeline import _imp05_route_hint

    router = MagicMock(return_value=None)
    monkeypatch.setattr(step12_mod, "route_ai_fallback", router)
    settled_reject = FakeUnit(label="reject", provisional=False)

    records = step12_mod.gather_step12_ai_repair_proposals(
        [settled_reject],
        route_for_label=_imp05_route_hint,
        get_contract_fn=_get_contract,
        frame_visual_loader=_frame_visual,
    )

    record = records[0]
    assert record["skip_reason"] == "not_provisional"
    assert record["ai_called"] is False
    router.assert_not_called()

View File

@@ -0,0 +1,213 @@
"""IMP-47B u13 — Persist validated proposals through ``save_proposal`` after gates.
Scope (this slice):
Verify the new ``_persist_ai_repair_proposals_to_cache`` helper in
``src/phase_z2_pipeline.py`` honours the IMP-46 dual-gate truth table
on the post-Step-14 cache-save seam. The helper is exercised in
isolation (no Selenium, no full pipeline) with synthetic AI repair
records that mirror the gather → apply → coverage chain shape
produced by IMP-47B u4 / u5 / u7.
Guardrails proven by this test (IMP-46 + IMP-47B policy bullets):
* ``visual_check_passed=False`` always blocks — never bypassable, even
when ``auto_cache=True`` (IMP-46 u5 truth table cell).
* ``user_approved=False`` AND ``auto_cache=False`` → gate blocked
(default pipeline path has no UX approval gate; ``--auto-cache`` is
the documented bypass).
* ``visual_check_passed=True`` AND ``auto_cache=True`` → proposal
persisted on disk under ``data/frame_cache/{frame_id}/{hash}.json``
via ``cache.save_proposal``.
* Non-applied records (no_proposal / no_zone_match / unsupported /
error) → ``cache_save_status='not_applied'`` and NEVER reach
``save_proposal`` (no filesystem touch).
* Settings axis — ``settings.ai_fallback_auto_cache`` sourced through
the helper kwargs, never inlined (hardcoding ban).
"""
from __future__ import annotations
import pathlib
import pytest
from src.phase_z2_ai_fallback import cache as cache_mod
from src.phase_z2_ai_fallback.cache import AiFallbackCacheGateError
from src.phase_z2_ai_fallback.schema import AiFallbackProposal, ProposalKind
from src.phase_z2_pipeline import _persist_ai_repair_proposals_to_cache
def _applied_record(
    *,
    cache_key: str = "MOCK_FRAME::deadbeef" + "0" * 56,
    fingerprints: dict | None = None,
    slots: dict | None = None,
) -> dict:
    """Build an AI repair record marked ``applied:partial_overrides``.

    ``fingerprints`` and ``slots`` fall back to small fixed defaults only
    when they are ``None`` (an explicitly passed empty dict is kept).
    The embedded proposal is a dumped PARTIAL_OVERRIDES
    :class:`AiFallbackProposal` carrying ``slots``.
    """
    effective_fingerprints = (
        {"contract_sha": "c1", "partial_sha": "p1", "catalog_sha": "k1"}
        if fingerprints is None
        else fingerprints
    )
    effective_slots = (
        {"title": "AI repaired", "bullets": ["b1", "b2"]} if slots is None else slots
    )
    dumped_proposal = AiFallbackProposal(
        proposal_kind=ProposalKind.PARTIAL_OVERRIDES,
        payload={"slots": effective_slots},
        rationale="cache save gate test",
    ).model_dump()
    return {
        "unit_index": 0,
        "source_section_ids": ["MOCK_S1"],
        "frame_template_id": "MOCK_FRAME",
        "label": "reject",
        "route_hint": "ai_adaptation_required",
        "provisional": True,
        "ai_called": True,
        "skip_reason": None,
        "proposal": dumped_proposal,
        "error": None,
        "cache_key": cache_key,
        "fingerprints": effective_fingerprints,
        "apply_status": "applied:partial_overrides",
    }
@pytest.fixture(autouse=True)
def _isolate_cache_root(tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch):
    """Point ``cache.CACHE_ROOT`` at a per-test tmp directory and yield it.

    Keeps ``save_proposal`` writes out of the real ``data/frame_cache/`` tree.
    """
    isolated_root = tmp_path / "frame_cache"
    monkeypatch.setattr(cache_mod, "CACHE_ROOT", isolated_root)
    yield isolated_root
def test_visual_check_failed_blocks_save_even_with_auto_cache(_isolate_cache_root):
    """visual_check_passed=False is never bypassable — auto_cache cannot override."""
    cache_root = _isolate_cache_root
    record = _applied_record()

    _persist_ai_repair_proposals_to_cache(
        [record],
        visual_check_passed=False,
        user_approved=True,
        auto_cache=True,
    )

    status = record["cache_save_status"]
    assert status.startswith("gate_blocked:")
    assert "visual_check_passed=False" in status
    # No filesystem write occurred.
    assert not (cache_root.exists() and any(cache_root.rglob("*.json")))
def test_user_not_approved_and_no_auto_cache_blocks_save(_isolate_cache_root):
    """Default pipeline path (user_approved=False, auto_cache=False) → gate blocked."""
    cache_root = _isolate_cache_root
    record = _applied_record()

    _persist_ai_repair_proposals_to_cache(
        [record],
        visual_check_passed=True,
        user_approved=False,
        auto_cache=False,
    )

    status = record["cache_save_status"]
    assert status.startswith("gate_blocked:")
    assert "user_approved=False" in status
    # Nothing may have been written under the isolated cache root.
    assert not (cache_root.exists() and any(cache_root.rglob("*.json")))
def test_visual_passed_and_auto_cache_persists_proposal(_isolate_cache_root):
    """Happy path — visual_check_passed=True + auto_cache=True persists JSON."""
    cache_root = _isolate_cache_root
    record = _applied_record()

    _persist_ai_repair_proposals_to_cache(
        [record],
        visual_check_passed=True,
        user_approved=False,
        auto_cache=True,
    )

    assert record["cache_save_status"] == "saved"
    json_files = list(cache_root.rglob("*.json"))
    assert len(json_files) == 1
    # Layout = {CACHE_ROOT}/{frame_id}/{signature_hash}.json.
    assert json_files[0].parent.name == "MOCK_FRAME"
def test_non_applied_records_are_skipped_without_filesystem_touch(_isolate_cache_root):
    """no_proposal / no_zone_match / unsupported_kind / error → never reach save_proposal."""
    cache_root = _isolate_cache_root

    def _stub_fingerprints() -> dict:
        # A separate dict per record so no two records share state.
        return {"contract_sha": "c", "partial_sha": "p", "catalog_sha": "k"}

    records = [
        {
            "unit_index": 0,
            "apply_status": "no_proposal",
            "proposal": None,
            "cache_key": None,
            "fingerprints": None,
        },
        {
            "unit_index": 1,
            "apply_status": "no_zone_match",
            "proposal": {
                "proposal_kind": "partial_overrides",
                "payload": {"slots": {}},
                "rationale": "",
            },
            "cache_key": "MOCK::abc",
            "fingerprints": _stub_fingerprints(),
        },
        {
            "unit_index": 2,
            "apply_status": "unsupported_kind_for_reject_route:builder_options_patch",
            "proposal": {
                "proposal_kind": "builder_options_patch",
                "payload": {},
                "rationale": "",
            },
            "cache_key": "MOCK::def",
            "fingerprints": _stub_fingerprints(),
        },
        {
            "unit_index": 3,
            "apply_status": None,
            "proposal": None,
            "cache_key": "MOCK::ghi",
            "fingerprints": _stub_fingerprints(),
            "error": "RuntimeError: boom",
        },
    ]

    # Both gates are open, so only apply_status can keep a record out of the cache.
    _persist_ai_repair_proposals_to_cache(
        records,
        visual_check_passed=True,
        user_approved=True,
        auto_cache=True,
    )

    assert [r["cache_save_status"] for r in records] == ["not_applied"] * len(records)
    # Zero JSON files written because none of the records were applied.
    assert not (cache_root.exists() and any(cache_root.rglob("*.json")))
def test_mixed_records_only_persist_applied_ones(_isolate_cache_root):
    """Mixed batch — only the ``applied:`` record is persisted."""
    cache_root = _isolate_cache_root
    applied_record = _applied_record(cache_key="MOCK_FRAME::aaaaaaaa" + "0" * 56)
    skipped_record = {
        "unit_index": 1,
        "apply_status": "no_proposal",
        "proposal": None,
        "cache_key": None,
        "fingerprints": None,
    }

    _persist_ai_repair_proposals_to_cache(
        [applied_record, skipped_record],
        visual_check_passed=True,
        user_approved=False,
        auto_cache=True,
    )

    assert applied_record["cache_save_status"] == "saved"
    assert skipped_record["cache_save_status"] == "not_applied"
    assert len(list(cache_root.rglob("*.json"))) == 1
def test_invalid_proposal_payload_surfaces_without_raising(_isolate_cache_root):
    """A malformed ``proposal`` dict is reported, not raised.

    Expected: ``cache_save_status`` starts with ``'invalid_proposal:'``,
    nothing is written to disk, and no exception reaches the caller.
    """
    cache_root = _isolate_cache_root
    malformed_proposal = {
        "proposal_kind": "not_a_valid_enum_value",
        "payload": {},
        "rationale": "",
    }
    bad_record = {
        "unit_index": 0,
        "apply_status": "applied:partial_overrides",
        "proposal": malformed_proposal,
        "cache_key": "MOCK::bad",
        "fingerprints": {"contract_sha": "c", "partial_sha": "p", "catalog_sha": "k"},
    }

    _persist_ai_repair_proposals_to_cache(
        [bad_record],
        visual_check_passed=True,
        user_approved=True,
        auto_cache=True,
    )

    assert bad_record["cache_save_status"].startswith("invalid_proposal:")
    assert not (cache_root.exists() and any(cache_root.rglob("*.json")))

View File

@@ -0,0 +1,95 @@
"""IMP-47B u7 — Post-AI source_section_ids coverage invariant tests.
Scope (this slice):
* Helper ``_check_post_ai_coverage_invariant(units, ai_repair_records)``
(src/phase_z2_pipeline.py) compares the pre-AI superset (unit
``source_section_ids``) to the post-apply superset present on
gather records. Per the AI isolation contract + the absolute no-drop rule
(``feedback_ai_isolation_contract``), AI repair must not silently
drop a section.
* The helper returns a structured dict (``pre_ai_section_ids``,
``post_ai_section_ids``, ``dropped_section_ids``, ``status``) so u8
can surface ``status`` through ``slide_status.ai_repair_status``.
u8 slide_status surfacing and u10 E2E no-text-loss assertion are out
of scope for this unit. The helper is pure (no AI call, no IO) so a
synthetic stub-unit / stub-record fixture exercises it directly.
"""
from __future__ import annotations
from dataclasses import dataclass, field
from src.phase_z2_pipeline import _check_post_ai_coverage_invariant
@dataclass
class _StubUnit:
source_section_ids: list[str] = field(default_factory=list)
def _record(source_section_ids: list[str]) -> dict:
"""Minimal gather-record stub — only the field u7 reads."""
return {"source_section_ids": list(source_section_ids)}
# ─── Case 1 : matched coverage → status='ok' ────────────────────────
def test_coverage_invariant_ok_when_records_match_units():
    """Records carry every unit's source_section_ids → no drop, status='ok'."""
    section_groups = (["MOCK_S1", "MOCK_S2"], ["MOCK_S3"])
    units = [_StubUnit(list(group)) for group in section_groups]
    records = [_record(group) for group in section_groups]

    outcome = _check_post_ai_coverage_invariant(units, records)

    all_sections = ["MOCK_S1", "MOCK_S2", "MOCK_S3"]
    assert outcome["status"] == "ok"
    assert outcome["dropped_section_ids"] == []
    assert outcome["pre_ai_section_ids"] == all_sections
    assert outcome["post_ai_section_ids"] == all_sections
# ─── Case 2 : record drops a section → status='violated' ────────────
def test_coverage_invariant_violated_when_record_drops_section():
    """A section id missing from the records is reported as a violation.

    If a record loses a unit's section_id (e.g., an apply mutation bug),
    the invariant reports status='violated' plus the dropped list (the
    absolute no-drop rule).
    """
    units = [_StubUnit(["MOCK_S1", "MOCK_S2"]), _StubUnit(["MOCK_S3"])]
    # MOCK_S2 is absent from the first record.
    records = [_record(["MOCK_S1"]), _record(["MOCK_S3"])]

    outcome = _check_post_ai_coverage_invariant(units, records)

    assert outcome["status"] == "violated"
    assert outcome["dropped_section_ids"] == ["MOCK_S2"]
    assert "MOCK_S2" in outcome["pre_ai_section_ids"]
    assert "MOCK_S2" not in outcome["post_ai_section_ids"]
# ─── Case 3 : empty inputs → status='ok' (no false positive) ────────
def test_coverage_invariant_ok_on_empty_units_and_records():
    """No units and no records is a vacuous pass.

    Guards against a false-positive 'violated' on edge-case shapes
    (no AI work).
    """
    outcome = _check_post_ai_coverage_invariant([], [])

    assert outcome["status"] == "ok"
    for list_field in (
        "dropped_section_ids",
        "pre_ai_section_ids",
        "post_ai_section_ids",
    ):
        assert outcome[list_field] == []
# ─── Case 4 : multiple drops + dedup ────────────────────────────────
def test_coverage_invariant_lists_all_dropped_sections_sorted_and_deduped():
    """Multiple missing sections → dropped_section_ids is sorted + deduped.

    Duplicate ids across units / records collapse to a set comparison.
    """
    units = [
        _StubUnit(["MOCK_S3", "MOCK_S1"]),
        _StubUnit(["MOCK_S2", "MOCK_S1"]),  # MOCK_S1 duplicate
    ]
    no_records: list[dict] = []  # full drop — every unit section missing

    outcome = _check_post_ai_coverage_invariant(units, no_records)

    sorted_unique_sections = ["MOCK_S1", "MOCK_S2", "MOCK_S3"]
    assert outcome["status"] == "violated"
    assert outcome["dropped_section_ids"] == sorted_unique_sections
    assert outcome["pre_ai_section_ids"] == sorted_unique_sections
    assert outcome["post_ai_section_ids"] == []

View File

@@ -0,0 +1,269 @@
"""IMP-47B u10 — End-to-end reject smoke (mocked client + full chain + render).
Scope (this slice):
E2E chain proving the IMP-47B reject route activates, preserves
full coverage, and propagates the AI-repaired ``slot_payload``
into the rendered ``final.html`` artifact when the AI fallback
client returns a deterministic PARTIAL_OVERRIDES proposal. Wires
together the four pipeline helpers introduced by u4 / u5 / u7 / u8
plus the Step 13 render step:
gather → apply → coverage_invariant → ai_repair_status surfacing
→ render_slide → final.html
The chain mirrors the ``run_phase_z2_mvp1`` call sequence between
the Step 12 slot_payload write and the Step 20 ``slide_status``
attach (src/phase_z2_pipeline.py — u4 call site, u5 apply, u6
artifact, u7 invariant, u8 surface). The Step 13 render path
(``render_slide`` at src/phase_z2_pipeline.py:2319, called from the
production write site at src/phase_z2_pipeline.py:5107-5111)
consumes ``zones_data[i]["slot_payload"]`` verbatim, so this test
drives that exact production seam: it calls ``render_slide`` on
the post-apply ``zones_data`` and writes the resulting HTML to a
``final.html`` file inside ``tmp_path``, then asserts the AI
proposal text appears in the on-disk artifact. A heavy
``run_phase_z2_mvp1`` integration variant with Selenium overflow
check remains deferred — this smoke test stops at the rendered
HTML.
Guardrails proven by this test (IMP-47B policy bullets):
* AI calls = fallback path only → master flag default OFF preserved
(test enables for itself only, restores after).
* MDX source text 100% preserved → coverage_invariant.status == "ok",
source_section_ids identical before/after AI.
* No automatic frame swap → frame_template_id unchanged.
* No arbitrary frame-visual changes → frame_contract / partial untouched
(apply only merges proposal.payload.slots into slot_payload).
* Absolute no-drop rule → slot_payload AI keys merged on top
of deterministic keys; pre-existing meta keys survive.
"""
from __future__ import annotations
from dataclasses import dataclass, field
from src.phase_z2_ai_fallback.schema import AiFallbackProposal, ProposalKind
from src.phase_z2_pipeline import (
_apply_ai_repair_proposals_to_zones,
_check_post_ai_coverage_invariant,
_run_step12_ai_repair,
_summarize_ai_repair_status,
)
@dataclass
class _StubUnit:
    """Synthetic CompositionUnit stand-in (subset of fields gather reads).

    Every field has a default, so ``_StubUnit()`` yields a single
    reject + provisional unit; tests override individual fields by
    keyword.
    """

    # Judgment label; the default "reject" combined with provisional=True
    # is the shape the tests in this module feed to the Step 12 gather.
    label: str | None = "reject"
    provisional: bool = True
    # Synthetic identifiers (MOCK_ prefix).
    frame_template_id: str = "MOCK_T_reject"
    frame_id: str = "MOCK_F_reject"
    # default_factory gives each instance its own list object.
    source_section_ids: list[str] = field(default_factory=lambda: ["MOCK_S1"])
    raw_content: str = "MOCK MDX paragraph that must survive AI repair."
    v4_rank: int | None = 1
    cardinality: int | None = None
    layout_preset: str = "two_zone_vertical"
    zone_position: str = "top"
    source_shape: str = "paragraph"
    h3_count: int = 0
    char_count: int = 48
def _patched_route_ai_fallback(**kwargs):
    """Stand-in for ``route_ai_fallback`` used via ``monkeypatch``.

    Accepts and ignores whatever keyword arguments the caller passes and
    returns a freshly built PARTIAL_OVERRIDES proposal carrying fixed
    ``title`` / ``bullets`` slot values.

    Because this helper replaces the router, the proposal validator
    (src/phase_z2_ai_fallback/validate.py) is not re-invoked; structural
    slot completeness is instead asserted by the apply step and the
    coverage invariant further down the chain.
    """
    repaired_slots = {
        "title": "AI repaired title",
        "bullets": ["AI repaired bullet 1", "AI repaired bullet 2"],
    }
    return AiFallbackProposal(
        proposal_kind=ProposalKind.PARTIAL_OVERRIDES,
        payload={"slots": repaired_slots},
        rationale="E2E smoke proposal — deterministic.",
    )
def test_e2e_reject_chain_applies_proposal_and_preserves_coverage(monkeypatch):
    """End-to-end reject smoke (synthetic chain, mocked client).

    Drives the four IMP-47B u4/u5/u7/u8 helpers in pipeline order with a
    single reject+provisional unit. Asserts every guardrail listed in
    the module docstring plus the four E2E invariants
    (final.html-bound slot_payload / full coverage / no text loss /
    human_review NOT required on the success path).
    """
    # IMP-47B u4 wiring — patch the router seam in
    # src/phase_z2_ai_fallback/step12.py so the gather call returns a
    # deterministic PARTIAL_OVERRIDES proposal without touching the
    # master flag / network / cache layers.
    import src.phase_z2_ai_fallback.step12 as step12_mod

    monkeypatch.setattr(step12_mod, "route_ai_fallback", _patched_route_ai_fallback)

    unit = _StubUnit()
    units = [unit]

    # Step 12 gather (u4) — eligible reject reaches the patched router.
    records = _run_step12_ai_repair(units)
    assert len(records) == 1
    assert records[0]["route_hint"] == "ai_adaptation_required"
    assert records[0]["ai_called"] is True
    assert records[0]["skip_reason"] is None
    assert records[0]["proposal"]["proposal_kind"] == "partial_overrides"
    assert records[0]["source_section_ids"] == ["MOCK_S1"]

    # Step 12 apply (u5) — PARTIAL_OVERRIDES merged into the matching zone.
    # zones_data[0]["slot_payload"] is exactly what render_slide consumes
    # to emit final.html (src/phase_z2_pipeline.py:5107) — asserting it
    # here proves the reject route now flows into the rendered HTML.
    zones = [{
        "position": "top",
        "template_id": "MOCK_T_reject",
        "slot_payload": {
            "title": "deterministic title",
            "bullets": ["deterministic bullet"],
            "_truncated_count": 0,
        },
    }]
    _apply_ai_repair_proposals_to_zones(records, ["top"], zones)
    assert records[0]["apply_status"] == "applied:partial_overrides"

    # final.html-bound slot_payload carries AI proposal values.
    assert zones[0]["slot_payload"]["title"] == "AI repaired title"
    assert zones[0]["slot_payload"]["bullets"] == [
        "AI repaired bullet 1",
        "AI repaired bullet 2",
    ]
    # Frame visual / pre-existing meta keys survive (no silent shrink).
    assert zones[0]["template_id"] == "MOCK_T_reject"
    assert zones[0]["slot_payload"]["_truncated_count"] == 0
    # frame_template_id on the unit is unchanged (no auto frame swap).
    assert unit.frame_template_id == "MOCK_T_reject"

    # Step 12 coverage invariant (u7) — full coverage, no text loss.
    coverage = _check_post_ai_coverage_invariant(units, records)
    assert coverage["status"] == "ok"
    assert coverage["pre_ai_section_ids"] == ["MOCK_S1"]
    assert coverage["post_ai_section_ids"] == ["MOCK_S1"]
    assert coverage["dropped_section_ids"] == []

    # Step 20 ai_repair_status surfacing (u8) — applied without human review.
    status = _summarize_ai_repair_status(records, coverage)
    assert status["status"] == "applied"
    assert status["counts"]["applied"] == 1
    assert status["counts"]["error"] == 0
    assert status["counts"]["unsupported_kind"] == 0
    assert status["coverage_status"] == "ok"
    # Require an explicit False: a ``.get(...) is not True`` check would
    # also pass if the key were missing or held a truthy non-bool, which
    # would hide a regression in the surfacing helper.
    assert status["human_review_required"] is False
def test_e2e_reject_chain_writes_final_html_with_ai_repaired_slot(monkeypatch, tmp_path):
    """End-to-end reject smoke (real render path → final.html on disk).

    Drives the full Stage-2 u10 chain INCLUDING ``render_slide``: the
    AI-repaired ``slot_payload`` is fed through the same rendering seam
    the production pipeline uses (src/phase_z2_pipeline.py:5107-5111),
    the resulting HTML is written to ``tmp_path / "final.html"``, and the
    on-disk artifact is then asserted to carry the AI proposal value.

    Uses ``bim_dx_comparison_table`` — a real registered frame partial
    (templates/phase_z2/families/bim_dx_comparison_table.html) whose
    template emits ``{{ slot_payload.title }}`` verbatim, so a
    proposal-overridden title surfaces literally in the HTML output.
    """
    import src.phase_z2_ai_fallback.step12 as step12_mod

    monkeypatch.setattr(step12_mod, "route_ai_fallback", _patched_route_ai_fallback)

    from src.phase_z2_pipeline import build_layout_css, render_slide

    unit = _StubUnit(
        frame_template_id="bim_dx_comparison_table",
        zone_position="primary",
        layout_preset="single",
    )

    # Step 12 gather + apply. Deterministic non-overridden slots
    # (col_a_label, col_b_label, rows[*]) are seeded BEFORE apply so the
    # post-render assertions below can prove u5 merge semantics
    # (dict.update — not dict-replace) survive the render seam. The
    # router proposal only carries ``{title, bullets}`` — every other
    # slot must reach final.html untouched.
    records = _run_step12_ai_repair([unit])
    zones = [{
        "position": "primary",
        "template_id": "bim_dx_comparison_table",
        "slot_payload": {
            "title": "deterministic frame title",
            "col_a_label": "DETERMINISTIC_COL_A_LABEL",
            "col_b_label": "DETERMINISTIC_COL_B_LABEL",
            "rows": [
                {"label": "DET_ROW_LABEL", "col_a": "DET_ROW_A", "col_b": "DET_ROW_B"},
            ],
        },
    }]
    _apply_ai_repair_proposals_to_zones(records, ["primary"], zones)
    assert records[0]["apply_status"] == "applied:partial_overrides"

    # Step 13 render — production seam (src/phase_z2_pipeline.py:5107-5111).
    layout_css = build_layout_css("single", zones)
    html = render_slide("IMP-47B E2E reject smoke", None, zones, "single", layout_css)
    final_html_path = tmp_path / "final.html"
    final_html_path.write_text(html, encoding="utf-8")

    # final.html artifact exists on disk and is non-empty.
    assert final_html_path.is_file()
    assert final_html_path.stat().st_size > 0
    rendered = final_html_path.read_text(encoding="utf-8")

    # AI-repaired slot content appears in the rendered HTML.
    assert "AI repaired title" in rendered
    # Deterministic pre-apply title was overridden in the HTML output
    # (no silent merge that leaves both values visible).
    assert "deterministic frame title" not in rendered
    # Non-overridden deterministic slots survive merge → render (u5
    # dict.update semantics, not dict-replace; the no-drop rule is
    # honoured at the render seam, not just in slot_payload memory).
    assert "DETERMINISTIC_COL_A_LABEL" in rendered
    assert "DETERMINISTIC_COL_B_LABEL" in rendered
    assert "DET_ROW_LABEL" in rendered
    assert "DET_ROW_A" in rendered
    assert "DET_ROW_B" in rendered
    # Frame template id is preserved end-to-end (no auto frame swap).
    assert 'data-template-id="bim_dx_comparison_table"' in rendered
    assert unit.frame_template_id == "bim_dx_comparison_table"

    # MDX source text fully preserved — coverage invariant + status surfacing.
    coverage = _check_post_ai_coverage_invariant([unit], records)
    assert coverage["status"] == "ok"
    assert coverage["dropped_section_ids"] == []
    status = _summarize_ai_repair_status(records, coverage)
    assert status["status"] == "applied"
    # Require an explicit False: ``.get(...) is not True`` would also pass
    # when the key is absent or holds a truthy non-bool value.
    assert status["human_review_required"] is False
def test_e2e_reject_chain_no_text_loss_on_multi_section_unit(monkeypatch):
    """Multi-section reject unit keeps every section id through the chain.

    Every section id flows through gather, apply, coverage invariant and
    ai_repair_status surfacing without a drop. Locks the "MDX source text
    100% preserved" guardrail at unit-multiplicity granularity (per the
    original note, gather copies the list via ``list(...)`` in
    src/phase_z2_ai_fallback/step12.py so apply mutations cannot silently
    drop it).
    """
    import src.phase_z2_ai_fallback.step12 as step12_mod

    monkeypatch.setattr(step12_mod, "route_ai_fallback", _patched_route_ai_fallback)

    unit = _StubUnit(source_section_ids=["MOCK_S1", "MOCK_S2", "MOCK_S3"])
    records = _run_step12_ai_repair([unit])
    zones = [{
        "position": "top",
        "template_id": "MOCK_T_reject",
        "slot_payload": {"title": "det", "bullets": ["det"]},
    }]
    _apply_ai_repair_proposals_to_zones(records, ["top"], zones)

    coverage = _check_post_ai_coverage_invariant([unit], records)
    assert coverage["pre_ai_section_ids"] == ["MOCK_S1", "MOCK_S2", "MOCK_S3"]
    assert coverage["post_ai_section_ids"] == ["MOCK_S1", "MOCK_S2", "MOCK_S3"]
    assert coverage["dropped_section_ids"] == []

    status = _summarize_ai_repair_status(records, coverage)
    assert status["status"] == "applied"
    # Require an explicit False: ``.get(...) is not True`` would also pass
    # when the key is absent or holds a truthy non-bool value.
    assert status["human_review_required"] is False

View File

@@ -0,0 +1,174 @@
"""IMP-47B u8 — slide_status.ai_repair_status surfacing tests.
Scope (this slice):
Helper ``_summarize_ai_repair_status(ai_repair_records, coverage_invariant)``
(src/phase_z2_pipeline.py) composes u4 gather ``error`` + u5
``apply_status`` + u7 ``coverage_invariant`` into a single
``ai_repair_status`` axis attached to ``slide_status``. Failure-axis
priority (highest → lowest): ``error`` > ``coverage_violated`` >
``unsupported_kind`` > ``applied`` > ``ok``. ``human_review_required``
flips True on the three failure axes for u11 frontend surfacing.
The frontend reads ``slide_status.ai_repair_status`` to render a
notification per the IMP-47B policy ("AI 호출 실패 / proposal validation
실패 / coverage 미달 → frontend notification"). u9~u13 are out of scope.
The helper is pure (no IO, no AI call) so synthetic record / invariant
dicts exercise every branch directly.
"""
from __future__ import annotations
from src.phase_z2_pipeline import _summarize_ai_repair_status
def _record(
    *,
    unit_index: int = 0,
    apply_status: str | None = None,
    error: str | None = None,
    source_section_ids: list[str] | None = None,
) -> dict:
    """Build a minimal Step 12 AI repair record stub (fields u8 reads).

    Args:
        unit_index: Index stored under ``"unit_index"``; also used to
            derive the default section id.
        apply_status: Value stored under ``"apply_status"``.
        error: Value stored under ``"error"``.
        source_section_ids: Section ids for the record. ``None`` (the
            default) yields ``["MOCK_S<unit_index>"]``; an explicit list,
            including an empty one, is stored as given.

    Returns:
        A dict with the keys ``unit_index``, ``source_section_ids``,
        ``apply_status`` and ``error``.
    """
    # Compare against None rather than relying on truthiness so that a
    # caller can deliberately pass an empty list.
    if source_section_ids is None:
        source_section_ids = [f"MOCK_S{unit_index}"]
    return {
        "unit_index": unit_index,
        "source_section_ids": source_section_ids,
        "apply_status": apply_status,
        "error": error,
    }
# Synthetic coverage_invariant dicts passed as the second argument of
# _summarize_ai_repair_status. These module-level dicts are shared by
# every test in this file, so tests must treat them as read-only.
_OK_COVERAGE = {"status": "ok", "dropped_section_ids": []}
_VIOLATED_COVERAGE = {"status": "violated", "dropped_section_ids": ["MOCK_S2"]}
# ─── Case 1 : empty pipeline → status='ok' ──────────────────────────
def test_empty_records_returns_ok_no_human_review():
    """An empty record list summarizes to status 'ok' without human review.

    This is where the flag-off default (no provisional units) lands:
    no AI work executed, zero total count, and every list-valued field
    of the summary is empty.
    """
    summary = _summarize_ai_repair_status([], _OK_COVERAGE)

    assert summary["status"] == "ok"
    assert summary["human_review_required"] is False
    assert summary["counts"]["total"] == 0
    for list_key in ("unsupported_kind_records", "error_records", "dropped_section_ids"):
        assert summary[list_key] == []
# ─── Case 2 : applied → status='applied', no human_review ───────────
def test_applied_partial_overrides_marks_applied_no_human_review():
    """Happy path: one applied PARTIAL_OVERRIDES record.

    The summary reports status 'applied', counts one applied record and
    zero errors, and does not request human review.
    """
    applied_record = _record(apply_status="applied:partial_overrides")

    summary = _summarize_ai_repair_status([applied_record], _OK_COVERAGE)

    assert summary["status"] == "applied"
    assert summary["human_review_required"] is False
    counts = summary["counts"]
    assert counts["applied"] == 1
    assert counts["error"] == 0
# ─── Case 3 : unsupported kind → status='unsupported_kind' ──────────
def test_unsupported_kind_marks_human_review_required():
    """An unsupported proposal kind must request human review.

    u5 surfaces ``unsupported_kind_for_reject_route:<kind>`` for
    builder_options_patch / slot_mapping_proposal. u8 has to classify
    such a record as human_review_required so the frontend renders a
    notification, and must echo the record in
    ``unsupported_kind_records``.
    """
    unsupported_record = _record(
        unit_index=1,
        apply_status="unsupported_kind_for_reject_route:builder_options_patch",
        source_section_ids=["MOCK_S1"],
    )

    summary = _summarize_ai_repair_status([unsupported_record], _OK_COVERAGE)

    assert summary["status"] == "unsupported_kind"
    assert summary["human_review_required"] is True
    assert summary["counts"]["unsupported_kind"] == 1
    expected_entry = {
        "unit_index": 1,
        "source_section_ids": ["MOCK_S1"],
        "apply_status": "unsupported_kind_for_reject_route:builder_options_patch",
    }
    assert summary["unsupported_kind_records"] == [expected_entry]
# ─── Case 4 : gather error → status='error' (highest priority) ──────
def test_gather_error_marks_status_error_with_records():
    """A record with ``error`` set yields the highest-priority status.

    ``record['error']`` being set means
    ``gather_step12_ai_repair_proposals`` caught a router exception
    (AI call / validator). The summary reports status 'error', requests
    human review and echoes the record in ``error_records``.
    """
    failed_record = _record(
        unit_index=2,
        error="ValueError: missing slot 'title'",
        source_section_ids=["MOCK_S2"],
    )

    summary = _summarize_ai_repair_status([failed_record], _OK_COVERAGE)

    assert summary["status"] == "error"
    assert summary["human_review_required"] is True
    assert summary["counts"]["error"] == 1
    expected_entry = {
        "unit_index": 2,
        "source_section_ids": ["MOCK_S2"],
        "error": "ValueError: missing slot 'title'",
    }
    assert summary["error_records"] == [expected_entry]
# ─── Case 5 : coverage violated → status='coverage_violated' ────────
def test_coverage_violation_surfaces_dropped_sections():
    """A violated coverage invariant is surfaced for human review.

    u7 coverage_invariant 'violated' means the AI repair dropped a
    section_id from the post-AI superset. Under the no-drop rule the
    summary must report 'coverage_violated', request human review and
    pass the dropped section ids through.
    """
    applied_record = _record(apply_status="applied:partial_overrides")

    summary = _summarize_ai_repair_status([applied_record], _VIOLATED_COVERAGE)

    assert summary["status"] == "coverage_violated"
    assert summary["human_review_required"] is True
    assert summary["coverage_status"] == "violated"
    assert summary["dropped_section_ids"] == ["MOCK_S2"]
# ─── Case 6 : priority order — error > coverage > unsupported ───────
def test_error_dominates_over_coverage_and_unsupported():
    """With several failure axes present, 'error' wins.

    Priority order is
    error > coverage_violated > unsupported_kind > applied > ok.
    The per-axis counts still reflect every record.
    """
    error_record = _record(unit_index=0, error="RuntimeError")
    unsupported_record = _record(
        unit_index=1,
        apply_status="unsupported_kind_for_reject_route:slot_mapping_proposal",
    )
    applied_record = _record(unit_index=2, apply_status="applied:partial_overrides")

    summary = _summarize_ai_repair_status(
        [error_record, unsupported_record, applied_record],
        _VIOLATED_COVERAGE,
    )

    assert summary["status"] == "error"
    assert summary["human_review_required"] is True
    for axis in ("error", "unsupported_kind", "applied"):
        assert summary["counts"][axis] == 1
# ─── Case 7 : no_proposal + no_zone_match counted, not failure ──────
def test_no_proposal_and_no_zone_match_do_not_trigger_human_review():
    """Structural skips are counted but are not failures.

    Flag-off short-circuit, not_provisional, route_not_ai_adaptation and
    B4-mismatch (no_zone_match) are structural skips, not AI failures:
    they show up in the counts while status stays 'ok' and
    human_review_required stays False.
    """
    skipped_records = [
        _record(unit_index=index, apply_status=skip_status)
        for index, skip_status in enumerate(("no_proposal", "no_zone_match"))
    ]

    summary = _summarize_ai_repair_status(skipped_records, _OK_COVERAGE)

    assert summary["status"] == "ok"
    assert summary["human_review_required"] is False
    assert summary["counts"]["no_proposal"] == 1
    assert summary["counts"]["no_zone_match"] == 1

View File

@@ -0,0 +1,304 @@
"""IMP-47B u12 — Initial plan_composition allow_provisional_fill for mixed direct+reject.
Scope (this slice):
The u12 glue inserted in ``run_phase_z2_mvp1`` (src/phase_z2_pipeline.py,
right after the initial plan_composition + telemetry build, before the
Step 7-A layout override block) detects the mixed direct+reject case
(initial plan_composition returns a viable layout but some sections
remain uncovered) and re-runs plan_composition with:
* a lookup_fn that passes ``allow_provisional=True`` (so chain_exhausted
sections synthesize a provisional rank-1 V4Match), and
* ``allow_provisional_fill=True`` (so uncovered sections receive a
last-resort provisional candidate fill in select_composition_units).
This admits the mixed direct+reject case to the AI repair path
(IMP-47B u4/u5) on first render — the reject section becomes a
provisional unit (``provisional=True`` + ``label="reject"``) which Step
12's reject route gather (u4) routes to AI fallback.
Gate predicates (mirrored from src/phase_z2_pipeline.py u12 block):
* units non-empty (all-reject case is handled by IMP-30 u4 retry below)
* layout_preset is not None
* not override_section_assignments (operator override bypasses the gate)
* at least one section_id is uncovered after initial pass
Guardrails proven by these tests:
* MDX 원문 100% 보존 — every section_id covered after mixed admission
(no silent drop).
* 자동 frame swap 금지 — mixed admission only re-runs plan_composition
with provisional flags; rank-1 reject judgment is preserved as the
provisional V4Match (no template_id swap to a different rank).
* Normal-path AI=0 — the mixed admission still emits the reject label;
AI activation is gated separately in router (config.py:19 default OFF).
* All-direct slides are a no-op — gate skips when no uncovered sections.
This test file exercises ``plan_composition`` directly with synthetic
stub V4 matches + a stub lookup_fn that mirrors the u12 retry seam.
Stub naming follows the IMP-30 u3 convention (MOCK_ prefix mandatory,
no real catalog template_id / frame_id leakage).
"""
from __future__ import annotations
from dataclasses import dataclass
from typing import Optional
from src.phase_z2_composition import plan_composition
# ─── Synthetic V4Match duck-type (mirrors IMP-30 _StubV4Match) ───────────
@dataclass
class _StubV4Match:
    """Synthetic V4Match duck-type returned by the stub lookup functions.

    The first five fields are required. The remaining fields default to
    ``selection_path="rank_1"`` with no rank, no fallback reason and
    ``provisional=False``.
    """

    template_id: str
    frame_id: str
    frame_number: int
    confidence: float
    # Judgment label; these tests use "use_as_is", "light_edit" and "reject".
    label: str
    v4_rank: Optional[int] = None
    selection_path: str = "rank_1"
    fallback_reason: Optional[str] = None
    # _make_provisional_lookup sets this to True for reject judgments.
    provisional: bool = False
@dataclass
class _StubSection:
    """Minimal section stand-in handed to ``plan_composition``.

    The tests construct it with ``section_id`` only; ``title`` and
    ``raw_content`` default to empty strings.
    """

    section_id: str
    title: str = ""
    raw_content: str = ""
# Judgment label → status string. Passed to plan_composition as its third
# positional argument in every test below.
_LABEL_TO_STATUS = {
    "use_as_is": "matched_zone",
    "light_edit": "adapt_matched_zone",
    "restructure": "extract_matched_zone",
    "reject": "fallback_candidate",
}
# Statuses passed to plan_composition as its fourth positional argument.
# Only the statuses mapped from "use_as_is" and "light_edit" are listed.
_ALLOWED_STATUSES = {"matched_zone", "adapt_matched_zone"}
def _make_normal_lookup(matches_by_section: dict[str, _StubV4Match]):
    """Build a lookup_fn returning the stored rank-1 match unchanged.

    Mirrors the pipeline's initial ``lookup_fn`` in
    src/phase_z2_pipeline.py (no ``allow_provisional`` kwarg, hence no
    provisional path). The returned callable takes a ``section_id`` and
    yields the mapped match, or ``None`` when the section is unknown.
    """
    return lambda section_id: matches_by_section.get(section_id)
def _make_provisional_lookup(matches_by_section: dict[str, _StubV4Match]):
    """Build a lookup_fn that marks reject rank-1 matches provisional.

    Mirrors the pipeline u12 retry ``_lookup_fn_mixed_admission``: for a
    reject judgment the callable returns a new ``_StubV4Match`` with
    ``provisional=True`` so plan_composition's last-resort fill pool can
    see it (provisional candidates are otherwise filtered out of the
    normal greedy pass). Unknown sections yield ``None``; non-reject
    matches are returned as stored.
    """
    def _lookup(section_id: str):
        match = matches_by_section.get(section_id)
        if match is None or match.label != "reject":
            return match
        # Synthesize the provisional shape that
        # lookup_v4_match_with_fallback returns when allow_provisional is
        # True: provisional=True + selection_path="provisional_rank_1".
        return _StubV4Match(
            template_id=match.template_id,
            frame_id=match.frame_id,
            frame_number=match.frame_number,
            confidence=match.confidence,
            label=match.label,
            v4_rank=1,
            selection_path="provisional_rank_1",
            provisional=True,
        )

    return _lookup
def _make_candidates_lookup_empty():
    """Build a candidates lookup that yields a new empty list per call."""
    return lambda section_id: []
# ─── u12 case 1 : mechanic — mixed admission via provisional lookup + fill ────
def test_u12_mechanic_mixed_admission_covers_reject_section_via_provisional_fill():
    """Positive proof for mixed direct+reject (S1=use_as_is, S2=reject).

    Baseline (normal lookup, no ``allow_provisional_fill``):
    plan_composition covers S1 only and S2 stays uncovered.

    u12 retry (provisional lookup + ``allow_provisional_fill=True``):
    plan_composition covers both sections, and the S2 unit is
    provisional with label "reject" — ready to be picked up by Step 12's
    reject route gather (IMP-47B u4).
    """
    direct_match = _StubV4Match(
        template_id="MOCK_template_direct_a",
        frame_id="MOCK_frame_001",
        frame_number=1,
        confidence=0.92,
        label="use_as_is",
        v4_rank=1,
    )
    reject_match = _StubV4Match(
        template_id="MOCK_template_reject_a",
        frame_id="MOCK_frame_002",
        frame_number=2,
        confidence=0.30,
        label="reject",
        v4_rank=1,
    )
    matches = {"S1": direct_match, "S2": reject_match}
    sections = [_StubSection("S1"), _StubSection("S2")]

    # Pre-u12 baseline — normal lookup, no provisional fill.
    baseline_units, _, _ = plan_composition(
        sections,
        _make_normal_lookup(matches),
        _LABEL_TO_STATUS,
        _ALLOWED_STATUSES,
        v4_candidates_lookup_fn=_make_candidates_lookup_empty(),
    )
    baseline_covered = set()
    for planned in baseline_units:
        baseline_covered.update(planned.source_section_ids)
    assert "S1" in baseline_covered, "S1 (use_as_is) must cover pre-u12"
    assert "S2" not in baseline_covered, (
        "Pre-u12 baseline regression: reject S2 should be uncovered (no provisional fill)"
    )

    # u12 mixed-admission retry — provisional lookup + allow_provisional_fill=True.
    retry_units, retry_preset, _ = plan_composition(
        sections,
        _make_provisional_lookup(matches),
        _LABEL_TO_STATUS,
        _ALLOWED_STATUSES,
        v4_candidates_lookup_fn=_make_candidates_lookup_empty(),
        allow_provisional_fill=True,
    )
    retry_covered = set()
    for planned in retry_units:
        retry_covered.update(planned.source_section_ids)
    assert retry_covered == {"S1", "S2"}, (
        "u12 mixed admission must cover every section (no text loss)"
    )
    assert retry_preset is not None

    # The S2 unit must be marked provisional so the reject route gather
    # (src/phase_z2_ai_fallback/step12.py) admits it.
    s2_unit = next(planned for planned in retry_units if "S2" in planned.source_section_ids)
    assert s2_unit.provisional is True, (
        "Reject S2 unit must be provisional so Step 12 reject route admits it"
    )
    assert s2_unit.label == "reject"
    # Frame template id is preserved — no auto frame swap.
    assert s2_unit.frame_template_id == "MOCK_template_reject_a"
# ─── u12 case 2 : gate — all-direct slides are a no-op ──────────────────────
def test_u12_gate_all_direct_yields_no_uncovered_sections():
    """No-op proof for slides where every section is auto-renderable.

    With use_as_is / light_edit judgments only, the initial
    plan_composition covers every section, so the uncovered-section list
    that drives the u12 mixed-admission gate is empty and the retry
    would be skipped.
    """
    first_direct = _StubV4Match(
        template_id="MOCK_template_direct_a",
        frame_id="MOCK_frame_001",
        frame_number=1,
        confidence=0.92,
        label="use_as_is",
        v4_rank=1,
    )
    second_direct = _StubV4Match(
        template_id="MOCK_template_direct_b",
        frame_id="MOCK_frame_002",
        frame_number=2,
        confidence=0.81,
        label="light_edit",
        v4_rank=1,
    )
    matches = {"S1": first_direct, "S2": second_direct}
    sections = [_StubSection("S1"), _StubSection("S2")]

    units, preset, _ = plan_composition(
        sections,
        _make_normal_lookup(matches),
        _LABEL_TO_STATUS,
        _ALLOWED_STATUSES,
        v4_candidates_lookup_fn=_make_candidates_lookup_empty(),
    )

    covered = set()
    for planned in units:
        covered.update(planned.source_section_ids)
    assert covered == {"S1", "S2"}, "All-direct must cover every section pre-u12"

    # Predicate from the src/phase_z2_pipeline.py u12 block:
    uncovered = []
    for section in sections:
        if section.section_id not in covered:
            uncovered.append(section.section_id)
    assert uncovered == [], (
        "u12 gate must classify all-direct as no-op (uncovered list empty)"
    )
    assert preset is not None
# ─── u12 case 3 : gate — initial empty units bypass u12 (IMP-30 retry owns it) ──
def test_u12_gate_skips_when_initial_units_empty():
    """The all-reject case must bypass the u12 gate.

    All-reject is owned by the IMP-30 u4 retry (units=[] guard in
    src/phase_z2_pipeline.py). u12 mixed admission must NOT compete with
    that path: the gate ``units and layout_preset is not None``
    short-circuits when the initial plan_composition returns nothing.
    """
    only_reject = _StubV4Match(
        template_id="MOCK_template_reject_a",
        frame_id="MOCK_frame_002",
        frame_number=2,
        confidence=0.30,
        label="reject",
        v4_rank=1,
    )

    units, preset, _ = plan_composition(
        [_StubSection("S1")],
        _make_normal_lookup({"S1": only_reject}),
        _LABEL_TO_STATUS,
        _ALLOWED_STATUSES,
        v4_candidates_lookup_fn=_make_candidates_lookup_empty(),
    )

    # All-reject initial pass: no auto-renderable units, no layout preset.
    assert units == []
    assert preset is None

    # The u12 gate predicate would short-circuit on `units` truthiness:
    gate_active = preset is not None if units else False
    assert gate_active is False, (
        "u12 mixed-admission gate must skip the all-reject case (IMP-30 u4 owns it)"
    )
# ─── u12 case 4 : code-path anchor — pipeline source contains u12 marker ────
def test_u12_pipeline_source_contains_mixed_admission_marker():
    """Anchor test: the u12 block is still present in the pipeline source.

    Reads src/phase_z2_pipeline.py as text and checks for the marker
    comment, the ``imp47b_u12_mixed_admission`` debug key, the
    ``_lookup_fn_mixed_admission`` helper name and an
    ``allow_provisional_fill=True`` invocation. Cheap structural guard —
    it does not run the heavy pipeline.
    """
    from pathlib import Path

    repo_root = Path(__file__).resolve().parents[1]
    pipeline_source = (repo_root / "src" / "phase_z2_pipeline.py").read_text(
        encoding="utf-8"
    )

    assert "IMP-47B u12 — mixed direct+reject first-render admission" in pipeline_source, (
        "u12 marker comment missing from pipeline — block may have been removed"
    )
    assert "imp47b_u12_mixed_admission" in pipeline_source, (
        "u12 comp_debug telemetry key missing"
    )
    # The mixed-admission retry must pass allow_provisional_fill=True.
    # Anchor against the helper function name + the kwarg co-occurrence.
    for anchor in ("_lookup_fn_mixed_admission", "allow_provisional_fill=True"):
        assert anchor in pipeline_source

View File

@@ -0,0 +1,180 @@
"""IMP-47B u3 — override-selected reject frames are admitted as provisional.
Scope (this slice):
Helper `_apply_frame_override_to_unit` (src/phase_z2_pipeline.py) covers
the three probe layers used by the `--override-frame` path:
1. ``v4_candidates`` exact match (non-reject; existing behaviour).
2. Full 32 V4 judgments probe (reject inclusive) — when the user
picks a reject frame, the unit is promoted to
``provisional=True`` with ``label="reject"`` so Step 12
(IMP-47B u4) admits the AI repair path.
3. Raw fall-through (template_id only) — no provisional promotion,
no label mutation.
Frame visual / contract stay untouched per the AI isolation contract
(frame auto-swap forbidden — AI re-places content into the existing
frame only). Sibling test confirms a non-reject override still goes
through the v4_candidates path without provisional promotion.
Synthetic naming convention mirrors tests/test_phase_z2_imp30_first_render.py
(MOCK_ prefix mandatory, no real catalog template_id / frame_id leakage).
"""
from __future__ import annotations
from dataclasses import dataclass, field
from typing import Optional
from src.phase_z2_pipeline import _apply_frame_override_to_unit
@dataclass
class _StubCandidate:
    """Synthetic entry for a unit's ``v4_candidates`` list.

    All five fields are required; the tests read them back from the unit
    after ``_apply_frame_override_to_unit`` picks the candidate.
    """

    template_id: str
    frame_id: str
    frame_number: int
    confidence: float
    label: str
@dataclass
class _StubUnit:
    """Synthetic unit passed to ``_apply_frame_override_to_unit``.

    Only ``source_section_ids`` is required. The frame fields start out
    unset/zero and ``provisional`` starts False, so each test can check
    exactly what the helper changed.
    """

    source_section_ids: list[str]
    frame_template_id: Optional[str] = None
    frame_id: Optional[str] = None
    frame_number: int = 0
    confidence: float = 0.0
    label: Optional[str] = None
    provisional: bool = False
    # default_factory gives each instance its own list; tests fill it with
    # _StubCandidate objects.
    v4_candidates: list = field(default_factory=list)
def _v4_with_reject(section_id: str, target_tid: str) -> dict:
    """Build a synthetic V4 dict where ``target_tid`` is a reject judgment.

    The shape mirrors the production V4 schema surface
    (``mdx_sections`` → section id → ``judgments_full32`` → list of
    judgment dicts with template_id / frame_id / frame_number /
    confidence / label). Two judgments are emitted — a non-reject one
    first, then the reject one for ``target_tid`` — so a test can assert
    that the helper picks the reject entry rather than the first
    non-reject one when the template_ids differ.
    """
    non_reject_judgment = {
        "template_id": "MOCK_T_other",
        "frame_id": "F_other",
        "frame_number": 1,
        "confidence": 0.85,
        "label": "use_as_is",
    }
    reject_judgment = {
        "template_id": target_tid,
        "frame_id": "F_reject",
        "frame_number": 32,
        "confidence": 0.40,
        "label": "reject",
    }
    section_entry = {"judgments_full32": [non_reject_judgment, reject_judgment]}
    return {"mdx_sections": {section_id: section_entry}}
# ─── Case 1 : reject override → provisional promotion ────────────
def test_override_to_reject_judgment_marks_unit_provisional():
    """Picking a reject frame promotes the unit to provisional.

    The unit ends up with label "reject" and provisional=True, and its
    frame metadata (frame_id / frame_number / confidence) is taken from
    the reject judgment so Step 9 metadata stays consistent.
    """
    unit = _StubUnit(
        source_section_ids=["MOCK_S1"],
        frame_template_id="MOCK_T_auto",
        frame_id="F_auto",
        frame_number=5,
        confidence=0.90,
        label="use_as_is",
        provisional=False,
    )

    meta = _apply_frame_override_to_unit(
        unit, "MOCK_T_reject", _v4_with_reject("MOCK_S1", "MOCK_T_reject")
    )

    assert meta == "v4_reject_judgment_provisional"
    expected_fields = (
        ("frame_template_id", "MOCK_T_reject"),
        ("frame_id", "F_reject"),
        ("frame_number", 32),
        ("confidence", 0.40),
        ("label", "reject"),
    )
    for attr_name, expected in expected_fields:
        assert getattr(unit, attr_name) == expected
    assert unit.provisional is True
# ─── Case 2 : non-reject override → existing v4_candidates path ───
def test_override_to_v4_candidate_keeps_non_provisional():
    """Picking a non-reject candidate uses the existing v4_candidates path.

    The helper takes the early v4_candidates branch without consulting
    the full 32 judgments. provisional remains False (normal-path AI=0
    contract — IMP-30 / IMP-47B router gate intact for this unit).
    """
    picked_candidate = _StubCandidate(
        template_id="MOCK_T_pick",
        frame_id="F_pick",
        frame_number=2,
        confidence=0.85,
        label="light_edit",
    )
    unit = _StubUnit(
        source_section_ids=["MOCK_S2"],
        frame_template_id="MOCK_T_auto",
        frame_id="F_auto",
        frame_number=3,
        confidence=0.95,
        label="use_as_is",
        provisional=False,
        v4_candidates=[picked_candidate],
    )
    # Empty sections: the full-judgment probe must NOT be reached.
    empty_v4 = {"mdx_sections": {}}

    meta = _apply_frame_override_to_unit(unit, "MOCK_T_pick", empty_v4)

    assert meta == "v4_candidates"
    assert unit.frame_template_id == "MOCK_T_pick"
    assert unit.frame_id == "F_pick"
    assert unit.label == "light_edit"
    assert unit.provisional is False
# ─── Case 3 : unknown template → raw fall-through (no provisional) ─
def test_override_unknown_template_falls_through_without_provisional():
    """An unknown template id takes the raw fall-through path.

    The template id is absent from both v4_candidates and
    judgments_full32, so the helper reports ``raw_template_id_only``:
    only frame_template_id changes — no provisional flag, no label
    change.
    """
    unit = _StubUnit(
        source_section_ids=["MOCK_S3"],
        frame_template_id="MOCK_T_auto",
        frame_id="F_auto",
        frame_number=4,
        confidence=0.92,
        label="use_as_is",
        provisional=False,
    )
    empty_v4 = {"mdx_sections": {}}

    meta = _apply_frame_override_to_unit(unit, "MOCK_T_unknown", empty_v4)

    assert meta == "raw_template_id_only"
    assert unit.frame_template_id == "MOCK_T_unknown"
    # frame_id / label unchanged — caller's print path warns on this case.
    assert (unit.frame_id, unit.label) == ("F_auto", "use_as_is")
    assert unit.provisional is False

View File

@@ -0,0 +1,223 @@
"""IMP-47B u5 — PARTIAL_OVERRIDES apply tests.
Scope (this slice):
Helper ``_apply_ai_repair_proposals_to_zones`` (src/phase_z2_pipeline.py)
merges ``proposal.payload.slots`` into ``zones_data[k]["slot_payload"]``
for PARTIAL_OVERRIDES proposals only, and loud-fails out-of-scope
proposal kinds (builder_options_patch, slot_mapping_proposal) with an
explicit ``apply_status`` marker.
The IMP-33 u5 validator inside ``route_ai_fallback`` already enforces
declared-slot completeness — the apply helper is therefore a structural
merge over the validator's contract, not a per-slot guard re-implementation.
u6 (step12_ai_repair.json audit), u7 (coverage invariant), and u8
(slide_status surfacing) are out of scope for this unit.
"""
from __future__ import annotations
from src.phase_z2_pipeline import _apply_ai_repair_proposals_to_zones
def _record(
    *,
    unit_index: int,
    proposal: dict | None,
    source_section_ids: list[str] | None = None,
) -> dict:
    """Build a synthetic ``gather_step12_ai_repair_proposals`` record.

    Args:
        unit_index: Index stored under ``"unit_index"``; also used to
            derive the default section id.
        proposal: Proposal dict stored under ``"proposal"``, or ``None``
            for a record where no AI call was made. ``ai_called``,
            ``cache_key`` and ``fingerprints`` are derived from whether
            this is ``None``.
        source_section_ids: Section ids for the record. ``None`` (the
            default) yields ``["MOCK_S<unit_index>"]``; an explicit list,
            including an empty one, is stored as given.

    Returns:
        A reject / provisional record dict with fixed synthetic values
        for the remaining fields.
    """
    # Compare against None rather than relying on truthiness so that a
    # caller can deliberately pass an empty list (consistent with how
    # _zone treats slot_payload).
    if source_section_ids is None:
        source_section_ids = [f"MOCK_S{unit_index}"]
    has_proposal = proposal is not None
    return {
        "unit_index": unit_index,
        "source_section_ids": source_section_ids,
        "frame_template_id": "MOCK_T",
        "label": "reject",
        "route_hint": "ai_adaptation_required",
        "provisional": True,
        "ai_called": has_proposal,
        "skip_reason": None,
        "proposal": proposal,
        "error": None,
        "cache_key": "MOCK_F::abc" if has_proposal else None,
        "fingerprints": (
            {"contract_sha": "x", "partial_sha": "y", "catalog_sha": ""}
            if has_proposal
            else None
        ),
    }
def _zone(*, position: str, slot_payload: dict | None = None) -> dict:
"""Synthetic zones_data entry — only fields the apply helper touches."""
return {
"position": position,
"template_id": "MOCK_T",
"slot_payload": slot_payload if slot_payload is not None else {},
}
# ─── Case 1 : PARTIAL_OVERRIDES → merged + applied marker ──────────
def test_partial_overrides_merges_slots_into_zone_slot_payload():
    """A PARTIAL_OVERRIDES proposal overwrites the zone's declared slots.

    The record must be marked ``applied:partial_overrides``, the slot
    values named in the proposal must replace the deterministic ones,
    and a meta key absent from the proposal (``_truncated_count``) must
    still be present afterwards.
    """
    deterministic_payload = {
        "title": "deterministic title",
        "bullets": ["det bullet"],
        "_truncated_count": 0,
    }
    zones = [_zone(position="top", slot_payload=deterministic_payload)]
    records = [
        _record(
            unit_index=0,
            proposal={
                "proposal_kind": "partial_overrides",
                "payload": {
                    "slots": {
                        "title": "AI title",
                        "bullets": ["AI bullet 1", "AI bullet 2"],
                    }
                },
                "rationale": "MOCK",
            },
        )
    ]

    _apply_ai_repair_proposals_to_zones(records, ["top"], zones)

    assert records[0]["apply_status"] == "applied:partial_overrides"
    merged = zones[0]["slot_payload"]
    assert merged["title"] == "AI title"
    assert merged["bullets"] == ["AI bullet 1", "AI bullet 2"]
    # Keys the proposal does not mention must survive the merge.
    assert merged["_truncated_count"] == 0
# ─── Case 2 : BUILDER_OPTIONS_PATCH → loud-fail unsupported_kind ───
def test_builder_options_patch_is_unsupported_for_reject_route():
    """A ``builder_options_patch`` proposal is marked, never applied.

    The record must carry the
    ``unsupported_kind_for_reject_route:<kind>`` marker and the zone's
    ``slot_payload`` must compare equal to its pre-call content.
    """
    baseline_payload = {"title": "deterministic"}
    # Hand the helper a copy so the baseline stays available for comparison.
    zones = [_zone(position="top", slot_payload=dict(baseline_payload))]
    records = [
        _record(
            unit_index=0,
            proposal={
                "proposal_kind": "builder_options_patch",
                "payload": {"font_size_px": 14},
                "rationale": "MOCK",
            },
        )
    ]

    _apply_ai_repair_proposals_to_zones(records, ["top"], zones)

    expected_status = "unsupported_kind_for_reject_route:builder_options_patch"
    assert records[0]["apply_status"] == expected_status
    assert zones[0]["slot_payload"] == baseline_payload
# ─── Case 3 : SLOT_MAPPING_PROPOSAL → loud-fail unsupported_kind ───
def test_slot_mapping_proposal_is_unsupported_for_reject_route():
    """A ``slot_mapping_proposal`` gets the same unsupported-kind marker.

    Even though its payload carries a ``slots`` mapping, the zone's
    ``slot_payload`` must be left with its deterministic content.
    """
    zones = [_zone(position="top", slot_payload={"title": "deterministic"})]
    records = [
        _record(
            unit_index=0,
            proposal={
                "proposal_kind": "slot_mapping_proposal",
                "payload": {"slots": {"title": "x"}},
                "rationale": "MOCK",
            },
        )
    ]

    _apply_ai_repair_proposals_to_zones(records, ["top"], zones)

    expected_status = "unsupported_kind_for_reject_route:slot_mapping_proposal"
    assert records[0]["apply_status"] == expected_status
    assert zones[0]["slot_payload"] == {"title": "deterministic"}
# ─── Case 4 : no proposal (router short-circuit / not_provisional) ──
def test_record_without_proposal_marked_no_proposal_and_zone_untouched():
    """A record with ``proposal=None`` is marked ``no_proposal``.

    The marker distinguishes "nothing to apply" from a real apply
    outcome; the matching zone's ``slot_payload`` must be unchanged.
    """
    zones = [_zone(position="top", slot_payload={"title": "deterministic"})]
    records = [_record(unit_index=0, proposal=None)]

    _apply_ai_repair_proposals_to_zones(records, ["top"], zones)

    status = records[0]["apply_status"]
    assert status == "no_proposal"
    assert zones[0]["slot_payload"] == {"title": "deterministic"}
# ─── Case 5 : proposal exists but no matching zone (B4 mismatch) ────
def test_proposal_for_unit_without_zone_match_marked_no_zone_match():
    """A proposal whose unit position has no zone is marked ``no_zone_match``.

    The unit sits at position ``"top"`` while ``zones_data`` only holds
    a ``"bottom"`` zone. The proposal must neither be dropped silently
    nor written into the unrelated zone.
    """
    # Only the bottom zone exists, so the unit's "top" position cannot match.
    zones = [_zone(position="bottom", slot_payload={"title": "other zone"})]
    records = [
        _record(
            unit_index=0,
            proposal={
                "proposal_kind": "partial_overrides",
                "payload": {"slots": {"title": "AI title"}},
                "rationale": "MOCK",
            },
        )
    ]

    _apply_ai_repair_proposals_to_zones(records, ["top"], zones)

    assert records[0]["apply_status"] == "no_zone_match"
    # The unrelated zone must not receive the proposal's slots.
    assert zones[0]["slot_payload"] == {"title": "other zone"}
# ─── Case 6 : mixed records — independent per-record classification ──
def test_mixed_records_classified_independently():
    """All five apply_status outcomes coexist in one batch.

    Confirms the helper does not short-circuit on the first non-applied
    record. The batch covers: an applied ``partial_overrides``, both
    unsupported kinds (``builder_options_patch`` and
    ``slot_mapping_proposal``), a record without a proposal, and a
    ``partial_overrides`` proposal whose unit position has no zone.
    """
    records = [
        _record(unit_index=0, proposal={
            "proposal_kind": "partial_overrides",
            "payload": {"slots": {"title": "AI"}},
            "rationale": "",
        }),
        _record(unit_index=1, proposal={
            "proposal_kind": "builder_options_patch",
            "payload": {"font_size_px": 14},
            "rationale": "",
        }),
        _record(unit_index=2, proposal=None),
        _record(unit_index=3, proposal={
            "proposal_kind": "slot_mapping_proposal",
            "payload": {"slots": {"title": "AI mapped"}},
            "rationale": "",
        }),
        _record(unit_index=4, proposal={
            "proposal_kind": "partial_overrides",
            "payload": {"slots": {"title": "AI dropped"}},
            "rationale": "",
        }),
    ]
    unit_positions = ["top", "middle", "bottom", "left", "right"]
    # No zone exists for "right", so the last record has nothing to match.
    zones = [
        _zone(position="top", slot_payload={"title": "det"}),
        _zone(position="middle", slot_payload={"title": "det"}),
        _zone(position="bottom", slot_payload={"title": "det"}),
        _zone(position="left", slot_payload={"title": "det"}),
    ]

    _apply_ai_repair_proposals_to_zones(records, unit_positions, zones)

    assert [r["apply_status"] for r in records] == [
        "applied:partial_overrides",
        "unsupported_kind_for_reject_route:builder_options_patch",
        "no_proposal",
        "unsupported_kind_for_reject_route:slot_mapping_proposal",
        "no_zone_match",
    ]
    # Only the first zone receives AI content; every other zone is untouched.
    assert zones[0]["slot_payload"]["title"] == "AI"
    assert zones[1]["slot_payload"]["title"] == "det"
    assert zones[2]["slot_payload"]["title"] == "det"
    assert zones[3]["slot_payload"]["title"] == "det"

View File

@@ -0,0 +1,154 @@
"""IMP-47B u4 + u6 — Step 12 AI repair wiring + audit artifact tests.
Scope (this slice):
* u4 — Helper ``_run_step12_ai_repair`` (src/phase_z2_pipeline.py)
wires the pipeline's local route-hint helper (``_imp05_route_hint``),
the frame contract loader (``get_contract``), and a
templates/phase_z2/families partial reader
(``_load_frame_partial_html``) into
``gather_step12_ai_repair_proposals``.
* u6 — The gather records flow into ``_write_step_artifact`` under
``step12_ai_repair.json``. The audit shape must stay
JSON-serialisable (no Pydantic / dataclass leakage) so the artifact
write never raises on real runs.
The router short-circuits when ``settings.ai_fallback_enabled`` is
False (default), so AI=0 for non-AI-route units stays a structural
guarantee. Synthetic naming mirrors tests/test_imp47b_override_provisional.py
(MOCK_ prefix; no real catalog template_id / frame_id leakage).
u5 (PARTIAL_OVERRIDES apply), u7 (coverage invariant), and u8
(slide_status surfacing) are out of scope for this unit.
"""
from __future__ import annotations
import json
from dataclasses import dataclass, field
from src.phase_z2_pipeline import (
_load_frame_partial_html,
_run_step12_ai_repair,
_write_step_artifact,
)
@dataclass
class _StubUnit:
label: str | None
provisional: bool
frame_template_id: str = "MOCK_T_x"
frame_id: str = "MOCK_F_x"
source_section_ids: list[str] = field(default_factory=lambda: ["MOCK_S1"])
raw_content: str = "MOCK_raw"
v4_rank: int | None = 1
cardinality: int | None = None
layout_preset: str = ""
zone_position: str = ""
source_shape: str = "paragraph"
h3_count: int = 0
char_count: int = 0
# ─── Case 1 : mixed units → per-unit skip_reason classification ─────
def test_mixed_units_classified_by_route_and_provisional_flag():
    """Each unit gets its own skip_reason / route_hint classification.

    With the router returning None (flag-off default), the provisional
    restructure and reject units record ``router_short_circuit``. The
    other units record a structural skip_reason (``not_provisional`` or
    ``route_not_ai_adaptation:...``). No record reports an AI call.
    """
    # (label, provisional, expected skip_reason, expected route_hint)
    cases = [
        ("use_as_is", False, "not_provisional", "direct_render"),
        (
            "light_edit",
            True,
            "route_not_ai_adaptation:deterministic_minor_adjustment",
            "deterministic_minor_adjustment",
        ),
        ("restructure", True, "router_short_circuit", "ai_adaptation_required"),
        ("reject", True, "router_short_circuit", "ai_adaptation_required"),
        ("restructure", False, "not_provisional", "ai_adaptation_required"),
    ]
    units = [
        _StubUnit(label=label, provisional=provisional)
        for label, provisional, _skip, _route in cases
    ]

    records = _run_step12_ai_repair(units)

    assert [r["skip_reason"] for r in records] == [case[2] for case in cases]
    assert [r["route_hint"] for r in records] == [case[3] for case in cases]
    assert all(r["ai_called"] is False for r in records)
# ─── Case 2 : reject provisional unit reaches AI gate ───────────────
def test_reject_provisional_unit_reaches_router_short_circuit():
    """A provisional reject unit reaches the AI-eligible path.

    It is routed to ``ai_adaptation_required`` and stopped only by the
    router short-circuit, so no AI call is recorded.
    """
    reject_unit = _StubUnit(label="reject", provisional=True)
    records = _run_step12_ai_repair([reject_unit])
    first = records[0]

    assert first["route_hint"] == "ai_adaptation_required"
    assert first["skip_reason"] == "router_short_circuit"
    assert first["ai_called"] is False
    # A populated cache_key / fingerprints shows the record got past the
    # route and provisional gates.
    assert first["cache_key"] is not None
    assert first["fingerprints"] is not None
# ─── Case 3 : frame visual loader degrades on missing partial ──────
def test_load_frame_partial_html_returns_empty_for_missing_file():
    """Template ids without a partial load as the empty string.

    Checked for the ``__empty__`` shell id and for an unknown
    MOCK_ template id.
    """
    for template_id in ("__empty__", "MOCK_T_does_not_exist"):
        assert _load_frame_partial_html(template_id) == ""
# ─── Case 4 (u6) : audit artifact write is JSON-serialisable ────────
def test_step12_ai_repair_artifact_writes_json_serialisable_records(tmp_path):
    """Gather records round-trip through the ``step12_ai_repair.json`` artifact.

    Writes the records with ``_write_step_artifact``, reads the file
    back with ``json.loads``, and checks the step envelope plus the
    per-unit ``route_hint`` / ``skip_reason`` / ``ai_called`` values.
    """
    units = [
        _StubUnit(label="reject", provisional=True),
        _StubUnit(label="use_as_is", provisional=False),
    ]
    records = _run_step12_ai_repair(units)

    artifact_path = _write_step_artifact(
        tmp_path,
        12,
        "ai_repair",
        data={"per_unit": records},
        outputs=["step12_ai_repair.json"],
    )

    assert artifact_path.is_file()
    assert artifact_path.name == "step12_ai_repair.json"

    payload = json.loads(artifact_path.read_text(encoding="utf-8"))
    assert payload["step_num"] == 12
    assert payload["step_name"] == "ai_repair"
    assert payload["step_status"] == "done"

    per_unit = payload["data"]["per_unit"]
    assert len(per_unit) == 2
    reject_entry, direct_entry = per_unit
    assert reject_entry["route_hint"] == "ai_adaptation_required"
    assert reject_entry["skip_reason"] == "router_short_circuit"
    assert reject_entry["ai_called"] is False
    assert direct_entry["route_hint"] == "direct_render"
    assert direct_entry["skip_reason"] == "not_provisional"

View File

@@ -44,3 +44,43 @@ def test_ai_fallback_budget_and_circuit_defaults_locked() -> None:
s = Settings()
assert s.ai_fallback_budget_per_run == 10
assert s.ai_fallback_circuit_breaker_threshold == 5
# IMP-46 u5 — auto-cache opt-in setting default lock.
# The CLI flag ``--auto-cache`` in src/phase_z2_pipeline.py mutates this
# setting at parse time. The default MUST stay OFF so the dual-gate
# contract (visual_check_passed AND user_approved) survives without an
# explicit operator opt-in.
def test_ai_fallback_auto_cache_default_off() -> None:
    """A default-constructed ``Settings`` has ``ai_fallback_auto_cache`` False."""
    auto_cache = Settings().ai_fallback_auto_cache
    failure_message = (
        "IMP-46 u5 auto-cache MUST default OFF; the dual-gate contract "
        "(visual_check_passed AND user_approved) survives without an "
        "explicit --auto-cache opt-in."
    )
    assert auto_cache is False, failure_message
# IMP-47B u1 — reject route hint policy correction.
# Prior to 2026-05-21 the reject V4 label routed to ``design_reference_only``
# (no AI). The user policy correction (issue #76) reroutes reject to
# ``ai_adaptation_required`` so the rank-1 reject frame is kept and the AI
# re-maps MDX content into its declared slots. Activation remains gated by
# ``ai_fallback_enabled`` (default OFF preserves the normal-path AI=0
# contract — see test_ai_fallback_master_flag_default_off above).
def test_reject_route_hint_routes_to_ai_adaptation() -> None:
    """``reject`` maps to ``ai_adaptation_required``; sibling routes are pinned.

    Checks both the ``_IMP05_ROUTE_HINTS`` table entry and the
    ``_imp05_route_hint`` lookup, then guards the other three labels
    against accidental drift.
    """
    from src.phase_z2_pipeline import _IMP05_ROUTE_HINTS, _imp05_route_hint

    table_entry = _IMP05_ROUTE_HINTS["reject"]
    assert table_entry == "ai_adaptation_required", (
        "IMP-47B u1: reject must route to ai_adaptation_required so the "
        "rank-1 reject frame is retained and AI re-maps MDX content into "
        "its slots (frame auto-swap forbidden)."
    )

    # reject first, then the sibling routes that must stay unchanged.
    expected_hints = {
        "reject": "ai_adaptation_required",
        "use_as_is": "direct_render",
        "light_edit": "deterministic_minor_adjustment",
        "restructure": "ai_adaptation_required",
    }
    for label, hint in expected_hints.items():
        assert _imp05_route_hint(label) == hint

View File

@@ -237,10 +237,10 @@ def test_restructure_reject_preserved_as_non_direct_evidence(patch_selector_deps
by_rank = {c["rank"]: c for c in candidates}
assert set(by_rank.keys()) == {1, 2, 3}
# rank-1 reject — non-direct, design_reference_only
# rank-1 reject — non-direct, ai_adaptation_required (IMP-47B u1 policy correction)
assert by_rank[1]["v4_label"] == "reject"
assert by_rank[1]["filtered_for_direct_execution"] is True
assert by_rank[1]["route_hint"] == "design_reference_only"
assert by_rank[1]["route_hint"] == "ai_adaptation_required"
# rank-2 restructure — non-direct, ai_adaptation_required
assert by_rank[2]["v4_label"] == "restructure"