feat(#76): IMP-47B reject-as-AI-adaptation activation (u1~u13 backend + tests)
- u1~u9: AI fallback infrastructure (router/prompts/schema/validator) + Step 12 hook
- u10: e2e reject chain (writes final.html with AI-repaired slot, full coverage)
- u11: frontend wiring deferred to follow-up commit (split from IMP-41 hunks)
- u12: coverage_invariant guard
- u13: cache save gate (visual_check PASS + user_approved/auto_cache) — Codex #22 verified

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
213
tests/test_imp47b_cache_save_gate.py
Normal file
213
tests/test_imp47b_cache_save_gate.py
Normal file
@@ -0,0 +1,213 @@
|
||||
"""IMP-47B u13 — Persist validated proposals through ``save_proposal`` after gates.
|
||||
|
||||
Scope (this slice):
    Verify the new ``_persist_ai_repair_proposals_to_cache`` helper in
    ``src/phase_z2_pipeline.py`` honours the IMP-46 dual-gate truth table
    on the post-Step-14 cache-save seam. The helper is exercised in
    isolation (no Selenium, no full pipeline) with synthetic AI repair
    records that mirror the gather → apply → coverage chain shape
    produced by IMP-47B u4 / u5 / u7.

Guardrails proven by this test (IMP-46 + IMP-47B policy bullets):
    * ``visual_check_passed=False`` always blocks — never bypassable, even
      when ``auto_cache=True`` (IMP-46 u5 truth table cell).
    * ``user_approved=False`` AND ``auto_cache=False`` → gate blocked
      (default pipeline path has no UX approval gate; ``--auto-cache`` is
      the documented bypass).
    * ``visual_check_passed=True`` AND ``auto_cache=True`` → proposal
      persisted on disk under ``data/frame_cache/{frame_id}/{hash}.json``
      via ``cache.save_proposal``.
    * Non-applied records (no_proposal / no_zone_match / unsupported /
      error) → ``cache_save_status='not_applied'`` and NEVER reach
      ``save_proposal`` (no filesystem touch).
    * Settings axis — ``settings.ai_fallback_auto_cache`` sourced through
      the helper kwargs, never inlined (hardcoding ban).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import pathlib
|
||||
|
||||
import pytest
|
||||
|
||||
from src.phase_z2_ai_fallback import cache as cache_mod
|
||||
from src.phase_z2_ai_fallback.cache import AiFallbackCacheGateError
|
||||
from src.phase_z2_ai_fallback.schema import AiFallbackProposal, ProposalKind
|
||||
from src.phase_z2_pipeline import _persist_ai_repair_proposals_to_cache
|
||||
|
||||
|
||||
def _applied_record(
    *,
    cache_key: str = "MOCK_FRAME::deadbeef" + "0" * 56,
    fingerprints: dict | None = None,
    slots: dict | None = None,
) -> dict:
    """Return a synthetic AI-repair record marked ``applied:partial_overrides``.

    The record follows the IMP-47B u4/u5 shape. All arguments are
    keyword-only.

    Args:
        cache_key: Value stored under the record's ``"cache_key"`` entry.
        fingerprints: Mapping stored under ``"fingerprints"``; when omitted,
            a fixed three-entry placeholder mapping is used.
        slots: Slot content embedded in the proposal payload; when omitted,
            a fixed title/bullets placeholder is used.

    Returns:
        A plain dict whose ``"proposal"`` entry is the ``model_dump()`` of an
        ``AiFallbackProposal`` of kind ``PARTIAL_OVERRIDES``.
    """
    resolved_fingerprints = (
        {"contract_sha": "c1", "partial_sha": "p1", "catalog_sha": "k1"}
        if fingerprints is None
        else fingerprints
    )
    resolved_slots = (
        {"title": "AI repaired", "bullets": ["b1", "b2"]}
        if slots is None
        else slots
    )

    # Serialise the proposal up front so the record literal below stays flat.
    dumped_proposal = AiFallbackProposal(
        proposal_kind=ProposalKind.PARTIAL_OVERRIDES,
        payload={"slots": resolved_slots},
        rationale="cache save gate test",
    ).model_dump()

    return {
        "unit_index": 0,
        "source_section_ids": ["MOCK_S1"],
        "frame_template_id": "MOCK_FRAME",
        "label": "reject",
        "route_hint": "ai_adaptation_required",
        "provisional": True,
        "ai_called": True,
        "skip_reason": None,
        "proposal": dumped_proposal,
        "error": None,
        "cache_key": cache_key,
        "fingerprints": resolved_fingerprints,
        "apply_status": "applied:partial_overrides",
    }
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def _isolate_cache_root(tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch):
    """Point ``cache.CACHE_ROOT`` at a per-test temporary directory.

    Keeps ``save_proposal`` writes away from the real ``data/frame_cache/``
    tree. Yields the redirected root so tests can inspect what was written.
    """
    isolated_root = tmp_path / "frame_cache"
    monkeypatch.setattr(cache_mod, "CACHE_ROOT", isolated_root)
    yield isolated_root
|
||||
|
||||
|
||||
def test_visual_check_failed_blocks_save_even_with_auto_cache(_isolate_cache_root):
    """A failed visual check blocks the save; ``auto_cache`` cannot override it."""
    blocked = _applied_record()

    _persist_ai_repair_proposals_to_cache(
        [blocked],
        visual_check_passed=False,
        user_approved=True,
        auto_cache=True,
    )

    status = blocked["cache_save_status"]
    assert status.startswith("gate_blocked:")
    assert "visual_check_passed=False" in status

    # The gate must stop the write: no JSON file may exist under the cache root.
    cache_root = _isolate_cache_root
    assert not (cache_root.exists() and any(cache_root.rglob("*.json")))
|
||||
|
||||
|
||||
def test_user_not_approved_and_no_auto_cache_blocks_save(_isolate_cache_root):
    """With neither user approval nor ``auto_cache`` the gate blocks the save."""
    unapproved = _applied_record()

    _persist_ai_repair_proposals_to_cache(
        [unapproved],
        visual_check_passed=True,
        user_approved=False,
        auto_cache=False,
    )

    status = unapproved["cache_save_status"]
    assert status.startswith("gate_blocked:")
    assert "user_approved=False" in status

    # Nothing may have been written under the redirected cache root.
    cache_root = _isolate_cache_root
    assert not (cache_root.exists() and any(cache_root.rglob("*.json")))
|
||||
|
||||
|
||||
def test_visual_passed_and_auto_cache_persists_proposal(_isolate_cache_root):
    """Happy path: a passed visual check plus ``auto_cache`` writes one JSON file."""
    persisted = _applied_record()

    _persist_ai_repair_proposals_to_cache(
        [persisted],
        visual_check_passed=True,
        user_approved=False,
        auto_cache=True,
    )

    assert persisted["cache_save_status"] == "saved"

    json_files = [path for path in _isolate_cache_root.rglob("*.json")]
    assert len(json_files) == 1

    # Expected layout: {CACHE_ROOT}/{frame_id}/{signature_hash}.json, so the
    # parent directory is named after the frame id in the record's cache key.
    (only_file,) = json_files
    assert only_file.parent.name == "MOCK_FRAME"
|
||||
|
||||
|
||||
def test_non_applied_records_are_skipped_without_filesystem_touch(_isolate_cache_root):
    """Records whose apply step did not succeed are tagged ``not_applied``.

    Covers the no_proposal / no_zone_match / unsupported_kind / error shapes
    and checks that no JSON file is written for any of them.
    """
    placeholder_fingerprints = {"contract_sha": "c", "partial_sha": "p", "catalog_sha": "k"}

    # Each record receives its own fingerprints dict (``dict(...)`` copy) so
    # the helper never sees an object shared between records.
    records = [
        {
            "unit_index": 0,
            "apply_status": "no_proposal",
            "proposal": None,
            "cache_key": None,
            "fingerprints": None,
        },
        {
            "unit_index": 1,
            "apply_status": "no_zone_match",
            "proposal": {"proposal_kind": "partial_overrides", "payload": {"slots": {}}, "rationale": ""},
            "cache_key": "MOCK::abc",
            "fingerprints": dict(placeholder_fingerprints),
        },
        {
            "unit_index": 2,
            "apply_status": "unsupported_kind_for_reject_route:builder_options_patch",
            "proposal": {"proposal_kind": "builder_options_patch", "payload": {}, "rationale": ""},
            "cache_key": "MOCK::def",
            "fingerprints": dict(placeholder_fingerprints),
        },
        {
            "unit_index": 3,
            "apply_status": None,
            "proposal": None,
            "cache_key": "MOCK::ghi",
            "fingerprints": dict(placeholder_fingerprints),
            "error": "RuntimeError: boom",
        },
    ]

    _persist_ai_repair_proposals_to_cache(
        records,
        visual_check_passed=True,
        user_approved=True,
        auto_cache=True,
    )

    statuses = [entry["cache_save_status"] for entry in records]
    assert statuses == ["not_applied"] * len(records)

    # None of the records were applied, so zero JSON files may exist.
    cache_root = _isolate_cache_root
    assert not (cache_root.exists() and any(cache_root.rglob("*.json")))
|
||||
|
||||
|
||||
def test_mixed_records_only_persist_applied_ones(_isolate_cache_root):
    """In a mixed batch only the ``applied:`` record ends up on disk."""
    applied = _applied_record(cache_key="MOCK_FRAME::aaaaaaaa" + "0" * 56)
    not_applied = dict(
        unit_index=1,
        apply_status="no_proposal",
        proposal=None,
        cache_key=None,
        fingerprints=None,
    )

    _persist_ai_repair_proposals_to_cache(
        [applied, not_applied],
        visual_check_passed=True,
        user_approved=False,
        auto_cache=True,
    )

    assert applied["cache_save_status"] == "saved"
    assert not_applied["cache_save_status"] == "not_applied"

    # Exactly one file: the applied record's proposal.
    json_files = [path for path in _isolate_cache_root.rglob("*.json")]
    assert len(json_files) == 1
|
||||
|
||||
|
||||
def test_invalid_proposal_payload_surfaces_without_raising(_isolate_cache_root):
    """A malformed ``proposal`` dict is reported via ``cache_save_status``.

    The status must start with ``invalid_proposal:``, nothing may be written
    to disk, and the helper call itself must not raise.
    """
    malformed = {
        "unit_index": 0,
        "apply_status": "applied:partial_overrides",
        # ``proposal_kind`` is deliberately not a valid enum value.
        "proposal": {"proposal_kind": "not_a_valid_enum_value", "payload": {}, "rationale": ""},
        "cache_key": "MOCK::bad",
        "fingerprints": {"contract_sha": "c", "partial_sha": "p", "catalog_sha": "k"},
    }

    _persist_ai_repair_proposals_to_cache(
        [malformed],
        visual_check_passed=True,
        user_approved=True,
        auto_cache=True,
    )

    assert malformed["cache_save_status"].startswith("invalid_proposal:")

    cache_root = _isolate_cache_root
    assert not (cache_root.exists() and any(cache_root.rglob("*.json")))
|
||||
Reference in New Issue
Block a user