"""IMP-46 u2 + u3 + u5 — Persistent JSON cache backend for AI fallback proposals. Replaces the IMP-33 u6 ``NotImplementedError`` stub with a content-addressed store at ``data/frame_cache/{frame_id}/{signature_hash}.json``. Key format: * ``read_proposal(key)`` / ``save_proposal(key, ...)`` accept a string ``key`` of the form ``"{frame_id}::{signature_hash}"``. The two components are parsed inside this module so that upstream callers (router, step 12) remain unaware of the on-disk layout. * ``read_proposal`` on a malformed (legacy) key silently returns ``None`` — the IMP-33 u7 router currently passes a legacy ``cache_key`` string, and u4 will switch to the structural form. Until then, all such reads must miss safely (no exception, no false hit). * ``save_proposal`` on a malformed key raises ``ValueError`` (loud, never silent) — writes are gated and must use the structural form. Stored payload (one JSON file per (frame_id, signature_hash) pair): { "schema_version": 1, "proposal": , "slide_css": , "fingerprints": {"contract_sha": ..., "partial_sha": ..., "catalog_sha": ...} } u3 invalidation contract (this module is a *comparator*, not a *computer*): * ``save_proposal`` persists the ``fingerprints`` dict supplied by the caller verbatim. Cache.py never computes any fingerprint — the three declared shas (``contract_sha`` / ``partial_sha`` / ``catalog_sha``) are computed by callers from the live contract YAML / partial templates / catalog payloads and handed in. Keeping the computation out of cache.py preserves AI isolation (no Phase Z runtime knowledge in the cache module) and keeps the cache schema-agnostic — additional fingerprint axes can be added without editing cache.py. * ``read_proposal`` accepts an optional ``fingerprints`` kwarg. When supplied, the stored ``fingerprints`` dict must equal the caller's dict exactly (strict equality, NOT subset). Any mismatch — including a key the caller demands but the stored entry lacks, OR a key the stored entry has but the caller does not pass — returns ``None``. Default ``fingerprints=None`` performs no comparison (back-compat for legacy callers that have not yet adopted fingerprint-aware lookup). Guardrails (locked by Stage 2 plan): * Both write gates preserved — ``visual_check_passed=False`` always raises ``AiFallbackCacheGateError`` BEFORE any filesystem touch. ``user_approved=False`` also raises by default; the IMP-46 u5 ``auto_cache=True`` override bypasses ONLY the ``user_approved`` gate (``visual_check_passed`` is never bypassed). Gate violation never silently no-ops. * Missing or corrupt files cause ``read_proposal`` to return ``None`` — the cache is a hint, never a hard dependency. Errors are not propagated to callers because the AI fallback path can always recompute. * ``mkdir(parents=True, exist_ok=True)`` is performed lazily on save. * No Anthropic / MDX / Phase Z runtime imports (AI isolation contract). * Cache root is held as a module-level :data:`CACHE_ROOT` so tests can redirect writes via ``monkeypatch.setattr`` without subclassing. u5 auto-cache contract (CLI ``--auto-cache`` + ``settings.ai_fallback_auto_cache``): * ``save_proposal(..., auto_cache=True)`` only bypasses the ``user_approved`` gate; ``visual_check_passed`` remains mandatory. * ``auto_cache`` is keyword-only and defaults to ``False`` — existing callers (and the test suite) see the original dual-gate behaviour unless they opt in explicitly. * The truth table over ``(visual_check_passed, user_approved, auto_cache)`` has eight cells; exactly three succeed: ``(True, True, False)``, ``(True, True, True)``, and ``(True, False, True)``. Every other cell raises ``AiFallbackCacheGateError``. """ from __future__ import annotations import json import pathlib from src.phase_z2_ai_fallback.schema import AiFallbackProposal SCHEMA_VERSION = 1 KEY_DELIMITER = "::" CACHE_ROOT: pathlib.Path = pathlib.Path("data/frame_cache") class AiFallbackCacheGateError(RuntimeError): """Raised when ``save_proposal`` is called without both IMP-46 gates True.""" def _parse_key(key: str) -> tuple[str, str] | None: """Parse a ``frame_id::signature_hash`` key. Returns ``None`` if malformed.""" if KEY_DELIMITER not in key: return None frame_id, _, signature_hash = key.partition(KEY_DELIMITER) if not frame_id or not signature_hash: return None if KEY_DELIMITER in signature_hash: return None return frame_id, signature_hash def _cache_path(frame_id: str, signature_hash: str) -> pathlib.Path: return CACHE_ROOT / frame_id / f"{signature_hash}.json" def read_proposal( key: str, *, fingerprints: dict | None = None, ) -> AiFallbackProposal | None: """Look up a previously cached proposal by ``key``. Returns ``None`` for: * empty / non-string key → ``ValueError`` (loud); * non-dict ``fingerprints`` (when supplied) → ``TypeError`` (loud, symmetric with :func:`save_proposal`); * legacy key format (no ``::`` delimiter) → silent ``None`` (router back-compat until u4 switches to the structural form); * missing file under ``data/frame_cache/{frame_id}/{signature_hash}.json``; * corrupt JSON / payload schema mismatch — read errors never propagate; * ``fingerprints`` supplied AND stored ``fingerprints`` field is not a dict OR does not equal the supplied dict (strict equality, u3 invalidation). """ if not isinstance(key, str) or not key: raise ValueError("cache key must be a non-empty string") if fingerprints is not None and not isinstance(fingerprints, dict): raise TypeError("fingerprints must be a dict or None") parsed = _parse_key(key) if parsed is None: return None frame_id, signature_hash = parsed path = _cache_path(frame_id, signature_hash) if not path.is_file(): return None try: data = json.loads(path.read_text(encoding="utf-8")) except (OSError, json.JSONDecodeError): return None if not isinstance(data, dict): return None if fingerprints is not None: stored = data.get("fingerprints") if not isinstance(stored, dict) or stored != fingerprints: return None proposal_dict = data.get("proposal") if not isinstance(proposal_dict, dict): return None try: return AiFallbackProposal.model_validate(proposal_dict) except Exception: # noqa: BLE001 — corrupt payload must miss, not raise return None def save_proposal( key: str, proposal: AiFallbackProposal, *, visual_check_passed: bool, user_approved: bool, slide_css: str | None = None, fingerprints: dict | None = None, auto_cache: bool = False, ) -> pathlib.Path: """Persist ``proposal`` under ``key`` once the IMP-46 gates clear. Gate contract (IMP-46 u5 truth table): * ``visual_check_passed=False`` -> :class:`AiFallbackCacheGateError` always (never bypassable; ``auto_cache`` cannot override). * ``user_approved=False`` AND ``auto_cache=False`` -> :class:`AiFallbackCacheGateError`. * ``user_approved=False`` AND ``auto_cache=True`` -> bypass the user-approval gate (IMP-46 u5 CLI / settings opt-in). * Otherwise (``visual_check_passed=True`` AND either ``user_approved=True`` OR ``auto_cache=True``) -> persist payload. Gate violations are raised BEFORE any filesystem touch — no parent directory is created, no file is written. When the gates clear the JSON payload (schema_version + proposal + slide_css + fingerprints) is written to ``data/frame_cache/{frame_id}/{signature_hash}.json`` and the resolved :class:`pathlib.Path` is returned. ``slide_css`` may be ``None`` (no slide-level CSS captured) or a string. ``fingerprints`` may be ``None`` (treated as empty dict) or a dict mapping fingerprint name to SHA hex digest. ``auto_cache`` is keyword-only and defaults to ``False``. It is wired from :data:`src.config.settings.ai_fallback_auto_cache`, which the ``--auto-cache`` CLI flag in ``src/phase_z2_pipeline.py`` toggles at parse time. The cache module never reads the setting itself — the caller passes the resolved boolean — so AI-isolation contracts (no Phase Z runtime / no Anthropic import) remain intact. """ if not isinstance(key, str) or not key: raise ValueError("cache key must be a non-empty string") if not isinstance(proposal, AiFallbackProposal): raise TypeError( "proposal must be an AiFallbackProposal instance " f"(got {type(proposal).__name__})" ) if not isinstance(auto_cache, bool): raise TypeError("auto_cache must be a bool") if not visual_check_passed: raise AiFallbackCacheGateError( "IMP-46 gate: visual_check_passed=False; refusing to cache an " "unverified proposal. (auto_cache cannot bypass this gate.)" ) if not user_approved and not auto_cache: raise AiFallbackCacheGateError( "IMP-46 gate: user_approved=False and auto_cache=False; " "refusing to cache without explicit user approval. Pass " "auto_cache=True (or --auto-cache on the CLI) to bypass." ) if slide_css is not None and not isinstance(slide_css, str): raise TypeError("slide_css must be a string or None") if fingerprints is None: fingerprints = {} elif not isinstance(fingerprints, dict): raise TypeError("fingerprints must be a dict or None") parsed = _parse_key(key) if parsed is None: raise ValueError( "cache key must be in " f"'frame_id{KEY_DELIMITER}signature_hash' format; got {key!r}" ) frame_id, signature_hash = parsed path = _cache_path(frame_id, signature_hash) path.parent.mkdir(parents=True, exist_ok=True) payload = { "schema_version": SCHEMA_VERSION, "proposal": proposal.model_dump(mode="json"), "slide_css": slide_css, "fingerprints": dict(fingerprints), } path.write_text( json.dumps(payload, sort_keys=True, ensure_ascii=False, indent=2), encoding="utf-8", ) return path