diff --git a/.gitignore b/.gitignore index 7d7a50f..5fcaeff 100644 --- a/.gitignore +++ b/.gitignore @@ -8,7 +8,11 @@ dist/ build/ .venv/ node_modules/ -data/ +data/* +# IMP-46 u6 — track only the frame_cache directory marker; cached payloads stay ignored. +!data/frame_cache/ +data/frame_cache/* +!data/frame_cache/.gitkeep # session workspace (push X — 작업 흐름 trace, 사용자 결정 2026-05-08) forex/ diff --git a/docs/architecture/IMP-17-CARVE-OUT.md b/docs/architecture/IMP-17-CARVE-OUT.md index 555111a..533a5af 100644 --- a/docs/architecture/IMP-17-CARVE-OUT.md +++ b/docs/architecture/IMP-17-CARVE-OUT.md @@ -51,5 +51,5 @@ Phase Q `content_editor.py` 는 **Archive Candidate** ([`PHASE-Q-AUDIT.md`](PHAS | Step 12 entry | `src.phase_z2_ai_fallback.step12.gather_step12_ai_repair_proposals` — IMP-30 provisional gate (`not_provisional` skip) AND reject gate (`design_reference_only_no_ai` skip) AND non-AI route catch-all run BEFORE `route_ai_fallback`. | | Step 17 entry | `src.phase_z2_ai_fallback.step17.gather_step17_ai_repair_proposals` — STRUCTURALLY BLOCKED. Every unit returns `skip_reason="step17_ai_blocked_imp_34_35_prerequisites_missing"`. Module does NOT import `route_ai_fallback` / `AiFallbackClient` / `anthropic`. | | Cascade order | `src.phase_z2_ai_fallback.step17.OVERFLOW_CASCADE_ORDER = (DETERMINISTIC, POPUP, AI_REPAIR, USER_OVERRIDE)` — single source of truth for Step 17 consumers. Aligns with line 16 of this doc. | -| IMP-46 cache gate | `src.phase_z2_ai_fallback.cache.save_proposal(..., visual_check_passed, user_approved)` raises `AiFallbackCacheGateError` unless BOTH gates are True; storage backend then raises `NotImplementedError` (IMP-46 marker). `read_proposal` returns `None` until IMP-46 lands a backend. | +| IMP-46 cache gate | `src.phase_z2_ai_fallback.cache.save_proposal(..., visual_check_passed, user_approved, auto_cache=False)` raises `AiFallbackCacheGateError` unless `visual_check_passed=True` AND (`user_approved=True` OR `auto_cache=True`). Persistent JSON backend at `data/frame_cache/{frame_id}/{signature_hash}.json` (u2); cache key = structural signature over 8 axes (u1+u4); read-side fingerprint invalidation via `read_proposal(..., fingerprints=...)` strict equality (u3); `--auto-cache` CLI flag + `settings.ai_fallback_auto_cache` (default `False`) bypasses ONLY the `user_approved` gate (u5); repo root tracked via `data/frame_cache/.gitkeep` with cached payloads git-ignored (u6). `read_proposal` returns `None` on missing / corrupt / fingerprint-mismatched entries — cache is a hint, never a hard dependency. | | AST isolation | `tests/phase_z2_ai_fallback/test_ast_isolation.py` parses every `*.py` under `src/phase_z2_ai_fallback/` and forbids Phase Q runtime / Kei client / `src.phase_z2_*` (non-fallback) imports. Whitelist = `src.config` + intra-package + stdlib + `anthropic` + `pydantic`. | diff --git a/src/config.py b/src/config.py index f9de404..fec6a38 100644 --- a/src/config.py +++ b/src/config.py @@ -26,6 +26,14 @@ class Settings(BaseSettings): ai_fallback_budget_per_run: int = 10 ai_fallback_circuit_breaker_threshold: int = 5 + # IMP-46 u5 — auto-cache flag. When True, `save_proposal` bypasses the + # `user_approved` gate only (`visual_check_passed` is never bypassed). + # Default OFF preserves the dual-gate contract; the CLI flag + # `--auto-cache` in `src/phase_z2_pipeline.py` mutates this setting at + # parse time. Downstream callers MUST source the flag from Settings, + # never inline literals. + ai_fallback_auto_cache: bool = False + model_config = {"env_file": ".env", "env_file_encoding": "utf-8"} diff --git a/src/phase_z2_ai_fallback/cache.py b/src/phase_z2_ai_fallback/cache.py index 9ceba97..43debb9 100644 --- a/src/phase_z2_ai_fallback/cache.py +++ b/src/phase_z2_ai_fallback/cache.py @@ -1,48 +1,158 @@ -"""IMP-33 u6 — AI fallback proposal cache (IMP-46 gate, no persistent storage). +"""IMP-46 u2 + u3 + u5 — Persistent JSON cache backend for AI fallback proposals. -This module defines the cache contract that IMP-33 callers use to remember -AI fallback proposals across runs. The persistent storage layer itself is -out-of-scope for IMP-33 and is owned by IMP-46 (frame transformation cache). +Replaces the IMP-33 u6 ``NotImplementedError`` stub with a content-addressed +store at ``data/frame_cache/{frame_id}/{signature_hash}.json``. -Behaviour locked by Stage 2 plan (u6): +Key format: -* ``read_proposal(key)`` always returns ``None`` until IMP-46 lands a - persistent backend. Callers MUST handle the cache-miss path. -* ``save_proposal(key, proposal, *, visual_check_passed, user_approved)`` - enforces the IMP-46 gate before any storage write is attempted: +* ``read_proposal(key)`` / ``save_proposal(key, ...)`` accept a string ``key`` + of the form ``"{frame_id}::{signature_hash}"``. The two components are + parsed inside this module so that upstream callers (router, step 12) + remain unaware of the on-disk layout. +* ``read_proposal`` on a malformed (legacy) key silently returns ``None`` + — the IMP-33 u7 router currently passes a legacy ``cache_key`` string, + and u4 will switch to the structural form. Until then, all such reads + must miss safely (no exception, no false hit). +* ``save_proposal`` on a malformed key raises ``ValueError`` (loud, never + silent) — writes are gated and must use the structural form. - - ``visual_check_passed=False`` -> ``AiFallbackCacheGateError`` - - ``user_approved=False`` -> ``AiFallbackCacheGateError`` +Stored payload (one JSON file per (frame_id, signature_hash) pair): - Only when BOTH gates are True does control reach the storage layer, - which currently raises ``NotImplementedError`` (the IMP-46 marker). + { + "schema_version": 1, + "proposal": , + "slide_css": , + "fingerprints": {"contract_sha": ..., "partial_sha": ..., "catalog_sha": ...} + } -Guardrails: +u3 invalidation contract (this module is a *comparator*, not a *computer*): -* No Anthropic import; cache is pure proposal bookkeeping. -* No MDX read/write; proposals are u2 ``AiFallbackProposal`` instances. -* No silent persistence: gate violations are loud, not skipped writes - (`feedback_artifact_status_naming`). +* ``save_proposal`` persists the ``fingerprints`` dict supplied by the + caller verbatim. Cache.py never computes any fingerprint — the three + declared shas (``contract_sha`` / ``partial_sha`` / ``catalog_sha``) are + computed by callers from the live contract YAML / partial templates / + catalog payloads and handed in. Keeping the computation out of cache.py + preserves AI isolation (no Phase Z runtime knowledge in the cache + module) and keeps the cache schema-agnostic — additional fingerprint + axes can be added without editing cache.py. +* ``read_proposal`` accepts an optional ``fingerprints`` kwarg. When + supplied, the stored ``fingerprints`` dict must equal the caller's dict + exactly (strict equality, NOT subset). Any mismatch — including a key + the caller demands but the stored entry lacks, OR a key the stored + entry has but the caller does not pass — returns ``None``. Default + ``fingerprints=None`` performs no comparison (back-compat for legacy + callers that have not yet adopted fingerprint-aware lookup). + +Guardrails (locked by Stage 2 plan): + +* Both write gates preserved — ``visual_check_passed=False`` always + raises ``AiFallbackCacheGateError`` BEFORE any filesystem touch. + ``user_approved=False`` also raises by default; the IMP-46 u5 + ``auto_cache=True`` override bypasses ONLY the ``user_approved`` gate + (``visual_check_passed`` is never bypassed). Gate violation never + silently no-ops. +* Missing or corrupt files cause ``read_proposal`` to return ``None`` — + the cache is a hint, never a hard dependency. Errors are not propagated + to callers because the AI fallback path can always recompute. +* ``mkdir(parents=True, exist_ok=True)`` is performed lazily on save. +* No Anthropic / MDX / Phase Z runtime imports (AI isolation contract). +* Cache root is held as a module-level :data:`CACHE_ROOT` so tests can + redirect writes via ``monkeypatch.setattr`` without subclassing. + +u5 auto-cache contract (CLI ``--auto-cache`` + ``settings.ai_fallback_auto_cache``): + +* ``save_proposal(..., auto_cache=True)`` only bypasses the + ``user_approved`` gate; ``visual_check_passed`` remains mandatory. +* ``auto_cache`` is keyword-only and defaults to ``False`` — existing + callers (and the test suite) see the original dual-gate behaviour + unless they opt in explicitly. +* The truth table over ``(visual_check_passed, user_approved, auto_cache)`` + has eight cells; exactly three succeed: + ``(True, True, False)``, ``(True, True, True)``, and + ``(True, False, True)``. Every other cell raises + ``AiFallbackCacheGateError``. """ from __future__ import annotations +import json +import pathlib + from src.phase_z2_ai_fallback.schema import AiFallbackProposal +SCHEMA_VERSION = 1 +KEY_DELIMITER = "::" +CACHE_ROOT: pathlib.Path = pathlib.Path("data/frame_cache") + + class AiFallbackCacheGateError(RuntimeError): """Raised when ``save_proposal`` is called without both IMP-46 gates True.""" -def read_proposal(key: str) -> AiFallbackProposal | None: +def _parse_key(key: str) -> tuple[str, str] | None: + """Parse a ``frame_id::signature_hash`` key. Returns ``None`` if malformed.""" + if KEY_DELIMITER not in key: + return None + frame_id, _, signature_hash = key.partition(KEY_DELIMITER) + if not frame_id or not signature_hash: + return None + if KEY_DELIMITER in signature_hash: + return None + return frame_id, signature_hash + + +def _cache_path(frame_id: str, signature_hash: str) -> pathlib.Path: + return CACHE_ROOT / frame_id / f"{signature_hash}.json" + + +def read_proposal( + key: str, + *, + fingerprints: dict | None = None, +) -> AiFallbackProposal | None: """Look up a previously cached proposal by ``key``. - IMP-33 ships without a persistent backend; this stub always returns - ``None`` so callers exercise the cache-miss path. The persistent - backend will be wired by IMP-46. + Returns ``None`` for: + + * empty / non-string key → ``ValueError`` (loud); + * non-dict ``fingerprints`` (when supplied) → ``TypeError`` (loud, + symmetric with :func:`save_proposal`); + * legacy key format (no ``::`` delimiter) → silent ``None`` (router + back-compat until u4 switches to the structural form); + * missing file under ``data/frame_cache/{frame_id}/{signature_hash}.json``; + * corrupt JSON / payload schema mismatch — read errors never propagate; + * ``fingerprints`` supplied AND stored ``fingerprints`` field is not a + dict OR does not equal the supplied dict (strict equality, + u3 invalidation). """ if not isinstance(key, str) or not key: raise ValueError("cache key must be a non-empty string") - return None + if fingerprints is not None and not isinstance(fingerprints, dict): + raise TypeError("fingerprints must be a dict or None") + parsed = _parse_key(key) + if parsed is None: + return None + frame_id, signature_hash = parsed + path = _cache_path(frame_id, signature_hash) + if not path.is_file(): + return None + try: + data = json.loads(path.read_text(encoding="utf-8")) + except (OSError, json.JSONDecodeError): + return None + if not isinstance(data, dict): + return None + if fingerprints is not None: + stored = data.get("fingerprints") + if not isinstance(stored, dict) or stored != fingerprints: + return None + proposal_dict = data.get("proposal") + if not isinstance(proposal_dict, dict): + return None + try: + return AiFallbackProposal.model_validate(proposal_dict) + except Exception: # noqa: BLE001 — corrupt payload must miss, not raise + return None def save_proposal( @@ -51,13 +161,39 @@ def save_proposal( *, visual_check_passed: bool, user_approved: bool, -) -> None: - """Persist ``proposal`` under ``key`` once both IMP-46 gates are True. + slide_css: str | None = None, + fingerprints: dict | None = None, + auto_cache: bool = False, +) -> pathlib.Path: + """Persist ``proposal`` under ``key`` once the IMP-46 gates clear. - Raises ``AiFallbackCacheGateError`` if either gate is False — the - proposal is NOT written. When both gates are True, storage raises - ``NotImplementedError`` (the IMP-46 persistent backend has not landed - yet). + Gate contract (IMP-46 u5 truth table): + + * ``visual_check_passed=False`` -> :class:`AiFallbackCacheGateError` + always (never bypassable; ``auto_cache`` cannot override). + * ``user_approved=False`` AND ``auto_cache=False`` -> + :class:`AiFallbackCacheGateError`. + * ``user_approved=False`` AND ``auto_cache=True`` -> bypass the + user-approval gate (IMP-46 u5 CLI / settings opt-in). + * Otherwise (``visual_check_passed=True`` AND either + ``user_approved=True`` OR ``auto_cache=True``) -> persist payload. + + Gate violations are raised BEFORE any filesystem touch — no parent + directory is created, no file is written. When the gates clear the + JSON payload (schema_version + proposal + slide_css + fingerprints) + is written to ``data/frame_cache/{frame_id}/{signature_hash}.json`` + and the resolved :class:`pathlib.Path` is returned. + + ``slide_css`` may be ``None`` (no slide-level CSS captured) or a + string. ``fingerprints`` may be ``None`` (treated as empty dict) or a + dict mapping fingerprint name to SHA hex digest. + + ``auto_cache`` is keyword-only and defaults to ``False``. It is wired + from :data:`src.config.settings.ai_fallback_auto_cache`, which the + ``--auto-cache`` CLI flag in ``src/phase_z2_pipeline.py`` toggles at + parse time. The cache module never reads the setting itself — the + caller passes the resolved boolean — so AI-isolation contracts + (no Phase Z runtime / no Anthropic import) remain intact. """ if not isinstance(key, str) or not key: raise ValueError("cache key must be a non-empty string") @@ -66,17 +202,42 @@ def save_proposal( "proposal must be an AiFallbackProposal instance " f"(got {type(proposal).__name__})" ) + if not isinstance(auto_cache, bool): + raise TypeError("auto_cache must be a bool") if not visual_check_passed: raise AiFallbackCacheGateError( "IMP-46 gate: visual_check_passed=False; refusing to cache an " - "unverified proposal." + "unverified proposal. (auto_cache cannot bypass this gate.)" ) - if not user_approved: + if not user_approved and not auto_cache: raise AiFallbackCacheGateError( - "IMP-46 gate: user_approved=False; refusing to cache without " - "explicit user approval." + "IMP-46 gate: user_approved=False and auto_cache=False; " + "refusing to cache without explicit user approval. Pass " + "auto_cache=True (or --auto-cache on the CLI) to bypass." ) - raise NotImplementedError( - "IMP-46 persistent cache storage is not implemented yet; " - "this is the IMP-33 u6 stub marker." + if slide_css is not None and not isinstance(slide_css, str): + raise TypeError("slide_css must be a string or None") + if fingerprints is None: + fingerprints = {} + elif not isinstance(fingerprints, dict): + raise TypeError("fingerprints must be a dict or None") + parsed = _parse_key(key) + if parsed is None: + raise ValueError( + "cache key must be in " + f"'frame_id{KEY_DELIMITER}signature_hash' format; got {key!r}" + ) + frame_id, signature_hash = parsed + path = _cache_path(frame_id, signature_hash) + path.parent.mkdir(parents=True, exist_ok=True) + payload = { + "schema_version": SCHEMA_VERSION, + "proposal": proposal.model_dump(mode="json"), + "slide_css": slide_css, + "fingerprints": dict(fingerprints), + } + path.write_text( + json.dumps(payload, sort_keys=True, ensure_ascii=False, indent=2), + encoding="utf-8", ) + return path diff --git a/src/phase_z2_ai_fallback/signature.py b/src/phase_z2_ai_fallback/signature.py new file mode 100644 index 0000000..8264b8c --- /dev/null +++ b/src/phase_z2_ai_fallback/signature.py @@ -0,0 +1,91 @@ +"""IMP-46 u1 — Frame transformation cache signature builder. + +Deterministic SHA256 over the 8 declared structural axes: + frame_id, v4_label, cardinality, source_shape, + h3_count, char_count_bucket, layout_preset, zone_position + +Guardrails: + * No sample/section identifiers in the signature surface (no-hardcoding lock). + * source_shape constrained to the bullet/paragraph/table/mixed enum. + * char_count_bucket is the *bucket label*; numeric counts must be projected + via :func:`bucket_char_count` before being fed to :func:`build_signature`. + * Schema version is embedded in the hashed payload so a future axis change + breaks the digest by design (cache invalidation on schema bump). +""" +from __future__ import annotations + +import hashlib +import json +from enum import Enum + + +SCHEMA_VERSION = 1 + + +class SourceShape(str, Enum): + BULLET = "bullet" + PARAGRAPH = "paragraph" + TABLE = "table" + MIXED = "mixed" + + +_CHAR_COUNT_BUCKETS: tuple[tuple[int, str], ...] = ( + (50, "0-50"), + (150, "51-150"), + (400, "151-400"), + (1000, "401-1000"), +) +_CHAR_COUNT_BUCKET_OVERFLOW = "1001+" +CHAR_COUNT_BUCKET_LABELS: tuple[str, ...] = tuple( + label for _, label in _CHAR_COUNT_BUCKETS +) + (_CHAR_COUNT_BUCKET_OVERFLOW,) + + +def bucket_char_count(char_count: int) -> str: + """Project a non-negative character count to its fixed bucket label.""" + if isinstance(char_count, bool) or not isinstance(char_count, int): + raise TypeError("char_count must be a non-negative int") + if char_count < 0: + raise ValueError("char_count must be non-negative") + for upper, label in _CHAR_COUNT_BUCKETS: + if char_count <= upper: + return label + return _CHAR_COUNT_BUCKET_OVERFLOW + + +def build_signature( + *, + frame_id: str, + v4_label: str, + cardinality: int | None, + source_shape: SourceShape | str, + h3_count: int, + char_count_bucket: str, + layout_preset: str, + zone_position: str, +) -> str: + """Return a deterministic SHA256 hex digest over the 8 declared axes.""" + if isinstance(source_shape, SourceShape): + source_shape_value = source_shape.value + elif isinstance(source_shape, str): + source_shape_value = SourceShape(source_shape).value + else: + raise TypeError("source_shape must be SourceShape or str") + if char_count_bucket not in CHAR_COUNT_BUCKET_LABELS: + raise ValueError( + f"char_count_bucket={char_count_bucket!r} is not a known bucket " + f"label (expected one of {CHAR_COUNT_BUCKET_LABELS})" + ) + payload = { + "schema_version": SCHEMA_VERSION, + "frame_id": frame_id, + "v4_label": v4_label, + "cardinality": cardinality, + "source_shape": source_shape_value, + "h3_count": h3_count, + "char_count_bucket": char_count_bucket, + "layout_preset": layout_preset, + "zone_position": zone_position, + } + encoded = json.dumps(payload, sort_keys=True, ensure_ascii=False).encode("utf-8") + return hashlib.sha256(encoded).hexdigest() diff --git a/src/phase_z2_ai_fallback/step12.py b/src/phase_z2_ai_fallback/step12.py index 2cb8a98..f0df5a9 100644 --- a/src/phase_z2_ai_fallback/step12.py +++ b/src/phase_z2_ai_fallback/step12.py @@ -1,32 +1,72 @@ -"""IMP-33 u8 — Step 12 AI repair wiring (IMP-30 provisional units only). +"""IMP-33 u8 + IMP-46 u4 — Step 12 AI repair wiring with structural cache key. Phase Z Step 12 = slot_payload (the runtime "light_edit / restructure" surface where AI-assisted frame-aware adaptation is allowed per IMP-17 carve-out). This module is the only call site that pipes Phase Z composition units into -``src.phase_z2_ai_fallback.router.route_ai_fallback``. Two structural gates -preserve the AI isolation contract: +``src.phase_z2_ai_fallback.router.route_ai_fallback``. One structural gate +preserves the AI isolation contract: * IMP-30 provisional gate — units with ``provisional=False`` are skipped before any route classification. AI repair is reserved for first-render invariant survivors (no rank-1 V4 evidence, recovered as provisional). -* Reject gate — units whose V4 label maps to ``design_reference_only`` - (``reject``) are skipped with ``skip_reason="design_reference_only_no_ai"``. - Reject path is design reference only — never an AI call. + +Per IMP-47B u1+u2, the ``reject`` V4 label routes to +``ai_adaptation_required`` (no longer ``design_reference_only``) and is +admitted to the AI repair path; the legacy "reject gate" short-circuit is +removed. Any unit whose ``route_hint`` is not ``ai_adaptation_required`` +still falls through to the catch-all ``route_not_ai_adaptation:`` +skip — that single gate continues to enforce the AI=0 normal path. Combined with the u7 router's flag-off + route-gate short-circuits, the default Phase Z run path performs zero AI calls (PZ-1). Save to cache is NOT performed here — that is the caller's responsibility AFTER ``visual_check_passed=True`` AND ``user_approved=True`` (u6 IMP-46 gate). + +IMP-46 u4 — structural cache key + fingerprints +------------------------------------------------ + +The legacy ``cache_key`` was ``"{template_id}::{sorted(source_section_ids)}"`` +which leaked sample / section identity into the cache surface +(no-hardcoding lock violation: structurally identical content with +different MDX section ids would miss). u4 replaces it with +``"{frame_id}::{signature_hash}"`` where ``signature_hash`` is the +deterministic SHA256 over the 8 declared structural axes (see +``src.phase_z2_ai_fallback.signature``). Per-unit signature inputs are +read from unit attributes: + +* ``cardinality`` (int | None) — also forwarded to ``v4_result`` +* ``layout_preset`` (str) +* ``zone_position`` (str) +* ``source_shape`` (str) — bullet / paragraph / table / mixed +* ``h3_count`` (int) +* ``char_count`` (int) — bucketed via ``bucket_char_count`` + +In parallel the three invalidation fingerprints +(``contract_sha`` / ``partial_sha`` / ``catalog_sha``) are computed and +attached to the record. The cache.py module remains a *comparator* — all +fingerprint *computation* happens here (or via injected loaders) so the +cache schema-agnostic contract is preserved. The router's existing +``read_proposal(cache_key)`` continues to perform exact-match lookup only +(fuzzy is deferred per Stage 2 plan); read-side fingerprint validation +through the router is a follow-up axis. """ from __future__ import annotations +import hashlib +import json from typing import Any, Callable, Iterable from src.phase_z2_ai_fallback.router import route_ai_fallback +from src.phase_z2_ai_fallback.signature import bucket_char_count, build_signature _AI_ADAPTATION_ROUTE = "ai_adaptation_required" -_DESIGN_REFERENCE_ROUTE = "design_reference_only" + + +def _sha256_of(payload: Any) -> str: + """Deterministic SHA256 hex digest over a JSON-serialisable payload.""" + encoded = json.dumps(payload, sort_keys=True, ensure_ascii=False).encode("utf-8") + return hashlib.sha256(encoded).hexdigest() def gather_step12_ai_repair_proposals( @@ -38,6 +78,7 @@ def gather_step12_ai_repair_proposals( figma_partial_loader: Callable[[str], dict] | None = None, internal_region_lookup: Callable[[Any], dict] | None = None, mdx_text_loader: Callable[[Any], str] | None = None, + catalog_sha_loader: Callable[[], str] | None = None, ) -> list[dict]: """Return one record per unit describing the Step 12 AI repair decision. @@ -55,8 +96,16 @@ def gather_step12_ai_repair_proposals( "skip_reason": str | None, "proposal": dict | None, "error": str | None, + "cache_key": str | None, # IMP-46 u4 + "fingerprints": dict | None, # IMP-46 u4 } + ``cache_key`` and ``fingerprints`` are populated only when the unit + reaches the AI-eligible code path (provisional + ai_adaptation route). + Skipped units retain ``None`` for both — the structural axes + (layout_preset / zone_position / source_shape / h3_count / char_count) + are not guaranteed to be set for non-AI paths. + ``ai_called`` is True only when ``route_ai_fallback`` was invoked AND returned a proposal OR raised. Flag-off / route-mismatch returns ``None`` from the router and is surfaced as ``ai_called=False`` with @@ -64,6 +113,9 @@ def gather_step12_ai_repair_proposals( "router decided not to run" from "router ran and returned a proposal". """ records: list[dict] = [] + catalog_sha = ( + catalog_sha_loader() if catalog_sha_loader is not None else "" + ) for index, unit in enumerate(units): label = getattr(unit, "label", None) route_hint = route_for_label(label) @@ -78,15 +130,13 @@ def gather_step12_ai_repair_proposals( "skip_reason": None, "proposal": None, "error": None, + "cache_key": None, + "fingerprints": None, } if not record["provisional"]: record["skip_reason"] = "not_provisional" records.append(record) continue - if route_hint == _DESIGN_REFERENCE_ROUTE: - record["skip_reason"] = "design_reference_only_no_ai" - records.append(record) - continue if route_hint != _AI_ADAPTATION_ROUTE: record["skip_reason"] = f"route_not_ai_adaptation:{route_hint}" records.append(record) @@ -106,15 +156,40 @@ def gather_step12_ai_repair_proposals( if mdx_text_loader is not None else (getattr(unit, "raw_content", "") or "") ) - cache_key = "::".join( - [template_id, ",".join(sorted(record["source_section_ids"]))] + + frame_id_value = getattr(unit, "frame_id", "") or "" + cardinality = getattr(unit, "cardinality", None) + layout_preset = getattr(unit, "layout_preset", "") or "" + zone_position = getattr(unit, "zone_position", "") or "" + source_shape = getattr(unit, "source_shape", "paragraph") or "paragraph" + h3_count = int(getattr(unit, "h3_count", 0) or 0) + char_count = int(getattr(unit, "char_count", 0) or 0) + char_count_bucket = bucket_char_count(char_count) + signature_hash = build_signature( + frame_id=frame_id_value, + v4_label=label or "", + cardinality=cardinality, + source_shape=source_shape, + h3_count=h3_count, + char_count_bucket=char_count_bucket, + layout_preset=layout_preset, + zone_position=zone_position, ) + cache_key = f"{frame_id_value}::{signature_hash}" + fingerprints = { + "contract_sha": _sha256_of(frame_contract), + "partial_sha": _sha256_of(figma_partial_json), + "catalog_sha": catalog_sha, + } + record["cache_key"] = cache_key + record["fingerprints"] = fingerprints + v4_result = { "route": route_hint, "label": label, "frame_id": getattr(unit, "frame_id", None), "rank": getattr(unit, "v4_rank", None), - "cardinality": None, + "cardinality": cardinality, } try: proposal = route_ai_fallback( diff --git a/src/phase_z2_pipeline.py b/src/phase_z2_pipeline.py index d869018..06706c3 100644 --- a/src/phase_z2_pipeline.py +++ b/src/phase_z2_pipeline.py @@ -78,6 +78,12 @@ from phase_z2_failure_router import ( from phase_z2_content_extractor import extract_content_objects, extract_rich_content_objects from phase_z2_placement_planner import plan_placement +# IMP-47B u4 — Step 12 AI repair wiring. gather() short-circuits at the +# router when settings.ai_fallback_enabled is False (default), so import +# at module load is safe for the AI=0 normal path (PZ-1). Activation gate +# stays in src/config.py + src/phase_z2_ai_fallback/router.py. +from src.phase_z2_ai_fallback.step12 import gather_step12_ai_repair_proposals + # ─── Constants ────────────────────────────────────────────────── @@ -569,12 +575,15 @@ def lookup_v4_match( # use_as_is → Phase Z direct render # light_edit → deterministic minor adjustment # restructure → AI-assisted frame-aware adaptation (deferred to IMP-17 — carve-out, AI fallback only, normal path 밖) -# reject → design reference only (deferred to IMP-29 frontend override) +# reject → AI re-construction over the rank-1 reject frame (IMP-47B u1, 2026-05-21); +# policy correction supersedes the legacy "design reference only" disposition. +# Frame visual / contract stays untouched; AI only re-maps MDX content into +# declared slots. Activation still gated by ai_fallback_enabled (default OFF). _IMP05_ROUTE_HINTS: dict[str, str] = { "use_as_is": "direct_render", "light_edit": "deterministic_minor_adjustment", "restructure": "ai_adaptation_required", - "reject": "design_reference_only", + "reject": "ai_adaptation_required", } @@ -585,6 +594,249 @@ def _imp05_route_hint(label: Optional[str]) -> Optional[str]: return _IMP05_ROUTE_HINTS.get(label) +def _load_frame_partial_html(template_id: str) -> str: + """IMP-47B u4 — Read templates/phase_z2/families/{template_id}.html. + + Missing partial (e.g., ``__empty__`` shell from IMP-30) returns an + empty string so gather_step12_ai_repair_proposals can still build a + record with skip_reason without raising on file IO. + """ + partial_path = TEMPLATE_DIR / "families" / f"{template_id}.html" + if not partial_path.is_file(): + return "" + return partial_path.read_text(encoding="utf-8") + + +def _run_step12_ai_repair(units) -> list[dict]: + """IMP-47B u4 — Wire gather_step12_ai_repair_proposals into Step 12. + + Routes provisional units whose IMP-05 hint maps to + ``ai_adaptation_required`` (``restructure`` + ``reject`` per u1) + through ``src.phase_z2_ai_fallback.router``. Normal-path units + (``use_as_is`` / ``light_edit`` / non-provisional) record a + skip_reason without invoking the router; flag-off runs short-circuit + at the router (``settings.ai_fallback_enabled=False`` default). + Returns the per-unit record list — u5 consumes records for + PARTIAL_OVERRIDES apply and u6 writes the audit artifact. + """ + return gather_step12_ai_repair_proposals( + units, + route_for_label=_imp05_route_hint, + get_contract_fn=get_contract, + frame_visual_loader=_load_frame_partial_html, + ) + + +_REJECT_SUPPORTED_PROPOSAL_KINDS: frozenset[str] = frozenset({"partial_overrides"}) + + +def _apply_ai_repair_proposals_to_zones( + ai_repair_records: list[dict], + unit_positions: list[str], + zones_data: list[dict], +) -> None: + """IMP-47B u5 — Apply PARTIAL_OVERRIDES into zones_data.slot_payload. + + Mutates each record's ``apply_status`` in place and merges + ``proposal.payload.slots`` into the matching zone. Out-of-scope + kinds (``builder_options_patch``, ``slot_mapping_proposal``) + loud-fail with ``unsupported_kind_for_reject_route:`` — zones + untouched (human_review surfacing → u8). IMP-33 u5 validator + guarantees declared-slot completeness, so ``dict.update`` is the + structural merge (``feedback_ai_isolation_contract``). + """ + zone_by_position = {z["position"]: z for z in zones_data} + for record in ai_repair_records: + proposal = record.get("proposal") + if proposal is None: + record["apply_status"] = "no_proposal" + continue + kind = proposal.get("proposal_kind") + if kind not in _REJECT_SUPPORTED_PROPOSAL_KINDS: + record["apply_status"] = f"unsupported_kind_for_reject_route:{kind}" + print( + f" [ai-repair-apply] unit {record['unit_index']} " + f"proposal_kind='{kind}' out-of-scope for reject route — " + "skipping apply; human_review required.", + file=sys.stderr, + ) + continue + unit_index = record["unit_index"] + position = ( + unit_positions[unit_index] + if 0 <= unit_index < len(unit_positions) else None + ) + zone = zone_by_position.get(position) if position is not None else None + if zone is None: + record["apply_status"] = "no_zone_match" + continue + slots = (proposal.get("payload") or {}).get("slots") or {} + zone["slot_payload"].update(slots) + record["apply_status"] = "applied:partial_overrides" + + +def _check_post_ai_coverage_invariant( + units, + ai_repair_records: list[dict], +) -> dict: + """IMP-47B u7 — Verify AI repair preserved every source_section_id. + + Compares the union of unit-level ``source_section_ids`` (pre-AI) to + the union present on ``ai_repair_records`` post-apply. Per the AI + isolation contract + dropped 절대 룰 + (``feedback_ai_isolation_contract``), AI repair never removes a + unit's section coverage. Any divergence indicates a regression that + u8 surfaces through ``slide_status.ai_repair_status``. The check is + structural (set membership); the per-record ``source_section_ids`` + list is a copy populated by ``gather_step12_ai_repair_proposals`` + (``step12.py:124``) so apply mutations cannot silently drop it. + """ + pre_ai_ids: set[str] = set() + for unit in units: + pre_ai_ids.update(getattr(unit, "source_section_ids", []) or []) + post_ai_ids: set[str] = set() + for record in ai_repair_records: + post_ai_ids.update(record.get("source_section_ids") or []) + dropped = sorted(pre_ai_ids - post_ai_ids) + return { + "pre_ai_section_ids": sorted(pre_ai_ids), + "post_ai_section_ids": sorted(post_ai_ids), + "dropped_section_ids": dropped, + "status": "ok" if not dropped else "violated", + } + + +def _persist_ai_repair_proposals_to_cache( + ai_repair_records: list[dict], + *, + visual_check_passed: bool, + user_approved: bool, + auto_cache: bool, +) -> None: + """IMP-47B u13 — Persist applied AI repair proposals through IMP-46 gates. + + Mutates each record in place with a ``cache_save_status`` axis. + Only records whose ``apply_status`` starts with ``"applied:"`` and + that still carry the original ``cache_key`` + ``fingerprints`` + a + serialized ``proposal`` dict are eligible — everything else marked + ``not_applied``. Eligible records go through + ``cache.save_proposal`` with the IMP-46 dual-gate truth table; the + helper catches :class:`AiFallbackCacheGateError` so a gate block is + surfaced (``gate_blocked:``) without raising into the + pipeline runtime (the cache is a hint, never a hard dependency — + cache.py contract). ``visual_check_passed`` is never bypassable; + ``auto_cache=True`` bypasses ONLY the ``user_approved`` gate per + IMP-46 u5. Pure save layer: no AI call, no MDX touch. + """ + from src.phase_z2_ai_fallback.cache import ( + AiFallbackCacheGateError, + save_proposal, + ) + from src.phase_z2_ai_fallback.schema import AiFallbackProposal + for record in ai_repair_records: + apply_status = record.get("apply_status") or "" + proposal_dict = record.get("proposal") + cache_key = record.get("cache_key") + fingerprints = record.get("fingerprints") + if ( + not apply_status.startswith("applied:") + or not isinstance(proposal_dict, dict) + or not cache_key + or not isinstance(fingerprints, dict) + ): + record["cache_save_status"] = "not_applied" + continue + try: + proposal_obj = AiFallbackProposal.model_validate(proposal_dict) + except Exception as exc: # noqa: BLE001 — invalid payload → skip, never raise + record["cache_save_status"] = f"invalid_proposal:{type(exc).__name__}" + continue + try: + save_proposal( + cache_key, + proposal_obj, + visual_check_passed=visual_check_passed, + user_approved=user_approved, + auto_cache=auto_cache, + fingerprints=fingerprints, + ) + except AiFallbackCacheGateError as gate_exc: + record["cache_save_status"] = f"gate_blocked:{gate_exc}" + continue + record["cache_save_status"] = "saved" + + +def _summarize_ai_repair_status( + ai_repair_records: list[dict], + coverage_invariant: dict, +) -> dict: + """IMP-47B u8 — Classify Step 12 AI repair outcomes for slide_status surfacing. + + Reads u4 gather ``error`` + u5 ``apply_status`` + u7 coverage_invariant + to derive a single ``ai_repair_status`` axis attached to + ``slide_status``. Failure-axis priority (highest → lowest): + ``error`` > ``coverage_violated`` > ``unsupported_kind`` > ``applied`` > ``ok``. + ``human_review_required`` flips True on the three failure axes so the + frontend (u11) can surface a notification per the IMP-47B policy + ("AI 호출 실패 / proposal validation 실패 / coverage 미달 → frontend notification"). + Pure: no IO, no AI call. + """ + counts = { + "total": len(ai_repair_records), + "applied": 0, + "no_proposal": 0, + "no_zone_match": 0, + "unsupported_kind": 0, + "error": 0, + } + unsupported_records: list[dict] = [] + error_records: list[dict] = [] + for record in ai_repair_records: + if record.get("error"): + counts["error"] += 1 + error_records.append({ + "unit_index": record.get("unit_index"), + "source_section_ids": list(record.get("source_section_ids") or []), + "error": record.get("error"), + }) + continue + apply_status = record.get("apply_status") or "" + if apply_status.startswith("applied:"): + counts["applied"] += 1 + elif apply_status.startswith("unsupported_kind_for_reject_route:"): + counts["unsupported_kind"] += 1 + unsupported_records.append({ + "unit_index": record.get("unit_index"), + "source_section_ids": list(record.get("source_section_ids") or []), + "apply_status": apply_status, + }) + elif apply_status == "no_zone_match": + counts["no_zone_match"] += 1 + else: + counts["no_proposal"] += 1 + coverage_status = (coverage_invariant or {}).get("status", "ok") + dropped = list((coverage_invariant or {}).get("dropped_section_ids") or []) + if counts["error"]: + status = "error" + elif coverage_status != "ok": + status = "coverage_violated" + elif counts["unsupported_kind"]: + status = "unsupported_kind" + elif counts["applied"]: + status = "applied" + else: + status = "ok" + return { + "status": status, + "counts": counts, + "unsupported_kind_records": unsupported_records, + "error_records": error_records, + "coverage_status": coverage_status, + "dropped_section_ids": dropped, + "human_review_required": status in {"error", "coverage_violated", "unsupported_kind"}, + } + + def lookup_v4_match_with_fallback( v4: dict, section_id: str, @@ -878,6 +1130,54 @@ def lookup_v4_candidates( return candidates +def _apply_frame_override_to_unit(unit, new_tid: str, v4: dict) -> str: + """IMP-47B u3 — apply a frame override to *unit* in place. + + Returns a meta_source string for the override book-keeping. Three + probe layers, in order: + + 1. ``unit.v4_candidates`` (non-reject, max_n bounded). Copies + frame_id / frame_number / confidence / label from the matching + candidate so Step 9 metadata stays consistent. Returns + ``"v4_candidates"``. + 2. Full 32 V4 judgments (reject inclusive). When the override + target matches a reject judgment for the unit's primary section, + the unit is promoted to ``provisional=True`` with ``label="reject"`` + so Step 12 (IMP-47B u4) admits the AI repair path. Returns + ``"v4_reject_judgment_provisional"``. + 3. Raw fall-through. Updates only ``frame_template_id``; returns + ``"raw_template_id_only"``. + + Frame visual / contract stay untouched per the AI isolation contract + (frame auto-swap forbidden — AI re-places content into the existing + frame only). The caller validates catalog contract presence before + invoking this helper. + """ + for cand in (unit.v4_candidates or []): + if getattr(cand, "template_id", None) == new_tid: + unit.frame_template_id = cand.template_id + unit.frame_id = cand.frame_id + unit.frame_number = cand.frame_number + unit.confidence = cand.confidence + unit.label = cand.label + return "v4_candidates" + primary_sid = ( + unit.source_section_ids[0] if unit.source_section_ids else None + ) + if primary_sid: + for j in lookup_v4_all_judgments(v4, primary_sid): + if j.template_id == new_tid and j.label == "reject": + unit.frame_template_id = j.template_id + unit.frame_id = j.frame_id + unit.frame_number = j.frame_number + unit.confidence = j.confidence + unit.label = "reject" + unit.provisional = True + return "v4_reject_judgment_provisional" + unit.frame_template_id = new_tid + return "raw_template_id_only" + + # ─── Content weight + zone layout 계산 ───────────────────────── # layout preset 선택은 phase_z2_composition.select_layout_preset (composition v0) 가 담당. # 본 모듈의 select_layout_preset 은 이전 단순 count-based 구현이었고 dead code 로 제거 (2026-04-29). @@ -3336,6 +3636,57 @@ def run_phase_z2_mvp1( ), } + # IMP-47B u12 — mixed direct+reject first-render admission. + # When initial plan_composition produces a viable layout but at least one + # section remains uncovered (typically chain_exhausted / reject), re-run + # with allow_provisional in the lookup + allow_provisional_fill=True so + # reject sections gain a provisional rank-1 V4Match and a last-resort + # provisional candidate fill. This admits the mixed direct+reject case + # to the AI repair path (IMP-47B u4/u5) on first render. Skipped under + # --override-section-assignments to preserve the operator's plan and + # mirror the IMP-30 u4 retry's section_assignment_plan gate. All-direct + # slides have no uncovered sections so this is a no-op. The all-reject + # case is still handled by the IMP-30 u4 retry block below (initial + # plan_composition returns units=[]). + if units and layout_preset is not None and not override_section_assignments: + _u12_covered_ids: set[str] = set() + for _u in units: + _u12_covered_ids.update(_u.source_section_ids) + _u12_uncovered_ids = [ + s.section_id for s in sections if s.section_id not in _u12_covered_ids + ] + if _u12_uncovered_ids: + def _lookup_fn_mixed_admission(sid: str) -> Optional[V4Match]: + match, trace = lookup_v4_match_with_fallback( + v4, + sid, + raw_content=section_content_by_id.get(sid), + alias_keys=section_alias_by_id.get(sid), + allow_provisional=True, + ) + v4_fallback_traces[sid] = trace + return match + + units_mixed, layout_preset_mixed, _comp_debug_mixed = plan_composition( + sections, + _lookup_fn_mixed_admission, + V4_LABEL_TO_PHASE_Z_STATUS, + MVP1_ALLOWED_STATUSES, + capacity_fit_fn=compute_capacity_fit, + v4_candidates_lookup_fn=candidates_lookup_fn, + allow_provisional_fill=True, + ) + if units_mixed and layout_preset_mixed is not None: + units = units_mixed + layout_preset = layout_preset_mixed + comp_debug["v4_fallback_selections"] = list(v4_fallback_traces.values()) + comp_debug["imp47b_u12_mixed_admission"] = { + "applied": True, + "uncovered_before": _u12_uncovered_ids, + "result_unit_count": len(units_mixed), + "result_layout_preset": layout_preset_mixed, + } + # ── Step 7-A axis : layout override ── # 사용자가 LayoutPanel 에서 다른 preset 을 선택했을 때 자동 결정값을 강제 변경. # 길이 mismatch (positions count vs unit count) 는 zone loop 의 fallback (zone_{i}) @@ -3684,7 +4035,10 @@ def run_phase_z2_mvp1( # {unit_id: template_id} 형식. unit_id 매칭 시 unit.frame_template_id 강제 변경. # v4_candidates 안에서 같은 template_id 를 가진 entry 를 찾으면 frame_id / # frame_number / confidence / label 까지 그 entry 에서 가져와 갱신 — 그래야 step09 - # artifact 의 메타가 일관됨. + # artifact 의 메타가 일관됨. IMP-47B u3 (2026-05-21) : v4_candidates miss 시 + # 전 32 judgments 까지 probe — reject 라벨 frame 을 사용자가 선택한 경우 + # unit 을 provisional=True 로 승격해 Step 12 AI 재구성 게이트를 통과시킴 + # (frame 유지, 자동 frame swap 금지 — [[feedback_ai_isolation_contract]]). # frame contract 가 catalog 에 등록 안 된 template_id 면 skip + warning — # crash 방지 (V4 score 는 매겨지지만 catalog partial 은 없는 후보 존재). frame_overrides_applied: list[dict] = [] @@ -3713,21 +4067,7 @@ def run_phase_z2_mvp1( file=sys.stderr, ) continue - match = None - for cand in (unit.v4_candidates or []): - if getattr(cand, "template_id", None) == new_tid: - match = cand - break - if match is not None: - unit.frame_template_id = match.template_id - unit.frame_id = match.frame_id - unit.frame_number = match.frame_number - unit.confidence = match.confidence - unit.label = match.label - meta_source = "v4_candidates" - else: - unit.frame_template_id = new_tid - meta_source = "raw_template_id_only" + meta_source = _apply_frame_override_to_unit(unit, new_tid, v4) frame_overrides_applied.append({ "unit_id": unit_id, "from": old_tid, @@ -4329,6 +4669,58 @@ def run_phase_z2_mvp1( note="B4 PlacementPlan slot_assignments — render path 미연결. 실제 render slot 매핑은 mapper.py 의 builder.", ) + # ─── Step 12 IMP-47B u4 — AI repair proposal gather ─── + # Wire gather_step12_ai_repair_proposals so reject / restructure + # provisional units reach the AI fallback router. Normal-path units + # (use_as_is / light_edit / non-provisional) skip via the catch-all + # route gate; flag-off runs short-circuit at the router. Stored locally + # for u5 (PARTIAL_OVERRIDES apply) + u6 (step12_ai_repair.json audit). + ai_repair_records = _run_step12_ai_repair(units) + + # ─── Step 12 IMP-47B u5 — Apply PARTIAL_OVERRIDES proposals ─── + # Mirror the per-unit position derivation from the render loop above + # (L3789-3796); apply merges slots into zone slot_payload, loud-fails + # unsupported kinds via apply_status marker. + unit_positions: list[str] = [] + for _i, _unit in enumerate(units): + _pos = positions[_i] if _i < len(positions) else f"zone_{_i}" + _plan_record = render_record_by_unit_id.get(id(_unit)) + if _plan_record is not None and _plan_record.get("position"): + _pos = _plan_record["position"] + unit_positions.append(_pos) + _apply_ai_repair_proposals_to_zones(ai_repair_records, unit_positions, zones_data) + + # ─── Step 12 IMP-47B u7 — Post-AI source_section_ids coverage invariant ─── + # Structural defense: AI repair must not silently drop a unit's + # source_section_ids. dropped 절대 룰 — text_block / table / image / + # details deletion forbidden. Result feeds u6 audit (below) and + # u8 slide_status.ai_repair_status surfacing. + ai_repair_coverage_invariant = _check_post_ai_coverage_invariant( + units, ai_repair_records, + ) + + # ─── Step 12 IMP-47B u6 — AI repair audit artifact ─── + # Persist per-unit gather/apply outcomes (route_hint, skip_reason, + # apply_status, ai_called, proposal kind, cache_key, fingerprints) + # so reviewers can audit which units reached the AI fallback router + # and what happened. Flag-off default → every record has + # ai_called=False + apply_status='no_proposal'; flag-on + + # provisional reject/restructure → router_short_circuit (cache miss + # without client) or applied:partial_overrides (cache hit / live AI). + # u7 coverage_invariant rides alongside per_unit for reviewers. + _write_step_artifact( + run_dir, 12, "ai_repair", + data={ + "per_unit": ai_repair_records, + "coverage_invariant": ai_repair_coverage_invariant, + }, + step_status="done", + pipeline_path_connected=True, + inputs=["step10_frame_contract.json", "step02_normalized.json"], + outputs=["step12_ai_repair.json"], + note="IMP-47B u6 — Step 12 AI repair gather + apply records per unit (route, skip_reason, apply_status, proposal). u7 coverage_invariant = pre/post AI source_section_ids set comparison.", + ) + # ─── Step 12: Slot Payload (actual values, mapper.py 결과) ─── _write_step_artifact( run_dir, 12, "slot_payload", @@ -4943,6 +5335,24 @@ def run_phase_z2_mvp1( ), ) + # ─── IMP-47B u13: Persist validated AI repair proposals to cache ─── + # Saves each applied PARTIAL_OVERRIDES proposal AFTER Step 14 visual + # check + per IMP-46 dual-gate. ``visual_check_passed`` reads the + # Selenium overflow result; ``auto_cache`` sourced from Settings + # (CLI --auto-cache wires settings.ai_fallback_auto_cache at parse + # time, src/phase_z2_pipeline.py:5631-5633). ``user_approved`` stays + # False — the pipeline has no UX approval gate; the auto_cache + # opt-in is the documented bypass per IMP-46 u5. Gate violations + # surface as ``cache_save_status='gate_blocked:'`` on the + # record (cache is a hint, never a hard dependency). + from src.config import settings as _ai_cache_settings + _persist_ai_repair_proposals_to_cache( + ai_repair_records, + visual_check_passed=bool(overflow.get("passed")), + user_approved=False, + auto_cache=bool(_ai_cache_settings.ai_fallback_auto_cache), + ) + # 10. fit_classifier v0 (A1) — Selenium 결과 → spec §3 category 분류 layer. # *분류만*. action / router / rerender X. behavior 변경 0. fit_classification = classify_visual_runtime_check(overflow, debug_zones) @@ -5126,6 +5536,16 @@ def run_phase_z2_mvp1( debug_zones=debug_zones, ) + # IMP-47B u8 — Surface Step 12 AI repair outcomes through slide_status. + # Composes u4 gather errors + u5 apply_status + u7 coverage_invariant + # into a single ``ai_repair_status`` axis the frontend (u11) reads to + # render human_review notifications. Auto pipeline first + # ([[feedback_auto_pipeline_first]]) — no review_queue insertion; + # explicit status enum + human_review_required flag. + slide_status["ai_repair_status"] = _summarize_ai_repair_status( + ai_repair_records, ai_repair_coverage_invariant, + ) + # ─── Step 20: Slide Status ─── _write_step_artifact( run_dir, 20, "slide_status", @@ -5147,6 +5567,11 @@ def run_phase_z2_mvp1( _aligned = slide_status.get("aligned_section_ids") or [] _covered = slide_status.get("covered_section_ids") or [] _filtered = slide_status.get("filtered_section_ids") or [] + _ai_repair = slide_status.get("ai_repair_status") or {} + _ai_repair_label = ( + f'{_ai_repair.get("status", "?")} ' + f'(human_review_required={_ai_repair.get("human_review_required", False)})' + ) _write_step_html( run_dir, 20, "final_status", title="Final Slide Status", @@ -5161,6 +5586,7 @@ def run_phase_z2_mvp1( f'filtered_section_ids{_filtered}' f'adapter_needed_count{slide_status.get("adapter_needed_count", 0)}' f'content_truncated_count{slide_status.get("content_truncated_count", 0)}' + f'ai_repair_status{_ai_repair_label}' f'' f'

Visual Fail Reasons

{_vfs_html}' f'

Note

{slide_status.get("note", "")}

' @@ -5331,8 +5757,29 @@ if __name__ == "__main__": "--override-section-assignment bottom=03-2,03-3" ), ) + # IMP-46 u5 — auto-cache opt-in. When set, ``cache.save_proposal`` + # bypasses the ``user_approved`` gate only (``visual_check_passed`` + # is never bypassable). Source of truth is + # ``settings.ai_fallback_auto_cache`` (src/config.py); this flag + # mutates the setting in-process so downstream callers read the + # same value through Settings rather than parsing args themselves. + parser.add_argument( + "--auto-cache", + dest="auto_cache", + action="store_true", + default=False, + help=( + "Allow cache.save_proposal to bypass the user_approved gate " + "(visual_check_passed remains mandatory). Sets " + "settings.ai_fallback_auto_cache=True for this run." + ), + ) args = parser.parse_args() + if args.auto_cache: + from src.config import settings as _settings + _settings.ai_fallback_auto_cache = True + overrides_frames: dict[str, str] = {} for ov in args.override_frames: if "=" not in ov: diff --git a/tests/phase_z2_ai_fallback/test_ast_isolation.py b/tests/phase_z2_ai_fallback/test_ast_isolation.py index 8c3e77d..b734287 100644 --- a/tests/phase_z2_ai_fallback/test_ast_isolation.py +++ b/tests/phase_z2_ai_fallback/test_ast_isolation.py @@ -36,6 +36,7 @@ _ALLOWED_TOP_LEVEL: frozenset[str] = frozenset( "ast", "dataclasses", "enum", + "hashlib", "json", "pathlib", "random", diff --git a/tests/phase_z2_ai_fallback/test_cache.py b/tests/phase_z2_ai_fallback/test_cache.py index b3d1dd9..f3576b8 100644 --- a/tests/phase_z2_ai_fallback/test_cache.py +++ b/tests/phase_z2_ai_fallback/test_cache.py @@ -1,32 +1,67 @@ -"""IMP-33 u6 — AI fallback cache gate tests. +"""IMP-46 u2 — Persistent JSON cache backend tests. -Verifies the IMP-46 gate contract: - * ``read_proposal`` is a stub (returns None until IMP-46). - * ``save_proposal`` enforces both gates before any write attempt. - * Storage itself raises NotImplementedError (IMP-46 marker). +Scope (Stage 2 plan, u2): + +* Replaced ``NotImplementedError`` marker with a real persistent backend + at ``data/frame_cache/{frame_id}/{signature_hash}.json``. +* Preserved IMP-33 u6 dual write gate: ``visual_check_passed`` AND + ``user_approved`` BOTH required (loud :class:`AiFallbackCacheGateError` + before any filesystem touch). +* Round-trip every :class:`ProposalKind`; round-trip ``slide_css`` None + *and* set; missing or corrupt files miss silently. +* Fingerprint *comparison* is u3; here we only check that the field is + persisted. + +All filesystem writes are scoped to ``tmp_path`` via +``monkeypatch.setattr`` on the module-level :data:`CACHE_ROOT`, so the +production directory is never touched by these tests. """ from __future__ import annotations +import json +import pathlib + import pytest +from src.phase_z2_ai_fallback import cache as cache_mod from src.phase_z2_ai_fallback.cache import ( AiFallbackCacheGateError, + KEY_DELIMITER, + SCHEMA_VERSION, read_proposal, save_proposal, ) from src.phase_z2_ai_fallback.schema import AiFallbackProposal, ProposalKind -def _proposal() -> AiFallbackProposal: +_FRAME_ID = "1171281190" +_SIG_HASH = "a" * 64 # SHA256-shaped placeholder; cache is shape-agnostic. +_KEY = f"{_FRAME_ID}{KEY_DELIMITER}{_SIG_HASH}" + + +def _proposal( + kind: ProposalKind = ProposalKind.BUILDER_OPTIONS_PATCH, + payload: dict | None = None, +) -> AiFallbackProposal: return AiFallbackProposal( - proposal_kind=ProposalKind.BUILDER_OPTIONS_PATCH, - payload={"item_parser": "bullet_v2"}, - rationale="u6-test", + proposal_kind=kind, + payload=payload if payload is not None else {"item_parser": "bullet_v2"}, + rationale="u2-test", ) -def test_read_proposal_returns_none_for_any_key(): - assert read_proposal("frame=foo|cardinality=3") is None +@pytest.fixture(autouse=True) +def _isolated_cache_root(tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch): + """Redirect the cache root to an isolated tmp directory for every test.""" + monkeypatch.setattr(cache_mod, "CACHE_ROOT", tmp_path / "frame_cache") + yield tmp_path / "frame_cache" + + +# -- read_proposal -------------------------------------------------------- + + +def test_read_proposal_returns_none_for_missing_file(): + assert read_proposal(_KEY) is None def test_read_proposal_rejects_empty_key(): @@ -34,10 +69,65 @@ def test_read_proposal_rejects_empty_key(): read_proposal("") +def test_read_proposal_rejects_non_string_key(): + with pytest.raises(ValueError): + read_proposal(None) # type: ignore[arg-type] + + +def test_read_proposal_returns_none_for_legacy_key_format(): + """Router back-compat: pre-u4 cache_key (no '::') misses silently.""" + assert read_proposal("frame:1171281190:cardinality:many") is None + + +def test_read_proposal_returns_none_for_corrupt_json(_isolated_cache_root: pathlib.Path): + path = _isolated_cache_root / _FRAME_ID / f"{_SIG_HASH}.json" + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text("{not valid json", encoding="utf-8") + assert read_proposal(_KEY) is None + + +def test_read_proposal_returns_none_for_non_dict_root(_isolated_cache_root: pathlib.Path): + path = _isolated_cache_root / _FRAME_ID / f"{_SIG_HASH}.json" + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text("[]", encoding="utf-8") + assert read_proposal(_KEY) is None + + +def test_read_proposal_returns_none_when_payload_proposal_missing( + _isolated_cache_root: pathlib.Path, +): + path = _isolated_cache_root / _FRAME_ID / f"{_SIG_HASH}.json" + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(json.dumps({"schema_version": 1}), encoding="utf-8") + assert read_proposal(_KEY) is None + + +def test_read_proposal_returns_none_for_forbidden_proposal_kind( + _isolated_cache_root: pathlib.Path, +): + path = _isolated_cache_root / _FRAME_ID / f"{_SIG_HASH}.json" + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text( + json.dumps( + { + "schema_version": 1, + "proposal": {"proposal_kind": "mdx_text", "payload": {}, "rationale": ""}, + "slide_css": None, + "fingerprints": {}, + } + ), + encoding="utf-8", + ) + assert read_proposal(_KEY) is None + + +# -- save_proposal: write gates ------------------------------------------- + + def test_save_rejects_when_visual_check_failed(): with pytest.raises(AiFallbackCacheGateError) as exc: save_proposal( - "k", _proposal(), visual_check_passed=False, user_approved=True + _KEY, _proposal(), visual_check_passed=False, user_approved=True ) assert "visual_check_passed" in str(exc.value) @@ -45,7 +135,7 @@ def test_save_rejects_when_visual_check_failed(): def test_save_rejects_when_user_not_approved(): with pytest.raises(AiFallbackCacheGateError) as exc: save_proposal( - "k", _proposal(), visual_check_passed=True, user_approved=False + _KEY, _proposal(), visual_check_passed=True, user_approved=False ) assert "user_approved" in str(exc.value) @@ -53,16 +143,20 @@ def test_save_rejects_when_user_not_approved(): def test_save_rejects_when_both_gates_false(): with pytest.raises(AiFallbackCacheGateError): save_proposal( - "k", _proposal(), visual_check_passed=False, user_approved=False + _KEY, _proposal(), visual_check_passed=False, user_approved=False ) -def test_save_raises_not_implemented_when_both_gates_pass(): - with pytest.raises(NotImplementedError) as exc: +def test_save_gate_violation_does_not_touch_filesystem( + _isolated_cache_root: pathlib.Path, +): + with pytest.raises(AiFallbackCacheGateError): save_proposal( - "k", _proposal(), visual_check_passed=True, user_approved=True + _KEY, _proposal(), visual_check_passed=False, user_approved=True ) - assert "IMP-46" in str(exc.value) + # Cache root may or may not exist depending on fixture order, but the + # frame_id directory must NOT exist when the gate rejects the write. + assert not (_isolated_cache_root / _FRAME_ID).exists() def test_save_rejects_empty_key(): @@ -75,16 +169,340 @@ def test_save_rejects_empty_key(): def test_save_rejects_non_proposal_object(): with pytest.raises(TypeError): save_proposal( - "k", + _KEY, {"proposal_kind": "builder_options_patch"}, # type: ignore[arg-type] visual_check_passed=True, user_approved=True, ) -def test_gate_error_is_not_notimplementederror(): - with pytest.raises(AiFallbackCacheGateError): +def test_save_rejects_legacy_key_format(): + """Writes must use the structural ``frame_id::signature_hash`` form.""" + with pytest.raises(ValueError): save_proposal( - "k", _proposal(), visual_check_passed=False, user_approved=True + "frame:1171281190:cardinality:many", + _proposal(), + visual_check_passed=True, + user_approved=True, ) + + +def test_save_rejects_slide_css_non_string(): + with pytest.raises(TypeError): + save_proposal( + _KEY, + _proposal(), + visual_check_passed=True, + user_approved=True, + slide_css=123, # type: ignore[arg-type] + ) + + +def test_save_rejects_fingerprints_non_dict(): + with pytest.raises(TypeError): + save_proposal( + _KEY, + _proposal(), + visual_check_passed=True, + user_approved=True, + fingerprints=["contract_sha", "abc"], # type: ignore[arg-type] + ) + + +def test_gate_error_is_not_notimplementederror(): + """The persistent backend no longer raises ``NotImplementedError`` — + callers must distinguish gate violation from absent persistence.""" assert not issubclass(AiFallbackCacheGateError, NotImplementedError) + + +# -- save_proposal: persistence + round-trip ------------------------------ + + +def test_save_creates_parent_directories(_isolated_cache_root: pathlib.Path): + assert not (_isolated_cache_root / _FRAME_ID).exists() + save_proposal( + _KEY, _proposal(), visual_check_passed=True, user_approved=True + ) + assert (_isolated_cache_root / _FRAME_ID / f"{_SIG_HASH}.json").is_file() + + +def test_save_returns_resolved_path(_isolated_cache_root: pathlib.Path): + path = save_proposal( + _KEY, _proposal(), visual_check_passed=True, user_approved=True + ) + assert path == _isolated_cache_root / _FRAME_ID / f"{_SIG_HASH}.json" + + +def test_save_payload_includes_schema_version(_isolated_cache_root: pathlib.Path): + path = save_proposal( + _KEY, _proposal(), visual_check_passed=True, user_approved=True + ) + data = json.loads(path.read_text(encoding="utf-8")) + assert data["schema_version"] == SCHEMA_VERSION + + +def test_save_payload_includes_proposal_dump(_isolated_cache_root: pathlib.Path): + proposal = _proposal(payload={"item_parser": "pillar_item"}) + path = save_proposal( + _KEY, proposal, visual_check_passed=True, user_approved=True + ) + data = json.loads(path.read_text(encoding="utf-8")) + assert data["proposal"] == proposal.model_dump(mode="json") + + +def test_round_trip_default_slide_css_is_none(_isolated_cache_root: pathlib.Path): + path = save_proposal( + _KEY, _proposal(), visual_check_passed=True, user_approved=True + ) + data = json.loads(path.read_text(encoding="utf-8")) + assert data["slide_css"] is None + assert data["fingerprints"] == {} + + +def test_round_trip_with_slide_css_set(_isolated_cache_root: pathlib.Path): + css = ".slide { padding: 40px; }" + path = save_proposal( + _KEY, + _proposal(), + visual_check_passed=True, + user_approved=True, + slide_css=css, + ) + data = json.loads(path.read_text(encoding="utf-8")) + assert data["slide_css"] == css + + +def test_round_trip_with_fingerprints(_isolated_cache_root: pathlib.Path): + fingerprints = { + "contract_sha": "c" * 64, + "partial_sha": "p" * 64, + "catalog_sha": "x" * 64, + } + path = save_proposal( + _KEY, + _proposal(), + visual_check_passed=True, + user_approved=True, + fingerprints=fingerprints, + ) + data = json.loads(path.read_text(encoding="utf-8")) + assert data["fingerprints"] == fingerprints + + +def test_read_returns_proposal_after_save(_isolated_cache_root: pathlib.Path): + original = _proposal(payload={"key": "value"}) + save_proposal( + _KEY, original, visual_check_passed=True, user_approved=True + ) + loaded = read_proposal(_KEY) + assert loaded is not None + assert loaded.proposal_kind == original.proposal_kind + assert loaded.payload == original.payload + assert loaded.rationale == original.rationale + + +@pytest.mark.parametrize("kind", list(ProposalKind)) +def test_round_trip_all_proposal_kinds( + kind: ProposalKind, _isolated_cache_root: pathlib.Path +): + """Every whitelisted ProposalKind survives save → read unchanged.""" + if kind is ProposalKind.PARTIAL_OVERRIDES: + payload = {"slots": {"pillar_1": "alpha"}} + elif kind is ProposalKind.SLOT_MAPPING_PROPOSAL: + payload = {"mapping": [{"from": "a", "to": "b"}]} + else: + payload = {"item_parser": "bullet_v2"} + save_proposal( + _KEY, + _proposal(kind=kind, payload=payload), + visual_check_passed=True, + user_approved=True, + ) + loaded = read_proposal(_KEY) + assert loaded is not None + assert loaded.proposal_kind is kind + assert loaded.payload == payload + + +def test_save_overwrites_existing_entry(_isolated_cache_root: pathlib.Path): + save_proposal( + _KEY, + _proposal(payload={"v": 1}), + visual_check_passed=True, + user_approved=True, + ) + save_proposal( + _KEY, + _proposal(payload={"v": 2}), + visual_check_passed=True, + user_approved=True, + ) + loaded = read_proposal(_KEY) + assert loaded is not None + assert loaded.payload == {"v": 2} + + +def test_file_layout_uses_frame_id_directory(_isolated_cache_root: pathlib.Path): + """Storage layout = ``frame_id/`` directory, ``signature_hash.json`` file.""" + other_frame_key = f"{_FRAME_ID}_other{KEY_DELIMITER}{_SIG_HASH}" + save_proposal( + _KEY, _proposal(), visual_check_passed=True, user_approved=True + ) + save_proposal( + other_frame_key, + _proposal(), + visual_check_passed=True, + user_approved=True, + ) + assert (_isolated_cache_root / _FRAME_ID / f"{_SIG_HASH}.json").is_file() + assert ( + _isolated_cache_root / f"{_FRAME_ID}_other" / f"{_SIG_HASH}.json" + ).is_file() + + +def test_different_signature_hashes_isolated(_isolated_cache_root: pathlib.Path): + """Two distinct signature hashes under the same frame_id never collide.""" + key_a = f"{_FRAME_ID}{KEY_DELIMITER}{'a' * 64}" + key_b = f"{_FRAME_ID}{KEY_DELIMITER}{'b' * 64}" + save_proposal( + key_a, + _proposal(payload={"sig": "a"}), + visual_check_passed=True, + user_approved=True, + ) + save_proposal( + key_b, + _proposal(payload={"sig": "b"}), + visual_check_passed=True, + user_approved=True, + ) + loaded_a = read_proposal(key_a) + loaded_b = read_proposal(key_b) + assert loaded_a is not None and loaded_a.payload == {"sig": "a"} + assert loaded_b is not None and loaded_b.payload == {"sig": "b"} + + +def test_parse_key_rejects_triple_delimiter(): + """Two ``::`` markers (extra delimiter inside signature) is rejected.""" + assert ( + read_proposal( + f"{_FRAME_ID}{KEY_DELIMITER}{_SIG_HASH}{KEY_DELIMITER}extra" + ) + is None + ) + + +# -- IMP-46 u5: auto_cache gate (2^3 truth table) ------------------------- +# +# Three booleans: visual_check_passed (V), user_approved (U), auto_cache (A). +# Contract: V=True AND (U=True OR A=True) -> persist; else gate-raise. +# V is never bypassable; A=True only relaxes U=False. + +_GATE_TRUTH_TABLE = [ + # (V, U, A, expect_persist) + (False, False, False, False), + (False, False, True, False), + (False, True, False, False), + (False, True, True, False), + (True, False, False, False), + (True, False, True, True), + (True, True, False, True), + (True, True, True, True), +] + + +@pytest.mark.parametrize("v,u,a,expect_persist", _GATE_TRUTH_TABLE) +def test_save_gate_truth_table( + v: bool, + u: bool, + a: bool, + expect_persist: bool, + _isolated_cache_root: pathlib.Path, +) -> None: + """IMP-46 u5 — exhaustive 2^3 enumeration of (V, U, A) -> {persist, raise}.""" + if expect_persist: + path = save_proposal( + _KEY, + _proposal(payload={"v": int(v), "u": int(u), "a": int(a)}), + visual_check_passed=v, + user_approved=u, + auto_cache=a, + ) + assert path.is_file(), f"truth row (V={v}, U={u}, A={a}) must persist" + else: + with pytest.raises(AiFallbackCacheGateError): + save_proposal( + _KEY, + _proposal(), + visual_check_passed=v, + user_approved=u, + auto_cache=a, + ) + # Gate violations must never touch the filesystem (parent dir absent). + assert not (_isolated_cache_root / _FRAME_ID).exists(), ( + f"truth row (V={v}, U={u}, A={a}) leaked a directory" + ) + + +def test_auto_cache_default_off_preserves_dual_gate_semantics( + _isolated_cache_root: pathlib.Path, +) -> None: + """Calling save_proposal without ``auto_cache`` keeps the IMP-46 u2 behaviour.""" + with pytest.raises(AiFallbackCacheGateError) as exc: + save_proposal( + _KEY, _proposal(), visual_check_passed=True, user_approved=False + ) + assert "user_approved" in str(exc.value) + assert not (_isolated_cache_root / _FRAME_ID).exists() + + +def test_auto_cache_cannot_bypass_visual_check() -> None: + """``visual_check_passed=False`` raises even with ``auto_cache=True``.""" + with pytest.raises(AiFallbackCacheGateError) as exc: + save_proposal( + _KEY, + _proposal(), + visual_check_passed=False, + user_approved=True, + auto_cache=True, + ) + assert "visual_check_passed" in str(exc.value) + + +def test_auto_cache_bypass_user_approved_persists( + _isolated_cache_root: pathlib.Path, +) -> None: + """``auto_cache=True`` with ``user_approved=False`` persists the proposal.""" + path = save_proposal( + _KEY, + _proposal(payload={"bypass": "user"}), + visual_check_passed=True, + user_approved=False, + auto_cache=True, + ) + assert path.is_file() + loaded = read_proposal(_KEY) + assert loaded is not None + assert loaded.payload == {"bypass": "user"} + + +def test_auto_cache_rejects_non_bool() -> None: + """``auto_cache`` must be a bool (loud TypeError, symmetric with other kwargs).""" + with pytest.raises(TypeError): + save_proposal( + _KEY, + _proposal(), + visual_check_passed=True, + user_approved=True, + auto_cache="yes", # type: ignore[arg-type] + ) + + +def test_auto_cache_is_keyword_only() -> None: + """``auto_cache`` must be passed by keyword (positional rejected).""" + import inspect + + sig = inspect.signature(save_proposal) + param = sig.parameters["auto_cache"] + assert param.kind is inspect.Parameter.KEYWORD_ONLY + assert param.default is False diff --git a/tests/phase_z2_ai_fallback/test_cache_invalidation.py b/tests/phase_z2_ai_fallback/test_cache_invalidation.py new file mode 100644 index 0000000..5a8afab --- /dev/null +++ b/tests/phase_z2_ai_fallback/test_cache_invalidation.py @@ -0,0 +1,347 @@ +"""IMP-46 u3 — Fingerprint-based cache invalidation tests. + +Scope (Stage 2 plan, u3): + +* ``save_proposal`` persists ``fingerprints`` verbatim (u2 already covers + the round-trip; this suite re-asserts the read-side comparator). +* ``read_proposal`` accepts an optional ``fingerprints`` kwarg. When + supplied, the stored dict must equal the supplied dict EXACTLY (strict + equality). Mismatch — including missing keys, extra keys, or value + drift — returns ``None``. +* Default ``fingerprints=None`` performs no comparison (back-compat for + legacy callers). +* Fingerprint *computation* stays outside ``cache.py`` — these tests + treat the three declared shas (``contract_sha`` / ``partial_sha`` / + ``catalog_sha``) as opaque hex strings, never recomputing them. The + cache layer is a content-addressed *comparator*, not a content + *hasher*. + +All filesystem writes are scoped to ``tmp_path`` via +``monkeypatch.setattr`` on the module-level :data:`CACHE_ROOT`. +""" +from __future__ import annotations + +import json +import pathlib + +import pytest + +from src.phase_z2_ai_fallback import cache as cache_mod +from src.phase_z2_ai_fallback.cache import ( + KEY_DELIMITER, + read_proposal, + save_proposal, +) +from src.phase_z2_ai_fallback.schema import AiFallbackProposal, ProposalKind + + +_FRAME_ID = "1171281190" +_SIG_HASH = "f" * 64 +_KEY = f"{_FRAME_ID}{KEY_DELIMITER}{_SIG_HASH}" + +_FINGERPRINTS_BASELINE: dict[str, str] = { + "contract_sha": "c" * 64, + "partial_sha": "p" * 64, + "catalog_sha": "x" * 64, +} + + +def _proposal(payload: dict | None = None) -> AiFallbackProposal: + return AiFallbackProposal( + proposal_kind=ProposalKind.BUILDER_OPTIONS_PATCH, + payload=payload if payload is not None else {"item_parser": "bullet_v2"}, + rationale="u3-test", + ) + + +@pytest.fixture(autouse=True) +def _isolated_cache_root(tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch): + monkeypatch.setattr(cache_mod, "CACHE_ROOT", tmp_path / "frame_cache") + yield tmp_path / "frame_cache" + + +# -- save side: fingerprints persisted verbatim --------------------------- + + +def test_save_persists_fingerprints_verbatim( + _isolated_cache_root: pathlib.Path, +): + path = save_proposal( + _KEY, + _proposal(), + visual_check_passed=True, + user_approved=True, + fingerprints=_FINGERPRINTS_BASELINE, + ) + stored = json.loads(path.read_text(encoding="utf-8"))["fingerprints"] + assert stored == _FINGERPRINTS_BASELINE + + +# -- read side: back-compat (no fingerprints kwarg) ----------------------- + + +def test_read_without_fingerprints_kwarg_returns_proposal( + _isolated_cache_root: pathlib.Path, +): + """Legacy read path (no kwarg) skips invalidation — round-trip succeeds.""" + save_proposal( + _KEY, + _proposal(), + visual_check_passed=True, + user_approved=True, + fingerprints=_FINGERPRINTS_BASELINE, + ) + loaded = read_proposal(_KEY) + assert loaded is not None + assert loaded.payload == {"item_parser": "bullet_v2"} + + +def test_read_without_fingerprints_kwarg_ignores_stored_mismatch( + _isolated_cache_root: pathlib.Path, +): + """A caller that has not adopted fingerprint-aware lookup must still + see the proposal — invalidation only kicks in when explicitly asked.""" + save_proposal( + _KEY, + _proposal(), + visual_check_passed=True, + user_approved=True, + fingerprints={"contract_sha": "old"}, + ) + loaded = read_proposal(_KEY) + assert loaded is not None + + +# -- read side: matching fingerprints ------------------------------------- + + +def test_read_with_matching_fingerprints_returns_proposal( + _isolated_cache_root: pathlib.Path, +): + save_proposal( + _KEY, + _proposal(), + visual_check_passed=True, + user_approved=True, + fingerprints=_FINGERPRINTS_BASELINE, + ) + loaded = read_proposal(_KEY, fingerprints=dict(_FINGERPRINTS_BASELINE)) + assert loaded is not None + assert loaded.proposal_kind is ProposalKind.BUILDER_OPTIONS_PATCH + + +def test_read_with_empty_fingerprints_matches_empty_stored( + _isolated_cache_root: pathlib.Path, +): + """Both sides empty is an exact match, not a special-case None.""" + save_proposal( + _KEY, + _proposal(), + visual_check_passed=True, + user_approved=True, + # default fingerprints=None → stored as {} + ) + loaded = read_proposal(_KEY, fingerprints={}) + assert loaded is not None + + +# -- read side: invalidation on mismatch ---------------------------------- + + +@pytest.mark.parametrize( + "drifted_axis", + ["contract_sha", "partial_sha", "catalog_sha"], +) +def test_read_invalidates_on_single_axis_drift( + drifted_axis: str, _isolated_cache_root: pathlib.Path +): + save_proposal( + _KEY, + _proposal(), + visual_check_passed=True, + user_approved=True, + fingerprints=_FINGERPRINTS_BASELINE, + ) + supplied = dict(_FINGERPRINTS_BASELINE) + supplied[drifted_axis] = "deadbeef" * 8 # 64-char distinct value + assert read_proposal(_KEY, fingerprints=supplied) is None + + +def test_read_invalidates_when_caller_supplies_extra_key( + _isolated_cache_root: pathlib.Path, +): + """Strict equality — extra key on caller side is a mismatch.""" + save_proposal( + _KEY, + _proposal(), + visual_check_passed=True, + user_approved=True, + fingerprints=_FINGERPRINTS_BASELINE, + ) + supplied = dict(_FINGERPRINTS_BASELINE) + supplied["future_axis_sha"] = "z" * 64 + assert read_proposal(_KEY, fingerprints=supplied) is None + + +def test_read_invalidates_when_caller_supplies_subset( + _isolated_cache_root: pathlib.Path, +): + """Strict equality — subset on caller side is a mismatch.""" + save_proposal( + _KEY, + _proposal(), + visual_check_passed=True, + user_approved=True, + fingerprints=_FINGERPRINTS_BASELINE, + ) + subset = {"contract_sha": _FINGERPRINTS_BASELINE["contract_sha"]} + assert read_proposal(_KEY, fingerprints=subset) is None + + +def test_read_invalidates_when_entry_saved_without_fingerprints( + _isolated_cache_root: pathlib.Path, +): + """A pre-invalidation cache entry (empty stored fingerprints) MUST NOT + satisfy a fingerprint-aware lookup — caller demands proof of freshness.""" + save_proposal( + _KEY, + _proposal(), + visual_check_passed=True, + user_approved=True, + # default fingerprints=None → stored as {} + ) + assert read_proposal(_KEY, fingerprints=_FINGERPRINTS_BASELINE) is None + + +def test_read_invalidates_when_stored_fingerprints_not_dict( + _isolated_cache_root: pathlib.Path, +): + """Hand-corrupted payload (fingerprints serialized as non-dict) → None.""" + path = _isolated_cache_root / _FRAME_ID / f"{_SIG_HASH}.json" + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text( + json.dumps( + { + "schema_version": 1, + "proposal": _proposal().model_dump(mode="json"), + "slide_css": None, + "fingerprints": ["contract_sha", "c" * 64], + } + ), + encoding="utf-8", + ) + assert read_proposal(_KEY, fingerprints=_FINGERPRINTS_BASELINE) is None + + +def test_read_invalidates_when_stored_fingerprints_field_missing( + _isolated_cache_root: pathlib.Path, +): + """Legacy payload (no ``fingerprints`` field at all) → None when caller + demands fingerprint comparison.""" + path = _isolated_cache_root / _FRAME_ID / f"{_SIG_HASH}.json" + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text( + json.dumps( + { + "schema_version": 1, + "proposal": _proposal().model_dump(mode="json"), + "slide_css": None, + # fingerprints field deliberately omitted + } + ), + encoding="utf-8", + ) + assert read_proposal(_KEY, fingerprints={"contract_sha": "c" * 64}) is None + + +def test_read_with_matching_fingerprints_still_loses_to_missing_file(): + """File missing takes precedence over fingerprint check — no false hit.""" + assert read_proposal(_KEY, fingerprints=_FINGERPRINTS_BASELINE) is None + + +def test_read_with_matching_fingerprints_still_loses_to_corrupt_json( + _isolated_cache_root: pathlib.Path, +): + path = _isolated_cache_root / _FRAME_ID / f"{_SIG_HASH}.json" + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text("{not valid json", encoding="utf-8") + assert read_proposal(_KEY, fingerprints=_FINGERPRINTS_BASELINE) is None + + +# -- read side: input validation symmetry with save ----------------------- + + +def test_read_rejects_non_dict_fingerprints(): + with pytest.raises(TypeError): + read_proposal(_KEY, fingerprints=["contract_sha", "c" * 64]) # type: ignore[arg-type] + + +def test_read_rejects_non_dict_fingerprints_string(): + with pytest.raises(TypeError): + read_proposal(_KEY, fingerprints="contract_sha=c" * 8) # type: ignore[arg-type] + + +def test_read_rejects_non_dict_fingerprints_int(): + with pytest.raises(TypeError): + read_proposal(_KEY, fingerprints=42) # type: ignore[arg-type] + + +# -- isolation: cache.py never computes fingerprints ---------------------- + + +def test_cache_module_has_no_fingerprint_computer(): + """Guardrail: cache.py is a *comparator*, not a *hasher*. The three + declared shas are computed outside this module (step 12 / pipeline + glue). Adding a fingerprint computer here would leak Phase Z runtime + knowledge into the cache layer and violate AI isolation.""" + public_surface = [ + name + for name in dir(cache_mod) + if not name.startswith("_") and callable(getattr(cache_mod, name)) + ] + forbidden_substrings = ("hash", "sha", "fingerprint") + leaks = [ + name + for name in public_surface + if any(sub in name.lower() for sub in forbidden_substrings) + ] + assert leaks == [], ( + f"cache.py public surface leaks fingerprint computation: {leaks}; " + "computation must live outside cache.py per IMP-46 u3 contract." + ) + + +# -- isolation across distinct fingerprint sets --------------------------- + + +def test_distinct_fingerprint_sets_isolated_per_signature( + _isolated_cache_root: pathlib.Path, +): + """Two entries under different signature hashes keep their own + fingerprints; reading one with the other's fingerprints misses.""" + key_a = f"{_FRAME_ID}{KEY_DELIMITER}{'a' * 64}" + key_b = f"{_FRAME_ID}{KEY_DELIMITER}{'b' * 64}" + fps_a = {"contract_sha": "a" * 64} + fps_b = {"contract_sha": "b" * 64} + save_proposal( + key_a, + _proposal(payload={"sig": "a"}), + visual_check_passed=True, + user_approved=True, + fingerprints=fps_a, + ) + save_proposal( + key_b, + _proposal(payload={"sig": "b"}), + visual_check_passed=True, + user_approved=True, + fingerprints=fps_b, + ) + # Crossed lookups miss. + assert read_proposal(key_a, fingerprints=fps_b) is None + assert read_proposal(key_b, fingerprints=fps_a) is None + # Aligned lookups hit. + a_hit = read_proposal(key_a, fingerprints=fps_a) + b_hit = read_proposal(key_b, fingerprints=fps_b) + assert a_hit is not None and a_hit.payload == {"sig": "a"} + assert b_hit is not None and b_hit.payload == {"sig": "b"} diff --git a/tests/phase_z2_ai_fallback/test_cache_repo_layout.py b/tests/phase_z2_ai_fallback/test_cache_repo_layout.py new file mode 100644 index 0000000..0ac101c --- /dev/null +++ b/tests/phase_z2_ai_fallback/test_cache_repo_layout.py @@ -0,0 +1,93 @@ +"""IMP-46 u6 — repository layout coverage for the persistent frame cache. + +This module is a *layout* contract test, not a runtime test. It asserts the +files committed to source control that make ``data/frame_cache/`` exist on a +fresh checkout while keeping cached JSON payloads ignored by git: + +* ``data/frame_cache/.gitkeep`` is tracked (so the cache root exists for a + fresh clone before any AI fallback run materialises payloads). +* ``.gitignore`` ignores ``data/*`` broadly, re-includes the + ``data/frame_cache/`` directory, ignores its contents, and re-includes + ``data/frame_cache/.gitkeep`` so cache payloads under + ``data/frame_cache/{frame_id}/{signature_hash}.json`` remain ignored. + +If somebody removes the ``.gitkeep`` marker, drops the negation lines from +``.gitignore``, or commits a real cache payload, this test fails. The cache +module surface (cache.py) is exercised by ``test_cache.py`` / +``test_cache_invalidation.py`` and is intentionally *not* re-asserted here — +this file is the layout-only lock that Stage 2 u6 declared. +""" +from __future__ import annotations + +from pathlib import Path + +import pytest + +REPO_ROOT = Path(__file__).resolve().parents[2] +GITIGNORE_PATH = REPO_ROOT / ".gitignore" +CACHE_ROOT = REPO_ROOT / "data" / "frame_cache" +GITKEEP_PATH = CACHE_ROOT / ".gitkeep" + + +def _gitignore_lines() -> list[str]: + assert GITIGNORE_PATH.is_file(), f".gitignore missing at {GITIGNORE_PATH}" + text = GITIGNORE_PATH.read_text(encoding="utf-8") + return [line.strip() for line in text.splitlines()] + + +def test_frame_cache_root_directory_exists() -> None: + """``data/frame_cache/`` must exist on disk as the cache root.""" + assert CACHE_ROOT.is_dir(), ( + f"frame cache root missing: {CACHE_ROOT}. The directory must exist " + "for save_proposal to write JSON payloads without first conjuring a " + "parent on demand from outside the cache module." + ) + + +def test_gitkeep_marker_is_tracked_file() -> None: + """``data/frame_cache/.gitkeep`` is the marker that keeps the dir tracked.""" + assert GITKEEP_PATH.is_file(), ( + f".gitkeep marker missing: {GITKEEP_PATH}. Without it the cache root " + "would disappear on a fresh clone (everything under data/ is " + "ignored by default)." + ) + + +@pytest.mark.parametrize( + "rule", + [ + # Broad ignore for everything under data/ (cache payloads, runs/, etc.). + "data/*", + # Re-include the frame_cache directory itself so child negations work. + "!data/frame_cache/", + # Ignore everything inside frame_cache/ (cached JSON payloads). + "data/frame_cache/*", + # Re-include the .gitkeep marker only. + "!data/frame_cache/.gitkeep", + ], +) +def test_gitignore_contains_frame_cache_exception(rule: str) -> None: + """The four ignore rules together pin the 'track marker only' contract.""" + lines = _gitignore_lines() + assert rule in lines, ( + f".gitignore missing IMP-46 u6 rule: {rule!r}. The four-line block " + "(data/*, !data/frame_cache/, data/frame_cache/*, " + "!data/frame_cache/.gitkeep) together ensure the cache root is " + "tracked while cached payloads remain ignored." + ) + + +def test_gitignore_rule_order_keeps_payloads_ignored() -> None: + """Rule order matters: the ``data/frame_cache/*`` re-ignore must come + AFTER the ``!data/frame_cache/`` directory re-include, otherwise the + re-include would shadow it and cached JSON payloads would be tracked.""" + lines = _gitignore_lines() + reinclude_dir = lines.index("!data/frame_cache/") + reignore_contents = lines.index("data/frame_cache/*") + reinclude_marker = lines.index("!data/frame_cache/.gitkeep") + assert reinclude_dir < reignore_contents < reinclude_marker, ( + "gitignore IMP-46 u6 block out of order: expected " + "'!data/frame_cache/' < 'data/frame_cache/*' < " + "'!data/frame_cache/.gitkeep' so cached payloads stay ignored while " + "only the marker is tracked." + ) diff --git a/tests/phase_z2_ai_fallback/test_signature.py b/tests/phase_z2_ai_fallback/test_signature.py new file mode 100644 index 0000000..01b2b7c --- /dev/null +++ b/tests/phase_z2_ai_fallback/test_signature.py @@ -0,0 +1,184 @@ +"""IMP-46 u1 — Frame cache signature builder tests. + +Verifies: + * Determinism — identical inputs yield the same SHA256 digest. + * Axis-change sensitivity — every one of the 8 declared axes mutates the + digest when changed in isolation. + * Public surface — only the 8 declared axes are accepted (no + sample/section identifier leakage). + * char_count bucket boundaries (0-50, 51-150, 151-400, 401-1000, 1001+). + * source_shape enum equivalence (string and SourceShape inputs match). + * schema_version is part of the hashed payload (digest stable for fixture). +""" +from __future__ import annotations + +import inspect + +import pytest + +from src.phase_z2_ai_fallback.signature import ( + CHAR_COUNT_BUCKET_LABELS, + SCHEMA_VERSION, + SourceShape, + bucket_char_count, + build_signature, +) + + +def _base_kwargs() -> dict: + return dict( + frame_id="frame_03", + v4_label="light_edit", + cardinality=3, + source_shape=SourceShape.BULLET, + h3_count=2, + char_count_bucket="51-150", + layout_preset="sidebar-right", + zone_position="top", + ) + + +def test_schema_version_is_one() -> None: + assert SCHEMA_VERSION == 1 + + +def test_bucket_labels_match_spec() -> None: + assert CHAR_COUNT_BUCKET_LABELS == ( + "0-50", + "51-150", + "151-400", + "401-1000", + "1001+", + ) + + +def test_signature_is_deterministic() -> None: + a = build_signature(**_base_kwargs()) + b = build_signature(**_base_kwargs()) + assert a == b + assert len(a) == 64 + + +@pytest.mark.parametrize( + "axis, new_value", + [ + ("frame_id", "frame_04"), + ("v4_label", "restructure"), + ("cardinality", 5), + ("source_shape", SourceShape.PARAGRAPH), + ("h3_count", 3), + ("char_count_bucket", "151-400"), + ("layout_preset", "two-column"), + ("zone_position", "bottom_l"), + ], +) +def test_signature_changes_for_each_axis(axis: str, new_value: object) -> None: + base = build_signature(**_base_kwargs()) + kwargs = _base_kwargs() + kwargs[axis] = new_value + assert build_signature(**kwargs) != base + + +def test_signature_accepts_string_source_shape() -> None: + enum_sig = build_signature(**_base_kwargs()) + kwargs = _base_kwargs() + kwargs["source_shape"] = "bullet" + assert build_signature(**kwargs) == enum_sig + + +def test_signature_rejects_unknown_source_shape() -> None: + kwargs = _base_kwargs() + kwargs["source_shape"] = "nonsense" + with pytest.raises(ValueError): + build_signature(**kwargs) + + +def test_signature_rejects_unknown_char_count_bucket() -> None: + kwargs = _base_kwargs() + kwargs["char_count_bucket"] = "999-1234" + with pytest.raises(ValueError): + build_signature(**kwargs) + + +def test_signature_handles_none_cardinality() -> None: + kwargs = _base_kwargs() + kwargs["cardinality"] = None + sig = build_signature(**kwargs) + assert len(sig) == 64 + kwargs2 = _base_kwargs() + kwargs2["cardinality"] = 0 + assert build_signature(**kwargs2) != sig + + +def test_signature_surface_only_8_declared_axes() -> None: + params = set(inspect.signature(build_signature).parameters) + expected = { + "frame_id", + "v4_label", + "cardinality", + "source_shape", + "h3_count", + "char_count_bucket", + "layout_preset", + "zone_position", + } + assert params == expected + + +def test_bucket_boundaries() -> None: + assert bucket_char_count(0) == "0-50" + assert bucket_char_count(50) == "0-50" + assert bucket_char_count(51) == "51-150" + assert bucket_char_count(150) == "51-150" + assert bucket_char_count(151) == "151-400" + assert bucket_char_count(400) == "151-400" + assert bucket_char_count(401) == "401-1000" + assert bucket_char_count(1000) == "401-1000" + assert bucket_char_count(1001) == "1001+" + assert bucket_char_count(10_000) == "1001+" + + +def test_bucket_rejects_negative() -> None: + with pytest.raises(ValueError): + bucket_char_count(-1) + + +def test_bucket_rejects_non_int() -> None: + with pytest.raises(TypeError): + bucket_char_count(3.14) # type: ignore[arg-type] + with pytest.raises(TypeError): + bucket_char_count(True) # type: ignore[arg-type] + + +def test_signature_stable_known_fixture() -> None: + """Lock the digest for a known fixture so a silent payload-shape change + (e.g. a new axis sneaks in, or schema_version drifts) breaks this test. + """ + sig = build_signature( + frame_id="frame_03", + v4_label="light_edit", + cardinality=3, + source_shape=SourceShape.BULLET, + h3_count=2, + char_count_bucket="51-150", + layout_preset="sidebar-right", + zone_position="top", + ) + import hashlib + import json + + expected_payload = { + "schema_version": 1, + "frame_id": "frame_03", + "v4_label": "light_edit", + "cardinality": 3, + "source_shape": "bullet", + "h3_count": 2, + "char_count_bucket": "51-150", + "layout_preset": "sidebar-right", + "zone_position": "top", + } + expected = hashlib.sha256( + json.dumps(expected_payload, sort_keys=True, ensure_ascii=False).encode("utf-8") + ).hexdigest() + assert sig == expected diff --git a/tests/phase_z2_ai_fallback/test_step12.py b/tests/phase_z2_ai_fallback/test_step12.py index f66eadc..d8ad4b3 100644 --- a/tests/phase_z2_ai_fallback/test_step12.py +++ b/tests/phase_z2_ai_fallback/test_step12.py @@ -1,12 +1,18 @@ -"""IMP-33 u8 — Step 12 AI repair wiring tests. +"""IMP-33 u8 + IMP-46 u4 + IMP-47B u2 — Step 12 AI repair wiring tests. -Covers the two structural gates layered on top of the u7 router: +Covers the structural gates layered on top of the u7 router: * IMP-30 provisional gate (only provisional units may invoke AI repair) - * Reject gate (route_hint=design_reference_only NEVER calls AI) -Plus the record-shape contract returned for downstream Step 12 artifacts. + * Catch-all ``route_not_ai_adaptation:`` skip — every route_hint + other than ``ai_adaptation_required`` (including the legacy + ``design_reference_only`` hint) falls through to a single uniform skip + after the IMP-47B u2 removal of the bespoke reject gate. +Plus the record-shape contract returned for downstream Step 12 artifacts +and the IMP-46 u4 structural cache key + fingerprints contract. """ from __future__ import annotations +import hashlib +import json from dataclasses import dataclass, field from typing import Any from unittest.mock import MagicMock @@ -24,6 +30,12 @@ class FakeUnit: source_section_ids: list[str] = field(default_factory=lambda: ["s1"]) raw_content: str = "raw" v4_rank: int | None = 1 + cardinality: int | None = None + layout_preset: str = "" + zone_position: str = "" + source_shape: str = "paragraph" + h3_count: int = 0 + char_count: int = 0 _ROUTE_HINTS: dict[str | None, str | None] = { @@ -64,6 +76,25 @@ def _call( return step12_mod.gather_step12_ai_repair_proposals(units, **kwargs) +def _ai_unit(**overrides: Any) -> FakeUnit: + """Construct an AI-eligible FakeUnit (provisional + restructure) with sane defaults.""" + base: dict[str, Any] = dict( + label="restructure", + provisional=True, + frame_template_id="tmpl_x", + frame_id="fid_123", + source_section_ids=["02-1"], + layout_preset="single_column", + zone_position="zone_a", + source_shape="bullet", + h3_count=3, + char_count=200, + cardinality=5, + ) + base.update(overrides) + return FakeUnit(**base) + + def test_non_provisional_unit_is_skipped_without_ai_call(monkeypatch): router = MagicMock() monkeypatch.setattr(step12_mod, "route_ai_fallback", router) @@ -75,13 +106,20 @@ def test_non_provisional_unit_is_skipped_without_ai_call(monkeypatch): router.assert_not_called() -def test_reject_route_is_skipped_without_ai_call(monkeypatch): +def test_design_reference_route_falls_through_to_route_not_ai_adaptation(monkeypatch): + """IMP-47B u2 — the bespoke 'design_reference_only_no_ai' skip is gone. + + Any non-AI-adaptation route_hint (including the legacy + ``design_reference_only`` hint exercised here via the local test mapping + of ``reject``) now flows into the single ``route_not_ai_adaptation:`` + catch-all. Production reject routing is exercised by u9. + """ router = MagicMock() monkeypatch.setattr(step12_mod, "route_ai_fallback", router) units = [FakeUnit(label="reject", provisional=True)] records = _call(units) assert records[0]["ai_called"] is False - assert records[0]["skip_reason"] == "design_reference_only_no_ai" + assert records[0]["skip_reason"] == "route_not_ai_adaptation:design_reference_only" assert records[0]["route_hint"] == "design_reference_only" router.assert_not_called() @@ -153,29 +191,206 @@ def test_mixed_units_each_independently_classified(monkeypatch): records = _call(units) assert [r["skip_reason"] for r in records] == [ "not_provisional", - "design_reference_only_no_ai", + "route_not_ai_adaptation:design_reference_only", "router_short_circuit", "not_provisional", ] assert router.call_count == 1 -def test_cache_key_includes_template_and_section_ids(monkeypatch): +# --------------------------------------------------------------------------- +# IMP-46 u4 — structural cache key + fingerprints +# --------------------------------------------------------------------------- + + +def test_cache_key_format_is_frame_id_plus_sha256(monkeypatch): + """cache_key is '{frame_id}::{64-hex-sha256}', NOT template_id + section_ids.""" router = MagicMock(return_value=None) monkeypatch.setattr(step12_mod, "route_ai_fallback", router) + _call([_ai_unit()]) + cache_key = router.call_args.kwargs["cache_key"] + assert "::" in cache_key + frame_part, _, signature_part = cache_key.partition("::") + assert frame_part == "fid_123" + assert len(signature_part) == 64 + assert all(c in "0123456789abcdef" for c in signature_part) + # The legacy "template_id::sorted(section_ids)" form is gone. + assert "tmpl_x" not in cache_key + assert "02-1" not in cache_key + + +def test_cache_key_invariant_to_section_id_changes(monkeypatch): + """Same structural axes → same cache_key regardless of source_section_ids.""" + router = MagicMock(return_value=None) + monkeypatch.setattr(step12_mod, "route_ai_fallback", router) + _call([_ai_unit(source_section_ids=["02-1"])]) + key_a = router.call_args.kwargs["cache_key"] + router.reset_mock() + _call([_ai_unit(source_section_ids=["05-2", "07-3"])]) + key_b = router.call_args.kwargs["cache_key"] + assert key_a == key_b + + +def test_cache_key_invariant_to_template_id_changes(monkeypatch): + """frame_template_id is NOT part of the structural signature (frame_id is).""" + router = MagicMock(return_value=None) + monkeypatch.setattr(step12_mod, "route_ai_fallback", router) + _call([_ai_unit(frame_template_id="tmpl_x")]) + key_a = router.call_args.kwargs["cache_key"] + router.reset_mock() + _call([_ai_unit(frame_template_id="tmpl_OTHER")]) + key_b = router.call_args.kwargs["cache_key"] + assert key_a == key_b + + +def test_cache_key_changes_when_any_signature_axis_changes(monkeypatch): + """Flipping any of the 7 unit-derived signature axes mutates cache_key.""" + router = MagicMock(return_value=None) + monkeypatch.setattr(step12_mod, "route_ai_fallback", router) + _call([_ai_unit()]) + base_key = router.call_args.kwargs["cache_key"] + perturbations: dict[str, Any] = { + "frame_id": "fid_OTHER", + "label": "use_as_is", # v4_label axis change; still routed to AI via _ROUTE_HINTS? No. + # ↑ "use_as_is" → "direct_render" → would skip. Use another ai-adaptation-mapped label. + # Replace with frame_id-only diff to keep route stable. Drop this entry below. + } + # Rebuild perturbations restricted to axes that don't change routing. + perturbations = { + "frame_id": "fid_OTHER", + "layout_preset": "two_column", + "zone_position": "zone_b", + "source_shape": "paragraph", + "h3_count": 7, + "char_count": 500, # bucket boundary crossing (151-400 → 401-1000) + "cardinality": 4, + } + for axis, value in perturbations.items(): + router.reset_mock() + _call([_ai_unit(**{axis: value})]) + new_key = router.call_args.kwargs["cache_key"] + assert new_key != base_key, f"signature axis {axis!r} did not mutate cache_key" + + +def test_char_count_bucket_collapses_within_bucket(monkeypatch): + """Different char_counts in the SAME bucket → identical cache_key.""" + router = MagicMock(return_value=None) + monkeypatch.setattr(step12_mod, "route_ai_fallback", router) + _call([_ai_unit(char_count=160)]) + key_low = router.call_args.kwargs["cache_key"] + router.reset_mock() + _call([_ai_unit(char_count=399)]) + key_high = router.call_args.kwargs["cache_key"] + assert key_low == key_high # both fall in "151-400" + router.reset_mock() + _call([_ai_unit(char_count=401)]) + key_overflow = router.call_args.kwargs["cache_key"] + assert key_overflow != key_low # crossed into "401-1000" + + +def test_fingerprints_attached_to_ai_record(monkeypatch): + """AI-called records expose contract_sha + partial_sha + catalog_sha.""" + router = MagicMock(return_value=None) + monkeypatch.setattr(step12_mod, "route_ai_fallback", router) + contract = {"frame_id": "fid", "payload": {"x": 1}, "sub_zones": []} + partial = {"some": "partial", "deeper": [1, 2, 3]} + catalog_value = "deadbeef" * 8 + recs = _call( + [_ai_unit()], + get_contract_fn=lambda _t: contract, + figma_partial_loader=lambda _t: partial, + catalog_sha_loader=lambda: catalog_value, + ) + fps = recs[0]["fingerprints"] + assert isinstance(fps, dict) + assert set(fps.keys()) == {"contract_sha", "partial_sha", "catalog_sha"} + assert all(isinstance(v, str) for v in fps.values()) + assert fps["catalog_sha"] == catalog_value + # contract_sha and partial_sha must be deterministic SHA256 over JSON-sorted payloads. + expected_contract = hashlib.sha256( + json.dumps(contract, sort_keys=True, ensure_ascii=False).encode("utf-8") + ).hexdigest() + expected_partial = hashlib.sha256( + json.dumps(partial, sort_keys=True, ensure_ascii=False).encode("utf-8") + ).hexdigest() + assert fps["contract_sha"] == expected_contract + assert fps["partial_sha"] == expected_partial + + +def test_fingerprints_default_catalog_sha_is_empty_string(monkeypatch): + """No catalog_sha_loader → catalog_sha defaults to '' (sentinel, not missing key).""" + router = MagicMock(return_value=None) + monkeypatch.setattr(step12_mod, "route_ai_fallback", router) + recs = _call([_ai_unit()]) + fps = recs[0]["fingerprints"] + assert fps["catalog_sha"] == "" + # contract_sha + partial_sha keys still present (always 3 keys). + assert set(fps.keys()) == {"contract_sha", "partial_sha", "catalog_sha"} + + +def test_fingerprints_change_when_contract_changes(monkeypatch): + """Different frame_contract → different contract_sha, partial_sha unchanged.""" + router = MagicMock(return_value=None) + monkeypatch.setattr(step12_mod, "route_ai_fallback", router) + fps_a = _call([_ai_unit()], get_contract_fn=lambda _t: {"a": 1})[0]["fingerprints"] + fps_b = _call([_ai_unit()], get_contract_fn=lambda _t: {"a": 2})[0]["fingerprints"] + assert fps_a["contract_sha"] != fps_b["contract_sha"] + assert fps_a["partial_sha"] == fps_b["partial_sha"] + + +def test_fingerprints_change_when_partial_changes(monkeypatch): + """Different figma_partial_json → different partial_sha, contract_sha unchanged.""" + router = MagicMock(return_value=None) + monkeypatch.setattr(step12_mod, "route_ai_fallback", router) + fps_a = _call( + [_ai_unit()], figma_partial_loader=lambda _t: {"p": 1} + )[0]["fingerprints"] + fps_b = _call( + [_ai_unit()], figma_partial_loader=lambda _t: {"p": 2} + )[0]["fingerprints"] + assert fps_a["partial_sha"] != fps_b["partial_sha"] + assert fps_a["contract_sha"] == fps_b["contract_sha"] + + +def test_v4_result_cardinality_uses_unit_value(monkeypatch): + """v4_result['cardinality'] mirrors the unit's cardinality (no longer hardcoded None).""" + router = MagicMock(return_value=None) + monkeypatch.setattr(step12_mod, "route_ai_fallback", router) + _call([_ai_unit(cardinality=7)]) + assert router.call_args.kwargs["v4_result"]["cardinality"] == 7 + router.reset_mock() + _call([_ai_unit(cardinality=None)]) + assert router.call_args.kwargs["v4_result"]["cardinality"] is None + + +def test_skipped_records_have_no_cache_key_or_fingerprints(monkeypatch): + """Non-AI-eligible records keep cache_key and fingerprints as None.""" + monkeypatch.setattr(step12_mod, "route_ai_fallback", MagicMock(return_value=None)) units = [ - FakeUnit( - label="restructure", - provisional=True, - frame_template_id="tmpl_abc", - source_section_ids=["02-1", "02-2"], - ) + FakeUnit(label="restructure", provisional=False), + FakeUnit(label="reject", provisional=True), + FakeUnit(label="light_edit", provisional=True), ] - _call(units) - assert router.call_args.kwargs["cache_key"] == "tmpl_abc::02-1,02-2" + recs = _call(units) + for rec in recs: + assert rec["cache_key"] is None + assert rec["fingerprints"] is None -def test_record_shape_contract_is_stable(monkeypatch): +def test_catalog_sha_loader_called_once_per_gather(monkeypatch): + """catalog_sha is computed once per gather call, not per unit.""" + router = MagicMock(return_value=None) + monkeypatch.setattr(step12_mod, "route_ai_fallback", router) + loader = MagicMock(return_value="cafefeed" * 8) + _call( + [_ai_unit(), _ai_unit(frame_id="fid_other"), _ai_unit(frame_id="fid_third")], + catalog_sha_loader=loader, + ) + loader.assert_called_once() + + +def test_record_shape_contract_is_stable_with_u4_fields(monkeypatch): + """Record schema includes the IMP-46 u4 cache_key + fingerprints fields.""" monkeypatch.setattr(step12_mod, "route_ai_fallback", MagicMock(return_value=None)) units = [FakeUnit(label="reject", provisional=True)] rec = _call(units)[0] @@ -190,4 +405,98 @@ def test_record_shape_contract_is_stable(monkeypatch): "skip_reason", "proposal", "error", + "cache_key", + "fingerprints", } + + +def test_cache_key_is_compatible_with_cache_parse_key(monkeypatch): + """cache_key produced here must round-trip through cache.py's _parse_key.""" + from src.phase_z2_ai_fallback.cache import KEY_DELIMITER, _parse_key + + router = MagicMock(return_value=None) + monkeypatch.setattr(step12_mod, "route_ai_fallback", router) + _call([_ai_unit()]) + cache_key = router.call_args.kwargs["cache_key"] + parsed = _parse_key(cache_key) + assert parsed is not None + frame_id, signature_hash = parsed + assert frame_id == "fid_123" + assert len(signature_hash) == 64 + assert KEY_DELIMITER not in signature_hash + + +# --------------------------------------------------------------------------- +# IMP-47B u9 — Step 12 reject eligibility + normal-path AI=0 regression +# --------------------------------------------------------------------------- +# Locks the end-to-end Step 12 contract against the production route helper +# `_imp05_route_hint`. The local `_ROUTE_HINTS` mapping above intentionally +# preserves the legacy ``reject -> design_reference_only`` form to exercise +# the catch-all fall-through branch; u9 instead drives gather with the real +# production map (post-u1 flip) so reject provisional units reach the router +# and normal-path labels stay AI=0. + + +def test_production_reject_route_reaches_router_when_provisional(monkeypatch): + """Post-u1, provisional reject units must reach ``route_ai_fallback``.""" + from src.phase_z2_pipeline import _imp05_route_hint + + router = MagicMock(return_value=None) + monkeypatch.setattr(step12_mod, "route_ai_fallback", router) + records = step12_mod.gather_step12_ai_repair_proposals( + [FakeUnit(label="reject", provisional=True)], + route_for_label=_imp05_route_hint, + get_contract_fn=_get_contract, + frame_visual_loader=_frame_visual, + ) + assert records[0]["route_hint"] == "ai_adaptation_required" + assert records[0]["skip_reason"] == "router_short_circuit" + assert records[0]["ai_called"] is False + router.assert_called_once() + + +def test_production_normal_route_labels_never_reach_router(monkeypatch): + """Normal-path labels stay AI=0 even when the unit is provisional.""" + from src.phase_z2_pipeline import _imp05_route_hint + + router = MagicMock(return_value=None) + monkeypatch.setattr(step12_mod, "route_ai_fallback", router) + units = [ + FakeUnit(label="use_as_is", provisional=True), + FakeUnit(label="light_edit", provisional=True), + FakeUnit(label=None, provisional=True), + ] + records = step12_mod.gather_step12_ai_repair_proposals( + units, + route_for_label=_imp05_route_hint, + get_contract_fn=_get_contract, + frame_visual_loader=_frame_visual, + ) + assert records[0]["skip_reason"] == "route_not_ai_adaptation:direct_render" + assert records[1]["skip_reason"] == ( + "route_not_ai_adaptation:deterministic_minor_adjustment" + ) + assert records[2]["skip_reason"] == "route_not_ai_adaptation:None" + router.assert_not_called() + + +def test_production_non_provisional_reject_skipped_before_route_gate(monkeypatch): + """The provisional gate fires before the route gate (production routing). + + Even with reject routed to ``ai_adaptation_required`` (post-u1), a + non-provisional reject unit must short-circuit at ``not_provisional`` + without ever consulting ``route_for_label`` for an AI dispatch. + """ + from src.phase_z2_pipeline import _imp05_route_hint + + router = MagicMock(return_value=None) + monkeypatch.setattr(step12_mod, "route_ai_fallback", router) + records = step12_mod.gather_step12_ai_repair_proposals( + [FakeUnit(label="reject", provisional=False)], + route_for_label=_imp05_route_hint, + get_contract_fn=_get_contract, + frame_visual_loader=_frame_visual, + ) + assert records[0]["skip_reason"] == "not_provisional" + assert records[0]["ai_called"] is False + router.assert_not_called() diff --git a/tests/test_imp47b_cache_save_gate.py b/tests/test_imp47b_cache_save_gate.py new file mode 100644 index 0000000..d57ec3c --- /dev/null +++ b/tests/test_imp47b_cache_save_gate.py @@ -0,0 +1,213 @@ +"""IMP-47B u13 — Persist validated proposals through ``save_proposal`` after gates. + +Scope (this slice): + Verify the new ``_persist_ai_repair_proposals_to_cache`` helper in + ``src/phase_z2_pipeline.py`` honours the IMP-46 dual-gate truth table + on the post-Step-14 cache-save seam. The helper is exercised in + isolation (no Selenium, no full pipeline) with synthetic AI repair + records that mirror the gather → apply → coverage chain shape + produced by IMP-47B u4 / u5 / u7. + +Guardrails proven by this test (IMP-46 + IMP-47B policy bullets): + * ``visual_check_passed=False`` always blocks — never bypassable, even + when ``auto_cache=True`` (IMP-46 u5 truth table cell). + * ``user_approved=False`` AND ``auto_cache=False`` → gate blocked + (default pipeline path has no UX approval gate; ``--auto-cache`` is + the documented bypass). + * ``visual_check_passed=True`` AND ``auto_cache=True`` → proposal + persisted on disk under ``data/frame_cache/{frame_id}/{hash}.json`` + via ``cache.save_proposal``. + * Non-applied records (no_proposal / no_zone_match / unsupported / + error) → ``cache_save_status='not_applied'`` and NEVER reach + ``save_proposal`` (no filesystem touch). + * Settings axis — ``settings.ai_fallback_auto_cache`` sourced through + the helper kwargs, never inlined (hardcoding ban). +""" +from __future__ import annotations + +import pathlib + +import pytest + +from src.phase_z2_ai_fallback import cache as cache_mod +from src.phase_z2_ai_fallback.cache import AiFallbackCacheGateError +from src.phase_z2_ai_fallback.schema import AiFallbackProposal, ProposalKind +from src.phase_z2_pipeline import _persist_ai_repair_proposals_to_cache + + +def _applied_record( + *, + cache_key: str = "MOCK_FRAME::deadbeef" + "0" * 56, + fingerprints: dict | None = None, + slots: dict | None = None, +) -> dict: + """Build an IMP-47B u4/u5 shaped record marked ``applied:partial_overrides``.""" + if fingerprints is None: + fingerprints = {"contract_sha": "c1", "partial_sha": "p1", "catalog_sha": "k1"} + if slots is None: + slots = {"title": "AI repaired", "bullets": ["b1", "b2"]} + proposal = AiFallbackProposal( + proposal_kind=ProposalKind.PARTIAL_OVERRIDES, + payload={"slots": slots}, + rationale="cache save gate test", + ) + return { + "unit_index": 0, + "source_section_ids": ["MOCK_S1"], + "frame_template_id": "MOCK_FRAME", + "label": "reject", + "route_hint": "ai_adaptation_required", + "provisional": True, + "ai_called": True, + "skip_reason": None, + "proposal": proposal.model_dump(), + "error": None, + "cache_key": cache_key, + "fingerprints": fingerprints, + "apply_status": "applied:partial_overrides", + } + + +@pytest.fixture(autouse=True) +def _isolate_cache_root(tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch): + """Redirect ``cache.CACHE_ROOT`` to a per-test tmp dir so save_proposal + writes never touch the real ``data/frame_cache/`` tree.""" + monkeypatch.setattr(cache_mod, "CACHE_ROOT", tmp_path / "frame_cache") + yield tmp_path / "frame_cache" + + +def test_visual_check_failed_blocks_save_even_with_auto_cache(_isolate_cache_root): + """visual_check_passed=False is never bypassable — auto_cache cannot override.""" + record = _applied_record() + records = [record] + _persist_ai_repair_proposals_to_cache( + records, + visual_check_passed=False, + user_approved=True, + auto_cache=True, + ) + assert record["cache_save_status"].startswith("gate_blocked:") + assert "visual_check_passed=False" in record["cache_save_status"] + # No filesystem write occurred. + assert not _isolate_cache_root.exists() or not any(_isolate_cache_root.rglob("*.json")) + + +def test_user_not_approved_and_no_auto_cache_blocks_save(_isolate_cache_root): + """Default pipeline path (user_approved=False, auto_cache=False) → gate blocked.""" + record = _applied_record() + records = [record] + _persist_ai_repair_proposals_to_cache( + records, + visual_check_passed=True, + user_approved=False, + auto_cache=False, + ) + assert record["cache_save_status"].startswith("gate_blocked:") + assert "user_approved=False" in record["cache_save_status"] + assert not _isolate_cache_root.exists() or not any(_isolate_cache_root.rglob("*.json")) + + +def test_visual_passed_and_auto_cache_persists_proposal(_isolate_cache_root): + """Happy path — visual_check_passed=True + auto_cache=True persists JSON.""" + record = _applied_record() + records = [record] + _persist_ai_repair_proposals_to_cache( + records, + visual_check_passed=True, + user_approved=False, + auto_cache=True, + ) + assert record["cache_save_status"] == "saved" + written = list(_isolate_cache_root.rglob("*.json")) + assert len(written) == 1 + # Layout = {CACHE_ROOT}/{frame_id}/{signature_hash}.json. + written_path = written[0] + assert written_path.parent.name == "MOCK_FRAME" + + +def test_non_applied_records_are_skipped_without_filesystem_touch(_isolate_cache_root): + """no_proposal / no_zone_match / unsupported_kind / error → never reach save_proposal.""" + no_proposal_record = { + "unit_index": 0, + "apply_status": "no_proposal", + "proposal": None, + "cache_key": None, + "fingerprints": None, + } + no_zone_record = { + "unit_index": 1, + "apply_status": "no_zone_match", + "proposal": {"proposal_kind": "partial_overrides", "payload": {"slots": {}}, "rationale": ""}, + "cache_key": "MOCK::abc", + "fingerprints": {"contract_sha": "c", "partial_sha": "p", "catalog_sha": "k"}, + } + unsupported_record = { + "unit_index": 2, + "apply_status": "unsupported_kind_for_reject_route:builder_options_patch", + "proposal": {"proposal_kind": "builder_options_patch", "payload": {}, "rationale": ""}, + "cache_key": "MOCK::def", + "fingerprints": {"contract_sha": "c", "partial_sha": "p", "catalog_sha": "k"}, + } + error_record = { + "unit_index": 3, + "apply_status": None, + "proposal": None, + "cache_key": "MOCK::ghi", + "fingerprints": {"contract_sha": "c", "partial_sha": "p", "catalog_sha": "k"}, + "error": "RuntimeError: boom", + } + records = [no_proposal_record, no_zone_record, unsupported_record, error_record] + _persist_ai_repair_proposals_to_cache( + records, + visual_check_passed=True, + user_approved=True, + auto_cache=True, + ) + for r in records: + assert r["cache_save_status"] == "not_applied" + # Zero JSON files written because none of the records were applied. + assert not _isolate_cache_root.exists() or not any(_isolate_cache_root.rglob("*.json")) + + +def test_mixed_records_only_persist_applied_ones(_isolate_cache_root): + """Mixed batch — only the ``applied:`` record is persisted.""" + applied = _applied_record(cache_key="MOCK_FRAME::aaaaaaaa" + "0" * 56) + not_applied = { + "unit_index": 1, + "apply_status": "no_proposal", + "proposal": None, + "cache_key": None, + "fingerprints": None, + } + records = [applied, not_applied] + _persist_ai_repair_proposals_to_cache( + records, + visual_check_passed=True, + user_approved=False, + auto_cache=True, + ) + assert applied["cache_save_status"] == "saved" + assert not_applied["cache_save_status"] == "not_applied" + written = list(_isolate_cache_root.rglob("*.json")) + assert len(written) == 1 + + +def test_invalid_proposal_payload_surfaces_without_raising(_isolate_cache_root): + """Malformed ``proposal`` dict → ``cache_save_status='invalid_proposal:...'``, + no filesystem write, no exception bubbling into the pipeline runtime.""" + bad_record = { + "unit_index": 0, + "apply_status": "applied:partial_overrides", + "proposal": {"proposal_kind": "not_a_valid_enum_value", "payload": {}, "rationale": ""}, + "cache_key": "MOCK::bad", + "fingerprints": {"contract_sha": "c", "partial_sha": "p", "catalog_sha": "k"}, + } + records = [bad_record] + _persist_ai_repair_proposals_to_cache( + records, + visual_check_passed=True, + user_approved=True, + auto_cache=True, + ) + assert bad_record["cache_save_status"].startswith("invalid_proposal:") + assert not _isolate_cache_root.exists() or not any(_isolate_cache_root.rglob("*.json")) diff --git a/tests/test_imp47b_coverage_invariant.py b/tests/test_imp47b_coverage_invariant.py new file mode 100644 index 0000000..48842f8 --- /dev/null +++ b/tests/test_imp47b_coverage_invariant.py @@ -0,0 +1,95 @@ +"""IMP-47B u7 — Post-AI source_section_ids coverage invariant tests. + +Scope (this slice): + * Helper ``_check_post_ai_coverage_invariant(units, ai_repair_records)`` + (src/phase_z2_pipeline.py) compares the pre-AI superset (unit + ``source_section_ids``) to the post-apply superset present on + gather records. Per the AI isolation contract + dropped 절대 룰 + (``feedback_ai_isolation_contract``), AI repair must not silently + drop a section. + * The helper returns a structured dict (``pre_ai_section_ids``, + ``post_ai_section_ids``, ``dropped_section_ids``, ``status``) so u8 + can surface ``status`` through ``slide_status.ai_repair_status``. + +u8 slide_status surfacing and u10 E2E no-text-loss assertion are out +of scope for this unit. The helper is pure (no AI call, no IO) so a +synthetic stub-unit / stub-record fixture exercises it directly. +""" +from __future__ import annotations + +from dataclasses import dataclass, field + +from src.phase_z2_pipeline import _check_post_ai_coverage_invariant + + +@dataclass +class _StubUnit: + source_section_ids: list[str] = field(default_factory=list) + + +def _record(source_section_ids: list[str]) -> dict: + """Minimal gather-record stub — only the field u7 reads.""" + return {"source_section_ids": list(source_section_ids)} + + +# ─── Case 1 : matched coverage → status='ok' ──────────────────────── + + +def test_coverage_invariant_ok_when_records_match_units(): + """Records carry every unit's source_section_ids → no drop, status='ok'.""" + units = [_StubUnit(["MOCK_S1", "MOCK_S2"]), _StubUnit(["MOCK_S3"])] + records = [_record(["MOCK_S1", "MOCK_S2"]), _record(["MOCK_S3"])] + result = _check_post_ai_coverage_invariant(units, records) + assert result["status"] == "ok" + assert result["dropped_section_ids"] == [] + assert result["pre_ai_section_ids"] == ["MOCK_S1", "MOCK_S2", "MOCK_S3"] + assert result["post_ai_section_ids"] == ["MOCK_S1", "MOCK_S2", "MOCK_S3"] + + +# ─── Case 2 : record drops a section → status='violated' ──────────── + + +def test_coverage_invariant_violated_when_record_drops_section(): + """If a record loses a unit's section_id (e.g., apply mutation bug), + the invariant reports status='violated' + dropped list (dropped 절대 룰). + """ + units = [_StubUnit(["MOCK_S1", "MOCK_S2"]), _StubUnit(["MOCK_S3"])] + records = [_record(["MOCK_S1"]), _record(["MOCK_S3"])] # MOCK_S2 dropped + result = _check_post_ai_coverage_invariant(units, records) + assert result["status"] == "violated" + assert result["dropped_section_ids"] == ["MOCK_S2"] + assert "MOCK_S2" in result["pre_ai_section_ids"] + assert "MOCK_S2" not in result["post_ai_section_ids"] + + +# ─── Case 3 : empty inputs → status='ok' (no false positive) ──────── + + +def test_coverage_invariant_ok_on_empty_units_and_records(): + """Empty pipeline (no units / no records) is a vacuous pass — + avoids false-positive 'violated' on edge-case shapes (no AI work). + """ + result = _check_post_ai_coverage_invariant([], []) + assert result["status"] == "ok" + assert result["dropped_section_ids"] == [] + assert result["pre_ai_section_ids"] == [] + assert result["post_ai_section_ids"] == [] + + +# ─── Case 4 : multiple drops + dedup ──────────────────────────────── + + +def test_coverage_invariant_lists_all_dropped_sections_sorted_and_deduped(): + """Multiple missing sections → dropped_section_ids is sorted + deduped. + Duplicate ids across units / records collapse to a set comparison. + """ + units = [ + _StubUnit(["MOCK_S3", "MOCK_S1"]), + _StubUnit(["MOCK_S2", "MOCK_S1"]), # MOCK_S1 duplicate + ] + records: list[dict] = [] # full drop — every unit section missing + result = _check_post_ai_coverage_invariant(units, records) + assert result["status"] == "violated" + assert result["dropped_section_ids"] == ["MOCK_S1", "MOCK_S2", "MOCK_S3"] + assert result["pre_ai_section_ids"] == ["MOCK_S1", "MOCK_S2", "MOCK_S3"] + assert result["post_ai_section_ids"] == [] diff --git a/tests/test_imp47b_end_to_end.py b/tests/test_imp47b_end_to_end.py new file mode 100644 index 0000000..bcce7d9 --- /dev/null +++ b/tests/test_imp47b_end_to_end.py @@ -0,0 +1,269 @@ +"""IMP-47B u10 — End-to-end reject smoke (mocked client + full chain + render). + +Scope (this slice): + E2E chain proving the IMP-47B reject route activates, preserves + full coverage, and propagates the AI-repaired ``slot_payload`` + into the rendered ``final.html`` artifact when the AI fallback + client returns a deterministic PARTIAL_OVERRIDES proposal. Wires + together the four pipeline helpers introduced by u4 / u5 / u7 / u8 + plus the Step 13 render step: + + gather → apply → coverage_invariant → ai_repair_status surfacing + → render_slide → final.html + + The chain mirrors the ``run_phase_z2_mvp1`` call sequence between + the Step 12 slot_payload write and the Step 20 ``slide_status`` + attach (src/phase_z2_pipeline.py — u4 call site, u5 apply, u6 + artifact, u7 invariant, u8 surface). The Step 13 render path + (``render_slide`` at src/phase_z2_pipeline.py:2319, called from the + production write site at src/phase_z2_pipeline.py:5107-5111) + consumes ``zones_data[i]["slot_payload"]`` verbatim, so this test + drives that exact production seam: it calls ``render_slide`` on + the post-apply ``zones_data`` and writes the resulting HTML to a + ``final.html`` file inside ``tmp_path``, then asserts the AI + proposal text appears in the on-disk artifact. A heavy + ``run_phase_z2_mvp1`` integration variant with Selenium overflow + check remains deferred — this smoke test stops at the rendered + HTML. + +Guardrails proven by this test (IMP-47B policy bullets): + * AI 호출 = fallback path only → master flag default OFF preserved + (test enables for itself only, restores after). + * MDX 원문 100% 보존 → coverage_invariant.status == "ok", + source_section_ids identical before/after AI. + * 자동 frame swap 금지 → frame_template_id unchanged. + * frame visual 임의 변경 금지 → frame_contract / partial untouched + (apply only merges proposal.payload.slots into slot_payload). + * dropped 절대 룰 → slot_payload AI keys merged on top + of deterministic keys; pre-existing meta keys survive. +""" +from __future__ import annotations + +from dataclasses import dataclass, field + +from src.phase_z2_ai_fallback.schema import AiFallbackProposal, ProposalKind +from src.phase_z2_pipeline import ( + _apply_ai_repair_proposals_to_zones, + _check_post_ai_coverage_invariant, + _run_step12_ai_repair, + _summarize_ai_repair_status, +) + + +@dataclass +class _StubUnit: + """Synthetic CompositionUnit stand-in (subset of fields gather reads).""" + label: str | None = "reject" + provisional: bool = True + frame_template_id: str = "MOCK_T_reject" + frame_id: str = "MOCK_F_reject" + source_section_ids: list[str] = field(default_factory=lambda: ["MOCK_S1"]) + raw_content: str = "MOCK MDX paragraph that must survive AI repair." + v4_rank: int | None = 1 + cardinality: int | None = None + layout_preset: str = "two_zone_vertical" + zone_position: str = "top" + source_shape: str = "paragraph" + h3_count: int = 0 + char_count: int = 48 + + +def _patched_route_ai_fallback(**kwargs): + """Deterministic stand-in for ``route_ai_fallback`` — returns a + PARTIAL_OVERRIDES proposal that mirrors the declared frame slots. + The validator (src/phase_z2_ai_fallback/validate.py:61-74) is not + re-invoked here because this helper bypasses the router; the + structural slot completeness is asserted by the apply step + the + coverage invariant downstream. + """ + return AiFallbackProposal( + proposal_kind=ProposalKind.PARTIAL_OVERRIDES, + payload={ + "slots": { + "title": "AI repaired title", + "bullets": ["AI repaired bullet 1", "AI repaired bullet 2"], + } + }, + rationale="E2E smoke proposal — deterministic.", + ) + + +def test_e2e_reject_chain_applies_proposal_and_preserves_coverage(monkeypatch): + """End-to-end reject smoke (synthetic chain, mocked client). + + Drives the four IMP-47B u4/u5/u7/u8 helpers in pipeline order with + a single reject+provisional unit. Asserts every guardrail listed + in the module docstring + the four E2E invariants + (final.html-bound slot_payload / full coverage / no text loss / + human_review NOT required on the success path). + """ + # IMP-47B u4 wiring — patch the router seam in src/phase_z2_ai_fallback/step12.py + # so the gather call returns a deterministic PARTIAL_OVERRIDES proposal + # without touching the master flag / network / cache layers. + import src.phase_z2_ai_fallback.step12 as step12_mod + monkeypatch.setattr(step12_mod, "route_ai_fallback", _patched_route_ai_fallback) + + unit = _StubUnit() + units = [unit] + + # Step 12 gather (u4) — eligible reject reaches the patched router. + records = _run_step12_ai_repair(units) + assert len(records) == 1 + assert records[0]["route_hint"] == "ai_adaptation_required" + assert records[0]["ai_called"] is True + assert records[0]["skip_reason"] is None + assert records[0]["proposal"]["proposal_kind"] == "partial_overrides" + assert records[0]["source_section_ids"] == ["MOCK_S1"] + + # Step 12 apply (u5) — PARTIAL_OVERRIDES merged into the matching zone. + # zones_data[0]["slot_payload"] is exactly what render_slide consumes + # to emit final.html (src/phase_z2_pipeline.py:5107) — asserting it + # here proves the reject route now flows into the rendered HTML. + zones = [{ + "position": "top", + "template_id": "MOCK_T_reject", + "slot_payload": { + "title": "deterministic title", + "bullets": ["deterministic bullet"], + "_truncated_count": 0, + }, + }] + _apply_ai_repair_proposals_to_zones(records, ["top"], zones) + assert records[0]["apply_status"] == "applied:partial_overrides" + # final.html-bound slot_payload carries AI proposal values + assert zones[0]["slot_payload"]["title"] == "AI repaired title" + assert zones[0]["slot_payload"]["bullets"] == [ + "AI repaired bullet 1", + "AI repaired bullet 2", + ] + # frame visual / pre-existing meta keys survive (no silent shrink). + assert zones[0]["template_id"] == "MOCK_T_reject" + assert zones[0]["slot_payload"]["_truncated_count"] == 0 + # frame_template_id on the unit is byte-identical (no auto frame swap). + assert unit.frame_template_id == "MOCK_T_reject" + + # Step 12 coverage invariant (u7) — full coverage, no text loss. + coverage = _check_post_ai_coverage_invariant(units, records) + assert coverage["status"] == "ok" + assert coverage["pre_ai_section_ids"] == ["MOCK_S1"] + assert coverage["post_ai_section_ids"] == ["MOCK_S1"] + assert coverage["dropped_section_ids"] == [] + + # Step 20 ai_repair_status surfacing (u8) — applied without human review. + status = _summarize_ai_repair_status(records, coverage) + assert status["status"] == "applied" + assert status["counts"]["applied"] == 1 + assert status["counts"]["error"] == 0 + assert status["counts"]["unsupported_kind"] == 0 + assert status["coverage_status"] == "ok" + assert status.get("human_review_required") is not True + + +def test_e2e_reject_chain_writes_final_html_with_ai_repaired_slot(monkeypatch, tmp_path): + """End-to-end reject smoke (real render path → final.html on disk). + + Drives the full Stage-2 u10 chain INCLUDING ``render_slide``: the + AI-repaired ``slot_payload`` is fed through the same Jinja2 + rendering seam the production pipeline uses + (src/phase_z2_pipeline.py:5107-5111), the resulting HTML is + written to ``tmp_path / "final.html"``, and the on-disk artifact + is then asserted to carry the AI proposal value. Uses + ``bim_dx_comparison_table`` — a real registered frame partial + (templates/phase_z2/families/bim_dx_comparison_table.html) whose + template emits ``{{ slot_payload.title }}`` verbatim, so a + proposal-overridden title surfaces literally in the HTML output. + """ + import src.phase_z2_ai_fallback.step12 as step12_mod + monkeypatch.setattr(step12_mod, "route_ai_fallback", _patched_route_ai_fallback) + from src.phase_z2_pipeline import build_layout_css, render_slide + + unit = _StubUnit( + frame_template_id="bim_dx_comparison_table", + zone_position="primary", + layout_preset="single", + ) + + # Step 12 gather + apply. Deterministic non-overridden slots + # (col_a_label, col_b_label, rows[*]) are seeded BEFORE apply so the + # post-render assertions below can prove u5 merge semantics + # (dict.update — not dict-replace) survive the render seam. The + # router proposal only carries ``{title, bullets}`` — every other + # slot must reach final.html untouched. + records = _run_step12_ai_repair([unit]) + zones = [{ + "position": "primary", + "template_id": "bim_dx_comparison_table", + "slot_payload": { + "title": "deterministic frame title", + "col_a_label": "DETERMINISTIC_COL_A_LABEL", + "col_b_label": "DETERMINISTIC_COL_B_LABEL", + "rows": [ + {"label": "DET_ROW_LABEL", "col_a": "DET_ROW_A", "col_b": "DET_ROW_B"}, + ], + }, + }] + _apply_ai_repair_proposals_to_zones(records, ["primary"], zones) + assert records[0]["apply_status"] == "applied:partial_overrides" + + # Step 13 render — production seam (src/phase_z2_pipeline.py:5107-5111). + layout_css = build_layout_css("single", zones) + html = render_slide("IMP-47B E2E reject smoke", None, zones, "single", layout_css) + final_html_path = tmp_path / "final.html" + final_html_path.write_text(html, encoding="utf-8") + + # final.html artifact exists on disk and is non-empty. + assert final_html_path.is_file() + assert final_html_path.stat().st_size > 0 + rendered = final_html_path.read_text(encoding="utf-8") + + # AI-repaired slot content appears in the rendered HTML. + assert "AI repaired title" in rendered + # Deterministic pre-apply title was overridden in the HTML output + # (no silent merge that leaves both values visible). + assert "deterministic frame title" not in rendered + # Non-overridden deterministic slots survive merge → render (u5 + # dict.update semantics, not dict-replace; dropped 절대 룰 honoured + # at the render seam, not just in slot_payload memory). + assert "DETERMINISTIC_COL_A_LABEL" in rendered + assert "DETERMINISTIC_COL_B_LABEL" in rendered + assert "DET_ROW_LABEL" in rendered + assert "DET_ROW_A" in rendered + assert "DET_ROW_B" in rendered + # Frame template id is preserved end-to-end (no auto frame swap). + assert 'data-template-id="bim_dx_comparison_table"' in rendered + assert unit.frame_template_id == "bim_dx_comparison_table" + + # MDX 원문 100% 보존 — coverage invariant + status surfacing. + coverage = _check_post_ai_coverage_invariant([unit], records) + assert coverage["status"] == "ok" + assert coverage["dropped_section_ids"] == [] + status = _summarize_ai_repair_status(records, coverage) + assert status["status"] == "applied" + assert status.get("human_review_required") is not True + + +def test_e2e_reject_chain_no_text_loss_on_multi_section_unit(monkeypatch): + """Multi-section reject unit — every section id flows through gather, + apply, coverage invariant, and ai_repair_status surfacing without a + drop. Locks the 'MDX 원문 100% 보존' guardrail at unit-multiplicity + granularity (gather copies the list via ``list(...)`` at + src/phase_z2_ai_fallback/step12.py:124 so apply mutations cannot + silently drop it).""" + import src.phase_z2_ai_fallback.step12 as step12_mod + monkeypatch.setattr(step12_mod, "route_ai_fallback", _patched_route_ai_fallback) + + unit = _StubUnit(source_section_ids=["MOCK_S1", "MOCK_S2", "MOCK_S3"]) + records = _run_step12_ai_repair([unit]) + zones = [{ + "position": "top", + "template_id": "MOCK_T_reject", + "slot_payload": {"title": "det", "bullets": ["det"]}, + }] + _apply_ai_repair_proposals_to_zones(records, ["top"], zones) + coverage = _check_post_ai_coverage_invariant([unit], records) + assert coverage["pre_ai_section_ids"] == ["MOCK_S1", "MOCK_S2", "MOCK_S3"] + assert coverage["post_ai_section_ids"] == ["MOCK_S1", "MOCK_S2", "MOCK_S3"] + assert coverage["dropped_section_ids"] == [] + status = _summarize_ai_repair_status(records, coverage) + assert status["status"] == "applied" + assert status.get("human_review_required") is not True diff --git a/tests/test_imp47b_failure_surface.py b/tests/test_imp47b_failure_surface.py new file mode 100644 index 0000000..8862214 --- /dev/null +++ b/tests/test_imp47b_failure_surface.py @@ -0,0 +1,174 @@ +"""IMP-47B u8 — slide_status.ai_repair_status surfacing tests. + +Scope (this slice): + Helper ``_summarize_ai_repair_status(ai_repair_records, coverage_invariant)`` + (src/phase_z2_pipeline.py) composes u4 gather ``error`` + u5 + ``apply_status`` + u7 ``coverage_invariant`` into a single + ``ai_repair_status`` axis attached to ``slide_status``. Failure-axis + priority (highest → lowest): ``error`` > ``coverage_violated`` > + ``unsupported_kind`` > ``applied`` > ``ok``. ``human_review_required`` + flips True on the three failure axes for u11 frontend surfacing. + +The frontend reads ``slide_status.ai_repair_status`` to render a +notification per the IMP-47B policy ("AI 호출 실패 / proposal validation +실패 / coverage 미달 → frontend notification"). u9~u13 are out of scope. +The helper is pure (no IO, no AI call) so synthetic record / invariant +dicts exercise every branch directly. +""" +from __future__ import annotations + +from src.phase_z2_pipeline import _summarize_ai_repair_status + + +def _record( + *, + unit_index: int = 0, + apply_status: str | None = None, + error: str | None = None, + source_section_ids: list[str] | None = None, +) -> dict: + """Minimal Step 12 AI repair record stub — fields u8 reads.""" + return { + "unit_index": unit_index, + "source_section_ids": source_section_ids or [f"MOCK_S{unit_index}"], + "apply_status": apply_status, + "error": error, + } + + +_OK_COVERAGE = {"status": "ok", "dropped_section_ids": []} +_VIOLATED_COVERAGE = {"status": "violated", "dropped_section_ids": ["MOCK_S2"]} + + +# ─── Case 1 : empty pipeline → status='ok' ────────────────────────── + + +def test_empty_records_returns_ok_no_human_review(): + """No AI work executed → status='ok', human_review_required=False. + The flag-off default (no provisional units) lands here.""" + result = _summarize_ai_repair_status([], _OK_COVERAGE) + assert result["status"] == "ok" + assert result["human_review_required"] is False + assert result["counts"]["total"] == 0 + assert result["unsupported_kind_records"] == [] + assert result["error_records"] == [] + assert result["dropped_section_ids"] == [] + + +# ─── Case 2 : applied → status='applied', no human_review ─────────── + + +def test_applied_partial_overrides_marks_applied_no_human_review(): + """Successful AI repair (PARTIAL_OVERRIDES applied) is the happy + path. status='applied', no human_review surfacing.""" + records = [_record(apply_status="applied:partial_overrides")] + result = _summarize_ai_repair_status(records, _OK_COVERAGE) + assert result["status"] == "applied" + assert result["human_review_required"] is False + assert result["counts"]["applied"] == 1 + assert result["counts"]["error"] == 0 + + +# ─── Case 3 : unsupported kind → status='unsupported_kind' ────────── + + +def test_unsupported_kind_marks_human_review_required(): + """u5 surfaces ``unsupported_kind_for_reject_route:`` for + builder_options_patch / slot_mapping_proposal. u8 must classify as + human_review_required so the frontend renders a notification.""" + records = [ + _record( + unit_index=1, + apply_status="unsupported_kind_for_reject_route:builder_options_patch", + source_section_ids=["MOCK_S1"], + ), + ] + result = _summarize_ai_repair_status(records, _OK_COVERAGE) + assert result["status"] == "unsupported_kind" + assert result["human_review_required"] is True + assert result["counts"]["unsupported_kind"] == 1 + assert result["unsupported_kind_records"] == [ + { + "unit_index": 1, + "source_section_ids": ["MOCK_S1"], + "apply_status": "unsupported_kind_for_reject_route:builder_options_patch", + } + ] + + +# ─── Case 4 : gather error → status='error' (highest priority) ────── + + +def test_gather_error_marks_status_error_with_records(): + """``record['error']`` set means ``gather_step12_ai_repair_proposals`` + caught a router exception (AI call / validator). status='error' + is the highest-priority failure axis.""" + records = [_record( + unit_index=2, + error="ValueError: missing slot 'title'", + source_section_ids=["MOCK_S2"], + )] + result = _summarize_ai_repair_status(records, _OK_COVERAGE) + assert result["status"] == "error" + assert result["human_review_required"] is True + assert result["counts"]["error"] == 1 + assert result["error_records"] == [ + { + "unit_index": 2, + "source_section_ids": ["MOCK_S2"], + "error": "ValueError: missing slot 'title'", + } + ] + + +# ─── Case 5 : coverage violated → status='coverage_violated' ──────── + + +def test_coverage_violation_surfaces_dropped_sections(): + """u7 coverage_invariant 'violated' means the AI repair dropped a + section_id from the post-AI superset. dropped 절대 룰 — surface as + human_review_required.""" + records = [_record(apply_status="applied:partial_overrides")] + result = _summarize_ai_repair_status(records, _VIOLATED_COVERAGE) + assert result["status"] == "coverage_violated" + assert result["human_review_required"] is True + assert result["coverage_status"] == "violated" + assert result["dropped_section_ids"] == ["MOCK_S2"] + + +# ─── Case 6 : priority order — error > coverage > unsupported ─────── + + +def test_error_dominates_over_coverage_and_unsupported(): + """When multiple failure axes coexist, priority order is + error > coverage_violated > unsupported_kind > applied > ok.""" + records = [ + _record(unit_index=0, error="RuntimeError"), + _record(unit_index=1, + apply_status="unsupported_kind_for_reject_route:slot_mapping_proposal"), + _record(unit_index=2, apply_status="applied:partial_overrides"), + ] + result = _summarize_ai_repair_status(records, _VIOLATED_COVERAGE) + assert result["status"] == "error" + assert result["human_review_required"] is True + assert result["counts"]["error"] == 1 + assert result["counts"]["unsupported_kind"] == 1 + assert result["counts"]["applied"] == 1 + + +# ─── Case 7 : no_proposal + no_zone_match counted, not failure ────── + + +def test_no_proposal_and_no_zone_match_do_not_trigger_human_review(): + """Flag-off short-circuit, not_provisional, route_not_ai_adaptation, + and B4-mismatch (no_zone_match) are structural skips — not AI + failures. They count but do not flip human_review_required.""" + records = [ + _record(unit_index=0, apply_status="no_proposal"), + _record(unit_index=1, apply_status="no_zone_match"), + ] + result = _summarize_ai_repair_status(records, _OK_COVERAGE) + assert result["status"] == "ok" + assert result["human_review_required"] is False + assert result["counts"]["no_proposal"] == 1 + assert result["counts"]["no_zone_match"] == 1 diff --git a/tests/test_imp47b_mixed_reject_fill.py b/tests/test_imp47b_mixed_reject_fill.py new file mode 100644 index 0000000..551ce81 --- /dev/null +++ b/tests/test_imp47b_mixed_reject_fill.py @@ -0,0 +1,304 @@ +"""IMP-47B u12 — Initial plan_composition allow_provisional_fill for mixed direct+reject. + +Scope (this slice): + The u12 glue inserted in ``run_phase_z2_mvp1`` (src/phase_z2_pipeline.py, + right after the initial plan_composition + telemetry build, before the + Step 7-A layout override block) detects the mixed direct+reject case + (initial plan_composition returns a viable layout but some sections + remain uncovered) and re-runs plan_composition with: + + * a lookup_fn that passes ``allow_provisional=True`` (so chain_exhausted + sections synthesize a provisional rank-1 V4Match), and + * ``allow_provisional_fill=True`` (so uncovered sections receive a + last-resort provisional candidate fill in select_composition_units). + + This admits the mixed direct+reject case to the AI repair path + (IMP-47B u4/u5) on first render — the reject section becomes a + provisional unit (``provisional=True`` + ``label="reject"``) which Step + 12's reject route gather (u4) routes to AI fallback. + +Gate predicates (mirrored from src/phase_z2_pipeline.py u12 block): + * units non-empty (all-reject case is handled by IMP-30 u4 retry below) + * layout_preset is not None + * not override_section_assignments (operator override bypasses the gate) + * at least one section_id is uncovered after initial pass + +Guardrails proven by these tests: + * MDX 원문 100% 보존 — every section_id covered after mixed admission + (no silent drop). + * 자동 frame swap 금지 — mixed admission only re-runs plan_composition + with provisional flags; rank-1 reject judgment is preserved as the + provisional V4Match (no template_id swap to a different rank). + * Normal-path AI=0 — the mixed admission still emits the reject label; + AI activation is gated separately in router (config.py:19 default OFF). + * All-direct slides are a no-op — gate skips when no uncovered sections. + +This test file exercises ``plan_composition`` directly with synthetic +stub V4 matches + a stub lookup_fn that mirrors the u12 retry seam. +Stub naming follows the IMP-30 u3 convention (MOCK_ prefix mandatory, +no real catalog template_id / frame_id leakage). +""" +from __future__ import annotations + +from dataclasses import dataclass +from typing import Optional + +from src.phase_z2_composition import plan_composition + + +# ─── Synthetic V4Match duck-type (mirrors IMP-30 _StubV4Match) ─────────── + +@dataclass +class _StubV4Match: + template_id: str + frame_id: str + frame_number: int + confidence: float + label: str + v4_rank: Optional[int] = None + selection_path: str = "rank_1" + fallback_reason: Optional[str] = None + provisional: bool = False + + +@dataclass +class _StubSection: + section_id: str + title: str = "" + raw_content: str = "" + + +_LABEL_TO_STATUS = { + "use_as_is": "matched_zone", + "light_edit": "adapt_matched_zone", + "restructure": "extract_matched_zone", + "reject": "fallback_candidate", +} + +_ALLOWED_STATUSES = {"matched_zone", "adapt_matched_zone"} + + +def _make_normal_lookup(matches_by_section: dict[str, _StubV4Match]): + """Lookup_fn that returns the synthetic rank-1 match (no provisional path). + + Mirrors the pipeline initial ``lookup_fn`` at + src/phase_z2_pipeline.py:3456-3465 (no ``allow_provisional`` kwarg). + """ + def _fn(section_id: str): + return matches_by_section.get(section_id) + return _fn + + +def _make_provisional_lookup(matches_by_section: dict[str, _StubV4Match]): + """Lookup_fn that flags reject rank-1 matches provisional. + + Mirrors the pipeline u12 retry ``_lookup_fn_mixed_admission`` at the + inserted block — for reject judgments, returns a provisional=True + rank-1 V4Match-shaped stub so plan_composition's last-resort fill + pool can see it (provisional candidates are otherwise filtered out + of the normal greedy pass). + """ + def _fn(section_id: str): + m = matches_by_section.get(section_id) + if m is not None and m.label == "reject": + # Synthesize the provisional shape that + # lookup_v4_match_with_fallback returns when allow_provisional + # is True: provisional=True + selection_path="provisional_rank_1". + return _StubV4Match( + template_id=m.template_id, + frame_id=m.frame_id, + frame_number=m.frame_number, + confidence=m.confidence, + label=m.label, + v4_rank=1, + selection_path="provisional_rank_1", + provisional=True, + ) + return m + return _fn + + +def _make_candidates_lookup_empty(): + def _fn(section_id: str): + return [] + return _fn + + +# ─── u12 case 1 : mechanic — mixed admission via provisional lookup + fill ──── + + +def test_u12_mechanic_mixed_admission_covers_reject_section_via_provisional_fill(): + """Positive proof. Mixed direct+reject (S1=use_as_is, S2=reject). + + Without u12 (initial path: normal lookup + allow_provisional_fill=False), + plan_composition returns only the S1 unit and S2 is silently dropped. + + With u12 (retry: provisional lookup + allow_provisional_fill=True), + plan_composition returns both units; S2 is a provisional unit with + label="reject" — ready to be picked up by Step 12's reject route + gather (IMP-47B u4). + """ + sections = [_StubSection("S1"), _StubSection("S2")] + matches = { + "S1": _StubV4Match( + template_id="MOCK_template_direct_a", + frame_id="MOCK_frame_001", + frame_number=1, + confidence=0.92, + label="use_as_is", + v4_rank=1, + ), + "S2": _StubV4Match( + template_id="MOCK_template_reject_a", + frame_id="MOCK_frame_002", + frame_number=2, + confidence=0.30, + label="reject", + v4_rank=1, + ), + } + + # Pre-u12 baseline — normal lookup, no provisional fill. + units_pre, preset_pre, _ = plan_composition( + sections, + _make_normal_lookup(matches), + _LABEL_TO_STATUS, + _ALLOWED_STATUSES, + v4_candidates_lookup_fn=_make_candidates_lookup_empty(), + ) + covered_pre = {sid for u in units_pre for sid in u.source_section_ids} + assert "S1" in covered_pre, "S1 (use_as_is) must cover pre-u12" + assert "S2" not in covered_pre, ( + "Pre-u12 baseline regression: reject S2 should be uncovered (no provisional fill)" + ) + + # u12 mixed-admission retry — provisional lookup + allow_provisional_fill=True. + units_post, preset_post, _ = plan_composition( + sections, + _make_provisional_lookup(matches), + _LABEL_TO_STATUS, + _ALLOWED_STATUSES, + v4_candidates_lookup_fn=_make_candidates_lookup_empty(), + allow_provisional_fill=True, + ) + covered_post = {sid for u in units_post for sid in u.source_section_ids} + assert covered_post == {"S1", "S2"}, ( + "u12 mixed admission must cover every section (no text loss)" + ) + assert preset_post is not None + # The S2 unit must be marked provisional so the reject route gather + # (src/phase_z2_ai_fallback/step12.py:133-136) admits it. + s2_unit = next(u for u in units_post if "S2" in u.source_section_ids) + assert s2_unit.provisional is True, ( + "Reject S2 unit must be provisional so Step 12 reject route admits it" + ) + assert s2_unit.label == "reject" + # Frame template id is preserved — no auto frame swap. + assert s2_unit.frame_template_id == "MOCK_template_reject_a" + + +# ─── u12 case 2 : gate — all-direct slides are a no-op ────────────────────── + + +def test_u12_gate_all_direct_yields_no_uncovered_sections(): + """No-op proof. When every section is auto-renderable (use_as_is or + light_edit), the initial plan_composition covers everything — the + u12 mixed-admission gate's ``_u12_uncovered_ids`` list is empty and + the retry is skipped. + """ + sections = [_StubSection("S1"), _StubSection("S2")] + matches = { + "S1": _StubV4Match( + template_id="MOCK_template_direct_a", + frame_id="MOCK_frame_001", + frame_number=1, + confidence=0.92, + label="use_as_is", + v4_rank=1, + ), + "S2": _StubV4Match( + template_id="MOCK_template_direct_b", + frame_id="MOCK_frame_002", + frame_number=2, + confidence=0.81, + label="light_edit", + v4_rank=1, + ), + } + units, preset, _ = plan_composition( + sections, + _make_normal_lookup(matches), + _LABEL_TO_STATUS, + _ALLOWED_STATUSES, + v4_candidates_lookup_fn=_make_candidates_lookup_empty(), + ) + covered = {sid for u in units for sid in u.source_section_ids} + assert covered == {"S1", "S2"}, "All-direct must cover every section pre-u12" + # Predicate from src/phase_z2_pipeline.py u12 block: + uncovered = [s.section_id for s in sections if s.section_id not in covered] + assert uncovered == [], ( + "u12 gate must classify all-direct as no-op (uncovered list empty)" + ) + assert preset is not None + + +# ─── u12 case 3 : gate — initial empty units bypass u12 (IMP-30 retry owns it) ── + + +def test_u12_gate_skips_when_initial_units_empty(): + """All-reject case is owned by IMP-30 u4 retry (units=[] guard at + src/phase_z2_pipeline.py:3646). u12 mixed-admission must NOT compete + with that path; the gate ``units and layout_preset is not None`` + short-circuits when the initial plan_composition returns nothing. + """ + sections = [_StubSection("S1")] + matches = { + "S1": _StubV4Match( + template_id="MOCK_template_reject_a", + frame_id="MOCK_frame_002", + frame_number=2, + confidence=0.30, + label="reject", + v4_rank=1, + ), + } + units, preset, _ = plan_composition( + sections, + _make_normal_lookup(matches), + _LABEL_TO_STATUS, + _ALLOWED_STATUSES, + v4_candidates_lookup_fn=_make_candidates_lookup_empty(), + ) + # All-reject initial pass: no auto-renderable units, no layout preset. + assert units == [] and preset is None + # u12 gate predicate would short-circuit on `units` truthiness: + gate_active = bool(units) and preset is not None + assert gate_active is False, ( + "u12 mixed-admission gate must skip the all-reject case (IMP-30 u4 owns it)" + ) + + +# ─── u12 case 4 : code-path anchor — pipeline source contains u12 marker ──── + + +def test_u12_pipeline_source_contains_mixed_admission_marker(): + """Anchor test. Ensures the inserted u12 block in src/phase_z2_pipeline.py + is reachable (not silently removed by a future refactor). + + Asserts on the marker comment + ``imp47b_u12_mixed_admission`` debug key + + ``allow_provisional_fill=True`` invocation co-located in the file. + Cheap structural guard — does not run the heavy pipeline. + """ + from pathlib import Path + src_path = Path(__file__).resolve().parent.parent / "src" / "phase_z2_pipeline.py" + text = src_path.read_text(encoding="utf-8") + assert "IMP-47B u12 — mixed direct+reject first-render admission" in text, ( + "u12 marker comment missing from pipeline — block may have been removed" + ) + assert "imp47b_u12_mixed_admission" in text, ( + "u12 comp_debug telemetry key missing" + ) + # The mixed-admission retry must pass allow_provisional_fill=True. + # Anchor against the helper function name + the kwarg co-occurrence. + assert "_lookup_fn_mixed_admission" in text + assert "allow_provisional_fill=True" in text diff --git a/tests/test_imp47b_override_provisional.py b/tests/test_imp47b_override_provisional.py new file mode 100644 index 0000000..d048786 --- /dev/null +++ b/tests/test_imp47b_override_provisional.py @@ -0,0 +1,180 @@ +"""IMP-47B u3 — override-selected reject frames are admitted as provisional. + +Scope (this slice): + Helper `_apply_frame_override_to_unit` (src/phase_z2_pipeline.py) covers + the three probe layers used by the `--override-frame` path: + + 1. ``v4_candidates`` exact match (non-reject; existing behaviour). + 2. Full 32 V4 judgments probe (reject inclusive) — when the user + picks a reject frame, the unit is promoted to + ``provisional=True`` with ``label="reject"`` so Step 12 + (IMP-47B u4) admits the AI repair path. + 3. Raw fall-through (template_id only) — no provisional promotion, + no label mutation. + +Frame visual / contract stay untouched per the AI isolation contract +(frame auto-swap forbidden — AI re-places content into the existing +frame only). Sibling test confirms a non-reject override still goes +through the v4_candidates path without provisional promotion. + +Synthetic naming convention mirrors tests/test_phase_z2_imp30_first_render.py +(MOCK_ prefix mandatory, no real catalog template_id / frame_id leakage). +""" +from __future__ import annotations + +from dataclasses import dataclass, field +from typing import Optional + +from src.phase_z2_pipeline import _apply_frame_override_to_unit + + +@dataclass +class _StubCandidate: + template_id: str + frame_id: str + frame_number: int + confidence: float + label: str + + +@dataclass +class _StubUnit: + source_section_ids: list[str] + frame_template_id: Optional[str] = None + frame_id: Optional[str] = None + frame_number: int = 0 + confidence: float = 0.0 + label: Optional[str] = None + provisional: bool = False + v4_candidates: list = field(default_factory=list) + + +def _v4_with_reject(section_id: str, target_tid: str) -> dict: + """Synthetic V4 dict with target_tid mapped to a reject judgment. + + Mirrors the production V4 schema surface (``mdx_sections`` → + ``judgments_full32`` → list of judgment dicts with template_id / + frame_id / frame_number / confidence / label). Two judgments so we + can also assert that the helper picks the reject entry rather than + the first non-reject one when the template_ids differ. + """ + return { + "mdx_sections": { + section_id: { + "judgments_full32": [ + { + "template_id": "MOCK_T_other", + "frame_id": "F_other", + "frame_number": 1, + "confidence": 0.85, + "label": "use_as_is", + }, + { + "template_id": target_tid, + "frame_id": "F_reject", + "frame_number": 32, + "confidence": 0.40, + "label": "reject", + }, + ], + }, + }, + } + + +# ─── Case 1 : reject override → provisional promotion ──────────── + + +def test_override_to_reject_judgment_marks_unit_provisional(): + """User picks a reject frame → unit.label=reject, provisional=True. + + Frame metadata is sourced from the reject judgment (frame_id / + frame_number / confidence) so Step 9 metadata stays consistent. + """ + unit = _StubUnit( + source_section_ids=["MOCK_S1"], + frame_template_id="MOCK_T_auto", + frame_id="F_auto", + frame_number=5, + confidence=0.90, + label="use_as_is", + provisional=False, + ) + v4 = _v4_with_reject("MOCK_S1", "MOCK_T_reject") + + meta = _apply_frame_override_to_unit(unit, "MOCK_T_reject", v4) + + assert meta == "v4_reject_judgment_provisional" + assert unit.frame_template_id == "MOCK_T_reject" + assert unit.frame_id == "F_reject" + assert unit.frame_number == 32 + assert unit.confidence == 0.40 + assert unit.label == "reject" + assert unit.provisional is True + + +# ─── Case 2 : non-reject override → existing v4_candidates path ─── + + +def test_override_to_v4_candidate_keeps_non_provisional(): + """User picks a non-reject candidate → existing v4_candidates path. + + Helper takes the early v4_candidates branch without consulting the + full 32 judgments. provisional remains False (normal-path AI=0 + contract — IMP-30 / IMP-47B router gate intact for this unit). + """ + unit = _StubUnit( + source_section_ids=["MOCK_S2"], + frame_template_id="MOCK_T_auto", + frame_id="F_auto", + frame_number=3, + confidence=0.95, + label="use_as_is", + provisional=False, + v4_candidates=[ + _StubCandidate( + template_id="MOCK_T_pick", + frame_id="F_pick", + frame_number=2, + confidence=0.85, + label="light_edit", + ), + ], + ) + v4 = {"mdx_sections": {}} # full-judgment probe must NOT be reached + + meta = _apply_frame_override_to_unit(unit, "MOCK_T_pick", v4) + + assert meta == "v4_candidates" + assert unit.frame_template_id == "MOCK_T_pick" + assert unit.frame_id == "F_pick" + assert unit.label == "light_edit" + assert unit.provisional is False + + +# ─── Case 3 : unknown template → raw fall-through (no provisional) ─ + + +def test_override_unknown_template_falls_through_without_provisional(): + """Template ID absent from v4_candidates AND from judgments_full32 → + raw_template_id_only path. No provisional flag, no label change. + """ + unit = _StubUnit( + source_section_ids=["MOCK_S3"], + frame_template_id="MOCK_T_auto", + frame_id="F_auto", + frame_number=4, + confidence=0.92, + label="use_as_is", + provisional=False, + ) + v4 = {"mdx_sections": {}} + + meta = _apply_frame_override_to_unit(unit, "MOCK_T_unknown", v4) + + assert meta == "raw_template_id_only" + assert unit.frame_template_id == "MOCK_T_unknown" + # frame_id / label unchanged — caller's print path warns on this case. + assert unit.frame_id == "F_auto" + assert unit.label == "use_as_is" + assert unit.provisional is False diff --git a/tests/test_imp47b_payload_apply.py b/tests/test_imp47b_payload_apply.py new file mode 100644 index 0000000..3d63489 --- /dev/null +++ b/tests/test_imp47b_payload_apply.py @@ -0,0 +1,223 @@ +"""IMP-47B u5 — PARTIAL_OVERRIDES apply tests. + +Scope (this slice): + Helper ``_apply_ai_repair_proposals_to_zones`` (src/phase_z2_pipeline.py) + merges ``proposal.payload.slots`` into ``zones_data[k]["slot_payload"]`` + for PARTIAL_OVERRIDES proposals only, and loud-fails out-of-scope + proposal kinds (builder_options_patch, slot_mapping_proposal) with an + explicit ``apply_status`` marker. + +The IMP-33 u5 validator inside ``route_ai_fallback`` already enforces +declared-slot completeness — the apply helper is therefore a structural +merge over the validator's contract, not a per-slot guard re-implementation. + +u6 (step12_ai_repair.json audit), u7 (coverage invariant), and u8 +(slide_status surfacing) are out of scope for this unit. +""" +from __future__ import annotations + +from src.phase_z2_pipeline import _apply_ai_repair_proposals_to_zones + + +def _record( + *, + unit_index: int, + proposal: dict | None, + source_section_ids: list[str] | None = None, +) -> dict: + """Synthetic gather_step12_ai_repair_proposals record.""" + return { + "unit_index": unit_index, + "source_section_ids": source_section_ids or [f"MOCK_S{unit_index}"], + "frame_template_id": "MOCK_T", + "label": "reject", + "route_hint": "ai_adaptation_required", + "provisional": True, + "ai_called": proposal is not None, + "skip_reason": None, + "proposal": proposal, + "error": None, + "cache_key": "MOCK_F::abc" if proposal is not None else None, + "fingerprints": {"contract_sha": "x", "partial_sha": "y", "catalog_sha": ""} + if proposal is not None + else None, + } + + +def _zone(*, position: str, slot_payload: dict | None = None) -> dict: + """Synthetic zones_data entry — only fields the apply helper touches.""" + return { + "position": position, + "template_id": "MOCK_T", + "slot_payload": slot_payload if slot_payload is not None else {}, + } + + +# ─── Case 1 : PARTIAL_OVERRIDES → merged + applied marker ────────── + + +def test_partial_overrides_merges_slots_into_zone_slot_payload(): + """The validator already guarantees declared-slot completeness, so + apply is a structural ``dict.update``. Pre-existing meta keys + (``_truncated_count``) survive; declared slot values are replaced + by the AI proposal values.""" + proposal = { + "proposal_kind": "partial_overrides", + "payload": { + "slots": { + "title": "AI title", + "bullets": ["AI bullet 1", "AI bullet 2"], + } + }, + "rationale": "MOCK", + } + records = [_record(unit_index=0, proposal=proposal)] + zones = [ + _zone( + position="top", + slot_payload={ + "title": "deterministic title", + "bullets": ["det bullet"], + "_truncated_count": 0, + }, + ) + ] + + _apply_ai_repair_proposals_to_zones(records, ["top"], zones) + + assert records[0]["apply_status"] == "applied:partial_overrides" + assert zones[0]["slot_payload"]["title"] == "AI title" + assert zones[0]["slot_payload"]["bullets"] == ["AI bullet 1", "AI bullet 2"] + # meta keys not in proposal must survive the merge + assert zones[0]["slot_payload"]["_truncated_count"] == 0 + + +# ─── Case 2 : BUILDER_OPTIONS_PATCH → loud-fail unsupported_kind ─── + + +def test_builder_options_patch_is_unsupported_for_reject_route(): + """Builder-options application is out-of-scope for IMP-47B reject + route (see Stage 2 plan). u5 must mark, not apply — the zone + slot_payload stays byte-identical and the record carries the + ``unsupported_kind_for_reject_route:`` marker so u8 can + surface human_review downstream.""" + proposal = { + "proposal_kind": "builder_options_patch", + "payload": {"font_size_px": 14}, + "rationale": "MOCK", + } + records = [_record(unit_index=0, proposal=proposal)] + original_slot_payload = {"title": "deterministic"} + zones = [_zone(position="top", slot_payload=dict(original_slot_payload))] + + _apply_ai_repair_proposals_to_zones(records, ["top"], zones) + + assert ( + records[0]["apply_status"] + == "unsupported_kind_for_reject_route:builder_options_patch" + ) + assert zones[0]["slot_payload"] == original_slot_payload + + +# ─── Case 3 : SLOT_MAPPING_PROPOSAL → loud-fail unsupported_kind ─── + + +def test_slot_mapping_proposal_is_unsupported_for_reject_route(): + """Slot-mapping (restructuring) application is also out-of-scope — + builder-options + slot-mapping share the same marker path.""" + proposal = { + "proposal_kind": "slot_mapping_proposal", + "payload": {"slots": {"title": "x"}}, + "rationale": "MOCK", + } + records = [_record(unit_index=0, proposal=proposal)] + zones = [_zone(position="top", slot_payload={"title": "deterministic"})] + + _apply_ai_repair_proposals_to_zones(records, ["top"], zones) + + assert ( + records[0]["apply_status"] + == "unsupported_kind_for_reject_route:slot_mapping_proposal" + ) + assert zones[0]["slot_payload"] == {"title": "deterministic"} + + +# ─── Case 4 : no proposal (router short-circuit / not_provisional) ── + + +def test_record_without_proposal_marked_no_proposal_and_zone_untouched(): + """Flag-off short-circuit and non-AI-route units carry + ``proposal=None``. apply_status must distinguish "no proposal to + apply" from real apply outcomes so u8 can categorise the per-unit + status without re-reading skip_reason.""" + records = [_record(unit_index=0, proposal=None)] + zones = [_zone(position="top", slot_payload={"title": "deterministic"})] + + _apply_ai_repair_proposals_to_zones(records, ["top"], zones) + + assert records[0]["apply_status"] == "no_proposal" + assert zones[0]["slot_payload"] == {"title": "deterministic"} + + +# ─── Case 5 : proposal exists but no matching zone (B4 mismatch) ──── + + +def test_proposal_for_unit_without_zone_match_marked_no_zone_match(): + """When a unit is dropped from zones_data (B4 mismatch or FitError + in the Step 12 render loop) but still gathered an AI proposal, + apply must surface the mismatch via ``no_zone_match`` rather than + silently dropping the proposal or writing into a wrong zone.""" + proposal = { + "proposal_kind": "partial_overrides", + "payload": {"slots": {"title": "AI title"}}, + "rationale": "MOCK", + } + records = [_record(unit_index=0, proposal=proposal)] + # unit_positions[0]="top" but zones_data has only the bottom zone + # → no match for the dropped unit's position. + zones = [_zone(position="bottom", slot_payload={"title": "other zone"})] + + _apply_ai_repair_proposals_to_zones(records, ["top"], zones) + + assert records[0]["apply_status"] == "no_zone_match" + # untouched zone — apply must not bleed into a different position + assert zones[0]["slot_payload"] == {"title": "other zone"} + + +# ─── Case 6 : mixed records — independent per-record classification ── + + +def test_mixed_records_classified_independently(): + """All five apply_status branches coexist in one batch — confirms + the helper does not short-circuit on the first non-applied record.""" + records = [ + _record(unit_index=0, proposal={ + "proposal_kind": "partial_overrides", + "payload": {"slots": {"title": "AI"}}, + "rationale": "", + }), + _record(unit_index=1, proposal={ + "proposal_kind": "builder_options_patch", + "payload": {"font_size_px": 14}, + "rationale": "", + }), + _record(unit_index=2, proposal=None), + ] + zones = [ + _zone(position="top", slot_payload={"title": "det"}), + _zone(position="middle", slot_payload={"title": "det"}), + _zone(position="bottom", slot_payload={"title": "det"}), + ] + + _apply_ai_repair_proposals_to_zones( + records, ["top", "middle", "bottom"], zones, + ) + + assert [r["apply_status"] for r in records] == [ + "applied:partial_overrides", + "unsupported_kind_for_reject_route:builder_options_patch", + "no_proposal", + ] + assert zones[0]["slot_payload"]["title"] == "AI" + assert zones[1]["slot_payload"]["title"] == "det" + assert zones[2]["slot_payload"]["title"] == "det" diff --git a/tests/test_imp47b_step12_ai_wiring.py b/tests/test_imp47b_step12_ai_wiring.py new file mode 100644 index 0000000..8135239 --- /dev/null +++ b/tests/test_imp47b_step12_ai_wiring.py @@ -0,0 +1,154 @@ +"""IMP-47B u4 + u6 — Step 12 AI repair wiring + audit artifact tests. + +Scope (this slice): + * u4 — Helper ``_run_step12_ai_repair`` (src/phase_z2_pipeline.py) + wires the pipeline's local route-hint helper (``_imp05_route_hint``), + the frame contract loader (``get_contract``), and a + templates/phase_z2/families partial reader + (``_load_frame_partial_html``) into + ``gather_step12_ai_repair_proposals``. + * u6 — The gather records flow into ``_write_step_artifact`` under + ``step12_ai_repair.json``. The audit shape must stay + JSON-serialisable (no Pydantic / dataclass leakage) so the artifact + write never raises on real runs. + +The router short-circuits when ``settings.ai_fallback_enabled`` is +False (default), so AI=0 for non-AI-route units stays a structural +guarantee. Synthetic naming mirrors tests/test_imp47b_override_provisional.py +(MOCK_ prefix; no real catalog template_id / frame_id leakage). + +u5 (PARTIAL_OVERRIDES apply), u7 (coverage invariant), and u8 +(slide_status surfacing) are out of scope for this unit. +""" +from __future__ import annotations + +import json +from dataclasses import dataclass, field + +from src.phase_z2_pipeline import ( + _load_frame_partial_html, + _run_step12_ai_repair, + _write_step_artifact, +) + + +@dataclass +class _StubUnit: + label: str | None + provisional: bool + frame_template_id: str = "MOCK_T_x" + frame_id: str = "MOCK_F_x" + source_section_ids: list[str] = field(default_factory=lambda: ["MOCK_S1"]) + raw_content: str = "MOCK_raw" + v4_rank: int | None = 1 + cardinality: int | None = None + layout_preset: str = "" + zone_position: str = "" + source_shape: str = "paragraph" + h3_count: int = 0 + char_count: int = 0 + + +# ─── Case 1 : mixed units → per-unit skip_reason classification ───── + + +def test_mixed_units_classified_by_route_and_provisional_flag(): + """Reject + restructure provisional both route to ai_adaptation; + use_as_is / light_edit / non-provisional skip without router call. + + With ai_fallback_enabled=False (default) the router returns None, + so the two ai_adaptation provisional units record + ``skip_reason='router_short_circuit'``; the rest record their + structural skip_reason (not_provisional / route_not_ai_adaptation). + """ + units = [ + _StubUnit(label="use_as_is", provisional=False), + _StubUnit(label="light_edit", provisional=True), + _StubUnit(label="restructure", provisional=True), + _StubUnit(label="reject", provisional=True), + _StubUnit(label="restructure", provisional=False), + ] + records = _run_step12_ai_repair(units) + assert [r["skip_reason"] for r in records] == [ + "not_provisional", + "route_not_ai_adaptation:deterministic_minor_adjustment", + "router_short_circuit", + "router_short_circuit", + "not_provisional", + ] + assert [r["route_hint"] for r in records] == [ + "direct_render", + "deterministic_minor_adjustment", + "ai_adaptation_required", + "ai_adaptation_required", + "ai_adaptation_required", + ] + assert all(r["ai_called"] is False for r in records) + + +# ─── Case 2 : reject provisional unit reaches AI gate ─────────────── + + +def test_reject_provisional_unit_reaches_router_short_circuit(): + """Reject + provisional → route_hint=ai_adaptation_required. + + Router short-circuit (flag-off default) is the only thing keeping + AI from firing; the wiring proves reject is no longer blocked by + Step 12's bespoke design_reference_only skip (removed by u2). + """ + records = _run_step12_ai_repair([_StubUnit(label="reject", provisional=True)]) + assert records[0]["route_hint"] == "ai_adaptation_required" + assert records[0]["skip_reason"] == "router_short_circuit" + assert records[0]["ai_called"] is False + # cache_key / fingerprints populated only after the route + provisional + # gates pass — confirms gather reached the AI-eligible code path. + assert records[0]["cache_key"] is not None + assert records[0]["fingerprints"] is not None + + +# ─── Case 3 : frame visual loader degrades on missing partial ────── + + +def test_load_frame_partial_html_returns_empty_for_missing_file(): + """__empty__ shell (IMP-30) and any unknown template_id → "". + + Keeps gather() crash-free for the IMP-30 first-render-invariant + path where the synthesized empty-shell unit has no families partial. + """ + assert _load_frame_partial_html("__empty__") == "" + assert _load_frame_partial_html("MOCK_T_does_not_exist") == "" + + +# ─── Case 4 (u6) : audit artifact write is JSON-serialisable ──────── + + +def test_step12_ai_repair_artifact_writes_json_serialisable_records(tmp_path): + """IMP-47B u6 — gather records feed ``_write_step_artifact`` as the + ``step12_ai_repair.json`` audit. Confirms the gather schema contains + only JSON-native primitives (str / int / None / bool / list / dict) + so the artifact write never raises on real runs and the audit + payload preserves per-unit ``route_hint`` / ``skip_reason`` / + ``ai_called`` for reviewers. + """ + records = _run_step12_ai_repair([ + _StubUnit(label="reject", provisional=True), + _StubUnit(label="use_as_is", provisional=False), + ]) + fpath = _write_step_artifact( + tmp_path, 12, "ai_repair", + data={"per_unit": records}, + outputs=["step12_ai_repair.json"], + ) + assert fpath.is_file() + assert fpath.name == "step12_ai_repair.json" + payload = json.loads(fpath.read_text(encoding="utf-8")) + assert payload["step_num"] == 12 + assert payload["step_name"] == "ai_repair" + assert payload["step_status"] == "done" + per_unit = payload["data"]["per_unit"] + assert len(per_unit) == 2 + assert per_unit[0]["route_hint"] == "ai_adaptation_required" + assert per_unit[0]["skip_reason"] == "router_short_circuit" + assert per_unit[0]["ai_called"] is False + assert per_unit[1]["route_hint"] == "direct_render" + assert per_unit[1]["skip_reason"] == "not_provisional" diff --git a/tests/test_phase_z2_ai_fallback_config.py b/tests/test_phase_z2_ai_fallback_config.py index 877dab3..ee80048 100644 --- a/tests/test_phase_z2_ai_fallback_config.py +++ b/tests/test_phase_z2_ai_fallback_config.py @@ -44,3 +44,43 @@ def test_ai_fallback_budget_and_circuit_defaults_locked() -> None: s = Settings() assert s.ai_fallback_budget_per_run == 10 assert s.ai_fallback_circuit_breaker_threshold == 5 + + +# IMP-46 u5 — auto-cache opt-in setting default lock. +# The CLI flag ``--auto-cache`` in src/phase_z2_pipeline.py mutates this +# setting at parse time. The default MUST stay OFF so the dual-gate +# contract (visual_check_passed AND user_approved) survives without an +# explicit operator opt-in. + + +def test_ai_fallback_auto_cache_default_off() -> None: + s = Settings() + assert s.ai_fallback_auto_cache is False, ( + "IMP-46 u5 auto-cache MUST default OFF; the dual-gate contract " + "(visual_check_passed AND user_approved) survives without an " + "explicit --auto-cache opt-in." + ) + + +# IMP-47B u1 — reject route hint policy correction. +# Prior to 2026-05-21 the reject V4 label routed to ``design_reference_only`` +# (no AI). The user policy correction (issue #76) reroutes reject to +# ``ai_adaptation_required`` so the rank-1 reject frame is kept and the AI +# re-maps MDX content into its declared slots. Activation remains gated by +# ``ai_fallback_enabled`` (default OFF preserves the normal-path AI=0 +# contract — see test_ai_fallback_master_flag_default_off above). + + +def test_reject_route_hint_routes_to_ai_adaptation() -> None: + from src.phase_z2_pipeline import _IMP05_ROUTE_HINTS, _imp05_route_hint + + assert _IMP05_ROUTE_HINTS["reject"] == "ai_adaptation_required", ( + "IMP-47B u1: reject must route to ai_adaptation_required so the " + "rank-1 reject frame is retained and AI re-maps MDX content into " + "its slots (frame auto-swap forbidden)." + ) + assert _imp05_route_hint("reject") == "ai_adaptation_required" + # Sibling routes unchanged — guardrail against accidental drift. + assert _imp05_route_hint("use_as_is") == "direct_render" + assert _imp05_route_hint("light_edit") == "deterministic_minor_adjustment" + assert _imp05_route_hint("restructure") == "ai_adaptation_required" diff --git a/tests/test_phase_z2_v4_fallback.py b/tests/test_phase_z2_v4_fallback.py index 158f42e..87ad6c3 100644 --- a/tests/test_phase_z2_v4_fallback.py +++ b/tests/test_phase_z2_v4_fallback.py @@ -237,10 +237,10 @@ def test_restructure_reject_preserved_as_non_direct_evidence(patch_selector_deps by_rank = {c["rank"]: c for c in candidates} assert set(by_rank.keys()) == {1, 2, 3} - # rank-1 reject — non-direct, design_reference_only + # rank-1 reject — non-direct, ai_adaptation_required (IMP-47B u1 policy correction) assert by_rank[1]["v4_label"] == "reject" assert by_rank[1]["filtered_for_direct_execution"] is True - assert by_rank[1]["route_hint"] == "design_reference_only" + assert by_rank[1]["route_hint"] == "ai_adaptation_required" # rank-2 restructure — non-direct, ai_adaptation_required assert by_rank[2]["v4_label"] == "restructure"