feat(#76): IMP-47B reject-as-AI-adaptation activation (u1~u13 backend + tests)
- u1~u9: AI fallback infrastructure (router/prompts/schema/validator) + Step 12 hook - u10: e2e reject chain (writes final.html with AI-repaired slot, full coverage) - u11: frontend wiring deferred to follow-up commit (split from IMP-41 hunks) - u12: coverage_invariant guard - u13: cache save gate (visual_check PASS + user_approved/auto_cache) — Codex #22 verified Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
6
.gitignore
vendored
6
.gitignore
vendored
@@ -8,7 +8,11 @@ dist/
|
|||||||
build/
|
build/
|
||||||
.venv/
|
.venv/
|
||||||
node_modules/
|
node_modules/
|
||||||
data/
|
data/*
|
||||||
|
# IMP-46 u6 — track only the frame_cache directory marker; cached payloads stay ignored.
|
||||||
|
!data/frame_cache/
|
||||||
|
data/frame_cache/*
|
||||||
|
!data/frame_cache/.gitkeep
|
||||||
|
|
||||||
# session workspace (push X — 작업 흐름 trace, 사용자 결정 2026-05-08)
|
# session workspace (push X — 작업 흐름 trace, 사용자 결정 2026-05-08)
|
||||||
forex/
|
forex/
|
||||||
|
|||||||
@@ -51,5 +51,5 @@ Phase Q `content_editor.py` 는 **Archive Candidate** ([`PHASE-Q-AUDIT.md`](PHAS
|
|||||||
| Step 12 entry | `src.phase_z2_ai_fallback.step12.gather_step12_ai_repair_proposals` — IMP-30 provisional gate (`not_provisional` skip) AND reject gate (`design_reference_only_no_ai` skip) AND non-AI route catch-all run BEFORE `route_ai_fallback`. |
|
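| Step 12 entry | `src.phase_z2_ai_fallback.step12.gather_step12_ai_repair_proposals` — IMP-30 provisional gate (`not_provisional` skip) AND non-AI route catch-all (`route_not_ai_adaptation:<hint>` skip) run BEFORE `route_ai_fallback`. The former reject gate (`design_reference_only_no_ai` skip) was removed by IMP-47B: the `reject` label now routes to `ai_adaptation_required`. |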
|
||||||
| Step 17 entry | `src.phase_z2_ai_fallback.step17.gather_step17_ai_repair_proposals` — STRUCTURALLY BLOCKED. Every unit returns `skip_reason="step17_ai_blocked_imp_34_35_prerequisites_missing"`. Module does NOT import `route_ai_fallback` / `AiFallbackClient` / `anthropic`. |
|
| Step 17 entry | `src.phase_z2_ai_fallback.step17.gather_step17_ai_repair_proposals` — STRUCTURALLY BLOCKED. Every unit returns `skip_reason="step17_ai_blocked_imp_34_35_prerequisites_missing"`. Module does NOT import `route_ai_fallback` / `AiFallbackClient` / `anthropic`. |
|
||||||
| Cascade order | `src.phase_z2_ai_fallback.step17.OVERFLOW_CASCADE_ORDER = (DETERMINISTIC, POPUP, AI_REPAIR, USER_OVERRIDE)` — single source of truth for Step 17 consumers. Aligns with line 16 of this doc. |
|
| Cascade order | `src.phase_z2_ai_fallback.step17.OVERFLOW_CASCADE_ORDER = (DETERMINISTIC, POPUP, AI_REPAIR, USER_OVERRIDE)` — single source of truth for Step 17 consumers. Aligns with line 16 of this doc. |
|
||||||
| IMP-46 cache gate | `src.phase_z2_ai_fallback.cache.save_proposal(..., visual_check_passed, user_approved)` raises `AiFallbackCacheGateError` unless BOTH gates are True; storage backend then raises `NotImplementedError` (IMP-46 marker). `read_proposal` returns `None` until IMP-46 lands a backend. |
|
| IMP-46 cache gate | `src.phase_z2_ai_fallback.cache.save_proposal(..., visual_check_passed, user_approved, auto_cache=False)` raises `AiFallbackCacheGateError` unless `visual_check_passed=True` AND (`user_approved=True` OR `auto_cache=True`). Persistent JSON backend at `data/frame_cache/{frame_id}/{signature_hash}.json` (u2); cache key = structural signature over 8 axes (u1+u4); read-side fingerprint invalidation via `read_proposal(..., fingerprints=...)` strict equality (u3); `--auto-cache` CLI flag + `settings.ai_fallback_auto_cache` (default `False`) bypasses ONLY the `user_approved` gate (u5); repo root tracked via `data/frame_cache/.gitkeep` with cached payloads git-ignored (u6). `read_proposal` returns `None` on missing / corrupt / fingerprint-mismatched entries — cache is a hint, never a hard dependency. |
|
||||||
| AST isolation | `tests/phase_z2_ai_fallback/test_ast_isolation.py` parses every `*.py` under `src/phase_z2_ai_fallback/` and forbids Phase Q runtime / Kei client / `src.phase_z2_*` (non-fallback) imports. Whitelist = `src.config` + intra-package + stdlib + `anthropic` + `pydantic`. |
|
| AST isolation | `tests/phase_z2_ai_fallback/test_ast_isolation.py` parses every `*.py` under `src/phase_z2_ai_fallback/` and forbids Phase Q runtime / Kei client / `src.phase_z2_*` (non-fallback) imports. Whitelist = `src.config` + intra-package + stdlib + `anthropic` + `pydantic`. |
|
||||||
|
|||||||
@@ -26,6 +26,14 @@ class Settings(BaseSettings):
|
|||||||
ai_fallback_budget_per_run: int = 10
|
ai_fallback_budget_per_run: int = 10
|
||||||
ai_fallback_circuit_breaker_threshold: int = 5
|
ai_fallback_circuit_breaker_threshold: int = 5
|
||||||
|
|
||||||
|
# IMP-46 u5 — auto-cache flag. When True, `save_proposal` bypasses the
|
||||||
|
# `user_approved` gate only (`visual_check_passed` is never bypassed).
|
||||||
|
# Default OFF preserves the dual-gate contract; the CLI flag
|
||||||
|
# `--auto-cache` in `src/phase_z2_pipeline.py` mutates this setting at
|
||||||
|
# parse time. Downstream callers MUST source the flag from Settings,
|
||||||
|
# never inline literals.
|
||||||
|
ai_fallback_auto_cache: bool = False
|
||||||
|
|
||||||
model_config = {"env_file": ".env", "env_file_encoding": "utf-8"}
|
model_config = {"env_file": ".env", "env_file_encoding": "utf-8"}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -1,48 +1,158 @@
|
|||||||
"""IMP-33 u6 — AI fallback proposal cache (IMP-46 gate, no persistent storage).
|
"""IMP-46 u2 + u3 + u5 — Persistent JSON cache backend for AI fallback proposals.
|
||||||
|
|
||||||
This module defines the cache contract that IMP-33 callers use to remember
|
Replaces the IMP-33 u6 ``NotImplementedError`` stub with a content-addressed
|
||||||
AI fallback proposals across runs. The persistent storage layer itself is
|
store at ``data/frame_cache/{frame_id}/{signature_hash}.json``.
|
||||||
out-of-scope for IMP-33 and is owned by IMP-46 (frame transformation cache).
|
|
||||||
|
|
||||||
Behaviour locked by Stage 2 plan (u6):
|
Key format:
|
||||||
|
|
||||||
* ``read_proposal(key)`` always returns ``None`` until IMP-46 lands a
|
* ``read_proposal(key)`` / ``save_proposal(key, ...)`` accept a string ``key``
|
||||||
persistent backend. Callers MUST handle the cache-miss path.
|
of the form ``"{frame_id}::{signature_hash}"``. The two components are
|
||||||
* ``save_proposal(key, proposal, *, visual_check_passed, user_approved)``
|
parsed inside this module so that upstream callers (router, step 12)
|
||||||
enforces the IMP-46 gate before any storage write is attempted:
|
remain unaware of the on-disk layout.
|
||||||
|
* ``read_proposal`` on a malformed (legacy) key silently returns ``None``
|
||||||
|
— the IMP-33 u7 router currently passes a legacy ``cache_key`` string,
|
||||||
|
and u4 will switch to the structural form. Until then, all such reads
|
||||||
|
must miss safely (no exception, no false hit).
|
||||||
|
* ``save_proposal`` on a malformed key raises ``ValueError`` (loud, never
|
||||||
|
silent) — writes are gated and must use the structural form.
|
||||||
|
|
||||||
- ``visual_check_passed=False`` -> ``AiFallbackCacheGateError``
|
Stored payload (one JSON file per (frame_id, signature_hash) pair):
|
||||||
- ``user_approved=False`` -> ``AiFallbackCacheGateError``
|
|
||||||
|
|
||||||
Only when BOTH gates are True does control reach the storage layer,
|
{
|
||||||
which currently raises ``NotImplementedError`` (the IMP-46 marker).
|
"schema_version": 1,
|
||||||
|
"proposal": <AiFallbackProposal.model_dump(mode="json")>,
|
||||||
|
"slide_css": <str | null>,
|
||||||
|
"fingerprints": {"contract_sha": ..., "partial_sha": ..., "catalog_sha": ...}
|
||||||
|
}
|
||||||
|
|
||||||
Guardrails:
|
u3 invalidation contract (this module is a *comparator*, not a *computer*):
|
||||||
|
|
||||||
* No Anthropic import; cache is pure proposal bookkeeping.
|
* ``save_proposal`` persists the ``fingerprints`` dict supplied by the
|
||||||
* No MDX read/write; proposals are u2 ``AiFallbackProposal`` instances.
|
caller verbatim. Cache.py never computes any fingerprint — the three
|
||||||
* No silent persistence: gate violations are loud, not skipped writes
|
declared shas (``contract_sha`` / ``partial_sha`` / ``catalog_sha``) are
|
||||||
(`feedback_artifact_status_naming`).
|
computed by callers from the live contract YAML / partial templates /
|
||||||
|
catalog payloads and handed in. Keeping the computation out of cache.py
|
||||||
|
preserves AI isolation (no Phase Z runtime knowledge in the cache
|
||||||
|
module) and keeps the cache schema-agnostic — additional fingerprint
|
||||||
|
axes can be added without editing cache.py.
|
||||||
|
* ``read_proposal`` accepts an optional ``fingerprints`` kwarg. When
|
||||||
|
supplied, the stored ``fingerprints`` dict must equal the caller's dict
|
||||||
|
exactly (strict equality, NOT subset). Any mismatch — including a key
|
||||||
|
the caller demands but the stored entry lacks, OR a key the stored
|
||||||
|
entry has but the caller does not pass — returns ``None``. Default
|
||||||
|
``fingerprints=None`` performs no comparison (back-compat for legacy
|
||||||
|
callers that have not yet adopted fingerprint-aware lookup).
|
||||||
|
|
||||||
|
Guardrails (locked by Stage 2 plan):
|
||||||
|
|
||||||
|
* Both write gates preserved — ``visual_check_passed=False`` always
|
||||||
|
raises ``AiFallbackCacheGateError`` BEFORE any filesystem touch.
|
||||||
|
``user_approved=False`` also raises by default; the IMP-46 u5
|
||||||
|
``auto_cache=True`` override bypasses ONLY the ``user_approved`` gate
|
||||||
|
(``visual_check_passed`` is never bypassed). Gate violation never
|
||||||
|
silently no-ops.
|
||||||
|
* Missing or corrupt files cause ``read_proposal`` to return ``None`` —
|
||||||
|
the cache is a hint, never a hard dependency. Errors are not propagated
|
||||||
|
to callers because the AI fallback path can always recompute.
|
||||||
|
* ``mkdir(parents=True, exist_ok=True)`` is performed lazily on save.
|
||||||
|
* No Anthropic / MDX / Phase Z runtime imports (AI isolation contract).
|
||||||
|
* Cache root is held as a module-level :data:`CACHE_ROOT` so tests can
|
||||||
|
redirect writes via ``monkeypatch.setattr`` without subclassing.
|
||||||
|
|
||||||
|
u5 auto-cache contract (CLI ``--auto-cache`` + ``settings.ai_fallback_auto_cache``):
|
||||||
|
|
||||||
|
* ``save_proposal(..., auto_cache=True)`` only bypasses the
|
||||||
|
``user_approved`` gate; ``visual_check_passed`` remains mandatory.
|
||||||
|
* ``auto_cache`` is keyword-only and defaults to ``False`` — existing
|
||||||
|
callers (and the test suite) see the original dual-gate behaviour
|
||||||
|
unless they opt in explicitly.
|
||||||
|
* The truth table over ``(visual_check_passed, user_approved, auto_cache)``
|
||||||
|
has eight cells; exactly three succeed:
|
||||||
|
``(True, True, False)``, ``(True, True, True)``, and
|
||||||
|
``(True, False, True)``. Every other cell raises
|
||||||
|
``AiFallbackCacheGateError``.
|
||||||
"""
|
"""
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import pathlib
|
||||||
|
|
||||||
from src.phase_z2_ai_fallback.schema import AiFallbackProposal
|
from src.phase_z2_ai_fallback.schema import AiFallbackProposal
|
||||||
|
|
||||||
|
|
||||||
|
# Version tag written into every cache payload under "schema_version".
SCHEMA_VERSION = 1
# Separator between the two components of a cache key
# ("frame_id::signature_hash").
KEY_DELIMITER = "::"
# Root directory for cache files. Kept at module level so tests can redirect
# writes (e.g. via ``monkeypatch.setattr``). The path is relative, so it
# resolves against the process working directory.
CACHE_ROOT: pathlib.Path = pathlib.Path("data/frame_cache")
|
||||||
|
|
||||||
|
|
||||||
class AiFallbackCacheGateError(RuntimeError):
    """Raised when ``save_proposal`` is called without the IMP-46 gates cleared.

    The gates clear only when ``visual_check_passed`` is true AND either
    ``user_approved`` or ``auto_cache`` is true.
    """
|
||||||
|
|
||||||
|
|
||||||
def read_proposal(key: str) -> AiFallbackProposal | None:
|
def _parse_key(key: str) -> tuple[str, str] | None:
    """Split a ``frame_id::signature_hash`` key into its two components.

    Returns ``(frame_id, signature_hash)``, or ``None`` when the key is
    malformed. A key is malformed when:

    * it does not contain the delimiter;
    * either component is empty;
    * the delimiter appears more than once;
    * either component is not safe to use as a single path segment
      (``.`` / ``..``, or contains a path separator or NUL byte).

    The last rule exists because both components are joined onto
    ``CACHE_ROOT`` verbatim; without it a ``frame_id`` such as ``".."`` or
    ``"a/b"`` would read or write outside the intended
    ``{frame_id}/{signature_hash}.json`` layout.
    """
    frame_id, delimiter, signature_hash = key.partition(KEY_DELIMITER)
    if not delimiter:
        return None
    if KEY_DELIMITER in signature_hash:
        return None
    for component in (frame_id, signature_hash):
        if not component or component in {".", ".."}:
            return None
        # Reject both separator styles so keys stay portable across platforms.
        if any(char in component for char in ("/", "\\", "\x00")):
            return None
    return frame_id, signature_hash
|
||||||
|
|
||||||
|
|
||||||
|
def _cache_path(frame_id: str, signature_hash: str) -> pathlib.Path:
    """Return ``CACHE_ROOT/<frame_id>/<signature_hash>.json``."""
    filename = f"{signature_hash}.json"
    return CACHE_ROOT.joinpath(frame_id, filename)
|
||||||
|
|
||||||
|
|
||||||
|
def read_proposal(
    key: str,
    *,
    fingerprints: dict | None = None,
) -> AiFallbackProposal | None:
    """Look up a previously cached proposal by ``key``.

    Raises:
        ValueError: ``key`` is empty or not a string.
        TypeError: ``fingerprints`` is supplied but is not a dict.

    Returns ``None`` (a cache miss, never an exception) when:

    * ``key`` is rejected by ``_parse_key`` — e.g. a legacy key without the
      ``::`` delimiter;
    * the cache file for ``(frame_id, signature_hash)`` is missing or cannot
      be read;
    * the file is not valid UTF-8 JSON, or its top level is not an object;
    * ``fingerprints`` is supplied and the stored ``fingerprints`` field is
      not a dict or is not exactly equal to it (strict equality, not subset);
    * the stored ``proposal`` field is not a dict or fails
      ``AiFallbackProposal`` validation.

    With ``fingerprints=None`` no fingerprint comparison is performed.
    """
    if not isinstance(key, str) or not key:
        raise ValueError("cache key must be a non-empty string")
    if fingerprints is not None and not isinstance(fingerprints, dict):
        raise TypeError("fingerprints must be a dict or None")

    parsed = _parse_key(key)
    if parsed is None:
        return None
    frame_id, signature_hash = parsed

    path = _cache_path(frame_id, signature_hash)
    if not path.is_file():
        return None
    try:
        data = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError):
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError
        # (a file with invalid UTF-8 bytes); either way the entry is corrupt
        # and must be treated as a miss rather than propagated.
        return None
    if not isinstance(data, dict):
        return None

    if fingerprints is not None:
        stored = data.get("fingerprints")
        if not isinstance(stored, dict) or stored != fingerprints:
            return None

    proposal_dict = data.get("proposal")
    if not isinstance(proposal_dict, dict):
        return None
    try:
        return AiFallbackProposal.model_validate(proposal_dict)
    except Exception:  # noqa: BLE001 — corrupt payload must miss, not raise
        return None
|
||||||
|
|
||||||
|
|
||||||
def save_proposal(
|
def save_proposal(
|
||||||
@@ -51,13 +161,39 @@ def save_proposal(
|
|||||||
*,
|
*,
|
||||||
visual_check_passed: bool,
|
visual_check_passed: bool,
|
||||||
user_approved: bool,
|
user_approved: bool,
|
||||||
) -> None:
|
slide_css: str | None = None,
|
||||||
"""Persist ``proposal`` under ``key`` once both IMP-46 gates are True.
|
fingerprints: dict | None = None,
|
||||||
|
auto_cache: bool = False,
|
||||||
|
) -> pathlib.Path:
|
||||||
|
"""Persist ``proposal`` under ``key`` once the IMP-46 gates clear.
|
||||||
|
|
||||||
Raises ``AiFallbackCacheGateError`` if either gate is False — the
|
Gate contract (IMP-46 u5 truth table):
|
||||||
proposal is NOT written. When both gates are True, storage raises
|
|
||||||
``NotImplementedError`` (the IMP-46 persistent backend has not landed
|
* ``visual_check_passed=False`` -> :class:`AiFallbackCacheGateError`
|
||||||
yet).
|
always (never bypassable; ``auto_cache`` cannot override).
|
||||||
|
* ``user_approved=False`` AND ``auto_cache=False`` ->
|
||||||
|
:class:`AiFallbackCacheGateError`.
|
||||||
|
* ``user_approved=False`` AND ``auto_cache=True`` -> bypass the
|
||||||
|
user-approval gate (IMP-46 u5 CLI / settings opt-in).
|
||||||
|
* Otherwise (``visual_check_passed=True`` AND either
|
||||||
|
``user_approved=True`` OR ``auto_cache=True``) -> persist payload.
|
||||||
|
|
||||||
|
Gate violations are raised BEFORE any filesystem touch — no parent
|
||||||
|
directory is created, no file is written. When the gates clear the
|
||||||
|
JSON payload (schema_version + proposal + slide_css + fingerprints)
|
||||||
|
is written to ``data/frame_cache/{frame_id}/{signature_hash}.json``
|
||||||
|
and the resolved :class:`pathlib.Path` is returned.
|
||||||
|
|
||||||
|
``slide_css`` may be ``None`` (no slide-level CSS captured) or a
|
||||||
|
string. ``fingerprints`` may be ``None`` (treated as empty dict) or a
|
||||||
|
dict mapping fingerprint name to SHA hex digest.
|
||||||
|
|
||||||
|
``auto_cache`` is keyword-only and defaults to ``False``. It is wired
|
||||||
|
from :data:`src.config.settings.ai_fallback_auto_cache`, which the
|
||||||
|
``--auto-cache`` CLI flag in ``src/phase_z2_pipeline.py`` toggles at
|
||||||
|
parse time. The cache module never reads the setting itself — the
|
||||||
|
caller passes the resolved boolean — so AI-isolation contracts
|
||||||
|
(no Phase Z runtime / no Anthropic import) remain intact.
|
||||||
"""
|
"""
|
||||||
if not isinstance(key, str) or not key:
|
if not isinstance(key, str) or not key:
|
||||||
raise ValueError("cache key must be a non-empty string")
|
raise ValueError("cache key must be a non-empty string")
|
||||||
@@ -66,17 +202,42 @@ def save_proposal(
|
|||||||
"proposal must be an AiFallbackProposal instance "
|
"proposal must be an AiFallbackProposal instance "
|
||||||
f"(got {type(proposal).__name__})"
|
f"(got {type(proposal).__name__})"
|
||||||
)
|
)
|
||||||
|
if not isinstance(auto_cache, bool):
|
||||||
|
raise TypeError("auto_cache must be a bool")
|
||||||
if not visual_check_passed:
|
if not visual_check_passed:
|
||||||
raise AiFallbackCacheGateError(
|
raise AiFallbackCacheGateError(
|
||||||
"IMP-46 gate: visual_check_passed=False; refusing to cache an "
|
"IMP-46 gate: visual_check_passed=False; refusing to cache an "
|
||||||
"unverified proposal."
|
"unverified proposal. (auto_cache cannot bypass this gate.)"
|
||||||
)
|
)
|
||||||
if not user_approved:
|
if not user_approved and not auto_cache:
|
||||||
raise AiFallbackCacheGateError(
|
raise AiFallbackCacheGateError(
|
||||||
"IMP-46 gate: user_approved=False; refusing to cache without "
|
"IMP-46 gate: user_approved=False and auto_cache=False; "
|
||||||
"explicit user approval."
|
"refusing to cache without explicit user approval. Pass "
|
||||||
|
"auto_cache=True (or --auto-cache on the CLI) to bypass."
|
||||||
)
|
)
|
||||||
raise NotImplementedError(
|
if slide_css is not None and not isinstance(slide_css, str):
|
||||||
"IMP-46 persistent cache storage is not implemented yet; "
|
raise TypeError("slide_css must be a string or None")
|
||||||
"this is the IMP-33 u6 stub marker."
|
if fingerprints is None:
|
||||||
|
fingerprints = {}
|
||||||
|
elif not isinstance(fingerprints, dict):
|
||||||
|
raise TypeError("fingerprints must be a dict or None")
|
||||||
|
parsed = _parse_key(key)
|
||||||
|
if parsed is None:
|
||||||
|
raise ValueError(
|
||||||
|
"cache key must be in "
|
||||||
|
f"'frame_id{KEY_DELIMITER}signature_hash' format; got {key!r}"
|
||||||
|
)
|
||||||
|
frame_id, signature_hash = parsed
|
||||||
|
path = _cache_path(frame_id, signature_hash)
|
||||||
|
path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
payload = {
|
||||||
|
"schema_version": SCHEMA_VERSION,
|
||||||
|
"proposal": proposal.model_dump(mode="json"),
|
||||||
|
"slide_css": slide_css,
|
||||||
|
"fingerprints": dict(fingerprints),
|
||||||
|
}
|
||||||
|
path.write_text(
|
||||||
|
json.dumps(payload, sort_keys=True, ensure_ascii=False, indent=2),
|
||||||
|
encoding="utf-8",
|
||||||
)
|
)
|
||||||
|
return path
|
||||||
|
|||||||
91
src/phase_z2_ai_fallback/signature.py
Normal file
91
src/phase_z2_ai_fallback/signature.py
Normal file
@@ -0,0 +1,91 @@
|
|||||||
|
"""IMP-46 u1 — Frame transformation cache signature builder.
|
||||||
|
|
||||||
|
Deterministic SHA256 over the 8 declared structural axes:
|
||||||
|
frame_id, v4_label, cardinality, source_shape,
|
||||||
|
h3_count, char_count_bucket, layout_preset, zone_position
|
||||||
|
|
||||||
|
Guardrails:
|
||||||
|
* No sample/section identifiers in the signature surface (no-hardcoding lock).
|
||||||
|
* source_shape constrained to the bullet/paragraph/table/mixed enum.
|
||||||
|
* char_count_bucket is the *bucket label*; numeric counts must be projected
|
||||||
|
via :func:`bucket_char_count` before being fed to :func:`build_signature`.
|
||||||
|
* Schema version is embedded in the hashed payload so a future axis change
|
||||||
|
breaks the digest by design (cache invalidation on schema bump).
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import hashlib
|
||||||
|
import json
|
||||||
|
from enum import Enum
|
||||||
|
|
||||||
|
|
||||||
|
# Hashed into every signature payload, so bumping it changes every digest.
SCHEMA_VERSION = 1
|
||||||
|
|
||||||
|
|
||||||
|
class SourceShape(str, Enum):
    """Closed set of source-content shapes accepted by ``build_signature``."""

    BULLET = "bullet"
    PARAGRAPH = "paragraph"
    TABLE = "table"
    MIXED = "mixed"
|
||||||
|
|
||||||
|
|
||||||
|
# (inclusive upper bound, bucket label) pairs, in ascending order of bound.
_CHAR_COUNT_BUCKETS: tuple[tuple[int, str], ...] = (
    (50, "0-50"),
    (150, "51-150"),
    (400, "151-400"),
    (1000, "401-1000"),
)
# Label used for any count above the largest bound.
_CHAR_COUNT_BUCKET_OVERFLOW = "1001+"
# Every valid bucket label, ascending, with the overflow label last.
CHAR_COUNT_BUCKET_LABELS: tuple[str, ...] = tuple(
    label for _, label in _CHAR_COUNT_BUCKETS
) + (_CHAR_COUNT_BUCKET_OVERFLOW,)
|
||||||
|
|
||||||
|
|
||||||
|
def bucket_char_count(char_count: int) -> str:
    """Map a non-negative character count onto its fixed bucket label.

    Raises:
        TypeError: ``char_count`` is not an ``int`` (``bool`` is rejected).
        ValueError: ``char_count`` is negative.
    """
    is_plain_int = isinstance(char_count, int) and not isinstance(char_count, bool)
    if not is_plain_int:
        raise TypeError("char_count must be a non-negative int")
    if char_count < 0:
        raise ValueError("char_count must be non-negative")
    # First bucket whose inclusive upper bound covers the count; anything
    # beyond the last bound falls into the overflow bucket.
    return next(
        (label for upper, label in _CHAR_COUNT_BUCKETS if char_count <= upper),
        _CHAR_COUNT_BUCKET_OVERFLOW,
    )
|
||||||
|
|
||||||
|
|
||||||
|
def build_signature(
    *,
    frame_id: str,
    v4_label: str,
    cardinality: int | None,
    source_shape: SourceShape | str,
    h3_count: int,
    char_count_bucket: str,
    layout_preset: str,
    zone_position: str,
) -> str:
    """Hash the 8 structural axes into a deterministic SHA256 hex digest.

    The axes, together with ``SCHEMA_VERSION``, are serialised as key-sorted
    JSON and hashed, so equal inputs always produce the same digest.

    Raises:
        TypeError: ``source_shape`` is neither a ``SourceShape`` nor a ``str``.
        ValueError: ``source_shape`` is a string outside the ``SourceShape``
            enum, or ``char_count_bucket`` is not a known bucket label.
    """
    # SourceShape subclasses str, so one isinstance check covers both accepted
    # forms; the enum call then validates and normalises the value.
    if not isinstance(source_shape, str):
        raise TypeError("source_shape must be SourceShape or str")
    shape_value = SourceShape(source_shape).value

    if char_count_bucket not in CHAR_COUNT_BUCKET_LABELS:
        raise ValueError(
            f"char_count_bucket={char_count_bucket!r} is not a known bucket "
            f"label (expected one of {CHAR_COUNT_BUCKET_LABELS})"
        )

    axes = dict(
        schema_version=SCHEMA_VERSION,
        frame_id=frame_id,
        v4_label=v4_label,
        cardinality=cardinality,
        source_shape=shape_value,
        h3_count=h3_count,
        char_count_bucket=char_count_bucket,
        layout_preset=layout_preset,
        zone_position=zone_position,
    )
    canonical = json.dumps(axes, sort_keys=True, ensure_ascii=False)
    return hashlib.sha256(canonical.encode("utf-8")).hexdigest()
|
||||||
@@ -1,32 +1,72 @@
|
|||||||
"""IMP-33 u8 — Step 12 AI repair wiring (IMP-30 provisional units only).
|
"""IMP-33 u8 + IMP-46 u4 — Step 12 AI repair wiring with structural cache key.
|
||||||
|
|
||||||
Phase Z Step 12 = slot_payload (the runtime "light_edit / restructure" surface
|
Phase Z Step 12 = slot_payload (the runtime "light_edit / restructure" surface
|
||||||
where AI-assisted frame-aware adaptation is allowed per IMP-17 carve-out).
|
where AI-assisted frame-aware adaptation is allowed per IMP-17 carve-out).
|
||||||
This module is the only call site that pipes Phase Z composition units into
|
This module is the only call site that pipes Phase Z composition units into
|
||||||
``src.phase_z2_ai_fallback.router.route_ai_fallback``. Two structural gates
|
``src.phase_z2_ai_fallback.router.route_ai_fallback``. One structural gate
|
||||||
preserve the AI isolation contract:
|
preserves the AI isolation contract:
|
||||||
|
|
||||||
* IMP-30 provisional gate — units with ``provisional=False`` are skipped
|
* IMP-30 provisional gate — units with ``provisional=False`` are skipped
|
||||||
before any route classification. AI repair is reserved for first-render
|
before any route classification. AI repair is reserved for first-render
|
||||||
invariant survivors (no rank-1 V4 evidence, recovered as provisional).
|
invariant survivors (no rank-1 V4 evidence, recovered as provisional).
|
||||||
* Reject gate — units whose V4 label maps to ``design_reference_only``
|
|
||||||
(``reject``) are skipped with ``skip_reason="design_reference_only_no_ai"``.
|
Per IMP-47B u1+u2, the ``reject`` V4 label routes to
|
||||||
Reject path is design reference only — never an AI call.
|
``ai_adaptation_required`` (no longer ``design_reference_only``) and is
|
||||||
|
admitted to the AI repair path; the legacy "reject gate" short-circuit is
|
||||||
|
removed. Any unit whose ``route_hint`` is not ``ai_adaptation_required``
|
||||||
|
still falls through to the catch-all ``route_not_ai_adaptation:<hint>``
|
||||||
|
skip — that single gate continues to enforce the AI=0 normal path.
|
||||||
|
|
||||||
Combined with the u7 router's flag-off + route-gate short-circuits, the
|
Combined with the u7 router's flag-off + route-gate short-circuits, the
|
||||||
default Phase Z run path performs zero AI calls (PZ-1). Save to cache is
|
default Phase Z run path performs zero AI calls (PZ-1). Save to cache is
|
||||||
NOT performed here — that is the caller's responsibility AFTER
|
NOT performed here — that is the caller's responsibility AFTER
|
||||||
``visual_check_passed=True`` AND ``user_approved=True`` (u6 IMP-46 gate).
|
``visual_check_passed=True`` AND (``user_approved=True`` OR ``auto_cache=True``) (IMP-46 gate).
|
||||||
|
|
||||||
|
IMP-46 u4 — structural cache key + fingerprints
|
||||||
|
------------------------------------------------
|
||||||
|
|
||||||
|
The legacy ``cache_key`` was ``"{template_id}::{sorted(source_section_ids)}"``
|
||||||
|
which leaked sample / section identity into the cache surface
|
||||||
|
(no-hardcoding lock violation: structurally identical content with
|
||||||
|
different MDX section ids would miss). u4 replaces it with
|
||||||
|
``"{frame_id}::{signature_hash}"`` where ``signature_hash`` is the
|
||||||
|
deterministic SHA256 over the 8 declared structural axes (see
|
||||||
|
``src.phase_z2_ai_fallback.signature``). Per-unit signature inputs are
|
||||||
|
read from unit attributes:
|
||||||
|
|
||||||
|
* ``cardinality`` (int | None) — also forwarded to ``v4_result``
|
||||||
|
* ``layout_preset`` (str)
|
||||||
|
* ``zone_position`` (str)
|
||||||
|
* ``source_shape`` (str) — bullet / paragraph / table / mixed
|
||||||
|
* ``h3_count`` (int)
|
||||||
|
* ``char_count`` (int) — bucketed via ``bucket_char_count``
|
||||||
|
|
||||||
|
In parallel the three invalidation fingerprints
|
||||||
|
(``contract_sha`` / ``partial_sha`` / ``catalog_sha``) are computed and
|
||||||
|
attached to the record. The cache.py module remains a *comparator* — all
|
||||||
|
fingerprint *computation* happens here (or via injected loaders) so the
|
||||||
|
cache schema-agnostic contract is preserved. The router's existing
|
||||||
|
``read_proposal(cache_key)`` continues to perform exact-match lookup only
|
||||||
|
(fuzzy is deferred per Stage 2 plan); read-side fingerprint validation
|
||||||
|
through the router is a follow-up axis.
|
||||||
"""
|
"""
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import hashlib
|
||||||
|
import json
|
||||||
from typing import Any, Callable, Iterable
|
from typing import Any, Callable, Iterable
|
||||||
|
|
||||||
from src.phase_z2_ai_fallback.router import route_ai_fallback
|
from src.phase_z2_ai_fallback.router import route_ai_fallback
|
||||||
|
from src.phase_z2_ai_fallback.signature import bucket_char_count, build_signature
|
||||||
|
|
||||||
|
|
||||||
_AI_ADAPTATION_ROUTE = "ai_adaptation_required"
|
_AI_ADAPTATION_ROUTE = "ai_adaptation_required"
|
||||||
_DESIGN_REFERENCE_ROUTE = "design_reference_only"
|
|
||||||
|
|
||||||
|
def _sha256_of(payload: Any) -> str:
    """Return the SHA256 hex digest of ``payload`` serialised as key-sorted JSON."""
    canonical = json.dumps(payload, ensure_ascii=False, sort_keys=True)
    hasher = hashlib.sha256()
    hasher.update(canonical.encode("utf-8"))
    return hasher.hexdigest()
|
||||||
|
|
||||||
|
|
||||||
def gather_step12_ai_repair_proposals(
|
def gather_step12_ai_repair_proposals(
|
||||||
@@ -38,6 +78,7 @@ def gather_step12_ai_repair_proposals(
|
|||||||
figma_partial_loader: Callable[[str], dict] | None = None,
|
figma_partial_loader: Callable[[str], dict] | None = None,
|
||||||
internal_region_lookup: Callable[[Any], dict] | None = None,
|
internal_region_lookup: Callable[[Any], dict] | None = None,
|
||||||
mdx_text_loader: Callable[[Any], str] | None = None,
|
mdx_text_loader: Callable[[Any], str] | None = None,
|
||||||
|
catalog_sha_loader: Callable[[], str] | None = None,
|
||||||
) -> list[dict]:
|
) -> list[dict]:
|
||||||
"""Return one record per unit describing the Step 12 AI repair decision.
|
"""Return one record per unit describing the Step 12 AI repair decision.
|
||||||
|
|
||||||
@@ -55,8 +96,16 @@ def gather_step12_ai_repair_proposals(
|
|||||||
"skip_reason": str | None,
|
"skip_reason": str | None,
|
||||||
"proposal": dict | None,
|
"proposal": dict | None,
|
||||||
"error": str | None,
|
"error": str | None,
|
||||||
|
"cache_key": str | None, # IMP-46 u4
|
||||||
|
"fingerprints": dict | None, # IMP-46 u4
|
||||||
}
|
}
|
||||||
|
|
||||||
|
``cache_key`` and ``fingerprints`` are populated only when the unit
|
||||||
|
reaches the AI-eligible code path (provisional + ai_adaptation route).
|
||||||
|
Skipped units retain ``None`` for both — the structural axes
|
||||||
|
(layout_preset / zone_position / source_shape / h3_count / char_count)
|
||||||
|
are not guaranteed to be set for non-AI paths.
|
||||||
|
|
||||||
``ai_called`` is True only when ``route_ai_fallback`` was invoked AND
|
``ai_called`` is True only when ``route_ai_fallback`` was invoked AND
|
||||||
returned a proposal OR raised. Flag-off / route-mismatch returns
|
returned a proposal OR raised. Flag-off / route-mismatch returns
|
||||||
``None`` from the router and is surfaced as ``ai_called=False`` with
|
``None`` from the router and is surfaced as ``ai_called=False`` with
|
||||||
@@ -64,6 +113,9 @@ def gather_step12_ai_repair_proposals(
|
|||||||
"router decided not to run" from "router ran and returned a proposal".
|
"router decided not to run" from "router ran and returned a proposal".
|
||||||
"""
|
"""
|
||||||
records: list[dict] = []
|
records: list[dict] = []
|
||||||
|
catalog_sha = (
|
||||||
|
catalog_sha_loader() if catalog_sha_loader is not None else ""
|
||||||
|
)
|
||||||
for index, unit in enumerate(units):
|
for index, unit in enumerate(units):
|
||||||
label = getattr(unit, "label", None)
|
label = getattr(unit, "label", None)
|
||||||
route_hint = route_for_label(label)
|
route_hint = route_for_label(label)
|
||||||
@@ -78,15 +130,13 @@ def gather_step12_ai_repair_proposals(
|
|||||||
"skip_reason": None,
|
"skip_reason": None,
|
||||||
"proposal": None,
|
"proposal": None,
|
||||||
"error": None,
|
"error": None,
|
||||||
|
"cache_key": None,
|
||||||
|
"fingerprints": None,
|
||||||
}
|
}
|
||||||
if not record["provisional"]:
|
if not record["provisional"]:
|
||||||
record["skip_reason"] = "not_provisional"
|
record["skip_reason"] = "not_provisional"
|
||||||
records.append(record)
|
records.append(record)
|
||||||
continue
|
continue
|
||||||
if route_hint == _DESIGN_REFERENCE_ROUTE:
|
|
||||||
record["skip_reason"] = "design_reference_only_no_ai"
|
|
||||||
records.append(record)
|
|
||||||
continue
|
|
||||||
if route_hint != _AI_ADAPTATION_ROUTE:
|
if route_hint != _AI_ADAPTATION_ROUTE:
|
||||||
record["skip_reason"] = f"route_not_ai_adaptation:{route_hint}"
|
record["skip_reason"] = f"route_not_ai_adaptation:{route_hint}"
|
||||||
records.append(record)
|
records.append(record)
|
||||||
@@ -106,15 +156,40 @@ def gather_step12_ai_repair_proposals(
|
|||||||
if mdx_text_loader is not None
|
if mdx_text_loader is not None
|
||||||
else (getattr(unit, "raw_content", "") or "")
|
else (getattr(unit, "raw_content", "") or "")
|
||||||
)
|
)
|
||||||
cache_key = "::".join(
|
|
||||||
[template_id, ",".join(sorted(record["source_section_ids"]))]
|
frame_id_value = getattr(unit, "frame_id", "") or ""
|
||||||
|
cardinality = getattr(unit, "cardinality", None)
|
||||||
|
layout_preset = getattr(unit, "layout_preset", "") or ""
|
||||||
|
zone_position = getattr(unit, "zone_position", "") or ""
|
||||||
|
source_shape = getattr(unit, "source_shape", "paragraph") or "paragraph"
|
||||||
|
h3_count = int(getattr(unit, "h3_count", 0) or 0)
|
||||||
|
char_count = int(getattr(unit, "char_count", 0) or 0)
|
||||||
|
char_count_bucket = bucket_char_count(char_count)
|
||||||
|
signature_hash = build_signature(
|
||||||
|
frame_id=frame_id_value,
|
||||||
|
v4_label=label or "",
|
||||||
|
cardinality=cardinality,
|
||||||
|
source_shape=source_shape,
|
||||||
|
h3_count=h3_count,
|
||||||
|
char_count_bucket=char_count_bucket,
|
||||||
|
layout_preset=layout_preset,
|
||||||
|
zone_position=zone_position,
|
||||||
)
|
)
|
||||||
|
cache_key = f"{frame_id_value}::{signature_hash}"
|
||||||
|
fingerprints = {
|
||||||
|
"contract_sha": _sha256_of(frame_contract),
|
||||||
|
"partial_sha": _sha256_of(figma_partial_json),
|
||||||
|
"catalog_sha": catalog_sha,
|
||||||
|
}
|
||||||
|
record["cache_key"] = cache_key
|
||||||
|
record["fingerprints"] = fingerprints
|
||||||
|
|
||||||
v4_result = {
|
v4_result = {
|
||||||
"route": route_hint,
|
"route": route_hint,
|
||||||
"label": label,
|
"label": label,
|
||||||
"frame_id": getattr(unit, "frame_id", None),
|
"frame_id": getattr(unit, "frame_id", None),
|
||||||
"rank": getattr(unit, "v4_rank", None),
|
"rank": getattr(unit, "v4_rank", None),
|
||||||
"cardinality": None,
|
"cardinality": cardinality,
|
||||||
}
|
}
|
||||||
try:
|
try:
|
||||||
proposal = route_ai_fallback(
|
proposal = route_ai_fallback(
|
||||||
|
|||||||
@@ -78,6 +78,12 @@ from phase_z2_failure_router import (
|
|||||||
from phase_z2_content_extractor import extract_content_objects, extract_rich_content_objects
|
from phase_z2_content_extractor import extract_content_objects, extract_rich_content_objects
|
||||||
from phase_z2_placement_planner import plan_placement
|
from phase_z2_placement_planner import plan_placement
|
||||||
|
|
||||||
|
# IMP-47B u4 — Step 12 AI repair wiring. gather() short-circuits at the
|
||||||
|
# router when settings.ai_fallback_enabled is False (default), so import
|
||||||
|
# at module load is safe for the AI=0 normal path (PZ-1). Activation gate
|
||||||
|
# stays in src/config.py + src/phase_z2_ai_fallback/router.py.
|
||||||
|
from src.phase_z2_ai_fallback.step12 import gather_step12_ai_repair_proposals
|
||||||
|
|
||||||
|
|
||||||
# ─── Constants ──────────────────────────────────────────────────
|
# ─── Constants ──────────────────────────────────────────────────
|
||||||
|
|
||||||
@@ -569,12 +575,15 @@ def lookup_v4_match(
|
|||||||
# use_as_is → Phase Z direct render
|
# use_as_is → Phase Z direct render
|
||||||
# light_edit → deterministic minor adjustment
|
# light_edit → deterministic minor adjustment
|
||||||
# restructure → AI-assisted frame-aware adaptation (deferred to IMP-17 — carve-out, AI fallback only, normal path 밖)
|
# restructure → AI-assisted frame-aware adaptation (deferred to IMP-17 — carve-out, AI fallback only, normal path 밖)
|
||||||
# reject → design reference only (deferred to IMP-29 frontend override)
|
# reject → AI re-construction over the rank-1 reject frame (IMP-47B u1, 2026-05-21);
|
||||||
|
# policy correction supersedes the legacy "design reference only" disposition.
|
||||||
|
# Frame visual / contract stays untouched; AI only re-maps MDX content into
|
||||||
|
# declared slots. Activation still gated by ai_fallback_enabled (default OFF).
|
||||||
_IMP05_ROUTE_HINTS: dict[str, str] = {
|
_IMP05_ROUTE_HINTS: dict[str, str] = {
|
||||||
"use_as_is": "direct_render",
|
"use_as_is": "direct_render",
|
||||||
"light_edit": "deterministic_minor_adjustment",
|
"light_edit": "deterministic_minor_adjustment",
|
||||||
"restructure": "ai_adaptation_required",
|
"restructure": "ai_adaptation_required",
|
||||||
"reject": "design_reference_only",
|
"reject": "ai_adaptation_required",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -585,6 +594,249 @@ def _imp05_route_hint(label: Optional[str]) -> Optional[str]:
|
|||||||
return _IMP05_ROUTE_HINTS.get(label)
|
return _IMP05_ROUTE_HINTS.get(label)
|
||||||
|
|
||||||
|
|
||||||
|
def _load_frame_partial_html(template_id: str) -> str:
    """IMP-47B u4 — Load the family partial HTML for *template_id*.

    The partial is looked up as ``{template_id}.html`` inside the
    ``families`` directory under ``TEMPLATE_DIR``.

    Args:
        template_id: Frame template identifier used as the file stem.

    Returns:
        The partial's text decoded as UTF-8, or ``""`` when no regular
        file exists at that path (e.g., the ``__empty__`` shell from
        IMP-30). Returning an empty string lets
        ``gather_step12_ai_repair_proposals`` still build a record
        instead of failing on file IO.
    """
    families_dir = TEMPLATE_DIR / "families"
    candidate = families_dir / f"{template_id}.html"
    if candidate.is_file():
        return candidate.read_text(encoding="utf-8")
    return ""
|
||||||
|
|
||||||
|
|
||||||
|
def _run_step12_ai_repair(units) -> list[dict]:
    """IMP-47B u4 — Run the Step 12 AI repair gather for *units*.

    Delegates to ``gather_step12_ai_repair_proposals``, wiring in this
    module's collaborators:

    * ``route_for_label``     → ``_imp05_route_hint`` (IMP-05 label → route)
    * ``get_contract_fn``     → ``get_contract``
    * ``frame_visual_loader`` → ``_load_frame_partial_html``

    Provisional units whose route hint is ``ai_adaptation_required``
    (``restructure`` + ``reject`` per u1) are the ones offered to the
    AI fallback router; other units only receive a ``skip_reason``.
    With ``settings.ai_fallback_enabled`` off (the default) the router
    short-circuits.

    Returns:
        One record per unit. u5 applies PARTIAL_OVERRIDES from these
        records and u6 writes them to the audit artifact.
    """
    step12_wiring = {
        "route_for_label": _imp05_route_hint,
        "get_contract_fn": get_contract,
        "frame_visual_loader": _load_frame_partial_html,
    }
    return gather_step12_ai_repair_proposals(units, **step12_wiring)
|
||||||
|
|
||||||
|
|
||||||
|
# Proposal kinds the reject / restructure apply step knows how to merge.
_REJECT_SUPPORTED_PROPOSAL_KINDS: frozenset[str] = frozenset({"partial_overrides"})


def _apply_ai_repair_proposals_to_zones(
    ai_repair_records: list[dict],
    unit_positions: list[str],
    zones_data: list[dict],
) -> None:
    """IMP-47B u5 — Apply PARTIAL_OVERRIDES into zones_data.slot_payload.

    Each record is mutated in place with an ``apply_status``:

    * ``"no_proposal"`` — the record carries no proposal.
    * ``"unsupported_kind_for_reject_route:<kind>"`` — the proposal kind
      is not in ``_REJECT_SUPPORTED_PROPOSAL_KINDS``; the zone is left
      untouched and a message is printed to stderr (human review).
    * ``"no_zone_match"`` — ``unit_index`` is outside ``unit_positions``
      or no zone has the resolved position.
    * ``"applied:partial_overrides"`` — ``proposal.payload.slots`` was
      merged into the matching zone's ``slot_payload`` via
      ``dict.update``.

    Args:
        ai_repair_records: Per-unit records from the Step 12 gather;
            each must carry ``unit_index`` once it has a proposal.
        unit_positions: Zone position for each unit, indexed by
            ``unit_index``.
        zones_data: Zone dicts; each must carry ``position``.

    Returns:
        None. ``ai_repair_records`` and ``zones_data`` are mutated.
    """
    zone_by_position = {z["position"]: z for z in zones_data}
    for record in ai_repair_records:
        proposal = record.get("proposal")
        if proposal is None:
            record["apply_status"] = "no_proposal"
            continue

        kind = proposal.get("proposal_kind")
        if kind not in _REJECT_SUPPORTED_PROPOSAL_KINDS:
            record["apply_status"] = f"unsupported_kind_for_reject_route:{kind}"
            print(
                f"  [ai-repair-apply] unit {record['unit_index']} "
                f"proposal_kind='{kind}' out-of-scope for reject route — "
                "skipping apply; human_review required.",
                file=sys.stderr,
            )
            continue

        unit_index = record["unit_index"]
        position = (
            unit_positions[unit_index]
            if 0 <= unit_index < len(unit_positions) else None
        )
        zone = zone_by_position.get(position) if position is not None else None
        if zone is None:
            record["apply_status"] = "no_zone_match"
            continue

        slots = (proposal.get("payload") or {}).get("slots") or {}
        # A zone without a slot_payload mapping (key missing or None) must
        # not abort the apply step — create the mapping on demand so the
        # AI-provided slots are still merged.
        slot_payload = zone.get("slot_payload")
        if slot_payload is None:
            slot_payload = zone["slot_payload"] = {}
        slot_payload.update(slots)
        record["apply_status"] = "applied:partial_overrides"
|
||||||
|
|
||||||
|
|
||||||
|
def _check_post_ai_coverage_invariant(
    units,
    ai_repair_records: list[dict],
) -> dict:
    """IMP-47B u7 — Check that no source_section_id was lost by AI repair.

    Builds two sets: the section ids declared on *units* (pre-AI) and
    the section ids carried by *ai_repair_records* (post-apply). Any id
    in the first set but absent from the second is reported as dropped.
    The comparison is pure set membership; a missing or empty
    ``source_section_ids`` on a unit or record contributes nothing.

    Args:
        units: Iterable of unit objects; ``source_section_ids`` is read
            with ``getattr`` so units without the attribute are allowed.
        ai_repair_records: Per-unit records from the Step 12 gather.

    Returns:
        A dict with sorted ``pre_ai_section_ids``, ``post_ai_section_ids``
        and ``dropped_section_ids``, plus ``status`` (``"ok"`` when
        nothing was dropped, otherwise ``"violated"``).
    """
    before: set[str] = {
        section_id
        for unit in units
        for section_id in (getattr(unit, "source_section_ids", []) or [])
    }
    after: set[str] = {
        section_id
        for record in ai_repair_records
        for section_id in (record.get("source_section_ids") or [])
    }
    missing = sorted(before.difference(after))
    verdict = "violated" if missing else "ok"
    return {
        "pre_ai_section_ids": sorted(before),
        "post_ai_section_ids": sorted(after),
        "dropped_section_ids": missing,
        "status": verdict,
    }
|
||||||
|
|
||||||
|
|
||||||
|
def _persist_ai_repair_proposals_to_cache(
    ai_repair_records: list[dict],
    *,
    visual_check_passed: bool,
    user_approved: bool,
    auto_cache: bool,
) -> None:
    """IMP-47B u13 — Persist applied AI repair proposals through IMP-46 gates.

    Mutates each record in place with a ``cache_save_status``:

    * ``"not_applied"`` — the record is not eligible: ``apply_status``
      does not start with ``"applied:"``, or ``proposal`` is not a dict,
      or ``cache_key`` is empty, or ``fingerprints`` is not a dict.
    * ``"invalid_proposal:<ExcType>"`` — the proposal dict failed
      ``AiFallbackProposal.model_validate``.
    * ``"gate_blocked:<reason>"`` — ``save_proposal`` raised
      ``AiFallbackCacheGateError``.
    * ``"save_failed:<ExcType>"`` — ``save_proposal`` raised ``OSError``.
    * ``"saved"`` — ``save_proposal`` returned normally.

    The cache is a hint, never a hard dependency, so neither a gate
    block nor an ``OSError`` from the save call propagates into the
    pipeline; both are recorded on the record instead. The gate flags
    are forwarded to ``save_proposal`` unchanged.

    Args:
        ai_repair_records: Per-unit records after the u5 apply step.
        visual_check_passed: Forwarded to ``save_proposal``.
        user_approved: Forwarded to ``save_proposal``.
        auto_cache: Forwarded to ``save_proposal``.

    Returns:
        None. ``ai_repair_records`` is mutated.
    """
    from src.phase_z2_ai_fallback.cache import (
        AiFallbackCacheGateError,
        save_proposal,
    )
    from src.phase_z2_ai_fallback.schema import AiFallbackProposal

    for record in ai_repair_records:
        apply_status = record.get("apply_status") or ""
        proposal_dict = record.get("proposal")
        cache_key = record.get("cache_key")
        fingerprints = record.get("fingerprints")
        if (
            not apply_status.startswith("applied:")
            or not isinstance(proposal_dict, dict)
            or not cache_key
            or not isinstance(fingerprints, dict)
        ):
            record["cache_save_status"] = "not_applied"
            continue

        try:
            proposal_obj = AiFallbackProposal.model_validate(proposal_dict)
        except Exception as exc:  # noqa: BLE001 — invalid payload → skip, never raise
            record["cache_save_status"] = f"invalid_proposal:{type(exc).__name__}"
            continue

        try:
            save_proposal(
                cache_key,
                proposal_obj,
                visual_check_passed=visual_check_passed,
                user_approved=user_approved,
                auto_cache=auto_cache,
                fingerprints=fingerprints,
            )
        except AiFallbackCacheGateError as gate_exc:
            record["cache_save_status"] = f"gate_blocked:{gate_exc}"
            continue
        except OSError as io_exc:
            # An OSError from the save call must not abort a render that
            # already succeeded; record it and move on to the next record.
            record["cache_save_status"] = f"save_failed:{type(io_exc).__name__}"
            continue
        record["cache_save_status"] = "saved"
|
||||||
|
|
||||||
|
|
||||||
|
def _summarize_ai_repair_status(
    ai_repair_records: list[dict],
    coverage_invariant: dict,
) -> dict:
    """IMP-47B u8 — Fold Step 12 AI repair outcomes into one status dict.

    Every record lands in exactly one bucket: a truthy ``error`` wins
    first; otherwise ``apply_status`` selects ``applied``,
    ``unsupported_kind`` or ``no_zone_match``; anything else counts as
    ``no_proposal``. The overall ``status`` is the first matching rung
    of: ``error`` > ``coverage_violated`` > ``unsupported_kind`` >
    ``applied`` > ``ok``.

    Args:
        ai_repair_records: Per-unit records carrying the u4 ``error``
            and u5 ``apply_status`` fields.
        coverage_invariant: Result of the u7 coverage check; a falsy
            value is treated as ``{"status": "ok"}`` with nothing
            dropped.

    Returns:
        A dict with ``status``, ``counts``, ``unsupported_kind_records``,
        ``error_records``, ``coverage_status``, ``dropped_section_ids``
        and ``human_review_required`` (True for the three failure
        statuses). Pure: no IO, no AI call.
    """

    def _brief(record: dict, **extra) -> dict:
        # Compact per-record summary shared by both failure lists.
        return {
            "unit_index": record.get("unit_index"),
            "source_section_ids": list(record.get("source_section_ids") or []),
            **extra,
        }

    tally = {
        "total": len(ai_repair_records),
        "applied": 0,
        "no_proposal": 0,
        "no_zone_match": 0,
        "unsupported_kind": 0,
        "error": 0,
    }
    out_of_scope: list[dict] = []
    failed: list[dict] = []

    for record in ai_repair_records:
        failure = record.get("error")
        if failure:
            tally["error"] += 1
            failed.append(_brief(record, error=failure))
            continue

        outcome = record.get("apply_status") or ""
        if outcome.startswith("applied:"):
            bucket = "applied"
        elif outcome.startswith("unsupported_kind_for_reject_route:"):
            bucket = "unsupported_kind"
            out_of_scope.append(_brief(record, apply_status=outcome))
        elif outcome == "no_zone_match":
            bucket = "no_zone_match"
        else:
            bucket = "no_proposal"
        tally[bucket] += 1

    invariant = coverage_invariant or {}
    coverage_status = invariant.get("status", "ok")
    dropped = list(invariant.get("dropped_section_ids") or [])

    # Highest-priority rung first; the first rung that fires names the status.
    ladder = (
        ("error", bool(tally["error"])),
        ("coverage_violated", coverage_status != "ok"),
        ("unsupported_kind", bool(tally["unsupported_kind"])),
        ("applied", bool(tally["applied"])),
    )
    overall = next((name for name, fired in ladder if fired), "ok")

    return {
        "status": overall,
        "counts": tally,
        "unsupported_kind_records": out_of_scope,
        "error_records": failed,
        "coverage_status": coverage_status,
        "dropped_section_ids": dropped,
        "human_review_required": overall in {"error", "coverage_violated", "unsupported_kind"},
    }
|
||||||
|
|
||||||
|
|
||||||
def lookup_v4_match_with_fallback(
|
def lookup_v4_match_with_fallback(
|
||||||
v4: dict,
|
v4: dict,
|
||||||
section_id: str,
|
section_id: str,
|
||||||
@@ -878,6 +1130,54 @@ def lookup_v4_candidates(
|
|||||||
return candidates
|
return candidates
|
||||||
|
|
||||||
|
|
||||||
|
def _apply_frame_override_to_unit(unit, new_tid: str, v4: dict) -> str:
    """IMP-47B u3 — Point *unit* at frame *new_tid*, mutating it in place.

    Resolution happens in three layers; the first that matches wins and
    its name is returned for override book-keeping:

    1. ``"v4_candidates"`` — a candidate in ``unit.v4_candidates`` has
       ``template_id == new_tid``. The unit takes that candidate's
       template_id / frame_id / frame_number / confidence / label.
    2. ``"v4_reject_judgment_provisional"`` — a judgment returned by
       ``lookup_v4_all_judgments`` for the unit's first source section
       matches *new_tid* and is labelled ``"reject"``. The unit takes
       the judgment's frame fields, gets ``label="reject"`` and is
       promoted to ``provisional=True`` so Step 12 (IMP-47B u4) admits
       it to the AI repair path.
    3. ``"raw_template_id_only"`` — nothing matched; only
       ``frame_template_id`` is updated.

    Only the unit's own metadata is changed here. The caller validates
    catalog contract presence before invoking this helper.
    """

    def _adopt(source, forced_label=None) -> None:
        # Copy the frame metadata shared by layers 1 and 2 onto the unit.
        unit.frame_template_id = source.template_id
        unit.frame_id = source.frame_id
        unit.frame_number = source.frame_number
        unit.confidence = source.confidence
        unit.label = source.label if forced_label is None else forced_label

    # Layer 1: a bounded candidate list already attached to the unit.
    no_hit = object()
    candidate = next(
        (
            c for c in (unit.v4_candidates or [])
            if getattr(c, "template_id", None) == new_tid
        ),
        no_hit,
    )
    if candidate is not no_hit:
        _adopt(candidate)
        return "v4_candidates"

    # Layer 2: every judgment for the primary section, reject included.
    section_ids = unit.source_section_ids
    primary_sid = section_ids[0] if section_ids else None
    if primary_sid:
        for judgment in lookup_v4_all_judgments(v4, primary_sid):
            if judgment.template_id == new_tid and judgment.label == "reject":
                _adopt(judgment, forced_label="reject")
                unit.provisional = True
                return "v4_reject_judgment_provisional"

    # Layer 3: no metadata available — record the template id only.
    unit.frame_template_id = new_tid
    return "raw_template_id_only"
|
||||||
|
|
||||||
|
|
||||||
# ─── Content weight + zone layout 계산 ─────────────────────────
|
# ─── Content weight + zone layout 계산 ─────────────────────────
|
||||||
# layout preset 선택은 phase_z2_composition.select_layout_preset (composition v0) 가 담당.
|
# layout preset 선택은 phase_z2_composition.select_layout_preset (composition v0) 가 담당.
|
||||||
# 본 모듈의 select_layout_preset 은 이전 단순 count-based 구현이었고 dead code 로 제거 (2026-04-29).
|
# 본 모듈의 select_layout_preset 은 이전 단순 count-based 구현이었고 dead code 로 제거 (2026-04-29).
|
||||||
@@ -3336,6 +3636,57 @@ def run_phase_z2_mvp1(
|
|||||||
),
|
),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# IMP-47B u12 — mixed direct+reject first-render admission.
|
||||||
|
# When initial plan_composition produces a viable layout but at least one
|
||||||
|
# section remains uncovered (typically chain_exhausted / reject), re-run
|
||||||
|
# with allow_provisional in the lookup + allow_provisional_fill=True so
|
||||||
|
# reject sections gain a provisional rank-1 V4Match and a last-resort
|
||||||
|
# provisional candidate fill. This admits the mixed direct+reject case
|
||||||
|
# to the AI repair path (IMP-47B u4/u5) on first render. Skipped under
|
||||||
|
# --override-section-assignments to preserve the operator's plan and
|
||||||
|
# mirror the IMP-30 u4 retry's section_assignment_plan gate. All-direct
|
||||||
|
# slides have no uncovered sections so this is a no-op. The all-reject
|
||||||
|
# case is still handled by the IMP-30 u4 retry block below (initial
|
||||||
|
# plan_composition returns units=[]).
|
||||||
|
if units and layout_preset is not None and not override_section_assignments:
|
||||||
|
_u12_covered_ids: set[str] = set()
|
||||||
|
for _u in units:
|
||||||
|
_u12_covered_ids.update(_u.source_section_ids)
|
||||||
|
_u12_uncovered_ids = [
|
||||||
|
s.section_id for s in sections if s.section_id not in _u12_covered_ids
|
||||||
|
]
|
||||||
|
if _u12_uncovered_ids:
|
||||||
|
def _lookup_fn_mixed_admission(sid: str) -> Optional[V4Match]:
|
||||||
|
match, trace = lookup_v4_match_with_fallback(
|
||||||
|
v4,
|
||||||
|
sid,
|
||||||
|
raw_content=section_content_by_id.get(sid),
|
||||||
|
alias_keys=section_alias_by_id.get(sid),
|
||||||
|
allow_provisional=True,
|
||||||
|
)
|
||||||
|
v4_fallback_traces[sid] = trace
|
||||||
|
return match
|
||||||
|
|
||||||
|
units_mixed, layout_preset_mixed, _comp_debug_mixed = plan_composition(
|
||||||
|
sections,
|
||||||
|
_lookup_fn_mixed_admission,
|
||||||
|
V4_LABEL_TO_PHASE_Z_STATUS,
|
||||||
|
MVP1_ALLOWED_STATUSES,
|
||||||
|
capacity_fit_fn=compute_capacity_fit,
|
||||||
|
v4_candidates_lookup_fn=candidates_lookup_fn,
|
||||||
|
allow_provisional_fill=True,
|
||||||
|
)
|
||||||
|
if units_mixed and layout_preset_mixed is not None:
|
||||||
|
units = units_mixed
|
||||||
|
layout_preset = layout_preset_mixed
|
||||||
|
comp_debug["v4_fallback_selections"] = list(v4_fallback_traces.values())
|
||||||
|
comp_debug["imp47b_u12_mixed_admission"] = {
|
||||||
|
"applied": True,
|
||||||
|
"uncovered_before": _u12_uncovered_ids,
|
||||||
|
"result_unit_count": len(units_mixed),
|
||||||
|
"result_layout_preset": layout_preset_mixed,
|
||||||
|
}
|
||||||
|
|
||||||
# ── Step 7-A axis : layout override ──
|
# ── Step 7-A axis : layout override ──
|
||||||
# 사용자가 LayoutPanel 에서 다른 preset 을 선택했을 때 자동 결정값을 강제 변경.
|
# 사용자가 LayoutPanel 에서 다른 preset 을 선택했을 때 자동 결정값을 강제 변경.
|
||||||
# 길이 mismatch (positions count vs unit count) 는 zone loop 의 fallback (zone_{i})
|
# 길이 mismatch (positions count vs unit count) 는 zone loop 의 fallback (zone_{i})
|
||||||
@@ -3684,7 +4035,10 @@ def run_phase_z2_mvp1(
|
|||||||
# {unit_id: template_id} 형식. unit_id 매칭 시 unit.frame_template_id 강제 변경.
|
# {unit_id: template_id} 형식. unit_id 매칭 시 unit.frame_template_id 강제 변경.
|
||||||
# v4_candidates 안에서 같은 template_id 를 가진 entry 를 찾으면 frame_id /
|
# v4_candidates 안에서 같은 template_id 를 가진 entry 를 찾으면 frame_id /
|
||||||
# frame_number / confidence / label 까지 그 entry 에서 가져와 갱신 — 그래야 step09
|
# frame_number / confidence / label 까지 그 entry 에서 가져와 갱신 — 그래야 step09
|
||||||
# artifact 의 메타가 일관됨.
|
# artifact 의 메타가 일관됨. IMP-47B u3 (2026-05-21) : v4_candidates miss 시
|
||||||
|
# 전 32 judgments 까지 probe — reject 라벨 frame 을 사용자가 선택한 경우
|
||||||
|
# unit 을 provisional=True 로 승격해 Step 12 AI 재구성 게이트를 통과시킴
|
||||||
|
# (frame 유지, 자동 frame swap 금지 — [[feedback_ai_isolation_contract]]).
|
||||||
# frame contract 가 catalog 에 등록 안 된 template_id 면 skip + warning —
|
# frame contract 가 catalog 에 등록 안 된 template_id 면 skip + warning —
|
||||||
# crash 방지 (V4 score 는 매겨지지만 catalog partial 은 없는 후보 존재).
|
# crash 방지 (V4 score 는 매겨지지만 catalog partial 은 없는 후보 존재).
|
||||||
frame_overrides_applied: list[dict] = []
|
frame_overrides_applied: list[dict] = []
|
||||||
@@ -3713,21 +4067,7 @@ def run_phase_z2_mvp1(
|
|||||||
file=sys.stderr,
|
file=sys.stderr,
|
||||||
)
|
)
|
||||||
continue
|
continue
|
||||||
match = None
|
meta_source = _apply_frame_override_to_unit(unit, new_tid, v4)
|
||||||
for cand in (unit.v4_candidates or []):
|
|
||||||
if getattr(cand, "template_id", None) == new_tid:
|
|
||||||
match = cand
|
|
||||||
break
|
|
||||||
if match is not None:
|
|
||||||
unit.frame_template_id = match.template_id
|
|
||||||
unit.frame_id = match.frame_id
|
|
||||||
unit.frame_number = match.frame_number
|
|
||||||
unit.confidence = match.confidence
|
|
||||||
unit.label = match.label
|
|
||||||
meta_source = "v4_candidates"
|
|
||||||
else:
|
|
||||||
unit.frame_template_id = new_tid
|
|
||||||
meta_source = "raw_template_id_only"
|
|
||||||
frame_overrides_applied.append({
|
frame_overrides_applied.append({
|
||||||
"unit_id": unit_id,
|
"unit_id": unit_id,
|
||||||
"from": old_tid,
|
"from": old_tid,
|
||||||
@@ -4329,6 +4669,58 @@ def run_phase_z2_mvp1(
|
|||||||
note="B4 PlacementPlan slot_assignments — render path 미연결. 실제 render slot 매핑은 mapper.py 의 builder.",
|
note="B4 PlacementPlan slot_assignments — render path 미연결. 실제 render slot 매핑은 mapper.py 의 builder.",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# ─── Step 12 IMP-47B u4 — AI repair proposal gather ───
|
||||||
|
# Wire gather_step12_ai_repair_proposals so reject / restructure
|
||||||
|
# provisional units reach the AI fallback router. Normal-path units
|
||||||
|
# (use_as_is / light_edit / non-provisional) skip via the catch-all
|
||||||
|
# route gate; flag-off runs short-circuit at the router. Stored locally
|
||||||
|
# for u5 (PARTIAL_OVERRIDES apply) + u6 (step12_ai_repair.json audit).
|
||||||
|
ai_repair_records = _run_step12_ai_repair(units)
|
||||||
|
|
||||||
|
# ─── Step 12 IMP-47B u5 — Apply PARTIAL_OVERRIDES proposals ───
|
||||||
|
# Mirror the per-unit position derivation from the render loop above
|
||||||
|
# (L3789-3796); apply merges slots into zone slot_payload, loud-fails
|
||||||
|
# unsupported kinds via apply_status marker.
|
||||||
|
unit_positions: list[str] = []
|
||||||
|
for _i, _unit in enumerate(units):
|
||||||
|
_pos = positions[_i] if _i < len(positions) else f"zone_{_i}"
|
||||||
|
_plan_record = render_record_by_unit_id.get(id(_unit))
|
||||||
|
if _plan_record is not None and _plan_record.get("position"):
|
||||||
|
_pos = _plan_record["position"]
|
||||||
|
unit_positions.append(_pos)
|
||||||
|
_apply_ai_repair_proposals_to_zones(ai_repair_records, unit_positions, zones_data)
|
||||||
|
|
||||||
|
# ─── Step 12 IMP-47B u7 — Post-AI source_section_ids coverage invariant ───
|
||||||
|
# Structural defense: AI repair must not silently drop a unit's
|
||||||
|
# source_section_ids. dropped 절대 룰 — text_block / table / image /
|
||||||
|
# details deletion forbidden. Result feeds u6 audit (below) and
|
||||||
|
# u8 slide_status.ai_repair_status surfacing.
|
||||||
|
ai_repair_coverage_invariant = _check_post_ai_coverage_invariant(
|
||||||
|
units, ai_repair_records,
|
||||||
|
)
|
||||||
|
|
||||||
|
# ─── Step 12 IMP-47B u6 — AI repair audit artifact ───
|
||||||
|
# Persist per-unit gather/apply outcomes (route_hint, skip_reason,
|
||||||
|
# apply_status, ai_called, proposal kind, cache_key, fingerprints)
|
||||||
|
# so reviewers can audit which units reached the AI fallback router
|
||||||
|
# and what happened. Flag-off default → every record has
|
||||||
|
# ai_called=False + apply_status='no_proposal'; flag-on +
|
||||||
|
# provisional reject/restructure → router_short_circuit (cache miss
|
||||||
|
# without client) or applied:partial_overrides (cache hit / live AI).
|
||||||
|
# u7 coverage_invariant rides alongside per_unit for reviewers.
|
||||||
|
_write_step_artifact(
|
||||||
|
run_dir, 12, "ai_repair",
|
||||||
|
data={
|
||||||
|
"per_unit": ai_repair_records,
|
||||||
|
"coverage_invariant": ai_repair_coverage_invariant,
|
||||||
|
},
|
||||||
|
step_status="done",
|
||||||
|
pipeline_path_connected=True,
|
||||||
|
inputs=["step10_frame_contract.json", "step02_normalized.json"],
|
||||||
|
outputs=["step12_ai_repair.json"],
|
||||||
|
note="IMP-47B u6 — Step 12 AI repair gather + apply records per unit (route, skip_reason, apply_status, proposal). u7 coverage_invariant = pre/post AI source_section_ids set comparison.",
|
||||||
|
)
|
||||||
|
|
||||||
# ─── Step 12: Slot Payload (actual values, mapper.py 결과) ───
|
# ─── Step 12: Slot Payload (actual values, mapper.py 결과) ───
|
||||||
_write_step_artifact(
|
_write_step_artifact(
|
||||||
run_dir, 12, "slot_payload",
|
run_dir, 12, "slot_payload",
|
||||||
@@ -4943,6 +5335,24 @@ def run_phase_z2_mvp1(
|
|||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# ─── IMP-47B u13: Persist validated AI repair proposals to cache ───
|
||||||
|
# Saves each applied PARTIAL_OVERRIDES proposal AFTER Step 14 visual
|
||||||
|
# check + per IMP-46 dual-gate. ``visual_check_passed`` reads the
|
||||||
|
# Selenium overflow result; ``auto_cache`` sourced from Settings
|
||||||
|
# (CLI --auto-cache wires settings.ai_fallback_auto_cache at parse
|
||||||
|
# time, src/phase_z2_pipeline.py:5631-5633). ``user_approved`` stays
|
||||||
|
# False — the pipeline has no UX approval gate; the auto_cache
|
||||||
|
# opt-in is the documented bypass per IMP-46 u5. Gate violations
|
||||||
|
# surface as ``cache_save_status='gate_blocked:<reason>'`` on the
|
||||||
|
# record (cache is a hint, never a hard dependency).
|
||||||
|
from src.config import settings as _ai_cache_settings
|
||||||
|
_persist_ai_repair_proposals_to_cache(
|
||||||
|
ai_repair_records,
|
||||||
|
visual_check_passed=bool(overflow.get("passed")),
|
||||||
|
user_approved=False,
|
||||||
|
auto_cache=bool(_ai_cache_settings.ai_fallback_auto_cache),
|
||||||
|
)
|
||||||
|
|
||||||
# 10. fit_classifier v0 (A1) — Selenium 결과 → spec §3 category 분류 layer.
|
# 10. fit_classifier v0 (A1) — Selenium 결과 → spec §3 category 분류 layer.
|
||||||
# *분류만*. action / router / rerender X. behavior 변경 0.
|
# *분류만*. action / router / rerender X. behavior 변경 0.
|
||||||
fit_classification = classify_visual_runtime_check(overflow, debug_zones)
|
fit_classification = classify_visual_runtime_check(overflow, debug_zones)
|
||||||
@@ -5126,6 +5536,16 @@ def run_phase_z2_mvp1(
|
|||||||
debug_zones=debug_zones,
|
debug_zones=debug_zones,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# IMP-47B u8 — Surface Step 12 AI repair outcomes through slide_status.
|
||||||
|
# Composes u4 gather errors + u5 apply_status + u7 coverage_invariant
|
||||||
|
# into a single ``ai_repair_status`` axis the frontend (u11) reads to
|
||||||
|
# render human_review notifications. Auto pipeline first
|
||||||
|
# ([[feedback_auto_pipeline_first]]) — no review_queue insertion;
|
||||||
|
# explicit status enum + human_review_required flag.
|
||||||
|
slide_status["ai_repair_status"] = _summarize_ai_repair_status(
|
||||||
|
ai_repair_records, ai_repair_coverage_invariant,
|
||||||
|
)
|
||||||
|
|
||||||
# ─── Step 20: Slide Status ───
|
# ─── Step 20: Slide Status ───
|
||||||
_write_step_artifact(
|
_write_step_artifact(
|
||||||
run_dir, 20, "slide_status",
|
run_dir, 20, "slide_status",
|
||||||
@@ -5147,6 +5567,11 @@ def run_phase_z2_mvp1(
|
|||||||
_aligned = slide_status.get("aligned_section_ids") or []
|
_aligned = slide_status.get("aligned_section_ids") or []
|
||||||
_covered = slide_status.get("covered_section_ids") or []
|
_covered = slide_status.get("covered_section_ids") or []
|
||||||
_filtered = slide_status.get("filtered_section_ids") or []
|
_filtered = slide_status.get("filtered_section_ids") or []
|
||||||
|
_ai_repair = slide_status.get("ai_repair_status") or {}
|
||||||
|
_ai_repair_label = (
|
||||||
|
f'{_ai_repair.get("status", "?")} '
|
||||||
|
f'(human_review_required={_ai_repair.get("human_review_required", False)})'
|
||||||
|
)
|
||||||
_write_step_html(
|
_write_step_html(
|
||||||
run_dir, 20, "final_status",
|
run_dir, 20, "final_status",
|
||||||
title="Final Slide Status",
|
title="Final Slide Status",
|
||||||
@@ -5161,6 +5586,7 @@ def run_phase_z2_mvp1(
|
|||||||
f'<tr><th>filtered_section_ids</th><td>{_filtered}</td></tr>'
|
f'<tr><th>filtered_section_ids</th><td>{_filtered}</td></tr>'
|
||||||
f'<tr><th>adapter_needed_count</th><td>{slide_status.get("adapter_needed_count", 0)}</td></tr>'
|
f'<tr><th>adapter_needed_count</th><td>{slide_status.get("adapter_needed_count", 0)}</td></tr>'
|
||||||
f'<tr><th>content_truncated_count</th><td>{slide_status.get("content_truncated_count", 0)}</td></tr>'
|
f'<tr><th>content_truncated_count</th><td>{slide_status.get("content_truncated_count", 0)}</td></tr>'
|
||||||
|
f'<tr><th>ai_repair_status</th><td>{_ai_repair_label}</td></tr>'
|
||||||
f'</table>'
|
f'</table>'
|
||||||
f'<h2>Visual Fail Reasons</h2>{_vfs_html}'
|
f'<h2>Visual Fail Reasons</h2>{_vfs_html}'
|
||||||
f'<h2>Note</h2><p>{slide_status.get("note", "")}</p>'
|
f'<h2>Note</h2><p>{slide_status.get("note", "")}</p>'
|
||||||
@@ -5331,8 +5757,29 @@ if __name__ == "__main__":
|
|||||||
"--override-section-assignment bottom=03-2,03-3"
|
"--override-section-assignment bottom=03-2,03-3"
|
||||||
),
|
),
|
||||||
)
|
)
|
||||||
|
# IMP-46 u5 — auto-cache opt-in. When set, ``cache.save_proposal``
|
||||||
|
# bypasses the ``user_approved`` gate only (``visual_check_passed``
|
||||||
|
# is never bypassable). Source of truth is
|
||||||
|
# ``settings.ai_fallback_auto_cache`` (src/config.py); this flag
|
||||||
|
# mutates the setting in-process so downstream callers read the
|
||||||
|
# same value through Settings rather than parsing args themselves.
|
||||||
|
parser.add_argument(
|
||||||
|
"--auto-cache",
|
||||||
|
dest="auto_cache",
|
||||||
|
action="store_true",
|
||||||
|
default=False,
|
||||||
|
help=(
|
||||||
|
"Allow cache.save_proposal to bypass the user_approved gate "
|
||||||
|
"(visual_check_passed remains mandatory). Sets "
|
||||||
|
"settings.ai_fallback_auto_cache=True for this run."
|
||||||
|
),
|
||||||
|
)
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
if args.auto_cache:
|
||||||
|
from src.config import settings as _settings
|
||||||
|
_settings.ai_fallback_auto_cache = True
|
||||||
|
|
||||||
overrides_frames: dict[str, str] = {}
|
overrides_frames: dict[str, str] = {}
|
||||||
for ov in args.override_frames:
|
for ov in args.override_frames:
|
||||||
if "=" not in ov:
|
if "=" not in ov:
|
||||||
|
|||||||
@@ -36,6 +36,7 @@ _ALLOWED_TOP_LEVEL: frozenset[str] = frozenset(
|
|||||||
"ast",
|
"ast",
|
||||||
"dataclasses",
|
"dataclasses",
|
||||||
"enum",
|
"enum",
|
||||||
|
"hashlib",
|
||||||
"json",
|
"json",
|
||||||
"pathlib",
|
"pathlib",
|
||||||
"random",
|
"random",
|
||||||
|
|||||||
@@ -1,32 +1,67 @@
|
|||||||
"""IMP-33 u6 — AI fallback cache gate tests.
|
"""IMP-46 u2 — Persistent JSON cache backend tests.
|
||||||
|
|
||||||
Verifies the IMP-46 gate contract:
|
Scope (Stage 2 plan, u2):
|
||||||
* ``read_proposal`` is a stub (returns None until IMP-46).
|
|
||||||
* ``save_proposal`` enforces both gates before any write attempt.
|
* Replaced ``NotImplementedError`` marker with a real persistent backend
|
||||||
* Storage itself raises NotImplementedError (IMP-46 marker).
|
at ``data/frame_cache/{frame_id}/{signature_hash}.json``.
|
||||||
|
* Preserved IMP-33 u6 dual write gate: ``visual_check_passed`` AND
|
||||||
|
``user_approved`` BOTH required (loud :class:`AiFallbackCacheGateError`
|
||||||
|
before any filesystem touch).
|
||||||
|
* Round-trip every :class:`ProposalKind`; round-trip ``slide_css`` None
|
||||||
|
*and* set; missing or corrupt files miss silently.
|
||||||
|
* Fingerprint *comparison* is u3; here we only check that the field is
|
||||||
|
persisted.
|
||||||
|
|
||||||
|
All filesystem writes are scoped to ``tmp_path`` via
|
||||||
|
``monkeypatch.setattr`` on the module-level :data:`CACHE_ROOT`, so the
|
||||||
|
production directory is never touched by these tests.
|
||||||
"""
|
"""
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import pathlib
|
||||||
|
|
||||||
import pytest
|
import pytest
|
||||||
|
|
||||||
|
from src.phase_z2_ai_fallback import cache as cache_mod
|
||||||
from src.phase_z2_ai_fallback.cache import (
|
from src.phase_z2_ai_fallback.cache import (
|
||||||
AiFallbackCacheGateError,
|
AiFallbackCacheGateError,
|
||||||
|
KEY_DELIMITER,
|
||||||
|
SCHEMA_VERSION,
|
||||||
read_proposal,
|
read_proposal,
|
||||||
save_proposal,
|
save_proposal,
|
||||||
)
|
)
|
||||||
from src.phase_z2_ai_fallback.schema import AiFallbackProposal, ProposalKind
|
from src.phase_z2_ai_fallback.schema import AiFallbackProposal, ProposalKind
|
||||||
|
|
||||||
|
|
||||||
def _proposal() -> AiFallbackProposal:
|
_FRAME_ID = "1171281190"
|
||||||
|
_SIG_HASH = "a" * 64 # SHA256-shaped placeholder; cache is shape-agnostic.
|
||||||
|
_KEY = f"{_FRAME_ID}{KEY_DELIMITER}{_SIG_HASH}"
|
||||||
|
|
||||||
|
|
||||||
|
def _proposal(
    kind: ProposalKind = ProposalKind.BUILDER_OPTIONS_PATCH,
    payload: dict | None = None,
) -> AiFallbackProposal:
    """Build a minimal proposal for the cache tests.

    ``payload`` falls back to a one-key builder-options dict only when it is
    ``None``; an explicitly supplied dict (even an empty one) is kept as-is.
    """
    if payload is None:
        payload = {"item_parser": "bullet_v2"}
    return AiFallbackProposal(
        proposal_kind=kind,
        payload=payload,
        rationale="u2-test",
    )
|
||||||
|
|
||||||
|
|
||||||
def test_read_proposal_returns_none_for_any_key():
|
@pytest.fixture(autouse=True)
def _isolated_cache_root(tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch):
    """Point ``cache_mod.CACHE_ROOT`` at a per-test tmp directory and yield it."""
    sandbox_root = tmp_path / "frame_cache"
    monkeypatch.setattr(cache_mod, "CACHE_ROOT", sandbox_root)
    yield sandbox_root
|
||||||
|
|
||||||
|
|
||||||
|
# -- read_proposal --------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def test_read_proposal_returns_none_for_missing_file():
    """Nothing was saved under ``_KEY``, so the lookup is a miss."""
    cached = read_proposal(_KEY)
    assert cached is None
|
||||||
|
|
||||||
|
|
||||||
def test_read_proposal_rejects_empty_key():
|
def test_read_proposal_rejects_empty_key():
|
||||||
@@ -34,10 +69,65 @@ def test_read_proposal_rejects_empty_key():
|
|||||||
read_proposal("")
|
read_proposal("")
|
||||||
|
|
||||||
|
|
||||||
|
def test_read_proposal_rejects_non_string_key():
    """Passing ``None`` as the key raises ``ValueError``."""
    bad_key = None
    with pytest.raises(ValueError):
        read_proposal(bad_key)  # type: ignore[arg-type]
|
||||||
|
|
||||||
|
|
||||||
|
def test_read_proposal_returns_none_for_legacy_key_format():
    """Router back-compat: a pre-u4 cache_key (no '::') is a silent miss."""
    legacy_key = "frame:1171281190:cardinality:many"
    assert read_proposal(legacy_key) is None
|
||||||
|
|
||||||
|
|
||||||
|
def test_read_proposal_returns_none_for_corrupt_json(_isolated_cache_root: pathlib.Path):
    """An entry file holding unparseable JSON is reported as a miss."""
    frame_dir = _isolated_cache_root / _FRAME_ID
    frame_dir.mkdir(parents=True, exist_ok=True)
    entry = frame_dir / f"{_SIG_HASH}.json"
    entry.write_text("{not valid json", encoding="utf-8")
    assert read_proposal(_KEY) is None
|
||||||
|
|
||||||
|
|
||||||
|
def test_read_proposal_returns_none_for_non_dict_root(_isolated_cache_root: pathlib.Path):
    """Valid JSON whose root is a list rather than an object is a miss."""
    entry = _isolated_cache_root.joinpath(_FRAME_ID, f"{_SIG_HASH}.json")
    entry.parent.mkdir(parents=True, exist_ok=True)
    entry.write_text("[]", encoding="utf-8")
    cached = read_proposal(_KEY)
    assert cached is None
|
||||||
|
|
||||||
|
|
||||||
|
def test_read_proposal_returns_none_when_payload_proposal_missing(
    _isolated_cache_root: pathlib.Path,
):
    """An entry without a ``proposal`` field is a silent miss.

    The stored ``schema_version`` comes from ``SCHEMA_VERSION`` instead of a
    literal ``1`` so the hand-written entry stays current when the constant is
    bumped, leaving the missing ``proposal`` field as its only defect.
    NOTE(review): assumes ``read_proposal`` inspects ``schema_version`` —
    confirm against ``cache.py``.
    """
    path = _isolated_cache_root / _FRAME_ID / f"{_SIG_HASH}.json"
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text(
        json.dumps({"schema_version": SCHEMA_VERSION}), encoding="utf-8"
    )
    assert read_proposal(_KEY) is None
|
||||||
|
|
||||||
|
|
||||||
|
def test_read_proposal_returns_none_for_forbidden_proposal_kind(
    _isolated_cache_root: pathlib.Path,
):
    """An entry whose ``proposal_kind`` is ``"mdx_text"`` is a silent miss.

    Every other field is well-formed; ``schema_version`` is taken from
    ``SCHEMA_VERSION`` (not a literal ``1``) so a future version bump cannot
    make this test pass for an unrelated reason.
    NOTE(review): assumes ``read_proposal`` inspects ``schema_version`` —
    confirm against ``cache.py``.
    """
    entry = {
        "schema_version": SCHEMA_VERSION,
        "proposal": {"proposal_kind": "mdx_text", "payload": {}, "rationale": ""},
        "slide_css": None,
        "fingerprints": {},
    }
    path = _isolated_cache_root / _FRAME_ID / f"{_SIG_HASH}.json"
    path.parent.mkdir(parents=True, exist_ok=True)
    path.write_text(json.dumps(entry), encoding="utf-8")
    assert read_proposal(_KEY) is None
|
||||||
|
|
||||||
|
|
||||||
|
# -- save_proposal: write gates -------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
def test_save_rejects_when_visual_check_failed():
    """A failed visual check raises the gate error and names the failing flag."""
    gate_flags = {"visual_check_passed": False, "user_approved": True}
    with pytest.raises(AiFallbackCacheGateError) as raised:
        save_proposal(_KEY, _proposal(), **gate_flags)
    assert "visual_check_passed" in str(raised.value)
|
||||||
|
|
||||||
@@ -45,7 +135,7 @@ def test_save_rejects_when_visual_check_failed():
|
|||||||
def test_save_rejects_when_user_not_approved():
    """Missing user approval raises the gate error and names the failing flag."""
    gate_flags = {"visual_check_passed": True, "user_approved": False}
    with pytest.raises(AiFallbackCacheGateError) as raised:
        save_proposal(_KEY, _proposal(), **gate_flags)
    assert "user_approved" in str(raised.value)
|
||||||
|
|
||||||
@@ -53,16 +143,20 @@ def test_save_rejects_when_user_not_approved():
|
|||||||
def test_save_rejects_when_both_gates_false():
    """With both gate flags ``False`` the save raises the gate error."""
    gate_flags = {"visual_check_passed": False, "user_approved": False}
    with pytest.raises(AiFallbackCacheGateError):
        save_proposal(_KEY, _proposal(), **gate_flags)
|
||||||
|
|
||||||
|
|
||||||
def test_save_raises_not_implemented_when_both_gates_pass():
|
def test_save_gate_violation_does_not_touch_filesystem(
    _isolated_cache_root: pathlib.Path,
):
    """A rejected save leaves no ``frame_id`` directory behind."""
    frame_dir = _isolated_cache_root / _FRAME_ID
    with pytest.raises(AiFallbackCacheGateError):
        save_proposal(
            _KEY,
            _proposal(),
            visual_check_passed=False,
            user_approved=True,
        )
    # The cache root itself may or may not exist; only the per-frame
    # directory is required to be absent after a gate rejection.
    assert not frame_dir.exists()
|
||||||
|
|
||||||
|
|
||||||
def test_save_rejects_empty_key():
|
def test_save_rejects_empty_key():
|
||||||
@@ -75,16 +169,340 @@ def test_save_rejects_empty_key():
|
|||||||
def test_save_rejects_non_proposal_object():
    """A plain dict in place of an ``AiFallbackProposal`` raises ``TypeError``."""
    not_a_proposal = {"proposal_kind": "builder_options_patch"}
    with pytest.raises(TypeError):
        save_proposal(
            _KEY,
            not_a_proposal,  # type: ignore[arg-type]
            visual_check_passed=True,
            user_approved=True,
        )
|
||||||
|
|
||||||
|
|
||||||
def test_gate_error_is_not_notimplementederror():
|
def test_save_rejects_legacy_key_format():
    """Writes must use the structural ``frame_id::signature_hash`` key form."""
    legacy_key = "frame:1171281190:cardinality:many"
    with pytest.raises(ValueError):
        save_proposal(
            legacy_key,
            _proposal(),
            visual_check_passed=True,
            user_approved=True,
        )
|
||||||
|
|
||||||
|
|
||||||
|
def test_save_rejects_slide_css_non_string():
    """An integer ``slide_css`` raises ``TypeError`` even when both gates pass."""
    not_css = 123
    with pytest.raises(TypeError):
        save_proposal(
            _KEY,
            _proposal(),
            visual_check_passed=True,
            user_approved=True,
            slide_css=not_css,  # type: ignore[arg-type]
        )
|
||||||
|
|
||||||
|
|
||||||
|
def test_save_rejects_fingerprints_non_dict():
    """A list passed as ``fingerprints`` raises ``TypeError``."""
    not_a_mapping = ["contract_sha", "abc"]
    with pytest.raises(TypeError):
        save_proposal(
            _KEY,
            _proposal(),
            visual_check_passed=True,
            user_approved=True,
            fingerprints=not_a_mapping,  # type: ignore[arg-type]
        )
|
||||||
|
|
||||||
|
|
||||||
|
def test_gate_error_is_not_notimplementederror():
    """The gate error type does not derive from ``NotImplementedError``.

    Callers must be able to tell a gate violation apart from absent
    persistence.
    """
    gate_bases = AiFallbackCacheGateError.__mro__
    assert NotImplementedError not in gate_bases
|
||||||
|
|
||||||
|
|
||||||
|
# -- save_proposal: persistence + round-trip ------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def test_save_creates_parent_directories(_isolated_cache_root: pathlib.Path):
    """A successful save creates the ``frame_id`` directory and the entry file."""
    frame_dir = _isolated_cache_root / _FRAME_ID
    assert not frame_dir.exists()
    save_proposal(_KEY, _proposal(), visual_check_passed=True, user_approved=True)
    entry = frame_dir / f"{_SIG_HASH}.json"
    assert entry.is_file()
|
||||||
|
|
||||||
|
|
||||||
|
def test_save_returns_resolved_path(_isolated_cache_root: pathlib.Path):
    """``save_proposal`` returns ``<root>/<frame_id>/<signature_hash>.json``."""
    expected = _isolated_cache_root / _FRAME_ID / f"{_SIG_HASH}.json"
    written = save_proposal(
        _KEY, _proposal(), visual_check_passed=True, user_approved=True
    )
    assert written == expected
|
||||||
|
|
||||||
|
|
||||||
|
def test_save_payload_includes_schema_version(_isolated_cache_root: pathlib.Path):
    """The written JSON carries ``schema_version`` equal to ``SCHEMA_VERSION``."""
    written = save_proposal(
        _KEY, _proposal(), visual_check_passed=True, user_approved=True
    )
    on_disk = json.loads(written.read_text(encoding="utf-8"))
    assert on_disk["schema_version"] == SCHEMA_VERSION
|
||||||
|
|
||||||
|
|
||||||
|
def test_save_payload_includes_proposal_dump(_isolated_cache_root: pathlib.Path):
    """The ``proposal`` field on disk equals the proposal's JSON-mode dump."""
    candidate = _proposal(payload={"item_parser": "pillar_item"})
    written = save_proposal(
        _KEY, candidate, visual_check_passed=True, user_approved=True
    )
    on_disk = json.loads(written.read_text(encoding="utf-8"))
    expected_dump = candidate.model_dump(mode="json")
    assert on_disk["proposal"] == expected_dump
|
||||||
|
|
||||||
|
|
||||||
|
def test_round_trip_default_slide_css_is_none(_isolated_cache_root: pathlib.Path):
    """Without optional kwargs, ``slide_css`` is null and ``fingerprints`` is ``{}``."""
    written = save_proposal(
        _KEY, _proposal(), visual_check_passed=True, user_approved=True
    )
    on_disk = json.loads(written.read_text(encoding="utf-8"))
    stored_css = on_disk["slide_css"]
    stored_fingerprints = on_disk["fingerprints"]
    assert stored_css is None
    assert stored_fingerprints == {}
|
||||||
|
|
||||||
|
|
||||||
|
def test_round_trip_with_slide_css_set(_isolated_cache_root: pathlib.Path):
    """A supplied ``slide_css`` string is written to disk unchanged."""
    stylesheet = ".slide { padding: 40px; }"
    written = save_proposal(
        _KEY,
        _proposal(),
        visual_check_passed=True,
        user_approved=True,
        slide_css=stylesheet,
    )
    on_disk = json.loads(written.read_text(encoding="utf-8"))
    assert on_disk["slide_css"] == stylesheet
|
||||||
|
|
||||||
|
|
||||||
|
def test_round_trip_with_fingerprints(_isolated_cache_root: pathlib.Path):
    """A supplied ``fingerprints`` dict is written to disk unchanged."""
    supplied = dict(
        contract_sha="c" * 64,
        partial_sha="p" * 64,
        catalog_sha="x" * 64,
    )
    written = save_proposal(
        _KEY,
        _proposal(),
        visual_check_passed=True,
        user_approved=True,
        fingerprints=supplied,
    )
    on_disk = json.loads(written.read_text(encoding="utf-8"))
    assert on_disk["fingerprints"] == supplied
|
||||||
|
|
||||||
|
|
||||||
|
def test_read_returns_proposal_after_save(_isolated_cache_root: pathlib.Path):
    """Kind, payload and rationale all survive a save followed by a read."""
    saved = _proposal(payload={"key": "value"})
    save_proposal(_KEY, saved, visual_check_passed=True, user_approved=True)

    restored = read_proposal(_KEY)

    assert restored is not None
    assert restored.proposal_kind == saved.proposal_kind
    assert restored.payload == saved.payload
    assert restored.rationale == saved.rationale
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("kind", list(ProposalKind))
def test_round_trip_all_proposal_kinds(
    kind: ProposalKind, _isolated_cache_root: pathlib.Path
):
    """Every whitelisted ProposalKind survives save → read unchanged."""
    # Kind-specific payloads; any other kind uses the builder-options payload.
    payload_by_kind = {
        ProposalKind.PARTIAL_OVERRIDES: {"slots": {"pillar_1": "alpha"}},
        ProposalKind.SLOT_MAPPING_PROPOSAL: {"mapping": [{"from": "a", "to": "b"}]},
    }
    payload = payload_by_kind.get(kind, {"item_parser": "bullet_v2"})

    save_proposal(
        _KEY,
        _proposal(kind=kind, payload=payload),
        visual_check_passed=True,
        user_approved=True,
    )
    restored = read_proposal(_KEY)

    assert restored is not None
    assert restored.proposal_kind is kind
    assert restored.payload == payload
|
||||||
|
|
||||||
|
|
||||||
|
def test_save_overwrites_existing_entry(_isolated_cache_root: pathlib.Path):
    """Saving twice under one key leaves the second payload readable."""
    for version in (1, 2):
        save_proposal(
            _KEY,
            _proposal(payload={"v": version}),
            visual_check_passed=True,
            user_approved=True,
        )
    restored = read_proposal(_KEY)
    assert restored is not None
    assert restored.payload == {"v": 2}
|
||||||
|
|
||||||
|
|
||||||
|
def test_file_layout_uses_frame_id_directory(_isolated_cache_root: pathlib.Path):
    """Storage layout = ``frame_id/`` directory, ``signature_hash.json`` file."""
    sibling_frame = f"{_FRAME_ID}_other"
    sibling_key = f"{sibling_frame}{KEY_DELIMITER}{_SIG_HASH}"
    file_name = f"{_SIG_HASH}.json"

    save_proposal(_KEY, _proposal(), visual_check_passed=True, user_approved=True)
    save_proposal(
        sibling_key, _proposal(), visual_check_passed=True, user_approved=True
    )

    assert (_isolated_cache_root / _FRAME_ID / file_name).is_file()
    assert (_isolated_cache_root / sibling_frame / file_name).is_file()
|
||||||
|
|
||||||
|
|
||||||
|
def test_different_signature_hashes_isolated(_isolated_cache_root: pathlib.Path):
    """Two distinct signature hashes under the same frame_id never collide."""
    first_key = f"{_FRAME_ID}{KEY_DELIMITER}{'a' * 64}"
    second_key = f"{_FRAME_ID}{KEY_DELIMITER}{'b' * 64}"

    save_proposal(
        first_key,
        _proposal(payload={"sig": "a"}),
        visual_check_passed=True,
        user_approved=True,
    )
    save_proposal(
        second_key,
        _proposal(payload={"sig": "b"}),
        visual_check_passed=True,
        user_approved=True,
    )

    first = read_proposal(first_key)
    second = read_proposal(second_key)
    assert first is not None and first.payload == {"sig": "a"}
    assert second is not None and second.payload == {"sig": "b"}
|
||||||
|
|
||||||
|
|
||||||
|
def test_parse_key_rejects_triple_delimiter():
    """A key with two ``::`` markers (three segments) reads as a miss."""
    overlong_key = f"{_FRAME_ID}{KEY_DELIMITER}{_SIG_HASH}{KEY_DELIMITER}extra"
    cached = read_proposal(overlong_key)
    assert cached is None
|
||||||
|
|
||||||
|
|
||||||
|
# -- IMP-46 u5: auto_cache gate (2^3 truth table) -------------------------
|
||||||
|
#
|
||||||
|
# Three booleans: visual_check_passed (V), user_approved (U), auto_cache (A).
|
||||||
|
# Contract: V=True AND (U=True OR A=True) -> persist; else gate-raise.
|
||||||
|
# V is never bypassable; A=True only relaxes U=False.
|
||||||
|
|
||||||
|
_GATE_TRUTH_TABLE = [
|
||||||
|
# (V, U, A, expect_persist)
|
||||||
|
(False, False, False, False),
|
||||||
|
(False, False, True, False),
|
||||||
|
(False, True, False, False),
|
||||||
|
(False, True, True, False),
|
||||||
|
(True, False, False, False),
|
||||||
|
(True, False, True, True),
|
||||||
|
(True, True, False, True),
|
||||||
|
(True, True, True, True),
|
||||||
|
]
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize("v,u,a,expect_persist", _GATE_TRUTH_TABLE)
def test_save_gate_truth_table(
    v: bool,
    u: bool,
    a: bool,
    expect_persist: bool,
    _isolated_cache_root: pathlib.Path,
) -> None:
    """IMP-46 u5 — exhaustive 2^3 enumeration of (V, U, A) -> {persist, raise}."""
    gate_flags = {"visual_check_passed": v, "user_approved": u, "auto_cache": a}

    if not expect_persist:
        with pytest.raises(AiFallbackCacheGateError):
            save_proposal(_KEY, _proposal(), **gate_flags)
        # Gate violations must never touch the filesystem (parent dir absent).
        assert not (_isolated_cache_root / _FRAME_ID).exists(), (
            f"truth row (V={v}, U={u}, A={a}) leaked a directory"
        )
        return

    written = save_proposal(
        _KEY,
        _proposal(payload={"v": int(v), "u": int(u), "a": int(a)}),
        **gate_flags,
    )
    assert written.is_file(), f"truth row (V={v}, U={u}, A={a}) must persist"
|
||||||
|
|
||||||
|
|
||||||
|
def test_auto_cache_default_off_preserves_dual_gate_semantics(
    _isolated_cache_root: pathlib.Path,
) -> None:
    """Omitting ``auto_cache`` still rejects an unapproved save, writing nothing."""
    frame_dir = _isolated_cache_root / _FRAME_ID
    with pytest.raises(AiFallbackCacheGateError) as raised:
        save_proposal(
            _KEY,
            _proposal(),
            visual_check_passed=True,
            user_approved=False,
        )
    assert "user_approved" in str(raised.value)
    assert not frame_dir.exists()
|
||||||
|
|
||||||
|
|
||||||
|
def test_auto_cache_cannot_bypass_visual_check() -> None:
    """``visual_check_passed=False`` raises even with ``auto_cache=True``."""
    gate_flags = {
        "visual_check_passed": False,
        "user_approved": True,
        "auto_cache": True,
    }
    with pytest.raises(AiFallbackCacheGateError) as raised:
        save_proposal(_KEY, _proposal(), **gate_flags)
    assert "visual_check_passed" in str(raised.value)
|
||||||
|
|
||||||
|
|
||||||
|
def test_auto_cache_bypass_user_approved_persists(
    _isolated_cache_root: pathlib.Path,
) -> None:
    """``auto_cache=True`` with ``user_approved=False`` persists the proposal."""
    expected_payload = {"bypass": "user"}
    written = save_proposal(
        _KEY,
        _proposal(payload={"bypass": "user"}),
        visual_check_passed=True,
        user_approved=False,
        auto_cache=True,
    )
    assert written.is_file()

    restored = read_proposal(_KEY)
    assert restored is not None
    assert restored.payload == expected_payload
|
||||||
|
|
||||||
|
|
||||||
|
def test_auto_cache_rejects_non_bool() -> None:
    """A string passed as ``auto_cache`` raises ``TypeError``."""
    not_a_bool = "yes"
    with pytest.raises(TypeError):
        save_proposal(
            _KEY,
            _proposal(),
            visual_check_passed=True,
            user_approved=True,
            auto_cache=not_a_bool,  # type: ignore[arg-type]
        )
|
||||||
|
|
||||||
|
|
||||||
|
def test_auto_cache_is_keyword_only() -> None:
    """``auto_cache`` is declared keyword-only with a default of ``False``."""
    import inspect

    auto_cache_param = inspect.signature(save_proposal).parameters["auto_cache"]
    assert auto_cache_param.kind is inspect.Parameter.KEYWORD_ONLY
    assert auto_cache_param.default is False
|
||||||
|
|||||||
347
tests/phase_z2_ai_fallback/test_cache_invalidation.py
Normal file
347
tests/phase_z2_ai_fallback/test_cache_invalidation.py
Normal file
@@ -0,0 +1,347 @@
|
|||||||
|
"""IMP-46 u3 — Fingerprint-based cache invalidation tests.
|
||||||
|
|
||||||
|
Scope (Stage 2 plan, u3):
|
||||||
|
|
||||||
|
* ``save_proposal`` persists ``fingerprints`` verbatim (u2 already covers
|
||||||
|
the round-trip; this suite re-asserts the read-side comparator).
|
||||||
|
* ``read_proposal`` accepts an optional ``fingerprints`` kwarg. When
|
||||||
|
supplied, the stored dict must equal the supplied dict EXACTLY (strict
|
||||||
|
equality). Mismatch — including missing keys, extra keys, or value
|
||||||
|
drift — returns ``None``.
|
||||||
|
* Default ``fingerprints=None`` performs no comparison (back-compat for
|
||||||
|
legacy callers).
|
||||||
|
* Fingerprint *computation* stays outside ``cache.py`` — these tests
|
||||||
|
treat the three declared shas (``contract_sha`` / ``partial_sha`` /
|
||||||
|
``catalog_sha``) as opaque hex strings, never recomputing them. The
|
||||||
|
cache layer is a content-addressed *comparator*, not a content
|
||||||
|
*hasher*.
|
||||||
|
|
||||||
|
All filesystem writes are scoped to ``tmp_path`` via
|
||||||
|
``monkeypatch.setattr`` on the module-level :data:`CACHE_ROOT`.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
import pathlib
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from src.phase_z2_ai_fallback import cache as cache_mod
|
||||||
|
from src.phase_z2_ai_fallback.cache import (
|
||||||
|
KEY_DELIMITER,
|
||||||
|
read_proposal,
|
||||||
|
save_proposal,
|
||||||
|
)
|
||||||
|
from src.phase_z2_ai_fallback.schema import AiFallbackProposal, ProposalKind
|
||||||
|
|
||||||
|
|
||||||
|
_FRAME_ID = "1171281190"
|
||||||
|
_SIG_HASH = "f" * 64
|
||||||
|
_KEY = f"{_FRAME_ID}{KEY_DELIMITER}{_SIG_HASH}"
|
||||||
|
|
||||||
|
_FINGERPRINTS_BASELINE: dict[str, str] = {
|
||||||
|
"contract_sha": "c" * 64,
|
||||||
|
"partial_sha": "p" * 64,
|
||||||
|
"catalog_sha": "x" * 64,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _proposal(payload: dict | None = None) -> AiFallbackProposal:
    """Build a builder-options proposal for the invalidation tests.

    ``payload`` falls back to a one-key dict only when it is ``None``.
    """
    effective = {"item_parser": "bullet_v2"} if payload is None else payload
    return AiFallbackProposal(
        proposal_kind=ProposalKind.BUILDER_OPTIONS_PATCH,
        payload=effective,
        rationale="u3-test",
    )
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(autouse=True)
def _isolated_cache_root(tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch):
    """Point ``cache_mod.CACHE_ROOT`` at a per-test tmp directory and yield it."""
    sandbox_root = tmp_path / "frame_cache"
    monkeypatch.setattr(cache_mod, "CACHE_ROOT", sandbox_root)
    yield sandbox_root
|
||||||
|
|
||||||
|
|
||||||
|
# -- save side: fingerprints persisted verbatim ---------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def test_save_persists_fingerprints_verbatim(
    _isolated_cache_root: pathlib.Path,
):
    """The ``fingerprints`` field on disk equals the dict that was supplied."""
    written = save_proposal(
        _KEY,
        _proposal(),
        visual_check_passed=True,
        user_approved=True,
        fingerprints=_FINGERPRINTS_BASELINE,
    )
    on_disk = json.loads(written.read_text(encoding="utf-8"))
    assert on_disk["fingerprints"] == _FINGERPRINTS_BASELINE
|
||||||
|
|
||||||
|
|
||||||
|
# -- read side: back-compat (no fingerprints kwarg) -----------------------
|
||||||
|
|
||||||
|
|
||||||
|
def test_read_without_fingerprints_kwarg_returns_proposal(
    _isolated_cache_root: pathlib.Path,
):
    """Legacy read path (no kwarg) skips invalidation — round-trip succeeds."""
    save_proposal(
        _KEY,
        _proposal(),
        visual_check_passed=True,
        user_approved=True,
        fingerprints=_FINGERPRINTS_BASELINE,
    )
    restored = read_proposal(_KEY)
    assert restored is not None
    expected_payload = {"item_parser": "bullet_v2"}
    assert restored.payload == expected_payload
|
||||||
|
|
||||||
|
|
||||||
|
def test_read_without_fingerprints_kwarg_ignores_stored_mismatch(
    _isolated_cache_root: pathlib.Path,
):
    """A reader that passes no fingerprints still gets the stored proposal.

    Invalidation only applies when the caller explicitly supplies
    fingerprints to compare against.
    """
    stale_fingerprints = {"contract_sha": "old"}
    save_proposal(
        _KEY,
        _proposal(),
        visual_check_passed=True,
        user_approved=True,
        fingerprints=stale_fingerprints,
    )
    assert read_proposal(_KEY) is not None
|
||||||
|
|
||||||
|
|
||||||
|
# -- read side: matching fingerprints -------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def test_read_with_matching_fingerprints_returns_proposal(
    _isolated_cache_root: pathlib.Path,
):
    """An equal (but distinct) fingerprints dict on read yields the proposal."""
    save_proposal(
        _KEY,
        _proposal(),
        visual_check_passed=True,
        user_approved=True,
        fingerprints=_FINGERPRINTS_BASELINE,
    )
    # Pass a copy so the match is by value, not by object identity.
    matching = dict(_FINGERPRINTS_BASELINE)
    restored = read_proposal(_KEY, fingerprints=matching)
    assert restored is not None
    assert restored.proposal_kind is ProposalKind.BUILDER_OPTIONS_PATCH
|
||||||
|
|
||||||
|
|
||||||
|
def test_read_with_empty_fingerprints_matches_empty_stored(
    _isolated_cache_root: pathlib.Path,
):
    """Both sides empty is an exact match, not a special-case None."""
    # No ``fingerprints`` kwarg on save: the default None is stored as {}.
    save_proposal(_KEY, _proposal(), visual_check_passed=True, user_approved=True)
    restored = read_proposal(_KEY, fingerprints={})
    assert restored is not None
|
||||||
|
|
||||||
|
|
||||||
|
# -- read side: invalidation on mismatch ----------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
    "drifted_axis",
    ["contract_sha", "partial_sha", "catalog_sha"],
)
def test_read_invalidates_on_single_axis_drift(
    drifted_axis: str, _isolated_cache_root: pathlib.Path
):
    """Changing any one fingerprint value on the read side yields a miss."""
    save_proposal(
        _KEY,
        _proposal(),
        visual_check_passed=True,
        user_approved=True,
        fingerprints=_FINGERPRINTS_BASELINE,
    )
    # "deadbeef" * 8 is a 64-char value distinct from every baseline sha.
    drifted = {**_FINGERPRINTS_BASELINE, drifted_axis: "deadbeef" * 8}
    assert read_proposal(_KEY, fingerprints=drifted) is None
|
||||||
|
|
||||||
|
|
||||||
|
def test_read_invalidates_when_caller_supplies_extra_key(
    _isolated_cache_root: pathlib.Path,
):
    """Strict equality — extra key on caller side is a mismatch."""
    save_proposal(
        _KEY,
        _proposal(),
        visual_check_passed=True,
        user_approved=True,
        fingerprints=_FINGERPRINTS_BASELINE,
    )
    superset = {**_FINGERPRINTS_BASELINE, "future_axis_sha": "z" * 64}
    assert read_proposal(_KEY, fingerprints=superset) is None
|
||||||
|
|
||||||
|
|
||||||
|
def test_read_invalidates_when_caller_supplies_subset(
    _isolated_cache_root: pathlib.Path,
):
    """A lookup carrying only one of the stored fingerprint keys must miss."""
    save_proposal(
        _KEY,
        _proposal(),
        visual_check_passed=True,
        user_approved=True,
        fingerprints=_FINGERPRINTS_BASELINE,
    )

    # Only the contract axis is supplied, with its correct baseline value.
    only_contract = {
        axis: _FINGERPRINTS_BASELINE[axis] for axis in ("contract_sha",)
    }

    assert read_proposal(_KEY, fingerprints=only_contract) is None
|
||||||
|
|
||||||
|
|
||||||
|
def test_read_invalidates_when_entry_saved_without_fingerprints(
    _isolated_cache_root: pathlib.Path,
):
    """An entry saved without fingerprints must not satisfy a fingerprinted read.

    The save omits ``fingerprints``; the lookup supplies the full baseline
    set and is expected to come back as ``None``.
    """
    # ``fingerprints`` is left at its default on the save side.
    gate_flags = {"visual_check_passed": True, "user_approved": True}
    save_proposal(_KEY, _proposal(), **gate_flags)

    result = read_proposal(_KEY, fingerprints=_FINGERPRINTS_BASELINE)
    assert result is None
|
||||||
|
|
||||||
|
|
||||||
|
def test_read_invalidates_when_stored_fingerprints_not_dict(
    _isolated_cache_root: pathlib.Path,
):
    """A payload whose ``fingerprints`` field is a JSON array reads back as None.

    The file is written by hand (bypassing ``save_proposal``) so the stored
    ``fingerprints`` value can be a list instead of an object.
    """
    corrupted_payload = {
        "schema_version": 1,
        "proposal": _proposal().model_dump(mode="json"),
        "slide_css": None,
        "fingerprints": ["contract_sha", "c" * 64],
    }

    entry_file = _isolated_cache_root.joinpath(_FRAME_ID, f"{_SIG_HASH}.json")
    entry_file.parent.mkdir(parents=True, exist_ok=True)
    entry_file.write_text(json.dumps(corrupted_payload), encoding="utf-8")

    assert read_proposal(_KEY, fingerprints=_FINGERPRINTS_BASELINE) is None
|
||||||
|
|
||||||
|
|
||||||
|
def test_read_invalidates_when_stored_fingerprints_field_missing(
    _isolated_cache_root: pathlib.Path,
):
    """A payload with no ``fingerprints`` field misses a fingerprinted read.

    The file is written by hand so the ``fingerprints`` key can be left out
    of the stored JSON altogether.
    """
    # Deliberately has no "fingerprints" entry.
    legacy_payload = {
        "schema_version": 1,
        "proposal": _proposal().model_dump(mode="json"),
        "slide_css": None,
    }

    entry_file = _isolated_cache_root.joinpath(_FRAME_ID, f"{_SIG_HASH}.json")
    entry_file.parent.mkdir(parents=True, exist_ok=True)
    entry_file.write_text(json.dumps(legacy_payload), encoding="utf-8")

    requested = {"contract_sha": "c" * 64}
    assert read_proposal(_KEY, fingerprints=requested) is None
|
||||||
|
|
||||||
|
|
||||||
|
def test_read_with_matching_fingerprints_still_loses_to_missing_file():
    """Nothing is saved first, so the read returns None despite valid fingerprints."""
    # NOTE(review): unlike its neighbours this test does not request
    # ``_isolated_cache_root``; presumably isolation is applied automatically
    # — confirm against the fixture definition.
    outcome = read_proposal(_KEY, fingerprints=_FINGERPRINTS_BASELINE)
    assert outcome is None
|
||||||
|
|
||||||
|
|
||||||
|
def test_read_with_matching_fingerprints_still_loses_to_corrupt_json(
    _isolated_cache_root: pathlib.Path,
):
    """An entry file containing unparseable JSON makes the read return None."""
    entry_file = _isolated_cache_root.joinpath(_FRAME_ID, f"{_SIG_HASH}.json")
    entry_file.parent.mkdir(parents=True, exist_ok=True)

    # Truncated object literal: not valid JSON.
    broken_text = "{not valid json"
    entry_file.write_text(broken_text, encoding="utf-8")

    outcome = read_proposal(_KEY, fingerprints=_FINGERPRINTS_BASELINE)
    assert outcome is None
|
||||||
|
|
||||||
|
|
||||||
|
# -- read side: input validation symmetry with save -----------------------
|
||||||
|
|
||||||
|
|
||||||
|
def test_read_rejects_non_dict_fingerprints():
    """Passing a list as ``fingerprints`` to ``read_proposal`` raises TypeError."""
    not_a_mapping = ["contract_sha", "c" * 64]
    with pytest.raises(TypeError):
        read_proposal(_KEY, fingerprints=not_a_mapping)  # type: ignore[arg-type]
|
||||||
|
|
||||||
|
|
||||||
|
def test_read_rejects_non_dict_fingerprints_string():
    """Passing a plain string as ``fingerprints`` raises TypeError."""
    # The whole literal is repeated 8 times; only its type matters here.
    not_a_mapping = "contract_sha=c" * 8
    with pytest.raises(TypeError):
        read_proposal(_KEY, fingerprints=not_a_mapping)  # type: ignore[arg-type]
|
||||||
|
|
||||||
|
|
||||||
|
def test_read_rejects_non_dict_fingerprints_int():
    """Passing an integer as ``fingerprints`` raises TypeError."""
    not_a_mapping = 42
    with pytest.raises(TypeError):
        read_proposal(_KEY, fingerprints=not_a_mapping)  # type: ignore[arg-type]
|
||||||
|
|
||||||
|
|
||||||
|
# -- isolation: cache.py never computes fingerprints ----------------------
|
||||||
|
|
||||||
|
|
||||||
|
def test_cache_module_has_no_fingerprint_computer():
    """Guardrail: no public callable in ``cache_mod`` looks like a hasher.

    Scans every public (non-underscore) callable attribute of ``cache_mod``
    and fails if its lower-cased name contains "hash", "sha" or
    "fingerprint". The intent is that fingerprint values are computed by
    callers and the cache module only compares them.
    """
    forbidden_substrings = ("hash", "sha", "fingerprint")

    leaks = []
    for name in dir(cache_mod):
        if name.startswith("_"):
            continue  # private names are outside the public surface
        if not callable(getattr(cache_mod, name)):
            continue  # constants and other data attributes are not hashers
        lowered = name.lower()
        if any(sub in lowered for sub in forbidden_substrings):
            leaks.append(name)

    assert leaks == [], (
        f"cache.py public surface leaks fingerprint computation: {leaks}; "
        "computation must live outside cache.py per IMP-46 u3 contract."
    )
|
||||||
|
|
||||||
|
|
||||||
|
# -- isolation across distinct fingerprint sets ---------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def test_distinct_fingerprint_sets_isolated_per_signature(
    _isolated_cache_root: pathlib.Path,
):
    """Entries under different signature hashes keep separate fingerprints.

    Two entries ("a" and "b") are saved under the same frame id with
    different signature hashes and different ``contract_sha`` values.
    Reading either key with the other entry's fingerprints must miss;
    reading with its own fingerprints must return its own payload.
    """
    tags = ("a", "b")
    keys = {tag: f"{_FRAME_ID}{KEY_DELIMITER}{tag * 64}" for tag in tags}
    fingerprints_by_tag = {tag: {"contract_sha": tag * 64} for tag in tags}

    for tag in tags:
        save_proposal(
            keys[tag],
            _proposal(payload={"sig": tag}),
            visual_check_passed=True,
            user_approved=True,
            fingerprints=fingerprints_by_tag[tag],
        )

    # Crossed lookups miss.
    assert read_proposal(keys["a"], fingerprints=fingerprints_by_tag["b"]) is None
    assert read_proposal(keys["b"], fingerprints=fingerprints_by_tag["a"]) is None

    # Aligned lookups hit and return the matching payload.
    hits = {
        tag: read_proposal(keys[tag], fingerprints=fingerprints_by_tag[tag])
        for tag in tags
    }
    assert hits["a"] is not None and hits["a"].payload == {"sig": "a"}
    assert hits["b"] is not None and hits["b"].payload == {"sig": "b"}
|
||||||
93
tests/phase_z2_ai_fallback/test_cache_repo_layout.py
Normal file
93
tests/phase_z2_ai_fallback/test_cache_repo_layout.py
Normal file
@@ -0,0 +1,93 @@
|
|||||||
|
"""IMP-46 u6 — repository layout coverage for the persistent frame cache.
|
||||||
|
|
||||||
|
This module is a *layout* contract test, not a runtime test. It asserts the
|
||||||
|
files committed to source control that make ``data/frame_cache/`` exist on a
|
||||||
|
fresh checkout while keeping cached JSON payloads ignored by git:
|
||||||
|
|
||||||
|
* ``data/frame_cache/.gitkeep`` is tracked (so the cache root exists for a
|
||||||
|
fresh clone before any AI fallback run materialises payloads).
|
||||||
|
* ``.gitignore`` ignores ``data/*`` broadly, re-includes the
|
||||||
|
``data/frame_cache/`` directory, ignores its contents, and re-includes
|
||||||
|
``data/frame_cache/.gitkeep`` so cache payloads under
|
||||||
|
``data/frame_cache/{frame_id}/{signature_hash}.json`` remain ignored.
|
||||||
|
|
||||||
|
If somebody removes the ``.gitkeep`` marker, drops the negation lines from
|
||||||
|
``.gitignore``, or commits a real cache payload, this test fails. The cache
|
||||||
|
module surface (cache.py) is exercised by ``test_cache.py`` /
|
||||||
|
``test_cache_invalidation.py`` and is intentionally *not* re-asserted here —
|
||||||
|
this file is the layout-only lock that Stage 2 u6 declared.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
REPO_ROOT = Path(__file__).resolve().parents[2]
|
||||||
|
GITIGNORE_PATH = REPO_ROOT / ".gitignore"
|
||||||
|
CACHE_ROOT = REPO_ROOT / "data" / "frame_cache"
|
||||||
|
GITKEEP_PATH = CACHE_ROOT / ".gitkeep"
|
||||||
|
|
||||||
|
|
||||||
|
def _gitignore_lines() -> list[str]:
    """Return every line of the repository ``.gitignore``, whitespace-stripped.

    Fails with an assertion if ``GITIGNORE_PATH`` is not an existing file.
    Blank and comment lines are kept (as stripped strings).
    """
    assert GITIGNORE_PATH.is_file(), f".gitignore missing at {GITIGNORE_PATH}"
    raw = GITIGNORE_PATH.read_text(encoding="utf-8")
    stripped: list[str] = []
    for entry in raw.splitlines():
        stripped.append(entry.strip())
    return stripped
|
||||||
|
|
||||||
|
|
||||||
|
def test_frame_cache_root_directory_exists() -> None:
    """The cache root ``CACHE_ROOT`` must be an existing directory."""
    failure_message = (
        f"frame cache root missing: {CACHE_ROOT}. The directory must exist "
        "for save_proposal to write JSON payloads without first conjuring a "
        "parent on demand from outside the cache module."
    )
    root_present = CACHE_ROOT.is_dir()
    assert root_present, failure_message
|
||||||
|
|
||||||
|
|
||||||
|
def test_gitkeep_marker_is_tracked_file() -> None:
    """The ``.gitkeep`` marker at ``GITKEEP_PATH`` must exist as a regular file."""
    failure_message = (
        f".gitkeep marker missing: {GITKEEP_PATH}. Without it the cache root "
        "would disappear on a fresh clone (everything under data/ is "
        "ignored by default)."
    )
    marker_present = GITKEEP_PATH.is_file()
    assert marker_present, failure_message
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
    "rule",
    [
        # 1. Ignore everything directly under data/.
        "data/*",
        # 2. Un-ignore the frame_cache directory so later negations can apply.
        "!data/frame_cache/",
        # 3. Ignore whatever lives inside frame_cache/.
        "data/frame_cache/*",
        # 4. Un-ignore only the .gitkeep marker.
        "!data/frame_cache/.gitkeep",
    ],
)
def test_gitignore_contains_frame_cache_exception(rule: str) -> None:
    """Each of the four frame-cache rules appears verbatim in ``.gitignore``."""
    present_rules = _gitignore_lines()
    failure_message = (
        f".gitignore missing IMP-46 u6 rule: {rule!r}. The four-line block "
        "(data/*, !data/frame_cache/, data/frame_cache/*, "
        "!data/frame_cache/.gitkeep) together ensure the cache root is "
        "tracked while cached payloads remain ignored."
    )
    assert rule in present_rules, failure_message
|
||||||
|
|
||||||
|
|
||||||
|
def test_gitignore_rule_order_keeps_payloads_ignored() -> None:
    """Pin the relative order of the four frame-cache ``.gitignore`` rules.

    In a gitignore file the last matching pattern decides the outcome, and a
    file cannot be re-included while its parent directory is excluded. The
    block therefore has to read, top to bottom:

    1. ``data/*``                      (broad ignore)
    2. ``!data/frame_cache/``          (re-include the directory)
    3. ``data/frame_cache/*``          (ignore its contents)
    4. ``!data/frame_cache/.gitkeep``  (re-include only the marker)

    ``list.index`` raises ``ValueError`` if a rule is absent altogether; the
    presence of each rule is covered separately by
    ``test_gitignore_contains_frame_cache_exception``.
    """
    lines = _gitignore_lines()
    broad_ignore = lines.index("data/*")
    reinclude_dir = lines.index("!data/frame_cache/")
    reignore_contents = lines.index("data/frame_cache/*")
    reinclude_marker = lines.index("!data/frame_cache/.gitkeep")

    # If the broad ignore came after the directory re-include it would match
    # data/frame_cache last and exclude the directory again, which in turn
    # makes the .gitkeep negation ineffective.
    assert broad_ignore < reinclude_dir, (
        "gitignore IMP-46 u6 block out of order: 'data/*' must come before "
        "'!data/frame_cache/' or the directory is ignored again and the "
        "marker cannot be re-included."
    )
    assert reinclude_dir < reignore_contents < reinclude_marker, (
        "gitignore IMP-46 u6 block out of order: expected "
        "'!data/frame_cache/' < 'data/frame_cache/*' < "
        "'!data/frame_cache/.gitkeep' so cached payloads stay ignored while "
        "only the marker is tracked."
    )
|
||||||
184
tests/phase_z2_ai_fallback/test_signature.py
Normal file
184
tests/phase_z2_ai_fallback/test_signature.py
Normal file
@@ -0,0 +1,184 @@
|
|||||||
|
"""IMP-46 u1 — Frame cache signature builder tests.
|
||||||
|
|
||||||
|
Verifies:
|
||||||
|
* Determinism — identical inputs yield the same SHA256 digest.
|
||||||
|
* Axis-change sensitivity — every one of the 8 declared axes mutates the
|
||||||
|
digest when changed in isolation.
|
||||||
|
* Public surface — only the 8 declared axes are accepted (no
|
||||||
|
sample/section identifier leakage).
|
||||||
|
* char_count bucket boundaries (0-50, 51-150, 151-400, 401-1000, 1001+).
|
||||||
|
* source_shape enum equivalence (string and SourceShape inputs match).
|
||||||
|
* schema_version is part of the hashed payload (digest stable for fixture).
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import inspect
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from src.phase_z2_ai_fallback.signature import (
|
||||||
|
CHAR_COUNT_BUCKET_LABELS,
|
||||||
|
SCHEMA_VERSION,
|
||||||
|
SourceShape,
|
||||||
|
bucket_char_count,
|
||||||
|
build_signature,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def _base_kwargs() -> dict:
    """Return a fresh dict of baseline keyword arguments for ``build_signature``.

    A new dict is built on every call so tests can mutate it freely.
    """
    return {
        "frame_id": "frame_03",
        "v4_label": "light_edit",
        "cardinality": 3,
        "source_shape": SourceShape.BULLET,
        "h3_count": 2,
        "char_count_bucket": "51-150",
        "layout_preset": "sidebar-right",
        "zone_position": "top",
    }
|
||||||
|
|
||||||
|
|
||||||
|
def test_schema_version_is_one() -> None:
    """The exported ``SCHEMA_VERSION`` constant is pinned to 1."""
    pinned_version = 1
    assert SCHEMA_VERSION == pinned_version
|
||||||
|
|
||||||
|
|
||||||
|
def test_bucket_labels_match_spec() -> None:
    """``CHAR_COUNT_BUCKET_LABELS`` equals the five expected labels, in order."""
    spec_labels = ("0-50", "51-150", "151-400", "401-1000", "1001+")
    assert CHAR_COUNT_BUCKET_LABELS == spec_labels
|
||||||
|
|
||||||
|
|
||||||
|
def test_signature_is_deterministic() -> None:
    """Two calls with equal inputs give the same 64-character signature."""
    first, second = (build_signature(**_base_kwargs()) for _ in range(2))
    assert first == second
    assert len(first) == 64
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize(
    "axis, new_value",
    [
        ("frame_id", "frame_04"),
        ("v4_label", "restructure"),
        ("cardinality", 5),
        ("source_shape", SourceShape.PARAGRAPH),
        ("h3_count", 3),
        ("char_count_bucket", "151-400"),
        ("layout_preset", "two-column"),
        ("zone_position", "bottom_l"),
    ],
)
def test_signature_changes_for_each_axis(axis: str, new_value: object) -> None:
    """Overriding a single axis of the baseline inputs changes the signature."""
    baseline_signature = build_signature(**_base_kwargs())
    perturbed_inputs = {**_base_kwargs(), axis: new_value}
    perturbed_signature = build_signature(**perturbed_inputs)
    assert perturbed_signature != baseline_signature
|
||||||
|
|
||||||
|
|
||||||
|
def test_signature_accepts_string_source_shape() -> None:
    """``source_shape="bullet"`` signs the same as ``SourceShape.BULLET``."""
    from_enum = build_signature(**_base_kwargs())
    string_inputs = {**_base_kwargs(), "source_shape": "bullet"}
    from_string = build_signature(**string_inputs)
    assert from_string == from_enum
|
||||||
|
|
||||||
|
|
||||||
|
def test_signature_rejects_unknown_source_shape() -> None:
    """An unrecognised ``source_shape`` string makes ``build_signature`` raise ValueError."""
    bad_inputs = {**_base_kwargs(), "source_shape": "nonsense"}
    with pytest.raises(ValueError):
        build_signature(**bad_inputs)
|
||||||
|
|
||||||
|
|
||||||
|
def test_signature_rejects_unknown_char_count_bucket() -> None:
    """An unrecognised ``char_count_bucket`` label makes ``build_signature`` raise ValueError."""
    bad_inputs = {**_base_kwargs(), "char_count_bucket": "999-1234"}
    with pytest.raises(ValueError):
        build_signature(**bad_inputs)
|
||||||
|
|
||||||
|
|
||||||
|
def test_signature_handles_none_cardinality() -> None:
    """``cardinality=None`` is accepted and signs differently from ``cardinality=0``."""
    none_inputs = {**_base_kwargs(), "cardinality": None}
    none_signature = build_signature(**none_inputs)
    assert len(none_signature) == 64

    zero_inputs = {**_base_kwargs(), "cardinality": 0}
    zero_signature = build_signature(**zero_inputs)
    assert zero_signature != none_signature
|
||||||
|
|
||||||
|
|
||||||
|
def test_signature_surface_only_8_declared_axes() -> None:
    """``build_signature`` accepts exactly the eight declared axis parameters."""
    declared_axes = frozenset(
        (
            "frame_id",
            "v4_label",
            "cardinality",
            "source_shape",
            "h3_count",
            "char_count_bucket",
            "layout_preset",
            "zone_position",
        )
    )
    actual_parameters = set(inspect.signature(build_signature).parameters.keys())
    assert actual_parameters == declared_axes
|
||||||
|
|
||||||
|
|
||||||
|
def test_bucket_boundaries() -> None:
    """``bucket_char_count`` maps both edges of every bucket to the right label."""
    # (char_count, expected label) — the lower and upper edge of each bucket.
    edge_cases = (
        (0, "0-50"),
        (50, "0-50"),
        (51, "51-150"),
        (150, "51-150"),
        (151, "151-400"),
        (400, "151-400"),
        (401, "401-1000"),
        (1000, "401-1000"),
        (1001, "1001+"),
        (10_000, "1001+"),
    )
    for char_count, expected_label in edge_cases:
        assert bucket_char_count(char_count) == expected_label
|
||||||
|
|
||||||
|
|
||||||
|
def test_bucket_rejects_negative() -> None:
    """A negative count makes ``bucket_char_count`` raise ValueError."""
    negative_count = -1
    with pytest.raises(ValueError):
        bucket_char_count(negative_count)
|
||||||
|
|
||||||
|
|
||||||
|
def test_bucket_rejects_non_int() -> None:
    """A float and a bool are both rejected by ``bucket_char_count`` with TypeError."""
    # ``True`` is included because bool is a subclass of int in Python.
    for not_an_int in (3.14, True):
        with pytest.raises(TypeError):
            bucket_char_count(not_an_int)  # type: ignore[arg-type]
|
||||||
|
|
||||||
|
|
||||||
|
def test_signature_stable_known_fixture() -> None:
    """Lock the digest of a known fixture against silent payload-shape changes.

    The expected value is recomputed here as the SHA256 hex digest of the
    sorted-key JSON encoding of ``schema_version`` plus the eight axes, so
    an extra axis or a drifting schema version breaks this test.
    """
    import hashlib
    import json

    axes = {
        "frame_id": "frame_03",
        "v4_label": "light_edit",
        "cardinality": 3,
        "source_shape": SourceShape.BULLET,
        "h3_count": 2,
        "char_count_bucket": "51-150",
        "layout_preset": "sidebar-right",
        "zone_position": "top",
    }
    sig = build_signature(**axes)

    # Same axes, with the enum replaced by its plain string form and the
    # schema version added.
    expected_payload = {"schema_version": 1, **axes, "source_shape": "bullet"}
    encoded = json.dumps(expected_payload, sort_keys=True, ensure_ascii=False)
    expected = hashlib.sha256(encoded.encode("utf-8")).hexdigest()

    assert sig == expected
|
||||||
@@ -1,12 +1,18 @@
|
|||||||
"""IMP-33 u8 — Step 12 AI repair wiring tests.
|
"""IMP-33 u8 + IMP-46 u4 + IMP-47B u2 — Step 12 AI repair wiring tests.
|
||||||
|
|
||||||
Covers the two structural gates layered on top of the u7 router:
|
Covers the structural gates layered on top of the u7 router:
|
||||||
* IMP-30 provisional gate (only provisional units may invoke AI repair)
|
* IMP-30 provisional gate (only provisional units may invoke AI repair)
|
||||||
* Reject gate (route_hint=design_reference_only NEVER calls AI)
|
* Catch-all ``route_not_ai_adaptation:<hint>`` skip — every route_hint
|
||||||
Plus the record-shape contract returned for downstream Step 12 artifacts.
|
other than ``ai_adaptation_required`` (including the legacy
|
||||||
|
``design_reference_only`` hint) falls through to a single uniform skip
|
||||||
|
after the IMP-47B u2 removal of the bespoke reject gate.
|
||||||
|
Plus the record-shape contract returned for downstream Step 12 artifacts
|
||||||
|
and the IMP-46 u4 structural cache key + fingerprints contract.
|
||||||
"""
|
"""
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import hashlib
|
||||||
|
import json
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass, field
|
||||||
from typing import Any
|
from typing import Any
|
||||||
from unittest.mock import MagicMock
|
from unittest.mock import MagicMock
|
||||||
@@ -24,6 +30,12 @@ class FakeUnit:
|
|||||||
source_section_ids: list[str] = field(default_factory=lambda: ["s1"])
|
source_section_ids: list[str] = field(default_factory=lambda: ["s1"])
|
||||||
raw_content: str = "raw"
|
raw_content: str = "raw"
|
||||||
v4_rank: int | None = 1
|
v4_rank: int | None = 1
|
||||||
|
cardinality: int | None = None
|
||||||
|
layout_preset: str = ""
|
||||||
|
zone_position: str = ""
|
||||||
|
source_shape: str = "paragraph"
|
||||||
|
h3_count: int = 0
|
||||||
|
char_count: int = 0
|
||||||
|
|
||||||
|
|
||||||
_ROUTE_HINTS: dict[str | None, str | None] = {
|
_ROUTE_HINTS: dict[str | None, str | None] = {
|
||||||
@@ -64,6 +76,25 @@ def _call(
|
|||||||
return step12_mod.gather_step12_ai_repair_proposals(units, **kwargs)
|
return step12_mod.gather_step12_ai_repair_proposals(units, **kwargs)
|
||||||
|
|
||||||
|
|
||||||
|
def _ai_unit(**overrides: Any) -> FakeUnit:
    """Build a provisional ``restructure`` FakeUnit with fully populated fields.

    Any keyword argument replaces the corresponding default before the
    unit is constructed.
    """
    defaults: dict[str, Any] = {
        "label": "restructure",
        "provisional": True,
        "frame_template_id": "tmpl_x",
        "frame_id": "fid_123",
        "source_section_ids": ["02-1"],
        "layout_preset": "single_column",
        "zone_position": "zone_a",
        "source_shape": "bullet",
        "h3_count": 3,
        "char_count": 200,
        "cardinality": 5,
    }
    return FakeUnit(**{**defaults, **overrides})
|
||||||
|
|
||||||
|
|
||||||
def test_non_provisional_unit_is_skipped_without_ai_call(monkeypatch):
|
def test_non_provisional_unit_is_skipped_without_ai_call(monkeypatch):
|
||||||
router = MagicMock()
|
router = MagicMock()
|
||||||
monkeypatch.setattr(step12_mod, "route_ai_fallback", router)
|
monkeypatch.setattr(step12_mod, "route_ai_fallback", router)
|
||||||
@@ -75,13 +106,20 @@ def test_non_provisional_unit_is_skipped_without_ai_call(monkeypatch):
|
|||||||
router.assert_not_called()
|
router.assert_not_called()
|
||||||
|
|
||||||
|
|
||||||
def test_design_reference_route_falls_through_to_route_not_ai_adaptation(monkeypatch):
    """IMP-47B u2 — the bespoke 'design_reference_only_no_ai' skip is gone.

    A provisional unit labelled ``reject`` (which the local test mapping
    turns into the legacy ``design_reference_only`` route hint) must be
    skipped through the uniform ``route_not_ai_adaptation:<hint>`` reason
    without the router ever being called. Production reject routing is
    exercised by u9.
    """
    router = MagicMock()
    monkeypatch.setattr(step12_mod, "route_ai_fallback", router)

    (record,) = _call([FakeUnit(label="reject", provisional=True)])[:1]

    assert record["ai_called"] is False
    assert record["skip_reason"] == "route_not_ai_adaptation:design_reference_only"
    assert record["route_hint"] == "design_reference_only"
    router.assert_not_called()
|
||||||
|
|
||||||
@@ -153,29 +191,206 @@ def test_mixed_units_each_independently_classified(monkeypatch):
|
|||||||
records = _call(units)
|
records = _call(units)
|
||||||
assert [r["skip_reason"] for r in records] == [
|
assert [r["skip_reason"] for r in records] == [
|
||||||
"not_provisional",
|
"not_provisional",
|
||||||
"design_reference_only_no_ai",
|
"route_not_ai_adaptation:design_reference_only",
|
||||||
"router_short_circuit",
|
"router_short_circuit",
|
||||||
"not_provisional",
|
"not_provisional",
|
||||||
]
|
]
|
||||||
assert router.call_count == 1
|
assert router.call_count == 1
|
||||||
|
|
||||||
|
|
||||||
def test_cache_key_includes_template_and_section_ids(monkeypatch):
|
# ---------------------------------------------------------------------------
|
||||||
|
# IMP-46 u4 — structural cache key + fingerprints
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def test_cache_key_format_is_frame_id_plus_sha256(monkeypatch):
    """cache_key is '{frame_id}::{64-hex-sha256}', NOT template_id + section_ids."""
    router = MagicMock(return_value=None)
    monkeypatch.setattr(step12_mod, "route_ai_fallback", router)

    _call([_ai_unit()])
    cache_key = router.call_args.kwargs["cache_key"]

    assert "::" in cache_key
    frame_part, _, signature_part = cache_key.partition("::")
    assert frame_part == "fid_123"

    # Signature half: exactly 64 lowercase hexadecimal characters.
    assert len(signature_part) == 64
    lowercase_hex = set("0123456789abcdef")
    assert set(signature_part) <= lowercase_hex

    # Neither the template id nor a section id may appear in the key.
    for legacy_fragment in ("tmpl_x", "02-1"):
        assert legacy_fragment not in cache_key
|
||||||
|
|
||||||
|
|
||||||
|
def test_cache_key_invariant_to_section_id_changes(monkeypatch):
    """Same structural axes → same cache_key regardless of source_section_ids."""
    router = MagicMock(return_value=None)
    monkeypatch.setattr(step12_mod, "route_ai_fallback", router)

    def key_for(section_ids):
        # Reset first so call_args always reflects this invocation only.
        router.reset_mock()
        _call([_ai_unit(source_section_ids=section_ids)])
        return router.call_args.kwargs["cache_key"]

    single_section_key = key_for(["02-1"])
    two_section_key = key_for(["05-2", "07-3"])
    assert single_section_key == two_section_key
|
||||||
|
|
||||||
|
|
||||||
|
def test_cache_key_invariant_to_template_id_changes(monkeypatch):
    """frame_template_id is NOT part of the structural signature (frame_id is)."""
    router = MagicMock(return_value=None)
    monkeypatch.setattr(step12_mod, "route_ai_fallback", router)

    def key_for(template_id):
        # Reset first so call_args always reflects this invocation only.
        router.reset_mock()
        _call([_ai_unit(frame_template_id=template_id)])
        return router.call_args.kwargs["cache_key"]

    original_key = key_for("tmpl_x")
    other_template_key = key_for("tmpl_OTHER")
    assert original_key == other_template_key
|
||||||
|
|
||||||
|
|
||||||
|
def test_cache_key_changes_when_any_signature_axis_changes(monkeypatch):
    """Flipping any of the 7 unit-derived signature axes mutates cache_key.

    Each perturbation overrides exactly one field of the default
    ``_ai_unit()`` and the resulting cache_key is compared with the
    unperturbed one. ``label`` is deliberately not perturbed: changing it
    can change the routing decision (e.g. ``use_as_is`` would be skipped
    rather than sent to the router), so there would be no cache_key to read.
    """
    router = MagicMock(return_value=None)
    monkeypatch.setattr(step12_mod, "route_ai_fallback", router)

    _call([_ai_unit()])
    base_key = router.call_args.kwargs["cache_key"]

    # Only axes that leave the routing decision untouched.
    perturbations: dict[str, Any] = {
        "frame_id": "fid_OTHER",
        "layout_preset": "two_column",
        "zone_position": "zone_b",
        "source_shape": "paragraph",
        "h3_count": 7,
        "char_count": 500,  # bucket boundary crossing (151-400 → 401-1000)
        "cardinality": 4,
    }
    for axis, value in perturbations.items():
        router.reset_mock()
        _call([_ai_unit(**{axis: value})])
        new_key = router.call_args.kwargs["cache_key"]
        assert new_key != base_key, f"signature axis {axis!r} did not mutate cache_key"
|
||||||
|
|
||||||
|
|
||||||
|
def test_char_count_bucket_collapses_within_bucket(monkeypatch):
    """Different char_counts in the SAME bucket → identical cache_key."""
    router = MagicMock(return_value=None)
    monkeypatch.setattr(step12_mod, "route_ai_fallback", router)

    def key_for(char_count):
        # Reset first so call_args always reflects this invocation only.
        router.reset_mock()
        _call([_ai_unit(char_count=char_count)])
        return router.call_args.kwargs["cache_key"]

    key_low = key_for(160)
    key_high = key_for(399)
    assert key_low == key_high  # both fall in "151-400"

    key_overflow = key_for(401)
    assert key_overflow != key_low  # crossed into "401-1000"
|
||||||
|
|
||||||
|
|
||||||
|
def test_fingerprints_attached_to_ai_record(monkeypatch):
    """AI-called records expose contract_sha + partial_sha + catalog_sha.

    ``catalog_sha`` must be the loader's value verbatim; the other two must
    equal the SHA256 hex digest of the sorted-key JSON encoding of the
    contract and the partial returned by the injected loaders.
    """
    router = MagicMock(return_value=None)
    monkeypatch.setattr(step12_mod, "route_ai_fallback", router)

    def sha256_of(obj):
        encoded = json.dumps(obj, sort_keys=True, ensure_ascii=False)
        return hashlib.sha256(encoded.encode("utf-8")).hexdigest()

    contract = {"frame_id": "fid", "payload": {"x": 1}, "sub_zones": []}
    partial = {"some": "partial", "deeper": [1, 2, 3]}
    catalog_value = "deadbeef" * 8

    recs = _call(
        [_ai_unit()],
        get_contract_fn=lambda _t: contract,
        figma_partial_loader=lambda _t: partial,
        catalog_sha_loader=lambda: catalog_value,
    )
    fps = recs[0]["fingerprints"]

    # Shape: a dict with exactly the three axes, all string-valued.
    assert isinstance(fps, dict)
    assert set(fps.keys()) == {"contract_sha", "partial_sha", "catalog_sha"}
    for value in fps.values():
        assert isinstance(value, str)

    # Values.
    assert fps["catalog_sha"] == catalog_value
    assert fps["contract_sha"] == sha256_of(contract)
    assert fps["partial_sha"] == sha256_of(partial)
|
||||||
|
|
||||||
|
|
||||||
|
def test_fingerprints_default_catalog_sha_is_empty_string(monkeypatch):
    """No catalog_sha_loader → catalog_sha defaults to '' (sentinel, not missing key)."""
    router = MagicMock(return_value=None)
    monkeypatch.setattr(step12_mod, "route_ai_fallback", router)

    first_record = _call([_ai_unit()])[0]
    fps = first_record["fingerprints"]

    assert fps["catalog_sha"] == ""
    # All three keys are still present even without a catalog loader.
    expected_axes = {"contract_sha", "partial_sha", "catalog_sha"}
    assert set(fps.keys()) == expected_axes
|
||||||
|
|
||||||
|
|
||||||
|
def test_fingerprints_change_when_contract_changes(monkeypatch):
    """Different frame_contract → different contract_sha, partial_sha unchanged."""
    router = MagicMock(return_value=None)
    monkeypatch.setattr(step12_mod, "route_ai_fallback", router)

    def fingerprints_for(contract):
        records = _call([_ai_unit()], get_contract_fn=lambda _t: contract)
        return records[0]["fingerprints"]

    fps_a = fingerprints_for({"a": 1})
    fps_b = fingerprints_for({"a": 2})

    assert fps_a["contract_sha"] != fps_b["contract_sha"]
    assert fps_a["partial_sha"] == fps_b["partial_sha"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_fingerprints_change_when_partial_changes(monkeypatch):
    """Different figma_partial_json → different partial_sha, contract_sha unchanged."""
    router = MagicMock(return_value=None)
    monkeypatch.setattr(step12_mod, "route_ai_fallback", router)

    def fingerprints_for(partial):
        records = _call([_ai_unit()], figma_partial_loader=lambda _t: partial)
        return records[0]["fingerprints"]

    fps_a = fingerprints_for({"p": 1})
    fps_b = fingerprints_for({"p": 2})

    assert fps_a["partial_sha"] != fps_b["partial_sha"]
    assert fps_a["contract_sha"] == fps_b["contract_sha"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_v4_result_cardinality_uses_unit_value(monkeypatch):
    """``v4_result['cardinality']`` passed to the router mirrors the unit's value.

    Checked once with an integer cardinality and once with ``None``.
    """
    stub_router = MagicMock(return_value=None)
    monkeypatch.setattr(step12_mod, "route_ai_fallback", stub_router)

    # Integer cardinality is forwarded as-is.
    _call([_ai_unit(cardinality=7)])
    forwarded = stub_router.call_args.kwargs["v4_result"]
    assert forwarded["cardinality"] == 7

    # Reset so call_args reflects only the second gather call.
    stub_router.reset_mock()
    _call([_ai_unit(cardinality=None)])
    forwarded = stub_router.call_args.kwargs["v4_result"]
    assert forwarded["cardinality"] is None
|
||||||
|
|
||||||
|
|
||||||
|
def test_skipped_records_have_no_cache_key_or_fingerprints(monkeypatch):
|
||||||
|
"""Non-AI-eligible records keep cache_key and fingerprints as None."""
|
||||||
|
monkeypatch.setattr(step12_mod, "route_ai_fallback", MagicMock(return_value=None))
|
||||||
units = [
|
units = [
|
||||||
FakeUnit(
|
FakeUnit(label="restructure", provisional=False),
|
||||||
label="restructure",
|
FakeUnit(label="reject", provisional=True),
|
||||||
provisional=True,
|
FakeUnit(label="light_edit", provisional=True),
|
||||||
frame_template_id="tmpl_abc",
|
|
||||||
source_section_ids=["02-1", "02-2"],
|
|
||||||
)
|
|
||||||
]
|
]
|
||||||
_call(units)
|
recs = _call(units)
|
||||||
assert router.call_args.kwargs["cache_key"] == "tmpl_abc::02-1,02-2"
|
for rec in recs:
|
||||||
|
assert rec["cache_key"] is None
|
||||||
|
assert rec["fingerprints"] is None
|
||||||
|
|
||||||
|
|
||||||
def test_record_shape_contract_is_stable(monkeypatch):
|
def test_catalog_sha_loader_called_once_per_gather(monkeypatch):
    """The catalog SHA loader runs once per gather call, not once per unit."""
    monkeypatch.setattr(
        step12_mod, "route_ai_fallback", MagicMock(return_value=None)
    )
    sha_loader = MagicMock(return_value="cafefeed" * 8)

    # Three units in one gather call.
    three_units = [
        _ai_unit(),
        _ai_unit(frame_id="fid_other"),
        _ai_unit(frame_id="fid_third"),
    ]
    _call(three_units, catalog_sha_loader=sha_loader)

    sha_loader.assert_called_once()
|
||||||
|
|
||||||
|
|
||||||
|
def test_record_shape_contract_is_stable_with_u4_fields(monkeypatch):
|
||||||
|
"""Record schema includes the IMP-46 u4 cache_key + fingerprints fields."""
|
||||||
monkeypatch.setattr(step12_mod, "route_ai_fallback", MagicMock(return_value=None))
|
monkeypatch.setattr(step12_mod, "route_ai_fallback", MagicMock(return_value=None))
|
||||||
units = [FakeUnit(label="reject", provisional=True)]
|
units = [FakeUnit(label="reject", provisional=True)]
|
||||||
rec = _call(units)[0]
|
rec = _call(units)[0]
|
||||||
@@ -190,4 +405,98 @@ def test_record_shape_contract_is_stable(monkeypatch):
|
|||||||
"skip_reason",
|
"skip_reason",
|
||||||
"proposal",
|
"proposal",
|
||||||
"error",
|
"error",
|
||||||
|
"cache_key",
|
||||||
|
"fingerprints",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def test_cache_key_is_compatible_with_cache_parse_key(monkeypatch):
    """The ``cache_key`` handed to the router must parse via ``cache._parse_key``."""
    from src.phase_z2_ai_fallback.cache import KEY_DELIMITER, _parse_key

    stub_router = MagicMock(return_value=None)
    monkeypatch.setattr(step12_mod, "route_ai_fallback", stub_router)
    _call([_ai_unit()])

    parsed = _parse_key(stub_router.call_args.kwargs["cache_key"])
    assert parsed is not None

    frame_id, signature_hash = parsed
    assert frame_id == "fid_123"
    assert len(signature_hash) == 64
    assert KEY_DELIMITER not in signature_hash
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# IMP-47B u9 — Step 12 reject eligibility + normal-path AI=0 regression
|
||||||
|
# ---------------------------------------------------------------------------
|
||||||
|
# Locks the end-to-end Step 12 contract against the production route helper
|
||||||
|
# `_imp05_route_hint`. The local `_ROUTE_HINTS` mapping above intentionally
|
||||||
|
# preserves the legacy ``reject -> design_reference_only`` form to exercise
|
||||||
|
# the catch-all fall-through branch; u9 instead drives gather with the real
|
||||||
|
# production map (post-u1 flip) so reject provisional units reach the router
|
||||||
|
# and normal-path labels stay AI=0.
|
||||||
|
|
||||||
|
|
||||||
|
def test_production_reject_route_reaches_router_when_provisional(monkeypatch):
    """A provisional ``reject`` unit routed by ``_imp05_route_hint`` hits the router.

    The stubbed router returns ``None``, so the record reports
    ``router_short_circuit`` with ``ai_called`` False.
    """
    from src.phase_z2_pipeline import _imp05_route_hint

    stub_router = MagicMock(return_value=None)
    monkeypatch.setattr(step12_mod, "route_ai_fallback", stub_router)

    reject_units = [FakeUnit(label="reject", provisional=True)]
    records = step12_mod.gather_step12_ai_repair_proposals(
        reject_units,
        route_for_label=_imp05_route_hint,
        get_contract_fn=_get_contract,
        frame_visual_loader=_frame_visual,
    )

    assert records[0]["route_hint"] == "ai_adaptation_required"
    assert records[0]["skip_reason"] == "router_short_circuit"
    assert records[0]["ai_called"] is False
    stub_router.assert_called_once()
|
||||||
|
|
||||||
|
|
||||||
|
def test_production_normal_route_labels_never_reach_router(monkeypatch):
    """Provisional units with normal-path labels are skipped before the router."""
    from src.phase_z2_pipeline import _imp05_route_hint

    stub_router = MagicMock(return_value=None)
    monkeypatch.setattr(step12_mod, "route_ai_fallback", stub_router)

    normal_labels = ("use_as_is", "light_edit", None)
    records = step12_mod.gather_step12_ai_repair_proposals(
        [FakeUnit(label=label, provisional=True) for label in normal_labels],
        route_for_label=_imp05_route_hint,
        get_contract_fn=_get_contract,
        frame_visual_loader=_frame_visual,
    )

    # Expected skip reason per unit, in input order.
    expected_reasons = (
        "route_not_ai_adaptation:direct_render",
        "route_not_ai_adaptation:deterministic_minor_adjustment",
        "route_not_ai_adaptation:None",
    )
    for position, reason in enumerate(expected_reasons):
        assert records[position]["skip_reason"] == reason
    stub_router.assert_not_called()
|
||||||
|
|
||||||
|
|
||||||
|
def test_production_non_provisional_reject_skipped_before_route_gate(monkeypatch):
    """A non-provisional ``reject`` unit is skipped as ``not_provisional``.

    Uses the production ``_imp05_route_hint`` mapping; the record must
    report ``ai_called`` False and the router must never be invoked.
    """
    from src.phase_z2_pipeline import _imp05_route_hint

    stub_router = MagicMock(return_value=None)
    monkeypatch.setattr(step12_mod, "route_ai_fallback", stub_router)

    settled_units = [FakeUnit(label="reject", provisional=False)]
    records = step12_mod.gather_step12_ai_repair_proposals(
        settled_units,
        route_for_label=_imp05_route_hint,
        get_contract_fn=_get_contract,
        frame_visual_loader=_frame_visual,
    )

    assert records[0]["skip_reason"] == "not_provisional"
    assert records[0]["ai_called"] is False
    stub_router.assert_not_called()
|
||||||
|
|||||||
213
tests/test_imp47b_cache_save_gate.py
Normal file
213
tests/test_imp47b_cache_save_gate.py
Normal file
@@ -0,0 +1,213 @@
|
|||||||
|
"""IMP-47B u13 — Persist validated proposals through ``save_proposal`` after gates.
|
||||||
|
|
||||||
|
Scope (this slice):
|
||||||
|
Verify the new ``_persist_ai_repair_proposals_to_cache`` helper in
|
||||||
|
``src/phase_z2_pipeline.py`` honours the IMP-46 dual-gate truth table
|
||||||
|
on the post-Step-14 cache-save seam. The helper is exercised in
|
||||||
|
isolation (no Selenium, no full pipeline) with synthetic AI repair
|
||||||
|
records that mirror the gather → apply → coverage chain shape
|
||||||
|
produced by IMP-47B u4 / u5 / u7.
|
||||||
|
|
||||||
|
Guardrails proven by this test (IMP-46 + IMP-47B policy bullets):
|
||||||
|
* ``visual_check_passed=False`` always blocks — never bypassable, even
|
||||||
|
when ``auto_cache=True`` (IMP-46 u5 truth table cell).
|
||||||
|
* ``user_approved=False`` AND ``auto_cache=False`` → gate blocked
|
||||||
|
(default pipeline path has no UX approval gate; ``--auto-cache`` is
|
||||||
|
the documented bypass).
|
||||||
|
* ``visual_check_passed=True`` AND ``auto_cache=True`` → proposal
|
||||||
|
persisted on disk under ``data/frame_cache/{frame_id}/{hash}.json``
|
||||||
|
via ``cache.save_proposal``.
|
||||||
|
* Non-applied records (no_proposal / no_zone_match / unsupported /
|
||||||
|
error) → ``cache_save_status='not_applied'`` and NEVER reach
|
||||||
|
``save_proposal`` (no filesystem touch).
|
||||||
|
* Settings axis — ``settings.ai_fallback_auto_cache`` sourced through
|
||||||
|
the helper kwargs, never inlined (hardcoding ban).
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import pathlib
|
||||||
|
|
||||||
|
import pytest
|
||||||
|
|
||||||
|
from src.phase_z2_ai_fallback import cache as cache_mod
|
||||||
|
from src.phase_z2_ai_fallback.cache import AiFallbackCacheGateError
|
||||||
|
from src.phase_z2_ai_fallback.schema import AiFallbackProposal, ProposalKind
|
||||||
|
from src.phase_z2_pipeline import _persist_ai_repair_proposals_to_cache
|
||||||
|
|
||||||
|
|
||||||
|
def _applied_record(
    *,
    cache_key: str = "MOCK_FRAME::deadbeef" + "0" * 56,
    fingerprints: dict | None = None,
    slots: dict | None = None,
) -> dict:
    """Return a synthetic record whose ``apply_status`` is ``applied:partial_overrides``.

    Args:
        cache_key: Value stored under the record's ``cache_key``.
        fingerprints: Fingerprint dict; a fixed three-key dict is used when None.
        slots: Slot values placed in the proposal payload; a title/bullets
            pair is used when None.

    Returns:
        A dict carrying the dumped ``AiFallbackProposal`` plus the metadata keys.
    """
    chosen_fingerprints = (
        {"contract_sha": "c1", "partial_sha": "p1", "catalog_sha": "k1"}
        if fingerprints is None
        else fingerprints
    )
    chosen_slots = (
        {"title": "AI repaired", "bullets": ["b1", "b2"]}
        if slots is None
        else slots
    )
    dumped_proposal = AiFallbackProposal(
        proposal_kind=ProposalKind.PARTIAL_OVERRIDES,
        payload={"slots": chosen_slots},
        rationale="cache save gate test",
    ).model_dump()
    return {
        "unit_index": 0,
        "source_section_ids": ["MOCK_S1"],
        "frame_template_id": "MOCK_FRAME",
        "label": "reject",
        "route_hint": "ai_adaptation_required",
        "provisional": True,
        "ai_called": True,
        "skip_reason": None,
        "proposal": dumped_proposal,
        "error": None,
        "cache_key": cache_key,
        "fingerprints": chosen_fingerprints,
        "apply_status": "applied:partial_overrides",
    }
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.fixture(autouse=True)
def _isolate_cache_root(tmp_path: pathlib.Path, monkeypatch: pytest.MonkeyPatch):
    """Point ``cache.CACHE_ROOT`` at a per-test temporary directory.

    Yields the redirected root (``tmp_path / "frame_cache"``) so tests can
    inspect what was written under it.
    """
    isolated_root = tmp_path / "frame_cache"
    monkeypatch.setattr(cache_mod, "CACHE_ROOT", isolated_root)
    yield isolated_root
|
||||||
|
|
||||||
|
|
||||||
|
def test_visual_check_failed_blocks_save_even_with_auto_cache(_isolate_cache_root):
    """``visual_check_passed=False`` blocks the save even with ``auto_cache=True``."""
    applied = _applied_record()

    _persist_ai_repair_proposals_to_cache(
        [applied],
        visual_check_passed=False,
        user_approved=True,
        auto_cache=True,
    )

    outcome = applied["cache_save_status"]
    assert outcome.startswith("gate_blocked:")
    assert "visual_check_passed=False" in outcome
    # Nothing may have been written under the isolated cache root.
    cache_root = _isolate_cache_root
    assert not (cache_root.exists() and any(cache_root.rglob("*.json")))
|
||||||
|
|
||||||
|
|
||||||
|
def test_user_not_approved_and_no_auto_cache_blocks_save(_isolate_cache_root):
    """With ``user_approved=False`` and ``auto_cache=False`` the gate blocks the save."""
    applied = _applied_record()

    _persist_ai_repair_proposals_to_cache(
        [applied],
        visual_check_passed=True,
        user_approved=False,
        auto_cache=False,
    )

    outcome = applied["cache_save_status"]
    assert outcome.startswith("gate_blocked:")
    assert "user_approved=False" in outcome
    # Nothing may have been written under the isolated cache root.
    cache_root = _isolate_cache_root
    assert not (cache_root.exists() and any(cache_root.rglob("*.json")))
|
||||||
|
|
||||||
|
|
||||||
|
def test_visual_passed_and_auto_cache_persists_proposal(_isolate_cache_root):
    """``visual_check_passed=True`` plus ``auto_cache=True`` writes one JSON file."""
    applied = _applied_record()

    _persist_ai_repair_proposals_to_cache(
        [applied],
        visual_check_passed=True,
        user_approved=False,
        auto_cache=True,
    )

    assert applied["cache_save_status"] == "saved"
    json_files = list(_isolate_cache_root.rglob("*.json"))
    assert len(json_files) == 1
    # The file sits in a directory named after the frame id in the cache key.
    assert json_files[0].parent.name == "MOCK_FRAME"
|
||||||
|
|
||||||
|
|
||||||
|
def test_non_applied_records_are_skipped_without_filesystem_touch(_isolate_cache_root):
    """Records whose ``apply_status`` is not ``applied:*`` are marked ``not_applied``.

    Covers no_proposal, no_zone_match, unsupported kind and an errored
    record; no JSON file may appear under the cache root.
    """

    def _fingerprints() -> dict:
        # Fresh dict per record so no two records share a mutable object.
        return {"contract_sha": "c", "partial_sha": "p", "catalog_sha": "k"}

    records = [
        {
            "unit_index": 0,
            "apply_status": "no_proposal",
            "proposal": None,
            "cache_key": None,
            "fingerprints": None,
        },
        {
            "unit_index": 1,
            "apply_status": "no_zone_match",
            "proposal": {
                "proposal_kind": "partial_overrides",
                "payload": {"slots": {}},
                "rationale": "",
            },
            "cache_key": "MOCK::abc",
            "fingerprints": _fingerprints(),
        },
        {
            "unit_index": 2,
            "apply_status": "unsupported_kind_for_reject_route:builder_options_patch",
            "proposal": {
                "proposal_kind": "builder_options_patch",
                "payload": {},
                "rationale": "",
            },
            "cache_key": "MOCK::def",
            "fingerprints": _fingerprints(),
        },
        {
            "unit_index": 3,
            "apply_status": None,
            "proposal": None,
            "cache_key": "MOCK::ghi",
            "fingerprints": _fingerprints(),
            "error": "RuntimeError: boom",
        },
    ]

    _persist_ai_repair_proposals_to_cache(
        records,
        visual_check_passed=True,
        user_approved=True,
        auto_cache=True,
    )

    for entry in records:
        assert entry["cache_save_status"] == "not_applied"
    # None of the records were applied, so no JSON file may exist.
    cache_root = _isolate_cache_root
    assert not (cache_root.exists() and any(cache_root.rglob("*.json")))
|
||||||
|
|
||||||
|
|
||||||
|
def test_mixed_records_only_persist_applied_ones(_isolate_cache_root):
    """In a mixed batch only the ``applied:`` record is saved to the cache."""
    saved_candidate = _applied_record(cache_key="MOCK_FRAME::aaaaaaaa" + "0" * 56)
    skipped_candidate = {
        "unit_index": 1,
        "apply_status": "no_proposal",
        "proposal": None,
        "cache_key": None,
        "fingerprints": None,
    }

    _persist_ai_repair_proposals_to_cache(
        [saved_candidate, skipped_candidate],
        visual_check_passed=True,
        user_approved=False,
        auto_cache=True,
    )

    assert saved_candidate["cache_save_status"] == "saved"
    assert skipped_candidate["cache_save_status"] == "not_applied"
    json_files = list(_isolate_cache_root.rglob("*.json"))
    assert len(json_files) == 1
|
||||||
|
|
||||||
|
|
||||||
|
def test_invalid_proposal_payload_surfaces_without_raising(_isolate_cache_root):
    """A malformed ``proposal`` dict yields ``cache_save_status='invalid_proposal:...'``.

    The helper call itself must not raise and no JSON file may be written.
    """
    malformed = {
        "unit_index": 0,
        "apply_status": "applied:partial_overrides",
        "proposal": {
            "proposal_kind": "not_a_valid_enum_value",
            "payload": {},
            "rationale": "",
        },
        "cache_key": "MOCK::bad",
        "fingerprints": {"contract_sha": "c", "partial_sha": "p", "catalog_sha": "k"},
    }

    _persist_ai_repair_proposals_to_cache(
        [malformed],
        visual_check_passed=True,
        user_approved=True,
        auto_cache=True,
    )

    assert malformed["cache_save_status"].startswith("invalid_proposal:")
    cache_root = _isolate_cache_root
    assert not (cache_root.exists() and any(cache_root.rglob("*.json")))
|
||||||
95
tests/test_imp47b_coverage_invariant.py
Normal file
95
tests/test_imp47b_coverage_invariant.py
Normal file
@@ -0,0 +1,95 @@
|
|||||||
|
"""IMP-47B u7 — Post-AI source_section_ids coverage invariant tests.
|
||||||
|
|
||||||
|
Scope (this slice):
|
||||||
|
* Helper ``_check_post_ai_coverage_invariant(units, ai_repair_records)``
|
||||||
|
(src/phase_z2_pipeline.py) compares the pre-AI superset (unit
|
||||||
|
``source_section_ids``) to the post-apply superset present on
|
||||||
|
gather records. Per the AI isolation contract + the absolute "dropped" rule
|
||||||
|
(``feedback_ai_isolation_contract``), AI repair must not silently
|
||||||
|
drop a section.
|
||||||
|
* The helper returns a structured dict (``pre_ai_section_ids``,
|
||||||
|
``post_ai_section_ids``, ``dropped_section_ids``, ``status``) so u8
|
||||||
|
can surface ``status`` through ``slide_status.ai_repair_status``.
|
||||||
|
|
||||||
|
u8 slide_status surfacing and u10 E2E no-text-loss assertion are out
|
||||||
|
of scope for this unit. The helper is pure (no AI call, no IO) so a
|
||||||
|
synthetic stub-unit / stub-record fixture exercises it directly.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
|
||||||
|
from src.phase_z2_pipeline import _check_post_ai_coverage_invariant
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class _StubUnit:
    """Minimal unit stand-in exposing only ``source_section_ids``."""

    # A fresh list per instance (default_factory), never a shared default.
    source_section_ids: list[str] = field(default_factory=list)
|
||||||
|
|
||||||
|
|
||||||
|
def _record(source_section_ids: list[str]) -> dict:
    """Build a gather-record stub holding a copy of ``source_section_ids``."""
    copied_ids = list(source_section_ids)
    return {"source_section_ids": copied_ids}
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Case 1 : matched coverage → status='ok' ────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
def test_coverage_invariant_ok_when_records_match_units():
    """When records carry every unit's section ids the status is ``'ok'``."""
    all_ids = ["MOCK_S1", "MOCK_S2", "MOCK_S3"]
    units = [_StubUnit(["MOCK_S1", "MOCK_S2"]), _StubUnit(["MOCK_S3"])]
    records = [_record(["MOCK_S1", "MOCK_S2"]), _record(["MOCK_S3"])]

    outcome = _check_post_ai_coverage_invariant(units, records)

    assert outcome["status"] == "ok"
    assert outcome["dropped_section_ids"] == []
    assert outcome["pre_ai_section_ids"] == all_ids
    assert outcome["post_ai_section_ids"] == all_ids
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Case 2 : record drops a section → status='violated' ────────────
|
||||||
|
|
||||||
|
|
||||||
|
def test_coverage_invariant_violated_when_record_drops_section():
    """A record missing one of its unit's section ids yields ``'violated'``.

    The missing id must be listed in ``dropped_section_ids``, appear in the
    pre-AI ids and be absent from the post-AI ids.
    """
    missing_id = "MOCK_S2"
    units = [_StubUnit(["MOCK_S1", missing_id]), _StubUnit(["MOCK_S3"])]
    # The first record omits MOCK_S2.
    records = [_record(["MOCK_S1"]), _record(["MOCK_S3"])]

    outcome = _check_post_ai_coverage_invariant(units, records)

    assert outcome["status"] == "violated"
    assert outcome["dropped_section_ids"] == ["MOCK_S2"]
    assert missing_id in outcome["pre_ai_section_ids"]
    assert missing_id not in outcome["post_ai_section_ids"]
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Case 3 : empty inputs → status='ok' (no false positive) ────────
|
||||||
|
|
||||||
|
|
||||||
|
def test_coverage_invariant_ok_on_empty_units_and_records():
    """No units and no records is a vacuous pass: ``'ok'`` with empty id lists."""
    outcome = _check_post_ai_coverage_invariant([], [])

    assert outcome["status"] == "ok"
    # Every id list in the result must be empty.
    assert outcome["dropped_section_ids"] == []
    assert outcome["pre_ai_section_ids"] == []
    assert outcome["post_ai_section_ids"] == []
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Case 4 : multiple drops + dedup ────────────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
def test_coverage_invariant_lists_all_dropped_sections_sorted_and_deduped():
    """With no records every unit section is reported dropped, sorted and deduped.

    ``MOCK_S1`` appears in both units but must be listed only once.
    """
    units = [
        _StubUnit(["MOCK_S3", "MOCK_S1"]),
        _StubUnit(["MOCK_S2", "MOCK_S1"]),  # repeats MOCK_S1
    ]
    no_records: list[dict] = []
    sorted_unique_ids = ["MOCK_S1", "MOCK_S2", "MOCK_S3"]

    outcome = _check_post_ai_coverage_invariant(units, no_records)

    assert outcome["status"] == "violated"
    assert outcome["dropped_section_ids"] == sorted_unique_ids
    assert outcome["pre_ai_section_ids"] == sorted_unique_ids
    assert outcome["post_ai_section_ids"] == []
|
||||||
269
tests/test_imp47b_end_to_end.py
Normal file
269
tests/test_imp47b_end_to_end.py
Normal file
@@ -0,0 +1,269 @@
|
|||||||
|
"""IMP-47B u10 — End-to-end reject smoke (mocked client + full chain + render).
|
||||||
|
|
||||||
|
Scope (this slice):
|
||||||
|
E2E chain proving the IMP-47B reject route activates, preserves
|
||||||
|
full coverage, and propagates the AI-repaired ``slot_payload``
|
||||||
|
into the rendered ``final.html`` artifact when the AI fallback
|
||||||
|
client returns a deterministic PARTIAL_OVERRIDES proposal. Wires
|
||||||
|
together the four pipeline helpers introduced by u4 / u5 / u7 / u8
|
||||||
|
plus the Step 13 render step:
|
||||||
|
|
||||||
|
gather → apply → coverage_invariant → ai_repair_status surfacing
|
||||||
|
→ render_slide → final.html
|
||||||
|
|
||||||
|
The chain mirrors the ``run_phase_z2_mvp1`` call sequence between
|
||||||
|
the Step 12 slot_payload write and the Step 20 ``slide_status``
|
||||||
|
attach (src/phase_z2_pipeline.py — u4 call site, u5 apply, u6
|
||||||
|
artifact, u7 invariant, u8 surface). The Step 13 render path
|
||||||
|
(``render_slide`` at src/phase_z2_pipeline.py:2319, called from the
|
||||||
|
production write site at src/phase_z2_pipeline.py:5107-5111)
|
||||||
|
consumes ``zones_data[i]["slot_payload"]`` verbatim, so this test
|
||||||
|
drives that exact production seam: it calls ``render_slide`` on
|
||||||
|
the post-apply ``zones_data`` and writes the resulting HTML to a
|
||||||
|
``final.html`` file inside ``tmp_path``, then asserts the AI
|
||||||
|
proposal text appears in the on-disk artifact. A heavy
|
||||||
|
``run_phase_z2_mvp1`` integration variant with Selenium overflow
|
||||||
|
check remains deferred — this smoke test stops at the rendered
|
||||||
|
HTML.
|
||||||
|
|
||||||
|
Guardrails proven by this test (IMP-47B policy bullets):
|
||||||
|
* AI calls = fallback path only → master flag default OFF preserved
|
||||||
|
(test enables for itself only, restores after).
|
||||||
|
* MDX source text 100% preserved → coverage_invariant.status == "ok",
|
||||||
|
source_section_ids identical before/after AI.
|
||||||
|
* No automatic frame swap → frame_template_id unchanged.
|
||||||
|
* No arbitrary frame visual changes → frame_contract / partial untouched
|
||||||
|
(apply only merges proposal.payload.slots into slot_payload).
|
||||||
|
* Absolute "dropped" rule → slot_payload AI keys merged on top
|
||||||
|
of deterministic keys; pre-existing meta keys survive.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
|
||||||
|
from src.phase_z2_ai_fallback.schema import AiFallbackProposal, ProposalKind
|
||||||
|
from src.phase_z2_pipeline import (
|
||||||
|
_apply_ai_repair_proposals_to_zones,
|
||||||
|
_check_post_ai_coverage_invariant,
|
||||||
|
_run_step12_ai_repair,
|
||||||
|
_summarize_ai_repair_status,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class _StubUnit:
    """Synthetic stand-in for a CompositionUnit, defaulting to a provisional reject."""

    # Routing-related fields.
    label: str | None = "reject"
    provisional: bool = True
    # Frame identity.
    frame_template_id: str = "MOCK_T_reject"
    frame_id: str = "MOCK_F_reject"
    # Source content; default_factory gives each instance its own list.
    source_section_ids: list[str] = field(default_factory=lambda: ["MOCK_S1"])
    raw_content: str = "MOCK MDX paragraph that must survive AI repair."
    # Ranking / layout metadata.
    v4_rank: int | None = 1
    cardinality: int | None = None
    layout_preset: str = "two_zone_vertical"
    zone_position: str = "top"
    source_shape: str = "paragraph"
    h3_count: int = 0
    char_count: int = 48
|
||||||
|
|
||||||
|
|
||||||
|
def _patched_route_ai_fallback(**kwargs):
    """Stand-in for ``route_ai_fallback`` returning a fixed PARTIAL_OVERRIDES proposal.

    All keyword arguments are accepted and ignored; the returned proposal
    always carries the same ``title`` and ``bullets`` slot values.
    """
    fixed_slots = {
        "title": "AI repaired title",
        "bullets": ["AI repaired bullet 1", "AI repaired bullet 2"],
    }
    return AiFallbackProposal(
        proposal_kind=ProposalKind.PARTIAL_OVERRIDES,
        payload={"slots": fixed_slots},
        rationale="E2E smoke proposal — deterministic.",
    )
|
||||||
|
|
||||||
|
|
||||||
|
def test_e2e_reject_chain_applies_proposal_and_preserves_coverage(monkeypatch):
    """Run gather → apply → coverage check → status summary for one reject unit.

    The router is replaced by ``_patched_route_ai_fallback``. The test checks
    that the proposal slots land in the zone's ``slot_payload``, that
    pre-existing keys and the template id survive, that coverage is ``ok``
    and that the summary does not demand human review.
    """
    import src.phase_z2_ai_fallback.step12 as step12_mod

    # Swap the router for the fixed-proposal stub.
    monkeypatch.setattr(step12_mod, "route_ai_fallback", _patched_route_ai_fallback)

    reject_unit = _StubUnit()
    units = [reject_unit]

    # Gather: the provisional reject unit reaches the patched router.
    records = _run_step12_ai_repair(units)
    assert len(records) == 1
    assert records[0]["route_hint"] == "ai_adaptation_required"
    assert records[0]["ai_called"] is True
    assert records[0]["skip_reason"] is None
    assert records[0]["proposal"]["proposal_kind"] == "partial_overrides"
    assert records[0]["source_section_ids"] == ["MOCK_S1"]

    # Apply: proposal slots are merged into the zone at position "top".
    seeded_payload = {
        "title": "deterministic title",
        "bullets": ["deterministic bullet"],
        "_truncated_count": 0,
    }
    zones = [
        {
            "position": "top",
            "template_id": "MOCK_T_reject",
            "slot_payload": seeded_payload,
        }
    ]
    _apply_ai_repair_proposals_to_zones(records, ["top"], zones)
    assert records[0]["apply_status"] == "applied:partial_overrides"
    # The slot payload now carries the proposal values.
    assert zones[0]["slot_payload"]["title"] == "AI repaired title"
    assert zones[0]["slot_payload"]["bullets"] == [
        "AI repaired bullet 1",
        "AI repaired bullet 2",
    ]
    # Template id and the pre-existing meta key are untouched.
    assert zones[0]["template_id"] == "MOCK_T_reject"
    assert zones[0]["slot_payload"]["_truncated_count"] == 0
    # The unit keeps its frame template id (no frame swap).
    assert reject_unit.frame_template_id == "MOCK_T_reject"

    # Coverage invariant: nothing dropped.
    coverage = _check_post_ai_coverage_invariant(units, records)
    assert coverage["status"] == "ok"
    assert coverage["pre_ai_section_ids"] == ["MOCK_S1"]
    assert coverage["post_ai_section_ids"] == ["MOCK_S1"]
    assert coverage["dropped_section_ids"] == []

    # Status summary: applied, no errors, no human review flag.
    summary = _summarize_ai_repair_status(records, coverage)
    assert summary["status"] == "applied"
    assert summary["counts"]["applied"] == 1
    assert summary["counts"]["error"] == 0
    assert summary["counts"]["unsupported_kind"] == 0
    assert summary["coverage_status"] == "ok"
    assert summary.get("human_review_required") is not True
|
||||||
|
|
||||||
|
|
||||||
|
def test_e2e_reject_chain_writes_final_html_with_ai_repaired_slot(monkeypatch, tmp_path):
    """Reject-route smoke test that ends with a real ``final.html`` on disk.

    The Step 12 router is monkeypatched, the resulting proposal is applied
    to a seeded zone, and the zone is pushed through ``render_slide`` —
    the same Jinja2 rendering seam the production pipeline uses
    (src/phase_z2_pipeline.py:5107-5111). The HTML is written to
    ``tmp_path / "final.html"`` and every content assertion runs against
    the text read back from that file.

    ``bim_dx_comparison_table`` is a real registered frame partial
    (templates/phase_z2/families/bim_dx_comparison_table.html) whose
    template emits ``{{ slot_payload.title }}`` verbatim, so a title
    overridden by the proposal shows up literally in the HTML.
    """
    import src.phase_z2_ai_fallback.step12 as step12_mod

    monkeypatch.setattr(step12_mod, "route_ai_fallback", _patched_route_ai_fallback)
    from src.phase_z2_pipeline import build_layout_css, render_slide

    reject_unit = _StubUnit(
        frame_template_id="bim_dx_comparison_table",
        zone_position="primary",
        layout_preset="single",
    )

    # Step 12 gather + apply. The slots the proposal does not override
    # (col_a_label, col_b_label, rows[*]) are seeded before apply so the
    # post-render checks can prove the u5 merge keeps them (dict.update,
    # not dict-replace). The router proposal only carries
    # ``{title, bullets}``; everything else must reach final.html as-is.
    repair_records = _run_step12_ai_repair([reject_unit])
    seeded_payload = {
        "title": "deterministic frame title",
        "col_a_label": "DETERMINISTIC_COL_A_LABEL",
        "col_b_label": "DETERMINISTIC_COL_B_LABEL",
        "rows": [
            {"label": "DET_ROW_LABEL", "col_a": "DET_ROW_A", "col_b": "DET_ROW_B"},
        ],
    }
    zones = [
        {
            "position": "primary",
            "template_id": "bim_dx_comparison_table",
            "slot_payload": seeded_payload,
        }
    ]
    _apply_ai_repair_proposals_to_zones(repair_records, ["primary"], zones)
    assert repair_records[0]["apply_status"] == "applied:partial_overrides"

    # Step 13 render — production seam (src/phase_z2_pipeline.py:5107-5111).
    css = build_layout_css("single", zones)
    markup = render_slide("IMP-47B E2E reject smoke", None, zones, "single", css)
    out_file = tmp_path / "final.html"
    out_file.write_text(markup, encoding="utf-8")

    # The artifact must exist on disk and be non-empty.
    assert out_file.is_file()
    assert out_file.stat().st_size > 0
    on_disk = out_file.read_text(encoding="utf-8")

    # The AI-repaired title is rendered ...
    assert "AI repaired title" in on_disk
    # ... and it replaced the deterministic one (both must not be visible).
    assert "deterministic frame title" not in on_disk

    # Slots the proposal did not touch survive merge -> render, so the
    # absolute no-drop rule holds at the render seam and not only in the
    # in-memory slot_payload.
    for untouched in (
        "DETERMINISTIC_COL_A_LABEL",
        "DETERMINISTIC_COL_B_LABEL",
        "DET_ROW_LABEL",
        "DET_ROW_A",
        "DET_ROW_B",
    ):
        assert untouched in on_disk

    # The frame template id is preserved end-to-end (no automatic frame swap).
    assert 'data-template-id="bim_dx_comparison_table"' in on_disk
    assert reject_unit.frame_template_id == "bim_dx_comparison_table"

    # 100% preservation of the original MDX text — coverage invariant
    # plus status surfacing.
    coverage = _check_post_ai_coverage_invariant([reject_unit], repair_records)
    assert coverage["status"] == "ok"
    assert coverage["dropped_section_ids"] == []
    summary = _summarize_ai_repair_status(repair_records, coverage)
    assert summary["status"] == "applied"
    assert summary.get("human_review_required") is not True
|
||||||
|
|
||||||
|
|
||||||
|
def test_e2e_reject_chain_no_text_loss_on_multi_section_unit(monkeypatch):
    """A reject unit carrying three section ids loses none of them.

    Runs gather, apply, the coverage invariant and the ai_repair_status
    summary on one multi-section unit and checks that all three ids are
    reported both before and after AI repair. This pins the "100%
    preservation of the original MDX text" guardrail at
    unit-multiplicity granularity (the gather step copies the id list via
    ``list(...)`` at src/phase_z2_ai_fallback/step12.py:124, so apply
    mutations cannot silently drop it).
    """
    import src.phase_z2_ai_fallback.step12 as step12_mod

    monkeypatch.setattr(step12_mod, "route_ai_fallback", _patched_route_ai_fallback)

    # Kept separate from the list handed to the unit so aliasing cannot
    # hide a mutation.
    expected_ids = ["MOCK_S1", "MOCK_S2", "MOCK_S3"]

    multi_unit = _StubUnit(source_section_ids=["MOCK_S1", "MOCK_S2", "MOCK_S3"])
    repair_records = _run_step12_ai_repair([multi_unit])
    zones = [
        {
            "position": "top",
            "template_id": "MOCK_T_reject",
            "slot_payload": {"title": "det", "bullets": ["det"]},
        }
    ]
    _apply_ai_repair_proposals_to_zones(repair_records, ["top"], zones)

    coverage = _check_post_ai_coverage_invariant([multi_unit], repair_records)
    assert coverage["pre_ai_section_ids"] == expected_ids
    assert coverage["post_ai_section_ids"] == expected_ids
    assert coverage["dropped_section_ids"] == []

    summary = _summarize_ai_repair_status(repair_records, coverage)
    assert summary["status"] == "applied"
    assert summary.get("human_review_required") is not True
|
||||||
174
tests/test_imp47b_failure_surface.py
Normal file
174
tests/test_imp47b_failure_surface.py
Normal file
@@ -0,0 +1,174 @@
|
|||||||
|
"""IMP-47B u8 — slide_status.ai_repair_status surfacing tests.
|
||||||
|
|
||||||
|
Scope (this slice):
|
||||||
|
Helper ``_summarize_ai_repair_status(ai_repair_records, coverage_invariant)``
|
||||||
|
(src/phase_z2_pipeline.py) composes u4 gather ``error`` + u5
|
||||||
|
``apply_status`` + u7 ``coverage_invariant`` into a single
|
||||||
|
``ai_repair_status`` axis attached to ``slide_status``. Failure-axis
|
||||||
|
priority (highest → lowest): ``error`` > ``coverage_violated`` >
|
||||||
|
``unsupported_kind`` > ``applied`` > ``ok``. ``human_review_required``
|
||||||
|
flips True on the three failure axes for u11 frontend surfacing.
|
||||||
|
|
||||||
|
The frontend reads ``slide_status.ai_repair_status`` to render a
|
||||||
|
notification per the IMP-47B policy ("AI 호출 실패 / proposal validation
|
||||||
|
실패 / coverage 미달 → frontend notification"). u9~u13 are out of scope.
|
||||||
|
The helper is pure (no IO, no AI call) so synthetic record / invariant
|
||||||
|
dicts exercise every branch directly.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from src.phase_z2_pipeline import _summarize_ai_repair_status
|
||||||
|
|
||||||
|
|
||||||
|
def _record(
|
||||||
|
*,
|
||||||
|
unit_index: int = 0,
|
||||||
|
apply_status: str | None = None,
|
||||||
|
error: str | None = None,
|
||||||
|
source_section_ids: list[str] | None = None,
|
||||||
|
) -> dict:
|
||||||
|
"""Minimal Step 12 AI repair record stub — fields u8 reads."""
|
||||||
|
return {
|
||||||
|
"unit_index": unit_index,
|
||||||
|
"source_section_ids": source_section_ids or [f"MOCK_S{unit_index}"],
|
||||||
|
"apply_status": apply_status,
|
||||||
|
"error": error,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
_OK_COVERAGE = {"status": "ok", "dropped_section_ids": []}
|
||||||
|
_VIOLATED_COVERAGE = {"status": "violated", "dropped_section_ids": ["MOCK_S2"]}
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Case 1 : empty pipeline → status='ok' ──────────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
def test_empty_records_returns_ok_no_human_review():
    """With no records and OK coverage the summary is a clean ``ok``.

    This is where the flag-off default (no provisional units) lands:
    status ``'ok'``, ``human_review_required`` False, zero total count
    and empty failure lists.
    """
    summary = _summarize_ai_repair_status([], _OK_COVERAGE)

    assert summary["status"] == "ok"
    assert summary["human_review_required"] is False
    assert summary["counts"]["total"] == 0
    # None of the failure-detail lists may carry entries.
    for list_key in ("unsupported_kind_records", "error_records", "dropped_section_ids"):
        assert summary[list_key] == []
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Case 2 : applied → status='applied', no human_review ───────────
|
||||||
|
|
||||||
|
|
||||||
|
def test_applied_partial_overrides_marks_applied_no_human_review():
    """Happy path: one record with PARTIAL_OVERRIDES applied.

    The summary must report status ``'applied'`` without asking for
    human review, counting one applied record and zero errors.
    """
    applied_record = _record(apply_status="applied:partial_overrides")
    summary = _summarize_ai_repair_status([applied_record], _OK_COVERAGE)

    assert summary["status"] == "applied"
    assert summary["human_review_required"] is False

    counts = summary["counts"]
    assert counts["applied"] == 1
    assert counts["error"] == 0
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Case 3 : unsupported kind → status='unsupported_kind' ──────────
|
||||||
|
|
||||||
|
|
||||||
|
def test_unsupported_kind_marks_human_review_required():
    """An unsupported proposal kind must request human review.

    u5 surfaces ``unsupported_kind_for_reject_route:<kind>`` for
    builder_options_patch / slot_mapping_proposal. u8 has to classify
    such a record as ``human_review_required`` so the frontend renders a
    notification, and echo the record in ``unsupported_kind_records``.
    """
    unsupported_status = "unsupported_kind_for_reject_route:builder_options_patch"
    summary = _summarize_ai_repair_status(
        [
            _record(
                unit_index=1,
                apply_status=unsupported_status,
                source_section_ids=["MOCK_S1"],
            )
        ],
        _OK_COVERAGE,
    )

    assert summary["status"] == "unsupported_kind"
    assert summary["human_review_required"] is True
    assert summary["counts"]["unsupported_kind"] == 1

    expected_entry = {
        "unit_index": 1,
        "source_section_ids": ["MOCK_S1"],
        "apply_status": "unsupported_kind_for_reject_route:builder_options_patch",
    }
    assert summary["unsupported_kind_records"] == [expected_entry]
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Case 4 : gather error → status='error' (highest priority) ──────
|
||||||
|
|
||||||
|
|
||||||
|
def test_gather_error_marks_status_error_with_records():
    """A record carrying ``error`` drives the summary to status ``'error'``.

    ``record['error']`` being set means
    ``gather_step12_ai_repair_proposals`` caught a router exception (AI
    call / validator). ``'error'`` is the highest-priority failure axis,
    and the offending record must be echoed in ``error_records``.
    """
    failing_record = _record(
        unit_index=2,
        error="ValueError: missing slot 'title'",
        source_section_ids=["MOCK_S2"],
    )
    summary = _summarize_ai_repair_status([failing_record], _OK_COVERAGE)

    assert summary["status"] == "error"
    assert summary["human_review_required"] is True
    assert summary["counts"]["error"] == 1

    expected_entry = {
        "unit_index": 2,
        "source_section_ids": ["MOCK_S2"],
        "error": "ValueError: missing slot 'title'",
    }
    assert summary["error_records"] == [expected_entry]
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Case 5 : coverage violated → status='coverage_violated' ────────
|
||||||
|
|
||||||
|
|
||||||
|
def test_coverage_violation_surfaces_dropped_sections():
    """A violated coverage invariant is surfaced even when apply succeeded.

    u7's ``'violated'`` status means the AI repair dropped a section_id
    from the post-AI superset. Under the absolute no-drop rule this must
    be reported as ``human_review_required`` together with the dropped
    ids.
    """
    applied_record = _record(apply_status="applied:partial_overrides")
    summary = _summarize_ai_repair_status([applied_record], _VIOLATED_COVERAGE)

    assert summary["status"] == "coverage_violated"
    assert summary["human_review_required"] is True
    assert summary["coverage_status"] == "violated"
    assert summary["dropped_section_ids"] == ["MOCK_S2"]
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Case 6 : priority order — error > coverage > unsupported ───────
|
||||||
|
|
||||||
|
|
||||||
|
def test_error_dominates_over_coverage_and_unsupported():
    """``error`` wins when several failure axes are present at once.

    Priority order is
    error > coverage_violated > unsupported_kind > applied > ok. The
    per-axis counts must still reflect every record.
    """
    mixed_records = [
        _record(unit_index=0, error="RuntimeError"),
        _record(
            unit_index=1,
            apply_status="unsupported_kind_for_reject_route:slot_mapping_proposal",
        ),
        _record(unit_index=2, apply_status="applied:partial_overrides"),
    ]
    summary = _summarize_ai_repair_status(mixed_records, _VIOLATED_COVERAGE)

    assert summary["status"] == "error"
    assert summary["human_review_required"] is True

    # Exactly one record was supplied per axis.
    counts = summary["counts"]
    for axis in ("error", "unsupported_kind", "applied"):
        assert counts[axis] == 1
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Case 7 : no_proposal + no_zone_match counted, not failure ──────
|
||||||
|
|
||||||
|
|
||||||
|
def test_no_proposal_and_no_zone_match_do_not_trigger_human_review():
    """Structural skips are counted but are not treated as failures.

    Flag-off short-circuit, not_provisional, route_not_ai_adaptation and
    B4-mismatch (no_zone_match) are structural skips rather than AI
    failures: they show up in ``counts`` but leave the status at ``'ok'``
    and ``human_review_required`` False.
    """
    skipped_records = [
        _record(unit_index=index, apply_status=skip_status)
        for index, skip_status in enumerate(("no_proposal", "no_zone_match"))
    ]
    summary = _summarize_ai_repair_status(skipped_records, _OK_COVERAGE)

    assert summary["status"] == "ok"
    assert summary["human_review_required"] is False

    counts = summary["counts"]
    assert counts["no_proposal"] == 1
    assert counts["no_zone_match"] == 1
|
||||||
304
tests/test_imp47b_mixed_reject_fill.py
Normal file
304
tests/test_imp47b_mixed_reject_fill.py
Normal file
@@ -0,0 +1,304 @@
|
|||||||
|
"""IMP-47B u12 — Initial plan_composition allow_provisional_fill for mixed direct+reject.
|
||||||
|
|
||||||
|
Scope (this slice):
|
||||||
|
The u12 glue inserted in ``run_phase_z2_mvp1`` (src/phase_z2_pipeline.py,
|
||||||
|
right after the initial plan_composition + telemetry build, before the
|
||||||
|
Step 7-A layout override block) detects the mixed direct+reject case
|
||||||
|
(initial plan_composition returns a viable layout but some sections
|
||||||
|
remain uncovered) and re-runs plan_composition with:
|
||||||
|
|
||||||
|
* a lookup_fn that passes ``allow_provisional=True`` (so chain_exhausted
|
||||||
|
sections synthesize a provisional rank-1 V4Match), and
|
||||||
|
* ``allow_provisional_fill=True`` (so uncovered sections receive a
|
||||||
|
last-resort provisional candidate fill in select_composition_units).
|
||||||
|
|
||||||
|
This admits the mixed direct+reject case to the AI repair path
|
||||||
|
(IMP-47B u4/u5) on first render — the reject section becomes a
|
||||||
|
provisional unit (``provisional=True`` + ``label="reject"``) which Step
|
||||||
|
12's reject route gather (u4) routes to AI fallback.
|
||||||
|
|
||||||
|
Gate predicates (mirrored from src/phase_z2_pipeline.py u12 block):
|
||||||
|
* units non-empty (all-reject case is handled by IMP-30 u4 retry below)
|
||||||
|
* layout_preset is not None
|
||||||
|
* not override_section_assignments (operator override bypasses the gate)
|
||||||
|
* at least one section_id is uncovered after initial pass
|
||||||
|
|
||||||
|
Guardrails proven by these tests:
|
||||||
|
* MDX 원문 100% 보존 — every section_id covered after mixed admission
|
||||||
|
(no silent drop).
|
||||||
|
* 자동 frame swap 금지 — mixed admission only re-runs plan_composition
|
||||||
|
with provisional flags; rank-1 reject judgment is preserved as the
|
||||||
|
provisional V4Match (no template_id swap to a different rank).
|
||||||
|
* Normal-path AI=0 — the mixed admission still emits the reject label;
|
||||||
|
AI activation is gated separately in router (config.py:19 default OFF).
|
||||||
|
* All-direct slides are a no-op — gate skips when no uncovered sections.
|
||||||
|
|
||||||
|
This test file exercises ``plan_composition`` directly with synthetic
|
||||||
|
stub V4 matches + a stub lookup_fn that mirrors the u12 retry seam.
|
||||||
|
Stub naming follows the IMP-30 u3 convention (MOCK_ prefix mandatory,
|
||||||
|
no real catalog template_id / frame_id leakage).
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from dataclasses import dataclass
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
from src.phase_z2_composition import plan_composition
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Synthetic V4Match duck-type (mirrors IMP-30 _StubV4Match) ───────────
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class _StubV4Match:
|
||||||
|
template_id: str
|
||||||
|
frame_id: str
|
||||||
|
frame_number: int
|
||||||
|
confidence: float
|
||||||
|
label: str
|
||||||
|
v4_rank: Optional[int] = None
|
||||||
|
selection_path: str = "rank_1"
|
||||||
|
fallback_reason: Optional[str] = None
|
||||||
|
provisional: bool = False
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class _StubSection:
|
||||||
|
section_id: str
|
||||||
|
title: str = ""
|
||||||
|
raw_content: str = ""
|
||||||
|
|
||||||
|
|
||||||
|
_LABEL_TO_STATUS = {
|
||||||
|
"use_as_is": "matched_zone",
|
||||||
|
"light_edit": "adapt_matched_zone",
|
||||||
|
"restructure": "extract_matched_zone",
|
||||||
|
"reject": "fallback_candidate",
|
||||||
|
}
|
||||||
|
|
||||||
|
_ALLOWED_STATUSES = {"matched_zone", "adapt_matched_zone"}
|
||||||
|
|
||||||
|
|
||||||
|
def _make_normal_lookup(matches_by_section: dict[str, _StubV4Match]):
|
||||||
|
"""Lookup_fn that returns the synthetic rank-1 match (no provisional path).
|
||||||
|
|
||||||
|
Mirrors the pipeline initial ``lookup_fn`` at
|
||||||
|
src/phase_z2_pipeline.py:3456-3465 (no ``allow_provisional`` kwarg).
|
||||||
|
"""
|
||||||
|
def _fn(section_id: str):
|
||||||
|
return matches_by_section.get(section_id)
|
||||||
|
return _fn
|
||||||
|
|
||||||
|
|
||||||
|
def _make_provisional_lookup(matches_by_section: dict[str, _StubV4Match]):
|
||||||
|
"""Lookup_fn that flags reject rank-1 matches provisional.
|
||||||
|
|
||||||
|
Mirrors the pipeline u12 retry ``_lookup_fn_mixed_admission`` at the
|
||||||
|
inserted block — for reject judgments, returns a provisional=True
|
||||||
|
rank-1 V4Match-shaped stub so plan_composition's last-resort fill
|
||||||
|
pool can see it (provisional candidates are otherwise filtered out
|
||||||
|
of the normal greedy pass).
|
||||||
|
"""
|
||||||
|
def _fn(section_id: str):
|
||||||
|
m = matches_by_section.get(section_id)
|
||||||
|
if m is not None and m.label == "reject":
|
||||||
|
# Synthesize the provisional shape that
|
||||||
|
# lookup_v4_match_with_fallback returns when allow_provisional
|
||||||
|
# is True: provisional=True + selection_path="provisional_rank_1".
|
||||||
|
return _StubV4Match(
|
||||||
|
template_id=m.template_id,
|
||||||
|
frame_id=m.frame_id,
|
||||||
|
frame_number=m.frame_number,
|
||||||
|
confidence=m.confidence,
|
||||||
|
label=m.label,
|
||||||
|
v4_rank=1,
|
||||||
|
selection_path="provisional_rank_1",
|
||||||
|
provisional=True,
|
||||||
|
)
|
||||||
|
return m
|
||||||
|
return _fn
|
||||||
|
|
||||||
|
|
||||||
|
def _make_candidates_lookup_empty():
|
||||||
|
def _fn(section_id: str):
|
||||||
|
return []
|
||||||
|
return _fn
|
||||||
|
|
||||||
|
|
||||||
|
# ─── u12 case 1 : mechanic — mixed admission via provisional lookup + fill ────
|
||||||
|
|
||||||
|
|
||||||
|
def test_u12_mechanic_mixed_admission_covers_reject_section_via_provisional_fill():
    """Positive proof for a mixed slide (S1=use_as_is, S2=reject).

    Baseline (normal lookup, ``allow_provisional_fill`` left at its
    default): plan_composition covers S1 only and S2 is silently dropped.

    u12 retry (provisional lookup + ``allow_provisional_fill=True``):
    both sections are covered, and the unit holding S2 is provisional
    with ``label="reject"`` — ready for Step 12's reject route gather
    (IMP-47B u4).
    """
    sections = [_StubSection("S1"), _StubSection("S2")]
    direct_match = _StubV4Match(
        template_id="MOCK_template_direct_a",
        frame_id="MOCK_frame_001",
        frame_number=1,
        confidence=0.92,
        label="use_as_is",
        v4_rank=1,
    )
    reject_match = _StubV4Match(
        template_id="MOCK_template_reject_a",
        frame_id="MOCK_frame_002",
        frame_number=2,
        confidence=0.30,
        label="reject",
        v4_rank=1,
    )
    matches = {"S1": direct_match, "S2": reject_match}

    # Pre-u12 baseline — normal lookup, no provisional fill.
    baseline_units, _baseline_preset, _ = plan_composition(
        sections,
        _make_normal_lookup(matches),
        _LABEL_TO_STATUS,
        _ALLOWED_STATUSES,
        v4_candidates_lookup_fn=_make_candidates_lookup_empty(),
    )
    covered_before = set()
    for baseline_unit in baseline_units:
        covered_before.update(baseline_unit.source_section_ids)
    assert "S1" in covered_before, "S1 (use_as_is) must cover pre-u12"
    assert "S2" not in covered_before, (
        "Pre-u12 baseline regression: reject S2 should be uncovered (no provisional fill)"
    )

    # u12 mixed-admission retry — provisional lookup + allow_provisional_fill=True.
    retry_units, retry_preset, _ = plan_composition(
        sections,
        _make_provisional_lookup(matches),
        _LABEL_TO_STATUS,
        _ALLOWED_STATUSES,
        v4_candidates_lookup_fn=_make_candidates_lookup_empty(),
        allow_provisional_fill=True,
    )
    covered_after = set()
    for retry_unit in retry_units:
        covered_after.update(retry_unit.source_section_ids)
    assert covered_after == {"S1", "S2"}, (
        "u12 mixed admission must cover every section (no text loss)"
    )
    assert retry_preset is not None

    # The unit holding S2 must be provisional so the reject route gather
    # (src/phase_z2_ai_fallback/step12.py:133-136) admits it.
    reject_unit = next(
        candidate for candidate in retry_units if "S2" in candidate.source_section_ids
    )
    assert reject_unit.provisional is True, (
        "Reject S2 unit must be provisional so Step 12 reject route admits it"
    )
    assert reject_unit.label == "reject"
    # The frame template id is preserved — no automatic frame swap.
    assert reject_unit.frame_template_id == "MOCK_template_reject_a"
|
||||||
|
|
||||||
|
|
||||||
|
# ─── u12 case 2 : gate — all-direct slides are a no-op ──────────────────────
|
||||||
|
|
||||||
|
|
||||||
|
def test_u12_gate_all_direct_yields_no_uncovered_sections():
    """No-op proof for a slide where every section is auto-renderable.

    With S1=use_as_is and S2=light_edit the initial plan_composition
    covers everything, so the uncovered-id list the u12 gate computes
    (``_u12_uncovered_ids`` in the pipeline) is empty and the retry is
    skipped.
    """
    sections = [_StubSection("S1"), _StubSection("S2")]
    matches = {
        "S1": _StubV4Match(
            template_id="MOCK_template_direct_a",
            frame_id="MOCK_frame_001",
            frame_number=1,
            confidence=0.92,
            label="use_as_is",
            v4_rank=1,
        ),
        "S2": _StubV4Match(
            template_id="MOCK_template_direct_b",
            frame_id="MOCK_frame_002",
            frame_number=2,
            confidence=0.81,
            label="light_edit",
            v4_rank=1,
        ),
    }

    planned_units, layout_preset, _ = plan_composition(
        sections,
        _make_normal_lookup(matches),
        _LABEL_TO_STATUS,
        _ALLOWED_STATUSES,
        v4_candidates_lookup_fn=_make_candidates_lookup_empty(),
    )

    covered_ids = set()
    for planned_unit in planned_units:
        covered_ids.update(planned_unit.source_section_ids)
    assert covered_ids == {"S1", "S2"}, "All-direct must cover every section pre-u12"

    # Same predicate as the u12 block in src/phase_z2_pipeline.py.
    uncovered_ids = [
        section.section_id
        for section in sections
        if section.section_id not in covered_ids
    ]
    assert uncovered_ids == [], (
        "u12 gate must classify all-direct as no-op (uncovered list empty)"
    )
    assert layout_preset is not None
|
||||||
|
|
||||||
|
|
||||||
|
# ─── u12 case 3 : gate — initial empty units bypass u12 (IMP-30 retry owns it) ──
|
||||||
|
|
||||||
|
|
||||||
|
def test_u12_gate_skips_when_initial_units_empty():
    """The u12 gate stays closed when the initial pass yields no units.

    The all-reject case is owned by the IMP-30 u4 retry (``units=[]``
    guard at src/phase_z2_pipeline.py:3646), and u12 mixed-admission must
    not compete with it: the gate ``units and layout_preset is not None``
    short-circuits when the initial plan_composition returns nothing.
    """
    sections = [_StubSection("S1")]
    only_reject = _StubV4Match(
        template_id="MOCK_template_reject_a",
        frame_id="MOCK_frame_002",
        frame_number=2,
        confidence=0.30,
        label="reject",
        v4_rank=1,
    )

    planned_units, layout_preset, _ = plan_composition(
        sections,
        _make_normal_lookup({"S1": only_reject}),
        _LABEL_TO_STATUS,
        _ALLOWED_STATUSES,
        v4_candidates_lookup_fn=_make_candidates_lookup_empty(),
    )

    # All-reject initial pass: no auto-renderable units, no layout preset.
    assert planned_units == []
    assert layout_preset is None

    # The u12 gate predicate short-circuits on the truthiness of the units.
    gate_active = bool(planned_units and layout_preset is not None)
    assert gate_active is False, (
        "u12 mixed-admission gate must skip the all-reject case (IMP-30 u4 owns it)"
    )
|
||||||
|
|
||||||
|
|
||||||
|
# ─── u12 case 4 : code-path anchor — pipeline source contains u12 marker ────
|
||||||
|
|
||||||
|
|
||||||
|
def test_u12_pipeline_source_contains_mixed_admission_marker():
    """Anchor test: the u12 block is still present in the pipeline source.

    Reads src/phase_z2_pipeline.py as text and checks, with plain
    substring tests, for the marker comment, the
    ``imp47b_u12_mixed_admission`` debug key, the
    ``_lookup_fn_mixed_admission`` helper name and an
    ``allow_provisional_fill=True`` invocation. A cheap structural guard
    against the block being silently removed by a future refactor — it
    does not run the heavy pipeline.
    """
    from pathlib import Path

    repo_root = Path(__file__).resolve().parents[1]
    pipeline_source = repo_root.joinpath("src", "phase_z2_pipeline.py").read_text(
        encoding="utf-8"
    )

    assert "IMP-47B u12 — mixed direct+reject first-render admission" in pipeline_source, (
        "u12 marker comment missing from pipeline — block may have been removed"
    )
    assert "imp47b_u12_mixed_admission" in pipeline_source, (
        "u12 comp_debug telemetry key missing"
    )
    # The mixed-admission retry must pass allow_provisional_fill=True.
    # Anchor on both the helper function name and the kwarg text.
    assert "_lookup_fn_mixed_admission" in pipeline_source
    assert "allow_provisional_fill=True" in pipeline_source
|
||||||
180
tests/test_imp47b_override_provisional.py
Normal file
180
tests/test_imp47b_override_provisional.py
Normal file
@@ -0,0 +1,180 @@
|
|||||||
|
"""IMP-47B u3 — override-selected reject frames are admitted as provisional.
|
||||||
|
|
||||||
|
Scope (this slice):
|
||||||
|
Helper `_apply_frame_override_to_unit` (src/phase_z2_pipeline.py) covers
|
||||||
|
the three probe layers used by the `--override-frame` path:
|
||||||
|
|
||||||
|
1. ``v4_candidates`` exact match (non-reject; existing behaviour).
|
||||||
|
2. Full 32 V4 judgments probe (reject inclusive) — when the user
|
||||||
|
picks a reject frame, the unit is promoted to
|
||||||
|
``provisional=True`` with ``label="reject"`` so Step 12
|
||||||
|
(IMP-47B u4) admits the AI repair path.
|
||||||
|
3. Raw fall-through (template_id only) — no provisional promotion,
|
||||||
|
no label mutation.
|
||||||
|
|
||||||
|
Frame visual / contract stay untouched per the AI isolation contract
|
||||||
|
(frame auto-swap forbidden — AI re-places content into the existing
|
||||||
|
frame only). Sibling test confirms a non-reject override still goes
|
||||||
|
through the v4_candidates path without provisional promotion.
|
||||||
|
|
||||||
|
Synthetic naming convention mirrors tests/test_phase_z2_imp30_first_render.py
|
||||||
|
(MOCK_ prefix mandatory, no real catalog template_id / frame_id leakage).
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
from typing import Optional
|
||||||
|
|
||||||
|
from src.phase_z2_pipeline import _apply_frame_override_to_unit
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class _StubCandidate:
|
||||||
|
template_id: str
|
||||||
|
frame_id: str
|
||||||
|
frame_number: int
|
||||||
|
confidence: float
|
||||||
|
label: str
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
|
||||||
|
class _StubUnit:
|
||||||
|
source_section_ids: list[str]
|
||||||
|
frame_template_id: Optional[str] = None
|
||||||
|
frame_id: Optional[str] = None
|
||||||
|
frame_number: int = 0
|
||||||
|
confidence: float = 0.0
|
||||||
|
label: Optional[str] = None
|
||||||
|
provisional: bool = False
|
||||||
|
v4_candidates: list = field(default_factory=list)
|
||||||
|
|
||||||
|
|
||||||
|
def _v4_with_reject(section_id: str, target_tid: str) -> dict:
|
||||||
|
"""Synthetic V4 dict with target_tid mapped to a reject judgment.
|
||||||
|
|
||||||
|
Mirrors the production V4 schema surface (``mdx_sections`` →
|
||||||
|
``judgments_full32`` → list of judgment dicts with template_id /
|
||||||
|
frame_id / frame_number / confidence / label). Two judgments so we
|
||||||
|
can also assert that the helper picks the reject entry rather than
|
||||||
|
the first non-reject one when the template_ids differ.
|
||||||
|
"""
|
||||||
|
return {
|
||||||
|
"mdx_sections": {
|
||||||
|
section_id: {
|
||||||
|
"judgments_full32": [
|
||||||
|
{
|
||||||
|
"template_id": "MOCK_T_other",
|
||||||
|
"frame_id": "F_other",
|
||||||
|
"frame_number": 1,
|
||||||
|
"confidence": 0.85,
|
||||||
|
"label": "use_as_is",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"template_id": target_tid,
|
||||||
|
"frame_id": "F_reject",
|
||||||
|
"frame_number": 32,
|
||||||
|
"confidence": 0.40,
|
||||||
|
"label": "reject",
|
||||||
|
},
|
||||||
|
],
|
||||||
|
},
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Case 1 : reject override → provisional promotion ────────────
|
||||||
|
|
||||||
|
|
||||||
|
def test_override_to_reject_judgment_marks_unit_provisional():
    """Overriding to a reject frame promotes the unit to provisional.

    After the call the unit has ``label="reject"`` and
    ``provisional=True``, and its frame metadata (frame_id /
    frame_number / confidence) comes from the reject judgment so Step 9
    metadata stays consistent.
    """
    auto_unit = _StubUnit(
        source_section_ids=["MOCK_S1"],
        frame_template_id="MOCK_T_auto",
        frame_id="F_auto",
        frame_number=5,
        confidence=0.90,
        label="use_as_is",
        provisional=False,
    )

    probe_path = _apply_frame_override_to_unit(
        auto_unit,
        "MOCK_T_reject",
        _v4_with_reject("MOCK_S1", "MOCK_T_reject"),
    )

    assert probe_path == "v4_reject_judgment_provisional"
    # Every frame field now reflects the reject judgment.
    assert auto_unit.frame_template_id == "MOCK_T_reject"
    assert auto_unit.frame_id == "F_reject"
    assert auto_unit.frame_number == 32
    assert auto_unit.confidence == 0.40
    assert auto_unit.label == "reject"
    assert auto_unit.provisional is True
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Case 2 : non-reject override → existing v4_candidates path ───
|
||||||
|
|
||||||
|
|
||||||
|
def test_override_to_v4_candidate_keeps_non_provisional():
    """Overriding to a non-reject candidate takes the ``v4_candidates`` path.

    The helper uses the early v4_candidates branch without consulting
    the full 32 judgments, and ``provisional`` stays False (normal-path
    AI=0 contract — the IMP-30 / IMP-47B router gate stays intact for
    this unit).
    """
    picked_candidate = _StubCandidate(
        template_id="MOCK_T_pick",
        frame_id="F_pick",
        frame_number=2,
        confidence=0.85,
        label="light_edit",
    )
    auto_unit = _StubUnit(
        source_section_ids=["MOCK_S2"],
        frame_template_id="MOCK_T_auto",
        frame_id="F_auto",
        frame_number=3,
        confidence=0.95,
        label="use_as_is",
        provisional=False,
        v4_candidates=[picked_candidate],
    )
    # No sections at all: the full-judgment probe must NOT be reached.
    empty_v4 = {"mdx_sections": {}}

    probe_path = _apply_frame_override_to_unit(auto_unit, "MOCK_T_pick", empty_v4)

    assert probe_path == "v4_candidates"
    assert auto_unit.frame_template_id == "MOCK_T_pick"
    assert auto_unit.frame_id == "F_pick"
    assert auto_unit.label == "light_edit"
    assert auto_unit.provisional is False
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Case 3 : unknown template → raw fall-through (no provisional) ─
|
||||||
|
|
||||||
|
|
||||||
|
def test_override_unknown_template_falls_through_without_provisional():
    """An unrecognised template id only rewrites ``frame_template_id``.

    The template is absent from v4_candidates AND from the (empty)
    judgments, so the helper reports ``raw_template_id_only``. The
    provisional flag, frame_id and label keep their prior values.
    """
    stub = _StubUnit(
        source_section_ids=["MOCK_S3"],
        frame_template_id="MOCK_T_auto",
        frame_id="F_auto",
        frame_number=4,
        confidence=0.92,
        label="use_as_is",
        provisional=False,
    )
    empty_v4 = {"mdx_sections": {}}

    outcome = _apply_frame_override_to_unit(stub, "MOCK_T_unknown", empty_v4)

    assert outcome == "raw_template_id_only"
    assert stub.frame_template_id == "MOCK_T_unknown"
    # frame_id / label unchanged — caller's print path warns on this case.
    assert stub.frame_id == "F_auto"
    assert stub.label == "use_as_is"
    assert stub.provisional is False
|
||||||
223
tests/test_imp47b_payload_apply.py
Normal file
223
tests/test_imp47b_payload_apply.py
Normal file
@@ -0,0 +1,223 @@
|
|||||||
|
"""IMP-47B u5 — PARTIAL_OVERRIDES apply tests.
|
||||||
|
|
||||||
|
Scope (this slice):
|
||||||
|
Helper ``_apply_ai_repair_proposals_to_zones`` (src/phase_z2_pipeline.py)
|
||||||
|
merges ``proposal.payload.slots`` into ``zones_data[k]["slot_payload"]``
|
||||||
|
for PARTIAL_OVERRIDES proposals only, and loud-fails out-of-scope
|
||||||
|
proposal kinds (builder_options_patch, slot_mapping_proposal) with an
|
||||||
|
explicit ``apply_status`` marker.
|
||||||
|
|
||||||
|
The IMP-33 u5 validator inside ``route_ai_fallback`` already enforces
|
||||||
|
declared-slot completeness — the apply helper is therefore a structural
|
||||||
|
merge over the validator's contract, not a per-slot guard re-implementation.
|
||||||
|
|
||||||
|
u6 (step12_ai_repair.json audit), u7 (coverage invariant), and u8
|
||||||
|
(slide_status surfacing) are out of scope for this unit.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
from src.phase_z2_pipeline import _apply_ai_repair_proposals_to_zones
|
||||||
|
|
||||||
|
|
||||||
|
def _record(
|
||||||
|
*,
|
||||||
|
unit_index: int,
|
||||||
|
proposal: dict | None,
|
||||||
|
source_section_ids: list[str] | None = None,
|
||||||
|
) -> dict:
|
||||||
|
"""Synthetic gather_step12_ai_repair_proposals record."""
|
||||||
|
return {
|
||||||
|
"unit_index": unit_index,
|
||||||
|
"source_section_ids": source_section_ids or [f"MOCK_S{unit_index}"],
|
||||||
|
"frame_template_id": "MOCK_T",
|
||||||
|
"label": "reject",
|
||||||
|
"route_hint": "ai_adaptation_required",
|
||||||
|
"provisional": True,
|
||||||
|
"ai_called": proposal is not None,
|
||||||
|
"skip_reason": None,
|
||||||
|
"proposal": proposal,
|
||||||
|
"error": None,
|
||||||
|
"cache_key": "MOCK_F::abc" if proposal is not None else None,
|
||||||
|
"fingerprints": {"contract_sha": "x", "partial_sha": "y", "catalog_sha": ""}
|
||||||
|
if proposal is not None
|
||||||
|
else None,
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def _zone(*, position: str, slot_payload: dict | None = None) -> dict:
|
||||||
|
"""Synthetic zones_data entry — only fields the apply helper touches."""
|
||||||
|
return {
|
||||||
|
"position": position,
|
||||||
|
"template_id": "MOCK_T",
|
||||||
|
"slot_payload": slot_payload if slot_payload is not None else {},
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Case 1 : PARTIAL_OVERRIDES → merged + applied marker ──────────
|
||||||
|
|
||||||
|
|
||||||
|
def test_partial_overrides_merges_slots_into_zone_slot_payload():
    """A partial_overrides proposal is merged into the zone's slot_payload.

    Slot values named by the proposal replace the deterministic ones,
    while a pre-existing meta key the proposal does not mention
    (``_truncated_count``) survives the merge. The record is tagged
    ``applied:partial_overrides``.
    """
    ai_slots = {
        "title": "AI title",
        "bullets": ["AI bullet 1", "AI bullet 2"],
    }
    deterministic_payload = {
        "title": "deterministic title",
        "bullets": ["det bullet"],
        "_truncated_count": 0,
    }
    records = [
        _record(
            unit_index=0,
            proposal={
                "proposal_kind": "partial_overrides",
                "payload": {"slots": ai_slots},
                "rationale": "MOCK",
            },
        )
    ]
    zones = [_zone(position="top", slot_payload=deterministic_payload)]

    _apply_ai_repair_proposals_to_zones(records, ["top"], zones)

    merged = zones[0]["slot_payload"]
    assert records[0]["apply_status"] == "applied:partial_overrides"
    assert merged["title"] == "AI title"
    assert merged["bullets"] == ["AI bullet 1", "AI bullet 2"]
    # meta keys not in proposal must survive the merge
    assert merged["_truncated_count"] == 0
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Case 2 : BUILDER_OPTIONS_PATCH → loud-fail unsupported_kind ───
|
||||||
|
|
||||||
|
|
||||||
|
def test_builder_options_patch_is_unsupported_for_reject_route():
    """A builder_options_patch proposal is marked, never applied.

    Builder-options application is out of scope for the IMP-47B reject
    route, so the zone's slot_payload must compare equal to its
    pre-call contents and the record must carry the
    ``unsupported_kind_for_reject_route:<kind>`` marker.
    """
    baseline_payload = {"title": "deterministic"}
    patch_proposal = {
        "proposal_kind": "builder_options_patch",
        "payload": {"font_size_px": 14},
        "rationale": "MOCK",
    }
    records = [_record(unit_index=0, proposal=patch_proposal)]
    # Hand the helper a copy so the baseline stays available for comparison.
    zones = [_zone(position="top", slot_payload=dict(baseline_payload))]

    _apply_ai_repair_proposals_to_zones(records, ["top"], zones)

    expected_status = "unsupported_kind_for_reject_route:builder_options_patch"
    assert records[0]["apply_status"] == expected_status
    assert zones[0]["slot_payload"] == baseline_payload
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Case 3 : SLOT_MAPPING_PROPOSAL → loud-fail unsupported_kind ───
|
||||||
|
|
||||||
|
|
||||||
|
def test_slot_mapping_proposal_is_unsupported_for_reject_route():
    """A slot_mapping_proposal is marked unsupported and leaves the zone alone.

    Slot-mapping (restructuring) follows the same marker path as
    builder-options: the record gets the
    ``unsupported_kind_for_reject_route:`` prefix plus the kind name.
    """
    mapping_proposal = {
        "proposal_kind": "slot_mapping_proposal",
        "payload": {"slots": {"title": "x"}},
        "rationale": "MOCK",
    }
    records = [_record(unit_index=0, proposal=mapping_proposal)]
    zones = [_zone(position="top", slot_payload={"title": "deterministic"})]

    _apply_ai_repair_proposals_to_zones(records, ["top"], zones)

    expected_status = "unsupported_kind_for_reject_route:slot_mapping_proposal"
    assert records[0]["apply_status"] == expected_status
    assert zones[0]["slot_payload"] == {"title": "deterministic"}
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Case 4 : no proposal (router short-circuit / not_provisional) ──
|
||||||
|
|
||||||
|
|
||||||
|
def test_record_without_proposal_marked_no_proposal_and_zone_untouched():
    """A record with ``proposal=None`` is tagged ``no_proposal``.

    The distinct status lets downstream consumers tell "nothing to
    apply" apart from real apply outcomes without re-reading
    ``skip_reason``. The matching zone's slot_payload is left as it was.
    """
    empty_record = _record(unit_index=0, proposal=None)
    records = [empty_record]
    zones = [_zone(position="top", slot_payload={"title": "deterministic"})]

    _apply_ai_repair_proposals_to_zones(records, ["top"], zones)

    assert records[0]["apply_status"] == "no_proposal"
    assert zones[0]["slot_payload"] == {"title": "deterministic"}
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Case 5 : proposal exists but no matching zone (B4 mismatch) ────
|
||||||
|
|
||||||
|
|
||||||
|
def test_proposal_for_unit_without_zone_match_marked_no_zone_match():
    """A proposal whose unit has no zone is reported as ``no_zone_match``.

    The unit's position is "top" but zones_data only holds a "bottom"
    zone. The helper must flag the mismatch instead of dropping the
    proposal silently or writing into the unrelated zone.
    """
    orphan_proposal = {
        "proposal_kind": "partial_overrides",
        "payload": {"slots": {"title": "AI title"}},
        "rationale": "MOCK",
    }
    records = [_record(unit_index=0, proposal=orphan_proposal)]
    # unit_positions[0]="top" but zones_data has only the bottom zone
    # → no match for the dropped unit's position.
    unit_positions = ["top"]
    zones = [_zone(position="bottom", slot_payload={"title": "other zone"})]

    _apply_ai_repair_proposals_to_zones(records, unit_positions, zones)

    assert records[0]["apply_status"] == "no_zone_match"
    # untouched zone — apply must not bleed into a different position
    assert zones[0]["slot_payload"] == {"title": "other zone"}
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Case 6 : mixed records — independent per-record classification ──
|
||||||
|
|
||||||
|
|
||||||
|
def test_mixed_records_classified_independently():
    """Every apply_status outcome coexists in one batch.

    Confirms the helper classifies each record on its own and does not
    short-circuit on the first non-applied record. The batch covers:
    applied partial_overrides, both unsupported kinds
    (builder_options_patch, slot_mapping_proposal), a record without a
    proposal, and a proposal whose position has no zone.
    """
    unit_positions = ["top", "middle", "bottom", "left", "right"]
    records = [
        _record(unit_index=0, proposal={
            "proposal_kind": "partial_overrides",
            "payload": {"slots": {"title": "AI"}},
            "rationale": "",
        }),
        _record(unit_index=1, proposal={
            "proposal_kind": "builder_options_patch",
            "payload": {"font_size_px": 14},
            "rationale": "",
        }),
        _record(unit_index=2, proposal=None),
        _record(unit_index=3, proposal={
            "proposal_kind": "slot_mapping_proposal",
            "payload": {"slots": {"title": "mapped"}},
            "rationale": "",
        }),
        # Position "right" deliberately has no zone below.
        _record(unit_index=4, proposal={
            "proposal_kind": "partial_overrides",
            "payload": {"slots": {"title": "orphan"}},
            "rationale": "",
        }),
    ]
    # One zone per position except the last, so record 4 cannot match.
    zones = [
        _zone(position=position, slot_payload={"title": "det"})
        for position in unit_positions[:-1]
    ]

    _apply_ai_repair_proposals_to_zones(records, unit_positions, zones)

    assert [r["apply_status"] for r in records] == [
        "applied:partial_overrides",
        "unsupported_kind_for_reject_route:builder_options_patch",
        "no_proposal",
        "unsupported_kind_for_reject_route:slot_mapping_proposal",
        "no_zone_match",
    ]
    # Only the applied record's zone changes; every other zone is untouched.
    assert zones[0]["slot_payload"]["title"] == "AI"
    assert zones[1]["slot_payload"]["title"] == "det"
    assert zones[2]["slot_payload"]["title"] == "det"
    assert zones[3]["slot_payload"]["title"] == "det"
|
||||||
154
tests/test_imp47b_step12_ai_wiring.py
Normal file
154
tests/test_imp47b_step12_ai_wiring.py
Normal file
@@ -0,0 +1,154 @@
|
|||||||
|
"""IMP-47B u4 + u6 — Step 12 AI repair wiring + audit artifact tests.
|
||||||
|
|
||||||
|
Scope (this slice):
|
||||||
|
* u4 — Helper ``_run_step12_ai_repair`` (src/phase_z2_pipeline.py)
|
||||||
|
wires the pipeline's local route-hint helper (``_imp05_route_hint``),
|
||||||
|
the frame contract loader (``get_contract``), and a
|
||||||
|
templates/phase_z2/families partial reader
|
||||||
|
(``_load_frame_partial_html``) into
|
||||||
|
``gather_step12_ai_repair_proposals``.
|
||||||
|
* u6 — The gather records flow into ``_write_step_artifact`` under
|
||||||
|
``step12_ai_repair.json``. The audit shape must stay
|
||||||
|
JSON-serialisable (no Pydantic / dataclass leakage) so the artifact
|
||||||
|
write never raises on real runs.
|
||||||
|
|
||||||
|
The router short-circuits when ``settings.ai_fallback_enabled`` is
|
||||||
|
False (default), so AI=0 for non-AI-route units stays a structural
|
||||||
|
guarantee. Synthetic naming mirrors tests/test_imp47b_override_provisional.py
|
||||||
|
(MOCK_ prefix; no real catalog template_id / frame_id leakage).
|
||||||
|
|
||||||
|
u5 (PARTIAL_OVERRIDES apply), u7 (coverage invariant), and u8
|
||||||
|
(slide_status surfacing) are out of scope for this unit.
|
||||||
|
"""
|
||||||
|
from __future__ import annotations
|
||||||
|
|
||||||
|
import json
|
||||||
|
from dataclasses import dataclass, field
|
||||||
|
|
||||||
|
from src.phase_z2_pipeline import (
|
||||||
|
_load_frame_partial_html,
|
||||||
|
_run_step12_ai_repair,
|
||||||
|
_write_step_artifact,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class _StubUnit:
    """Lightweight stand-in for the unit objects fed to ``_run_step12_ai_repair``.

    Only ``label`` and ``provisional`` are required; every other field
    has a synthetic ``MOCK_``-prefixed or neutral default so tests state
    just the attributes they vary.
    """

    # Required: the two attributes the tests in this module vary per unit.
    label: str | None
    provisional: bool
    # Synthetic identifiers — MOCK_ prefix keeps real catalog ids out of tests.
    frame_template_id: str = "MOCK_T_x"
    frame_id: str = "MOCK_F_x"
    # default_factory gives each instance its own list (no shared mutable default).
    source_section_ids: list[str] = field(default_factory=lambda: ["MOCK_S1"])
    raw_content: str = "MOCK_raw"
    v4_rank: int | None = 1
    # Remaining fields carry neutral defaults.
    # NOTE(review): presumably mirrors attributes the pipeline reads from
    # real units — confirm against _run_step12_ai_repair.
    cardinality: int | None = None
    layout_preset: str = ""
    zone_position: str = ""
    source_shape: str = "paragraph"
    h3_count: int = 0
    char_count: int = 0
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Case 1 : mixed units → per-unit skip_reason classification ─────
|
||||||
|
|
||||||
|
|
||||||
|
def test_mixed_units_classified_by_route_and_provisional_flag():
    """Each unit gets the skip_reason / route_hint its label and flag imply.

    Provisional reject and restructure units route to ai_adaptation and,
    with ai_fallback_enabled=False (default), record
    ``skip_reason='router_short_circuit'``. Non-provisional units record
    ``not_provisional``; a provisional light_edit unit records
    ``route_not_ai_adaptation:<route>``. No unit triggers an AI call.
    """
    # (label, provisional, expected skip_reason, expected route_hint)
    scenarios = [
        ("use_as_is", False, "not_provisional", "direct_render"),
        (
            "light_edit",
            True,
            "route_not_ai_adaptation:deterministic_minor_adjustment",
            "deterministic_minor_adjustment",
        ),
        ("restructure", True, "router_short_circuit", "ai_adaptation_required"),
        ("reject", True, "router_short_circuit", "ai_adaptation_required"),
        ("restructure", False, "not_provisional", "ai_adaptation_required"),
    ]
    units = [
        _StubUnit(label=label, provisional=is_provisional)
        for label, is_provisional, _skip, _route in scenarios
    ]

    records = _run_step12_ai_repair(units)

    expected_skips = [skip for _l, _p, skip, _r in scenarios]
    expected_routes = [route for _l, _p, _s, route in scenarios]
    assert [r["skip_reason"] for r in records] == expected_skips
    assert [r["route_hint"] for r in records] == expected_routes
    assert all(r["ai_called"] is False for r in records)
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Case 2 : reject provisional unit reaches AI gate ───────────────
|
||||||
|
|
||||||
|
|
||||||
|
def test_reject_provisional_unit_reaches_router_short_circuit():
    """A provisional reject unit gets as far as the router gate.

    The unit is routed to ``ai_adaptation_required`` and stops only at
    the router short-circuit (flag-off default), so no AI call is made.
    A populated cache_key and fingerprints show that gather went past
    the route and provisional gates into the AI-eligible path.
    """
    reject_unit = _StubUnit(label="reject", provisional=True)

    (record,) = _run_step12_ai_repair([reject_unit])[:1]

    assert record["route_hint"] == "ai_adaptation_required"
    assert record["skip_reason"] == "router_short_circuit"
    assert record["ai_called"] is False
    # cache_key / fingerprints populated only after the route + provisional
    # gates pass — confirms gather reached the AI-eligible code path.
    assert record["cache_key"] is not None
    assert record["fingerprints"] is not None
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Case 3 : frame visual loader degrades on missing partial ──────
|
||||||
|
|
||||||
|
|
||||||
|
def test_load_frame_partial_html_returns_empty_for_missing_file():
    """Template ids without a partial file load as the empty string.

    Covers both the ``__empty__`` shell id (IMP-30) and an arbitrary
    unknown template id, so gather() stays crash-free when a unit has
    no families partial.
    """
    for missing_template_id in ("__empty__", "MOCK_T_does_not_exist"):
        assert _load_frame_partial_html(missing_template_id) == ""
|
||||||
|
|
||||||
|
|
||||||
|
# ─── Case 4 (u6) : audit artifact write is JSON-serialisable ────────
|
||||||
|
|
||||||
|
|
||||||
|
def test_step12_ai_repair_artifact_writes_json_serialisable_records(tmp_path):
    """IMP-47B u6 — gather records round-trip through the JSON audit file.

    Records from ``_run_step12_ai_repair`` are written via
    ``_write_step_artifact`` as ``step12_ai_repair.json`` and read back
    with ``json.loads``. The step envelope and the per-unit
    ``route_hint`` / ``skip_reason`` / ``ai_called`` values must survive
    the round trip.
    """
    units = [
        _StubUnit(label="reject", provisional=True),
        _StubUnit(label="use_as_is", provisional=False),
    ]
    records = _run_step12_ai_repair(units)

    artifact_path = _write_step_artifact(
        tmp_path,
        12,
        "ai_repair",
        data={"per_unit": records},
        outputs=["step12_ai_repair.json"],
    )

    # The artifact exists under the expected file name.
    assert artifact_path.is_file()
    assert artifact_path.name == "step12_ai_repair.json"

    # Step envelope fields.
    envelope = json.loads(artifact_path.read_text(encoding="utf-8"))
    assert envelope["step_num"] == 12
    assert envelope["step_name"] == "ai_repair"
    assert envelope["step_status"] == "done"

    # Per-unit audit entries keep their classification.
    audited = envelope["data"]["per_unit"]
    assert len(audited) == 2
    reject_entry, direct_entry = audited
    assert reject_entry["route_hint"] == "ai_adaptation_required"
    assert reject_entry["skip_reason"] == "router_short_circuit"
    assert reject_entry["ai_called"] is False
    assert direct_entry["route_hint"] == "direct_render"
    assert direct_entry["skip_reason"] == "not_provisional"
|
||||||
@@ -44,3 +44,43 @@ def test_ai_fallback_budget_and_circuit_defaults_locked() -> None:
|
|||||||
s = Settings()
|
s = Settings()
|
||||||
assert s.ai_fallback_budget_per_run == 10
|
assert s.ai_fallback_budget_per_run == 10
|
||||||
assert s.ai_fallback_circuit_breaker_threshold == 5
|
assert s.ai_fallback_circuit_breaker_threshold == 5
|
||||||
|
|
||||||
|
|
||||||
|
# IMP-46 u5 — auto-cache opt-in setting default lock.
|
||||||
|
# The CLI flag ``--auto-cache`` in src/phase_z2_pipeline.py mutates this
|
||||||
|
# setting at parse time. The default MUST stay OFF so the dual-gate
|
||||||
|
# contract (visual_check_passed AND user_approved) survives without an
|
||||||
|
# explicit operator opt-in.
|
||||||
|
|
||||||
|
|
||||||
|
def test_ai_fallback_auto_cache_default_off() -> None:
    """Lock the ``ai_fallback_auto_cache`` default of a fresh Settings to False."""
    failure_message = (
        "IMP-46 u5 auto-cache MUST default OFF; the dual-gate contract "
        "(visual_check_passed AND user_approved) survives without an "
        "explicit --auto-cache opt-in."
    )
    default_settings = Settings()
    assert default_settings.ai_fallback_auto_cache is False, failure_message
|
||||||
|
|
||||||
|
|
||||||
|
# IMP-47B u1 — reject route hint policy correction.
|
||||||
|
# Prior to 2026-05-21 the reject V4 label routed to ``design_reference_only``
|
||||||
|
# (no AI). The user policy correction (issue #76) reroutes reject to
|
||||||
|
# ``ai_adaptation_required`` so the rank-1 reject frame is kept and the AI
|
||||||
|
# re-maps MDX content into its declared slots. Activation remains gated by
|
||||||
|
# ``ai_fallback_enabled`` (default OFF preserves the normal-path AI=0
|
||||||
|
# contract — see test_ai_fallback_master_flag_default_off above).
|
||||||
|
|
||||||
|
|
||||||
|
def test_reject_route_hint_routes_to_ai_adaptation() -> None:
    """Lock the reject → ``ai_adaptation_required`` route and its siblings.

    Checks both the raw ``_IMP05_ROUTE_HINTS`` table entry and the
    ``_imp05_route_hint`` lookup for ``reject``, then re-asserts the
    other three labels as a guardrail against accidental drift.
    """
    from src.phase_z2_pipeline import _IMP05_ROUTE_HINTS, _imp05_route_hint

    table_entry = _IMP05_ROUTE_HINTS["reject"]
    assert table_entry == "ai_adaptation_required", (
        "IMP-47B u1: reject must route to ai_adaptation_required so the "
        "rank-1 reject frame is retained and AI re-maps MDX content into "
        "its slots (frame auto-swap forbidden)."
    )

    expected_routes = {
        "reject": "ai_adaptation_required",
        # Sibling routes unchanged — guardrail against accidental drift.
        "use_as_is": "direct_render",
        "light_edit": "deterministic_minor_adjustment",
        "restructure": "ai_adaptation_required",
    }
    for v4_label, route in expected_routes.items():
        assert _imp05_route_hint(v4_label) == route
|
||||||
|
|||||||
@@ -237,10 +237,10 @@ def test_restructure_reject_preserved_as_non_direct_evidence(patch_selector_deps
|
|||||||
by_rank = {c["rank"]: c for c in candidates}
|
by_rank = {c["rank"]: c for c in candidates}
|
||||||
assert set(by_rank.keys()) == {1, 2, 3}
|
assert set(by_rank.keys()) == {1, 2, 3}
|
||||||
|
|
||||||
# rank-1 reject — non-direct, design_reference_only
|
# rank-1 reject — non-direct, ai_adaptation_required (IMP-47B u1 policy correction)
|
||||||
assert by_rank[1]["v4_label"] == "reject"
|
assert by_rank[1]["v4_label"] == "reject"
|
||||||
assert by_rank[1]["filtered_for_direct_execution"] is True
|
assert by_rank[1]["filtered_for_direct_execution"] is True
|
||||||
assert by_rank[1]["route_hint"] == "design_reference_only"
|
assert by_rank[1]["route_hint"] == "ai_adaptation_required"
|
||||||
|
|
||||||
# rank-2 restructure — non-direct, ai_adaptation_required
|
# rank-2 restructure — non-direct, ai_adaptation_required
|
||||||
assert by_rank[2]["v4_label"] == "restructure"
|
assert by_rank[2]["v4_label"] == "restructure"
|
||||||
|
|||||||
Reference in New Issue
Block a user