feat(#61): IMP-33 AI fallback scaffolding (u1~u11, flag default OFF)
Frame-aware AI fallback module scaffolded under src/phase_z2_ai_fallback/ with master flag ai_fallback_enabled=False; normal-path AI call count remains 0. AI output constrained to builder_options_patch / partial_overrides / slot_mapping_proposal; MDX / frame_id / raw HTML / raw CSS mutations rejected at schema layer. IMP-46 cache gate (cache.py) raises AiFallbackCacheGateError unless visual_check_passed AND user_approved. Step 12 wires AI repair after IMP-30 provisional payload only; Step 17 stays blocked behind IMP-34 / IMP-35 prerequisites. AST isolation guard forbids fallback package from importing Phase Q / Kei / pipeline runtime symbols. Docs IMP-17 / IMP-31 bound to runtime module surface via 11-row structural test pin (test_docs_sync.py) so drift fails CI. Tests: 116 fallback / 161 phase_z2 regression / 526 scoped full sweep all passing. Existing pre-IMP-33 fixture issue in scripts/test_phase_t_* remains untouched (out of scope). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
0
tests/phase_z2_ai_fallback/__init__.py
Normal file
0
tests/phase_z2_ai_fallback/__init__.py
Normal file
153
tests/phase_z2_ai_fallback/test_ast_isolation.py
Normal file
153
tests/phase_z2_ai_fallback/test_ast_isolation.py
Normal file
@@ -0,0 +1,153 @@
|
||||
"""IMP-33 u10 — AST isolation guard for the AI fallback package.
|
||||
|
||||
Structural defence: parse every ``*.py`` file under
|
||||
``src/phase_z2_ai_fallback/`` and assert that none of them imports a
|
||||
Phase Q runtime module, the Kei API client, or any ``phase_z2_*`` runtime
|
||||
module (e.g. ``phase_z2_pipeline``). Even if a future patch wires such a
|
||||
module by accident, this AST scan catches it before runtime and protects
|
||||
the PZ-1 invariant (normal-path AI call count = 0).
|
||||
|
||||
Allowed imports inside the fallback package:
|
||||
|
||||
* Standard library modules.
|
||||
* ``anthropic`` (u4 client) and ``pydantic`` (u2 schema).
|
||||
* ``src.config`` (u1 settings — single source of truth for policy knobs).
|
||||
* Other modules inside ``src.phase_z2_ai_fallback`` (intra-package).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import ast
|
||||
import pathlib
|
||||
|
||||
import pytest
|
||||
|
||||
# Directory of the package under scan: <repo>/src/phase_z2_ai_fallback.
PACKAGE_ROOT = pathlib.Path(__file__).resolve().parents[2].joinpath(
    "src", "phase_z2_ai_fallback"
)

# Dotted ``src.*`` modules (and their submodules) the package may import.
_ALLOWED_SRC_PREFIXES: tuple[str, ...] = ("src.config", "src.phase_z2_ai_fallback")

# Top-level packages the package may import from.
_ALLOWED_TOP_LEVEL: frozenset[str] = frozenset(
    (
        "__future__",
        "anthropic",
        "ast",
        "dataclasses",
        "enum",
        "json",
        "pathlib",
        "pydantic",
        "random",
        "time",
        "typing",
    )
)

# Modules that must never be imported by the fallback package.
_FORBIDDEN_PHASE_Q_MODULES: frozenset[str] = frozenset(
    (
        "src.block_assembler",
        "src.block_assembler_b2",
        "src.block_matcher_tfidf",
        "src.block_reference",
        "src.block_search",
        "src.block_selector",
        "src.content_editor",
        "src.design_director",
        "src.fit_verifier",
        "src.html_generator",
        "src.html_validator",
        "src.kei_client",
        "src.mdx_normalizer",
        "src.pipeline",
        "src.pipeline_v2",
        "src.renderer",
        "src.slide_measurer",
        "src.space_allocator",
    )
)
|
||||
|
||||
|
||||
def _module_files() -> list[pathlib.Path]:
|
||||
return sorted(p for p in PACKAGE_ROOT.glob("*.py") if p.name != "__pycache__")
|
||||
|
||||
|
||||
def _imported_names(
    tree: ast.AST, package: str = "src.phase_z2_ai_fallback"
) -> list[str]:
    """Collect the absolute dotted module names imported anywhere in ``tree``.

    ``import a.b`` contributes ``a.b`` and ``from a.b import c`` contributes
    ``a.b``. Relative imports are resolved against ``package`` so they cannot
    slip past the guard: ``from .. import pipeline`` becomes ``src.pipeline``
    and ``from .schema import X`` becomes ``<package>.schema``.

    Args:
        tree: Parsed module to inspect.
        package: Dotted name of the package the scanned file is assumed to
            live directly in; used only to resolve relative imports.

    Returns:
        Imported module names in AST walk order (duplicates preserved).
    """
    package_parts = package.split(".")
    names: list[str] = []
    for node in ast.walk(tree):
        if isinstance(node, ast.Import):
            names.extend(alias.name for alias in node.names)
        elif isinstance(node, ast.ImportFrom):
            if not node.level:
                if node.module is not None:
                    names.append(node.module)
                continue
            # level 1 == the package itself, level 2 == its parent, and so on.
            keep = max(len(package_parts) - (node.level - 1), 0)
            base = package_parts[:keep]
            if node.module is not None:
                names.append(".".join([*base, node.module]))
            else:
                # ``from . import x``: every alias is itself a module name.
                names.extend(".".join([*base, alias.name]) for alias in node.names)
    return names
|
||||
|
||||
|
||||
def _parse(path: pathlib.Path) -> ast.AST:
    """Read ``path`` as UTF-8 and return its parsed AST."""
    source_text = path.read_text(encoding="utf-8")
    # Passing the filename keeps syntax-error messages pointing at the file.
    return ast.parse(source_text, filename=str(path))
|
||||
|
||||
|
||||
def _is_allowed(name: str) -> bool:
    """Tell whether the dotted module ``name`` is on the import whitelist.

    A name passes when it equals, or is a submodule of, one of
    ``_ALLOWED_SRC_PREFIXES``, or when its first dotted segment is listed in
    ``_ALLOWED_TOP_LEVEL``.
    """
    if any(
        name == allowed or name.startswith(allowed + ".")
        for allowed in _ALLOWED_SRC_PREFIXES
    ):
        return True
    first_segment, _, _ = name.partition(".")
    return first_segment in _ALLOWED_TOP_LEVEL
|
||||
|
||||
|
||||
def test_fallback_package_root_exists() -> None:
    """The locked package directory exists and contains at least one module."""
    missing_root_msg = (
        f"fallback package root not found at {PACKAGE_ROOT!s}; module path "
        "is locked by IMP-31-GATE-AUDIT (src/phase_z2_ai_fallback/)."
    )
    assert PACKAGE_ROOT.is_dir(), missing_root_msg
    discovered = _module_files()
    assert discovered, f"no .py modules found under {PACKAGE_ROOT!s}"
|
||||
|
||||
|
||||
def test_fallback_package_imports_are_whitelisted() -> None:
    """Every import made by the fallback package is on the whitelist."""
    offenders: list[tuple[str, str]] = [
        (module_path.name, imported)
        for module_path in _module_files()
        for imported in _imported_names(_parse(module_path))
        if not _is_allowed(imported)
    ]
    assert not offenders, (
        "fallback package imports outside the IMP-33 whitelist "
        f"(Phase Q / Kei / phase_z2_* runtime forbidden): {offenders}"
    )
|
||||
|
||||
|
||||
@pytest.mark.parametrize("forbidden_module", sorted(_FORBIDDEN_PHASE_Q_MODULES))
def test_fallback_package_forbids_phase_q_and_kei_imports(forbidden_module: str) -> None:
    """No module imports ``forbidden_module`` or any of its submodules."""
    for module_path in _module_files():
        tree = _parse(module_path)
        for imported in _imported_names(tree):
            # First two dotted segments, e.g. ``src.pipeline`` for ``src.pipeline.x``.
            head = ".".join(imported.split(".")[:2])
            assert forbidden_module not in (head, imported), (
                f"{module_path.name} imports forbidden module {imported!r}; "
                f"{forbidden_module!r} is a Phase Q / Kei runtime module and "
                "must not be reachable from the AI fallback package."
            )
|
||||
|
||||
|
||||
def test_fallback_package_forbids_phase_z2_pipeline_imports() -> None:
    """No module imports anything whose name starts with ``src.phase_z2_pipeline``."""
    pipeline_prefix = "src.phase_z2_pipeline"
    for module_path in _module_files():
        tree = _parse(module_path)
        for imported in _imported_names(tree):
            assert not imported.startswith(pipeline_prefix), (
                f"{module_path.name} imports {imported!r}; the Phase Z2 pipeline runtime "
                "module must not be reachable from the AI fallback package "
                "(PZ-1: normal-path AI=0)."
            )
|
||||
|
||||
|
||||
def test_fallback_package_forbids_other_phase_z2_runtime_imports() -> None:
    """The only ``src.phase_z2_*`` imports allowed are intra-package ones."""

    def _is_foreign_phase_z2(imported: str) -> bool:
        if not imported.startswith("src.phase_z2_"):
            return False
        return not imported.startswith("src.phase_z2_ai_fallback")

    violations: list[tuple[str, str]] = [
        (module_path.name, imported)
        for module_path in _module_files()
        for imported in _imported_names(_parse(module_path))
        if _is_foreign_phase_z2(imported)
    ]
    assert not violations, (
        "fallback package imports another phase_z2_* runtime module; "
        f"violations: {violations}"
    )
|
||||
90
tests/phase_z2_ai_fallback/test_cache.py
Normal file
90
tests/phase_z2_ai_fallback/test_cache.py
Normal file
@@ -0,0 +1,90 @@
|
||||
"""IMP-33 u6 — AI fallback cache gate tests.
|
||||
|
||||
Verifies the IMP-46 gate contract:
|
||||
* ``read_proposal`` is a stub (returns None until IMP-46).
|
||||
* ``save_proposal`` enforces both gates before any write attempt.
|
||||
* Storage itself raises NotImplementedError (IMP-46 marker).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from src.phase_z2_ai_fallback.cache import (
|
||||
AiFallbackCacheGateError,
|
||||
read_proposal,
|
||||
save_proposal,
|
||||
)
|
||||
from src.phase_z2_ai_fallback.schema import AiFallbackProposal, ProposalKind
|
||||
|
||||
|
||||
def _proposal() -> AiFallbackProposal:
    """Build a minimal builder-options proposal used by the gate tests."""
    fields = {
        "proposal_kind": ProposalKind.BUILDER_OPTIONS_PATCH,
        "payload": {"item_parser": "bullet_v2"},
        "rationale": "u6-test",
    }
    return AiFallbackProposal(**fields)
|
||||
|
||||
|
||||
def test_read_proposal_returns_none_for_any_key():
    """The stub reader reports a miss for a non-empty key."""
    looked_up = read_proposal("frame=foo|cardinality=3")
    assert looked_up is None
|
||||
|
||||
|
||||
def test_read_proposal_rejects_empty_key():
    """An empty cache key is rejected with ``ValueError``."""
    empty_key = ""
    with pytest.raises(ValueError):
        read_proposal(empty_key)
|
||||
|
||||
|
||||
def test_save_rejects_when_visual_check_failed():
    """A failed visual check blocks the save and is named in the error."""
    gates = {"visual_check_passed": False, "user_approved": True}
    with pytest.raises(AiFallbackCacheGateError) as excinfo:
        save_proposal("k", _proposal(), **gates)
    assert "visual_check_passed" in str(excinfo.value)
|
||||
|
||||
|
||||
def test_save_rejects_when_user_not_approved():
    """Missing user approval blocks the save and is named in the error."""
    gates = {"visual_check_passed": True, "user_approved": False}
    with pytest.raises(AiFallbackCacheGateError) as excinfo:
        save_proposal("k", _proposal(), **gates)
    assert "user_approved" in str(excinfo.value)
|
||||
|
||||
|
||||
def test_save_rejects_when_both_gates_false():
    """With both gates closed the save raises the gate error."""
    gates = {"visual_check_passed": False, "user_approved": False}
    with pytest.raises(AiFallbackCacheGateError):
        save_proposal("k", _proposal(), **gates)
|
||||
|
||||
|
||||
def test_save_raises_not_implemented_when_both_gates_pass():
    """Past both gates, storage raises ``NotImplementedError`` mentioning IMP-46."""
    gates = {"visual_check_passed": True, "user_approved": True}
    with pytest.raises(NotImplementedError) as excinfo:
        save_proposal("k", _proposal(), **gates)
    assert "IMP-46" in str(excinfo.value)
|
||||
|
||||
|
||||
def test_save_rejects_empty_key():
    """An empty cache key is rejected with ``ValueError`` even with both gates open."""
    gates = {"visual_check_passed": True, "user_approved": True}
    with pytest.raises(ValueError):
        save_proposal("", _proposal(), **gates)
|
||||
|
||||
|
||||
def test_save_rejects_non_proposal_object():
    """A plain dict in place of an ``AiFallbackProposal`` raises ``TypeError``."""
    not_a_proposal = {"proposal_kind": "builder_options_patch"}
    with pytest.raises(TypeError):
        save_proposal(
            "k",
            not_a_proposal,  # type: ignore[arg-type]
            visual_check_passed=True,
            user_approved=True,
        )
|
||||
|
||||
|
||||
def test_gate_error_is_not_notimplementederror():
    """The gate error is raised and is not a ``NotImplementedError`` subclass."""
    gates = {"visual_check_passed": False, "user_approved": True}
    with pytest.raises(AiFallbackCacheGateError):
        save_proposal("k", _proposal(), **gates)
    is_storage_marker = issubclass(AiFallbackCacheGateError, NotImplementedError)
    assert not is_storage_marker
|
||||
151
tests/phase_z2_ai_fallback/test_client_mock.py
Normal file
151
tests/phase_z2_ai_fallback/test_client_mock.py
Normal file
@@ -0,0 +1,151 @@
|
||||
"""IMP-33 u4 — fallback client mock tests.
|
||||
|
||||
Scope (Stage 2 plan, u4):
|
||||
- Success path returns a validated ``AiFallbackProposal`` (u2 schema).
|
||||
- Transient errors (timeout / connection / 429 / 5xx) are retried.
|
||||
- Retries exhausted → last transient error propagates + consec-fail bumps.
|
||||
- Non-transient errors are NOT retried.
|
||||
- Per-run budget exhaustion raises ``AiFallbackBudgetExceeded``.
|
||||
- Circuit breaker opens after consecutive-failure threshold reached.
|
||||
- Policy values are sourced from ``settings`` (no inline literals).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import time
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import anthropic
|
||||
import httpx
|
||||
import pytest
|
||||
|
||||
from src.config import settings
|
||||
from src.phase_z2_ai_fallback.client import (
|
||||
AiFallbackBudgetExceeded,
|
||||
AiFallbackCircuitOpen,
|
||||
AiFallbackClient,
|
||||
)
|
||||
|
||||
|
||||
class _NonTransient(Exception):
    """Test-only exception standing in for an error outside the transient set.

    Used to check that such errors are raised after a single attempt.
    """
|
||||
|
||||
|
||||
def _ok_response() -> SimpleNamespace:
    """Return a fake SDK response whose single block carries a JSON proposal."""
    proposal_body = {
        "proposal_kind": "builder_options_patch",
        "payload": {"k": 1},
        "rationale": "ok",
    }
    text_block = SimpleNamespace(text=json.dumps(proposal_body))
    return SimpleNamespace(content=[text_block])
|
||||
|
||||
|
||||
def _timeout_err() -> anthropic.APITimeoutError:
    """Build an ``APITimeoutError`` bound to a dummy POST request."""
    dummy_request = httpx.Request("POST", "https://x")
    return anthropic.APITimeoutError(request=dummy_request)
|
||||
|
||||
|
||||
def _connection_err() -> anthropic.APIConnectionError:
    """Build an ``APIConnectionError`` bound to a dummy POST request."""
    dummy_request = httpx.Request("POST", "https://x")
    return anthropic.APIConnectionError(request=dummy_request)
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def _no_real_sleep(monkeypatch: pytest.MonkeyPatch) -> None:
    """Replace ``time.sleep`` with a no-op for every test in this module."""

    def _skip(_seconds):
        return None

    monkeypatch.setattr(time, "sleep", _skip)
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def _restore_settings():
    """Snapshot ``settings`` before each test and write the values back after."""
    before = settings.model_dump()
    yield
    for field_name in before:
        setattr(settings, field_name, before[field_name])
|
||||
|
||||
|
||||
def _client_with(side_effect=None, return_value=None) -> AiFallbackClient:
    """Wrap a mocked SDK in an ``AiFallbackClient``.

    ``side_effect`` wins when given; otherwise ``messages.create`` returns
    ``return_value``, falling back to a canned OK response when that is falsy.
    """
    sdk = MagicMock()
    create = sdk.messages.create
    if side_effect is None:
        create.return_value = return_value or _ok_response()
    else:
        create.side_effect = side_effect
    return AiFallbackClient(client=sdk)
|
||||
|
||||
|
||||
def test_success_returns_validated_proposal() -> None:
    """A well-formed response is returned as a parsed proposal."""
    prompt = {"system": "s", "user": "u"}
    proposal = _client_with().request_proposal(prompt)
    assert proposal.proposal_kind.value == "builder_options_patch"
    assert proposal.payload == {"k": 1}
|
||||
|
||||
|
||||
def test_call_uses_settings_model() -> None:
    """The ``model`` keyword sent to the SDK equals ``settings.ai_fallback_model``."""
    sdk = MagicMock()
    sdk.messages.create.return_value = _ok_response()
    client = AiFallbackClient(client=sdk)
    client.request_proposal({"system": "s", "user": "u"})
    sent_kwargs = sdk.messages.create.call_args.kwargs
    assert sent_kwargs["model"] == settings.ai_fallback_model
|
||||
|
||||
|
||||
def test_transient_retries_then_succeeds(monkeypatch: pytest.MonkeyPatch) -> None:
    """Two transient failures followed by a success take exactly three calls.

    The retry limit is pinned to 2 so the test does not depend on the ambient
    ``settings`` default (or an environment override) being large enough.
    """
    monkeypatch.setattr(settings, "ai_fallback_max_retries", 2)
    fake = MagicMock()
    fake.messages.create.side_effect = [_timeout_err(), _connection_err(), _ok_response()]
    AiFallbackClient(client=fake).request_proposal({"system": "s", "user": "u"})
    assert fake.messages.create.call_count == 3
|
||||
|
||||
|
||||
def test_retries_exhausted_raises_last_transient(monkeypatch: pytest.MonkeyPatch) -> None:
    """With one retry, two timeouts propagate the error and count one failure."""
    monkeypatch.setattr(settings, "ai_fallback_max_retries", 1)
    sdk = MagicMock()
    sdk.messages.create.side_effect = [_timeout_err(), _timeout_err()]
    client = AiFallbackClient(client=sdk)
    prompt = {"system": "s", "user": "u"}
    with pytest.raises(anthropic.APITimeoutError):
        client.request_proposal(prompt)
    assert sdk.messages.create.call_count == 2
    assert client._consecutive_failures == 1
|
||||
|
||||
|
||||
def test_non_transient_not_retried() -> None:
    """A non-transient error surfaces after a single SDK call."""
    sdk = MagicMock()
    sdk.messages.create.side_effect = _NonTransient("boom")
    client = AiFallbackClient(client=sdk)
    prompt = {"system": "s", "user": "u"}
    with pytest.raises(_NonTransient):
        client.request_proposal(prompt)
    assert sdk.messages.create.call_count == 1
|
||||
|
||||
|
||||
def test_budget_exceeded(monkeypatch: pytest.MonkeyPatch) -> None:
    """With a per-run budget of 1, the second request raises the budget error."""
    monkeypatch.setattr(settings, "ai_fallback_budget_per_run", 1)
    client = _client_with()
    prompt = {"system": "s", "user": "u"}
    client.request_proposal(prompt)
    with pytest.raises(AiFallbackBudgetExceeded):
        client.request_proposal({"system": "s", "user": "u"})
|
||||
|
||||
|
||||
def test_circuit_breaker_opens(monkeypatch: pytest.MonkeyPatch) -> None:
    """With threshold 1 and no retries, one failure opens the circuit."""
    for knob, value in (
        ("ai_fallback_circuit_breaker_threshold", 1),
        ("ai_fallback_max_retries", 0),
    ):
        monkeypatch.setattr(settings, knob, value)
    sdk = MagicMock()
    sdk.messages.create.side_effect = _timeout_err()
    client = AiFallbackClient(client=sdk)
    with pytest.raises(anthropic.APITimeoutError):
        client.request_proposal({"system": "s", "user": "u"})
    with pytest.raises(AiFallbackCircuitOpen):
        client.request_proposal({"system": "s", "user": "u"})
|
||||
|
||||
|
||||
def test_backoff_uses_settings(monkeypatch: pytest.MonkeyPatch) -> None:
    """Sleep delay must be derived from settings (no inline literals)."""
    for knob, value in (
        ("ai_fallback_max_retries", 1),
        ("ai_fallback_backoff_base_s", 0.25),
        ("ai_fallback_backoff_cap_s", 0.5),
        ("ai_fallback_backoff_jitter", 0.0),
    ):
        monkeypatch.setattr(settings, knob, value)
    recorded_delays: list[float] = []
    # Record each requested delay instead of sleeping.
    monkeypatch.setattr(time, "sleep", recorded_delays.append)
    sdk = MagicMock()
    sdk.messages.create.side_effect = [_timeout_err(), _ok_response()]
    AiFallbackClient(client=sdk).request_proposal({"system": "s", "user": "u"})
    # attempt 0 transient -> sleep(min(cap, base * 2**0) + jitter==0) = 0.25
    assert recorded_delays == [0.25]
|
||||
61
tests/phase_z2_ai_fallback/test_docs_sync.py
Normal file
61
tests/phase_z2_ai_fallback/test_docs_sync.py
Normal file
@@ -0,0 +1,61 @@
|
||||
"""IMP-33 u11 — docs sync verification.
|
||||
|
||||
Verifies that the binding architecture docs reference the IMP-33 runtime
|
||||
module surface introduced by u1~u10. Scope is intentionally narrow per the
|
||||
Stage 2 plan: module path, Step 12 entry, Step 17 entry, cascade order, and
|
||||
the IMP-46 cache gate. Failure here means the docs and the code have
|
||||
drifted — fix the docs (or the code) before merging.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
# <repo>/docs/architecture, located relative to this test file.
DOCS_ROOT = Path(__file__).resolve().parents[2].joinpath("docs", "architecture")
# Binding docs checked against the runtime module surface.
CARVE_OUT_DOC = DOCS_ROOT.joinpath("IMP-17-CARVE-OUT.md")
GATE_AUDIT_DOC = DOCS_ROOT.joinpath("IMP-31-GATE-AUDIT.md")
|
||||
|
||||
|
||||
def _read(doc: Path) -> str:
    """Return the UTF-8 text of ``doc``; fail the assertion if it is not a file."""
    exists = doc.is_file()
    assert exists, f"binding doc missing: {doc}"
    with doc.open(encoding="utf-8") as handle:
        return handle.read()
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "needle",
    [
        "src/phase_z2_ai_fallback/",  # module path lock
        "gather_step12_ai_repair_proposals",  # Step 12 entry
        "gather_step17_ai_repair_proposals",  # Step 17 entry
        "step17_ai_blocked_imp_34_35_prerequisites_missing",  # blocked-reason sentinel
        "OVERFLOW_CASCADE_ORDER",  # cascade order single source of truth
        "(DETERMINISTIC, POPUP, AI_REPAIR, USER_OVERRIDE)",
        "visual_check_passed",  # IMP-46 cache gate
        "user_approved",
        "AiFallbackCacheGateError",
        "ai_fallback_enabled",  # PZ-1 normal-path AI=0 invariant
    ],
)
def test_carve_out_doc_references_runtime_surface(needle: str) -> None:
    """The carve-out doc mentions each pinned runtime identifier verbatim."""
    doc_text = _read(CARVE_OUT_DOC)
    assert needle in doc_text, (
        f"IMP-17-CARVE-OUT.md missing binding reference: {needle!r}"
    )
|
||||
|
||||
|
||||
def test_gate_audit_reflects_scaffolded_module() -> None:
    """The gate-audit doc records the scaffolded module and the master flag."""
    audit_text = _read(GATE_AUDIT_DOC)
    mentions_scaffold = "scaffolded under IMP-33" in audit_text
    assert mentions_scaffold, (
        "IMP-31-GATE-AUDIT.md must record that the fallback module path is "
        "scaffolded (not 'not created this cycle')."
    )
    mentions_flag = "ai_fallback_enabled" in audit_text
    assert mentions_flag, (
        "IMP-31-GATE-AUDIT.md must record the flag default that keeps PZ-1 "
        "(normal-path AI=0) intact while the 3-condition gate is open."
    )
|
||||
100
tests/phase_z2_ai_fallback/test_prompts.py
Normal file
100
tests/phase_z2_ai_fallback/test_prompts.py
Normal file
@@ -0,0 +1,100 @@
|
||||
"""IMP-33 u3 — fallback prompt builder tests.
|
||||
|
||||
Scope (Stage 2 plan, u3):
|
||||
- Prompt is built only when V4 route == 'ai_adaptation_required'.
|
||||
- System prompt declares MDX READ-ONLY and pins the u2 whitelist.
|
||||
- System prompt forbids the u2 forbidden kinds + frame_id swap.
|
||||
- User payload carries all 6 declared inputs and labels MDX READ_ONLY.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
|
||||
import pytest
|
||||
|
||||
from src.phase_z2_ai_fallback.prompts import (
|
||||
SYSTEM_PROMPT,
|
||||
V4_ROUTE_AI_ADAPTATION,
|
||||
build_ai_fallback_prompt,
|
||||
)
|
||||
from src.phase_z2_ai_fallback.schema import FORBIDDEN_KINDS, ProposalKind
|
||||
|
||||
|
||||
def _v4(route: str = V4_ROUTE_AI_ADAPTATION) -> dict:
    """Build a V4 result dict carrying the given ``route``."""
    result: dict = {"route": route}
    result.update(
        {
            "cardinality": {"strict": 3},
            "label": "restructure",
            "frame_id": 1171281190,
            "rank": 1,
        }
    )
    return result
|
||||
|
||||
|
||||
def _inputs(route: str = V4_ROUTE_AI_ADAPTATION) -> dict:
    """Build the six keyword inputs passed to ``build_ai_fallback_prompt``."""
    v4_result = _v4(route)
    region = {"id": "region_top", "bbox": [0, 0, 1200, 320]}
    return {
        "v4_result": v4_result,
        "frame_contract": {"template_id": "three_parallel_requirements"},
        "frame_visual_html": "<section class='f13b'/>",
        "figma_partial_json": {"nodes": []},
        "internal_region": region,
        "mdx_text": "# 대목차\n- 항목 1\n- 항목 2\n- 항목 3",
    }
|
||||
|
||||
|
||||
def test_system_prompt_declares_mdx_read_only() -> None:
    """The system prompt contains the literal READ-ONLY marker."""
    marker = "READ-ONLY"
    assert marker in SYSTEM_PROMPT
|
||||
|
||||
|
||||
def test_system_prompt_lists_all_whitelisted_kinds() -> None:
    """Every ``ProposalKind`` value appears in the system prompt."""
    missing = [kind.value for kind in ProposalKind if kind.value not in SYSTEM_PROMPT]
    assert not missing
|
||||
|
||||
|
||||
def test_system_prompt_forbids_all_forbidden_kinds() -> None:
    """Every entry of ``FORBIDDEN_KINDS`` appears in the system prompt."""
    unmentioned = [kind for kind in FORBIDDEN_KINDS if kind not in SYSTEM_PROMPT]
    assert not unmentioned
|
||||
|
||||
|
||||
def test_system_prompt_locks_frame_id_swap() -> None:
    """The system prompt mentions ``frame_id``."""
    token = "frame_id"
    assert token in SYSTEM_PROMPT
|
||||
|
||||
|
||||
def test_build_prompt_returns_system_and_user() -> None:
    """The built prompt has exactly ``system`` and ``user``, with the fixed system text."""
    built = build_ai_fallback_prompt(**_inputs())
    assert set(built.keys()) == {"system", "user"}
    assert built["system"] == SYSTEM_PROMPT
|
||||
|
||||
|
||||
def test_user_payload_carries_all_inputs_and_marks_mdx_read_only() -> None:
    """The JSON user payload carries every input and exposes MDX only as READ_ONLY."""
    user_json = build_ai_fallback_prompt(**_inputs())["user"]
    payload = json.loads(user_json)
    v4_section = payload["v4"]
    assert v4_section["route"] == V4_ROUTE_AI_ADAPTATION
    assert v4_section["cardinality"] == {"strict": 3}
    assert v4_section["frame_id"] == 1171281190
    assert payload["frame_contract"]["template_id"] == "three_parallel_requirements"
    assert payload["frame_visual_html"] == "<section class='f13b'/>"
    assert payload["figma_partial_json"] == {"nodes": []}
    assert payload["internal_region"]["id"] == "region_top"
    assert "mdx_text_READ_ONLY" in payload
    assert payload["mdx_text_READ_ONLY"].startswith("# 대목차")
    # Only the READ_ONLY key may exist, never a writable alias.
    assert "mdx_text" not in payload
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "route",
    [
        "direct_render",
        "deterministic_minor_adjustment",
        "design_reference_only",
        None,
    ],
)
def test_non_ai_route_rejected(route) -> None:
    """Any other route, or a missing route key, raises ``ValueError``."""
    if route is None:
        # ``None`` stands for "no route key at all".
        inputs = _inputs()
        inputs["v4_result"].pop("route")
    else:
        inputs = _inputs(route=route)
    with pytest.raises(ValueError, match=V4_ROUTE_AI_ADAPTATION):
        build_ai_fallback_prompt(**inputs)
|
||||
|
||||
|
||||
def test_cardinality_signature_alias_accepted() -> None:
    """Some V4 callers expose ``cardinality_signature``; both keys must resolve."""
    inputs = _inputs()
    v4_result = inputs["v4_result"]
    v4_result.pop("cardinality")
    v4_result["cardinality_signature"] = {"strict": 4}
    user_json = build_ai_fallback_prompt(**inputs)["user"]
    assert json.loads(user_json)["v4"]["cardinality"] == {"strict": 4}
|
||||
156
tests/phase_z2_ai_fallback/test_router.py
Normal file
156
tests/phase_z2_ai_fallback/test_router.py
Normal file
@@ -0,0 +1,156 @@
|
||||
"""IMP-33 u7 — AI fallback router tests.
|
||||
|
||||
Scope (Stage 2 plan, u7):
|
||||
- flag-off gate returns None and does NOT touch the client / prompt
|
||||
- route-mismatch gate returns None and does NOT touch the client / prompt
|
||||
- cache-hit short-circuits the client and still re-validates against the
|
||||
current frame contract (defence-in-depth)
|
||||
- cache-miss calls the client and validates the returned proposal
|
||||
- validation errors propagate
|
||||
- budget / circuit exceptions from u4 propagate
|
||||
- router never imports ``save_proposal`` (cache save is caller-driven
|
||||
after visual_check + user_approved per u6 IMP-46 gate)
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
from src.phase_z2_ai_fallback import AiFallbackProposal, ProposalKind
|
||||
from src.phase_z2_ai_fallback import router as router_mod
|
||||
from src.phase_z2_ai_fallback.client import (
|
||||
AiFallbackBudgetExceeded,
|
||||
AiFallbackCircuitOpen,
|
||||
AiFallbackClient,
|
||||
)
|
||||
from src.phase_z2_ai_fallback.router import route_ai_fallback
|
||||
from src.phase_z2_ai_fallback.validate import AiFallbackValidationError
|
||||
|
||||
|
||||
# Frame contract handed to the router in every test.
_FRAME_CONTRACT = {
    "frame_id": 1171281190,
    "sub_zones": [
        {"id": "pillar_1", "accepts": ["text_block"]},
    ],
    "payload": {
        "builder_options": {"item_parser": "pillar_item"},
    },
}
# Internal region passed alongside the contract.
_REGION = {
    "id": "zone_top.region_a",
}
# V4 result whose route asks for AI adaptation.
_V4_AI = {
    "route": "ai_adaptation_required",
    "cardinality": "many",
    "frame_id": 1171281190,
    "rank": 1,
}
# V4 result with a different route.
_V4_NOT_AI = {
    "route": "light_edit",
    "cardinality": "many",
}
|
||||
|
||||
|
||||
def _make_proposal(
    kind: ProposalKind = ProposalKind.PARTIAL_OVERRIDES,
    payload: dict | None = None,
) -> AiFallbackProposal:
    """Build a proposal; ``payload`` defaults to a single ``pillar_1`` slot."""
    if payload is None:
        payload = {"slots": {"pillar_1": "a"}}
    return AiFallbackProposal(proposal_kind=kind, payload=payload)
|
||||
|
||||
|
||||
def _call_kwargs() -> dict:
    """Return the keyword arguments for ``route_ai_fallback`` minus ``client``."""
    return {
        "cache_key": "frame:1171281190:cardinality:many",
        "v4_result": _V4_AI,
        "frame_contract": _FRAME_CONTRACT,
        "frame_visual_html": "<div></div>",
        "figma_partial_json": {},
        "internal_region": _REGION,
        "mdx_text": "# example\n- a\n- b",
    }
|
||||
|
||||
|
||||
def test_router_returns_none_when_flag_off(monkeypatch):
    """With the master flag off the router returns None and never calls the client."""
    monkeypatch.setattr(router_mod.settings, "ai_fallback_enabled", False)
    fake_client = MagicMock(spec=AiFallbackClient)
    outcome = route_ai_fallback(**_call_kwargs(), client=fake_client)
    assert outcome is None
    fake_client.request_proposal.assert_not_called()
|
||||
|
||||
|
||||
def test_router_returns_none_when_route_not_ai_adaptation(monkeypatch):
    """A non-AI V4 route returns None without calling the client."""
    monkeypatch.setattr(router_mod.settings, "ai_fallback_enabled", True)
    fake_client = MagicMock(spec=AiFallbackClient)
    call_args = {**_call_kwargs(), "v4_result": _V4_NOT_AI}
    outcome = route_ai_fallback(**call_args, client=fake_client)
    assert outcome is None
    fake_client.request_proposal.assert_not_called()
|
||||
|
||||
|
||||
def test_router_returns_cached_when_cache_hit(monkeypatch):
    """A cache hit is returned as-is and the client is not called."""
    monkeypatch.setattr(router_mod.settings, "ai_fallback_enabled", True)
    cached_proposal = _make_proposal()

    def _hit(key):
        return cached_proposal

    monkeypatch.setattr(router_mod, "read_proposal", _hit)
    fake_client = MagicMock(spec=AiFallbackClient)
    outcome = route_ai_fallback(**_call_kwargs(), client=fake_client)
    assert outcome is cached_proposal
    fake_client.request_proposal.assert_not_called()
|
||||
|
||||
|
||||
def test_router_validates_cached_proposal(monkeypatch):
    """An invalid cached proposal raises the validation error without a client call."""
    monkeypatch.setattr(router_mod.settings, "ai_fallback_enabled", True)
    stale = AiFallbackProposal(
        proposal_kind=ProposalKind.BUILDER_OPTIONS_PATCH,
        payload={"unknown_key": "x"},
    )

    def _hit(key):
        return stale

    monkeypatch.setattr(router_mod, "read_proposal", _hit)
    fake_client = MagicMock(spec=AiFallbackClient)
    with pytest.raises(AiFallbackValidationError):
        route_ai_fallback(**_call_kwargs(), client=fake_client)
    fake_client.request_proposal.assert_not_called()
|
||||
|
||||
|
||||
def test_router_calls_client_and_returns_validated_proposal(monkeypatch):
    """On a cache miss the client is called once with a system/user prompt."""
    monkeypatch.setattr(router_mod.settings, "ai_fallback_enabled", True)

    def _miss(key):
        return None

    monkeypatch.setattr(router_mod, "read_proposal", _miss)
    fresh = _make_proposal()
    fake_client = MagicMock(spec=AiFallbackClient)
    fake_client.request_proposal.return_value = fresh
    outcome = route_ai_fallback(**_call_kwargs(), client=fake_client)
    assert outcome is fresh
    fake_client.request_proposal.assert_called_once()
    prompt_sent = fake_client.request_proposal.call_args.args[0]
    assert set(prompt_sent.keys()) == {"system", "user"}
|
||||
|
||||
|
||||
def test_router_propagates_validation_error(monkeypatch):
    """An invalid proposal returned by the client raises the validation error."""
    monkeypatch.setattr(router_mod.settings, "ai_fallback_enabled", True)

    def _miss(key):
        return None

    monkeypatch.setattr(router_mod, "read_proposal", _miss)
    invalid = AiFallbackProposal(
        proposal_kind=ProposalKind.BUILDER_OPTIONS_PATCH,
        payload={"unknown_key": "x"},
    )
    fake_client = MagicMock(spec=AiFallbackClient)
    fake_client.request_proposal.return_value = invalid
    with pytest.raises(AiFallbackValidationError):
        route_ai_fallback(**_call_kwargs(), client=fake_client)
|
||||
|
||||
|
||||
def test_router_propagates_budget_exceeded(monkeypatch):
    """A budget error raised by the client passes through the router."""
    monkeypatch.setattr(router_mod.settings, "ai_fallback_enabled", True)

    def _miss(key):
        return None

    monkeypatch.setattr(router_mod, "read_proposal", _miss)
    fake_client = MagicMock(spec=AiFallbackClient)
    fake_client.request_proposal.side_effect = AiFallbackBudgetExceeded("over")
    with pytest.raises(AiFallbackBudgetExceeded):
        route_ai_fallback(**_call_kwargs(), client=fake_client)
|
||||
|
||||
|
||||
def test_router_propagates_circuit_open(monkeypatch):
    """A circuit-open error raised by the client passes through the router."""
    monkeypatch.setattr(router_mod.settings, "ai_fallback_enabled", True)

    def _miss(key):
        return None

    monkeypatch.setattr(router_mod, "read_proposal", _miss)
    fake_client = MagicMock(spec=AiFallbackClient)
    fake_client.request_proposal.side_effect = AiFallbackCircuitOpen("tripped")
    with pytest.raises(AiFallbackCircuitOpen):
        route_ai_fallback(**_call_kwargs(), client=fake_client)
|
||||
|
||||
|
||||
def test_router_does_not_import_save_proposal():
    """The router module exposes no ``save_proposal`` attribute.

    Cache save is caller-driven AFTER visual_check + user_approved (u6 IMP-46
    gate); this checks the name is absent from the router's namespace.
    """
    exposes_save = hasattr(router_mod, "save_proposal")
    assert not exposes_save
|
||||
46
tests/phase_z2_ai_fallback/test_schema.py
Normal file
46
tests/phase_z2_ai_fallback/test_schema.py
Normal file
@@ -0,0 +1,46 @@
|
||||
"""IMP-33 u2 — AiFallbackProposal schema tests.
|
||||
|
||||
Scope (Stage 2 plan, u2):
|
||||
- Whitelisted proposal_kind values are accepted.
|
||||
- Forbidden output forms are rejected: mdx_text / frame_id_change / raw_html / raw_css.
|
||||
- extra fields outside the declared schema are rejected (MDX read-only signal).
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
from pydantic import ValidationError
|
||||
|
||||
from src.phase_z2_ai_fallback import AiFallbackProposal, ProposalKind
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "kind_value",
    ["builder_options_patch", "partial_overrides", "slot_mapping_proposal"],
)
def test_whitelisted_proposal_kinds_accepted(kind_value: str) -> None:
    """Each whitelisted kind string is accepted and coerced to the enum member."""
    built = AiFallbackProposal(proposal_kind=kind_value)
    expected_kind = ProposalKind(kind_value)
    assert built.proposal_kind == expected_kind
|
||||
|
||||
|
||||
@pytest.mark.parametrize(
    "forbidden",
    [
        "mdx_text",
        "frame_id_change",
        "raw_html",
        "raw_css",
    ],
)
def test_forbidden_proposal_kinds_rejected(forbidden: str) -> None:
    """Each forbidden output form is rejected as a ``proposal_kind``."""
    rejected_kind = forbidden
    with pytest.raises(ValidationError):
        AiFallbackProposal(proposal_kind=rejected_kind)
|
||||
|
||||
|
||||
def test_unknown_proposal_kind_rejected() -> None:
    """An arbitrary kind string fails schema validation."""
    unknown_kind = "something_else"
    with pytest.raises(ValidationError):
        AiFallbackProposal(proposal_kind=unknown_kind)
|
||||
|
||||
|
||||
def test_extra_fields_rejected() -> None:
    """`extra=forbid` keeps the AI from smuggling raw_html/mdx_text alongside a valid kind."""
    smuggled = {"proposal_kind": "partial_overrides", "raw_html": "<div/>"}
    with pytest.raises(ValidationError):
        AiFallbackProposal(**smuggled)
|
||||
193
tests/phase_z2_ai_fallback/test_step12.py
Normal file
193
tests/phase_z2_ai_fallback/test_step12.py
Normal file
@@ -0,0 +1,193 @@
|
||||
"""IMP-33 u8 — Step 12 AI repair wiring tests.
|
||||
|
||||
Covers the two structural gates layered on top of the u7 router:
|
||||
* IMP-30 provisional gate (only provisional units may invoke AI repair)
|
||||
* Reject gate (route_hint=design_reference_only NEVER calls AI)
|
||||
Plus the record-shape contract returned for downstream Step 12 artifacts.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Any
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
from src.phase_z2_ai_fallback import step12 as step12_mod
|
||||
from src.phase_z2_ai_fallback.schema import AiFallbackProposal, ProposalKind
|
||||
|
||||
|
||||
@dataclass
|
||||
class FakeUnit:
|
||||
label: str | None
|
||||
provisional: bool
|
||||
frame_template_id: str = "tmpl"
|
||||
frame_id: str = "fid"
|
||||
source_section_ids: list[str] = field(default_factory=lambda: ["s1"])
|
||||
raw_content: str = "raw"
|
||||
v4_rank: int | None = 1
|
||||
|
||||
|
||||
_ROUTE_HINTS: dict[str | None, str | None] = {
|
||||
"use_as_is": "direct_render",
|
||||
"light_edit": "deterministic_minor_adjustment",
|
||||
"restructure": "ai_adaptation_required",
|
||||
"reject": "design_reference_only",
|
||||
None: None,
|
||||
}
|
||||
|
||||
|
||||
def _route_for_label(label: str | None) -> str | None:
|
||||
return _ROUTE_HINTS.get(label)
|
||||
|
||||
|
||||
def _get_contract(_tid: str) -> dict[str, Any]:
    """Return a freshly built stub frame contract; the template id argument is ignored."""
    contract: dict[str, Any] = {"frame_id": "fid"}
    contract["payload"] = {"builder_options": {}}
    contract["sub_zones"] = []
    return contract
|
||||
|
||||
|
||||
def _frame_visual(_tid: str) -> str:
    """Return a constant empty HTML document; the template id argument is ignored."""
    stub_markup = "<html></html>"
    return stub_markup
|
||||
|
||||
|
||||
def _call(
    units: list[FakeUnit],
    *,
    route_ai_fallback: Any | None = None,
    **overrides: Any,
) -> list[dict]:
    """Invoke ``gather_step12_ai_repair_proposals`` with the stub collaborators.

    Args:
        units: Units forwarded positionally to the gather function.
        route_ai_fallback: Optional replacement for
            ``step12_mod.route_ai_fallback``. It is installed only for the
            duration of this call and the previous attribute is restored
            afterwards, so the stub cannot leak into later tests.
        **overrides: Keyword arguments that replace or extend the default
            ``route_for_label`` / ``get_contract_fn`` / ``frame_visual_loader``
            stubs.

    Returns:
        Whatever the gather function returns.
    """
    kwargs: dict[str, Any] = {
        "route_for_label": _route_for_label,
        "get_contract_fn": _get_contract,
        "frame_visual_loader": _frame_visual,
        **overrides,  # caller-supplied values win over the defaults
    }
    if route_ai_fallback is None:
        return step12_mod.gather_step12_ai_repair_proposals(units, **kwargs)

    previous = step12_mod.route_ai_fallback
    step12_mod.route_ai_fallback = route_ai_fallback  # type: ignore[assignment]
    try:
        return step12_mod.gather_step12_ai_repair_proposals(units, **kwargs)
    finally:
        # Undo the module-level patch even when the gather call raises.
        step12_mod.route_ai_fallback = previous  # type: ignore[assignment]
|
||||
|
||||
|
||||
def test_non_provisional_unit_is_skipped_without_ai_call(monkeypatch):
    """A non-provisional unit yields a ``not_provisional`` skip record and no router call."""
    fake_router = MagicMock()
    monkeypatch.setattr(step12_mod, "route_ai_fallback", fake_router)

    record = _call([FakeUnit(label="restructure", provisional=False)])[0]

    assert record["ai_called"] is False
    assert record["skip_reason"] == "not_provisional"
    assert record["provisional"] is False
    fake_router.assert_not_called()
|
||||
|
||||
|
||||
def test_reject_route_is_skipped_without_ai_call(monkeypatch):
    """A provisional ``reject`` unit is skipped as design-reference-only and never reaches the router."""
    fake_router = MagicMock()
    monkeypatch.setattr(step12_mod, "route_ai_fallback", fake_router)

    record = _call([FakeUnit(label="reject", provisional=True)])[0]

    assert record["ai_called"] is False
    assert record["skip_reason"] == "design_reference_only_no_ai"
    assert record["route_hint"] == "design_reference_only"
    fake_router.assert_not_called()
|
||||
|
||||
|
||||
def test_non_ai_route_is_skipped_with_reason(monkeypatch):
    """A provisional ``light_edit`` unit is skipped with a reason naming its non-AI route."""
    fake_router = MagicMock()
    monkeypatch.setattr(step12_mod, "route_ai_fallback", fake_router)

    record = _call([FakeUnit(label="light_edit", provisional=True)])[0]

    expected_reason = "route_not_ai_adaptation:deterministic_minor_adjustment"
    assert record["ai_called"] is False
    assert record["skip_reason"] == expected_reason
    fake_router.assert_not_called()
|
||||
|
||||
|
||||
def test_router_short_circuit_returns_none_skip_reason(monkeypatch):
    """When the router returns ``None`` the record is marked ``router_short_circuit``."""
    fake_router = MagicMock(return_value=None)
    monkeypatch.setattr(step12_mod, "route_ai_fallback", fake_router)

    record = _call([FakeUnit(label="restructure", provisional=True)])[0]

    assert record["ai_called"] is False
    assert record["skip_reason"] == "router_short_circuit"
    assert record["proposal"] is None
    fake_router.assert_called_once()
|
||||
|
||||
|
||||
def test_ai_adaptation_call_records_proposal(monkeypatch):
    """A proposal returned by the router is recorded and the router receives the V4 route and label."""
    canned = AiFallbackProposal(
        proposal_kind=ProposalKind.PARTIAL_OVERRIDES,
        payload={"slots": {"s_text": "x"}},
        rationale="r",
    )
    fake_router = MagicMock(return_value=canned)
    monkeypatch.setattr(step12_mod, "route_ai_fallback", fake_router)

    record = _call([FakeUnit(label="restructure", provisional=True)])[0]

    assert record["ai_called"] is True
    assert record["skip_reason"] is None
    assert record["proposal"]["proposal_kind"] == "partial_overrides"
    fake_router.assert_called_once()

    # The router must have been given the V4 result as a keyword argument.
    v4_result = fake_router.call_args.kwargs["v4_result"]
    assert v4_result["route"] == "ai_adaptation_required"
    assert v4_result["label"] == "restructure"
|
||||
|
||||
|
||||
def test_router_exception_is_captured_per_record(monkeypatch):
    """An exception raised by the router is stored on the record as ``"<Type>: <message>"``."""
    failing_router = MagicMock(side_effect=RuntimeError("transient_boom"))
    monkeypatch.setattr(step12_mod, "route_ai_fallback", failing_router)

    record = _call([FakeUnit(label="restructure", provisional=True)])[0]

    assert record["ai_called"] is True
    assert record["proposal"] is None
    assert record["error"] == "RuntimeError: transient_boom"
    failing_router.assert_called_once()
|
||||
|
||||
|
||||
def test_mixed_units_each_independently_classified(monkeypatch):
    """Each unit in a mixed batch gets its own skip reason; only one unit reaches the router."""
    fake_router = MagicMock(return_value=None)
    monkeypatch.setattr(step12_mod, "route_ai_fallback", fake_router)

    batch = [
        FakeUnit(label="use_as_is", provisional=False),
        FakeUnit(label="reject", provisional=True),
        FakeUnit(label="restructure", provisional=True),
        FakeUnit(label="restructure", provisional=False),
    ]
    skip_reasons = [rec["skip_reason"] for rec in _call(batch)]

    assert skip_reasons == [
        "not_provisional",
        "design_reference_only_no_ai",
        "router_short_circuit",
        "not_provisional",
    ]
    assert fake_router.call_count == 1
|
||||
|
||||
|
||||
def test_cache_key_includes_template_and_section_ids(monkeypatch):
    """The router's ``cache_key`` is the template id, ``::``, then the comma-joined section ids."""
    fake_router = MagicMock(return_value=None)
    monkeypatch.setattr(step12_mod, "route_ai_fallback", fake_router)

    unit = FakeUnit(
        label="restructure",
        provisional=True,
        frame_template_id="tmpl_abc",
        source_section_ids=["02-1", "02-2"],
    )
    _call([unit])

    passed_key = fake_router.call_args.kwargs["cache_key"]
    assert passed_key == "tmpl_abc::02-1,02-2"
|
||||
|
||||
|
||||
def test_record_shape_contract_is_stable(monkeypatch):
    """A Step 12 record exposes exactly the ten keys listed here."""
    monkeypatch.setattr(step12_mod, "route_ai_fallback", MagicMock(return_value=None))

    record = _call([FakeUnit(label="reject", provisional=True)])[0]

    expected_keys = {
        "unit_index",
        "source_section_ids",
        "frame_template_id",
        "label",
        "route_hint",
        "provisional",
        "ai_called",
        "skip_reason",
        "proposal",
        "error",
    }
    assert set(record.keys()) == expected_keys
|
||||
208
tests/phase_z2_ai_fallback/test_step17.py
Normal file
208
tests/phase_z2_ai_fallback/test_step17.py
Normal file
@@ -0,0 +1,208 @@
|
||||
"""IMP-33 u9 — Step 17 AI repair wiring tests (BLOCKED until IMP-34 + IMP-35).
|
||||
|
||||
Covers:
|
||||
* :data:`OVERFLOW_CASCADE_ORDER` canonical order (4 stages).
|
||||
* :class:`OverflowCascadeStage` member values.
|
||||
* :data:`STEP17_AI_REPAIR_BLOCKED_REASON` constant value.
|
||||
* :func:`gather_step17_ai_repair_proposals` BLOCKED contract — every unit
|
||||
returns ``ai_called=False`` + ``skip_reason=STEP17_AI_REPAIR_BLOCKED_REASON``
|
||||
+ ``proposal=None`` regardless of provisional / label / route_hint.
|
||||
* Structural guarantee — the u9 module does NOT import
|
||||
:func:`src.phase_z2_ai_fallback.router.route_ai_fallback` or the
|
||||
``anthropic`` SDK. Step 17 AI repair stays structurally blocked.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import ast
|
||||
from dataclasses import dataclass, field
|
||||
from pathlib import Path
|
||||
|
||||
from src.phase_z2_ai_fallback import step17 as step17_mod
|
||||
from src.phase_z2_ai_fallback.step17 import (
|
||||
OVERFLOW_CASCADE_ORDER,
|
||||
STEP17_AI_REPAIR_BLOCKED_REASON,
|
||||
OverflowCascadeStage,
|
||||
gather_step17_ai_repair_proposals,
|
||||
)
|
||||
|
||||
|
||||
@dataclass
|
||||
class FakeUnit:
|
||||
label: str | None
|
||||
provisional: bool
|
||||
frame_template_id: str = "tmpl"
|
||||
frame_id: str = "fid"
|
||||
source_section_ids: list[str] = field(default_factory=lambda: ["s1"])
|
||||
raw_content: str = "raw"
|
||||
v4_rank: int | None = 1
|
||||
|
||||
|
||||
_ROUTE_HINTS: dict[str | None, str | None] = {
|
||||
"use_as_is": "direct_render",
|
||||
"light_edit": "deterministic_minor_adjustment",
|
||||
"restructure": "ai_adaptation_required",
|
||||
"reject": "design_reference_only",
|
||||
None: None,
|
||||
}
|
||||
|
||||
|
||||
def _route_for_label(label: str | None) -> str | None:
|
||||
return _ROUTE_HINTS.get(label)
|
||||
|
||||
|
||||
# ─── Stage / order constants ─────────────────────────────────────────
|
||||
|
||||
|
||||
def test_overflow_cascade_order_is_canonical():
    """The cascade order is deterministic, popup, AI repair, then user override."""
    canonical = (
        OverflowCascadeStage.DETERMINISTIC,
        OverflowCascadeStage.POPUP,
        OverflowCascadeStage.AI_REPAIR,
        OverflowCascadeStage.USER_OVERRIDE,
    )
    assert OVERFLOW_CASCADE_ORDER == canonical
|
||||
|
||||
|
||||
def test_overflow_cascade_stage_string_values():
    """Every cascade stage member carries its expected lowercase string value."""
    stage = OverflowCascadeStage
    assert stage.DETERMINISTIC.value == "deterministic"
    assert stage.POPUP.value == "popup"
    assert stage.AI_REPAIR.value == "ai_repair"
    assert stage.USER_OVERRIDE.value == "user_override"
|
||||
|
||||
|
||||
def test_step17_blocked_reason_constant_value():
    """The blocked-reason constant keeps its exact string value."""
    pinned = "step17_ai_blocked_imp_34_35_prerequisites_missing"
    assert STEP17_AI_REPAIR_BLOCKED_REASON == pinned
|
||||
|
||||
|
||||
# ─── BLOCKED contract: every unit returns blocked record ─────────────
|
||||
|
||||
|
||||
def test_gather_returns_one_record_per_unit():
    """Three input units produce three records."""
    batch = [
        FakeUnit(label="restructure", provisional=True),
        FakeUnit(label="reject", provisional=False),
        FakeUnit(label="use_as_is", provisional=True),
    ]
    produced = gather_step17_ai_repair_proposals(
        batch, route_for_label=_route_for_label
    )
    assert len(produced) == 3
|
||||
|
||||
|
||||
def test_gather_records_blocked_skip_reason():
    """The record's skip reason is the IMP-34/IMP-35 blocked-reason constant."""
    batch = [FakeUnit(label="restructure", provisional=True)]
    first = gather_step17_ai_repair_proposals(
        batch, route_for_label=_route_for_label
    )[0]
    assert first["skip_reason"] == STEP17_AI_REPAIR_BLOCKED_REASON
|
||||
|
||||
|
||||
def test_gather_blocks_even_when_route_is_ai_adaptation_required():
    """A provisional unit routed to ``ai_adaptation_required`` is still blocked.

    The record keeps the route hint but reports no AI call, no proposal, and
    the blocked-reason constant as its skip reason.
    """
    batch = [FakeUnit(label="restructure", provisional=True)]
    produced = gather_step17_ai_repair_proposals(
        batch, route_for_label=_route_for_label
    )
    blocked = produced[0]

    assert blocked["route_hint"] == "ai_adaptation_required"
    assert blocked["ai_called"] is False
    assert blocked["proposal"] is None
    assert blocked["skip_reason"] == STEP17_AI_REPAIR_BLOCKED_REASON
|
||||
|
||||
|
||||
def test_gather_blocks_reject_units_too():
    """A non-provisional ``reject`` unit gets the same blocked reason and no AI call."""
    batch = [FakeUnit(label="reject", provisional=False)]
    produced = gather_step17_ai_repair_proposals(
        batch, route_for_label=_route_for_label
    )
    blocked = produced[0]

    assert blocked["ai_called"] is False
    assert blocked["skip_reason"] == STEP17_AI_REPAIR_BLOCKED_REASON
|
||||
|
||||
|
||||
def test_gather_records_proposal_none_and_no_error():
    """A blocked record carries neither a proposal nor an error."""
    batch = [FakeUnit(label="restructure", provisional=True)]
    produced = gather_step17_ai_repair_proposals(
        batch, route_for_label=_route_for_label
    )
    blocked = produced[0]

    assert blocked["proposal"] is None
    assert blocked["error"] is None
|
||||
|
||||
|
||||
def test_gather_records_cascade_stage_is_ai_repair():
    """The record's ``cascade_stage`` is the AI_REPAIR stage value."""
    batch = [FakeUnit(label="restructure", provisional=True)]
    produced = gather_step17_ai_repair_proposals(
        batch, route_for_label=_route_for_label
    )
    expected_stage = OverflowCascadeStage.AI_REPAIR.value
    assert produced[0]["cascade_stage"] == expected_stage
|
||||
|
||||
|
||||
def test_gather_preserves_unit_metadata():
    """Index, template id, section ids, label and provisional flag are copied onto the record."""
    unit = FakeUnit(
        label="restructure",
        provisional=True,
        frame_template_id="frame_05_overview",
        source_section_ids=["s1", "s2"],
    )
    produced = gather_step17_ai_repair_proposals(
        [unit], route_for_label=_route_for_label
    )
    meta = produced[0]

    assert meta["unit_index"] == 0
    assert meta["frame_template_id"] == "frame_05_overview"
    assert meta["source_section_ids"] == ["s1", "s2"]
    assert meta["label"] == "restructure"
    assert meta["provisional"] is True
|
||||
|
||||
|
||||
def test_gather_with_empty_units_returns_empty_list():
    """No units in, no records out."""
    no_units: list = []
    produced = gather_step17_ai_repair_proposals(
        no_units, route_for_label=_route_for_label
    )
    assert produced == []
|
||||
|
||||
|
||||
# ─── Structural guarantee: u9 must NOT import route_ai_fallback / anthropic ─
|
||||
|
||||
|
||||
def _resolve_imports(source: str, package: str) -> list[str]:
    """Return the dotted names imported anywhere in *source*.

    ``import a.b`` contributes ``"a.b"``; ``from a import b`` contributes
    ``"a.b"``. Relative imports are resolved against *package* (the dotted
    name of the package containing the scanned module), so
    ``from .router import x`` inside ``pkg.sub`` is reported as
    ``"pkg.sub.router.x"`` rather than the bare ``"router.x"``. Without this,
    checks against fully qualified forbidden names silently miss relative
    imports.
    """
    names: list[str] = []
    for node in ast.walk(ast.parse(source)):
        if isinstance(node, ast.Import):
            names.extend(alias.name for alias in node.names)
        elif isinstance(node, ast.ImportFrom):
            module = node.module or ""
            if node.level:
                anchor = package.split(".") if package else []
                # level 1 is the containing package; each extra dot climbs one parent.
                keep = len(anchor) - (node.level - 1)
                anchor = anchor[: max(keep, 0)]
                if module:
                    anchor.append(module)
                module = ".".join(anchor)
            names.extend(f"{module}.{alias.name}" for alias in node.names)
    return names


def _u9_imports() -> list[str]:
    """Return every import of the Step 17 module as a fully qualified dotted name."""
    src_path = Path(step17_mod.__file__)
    # step17 is a plain module, so its package is its dotted name minus the last part.
    package = step17_mod.__name__.rpartition(".")[0]
    return _resolve_imports(src_path.read_text(encoding="utf-8"), package)
|
||||
|
||||
|
||||
def test_step17_module_does_not_import_route_ai_fallback():
    """The Step 17 module neither imports the router nor exposes ``route_ai_fallback``."""
    forbidden = {
        "src.phase_z2_ai_fallback.router.route_ai_fallback",
        "src.phase_z2_ai_fallback.router",
    }
    seen = _u9_imports()
    offending = [imp for imp in seen if imp in forbidden]
    assert not offending, seen
    assert not hasattr(step17_mod, "route_ai_fallback")
|
||||
|
||||
|
||||
def test_step17_module_does_not_import_anthropic():
    """No import of the Step 17 module has ``anthropic`` as its top-level package."""
    leaked = []
    for imp in _u9_imports():
        top_level = imp.partition(".")[0]
        if top_level == "anthropic":
            leaked.append(imp)
    assert leaked == [], leaked
|
||||
|
||||
|
||||
def test_step17_module_does_not_import_ai_fallback_client():
    """No import of the Step 17 module starts with the fallback client module path."""
    forbidden_prefixes = ("src.phase_z2_ai_fallback.client",)
    leaked = []
    for imp in _u9_imports():
        if imp.startswith(forbidden_prefixes):
            leaked.append(imp)
    assert leaked == [], leaked
|
||||
144
tests/phase_z2_ai_fallback/test_validate.py
Normal file
144
tests/phase_z2_ai_fallback/test_validate.py
Normal file
@@ -0,0 +1,144 @@
|
||||
"""IMP-33 u5 — AI fallback validator tests.
|
||||
|
||||
Scope (Stage 2 plan, u5):
|
||||
- schema re-validation (defence-in-depth)
|
||||
- builder whitelist (BUILDER_OPTIONS_PATCH)
|
||||
- dropped-slot guard (PARTIAL_OVERRIDES / SLOT_MAPPING_PROPOSAL must keep
|
||||
every declared sub_zone slot present)
|
||||
- frame-swap guard (no payload.frame_id mutation; V4 rank-1 protected)
|
||||
- Internal Region containment (payload.region_id must match declared id)
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import pytest
|
||||
|
||||
from src.phase_z2_ai_fallback import AiFallbackProposal, ProposalKind
|
||||
from src.phase_z2_ai_fallback.validate import (
|
||||
AiFallbackValidationError,
|
||||
validate_proposal,
|
||||
)
|
||||
|
||||
|
||||
# Frame contract fixture: three text-only pillar sub-zones plus builder options.
_FRAME_CONTRACT = {
    "frame_id": 1171281190,
    "sub_zones": [
        {"id": f"pillar_{n}", "accepts": ["text_block"]} for n in (1, 2, 3)
    ],
    "payload": {
        "builder_options": dict(
            item_parser="pillar_item",
            array_root="pillars",
            role_field="color_class",
        ),
    },
}

# Internal region fixture used by the containment tests.
_REGION = {"id": "zone_top.region_a"}
|
||||
|
||||
|
||||
def _make(kind: ProposalKind, payload: dict) -> AiFallbackProposal:
    """Build a proposal of the given kind carrying *payload*."""
    built = AiFallbackProposal(proposal_kind=kind, payload=payload)
    return built
|
||||
|
||||
|
||||
def test_builder_options_patch_accepts_whitelisted_keys() -> None:
    """A patch touching only a key present in the contract's builder options validates cleanly."""
    patch_payload = {"item_parser": "alt_pillar_item"}
    candidate = _make(ProposalKind.BUILDER_OPTIONS_PATCH, patch_payload)
    validate_proposal(candidate, frame_contract=_FRAME_CONTRACT)
|
||||
|
||||
|
||||
def test_builder_options_patch_rejects_unknown_key() -> None:
    """A patch containing ``padding_px`` fails with a builder-whitelist error."""
    patch_payload = {"item_parser": "x", "padding_px": 10}
    candidate = _make(ProposalKind.BUILDER_OPTIONS_PATCH, patch_payload)
    with pytest.raises(AiFallbackValidationError, match="builder whitelist"):
        validate_proposal(candidate, frame_contract=_FRAME_CONTRACT)
|
||||
|
||||
|
||||
def test_partial_overrides_requires_all_declared_slots() -> None:
    """Omitting ``pillar_3`` from the slots triggers the dropped-slot guard."""
    incomplete = {"slots": {"pillar_1": "a", "pillar_2": "b"}}
    candidate = _make(ProposalKind.PARTIAL_OVERRIDES, incomplete)
    with pytest.raises(AiFallbackValidationError, match="dropped-slot guard"):
        validate_proposal(candidate, frame_contract=_FRAME_CONTRACT)
|
||||
|
||||
|
||||
def test_partial_overrides_with_all_slots_passes() -> None:
    """Supplying all three declared pillar slots validates without error."""
    complete = {"slots": {"pillar_1": "a", "pillar_2": "b", "pillar_3": "c"}}
    candidate = _make(ProposalKind.PARTIAL_OVERRIDES, complete)
    validate_proposal(candidate, frame_contract=_FRAME_CONTRACT)
|
||||
|
||||
|
||||
def test_slot_mapping_proposal_requires_slots_dict() -> None:
    """A slot-mapping proposal whose ``slots`` is a list triggers the dropped-slot guard."""
    malformed = {"slots": []}
    candidate = _make(ProposalKind.SLOT_MAPPING_PROPOSAL, malformed)
    with pytest.raises(AiFallbackValidationError, match="dropped-slot guard"):
        validate_proposal(candidate, frame_contract=_FRAME_CONTRACT)
|
||||
|
||||
|
||||
def test_frame_swap_guard_rejects_mismatched_frame_id() -> None:
    """A payload ``frame_id`` that differs from the contract's triggers the frame-swap guard."""
    swapped = {"frame_id": 9999, "item_parser": "x"}
    candidate = _make(ProposalKind.BUILDER_OPTIONS_PATCH, swapped)
    with pytest.raises(AiFallbackValidationError, match="frame-swap guard"):
        validate_proposal(candidate, frame_contract=_FRAME_CONTRACT)
|
||||
|
||||
|
||||
def test_frame_swap_guard_accepts_matching_frame_id() -> None:
    """A payload ``frame_id`` equal to the contract's validates without error."""
    same_frame = {
        "frame_id": 1171281190,
        "slots": {"pillar_1": "a", "pillar_2": "b", "pillar_3": "c"},
    }
    candidate = _make(ProposalKind.PARTIAL_OVERRIDES, same_frame)
    validate_proposal(candidate, frame_contract=_FRAME_CONTRACT)
|
||||
|
||||
|
||||
def test_internal_region_containment_rejects_mismatch() -> None:
    """A payload ``region_id`` that differs from the supplied region's id is rejected."""
    wrong_region = {
        "slots": {"pillar_1": "a", "pillar_2": "b", "pillar_3": "c"},
        "region_id": "zone_bottom.region_x",
    }
    candidate = _make(ProposalKind.PARTIAL_OVERRIDES, wrong_region)
    with pytest.raises(AiFallbackValidationError, match="Internal Region"):
        validate_proposal(
            candidate,
            frame_contract=_FRAME_CONTRACT,
            internal_region=_REGION,
        )
|
||||
|
||||
|
||||
def test_internal_region_containment_accepts_match() -> None:
    """A payload ``region_id`` equal to the supplied region's id validates without error."""
    right_region = {
        "slots": {"pillar_1": "a", "pillar_2": "b", "pillar_3": "c"},
        "region_id": "zone_top.region_a",
    }
    candidate = _make(ProposalKind.PARTIAL_OVERRIDES, right_region)
    validate_proposal(
        candidate,
        frame_contract=_FRAME_CONTRACT,
        internal_region=_REGION,
    )
|
||||
|
||||
|
||||
def test_internal_region_check_skipped_when_no_region_supplied() -> None:
    """With no ``internal_region`` argument, a payload carrying ``region_id`` still validates."""
    with_region_id = {
        "slots": {"pillar_1": "a", "pillar_2": "b", "pillar_3": "c"},
        "region_id": "zone_top.region_a",
    }
    candidate = _make(ProposalKind.PARTIAL_OVERRIDES, with_region_id)
    validate_proposal(candidate, frame_contract=_FRAME_CONTRACT)
|
||||
46
tests/test_phase_z2_ai_fallback_config.py
Normal file
46
tests/test_phase_z2_ai_fallback_config.py
Normal file
@@ -0,0 +1,46 @@
|
||||
"""IMP-33 u1 — AI fallback Settings defaults (locked).
|
||||
|
||||
These defaults are the binding contract from Stage 2 plan (per-unit u1):
|
||||
- ai_fallback_enabled = False (master flag OFF; fallback path only)
|
||||
- ai_fallback_model = "claude-opus-4-6-20250415"
|
||||
- ai_fallback_timeout_s = 60.0
|
||||
- ai_fallback_max_retries = 3
|
||||
- ai_fallback_backoff_base_s = 1.0
|
||||
- ai_fallback_backoff_cap_s = 8.0
|
||||
- ai_fallback_backoff_jitter = 0.3
|
||||
- ai_fallback_budget_per_run = 10
|
||||
- ai_fallback_circuit_breaker_threshold = 5
|
||||
|
||||
Downstream u4 (client) MUST source timeout/retry/backoff/budget/circuit from
|
||||
Settings; inline literals are forbidden by Stage 2 plan.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from src.config import Settings
|
||||
|
||||
|
||||
def test_ai_fallback_master_flag_default_off() -> None:
    """A default-constructed ``Settings`` has ``ai_fallback_enabled`` set to ``False``."""
    master_flag = Settings().ai_fallback_enabled
    assert master_flag is False, (
        "AI fallback master flag MUST default OFF (normal path AI=0 contract)."
    )
|
||||
|
||||
|
||||
def test_ai_fallback_model_default_locked() -> None:
    """A default-constructed ``Settings`` carries the pinned fallback model id."""
    pinned_model = "claude-opus-4-6-20250415"
    defaults = Settings()
    assert defaults.ai_fallback_model == pinned_model
|
||||
|
||||
|
||||
def test_ai_fallback_retry_timeout_backoff_defaults_locked() -> None:
    """Timeout, retry count and backoff settings keep their pinned default values."""
    defaults = Settings()
    pinned = {
        "ai_fallback_timeout_s": 60.0,
        "ai_fallback_max_retries": 3,
        "ai_fallback_backoff_base_s": 1.0,
        "ai_fallback_backoff_cap_s": 8.0,
        "ai_fallback_backoff_jitter": 0.3,
    }
    for setting_name, expected in pinned.items():
        assert getattr(defaults, setting_name) == expected
|
||||
|
||||
|
||||
def test_ai_fallback_budget_and_circuit_defaults_locked() -> None:
    """Per-run budget and circuit-breaker threshold keep their pinned default values."""
    defaults = Settings()
    budget = defaults.ai_fallback_budget_per_run
    breaker_threshold = defaults.ai_fallback_circuit_breaker_threshold
    assert budget == 10
    assert breaker_threshold == 5
|
||||
Reference in New Issue
Block a user