Files
C.E.L_Slide_test2/tests/phase_z2_ai_fallback/test_step12.py
kyeongmin 1186ad8ae2 feat(#76): IMP-47B reject-as-AI-adaptation activation (u1~u13 backend + tests)
- u1~u9: AI fallback infrastructure (router/prompts/schema/validator) + Step 12 hook
- u10: e2e reject chain (writes final.html with AI-repaired slot, full coverage)
- u11: frontend wiring deferred to follow-up commit (split from IMP-41 hunks)
- u12: coverage_invariant guard
- u13: cache save gate (visual_check PASS + user_approved/auto_cache) — Codex #22 verified

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-22 00:19:10 +09:00

503 lines
20 KiB
Python

"""IMP-33 u8 + IMP-46 u4 + IMP-47B u2 — Step 12 AI repair wiring tests.
Covers the structural gates layered on top of the u7 router:
* IMP-30 provisional gate (only provisional units may invoke AI repair)
* Catch-all ``route_not_ai_adaptation:<hint>`` skip — every route_hint
other than ``ai_adaptation_required`` (including the legacy
``design_reference_only`` hint) falls through to a single uniform skip
after the IMP-47B u2 removal of the bespoke reject gate.
Plus the record-shape contract returned for downstream Step 12 artifacts
and the IMP-46 u4 structural cache key + fingerprints contract.
"""
from __future__ import annotations
import hashlib
import json
from dataclasses import dataclass, field
from typing import Any
from unittest.mock import MagicMock
from src.phase_z2_ai_fallback import step12 as step12_mod
from src.phase_z2_ai_fallback.schema import AiFallbackProposal, ProposalKind
@dataclass
class FakeUnit:
    """Lightweight stand-in for the unit objects handed to Step 12 gather.

    Only ``label`` and ``provisional`` are required; every other attribute
    carries a neutral default so individual tests override just the axis
    they care about.
    """

    # Required: the label fed to ``route_for_label`` and the provisional flag.
    label: str | None
    provisional: bool

    # Identity-style attributes.
    frame_template_id: str = "tmpl"
    frame_id: str = "fid"
    # A factory keeps the default list from being shared between instances.
    source_section_ids: list[str] = field(default_factory=lambda: ["s1"])
    raw_content: str = "raw"
    v4_rank: int | None = 1
    cardinality: int | None = None

    # Structural attributes varied by the cache-key tests in this module.
    layout_preset: str = ""
    zone_position: str = ""
    source_shape: str = "paragraph"
    h3_count: int = 0
    char_count: int = 0
_ROUTE_HINTS: dict[str | None, str | None] = {
"use_as_is": "direct_render",
"light_edit": "deterministic_minor_adjustment",
"restructure": "ai_adaptation_required",
"reject": "design_reference_only",
None: None,
}
def _route_for_label(label: str | None) -> str | None:
    """Return the test-local route hint for *label*, or ``None`` when unmapped."""
    if label in _ROUTE_HINTS:
        return _ROUTE_HINTS[label]
    return None
def _get_contract(_tid: str) -> dict[str, Any]:
return {"frame_id": "fid", "payload": {"builder_options": {}}, "sub_zones": []}
def _frame_visual(_tid: str) -> str:
return "<html></html>"
def _call(
    units: list[FakeUnit],
    *,
    route_ai_fallback: Any | None = None,
    **overrides: Any,
) -> list[dict]:
    """Invoke ``gather_step12_ai_repair_proposals`` with the module's stub collaborators.

    Args:
        units: Units to classify.
        route_ai_fallback: Optional router substitute. When given, it replaces
            ``step12_mod.route_ai_fallback`` for the duration of this call only.
        **overrides: Keyword arguments forwarded to gather; they take
            precedence over the default stubs.

    Returns:
        The list of per-unit records produced by gather.
    """
    # Local imports keep this helper self-contained without touching the
    # file-level import block.
    from contextlib import nullcontext
    from unittest.mock import patch

    kwargs: dict[str, Any] = {
        "route_for_label": _route_for_label,
        "get_contract_fn": _get_contract,
        "frame_visual_loader": _frame_visual,
    }
    kwargs.update(overrides)

    # Scope the router override to this call. A bare attribute assignment on
    # the module is never undone and would leak into every later test.
    if route_ai_fallback is not None:
        router_scope = patch.object(step12_mod, "route_ai_fallback", route_ai_fallback)
    else:
        router_scope = nullcontext()

    with router_scope:
        return step12_mod.gather_step12_ai_repair_proposals(units, **kwargs)
def _ai_unit(**overrides: Any) -> FakeUnit:
    """Build a FakeUnit that passes both AI gates (provisional + ``restructure``).

    Keyword arguments replace the corresponding default attribute.
    """
    defaults: dict[str, Any] = {
        "label": "restructure",
        "provisional": True,
        "frame_template_id": "tmpl_x",
        "frame_id": "fid_123",
        "source_section_ids": ["02-1"],
        "layout_preset": "single_column",
        "zone_position": "zone_a",
        "source_shape": "bullet",
        "h3_count": 3,
        "char_count": 200,
        "cardinality": 5,
    }
    # Later keys win, so caller overrides shadow the defaults.
    return FakeUnit(**{**defaults, **overrides})
def test_non_provisional_unit_is_skipped_without_ai_call(monkeypatch):
    """A non-provisional unit is skipped as ``not_provisional`` and never hits the router."""
    fake_router = MagicMock()
    monkeypatch.setattr(step12_mod, "route_ai_fallback", fake_router)

    record = _call([FakeUnit(label="restructure", provisional=False)])[0]

    assert record["ai_called"] is False
    assert record["skip_reason"] == "not_provisional"
    assert record["provisional"] is False
    fake_router.assert_not_called()
def test_design_reference_route_falls_through_to_route_not_ai_adaptation(monkeypatch):
    """IMP-47B u2 — ``design_reference_only`` lands in the uniform catch-all skip.

    The dedicated 'design_reference_only_no_ai' skip no longer exists. The
    local test mapping sends ``reject`` to the legacy ``design_reference_only``
    hint, which must now produce ``route_not_ai_adaptation:<hint>`` like every
    other non-AI-adaptation route. Production reject routing is covered by
    the u9 tests.
    """
    fake_router = MagicMock()
    monkeypatch.setattr(step12_mod, "route_ai_fallback", fake_router)

    record = _call([FakeUnit(label="reject", provisional=True)])[0]

    assert record["ai_called"] is False
    assert record["skip_reason"] == "route_not_ai_adaptation:design_reference_only"
    assert record["route_hint"] == "design_reference_only"
    fake_router.assert_not_called()
def test_non_ai_route_is_skipped_with_reason(monkeypatch):
    """A provisional ``light_edit`` unit is skipped with its route hint in the reason."""
    fake_router = MagicMock()
    monkeypatch.setattr(step12_mod, "route_ai_fallback", fake_router)

    record = _call([FakeUnit(label="light_edit", provisional=True)])[0]
    expected_reason = "route_not_ai_adaptation:deterministic_minor_adjustment"

    assert record["ai_called"] is False
    assert record["skip_reason"] == expected_reason
    fake_router.assert_not_called()
def test_router_short_circuit_returns_none_skip_reason(monkeypatch):
    """A router that returns ``None`` is recorded as ``router_short_circuit``."""
    fake_router = MagicMock(return_value=None)
    monkeypatch.setattr(step12_mod, "route_ai_fallback", fake_router)

    record = _call([FakeUnit(label="restructure", provisional=True)])[0]

    assert record["ai_called"] is False
    assert record["skip_reason"] == "router_short_circuit"
    assert record["proposal"] is None
    fake_router.assert_called_once()
def test_ai_adaptation_call_records_proposal(monkeypatch):
    """A proposal returned by the router is recorded, and the router sees route + label."""
    canned_proposal = AiFallbackProposal(
        proposal_kind=ProposalKind.PARTIAL_OVERRIDES,
        payload={"slots": {"s_text": "x"}},
        rationale="r",
    )
    fake_router = MagicMock(return_value=canned_proposal)
    monkeypatch.setattr(step12_mod, "route_ai_fallback", fake_router)

    record = _call([FakeUnit(label="restructure", provisional=True)])[0]

    assert record["ai_called"] is True
    assert record["skip_reason"] is None
    assert record["proposal"]["proposal_kind"] == "partial_overrides"
    fake_router.assert_called_once()

    # The router must receive the resolved route and the original label.
    v4_result = fake_router.call_args.kwargs["v4_result"]
    assert v4_result["route"] == "ai_adaptation_required"
    assert v4_result["label"] == "restructure"
def test_router_exception_is_captured_per_record(monkeypatch):
    """A router exception is stored on the record as ``'<Type>: <message>'``."""
    failing_router = MagicMock(side_effect=RuntimeError("transient_boom"))
    monkeypatch.setattr(step12_mod, "route_ai_fallback", failing_router)

    record = _call([FakeUnit(label="restructure", provisional=True)])[0]

    assert record["ai_called"] is True
    assert record["proposal"] is None
    assert record["error"] == "RuntimeError: transient_boom"
    failing_router.assert_called_once()
def test_mixed_units_each_independently_classified(monkeypatch):
    """Each unit in a mixed batch gets its own skip reason; only one reaches the router."""
    fake_router = MagicMock(return_value=None)
    monkeypatch.setattr(step12_mod, "route_ai_fallback", fake_router)

    batch = [
        FakeUnit(label="use_as_is", provisional=False),
        FakeUnit(label="reject", provisional=True),
        FakeUnit(label="restructure", provisional=True),
        FakeUnit(label="restructure", provisional=False),
    ]
    expected_reasons = [
        "not_provisional",
        "route_not_ai_adaptation:design_reference_only",
        "router_short_circuit",
        "not_provisional",
    ]

    observed_reasons = [record["skip_reason"] for record in _call(batch)]

    assert observed_reasons == expected_reasons
    assert fake_router.call_count == 1
# ---------------------------------------------------------------------------
# IMP-46 u4 — structural cache key + fingerprints
# ---------------------------------------------------------------------------
def test_cache_key_format_is_frame_id_plus_sha256(monkeypatch):
    """cache_key looks like '{frame_id}::{64 lowercase hex chars}'.

    Neither the template id nor the section ids may appear in the key.
    """
    fake_router = MagicMock(return_value=None)
    monkeypatch.setattr(step12_mod, "route_ai_fallback", fake_router)

    _call([_ai_unit()])
    cache_key = fake_router.call_args.kwargs["cache_key"]

    assert "::" in cache_key
    frame_part, _sep, signature_part = cache_key.partition("::")
    assert frame_part == "fid_123"
    assert len(signature_part) == 64
    hex_digits = "0123456789abcdef"
    assert all(char in hex_digits for char in signature_part)

    # The legacy "template_id::sorted(section_ids)" form is gone.
    for legacy_fragment in ("tmpl_x", "02-1"):
        assert legacy_fragment not in cache_key
def test_cache_key_invariant_to_section_id_changes(monkeypatch):
    """Changing only source_section_ids leaves the cache_key untouched."""
    fake_router = MagicMock(return_value=None)
    monkeypatch.setattr(step12_mod, "route_ai_fallback", fake_router)

    _call([_ai_unit(source_section_ids=["02-1"])])
    single_section_key = fake_router.call_args.kwargs["cache_key"]

    fake_router.reset_mock()
    _call([_ai_unit(source_section_ids=["05-2", "07-3"])])
    multi_section_key = fake_router.call_args.kwargs["cache_key"]

    assert single_section_key == multi_section_key
def test_cache_key_invariant_to_template_id_changes(monkeypatch):
    """Changing only frame_template_id leaves the cache_key untouched."""
    fake_router = MagicMock(return_value=None)
    monkeypatch.setattr(step12_mod, "route_ai_fallback", fake_router)

    _call([_ai_unit(frame_template_id="tmpl_x")])
    original_key = fake_router.call_args.kwargs["cache_key"]

    fake_router.reset_mock()
    _call([_ai_unit(frame_template_id="tmpl_OTHER")])
    renamed_template_key = fake_router.call_args.kwargs["cache_key"]

    assert original_key == renamed_template_key
def test_cache_key_changes_when_any_signature_axis_changes(monkeypatch):
    """Flipping any of the 7 unit-derived signature axes mutates cache_key.

    Each perturbation differs from the ``_ai_unit`` default on exactly one
    attribute; the resulting key must differ from the baseline key.
    """
    router = MagicMock(return_value=None)
    monkeypatch.setattr(step12_mod, "route_ai_fallback", router)
    _call([_ai_unit()])
    base_key = router.call_args.kwargs["cache_key"]

    # ``label`` is deliberately not perturbed: under the local ``_ROUTE_HINTS``
    # mapping every other label routes away from AI adaptation, so the router
    # would not be called and there would be no cache_key to compare.
    perturbations: dict[str, Any] = {
        "frame_id": "fid_OTHER",
        "layout_preset": "two_column",
        "zone_position": "zone_b",
        "source_shape": "paragraph",
        "h3_count": 7,
        "char_count": 500,  # bucket boundary crossing (151-400 → 401-1000)
        "cardinality": 4,
    }
    for axis, value in perturbations.items():
        router.reset_mock()
        _call([_ai_unit(**{axis: value})])
        new_key = router.call_args.kwargs["cache_key"]
        assert new_key != base_key, f"signature axis {axis!r} did not mutate cache_key"
def test_char_count_bucket_collapses_within_bucket(monkeypatch):
    """char_counts within one bucket share a cache_key; crossing a bucket changes it."""
    fake_router = MagicMock(return_value=None)
    monkeypatch.setattr(step12_mod, "route_ai_fallback", fake_router)

    _call([_ai_unit(char_count=160)])
    key_low = fake_router.call_args.kwargs["cache_key"]

    fake_router.reset_mock()
    _call([_ai_unit(char_count=399)])
    key_high = fake_router.call_args.kwargs["cache_key"]
    # 160 and 399 both fall in "151-400".
    assert key_low == key_high

    fake_router.reset_mock()
    _call([_ai_unit(char_count=401)])
    key_overflow = fake_router.call_args.kwargs["cache_key"]
    # 401 crosses into "401-1000".
    assert key_overflow != key_low
def test_fingerprints_attached_to_ai_record(monkeypatch):
    """Records that reached the router carry contract_sha, partial_sha and catalog_sha."""
    fake_router = MagicMock(return_value=None)
    monkeypatch.setattr(step12_mod, "route_ai_fallback", fake_router)

    contract = {"frame_id": "fid", "payload": {"x": 1}, "sub_zones": []}
    partial = {"some": "partial", "deeper": [1, 2, 3]}
    catalog_value = "deadbeef" * 8

    def _canonical_sha(payload):
        # SHA256 over the key-sorted, non-ASCII-preserving JSON encoding.
        encoded = json.dumps(payload, sort_keys=True, ensure_ascii=False).encode("utf-8")
        return hashlib.sha256(encoded).hexdigest()

    records = _call(
        [_ai_unit()],
        get_contract_fn=lambda _t: contract,
        figma_partial_loader=lambda _t: partial,
        catalog_sha_loader=lambda: catalog_value,
    )
    fingerprints = records[0]["fingerprints"]

    assert isinstance(fingerprints, dict)
    assert set(fingerprints.keys()) == {"contract_sha", "partial_sha", "catalog_sha"}
    assert all(isinstance(value, str) for value in fingerprints.values())
    assert fingerprints["catalog_sha"] == catalog_value

    # contract_sha and partial_sha must be deterministic SHA256 over JSON-sorted payloads.
    expected_contract = _canonical_sha(contract)
    expected_partial = _canonical_sha(partial)
    assert fingerprints["contract_sha"] == expected_contract
    assert fingerprints["partial_sha"] == expected_partial
def test_fingerprints_default_catalog_sha_is_empty_string(monkeypatch):
    """Without a catalog_sha_loader, catalog_sha is '' and the key is still present."""
    fake_router = MagicMock(return_value=None)
    monkeypatch.setattr(step12_mod, "route_ai_fallback", fake_router)

    fingerprints = _call([_ai_unit()])[0]["fingerprints"]

    assert fingerprints["catalog_sha"] == ""
    # All three keys are always present, including contract_sha + partial_sha.
    expected_keys = {"contract_sha", "partial_sha", "catalog_sha"}
    assert set(fingerprints.keys()) == expected_keys
def test_fingerprints_change_when_contract_changes(monkeypatch):
    """A different frame contract changes contract_sha but not partial_sha."""
    fake_router = MagicMock(return_value=None)
    monkeypatch.setattr(step12_mod, "route_ai_fallback", fake_router)

    records_a = _call([_ai_unit()], get_contract_fn=lambda _t: {"a": 1})
    fps_a = records_a[0]["fingerprints"]
    records_b = _call([_ai_unit()], get_contract_fn=lambda _t: {"a": 2})
    fps_b = records_b[0]["fingerprints"]

    assert fps_a["contract_sha"] != fps_b["contract_sha"]
    assert fps_a["partial_sha"] == fps_b["partial_sha"]
def test_fingerprints_change_when_partial_changes(monkeypatch):
    """A different figma partial changes partial_sha but not contract_sha."""
    fake_router = MagicMock(return_value=None)
    monkeypatch.setattr(step12_mod, "route_ai_fallback", fake_router)

    records_a = _call([_ai_unit()], figma_partial_loader=lambda _t: {"p": 1})
    fps_a = records_a[0]["fingerprints"]
    records_b = _call([_ai_unit()], figma_partial_loader=lambda _t: {"p": 2})
    fps_b = records_b[0]["fingerprints"]

    assert fps_a["partial_sha"] != fps_b["partial_sha"]
    assert fps_a["contract_sha"] == fps_b["contract_sha"]
def test_v4_result_cardinality_uses_unit_value(monkeypatch):
    """v4_result['cardinality'] passes through the unit's cardinality, including None."""
    fake_router = MagicMock(return_value=None)
    monkeypatch.setattr(step12_mod, "route_ai_fallback", fake_router)

    _call([_ai_unit(cardinality=7)])
    forwarded = fake_router.call_args.kwargs["v4_result"]
    assert forwarded["cardinality"] == 7

    fake_router.reset_mock()
    _call([_ai_unit(cardinality=None)])
    forwarded = fake_router.call_args.kwargs["v4_result"]
    assert forwarded["cardinality"] is None
def test_skipped_records_have_no_cache_key_or_fingerprints(monkeypatch):
    """Records skipped before the router leave cache_key and fingerprints as None."""
    monkeypatch.setattr(step12_mod, "route_ai_fallback", MagicMock(return_value=None))

    skipped_units = [
        FakeUnit(label="restructure", provisional=False),
        FakeUnit(label="reject", provisional=True),
        FakeUnit(label="light_edit", provisional=True),
    ]

    for record in _call(skipped_units):
        assert record["cache_key"] is None
        assert record["fingerprints"] is None
def test_catalog_sha_loader_called_once_per_gather(monkeypatch):
    """The catalog_sha loader runs once per gather call, however many units there are."""
    fake_router = MagicMock(return_value=None)
    monkeypatch.setattr(step12_mod, "route_ai_fallback", fake_router)
    catalog_loader = MagicMock(return_value="cafefeed" * 8)

    three_units = [
        _ai_unit(),
        _ai_unit(frame_id="fid_other"),
        _ai_unit(frame_id="fid_third"),
    ]
    _call(three_units, catalog_sha_loader=catalog_loader)

    catalog_loader.assert_called_once()
def test_record_shape_contract_is_stable_with_u4_fields(monkeypatch):
    """The record key set is exactly the documented schema, including the u4 fields."""
    monkeypatch.setattr(step12_mod, "route_ai_fallback", MagicMock(return_value=None))

    expected_fields = {
        "unit_index",
        "source_section_ids",
        "frame_template_id",
        "label",
        "route_hint",
        "provisional",
        "ai_called",
        "skip_reason",
        "proposal",
        "error",
        # IMP-46 u4 additions.
        "cache_key",
        "fingerprints",
    }

    record = _call([FakeUnit(label="reject", provisional=True)])[0]

    assert set(record.keys()) == expected_fields
def test_cache_key_is_compatible_with_cache_parse_key(monkeypatch):
    """The cache_key handed to the router parses via cache.py's ``_parse_key``."""
    from src.phase_z2_ai_fallback.cache import KEY_DELIMITER, _parse_key

    fake_router = MagicMock(return_value=None)
    monkeypatch.setattr(step12_mod, "route_ai_fallback", fake_router)

    _call([_ai_unit()])
    parsed = _parse_key(fake_router.call_args.kwargs["cache_key"])

    assert parsed is not None
    parsed_frame_id, parsed_signature = parsed
    assert parsed_frame_id == "fid_123"
    assert len(parsed_signature) == 64
    assert KEY_DELIMITER not in parsed_signature
# ---------------------------------------------------------------------------
# IMP-47B u9 — Step 12 reject eligibility + normal-path AI=0 regression
# ---------------------------------------------------------------------------
# Locks the end-to-end Step 12 contract against the production route helper
# `_imp05_route_hint`. The local `_ROUTE_HINTS` mapping above intentionally
# preserves the legacy ``reject -> design_reference_only`` form to exercise
# the catch-all fall-through branch; u9 instead drives gather with the real
# production map (post-u1 flip) so reject provisional units reach the router
# and normal-path labels stay AI=0.
def test_production_reject_route_reaches_router_when_provisional(monkeypatch):
    """With the production route helper, a provisional reject unit reaches the router."""
    from src.phase_z2_pipeline import _imp05_route_hint

    fake_router = MagicMock(return_value=None)
    monkeypatch.setattr(step12_mod, "route_ai_fallback", fake_router)

    reject_units = [FakeUnit(label="reject", provisional=True)]
    record = step12_mod.gather_step12_ai_repair_proposals(
        reject_units,
        route_for_label=_imp05_route_hint,
        get_contract_fn=_get_contract,
        frame_visual_loader=_frame_visual,
    )[0]

    assert record["route_hint"] == "ai_adaptation_required"
    assert record["skip_reason"] == "router_short_circuit"
    assert record["ai_called"] is False
    fake_router.assert_called_once()
def test_production_normal_route_labels_never_reach_router(monkeypatch):
    """Provisional units with normal-path labels are skipped without a router call."""
    from src.phase_z2_pipeline import _imp05_route_hint

    fake_router = MagicMock(return_value=None)
    monkeypatch.setattr(step12_mod, "route_ai_fallback", fake_router)

    # Pair each label with the skip reason its record must carry.
    cases = [
        ("use_as_is", "route_not_ai_adaptation:direct_render"),
        ("light_edit", "route_not_ai_adaptation:deterministic_minor_adjustment"),
        (None, "route_not_ai_adaptation:None"),
    ]
    normal_units = [FakeUnit(label=label, provisional=True) for label, _ in cases]

    records = step12_mod.gather_step12_ai_repair_proposals(
        normal_units,
        route_for_label=_imp05_route_hint,
        get_contract_fn=_get_contract,
        frame_visual_loader=_frame_visual,
    )

    for position, (_label, expected_reason) in enumerate(cases):
        assert records[position]["skip_reason"] == expected_reason
    fake_router.assert_not_called()
def test_production_non_provisional_reject_skipped_before_route_gate(monkeypatch):
    """A non-provisional reject unit is skipped as ``not_provisional`` (production routing).

    The production helper is expected to route reject to
    ``ai_adaptation_required`` (post-u1). Even so, a non-provisional unit must
    be recorded as ``not_provisional`` and never reach the router.
    """
    from src.phase_z2_pipeline import _imp05_route_hint

    fake_router = MagicMock(return_value=None)
    monkeypatch.setattr(step12_mod, "route_ai_fallback", fake_router)

    settled_reject = [FakeUnit(label="reject", provisional=False)]
    record = step12_mod.gather_step12_ai_repair_proposals(
        settled_reject,
        route_for_label=_imp05_route_hint,
        get_contract_fn=_get_contract,
        frame_visual_loader=_frame_visual,
    )[0]

    assert record["skip_reason"] == "not_provisional"
    assert record["ai_called"] is False
    fake_router.assert_not_called()