Files
C.E.L_Slide_test2/tests/test_imp47b_step12_ai_wiring.py
kyeongmin 1186ad8ae2 feat(#76): IMP-47B reject-as-AI-adaptation activation (u1~u13 backend + tests)
- u1~u9: AI fallback infrastructure (router/prompts/schema/validator) + Step 12 hook
- u10: e2e reject chain (writes final.html with AI-repaired slot, full coverage)
- u11: frontend wiring deferred to follow-up commit (split from IMP-41 hunks)
- u12: coverage_invariant guard
- u13: cache save gate (visual_check PASS + user_approved/auto_cache) — Codex #22 verified

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-22 00:19:10 +09:00

155 lines
6.0 KiB
Python

"""IMP-47B u4 + u6 — Step 12 AI repair wiring + audit artifact tests.
Scope (this slice):

* u4 — Helper ``_run_step12_ai_repair`` (src/phase_z2_pipeline.py)
  wires the pipeline's local route-hint helper (``_imp05_route_hint``),
  the frame contract loader (``get_contract``), and a
  templates/phase_z2/families partial reader
  (``_load_frame_partial_html``) into
  ``gather_step12_ai_repair_proposals``.
* u6 — The gather records flow into ``_write_step_artifact`` under
  ``step12_ai_repair.json``. The audit shape must stay
  JSON-serialisable (no Pydantic / dataclass leakage) so the artifact
  write never raises on real runs.

The router short-circuits when ``settings.ai_fallback_enabled`` is
False (default), so AI=0 for non-AI-route units stays a structural
guarantee. Synthetic naming mirrors tests/test_imp47b_override_provisional.py
(MOCK_ prefix; no real catalog template_id / frame_id leakage).
u5 (PARTIAL_OVERRIDES apply), u7 (coverage invariant), and u8
(slide_status surfacing) are out of scope for this unit.
"""
from __future__ import annotations
import json
from dataclasses import dataclass, field
from src.phase_z2_pipeline import (
_load_frame_partial_html,
_run_step12_ai_repair,
_write_step_artifact,
)
def _mock_section_ids() -> list[str]:
    """Return a fresh single-element section-id list for each stub."""
    return ["MOCK_S1"]


@dataclass
class _StubUnit:
    """Synthetic stand-in for a pipeline unit passed to Step 12 AI repair.

    Only ``label`` and ``provisional`` are required; every other field
    carries a ``MOCK_``-prefixed or neutral default so a test only has
    to spell out the two values it varies.
    """

    # Values each test varies.
    label: str | None
    provisional: bool

    # Synthetic identifiers (MOCK_ prefix, not real catalog ids).
    frame_template_id: str = "MOCK_T_x"
    frame_id: str = "MOCK_F_x"
    # Built per instance so stubs never share one mutable list.
    source_section_ids: list[str] = field(default_factory=_mock_section_ids)
    raw_content: str = "MOCK_raw"

    # Remaining attributes, left at neutral defaults.
    v4_rank: int | None = 1
    cardinality: int | None = None
    layout_preset: str = ""
    zone_position: str = ""
    source_shape: str = "paragraph"
    h3_count: int = 0
    char_count: int = 0
# ─── Case 1 : mixed units → per-unit skip_reason classification ─────
def test_mixed_units_classified_by_route_and_provisional_flag():
    """Each unit in a mixed batch gets its own route_hint / skip_reason.

    The expectations below encode the scenario: the provisional
    ``restructure`` and ``reject`` units are routed to
    ``ai_adaptation_required`` and stop at ``router_short_circuit``
    (the test assumes ``ai_fallback_enabled`` is left at its flag-off
    default), while the remaining units record a structural skip reason
    (``not_provisional`` / ``route_not_ai_adaptation:...``). No record
    may report ``ai_called``.
    """
    # (label, provisional, expected route_hint, expected skip_reason)
    cases = [
        ("use_as_is", False, "direct_render", "not_provisional"),
        (
            "light_edit",
            True,
            "deterministic_minor_adjustment",
            "route_not_ai_adaptation:deterministic_minor_adjustment",
        ),
        ("restructure", True, "ai_adaptation_required", "router_short_circuit"),
        ("reject", True, "ai_adaptation_required", "router_short_circuit"),
        ("restructure", False, "ai_adaptation_required", "not_provisional"),
    ]
    stubs = [
        _StubUnit(label=label, provisional=provisional)
        for label, provisional, _route, _skip in cases
    ]

    audit = _run_step12_ai_repair(stubs)

    observed_skips = [entry["skip_reason"] for entry in audit]
    assert observed_skips == [skip for _label, _prov, _route, skip in cases]

    observed_routes = [entry["route_hint"] for entry in audit]
    assert observed_routes == [route for _label, _prov, route, _skip in cases]

    # Equivalent to requiring ``ai_called is False`` on every record.
    assert not any(entry["ai_called"] is not False for entry in audit)
# ─── Case 2 : reject provisional unit reaches AI gate ───────────────
def test_reject_provisional_unit_reaches_router_short_circuit():
    """A provisional ``reject`` unit gets as far as the router gate.

    The record must carry ``route_hint='ai_adaptation_required'`` and
    stop at ``skip_reason='router_short_circuit'`` without an AI call,
    i.e. the flag-off router is the only thing holding AI back. Per the
    original author's note, Step 12 previously skipped reject units via
    a bespoke design_reference_only check that u2 removed.
    """
    reject_unit = _StubUnit(label="reject", provisional=True)
    first = _run_step12_ai_repair([reject_unit])[0]

    assert first["route_hint"] == "ai_adaptation_required"
    assert first["skip_reason"] == "router_short_circuit"
    assert first["ai_called"] is False

    # cache_key / fingerprints are filled in only once the route and
    # provisional gates have passed, so non-None values show the gather
    # reached the AI-eligible code path.
    for populated_key in ("cache_key", "fingerprints"):
        assert first[populated_key] is not None
# ─── Case 3 : frame visual loader degrades on missing partial ──────
def test_load_frame_partial_html_returns_empty_for_missing_file():
    """Template ids without a families partial load as an empty string.

    Covers both the ``__empty__`` shell id (IMP-30) and an arbitrary
    unknown id, so the gather stays crash-free when the synthesized
    empty-shell unit has no partial on disk.
    """
    for template_id in ("__empty__", "MOCK_T_does_not_exist"):
        assert _load_frame_partial_html(template_id) == ""
# ─── Case 4 (u6) : audit artifact write is JSON-serialisable ────────
def test_step12_ai_repair_artifact_writes_json_serialisable_records(tmp_path):
    """IMP-47B u6 — gather records round-trip through the step artifact.

    Feeds the output of ``_run_step12_ai_repair`` to
    ``_write_step_artifact`` as ``step12_ai_repair.json`` and reads the
    file back with ``json.loads``. The write must not raise, the
    envelope must carry the step metadata, and every unit's
    ``route_hint`` / ``skip_reason`` / ``ai_called`` must survive the
    round trip for reviewers.

    Args:
        tmp_path: pytest's per-test temporary directory; passed as the
            first argument to ``_write_step_artifact``.
    """
    records = _run_step12_ai_repair([
        _StubUnit(label="reject", provisional=True),
        _StubUnit(label="use_as_is", provisional=False),
    ])

    fpath = _write_step_artifact(
        tmp_path, 12, "ai_repair",
        data={"per_unit": records},
        outputs=["step12_ai_repair.json"],
    )

    assert fpath.is_file()
    assert fpath.name == "step12_ai_repair.json"

    payload = json.loads(fpath.read_text(encoding="utf-8"))
    assert payload["step_num"] == 12
    assert payload["step_name"] == "ai_repair"
    assert payload["step_status"] == "done"

    per_unit = payload["data"]["per_unit"]
    assert len(per_unit) == 2

    # Unit 0: provisional reject — stops at the router gate.
    assert per_unit[0]["route_hint"] == "ai_adaptation_required"
    assert per_unit[0]["skip_reason"] == "router_short_circuit"
    assert per_unit[0]["ai_called"] is False

    # Unit 1: non-provisional use_as_is — structural skip.
    assert per_unit[1]["route_hint"] == "direct_render"
    assert per_unit[1]["skip_reason"] == "not_provisional"
    # ai_called is part of the per-unit audit contract, so it is checked
    # on every record rather than only the first.
    assert per_unit[1]["ai_called"] is False