- u1~u9: AI fallback infrastructure (router/prompts/schema/validator) + Step 12 hook - u10: e2e reject chain (writes final.html with AI-repaired slot, full coverage) - u11: frontend wiring deferred to follow-up commit (split from IMP-41 hunks) - u12: coverage_invariant guard - u13: cache save gate (visual_check PASS + user_approved/auto_cache) — Codex #22 verified Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
155 lines
6.0 KiB
Python
155 lines
6.0 KiB
Python
"""IMP-47B u4 + u6 — Step 12 AI repair wiring + audit artifact tests.

Scope (this slice):
  * u4 — Helper ``_run_step12_ai_repair`` (src/phase_z2_pipeline.py)
    wires the pipeline's local route-hint helper (``_imp05_route_hint``),
    the frame contract loader (``get_contract``), and a
    templates/phase_z2/families partial reader
    (``_load_frame_partial_html``) into
    ``gather_step12_ai_repair_proposals``.
  * u6 — The gather records flow into ``_write_step_artifact`` under
    ``step12_ai_repair.json``. The audit shape must stay
    JSON-serialisable (no Pydantic / dataclass leakage) so the artifact
    write never raises on real runs.

The router short-circuits when ``settings.ai_fallback_enabled`` is
False (default), so AI=0 for non-AI-route units stays a structural
guarantee. Synthetic naming mirrors tests/test_imp47b_override_provisional.py
(MOCK_ prefix; no real catalog template_id / frame_id leakage).

u5 (PARTIAL_OVERRIDES apply), u7 (coverage invariant), and u8
(slide_status surfacing) are out of scope for this unit.
"""
|
|
from __future__ import annotations
|
|
|
|
import json
|
|
from dataclasses import dataclass, field
|
|
|
|
from src.phase_z2_pipeline import (
|
|
_load_frame_partial_html,
|
|
_run_step12_ai_repair,
|
|
_write_step_artifact,
|
|
)
|
|
|
|
|
|
@dataclass
|
|
class _StubUnit:
|
|
label: str | None
|
|
provisional: bool
|
|
frame_template_id: str = "MOCK_T_x"
|
|
frame_id: str = "MOCK_F_x"
|
|
source_section_ids: list[str] = field(default_factory=lambda: ["MOCK_S1"])
|
|
raw_content: str = "MOCK_raw"
|
|
v4_rank: int | None = 1
|
|
cardinality: int | None = None
|
|
layout_preset: str = ""
|
|
zone_position: str = ""
|
|
source_shape: str = "paragraph"
|
|
h3_count: int = 0
|
|
char_count: int = 0
|
|
|
|
|
|
# ─── Case 1 : mixed units → per-unit skip_reason classification ─────
|
|
|
|
|
|
def test_mixed_units_classified_by_route_and_provisional_flag():
    """Reject + restructure provisional both route to ai_adaptation;
    use_as_is / light_edit / non-provisional skip without router call.

    With ai_fallback_enabled=False (default) the router returns None,
    so the two ai_adaptation provisional units record
    ``skip_reason='router_short_circuit'``; the rest record their
    structural skip_reason (not_provisional / route_not_ai_adaptation).
    """
    units = [
        _StubUnit(label="use_as_is", provisional=False),
        _StubUnit(label="light_edit", provisional=True),
        _StubUnit(label="restructure", provisional=True),
        _StubUnit(label="reject", provisional=True),
        _StubUnit(label="restructure", provisional=False),
    ]
    records = _run_step12_ai_repair(units)

    # One record per unit, in input order.
    assert [r["skip_reason"] for r in records] == [
        "not_provisional",
        "route_not_ai_adaptation:deterministic_minor_adjustment",
        "router_short_circuit",
        "router_short_circuit",
        "not_provisional",
    ]
    # The last unit shows route_hint is recorded even when the unit is
    # skipped as not_provisional.
    assert [r["route_hint"] for r in records] == [
        "direct_render",
        "deterministic_minor_adjustment",
        "ai_adaptation_required",
        "ai_adaptation_required",
        "ai_adaptation_required",
    ]
    # No unit may reach the AI call under the default configuration.
    assert all(r["ai_called"] is False for r in records)
|
|
|
|
|
|
# ─── Case 2 : reject provisional unit reaches AI gate ───────────────
|
|
|
|
|
|
def test_reject_provisional_unit_reaches_router_short_circuit():
    """Reject + provisional → route_hint=ai_adaptation_required.

    Router short-circuit (flag-off default) is the only thing keeping
    AI from firing; the wiring proves reject is no longer blocked by
    Step 12's bespoke design_reference_only skip (removed by u2).
    """
    records = _run_step12_ai_repair([_StubUnit(label="reject", provisional=True)])
    assert records[0]["route_hint"] == "ai_adaptation_required"
    assert records[0]["skip_reason"] == "router_short_circuit"
    assert records[0]["ai_called"] is False
    # cache_key / fingerprints populated only after the route + provisional
    # gates pass — confirms gather reached the AI-eligible code path.
    assert records[0]["cache_key"] is not None
    assert records[0]["fingerprints"] is not None
|
|
|
|
|
|
# ─── Case 3 : frame visual loader degrades on missing partial ──────
|
|
|
|
|
|
def test_load_frame_partial_html_returns_empty_for_missing_file():
    """__empty__ shell (IMP-30) and any unknown template_id → "".

    Keeps gather() crash-free for the IMP-30 first-render-invariant
    path where the synthesized empty-shell unit has no families partial.
    """
    # Both lookups must return an empty string instead of raising.
    assert _load_frame_partial_html("__empty__") == ""
    assert _load_frame_partial_html("MOCK_T_does_not_exist") == ""
|
|
|
|
|
|
# ─── Case 4 (u6) : audit artifact write is JSON-serialisable ────────
|
|
|
|
|
|
def test_step12_ai_repair_artifact_writes_json_serialisable_records(tmp_path):
    """IMP-47B u6 — gather records feed ``_write_step_artifact`` as the
    ``step12_ai_repair.json`` audit. Confirms the gather schema contains
    only JSON-native primitives (str / int / None / bool / list / dict)
    so the artifact write never raises on real runs and the audit
    payload preserves per-unit ``route_hint`` / ``skip_reason`` /
    ``ai_called`` for reviewers.
    """
    records = _run_step12_ai_repair([
        _StubUnit(label="reject", provisional=True),
        _StubUnit(label="use_as_is", provisional=False),
    ])
    fpath = _write_step_artifact(
        tmp_path, 12, "ai_repair",
        data={"per_unit": records},
        outputs=["step12_ai_repair.json"],
    )
    assert fpath.is_file()
    assert fpath.name == "step12_ai_repair.json"

    # Reading the file back through json.loads proves the payload
    # round-trips as plain JSON.
    payload = json.loads(fpath.read_text(encoding="utf-8"))
    assert payload["step_num"] == 12
    assert payload["step_name"] == "ai_repair"
    assert payload["step_status"] == "done"

    per_unit = payload["data"]["per_unit"]
    assert len(per_unit) == 2
    assert per_unit[0]["route_hint"] == "ai_adaptation_required"
    assert per_unit[0]["skip_reason"] == "router_short_circuit"
    assert per_unit[0]["ai_called"] is False
    assert per_unit[1]["route_hint"] == "direct_render"
    assert per_unit[1]["skip_reason"] == "not_provisional"
    # The docstring promises ai_called is preserved per unit, so check
    # the second record too.
    assert per_unit[1]["ai_called"] is False
|