- u1~u9: AI fallback infrastructure (router/prompts/schema/validator) + Step 12 hook - u10: e2e reject chain (writes final.html with AI-repaired slot, full coverage) - u11: frontend wiring deferred to follow-up commit (split from IMP-41 hunks) - u12: coverage_invariant guard - u13: cache save gate (visual_check PASS + user_approved/auto_cache) — Codex #22 verified Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
175 lines
6.9 KiB
Python
175 lines
6.9 KiB
Python
"""IMP-47B u8 — slide_status.ai_repair_status surfacing tests.
|
|
|
|
Scope (this slice):

Helper ``_summarize_ai_repair_status(ai_repair_records, coverage_invariant)``
(src/phase_z2_pipeline.py) composes the u4 gather ``error``, the u5
``apply_status`` and the u7 ``coverage_invariant`` into a single
``ai_repair_status`` axis attached to ``slide_status``. Status priority
(highest → lowest): ``error`` > ``coverage_violated`` >
``unsupported_kind`` > ``applied`` > ``ok``. ``human_review_required``
flips to True on the three failure axes (``error``, ``coverage_violated``,
``unsupported_kind``) for u11 frontend surfacing.

The frontend reads ``slide_status.ai_repair_status`` to render a
notification per the IMP-47B policy ("AI 호출 실패 / proposal validation
실패 / coverage 미달 → frontend notification", i.e. AI call failure /
proposal validation failure / coverage shortfall → frontend notification).
u9~u13 are out of scope.

The helper is pure (no IO, no AI call), so synthetic record / invariant
dicts exercise every branch directly.
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
from src.phase_z2_pipeline import _summarize_ai_repair_status
|
|
|
|
|
|
def _record(
    *,
    unit_index: int = 0,
    apply_status: str | None = None,
    error: str | None = None,
    source_section_ids: list[str] | None = None,
) -> dict:
    """Build a minimal Step 12 AI repair record stub — only the fields u8 reads.

    Args:
        unit_index: Index stored under ``"unit_index"``; also used to derive
            the default section id.
        apply_status: Value stored under ``"apply_status"`` (``None`` when
            not supplied).
        error: Value stored under ``"error"`` (``None`` when not supplied).
        source_section_ids: Section ids stored under
            ``"source_section_ids"``. When omitted (``None``) a single
            synthetic id ``"MOCK_S<unit_index>"`` is used.

    Returns:
        A dict with the keys ``unit_index``, ``source_section_ids``,
        ``apply_status`` and ``error``.
    """
    # Compare against None rather than relying on truthiness: an explicitly
    # passed empty list must be kept as-is, not replaced by the mock default.
    if source_section_ids is None:
        source_section_ids = [f"MOCK_S{unit_index}"]
    return {
        "unit_index": unit_index,
        "source_section_ids": source_section_ids,
        "apply_status": apply_status,
        "error": error,
    }
|
|
|
|
|
|
# Coverage-invariant stub reporting an intact result: nothing was dropped.
_OK_COVERAGE = {
    "status": "ok",
    "dropped_section_ids": [],
}

# Coverage-invariant stub reporting a violation with one dropped section id.
_VIOLATED_COVERAGE = {
    "status": "violated",
    "dropped_section_ids": ["MOCK_S2"],
}
|
|
|
|
|
|
# ─── Case 1 : empty pipeline → status='ok' ──────────────────────────
|
|
|
|
|
|
def test_empty_records_returns_ok_no_human_review():
    """An empty record list with intact coverage summarizes to ``ok``.

    No AI work was executed, so ``human_review_required`` stays False, the
    total count is zero and every detail list is empty. This is where the
    flag-off default (no provisional units) lands.
    """
    summary = _summarize_ai_repair_status([], _OK_COVERAGE)

    # Top-level verdict.
    assert summary["status"] == "ok"
    assert summary["human_review_required"] is False

    # Nothing counted, nothing to surface.
    assert summary["counts"]["total"] == 0
    assert summary["unsupported_kind_records"] == []
    assert summary["error_records"] == []
    assert summary["dropped_section_ids"] == []
|
|
|
|
|
|
# ─── Case 2 : applied → status='applied', no human_review ───────────
|
|
|
|
|
|
def test_applied_partial_overrides_marks_applied_no_human_review():
    """A single applied PARTIAL_OVERRIDES repair is the happy path.

    The summary must report ``status='applied'`` without asking for human
    review, counting one applied record and zero errors.
    """
    applied_record = _record(apply_status="applied:partial_overrides")

    summary = _summarize_ai_repair_status([applied_record], _OK_COVERAGE)

    assert summary["status"] == "applied"
    assert summary["human_review_required"] is False
    tally = summary["counts"]
    assert tally["applied"] == 1
    assert tally["error"] == 0
|
|
|
|
|
|
# ─── Case 3 : unsupported kind → status='unsupported_kind' ──────────
|
|
|
|
|
|
def test_unsupported_kind_marks_human_review_required():
    """An unsupported proposal kind must be surfaced for human review.

    u5 reports ``unsupported_kind_for_reject_route:<kind>`` for
    builder_options_patch / slot_mapping_proposal. u8 has to classify such a
    record as ``unsupported_kind`` with ``human_review_required=True`` so the
    frontend renders a notification, and echo the offending record back.
    """
    unsupported_status = "unsupported_kind_for_reject_route:builder_options_patch"
    unsupported_record = _record(
        unit_index=1,
        apply_status=unsupported_status,
        source_section_ids=["MOCK_S1"],
    )

    summary = _summarize_ai_repair_status([unsupported_record], _OK_COVERAGE)

    assert summary["status"] == "unsupported_kind"
    assert summary["human_review_required"] is True
    assert summary["counts"]["unsupported_kind"] == 1

    # The surfaced entry carries the unit, its sections and the raw status.
    expected_entry = {
        "unit_index": 1,
        "source_section_ids": ["MOCK_S1"],
        "apply_status": unsupported_status,
    }
    assert summary["unsupported_kind_records"] == [expected_entry]
|
|
|
|
|
|
# ─── Case 4 : gather error → status='error' (highest priority) ──────
|
|
|
|
|
|
def test_gather_error_marks_status_error_with_records():
    """A record carrying ``error`` drives the summary to ``status='error'``.

    A populated ``record['error']`` means
    ``gather_step12_ai_repair_proposals`` caught a router exception (AI call
    / validator). ``error`` is the highest-priority failure axis, so human
    review is required and the failing record is listed in ``error_records``.
    """
    failure_message = "ValueError: missing slot 'title'"
    failed_record = _record(
        unit_index=2,
        error=failure_message,
        source_section_ids=["MOCK_S2"],
    )

    summary = _summarize_ai_repair_status([failed_record], _OK_COVERAGE)

    assert summary["status"] == "error"
    assert summary["human_review_required"] is True
    assert summary["counts"]["error"] == 1

    # The surfaced entry carries the unit, its sections and the error text.
    expected_entry = {
        "unit_index": 2,
        "source_section_ids": ["MOCK_S2"],
        "error": failure_message,
    }
    assert summary["error_records"] == [expected_entry]
|
|
|
|
|
|
# ─── Case 5 : coverage violated → status='coverage_violated' ────────
|
|
|
|
|
|
def test_coverage_violation_surfaces_dropped_sections():
    """A violated coverage invariant overrides an otherwise applied repair.

    A u7 ``coverage_invariant`` of ``'violated'`` means the AI repair dropped
    a section_id from the post-AI superset. Dropping a section is never
    acceptable, so it is surfaced as ``human_review_required`` together with
    the dropped section ids.
    """
    applied_record = _record(apply_status="applied:partial_overrides")

    summary = _summarize_ai_repair_status([applied_record], _VIOLATED_COVERAGE)

    assert summary["status"] == "coverage_violated"
    assert summary["human_review_required"] is True
    assert summary["coverage_status"] == "violated"
    assert summary["dropped_section_ids"] == ["MOCK_S2"]
|
|
|
|
|
|
# ─── Case 6 : priority order — error > coverage > unsupported ───────
|
|
|
|
|
|
def test_error_dominates_over_coverage_and_unsupported():
    """``error`` wins when several failure axes are present at once.

    Documented priority order:
    error > coverage_violated > unsupported_kind > applied > ok.
    Here an errored, an unsupported-kind and an applied record are combined
    with a violated coverage invariant; the status must be ``error`` while
    each record is still tallied in its own counter.
    """
    errored = _record(unit_index=0, error="RuntimeError")
    unsupported = _record(
        unit_index=1,
        apply_status="unsupported_kind_for_reject_route:slot_mapping_proposal",
    )
    applied = _record(unit_index=2, apply_status="applied:partial_overrides")

    summary = _summarize_ai_repair_status(
        [errored, unsupported, applied],
        _VIOLATED_COVERAGE,
    )

    assert summary["status"] == "error"
    assert summary["human_review_required"] is True
    assert summary["counts"]["error"] == 1
    assert summary["counts"]["unsupported_kind"] == 1
    assert summary["counts"]["applied"] == 1
|
|
|
|
|
|
# ─── Case 7 : no_proposal + no_zone_match counted, not failure ──────
|
|
|
|
|
|
def test_no_proposal_and_no_zone_match_do_not_trigger_human_review():
    """Structural skips are counted but never escalate to human review.

    Flag-off short-circuit, not_provisional, route_not_ai_adaptation and
    B4-mismatch (no_zone_match) are structural skips rather than AI
    failures: the summary stays ``ok`` and only the matching counters move.
    """
    skipped_records = [
        _record(unit_index=0, apply_status="no_proposal"),
        _record(unit_index=1, apply_status="no_zone_match"),
    ]

    summary = _summarize_ai_repair_status(skipped_records, _OK_COVERAGE)

    assert summary["status"] == "ok"
    assert summary["human_review_required"] is False
    assert summary["counts"]["no_proposal"] == 1
    assert summary["counts"]["no_zone_match"] == 1
|