feat(#92): IMP-92 u1~u5 AI fallback config validation (model ping + operational error classification)

Replaces #84 UI-noise removal plan with positive operational-alert contract.
Five-axis stack lands together: (1) default model literal moved to current
Opus-family ID, (2) Anthropic SDK error classifier mapping exceptions to
quota/billing/auth/other, (3) api_error_kind plumbed through ai_repair_status
summary + per-record retention, (4) Step 0 preflight ping gated under
ai_fallback_enabled (default OFF preserved) with fail-fast on invalid
model/key, (5) frontend formatter rewritten to surface only operational
quota/billing/auth toasts (non-operational paths return null per
feedback_auto_pipeline_first silent-pipeline policy).

u1 - default model literal claude-opus-4-6-20250415 -> claude-opus-4-7
     (src/config.py + tests/test_phase_z2_ai_fallback_config.py lock mirror)
u2 - classify_operational_error type+status_code dispatch + Step 12
     api_error_kind stamp on except path (src/phase_z2_ai_fallback/client.py
     + src/phase_z2_ai_fallback/step12.py + tests/phase_z2_ai_fallback/test_step12.py)
u3 - _summarize_ai_repair_status aggregates api_error_kinds {quota,billing,
     auth,other}; error_records[i].api_error_kind retained per-record
     (src/phase_z2_pipeline.py + tests/test_imp47b_failure_surface.py)
u4 - _run_step0_ai_preflight + Step0PreflightError; preflight only fires
     when ai_fallback_enabled=true; one-token ping; invalid key/model =>
     setup failure before Step 1 (src/phase_z2_pipeline.py +
     tests/phase_z2/test_pipeline_step0_preflight.py NEW)
u5 - AiRepairStatus.api_error_kinds? interface +
     formatAiRepairHumanReviewMessage rewritten: operational
     quota/billing/auth -> Korean copy
     verbatim from issue body (tie-break quota -> billing -> auth);
     validation/coverage_violated/unsupported_kind/generic-other/legacy
     payload -> null (Front/client/src/services/designAgentApi.ts +
     Front/client/tests/imp47b_human_review_toast.test.tsx)

Guardrails respected:
- feedback_demo_env_toggle_policy: default OFF preserved; preflight skipped
  when ai_fallback_enabled=false (test_preflight_skipped_when_disabled
  asserts anthropic.Anthropic() not called).
- feedback_auto_pipeline_first: non-operational AI failures stay silent;
  only quota/billing/auth reach user toast.
- feedback_ai_isolation_contract: AI remains fallback-only; no normal-path
  migration; MDX preserved.
- project_imp46_carveout_caveat: cache_key/fingerprints fields untouched on
  every record; no overlap with #62 cache region.
- feedback_no_hardcoding: zero MDX-sample-specific literals; classifier
  dispatch by SDK type, not by string parsing.
- feedback_artifact_status_naming: operational toast scoped to alert axis,
  not overall PASS signal.

Tests:
- Targeted u1+u2+u3+u4: 63 passed
- u5 vitest (Front/): 10/10 passed
- tests/phase_z2_ai_fallback dir regression: 240 passed
- tests/phase_z2 dir regression: 323 passed
- IMP-92-adjacent (-k "imp47b or ai_fallback or preflight or step12 or step0"): 299 passed (808 deselected)
- u1 baseline lock (test_client_mock.py): 8 passed
Zero failures, zero regressions outside scope.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-23 22:07:25 +09:00
parent 842a46144c
commit 896f273ffa
10 changed files with 835 additions and 63 deletions

View File

@@ -17,6 +17,9 @@ from dataclasses import dataclass, field
from typing import Any
from unittest.mock import MagicMock
import anthropic
import httpx
from src.phase_z2_ai_fallback import step12 as step12_mod
from src.phase_z2_ai_fallback.schema import AiFallbackProposal, ProposalKind
@@ -176,6 +179,9 @@ def test_router_exception_is_captured_per_record(monkeypatch):
assert rec["ai_called"] is True
assert rec["proposal"] is None
assert rec["error"] == "RuntimeError: transient_boom"
# IMP-92 u2 — generic (non-Anthropic) exceptions classify as "other"
# so the frontend operational formatter stays silent for them.
assert rec["api_error_kind"] == "other"
router.assert_called_once()
@@ -405,6 +411,7 @@ def test_record_shape_contract_is_stable_with_u4_fields(monkeypatch):
"skip_reason",
"proposal",
"error",
"api_error_kind",
"cache_key",
"fingerprints",
}
@@ -602,3 +609,146 @@ def test_mixed_units_router_receives_fingerprints_only_for_ai_eligible(monkeypat
# Skipped records carry None.
assert recs[0]["fingerprints"] is None
assert recs[1]["fingerprints"] is None
# ---------------------------------------------------------------------------
# IMP-92 u2 — Anthropic SDK exception → api_error_kind classification
# ---------------------------------------------------------------------------
# Step 12 stamps each AI-called record with api_error_kind so the frontend
# operational alert formatter can render quota / billing / auth surfaces
# while keeping "other" failures silent (the #84 replacement-plan contract).
# Classification is type-based (no string parsing); only AI-eligible units
# that actually hit ``route_ai_fallback`` and raise can produce a non-None
# api_error_kind. Skipped units (not_provisional / non-AI route) retain
# api_error_kind=None alongside cache_key/fingerprints=None.
def _anthropic_status_error(
    error_cls: type[anthropic.APIStatusError], status_code: int
) -> anthropic.APIStatusError:
    """Build an Anthropic SDK status error instance usable as a mock ``side_effect``.

    The SDK error constructors require ``response`` and ``body`` keyword
    arguments. An ``httpx.Response`` attached to a stub ``httpx.Request`` is
    the minimum needed for the isinstance dispatch in
    ``classify_operational_error``.

    Args:
        error_cls: The ``anthropic.APIStatusError`` (sub)class to instantiate.
        status_code: HTTP status code carried by the stub response.

    Returns:
        An un-raised ``error_cls`` instance with message ``"simulated"``.
    """
    stub_request = httpx.Request("POST", "https://api.anthropic.com/v1/messages")
    return error_cls(
        "simulated",
        response=httpx.Response(status_code, request=stub_request),
        body=None,
    )
def test_router_rate_limit_error_classifies_as_quota(monkeypatch):
    """A router raising RateLimitError (HTTP 429) yields api_error_kind='quota'."""
    raising_router = MagicMock(
        side_effect=_anthropic_status_error(anthropic.RateLimitError, 429)
    )
    monkeypatch.setattr(step12_mod, "route_ai_fallback", raising_router)

    record = _call([_ai_unit()])[0]

    assert record["ai_called"] is True
    assert record["api_error_kind"] == "quota"
    # The raw error string keeps the exception class name as its prefix.
    assert record["error"].startswith("RateLimitError: ")
def test_router_permission_denied_classifies_as_billing(monkeypatch):
    """A router raising PermissionDeniedError (HTTP 403) yields api_error_kind='billing'."""
    raising_router = MagicMock(
        side_effect=_anthropic_status_error(anthropic.PermissionDeniedError, 403)
    )
    monkeypatch.setattr(step12_mod, "route_ai_fallback", raising_router)

    record = _call([_ai_unit()])[0]

    assert record["ai_called"] is True
    assert record["api_error_kind"] == "billing"
    # The raw error string keeps the exception class name as its prefix.
    assert record["error"].startswith("PermissionDeniedError: ")
def test_router_payment_required_classifies_as_billing(monkeypatch):
    """A base APIStatusError carrying HTTP 402 yields api_error_kind='billing'.

    The Anthropic SDK has no dedicated PaymentRequired subclass, so a 402
    response surfaces as the plain ``APIStatusError`` base class. The issue
    body's operational contract requires 402 to render as billing, which
    means the classifier has to fall back to ``status_code`` dispatch once
    none of the typed subclass branches match.
    """
    raising_router = MagicMock(
        side_effect=_anthropic_status_error(anthropic.APIStatusError, 402)
    )
    monkeypatch.setattr(step12_mod, "route_ai_fallback", raising_router)

    record = _call([_ai_unit()])[0]

    assert record["ai_called"] is True
    assert record["api_error_kind"] == "billing"
    # The prefix is the base class name because no subclass exists for 402.
    assert record["error"].startswith("APIStatusError: ")
def test_router_authentication_error_classifies_as_auth(monkeypatch):
    """A router raising AuthenticationError (HTTP 401) yields api_error_kind='auth'."""
    raising_router = MagicMock(
        side_effect=_anthropic_status_error(anthropic.AuthenticationError, 401)
    )
    monkeypatch.setattr(step12_mod, "route_ai_fallback", raising_router)

    record = _call([_ai_unit()])[0]

    assert record["ai_called"] is True
    assert record["api_error_kind"] == "auth"
    # The raw error string keeps the exception class name as its prefix.
    assert record["error"].startswith("AuthenticationError: ")
def test_router_bad_request_classifies_as_other(monkeypatch):
    """BadRequestError (HTTP 400) is non-operational, so api_error_kind='other'."""
    raising_router = MagicMock(
        side_effect=_anthropic_status_error(anthropic.BadRequestError, 400)
    )
    monkeypatch.setattr(step12_mod, "route_ai_fallback", raising_router)

    record = _call([_ai_unit()])[0]

    assert record["ai_called"] is True
    assert record["api_error_kind"] == "other"
def test_router_internal_server_error_classifies_as_other(monkeypatch):
    """InternalServerError (HTTP 5xx) is non-operational, so api_error_kind='other'."""
    raising_router = MagicMock(
        side_effect=_anthropic_status_error(anthropic.InternalServerError, 500)
    )
    monkeypatch.setattr(step12_mod, "route_ai_fallback", raising_router)

    record = _call([_ai_unit()])[0]

    assert record["ai_called"] is True
    assert record["api_error_kind"] == "other"
def test_router_success_leaves_api_error_kind_none(monkeypatch):
    """When the router returns a proposal there is no error, so api_error_kind stays None."""
    succeeding_router = MagicMock(
        return_value=AiFallbackProposal(
            proposal_kind=ProposalKind.PARTIAL_OVERRIDES,
            payload={"slots": {"s": "x"}},
            rationale="r",
        )
    )
    monkeypatch.setattr(step12_mod, "route_ai_fallback", succeeding_router)

    record = _call([_ai_unit()])[0]

    assert record["ai_called"] is True
    assert record["error"] is None
    assert record["api_error_kind"] is None
def test_skipped_records_keep_api_error_kind_none(monkeypatch):
    """Records for non-AI-eligible units carry neither an error nor an api_error_kind."""
    monkeypatch.setattr(step12_mod, "route_ai_fallback", MagicMock(return_value=None))

    skipped_units = [
        FakeUnit(label="restructure", provisional=False),  # not_provisional
        FakeUnit(label="light_edit", provisional=True),  # non-AI route
        FakeUnit(label="reject", provisional=True),  # legacy non-AI route
    ]

    for record in _call(skipped_units):
        assert record["api_error_kind"] is None
        assert record["error"] is None
def test_router_short_circuit_keeps_api_error_kind_none(monkeypatch):
    """A router returning None is a short-circuit, not an error, so api_error_kind=None."""
    short_circuit_router = MagicMock(return_value=None)
    monkeypatch.setattr(step12_mod, "route_ai_fallback", short_circuit_router)

    record = _call([_ai_unit()])[0]

    assert record["skip_reason"] == "router_short_circuit"
    assert record["api_error_kind"] is None