feat(#92): IMP-92 u1~u5 AI fallback config validation (model ping + operational error classification)

Replaces #84 UI-noise removal plan with positive operational-alert contract.
Five-axis stack lands together: (1) default model literal moved to current
Opus-family ID, (2) Anthropic SDK error classifier mapping exceptions to
quota/billing/auth/other, (3) api_error_kind plumbed through ai_repair_status
summary + per-record retention, (4) Step 0 preflight ping gated under
ai_fallback_enabled (default OFF preserved) with fail-fast on invalid
model/key, (5) frontend formatter rewritten to surface only operational
quota/billing/auth toasts (non-operational paths return null per
feedback_auto_pipeline_first silent-pipeline policy).

u1 - default model literal claude-opus-4-6-20250415 -> claude-opus-4-7
     (src/config.py + tests/test_phase_z2_ai_fallback_config.py lock mirror)
u2 - classify_operational_error type+status_code dispatch + Step 12
     api_error_kind stamp on except path (src/phase_z2_ai_fallback/client.py
     + src/phase_z2_ai_fallback/step12.py + tests/phase_z2_ai_fallback/test_step12.py)
u3 - _summarize_ai_repair_status aggregates api_error_kinds {quota,billing,
     auth,other}; error_records[i].api_error_kind retained per-record
     (src/phase_z2_pipeline.py + tests/test_imp47b_failure_surface.py)
u4 - _run_step0_ai_preflight + Step0PreflightError; preflight only fires
     when ai_fallback_enabled=true; one-token ping; invalid key/model =>
     setup failure before Step 1 (src/phase_z2_pipeline.py +
     tests/phase_z2/test_pipeline_step0_preflight.py NEW)
u5 - AiRepairStatus.api_error_kinds? interface +
     formatAiRepairHumanReviewMessage rewritten: operational
     quota/billing/auth -> Korean copy verbatim from issue body
     (tie-break quota -> billing -> auth);
     validation/coverage_violated/unsupported_kind/generic-other/legacy
     payload -> null (Front/client/src/services/designAgentApi.ts +
     Front/client/tests/imp47b_human_review_toast.test.tsx)

Guardrails respected:
- feedback_demo_env_toggle_policy: default OFF preserved; preflight skipped
  when ai_fallback_enabled=false (test_preflight_skipped_when_disabled
  asserts anthropic.Anthropic() not called).
- feedback_auto_pipeline_first: non-operational AI failures stay silent;
  only quota/billing/auth reach user toast.
- feedback_ai_isolation_contract: AI remains fallback-only; no normal-path
  migration; MDX preserved.
- project_imp46_carveout_caveat: cache_key/fingerprints fields untouched on
  every record; no overlap with #62 cache region.
- feedback_no_hardcoding: zero MDX-sample-specific literals; classifier
  dispatch by SDK type, not by string parsing.
- feedback_artifact_status_naming: operational toast scoped to alert axis,
  not overall PASS signal.

Tests:
- Targeted u1+u2+u3+u4: 63 passed
- u5 vitest (Front/): 10/10 passed
- tests/phase_z2_ai_fallback dir regression: 240 passed
- tests/phase_z2 dir regression: 323 passed
- IMP-92-adjacent (-k "imp47b or ai_fallback or preflight or step12 or step0"): 299 passed (808 deselected)
- u1 baseline lock (test_client_mock.py): 8 passed
Zero failures, zero regressions outside scope.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-23 22:07:25 +09:00
parent 842a46144c
commit 896f273ffa
10 changed files with 835 additions and 63 deletions

View File

@@ -17,7 +17,7 @@ class Settings(BaseSettings):
# IMP-33 u1 — AI fallback policy. Fallback-path only; normal path AI=0.
# Defaults locked by Stage 2 plan; do NOT inline literals downstream.
ai_fallback_enabled: bool = False
ai_fallback_model: str = "claude-opus-4-6-20250415"
ai_fallback_model: str = "claude-opus-4-7"
ai_fallback_timeout_s: float = 60.0
ai_fallback_max_retries: int = 3
ai_fallback_backoff_base_s: float = 1.0

View File

@@ -31,6 +31,55 @@ _TRANSIENT_ERRORS: tuple[type[BaseException], ...] = (
# Output cap is an Anthropic API requirement, not a policy knob (u1).
_MAX_OUTPUT_TOKENS = 4096
# IMP-92 u2 — Anthropic SDK exception → operational error kind classifier.
# ``classify_operational_error`` returns exactly one of the four constants
# below; Step 12 stamps that value onto the AI repair record as
# ``api_error_kind``. Classification is type-based (SDK exception class
# plus HTTP status code), never string parsing of the error message.
# Per the classifier's documented contract, the frontend operational
# formatter surfaces only quota / billing / auth to users; "other" is a
# valid kind but stays silent on the UI.
_OPERATIONAL_ERROR_KIND_QUOTA = "quota"  # RateLimitError or HTTP 429
_OPERATIONAL_ERROR_KIND_BILLING = "billing"  # PermissionDeniedError or HTTP 402
_OPERATIONAL_ERROR_KIND_AUTH = "auth"  # AuthenticationError or HTTP 401
_OPERATIONAL_ERROR_KIND_OTHER = "other"  # every other exception
def classify_operational_error(exc: BaseException) -> str:
    """Map an exception to one of the four operational error kinds.

    The lookup runs in two stages, and the first match wins:

    1. Typed SDK exceptions:
       ``anthropic.RateLimitError`` → ``"quota"``,
       ``anthropic.PermissionDeniedError`` → ``"billing"``,
       ``anthropic.AuthenticationError`` → ``"auth"``.
    2. Any other ``anthropic.APIStatusError`` is classified by its HTTP
       status code (read from ``exc.status_code``, falling back to
       ``exc.response.status_code``): 429 → ``"quota"``,
       402 → ``"billing"``, 401 → ``"auth"``.

    Stage 2 exists so a 402 (Payment Required) that arrives as the generic
    ``APIStatusError`` rather than a typed subclass is still reported as
    billing.

    Args:
        exc: The exception caught around the AI call.

    Returns:
        ``"quota"``, ``"billing"``, ``"auth"``, or ``"other"`` when nothing
        above matches (including non-Anthropic exceptions).
    """
    # Stage 1: dispatch on the SDK's typed exception classes.
    kinds_by_type = (
        (anthropic.RateLimitError, _OPERATIONAL_ERROR_KIND_QUOTA),
        (anthropic.PermissionDeniedError, _OPERATIONAL_ERROR_KIND_BILLING),
        (anthropic.AuthenticationError, _OPERATIONAL_ERROR_KIND_AUTH),
    )
    for sdk_type, kind in kinds_by_type:
        if isinstance(exc, sdk_type):
            return kind

    # Only HTTP-status-bearing SDK errors can still be operational.
    if not isinstance(exc, anthropic.APIStatusError):
        return _OPERATIONAL_ERROR_KIND_OTHER

    # Stage 2: dispatch on the HTTP status code.
    http_status = getattr(exc, "status_code", None)
    if http_status is None:
        response = getattr(exc, "response", None)
        http_status = getattr(response, "status_code", None)

    kinds_by_status = (
        (429, _OPERATIONAL_ERROR_KIND_QUOTA),
        (402, _OPERATIONAL_ERROR_KIND_BILLING),
        (401, _OPERATIONAL_ERROR_KIND_AUTH),
    )
    for code, kind in kinds_by_status:
        if http_status == code:
            return kind
    return _OPERATIONAL_ERROR_KIND_OTHER
class AiFallbackBudgetExceeded(RuntimeError):
"""Per-run AI call budget (u1 ai_fallback_budget_per_run) exhausted."""

View File

@@ -56,6 +56,7 @@ import hashlib
import json
from typing import Any, Callable, Iterable
from src.phase_z2_ai_fallback.client import classify_operational_error
from src.phase_z2_ai_fallback.router import route_ai_fallback
from src.phase_z2_ai_fallback.signature import bucket_char_count, build_signature
@@ -96,6 +97,7 @@ def gather_step12_ai_repair_proposals(
"skip_reason": str | None,
"proposal": dict | None,
"error": str | None,
"api_error_kind": str | None, # IMP-92 u2 (quota|billing|auth|other)
"cache_key": str | None, # IMP-46 u4
"fingerprints": dict | None, # IMP-46 u4
}
@@ -130,6 +132,7 @@ def gather_step12_ai_repair_proposals(
"skip_reason": None,
"proposal": None,
"error": None,
"api_error_kind": None,
"cache_key": None,
"fingerprints": None,
}
@@ -205,6 +208,7 @@ def gather_step12_ai_repair_proposals(
except Exception as exc: # noqa: BLE001 — record + continue, no AI re-raise
record["ai_called"] = True
record["error"] = f"{type(exc).__name__}: {exc}"
record["api_error_kind"] = classify_operational_error(exc)
records.append(record)
continue
if proposal is None:

View File

@@ -789,6 +789,14 @@ def _summarize_ai_repair_status(
frontend (u11) can surface a notification per the IMP-47B policy
("AI 호출 실패 / proposal validation 실패 / coverage 미달 → frontend notification").
Pure: no IO, no AI call.
IMP-92 u3 — propagate ``api_error_kind`` (quota / billing / auth /
other) stamped by Step 12 (u2 ``classify_operational_error``) through
``ai_repair_status`` so the frontend operational formatter can route
only operational kinds (quota / billing / auth) to user-visible
alerts. ``api_error_kinds`` aggregates counts by kind at the summary
level; ``error_records[i]["api_error_kind"]`` retains the per-record
kind for unit-level surfacing.
"""
counts = {
"total": len(ai_repair_records),
@@ -798,15 +806,20 @@ def _summarize_ai_repair_status(
"unsupported_kind": 0,
"error": 0,
}
api_error_kinds = {"quota": 0, "billing": 0, "auth": 0, "other": 0}
unsupported_records: list[dict] = []
error_records: list[dict] = []
for record in ai_repair_records:
if record.get("error"):
counts["error"] += 1
kind = record.get("api_error_kind")
if kind in api_error_kinds:
api_error_kinds[kind] += 1
error_records.append({
"unit_index": record.get("unit_index"),
"source_section_ids": list(record.get("source_section_ids") or []),
"error": record.get("error"),
"api_error_kind": kind,
})
continue
apply_status = record.get("apply_status") or ""
@@ -838,6 +851,7 @@ def _summarize_ai_repair_status(
return {
"status": status,
"counts": counts,
"api_error_kinds": api_error_kinds,
"unsupported_kind_records": unsupported_records,
"error_records": error_records,
"coverage_status": coverage_status,
@@ -3588,6 +3602,114 @@ def _build_application_plan_unit(
# ─── Main entry ────────────────────────────────────────────────
class Step0PreflightError(RuntimeError):
    """Fail-fast error for the Step 0 AI fallback preflight (IMP-92 u4).

    ``_run_step0_ai_preflight`` raises this when AI fallback is enabled
    and the one-token Anthropic ping exposes a persistent setup problem,
    such as an invalid API key, an invalid model ID, or a billing /
    permission denial.

    Transient conditions such as HTTP 429 or 5xx are deliberately not
    reported through this exception: the preflight records them as
    ``"transient"`` in its result and lets the pipeline continue.
    """
def _run_step0_ai_preflight() -> dict:
    """IMP-92 u4 — Boot-time AI fallback preflight ping (gated).

    When ``settings.ai_fallback_enabled`` is False (default), returns a
    ``"skipped"`` result without instantiating ``anthropic.Anthropic``,
    so normal runs make no API call.

    When enabled, issues a single ``max_tokens=1`` ``messages.create``
    call to validate the configured ``(ai_fallback_model,
    anthropic_api_key)`` pair.

    Setup errors (fail-fast, raise ``Step0PreflightError``):
        * ``anthropic.AuthenticationError`` (401) — invalid API key
        * ``anthropic.PermissionDeniedError`` (403) — billing / permission
        * ``anthropic.NotFoundError`` (404) — invalid model ID
        * any other ``anthropic.APIStatusError`` whose HTTP status is not
          429 / 5xx (e.g. 402 Payment Required, which has no typed
          subclass and is therefore dispatched by status code)

    Transient errors (recorded, pipeline continues):
        * ``anthropic.RateLimitError`` (429)
        * ``anthropic.InternalServerError`` (5xx)
        * ``anthropic.APIConnectionError`` and its ``APITimeoutError``
          subclass — network failures carry no HTTP status and are not
          ``APIStatusError`` subclasses, so without an explicit clause
          they would escape as a raw SDK exception and abort boot
        * generic ``anthropic.APIStatusError`` with HTTP 429 / 5xx

    Returns:
        A dict with ``"status"`` set to ``"skipped"``, ``"passed"`` or
        ``"transient"``, plus ``"model"``. ``"skipped"`` adds ``"reason"``;
        ``"transient"`` adds ``"transient_error"``.

    Raises:
        Step0PreflightError: On a persistent setup error (see above). The
            original SDK exception is chained as ``__cause__``.
    """
    import anthropic

    from src.config import settings as _settings

    model = _settings.ai_fallback_model

    if not _settings.ai_fallback_enabled:
        return {
            "status": "skipped",
            "reason": "ai_fallback_disabled",
            "model": model,
        }

    def _transient_result(exc: BaseException) -> dict:
        # Shared shape for every "record and continue" outcome.
        return {
            "status": "transient",
            "model": model,
            "transient_error": f"{type(exc).__name__}: {exc}",
        }

    def _setup_failure(detail: str) -> Step0PreflightError:
        # Shared message frame for every fail-fast outcome.
        return Step0PreflightError(
            f"Anthropic API preflight failed for model "
            f"{model!r}: "
            f"{detail}. "
            "Check ANTHROPIC_API_KEY / ai_fallback_model in .env."
        )

    try:
        client = anthropic.Anthropic(
            api_key=_settings.anthropic_api_key,
            timeout=_settings.ai_fallback_timeout_s,
        )
        client.messages.create(
            model=model,
            max_tokens=1,
            messages=[{"role": "user", "content": "ping"}],
        )
    except (
        anthropic.AuthenticationError,
        anthropic.PermissionDeniedError,
        anthropic.NotFoundError,
    ) as exc:
        raise _setup_failure(f"{type(exc).__name__}: {exc}") from exc
    except (
        anthropic.RateLimitError,
        anthropic.InternalServerError,
        # Connection failures / timeouts are transient too; they are not
        # APIStatusError subclasses, so they need their own entry here.
        anthropic.APIConnectionError,
    ) as exc:
        return _transient_result(exc)
    except anthropic.APIStatusError as exc:
        # The SDK may surface a setup error as the generic APIStatusError
        # instead of a typed subclass (notably HTTP 402), so fall back to
        # dispatching on the HTTP status code.
        status_code = getattr(exc, "status_code", None)
        if status_code is None:
            status_code = getattr(
                getattr(exc, "response", None), "status_code", None
            )
        if status_code == 429 or (
            status_code is not None and 500 <= status_code < 600
        ):
            return _transient_result(exc)
        raise _setup_failure(
            f"HTTP {status_code} {type(exc).__name__}: {exc}"
        ) from exc

    return {
        "status": "passed",
        "model": model,
    }
def run_phase_z2_mvp1(
mdx_path: Path,
run_id: Optional[str] = None,
@@ -3629,6 +3751,10 @@ def run_phase_z2_mvp1(
print(f"[Phase Z-2 MVP-1.5b] start — mdx={mdx_path.name}, run_id={run_id}")
# ─── Step 0: 사전 준비 (precondition snapshot) ───
# IMP-92 u4 — boot-time AI fallback preflight (gated on
# settings.ai_fallback_enabled; default OFF = skipped, no API call).
# Persistent setup errors raise Step0PreflightError before Step 1.
ai_preflight = _run_step0_ai_preflight()
_write_step_artifact(
run_dir, 0, "preconditions",
data={
@@ -3639,6 +3765,7 @@ def run_phase_z2_mvp1(
"frame_contracts_template_ids": sorted(load_frame_contracts().keys()),
"v4_label_to_phase_z_status": V4_LABEL_TO_PHASE_Z_STATUS,
"mvp1_allowed_statuses": sorted(MVP1_ALLOWED_STATUSES),
"ai_preflight": ai_preflight,
},
step_status="partial",
pipeline_path_connected=True,