feat(#92): IMP-92 u1~u5 AI fallback config validation (model ping + operational error classification)

Replaces the #84 UI-noise removal plan with a positive operational-alert contract. Five-axis stack lands together: (1) default model literal moved to current Opus-family ID, (2) Anthropic SDK error classifier mapping exceptions to quota/billing/auth/other, (3) api_error_kind plumbed through ai_repair_status summary + per-record retention, (4) Step 0 preflight ping gated under ai_fallback_enabled (default OFF preserved) with fail-fast on invalid model/key, (5) frontend formatter rewritten to surface only operational quota/billing/auth toasts (non-operational paths return null per feedback_auto_pipeline_first silent-pipeline policy).

u1 - default model literal claude-opus-4-6-20250415 -> claude-opus-4-7 (src/config.py + tests/test_phase_z2_ai_fallback_config.py lock mirror)
u2 - classify_operational_error type+status_code dispatch + Step 12 api_error_kind stamp on except path (src/phase_z2_ai_fallback/client.py + src/phase_z2_ai_fallback/step12.py + tests/phase_z2_ai_fallback/test_step12.py)
u3 - _summarize_ai_repair_status aggregates api_error_kinds {quota, billing, auth, other}; error_records[i].api_error_kind retained per-record (src/phase_z2_pipeline.py + tests/test_imp47b_failure_surface.py)
u4 - _run_step0_ai_preflight + Step0PreflightError; preflight only fires when ai_fallback_enabled=true; one-token ping; invalid key/model => setup failure before Step 1 (src/phase_z2_pipeline.py + tests/phase_z2/test_pipeline_step0_preflight.py NEW)
u5 - AiRepairStatus.api_error_kinds? interface + formatAiRepairHumanReviewMessage rewritten: operational quota/billing/auth -> Korean copy verbatim from issue body (tie-break quota -> billing -> auth); validation/coverage_violated/unsupported_kind/generic-other/legacy payload -> null (Front/client/src/services/designAgentApi.ts + Front/client/tests/imp47b_human_review_toast.test.tsx)

Guardrails respected:
- feedback_demo_env_toggle_policy: default OFF preserved; preflight skipped when ai_fallback_enabled=false (test_preflight_skipped_when_disabled asserts anthropic.Anthropic() not called).
- feedback_auto_pipeline_first: non-operational AI failures stay silent; only quota/billing/auth reach user toast.
- feedback_ai_isolation_contract: AI remains fallback-only; no normal-path migration; MDX preserved.
- project_imp46_carveout_caveat: cache_key/fingerprints fields untouched on every record; no overlap with #62 cache region.
- feedback_no_hardcoding: zero MDX-sample-specific literals; classifier dispatch by SDK type / HTTP status code, not by string parsing.
- feedback_artifact_status_naming: operational toast scoped to alert axis, not overall PASS signal.

Tests:
- Targeted u1+u2+u3+u4: 63 passed
- u5 vitest (Front/): 10/10 passed
- tests/phase_z2_ai_fallback dir regression: 240 passed
- tests/phase_z2 dir regression: 323 passed
- IMP-92-adjacent (-k "imp47b or ai_fallback or preflight or step12 or step0"): 299 passed (808 deselected)
- u1 baseline lock (test_client_mock.py): 8 passed

Zero failures, zero regressions outside scope.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-23 22:07:25 +09:00
parent 842a46144c
commit 896f273ffa
10 changed files with 835 additions and 63 deletions
--- a/src/config.py
+++ b/src/config.py
@@ -17,7 +17,7 @@ class Settings(BaseSettings):
    # IMP-33 u1 — AI fallback policy. Fallback-path only; normal path AI=0.
    # Defaults locked by Stage 2 plan; do NOT inline literals downstream.
    ai_fallback_enabled: bool = False
-    ai_fallback_model: str = "claude-opus-4-6-20250415"
+    ai_fallback_model: str = "claude-opus-4-7"
    ai_fallback_timeout_s: float = 60.0
    ai_fallback_max_retries: int = 3
    ai_fallback_backoff_base_s: float = 1.0
--- a/src/phase_z2_ai_fallback/client.py
+++ b/src/phase_z2_ai_fallback/client.py
@@ -31,6 +31,55 @@ _TRANSIENT_ERRORS: tuple[type[BaseException], ...] = (
 # Output cap is an Anthropic API requirement, not a policy knob (u1).
 _MAX_OUTPUT_TOKENS = 4096

+# IMP-92 u2 — Anthropic SDK exception → operational error kind classifier.
+# Stamped onto Step 12 AI repair records (api_error_kind) so the frontend
+# operational alert formatter can surface quota / billing / auth to users
+# while keeping non-operational ("other") failures silent. The classifier
+# is type-based (not string parsing); these four kinds are the only values
+# it emits, and the frontend renders just quota / billing / auth.
+_OPERATIONAL_ERROR_KIND_QUOTA = "quota"
+_OPERATIONAL_ERROR_KIND_BILLING = "billing"
+_OPERATIONAL_ERROR_KIND_AUTH = "auth"
+_OPERATIONAL_ERROR_KIND_OTHER = "other"
+
+
+def classify_operational_error(exc: BaseException) -> str:
+    """Map an Anthropic SDK exception to its operational error kind.
+
+    Typed SDK subclasses are matched first; a generic ``APIStatusError``
+    is then resolved by HTTP status code, so an untyped 402 still honours
+    the 429 quota / 402 billing / 401 auth contract:
+
+      * ``anthropic.RateLimitError`` or HTTP 429              → ``"quota"``
+      * ``anthropic.PermissionDeniedError`` (403) or HTTP 402 → ``"billing"``
+      * ``anthropic.AuthenticationError`` or HTTP 401         → ``"auth"``
+      * anything else                                         → ``"other"``
+
+    Only quota / billing / auth reach the user; ``"other"`` stays silent.
+    """
+    typed_kinds = (
+        (anthropic.RateLimitError, _OPERATIONAL_ERROR_KIND_QUOTA),
+        (anthropic.PermissionDeniedError, _OPERATIONAL_ERROR_KIND_BILLING),
+        (anthropic.AuthenticationError, _OPERATIONAL_ERROR_KIND_AUTH),
+    )
+    for sdk_type, kind in typed_kinds:
+        if isinstance(exc, sdk_type):
+            return kind
+    if not isinstance(exc, anthropic.APIStatusError):
+        return _OPERATIONAL_ERROR_KIND_OTHER
+    http_status = getattr(exc, "status_code", None)
+    if http_status is None:
+        http_status = getattr(getattr(exc, "response", None), "status_code", None)
+    status_kinds = (
+        (429, _OPERATIONAL_ERROR_KIND_QUOTA),
+        (402, _OPERATIONAL_ERROR_KIND_BILLING),
+        (401, _OPERATIONAL_ERROR_KIND_AUTH),
+    )
+    for code, kind in status_kinds:
+        if http_status == code:
+            return kind
+    return _OPERATIONAL_ERROR_KIND_OTHER
+

 class AiFallbackBudgetExceeded(RuntimeError):
    """Per-run AI call budget (u1 ai_fallback_budget_per_run) exhausted."""
--- a/src/phase_z2_ai_fallback/step12.py
+++ b/src/phase_z2_ai_fallback/step12.py
@@ -56,6 +56,7 @@ import hashlib
 import json
 from typing import Any, Callable, Iterable

+from src.phase_z2_ai_fallback.client import classify_operational_error
 from src.phase_z2_ai_fallback.router import route_ai_fallback
 from src.phase_z2_ai_fallback.signature import bucket_char_count, build_signature

@@ -96,6 +97,7 @@ def gather_step12_ai_repair_proposals(
          "skip_reason": str | None,
          "proposal": dict | None,
          "error": str | None,
+          "api_error_kind": str | None,   # IMP-92 u2 (quota|billing|auth|other)
          "cache_key": str | None,        # IMP-46 u4
          "fingerprints": dict | None,    # IMP-46 u4
        }
@@ -130,6 +132,7 @@ def gather_step12_ai_repair_proposals(
            "skip_reason": None,
            "proposal": None,
            "error": None,
+            "api_error_kind": None,
            "cache_key": None,
            "fingerprints": None,
        }
@@ -205,6 +208,7 @@ def gather_step12_ai_repair_proposals(
        except Exception as exc:  # noqa: BLE001 — record + continue, no AI re-raise
            record["ai_called"] = True
            record["error"] = f"{type(exc).__name__}: {exc}"
+            record["api_error_kind"] = classify_operational_error(exc)
            records.append(record)
            continue
        if proposal is None:
--- a/src/phase_z2_pipeline.py
+++ b/src/phase_z2_pipeline.py
@@ -789,6 +789,14 @@ def _summarize_ai_repair_status(
    frontend (u11) can surface a notification per the IMP-47B policy
    ("AI 호출 실패 / proposal validation 실패 / coverage 미달 → frontend notification").
    Pure: no IO, no AI call.
+
+    IMP-92 u3 — propagate ``api_error_kind`` (quota / billing / auth /
+    other) stamped by Step 12 (u2 ``classify_operational_error``) through
+    ``ai_repair_status`` so the frontend operational formatter can route
+    only operational kinds (quota / billing / auth) to user-visible
+    alerts. ``api_error_kinds`` aggregates counts by kind at the summary
+    level; ``error_records[i]["api_error_kind"]`` retains the per-record
+    kind for unit-level surfacing.
    """
    counts = {
        "total": len(ai_repair_records),
@@ -798,15 +806,20 @@ def _summarize_ai_repair_status(
        "unsupported_kind": 0,
        "error": 0,
    }
+    api_error_kinds = {"quota": 0, "billing": 0, "auth": 0, "other": 0}
    unsupported_records: list[dict] = []
    error_records: list[dict] = []
    for record in ai_repair_records:
        if record.get("error"):
            counts["error"] += 1
+            kind = record.get("api_error_kind")
+            if kind in api_error_kinds:
+                api_error_kinds[kind] += 1
            error_records.append({
                "unit_index": record.get("unit_index"),
                "source_section_ids": list(record.get("source_section_ids") or []),
                "error": record.get("error"),
+                "api_error_kind": kind,
            })
            continue
        apply_status = record.get("apply_status") or ""
@@ -838,6 +851,7 @@ def _summarize_ai_repair_status(
    return {
        "status": status,
        "counts": counts,
+        "api_error_kinds": api_error_kinds,
        "unsupported_kind_records": unsupported_records,
        "error_records": error_records,
        "coverage_status": coverage_status,
@@ -3588,6 +3602,114 @@ def _build_application_plan_unit(

 # ─── Main entry ────────────────────────────────────────────────

+
+class Step0PreflightError(RuntimeError):
+    """IMP-92 u4 — fail-fast error raised by the Step 0 AI preflight.
+
+    ``_run_step0_ai_preflight`` raises it only when
+    ``settings.ai_fallback_enabled`` is True and the Anthropic ping fails
+    with a persistent setup problem: invalid API key (401), billing /
+    permission denied (402 / 403), invalid model ID (404), or any other
+    non-429, non-5xx ``APIStatusError``. HTTP 429 / 5xx are reported as
+    ``"transient"`` in the Step 0 artifact instead and never raise this.
+    """
+
+
+def _run_step0_ai_preflight() -> dict:
+    """IMP-92 u4 — Boot-time AI fallback preflight ping (gated).
+
+    When ``settings.ai_fallback_enabled`` is False (default), returns
+    ``{"status": "skipped", "reason": "ai_fallback_disabled", ...}``
+    without instantiating ``anthropic.Anthropic`` — preserves the PZ-1
+    AI=0 normal path and the ``feedback_demo_env_toggle_policy``
+    default-OFF contract (no API call on normal runs).
+
+    When enabled, issues one 1-token Anthropic ``messages.create`` call to
+    validate the configured ``(ai_fallback_model, anthropic_api_key)`` pair.
+
+    Returns:
+        Dict with ``status`` (``skipped`` / ``passed`` / ``transient``) and
+        ``model``, plus ``reason`` (skipped) or ``transient_error`` (transient).
+
+    Raises:
+        Step0PreflightError: persistent setup error; fail fast at boot.
+
+    Setup errors (fail-fast):
+      * ``anthropic.AuthenticationError`` (401)     — invalid API key
+      * ``anthropic.PermissionDeniedError`` (403)   — billing / permission
+      * ``anthropic.NotFoundError`` (404)           — invalid model ID
+      * generic ``anthropic.APIStatusError`` with any other non-429,
+        non-5xx status, e.g. 402 billing / payment required (dispatched
+        by HTTP status code because no typed subclass exists for it).
+
+    Transient (record + continue):
+      * ``anthropic.RateLimitError`` (429)
+      * ``anthropic.InternalServerError`` (5xx)
+      * ``anthropic.APIConnectionError`` (incl. ``APITimeoutError``) —
+        network / timeout failure; proves nothing about the key or model
+      * generic ``anthropic.APIStatusError`` with HTTP 429 / 5xx
+    """
+    import anthropic
+
+    from src.config import settings as _settings
+
+    model = _settings.ai_fallback_model
+
+    def _transient(exc: Exception) -> dict:
+        # Shared artifact shape for every "record + continue" outcome.
+        return {
+            "status": "transient",
+            "model": model,
+            "transient_error": f"{type(exc).__name__}: {exc}",
+        }
+
+    if not _settings.ai_fallback_enabled:
+        return {
+            "status": "skipped",
+            "reason": "ai_fallback_disabled",
+            "model": model,
+        }
+    try:
+        client = anthropic.Anthropic(
+            api_key=_settings.anthropic_api_key,
+            timeout=_settings.ai_fallback_timeout_s,
+        )
+        client.messages.create(
+            model=model,
+            max_tokens=1,
+            messages=[{"role": "user", "content": "ping"}],
+        )
+    except (
+        anthropic.AuthenticationError,
+        anthropic.PermissionDeniedError,
+        anthropic.NotFoundError,
+    ) as exc:
+        raise Step0PreflightError(
+            f"Anthropic API preflight failed for model {model!r}: "
+            f"{type(exc).__name__}: {exc}. "
+            "Check ANTHROPIC_API_KEY / ai_fallback_model in .env."
+        ) from exc
+    except (
+        anthropic.RateLimitError,
+        anthropic.InternalServerError,
+        anthropic.APIConnectionError,  # also covers APITimeoutError
+    ) as exc:
+        return _transient(exc)
+    except anthropic.APIStatusError as exc:
+        # Untyped status error: classify by HTTP status code (mirrors u2).
+        status_code = getattr(exc, "status_code", None)
+        if status_code is None:
+            status_code = getattr(getattr(exc, "response", None), "status_code", None)
+        if status_code == 429 or (status_code is not None and 500 <= status_code < 600):
+            return _transient(exc)
+        raise Step0PreflightError(
+            f"Anthropic API preflight failed for model {model!r}: "
+            f"HTTP {status_code} {type(exc).__name__}: {exc}. "
+            "Check ANTHROPIC_API_KEY / ai_fallback_model in .env."
+        ) from exc
+    return {"status": "passed", "model": model}
+
+
 def run_phase_z2_mvp1(
    mdx_path: Path,
    run_id: Optional[str] = None,
@@ -3629,6 +3751,10 @@ def run_phase_z2_mvp1(
    print(f"[Phase Z-2 MVP-1.5b] start — mdx={mdx_path.name}, run_id={run_id}")

    # ─── Step 0: 사전 준비 (precondition snapshot) ───
+    # IMP-92 u4 — boot-time AI fallback preflight (gated on
+    # settings.ai_fallback_enabled; default OFF = skipped, no API call).
+    # Persistent setup errors raise Step0PreflightError before Step 1.
+    ai_preflight = _run_step0_ai_preflight()
    _write_step_artifact(
        run_dir, 0, "preconditions",
        data={
@@ -3639,6 +3765,7 @@ def run_phase_z2_mvp1(
            "frame_contracts_template_ids": sorted(load_frame_contracts().keys()),
            "v4_label_to_phase_z_status": V4_LABEL_TO_PHASE_Z_STATUS,
            "mvp1_allowed_statuses": sorted(MVP1_ALLOWED_STATUSES),
+            "ai_preflight": ai_preflight,
        },
        step_status="partial",
        pipeline_path_connected=True,