diff --git a/Front/client/src/services/designAgentApi.ts b/Front/client/src/services/designAgentApi.ts index f6a9240..272a0a3 100644 --- a/Front/client/src/services/designAgentApi.ts +++ b/Front/client/src/services/designAgentApi.ts @@ -235,6 +235,15 @@ export interface AiRepairStatus { unsupported_kind: number; error: number; }; + // IMP-92 u3 — per-kind operational error aggregates plumbed from Step 12 + // (u2 classify_operational_error). Optional for backward compatibility + // with pre-u3 payloads — u5 formatter treats absence as silent. + api_error_kinds?: { + quota: number; + billing: number; + auth: number; + other: number; + }; unsupported_kind_records: Array<{ unit_index?: number | null; source_section_ids: string[]; @@ -244,6 +253,8 @@ export interface AiRepairStatus { unit_index?: number | null; source_section_ids: string[]; error: string; + // IMP-92 u3 — per-record operational error kind (quota|billing|auth|other|null). + api_error_kind?: string | null; }>; coverage_status: string; dropped_section_ids: string[]; @@ -267,23 +278,32 @@ export interface RunMeta { ai_repair_status: AiRepairStatus | null; } +// IMP-92 u5 — Operational-only AI repair message formatter. +// +// Per the #84 operational-vs-non-operational replacement-plan contract, this +// returns a user-visible toast string ONLY when ai_repair_status carries one +// of the three actionable Anthropic API error kinds plumbed by u3 +// (quota / billing / auth). Non-operational AI failures (validation, +// coverage_violated, unsupported_kind, or generic "other" API errors) return +// null so the auto-pipeline stays silent per feedback_auto_pipeline_first. +// Messages mirror the issue body copy contract exactly (429/402/401 → +// quota/billing/auth Korean strings). export function formatAiRepairHumanReviewMessage( ai: AiRepairStatus | null | undefined, ): string | null { - if (!ai || !ai.human_review_required) return null; - if (ai.status === "error") { - const n = ai.counts?.error ?? 
ai.error_records?.length ?? 0; - return `AI 재구성 호출 실패 (${n}건) — 다른 frame 선택 또는 수동 편집 필요`; + if (!ai) return null; + const kinds = ai.api_error_kinds; + if (!kinds) return null; + if (kinds.quota > 0) { + return `API quota 부족 — 충전 필요 (${kinds.quota}건)`; } - if (ai.status === "coverage_violated") { - const dropped = (ai.dropped_section_ids || []).join(", "); - return `AI 재구성 후 콘텐츠 누락 (dropped: ${dropped || "?"}) — 다른 frame 선택 또는 수동 편집 필요`; + if (kinds.billing > 0) { + return `API billing 문제 — 결제 정보 확인 (${kinds.billing}건)`; } - if (ai.status === "unsupported_kind") { - const n = ai.counts?.unsupported_kind ?? ai.unsupported_kind_records?.length ?? 0; - return `AI 제안 형식 미지원 (${n}건) — 다른 frame 선택 또는 수동 편집 필요`; + if (kinds.auth > 0) { + return `API key 무효 — .env 확인 (${kinds.auth}건)`; } - return `AI 재구성 human_review 필요 (status: ${ai.status})`; + return null; } export interface LoadRunResult { diff --git a/Front/client/tests/imp47b_human_review_toast.test.tsx b/Front/client/tests/imp47b_human_review_toast.test.tsx index 712e3cf..51d6a9d 100644 --- a/Front/client/tests/imp47b_human_review_toast.test.tsx +++ b/Front/client/tests/imp47b_human_review_toast.test.tsx @@ -1,20 +1,28 @@ -// IMP-47B u11 — Frontend ai_repair_status notification surfacing. +// IMP-92 u5 — Frontend AI repair operational-only formatter test surface. // -// Scope (Stage 2 unit u11 contract): -// 1) loadRun → RunMeta.ai_repair_status exposes the u8 step20 payload. -// 2) formatAiRepairHumanReviewMessage(...) returns user-facing notification -// text on the three failure axes (error / coverage_violated / -// unsupported_kind) and returns null on success / no-AI paths. +// Scope (Stage 2 unit u5 contract): +// 1) formatAiRepairHumanReviewMessage(...) surfaces a user-facing toast +// ONLY on the three operational Anthropic API error kinds (quota / +// billing / auth) classified by Step 12 u2 +// (classify_operational_error) and aggregated through u3 +// ai_repair_status.api_error_kinds. 
+// 2) Non-operational AI failures (validation / coverage_violated / +// unsupported_kind / generic "other") return null so the +// auto-pipeline stays silent per feedback_auto_pipeline_first and +// the #84 operational-vs-non-operational replacement-plan contract. +// 3) Replaces the prior IMP-47B u11 surface, which previously rendered +// toasts for error / coverage_violated / unsupported_kind. After IMP-92 +// ONLY operational errors reach the user; non-operational ones stay silent. // // Pure-function unit test (no React Testing Library required — vitest is // already in devDependencies; @testing-library/* is NOT installed). The -// Home.tsx wiring is a 2-line site that calls this helper after -// setRunMeta(...); covering the helper covers the user-visible message text -// directly without DOM rendering. +// Home.tsx wiring is a 2-line site (`Home.tsx:438`) that calls this helper +// after `setRunMeta(...)`; covering the helper covers the user-visible +// message text directly without DOM rendering. // -// File extension is `.tsx` per Stage 2 unit contract path; no JSX is required -// for these assertions but the extension allows future RTL-based tests to -// land here without renaming. +// The test file path is preserved from IMP-47B u11 (Stage 2 plan +// `Front/client/tests/imp47b_human_review_toast.test.tsx`); the assertions +// inside reflect the IMP-92 u5 operational-only contract. 
import { describe, it, expect } from "vitest"; import { @@ -23,7 +31,7 @@ import { } from "../src/services/designAgentApi"; const baseCounts = { - total: 1, + total: 0, applied: 0, no_proposal: 0, no_zone_match: 0, @@ -31,16 +39,19 @@ const baseCounts = { error: 0, }; -describe("formatAiRepairHumanReviewMessage (IMP-47B u11)", () => { - it("returns null when ai_repair_status is null (legacy / pre-Step12 abort)", () => { +const zeroKinds = { quota: 0, billing: 0, auth: 0, other: 0 }; + +describe("formatAiRepairHumanReviewMessage (IMP-92 u5 — operational-only)", () => { + it("returns null when ai_repair_status is null / undefined", () => { expect(formatAiRepairHumanReviewMessage(null)).toBeNull(); expect(formatAiRepairHumanReviewMessage(undefined)).toBeNull(); }); - it("returns null when human_review_required=false (success / no-AI path)", () => { + it("returns null on success / no-AI path (no operational kind present)", () => { const ok: AiRepairStatus = { status: "ok", - counts: { ...baseCounts, total: 0 }, + counts: { ...baseCounts }, + api_error_kinds: { ...zeroKinds }, unsupported_kind_records: [], error_records: [], coverage_status: "ok", @@ -57,47 +68,127 @@ describe("formatAiRepairHumanReviewMessage (IMP-47B u11)", () => { expect(formatAiRepairHumanReviewMessage(applied)).toBeNull(); }); - it("surfaces AI call failures with count + frame/manual guidance", () => { - const errored: AiRepairStatus = { + it("surfaces quota operational alert (Anthropic 429 / RateLimitError)", () => { + const ai: AiRepairStatus = { status: "error", counts: { ...baseCounts, total: 2, error: 2 }, + api_error_kinds: { quota: 2, billing: 0, auth: 0, other: 0 }, unsupported_kind_records: [], error_records: [ - { unit_index: 0, source_section_ids: ["03-1"], error: "timeout" }, - { unit_index: 1, source_section_ids: ["03-2"], error: "validation" }, + { + unit_index: 0, + source_section_ids: ["03-1"], + error: "RateLimitError: rate_limit_exceeded", + api_error_kind: "quota", + }, + { + 
unit_index: 1, + source_section_ids: ["03-2"], + error: "RateLimitError: rate_limit_exceeded", + api_error_kind: "quota", + }, ], coverage_status: "ok", dropped_section_ids: [], human_review_required: true, }; - const msg = formatAiRepairHumanReviewMessage(errored); + const msg = formatAiRepairHumanReviewMessage(ai); expect(msg).not.toBeNull(); - expect(msg).toContain("AI 재구성 호출 실패"); + expect(msg).toContain("API quota"); + expect(msg).toContain("충전 필요"); expect(msg).toContain("2"); - expect(msg).toContain("다른 frame 선택 또는 수동 편집 필요"); }); - it("surfaces coverage violations with the dropped section ids", () => { - const dropped: AiRepairStatus = { + it("surfaces billing operational alert (Anthropic 402 / PermissionDeniedError)", () => { + const ai: AiRepairStatus = { + status: "error", + counts: { ...baseCounts, total: 1, error: 1 }, + api_error_kinds: { quota: 0, billing: 1, auth: 0, other: 0 }, + unsupported_kind_records: [], + error_records: [ + { + unit_index: 0, + source_section_ids: ["03-1"], + error: "PermissionDeniedError: insufficient credits", + api_error_kind: "billing", + }, + ], + coverage_status: "ok", + dropped_section_ids: [], + human_review_required: true, + }; + const msg = formatAiRepairHumanReviewMessage(ai); + expect(msg).not.toBeNull(); + expect(msg).toContain("API billing"); + expect(msg).toContain("결제 정보 확인"); + expect(msg).toContain("1"); + }); + + it("surfaces auth operational alert (Anthropic 401 / AuthenticationError)", () => { + const ai: AiRepairStatus = { + status: "error", + counts: { ...baseCounts, total: 1, error: 1 }, + api_error_kinds: { quota: 0, billing: 0, auth: 1, other: 0 }, + unsupported_kind_records: [], + error_records: [ + { + unit_index: 0, + source_section_ids: ["03-1"], + error: "AuthenticationError: invalid x-api-key", + api_error_kind: "auth", + }, + ], + coverage_status: "ok", + dropped_section_ids: [], + human_review_required: true, + }; + const msg = formatAiRepairHumanReviewMessage(ai); + 
expect(msg).not.toBeNull(); + expect(msg).toContain("API key 무효"); + expect(msg).toContain(".env"); + expect(msg).toContain("1"); + }); + + it("returns null on generic non-operational 'other' API error (silent)", () => { + const ai: AiRepairStatus = { + status: "error", + counts: { ...baseCounts, total: 1, error: 1 }, + api_error_kinds: { quota: 0, billing: 0, auth: 0, other: 1 }, + unsupported_kind_records: [], + error_records: [ + { + unit_index: 0, + source_section_ids: ["03-1"], + error: "ValidationError: proposal failed schema", + api_error_kind: "other", + }, + ], + coverage_status: "ok", + dropped_section_ids: [], + human_review_required: true, + }; + expect(formatAiRepairHumanReviewMessage(ai)).toBeNull(); + }); + + it("returns null on coverage_violated (non-operational, silent)", () => { + const ai: AiRepairStatus = { status: "coverage_violated", counts: { ...baseCounts, total: 1, applied: 1 }, + api_error_kinds: { ...zeroKinds }, unsupported_kind_records: [], error_records: [], coverage_status: "violated", dropped_section_ids: ["03-2"], human_review_required: true, }; - const msg = formatAiRepairHumanReviewMessage(dropped); - expect(msg).not.toBeNull(); - expect(msg).toContain("콘텐츠 누락"); - expect(msg).toContain("03-2"); - expect(msg).toContain("다른 frame 선택 또는 수동 편집 필요"); + expect(formatAiRepairHumanReviewMessage(ai)).toBeNull(); }); - it("surfaces unsupported proposal kinds with the unsupported count", () => { - const unsupported: AiRepairStatus = { + it("returns null on unsupported_kind (non-operational, silent)", () => { + const ai: AiRepairStatus = { status: "unsupported_kind", counts: { ...baseCounts, total: 1, unsupported_kind: 1 }, + api_error_kinds: { ...zeroKinds }, unsupported_kind_records: [ { unit_index: 0, @@ -110,26 +201,57 @@ describe("formatAiRepairHumanReviewMessage (IMP-47B u11)", () => { dropped_section_ids: [], human_review_required: true, }; - const msg = formatAiRepairHumanReviewMessage(unsupported); - expect(msg).not.toBeNull(); - 
expect(msg).toContain("AI 제안 형식 미지원"); - expect(msg).toContain("1"); - expect(msg).toContain("다른 frame 선택 또는 수동 편집 필요"); + expect(formatAiRepairHumanReviewMessage(ai)).toBeNull(); }); - it("falls back to a generic human_review message on unknown status enums", () => { - const future: AiRepairStatus = { - status: "future_axis_not_yet_mapped", - counts: { ...baseCounts, total: 0 }, + it("returns null on legacy ai_repair_status without api_error_kinds (pre-u3 runs)", () => { + // Backward-compat: payloads emitted before u3 plumbing landed don't + // carry api_error_kinds. Operational-only contract treats the absence + // as "no operational signal" → silent (no toast). + const legacy: AiRepairStatus = { + status: "error", + counts: { ...baseCounts, total: 1, error: 1 }, + // api_error_kinds intentionally omitted unsupported_kind_records: [], - error_records: [], + error_records: [ + { unit_index: 0, source_section_ids: ["03-1"], error: "timeout" }, + ], coverage_status: "ok", dropped_section_ids: [], human_review_required: true, }; - const msg = formatAiRepairHumanReviewMessage(future); + expect(formatAiRepairHumanReviewMessage(legacy)).toBeNull(); + }); + + it("prioritises quota when multiple operational kinds co-occur", () => { + // Defensive: a run that accumulated quota + billing errors across + // multiple AI repair attempts surfaces the quota line first (the + // most-frequently actionable per the issue body ordering). 
+ const ai: AiRepairStatus = { + status: "error", + counts: { ...baseCounts, total: 2, error: 2 }, + api_error_kinds: { quota: 1, billing: 1, auth: 0, other: 0 }, + unsupported_kind_records: [], + error_records: [ + { + unit_index: 0, + source_section_ids: ["03-1"], + error: "RateLimitError", + api_error_kind: "quota", + }, + { + unit_index: 1, + source_section_ids: ["03-2"], + error: "PermissionDeniedError", + api_error_kind: "billing", + }, + ], + coverage_status: "ok", + dropped_section_ids: [], + human_review_required: true, + }; + const msg = formatAiRepairHumanReviewMessage(ai); expect(msg).not.toBeNull(); - expect(msg).toContain("human_review"); - expect(msg).toContain("future_axis_not_yet_mapped"); + expect(msg).toContain("API quota"); }); }); diff --git a/src/config.py b/src/config.py index fec6a38..772008e 100644 --- a/src/config.py +++ b/src/config.py @@ -17,7 +17,7 @@ class Settings(BaseSettings): # IMP-33 u1 — AI fallback policy. Fallback-path only; normal path AI=0. # Defaults locked by Stage 2 plan; do NOT inline literals downstream. ai_fallback_enabled: bool = False - ai_fallback_model: str = "claude-opus-4-6-20250415" + ai_fallback_model: str = "claude-opus-4-7" ai_fallback_timeout_s: float = 60.0 ai_fallback_max_retries: int = 3 ai_fallback_backoff_base_s: float = 1.0 diff --git a/src/phase_z2_ai_fallback/client.py b/src/phase_z2_ai_fallback/client.py index bc126a3..61450b3 100644 --- a/src/phase_z2_ai_fallback/client.py +++ b/src/phase_z2_ai_fallback/client.py @@ -31,6 +31,55 @@ _TRANSIENT_ERRORS: tuple[type[BaseException], ...] = ( # Output cap is an Anthropic API requirement, not a policy knob (u1). _MAX_OUTPUT_TOKENS = 4096 +# IMP-92 u2 — Anthropic SDK exception → operational error kind classifier. +# Stamped onto Step 12 AI repair records (api_error_kind) so the frontend +# operational alert formatter can surface quota / billing / auth to users +# while keeping non-operational ("other") failures silent. 
The classifier +# is type-based (not string parsing) and the four kinds are the only +# values frontend operational formatter is allowed to render. +_OPERATIONAL_ERROR_KIND_QUOTA = "quota" +_OPERATIONAL_ERROR_KIND_BILLING = "billing" +_OPERATIONAL_ERROR_KIND_AUTH = "auth" +_OPERATIONAL_ERROR_KIND_OTHER = "other" + + +def classify_operational_error(exc: BaseException) -> str: + """Return the operational error kind for an Anthropic SDK exception. + + Dispatch combines SDK exception type with the HTTP status code so the + issue body's explicit operational contract (429 quota / 402 billing / + 401 auth) is honoured even when the SDK surfaces a 402 as the generic + ``anthropic.APIStatusError`` rather than a typed subclass: + + * ``anthropic.RateLimitError`` OR HTTP 429 → ``"quota"`` + * ``anthropic.PermissionDeniedError`` OR HTTP 402 → ``"billing"`` + (Anthropic Payment Required surfaces as 402; PermissionDenied/403 + is the SDK-typed billing/permission surface) + * ``anthropic.AuthenticationError`` OR HTTP 401 → ``"auth"`` + * everything else → ``"other"`` (silent on UI) + + The frontend formatter renders quota / billing / auth and returns + ``None`` for ``"other"`` so non-operational AI failures stay silent + per the #84 replacement-plan contract. 
+ """ + if isinstance(exc, anthropic.RateLimitError): + return _OPERATIONAL_ERROR_KIND_QUOTA + if isinstance(exc, anthropic.PermissionDeniedError): + return _OPERATIONAL_ERROR_KIND_BILLING + if isinstance(exc, anthropic.AuthenticationError): + return _OPERATIONAL_ERROR_KIND_AUTH + if isinstance(exc, anthropic.APIStatusError): + status_code = getattr(exc, "status_code", None) + if status_code is None: + status_code = getattr(getattr(exc, "response", None), "status_code", None) + if status_code == 429: + return _OPERATIONAL_ERROR_KIND_QUOTA + if status_code == 402: + return _OPERATIONAL_ERROR_KIND_BILLING + if status_code == 401: + return _OPERATIONAL_ERROR_KIND_AUTH + return _OPERATIONAL_ERROR_KIND_OTHER + class AiFallbackBudgetExceeded(RuntimeError): """Per-run AI call budget (u1 ai_fallback_budget_per_run) exhausted.""" diff --git a/src/phase_z2_ai_fallback/step12.py b/src/phase_z2_ai_fallback/step12.py index b2406ef..7915e2f 100644 --- a/src/phase_z2_ai_fallback/step12.py +++ b/src/phase_z2_ai_fallback/step12.py @@ -56,6 +56,7 @@ import hashlib import json from typing import Any, Callable, Iterable +from src.phase_z2_ai_fallback.client import classify_operational_error from src.phase_z2_ai_fallback.router import route_ai_fallback from src.phase_z2_ai_fallback.signature import bucket_char_count, build_signature @@ -96,6 +97,7 @@ def gather_step12_ai_repair_proposals( "skip_reason": str | None, "proposal": dict | None, "error": str | None, + "api_error_kind": str | None, # IMP-92 u2 (quota|billing|auth|other) "cache_key": str | None, # IMP-46 u4 "fingerprints": dict | None, # IMP-46 u4 } @@ -130,6 +132,7 @@ def gather_step12_ai_repair_proposals( "skip_reason": None, "proposal": None, "error": None, + "api_error_kind": None, "cache_key": None, "fingerprints": None, } @@ -205,6 +208,7 @@ def gather_step12_ai_repair_proposals( except Exception as exc: # noqa: BLE001 — record + continue, no AI re-raise record["ai_called"] = True record["error"] = 
f"{type(exc).__name__}: {exc}" + record["api_error_kind"] = classify_operational_error(exc) records.append(record) continue if proposal is None: diff --git a/src/phase_z2_pipeline.py b/src/phase_z2_pipeline.py index 1ba04cf..be46bc4 100644 --- a/src/phase_z2_pipeline.py +++ b/src/phase_z2_pipeline.py @@ -789,6 +789,14 @@ def _summarize_ai_repair_status( frontend (u11) can surface a notification per the IMP-47B policy ("AI 호출 실패 / proposal validation 실패 / coverage 미달 → frontend notification"). Pure: no IO, no AI call. + + IMP-92 u3 — propagate ``api_error_kind`` (quota / billing / auth / + other) stamped by Step 12 (u2 ``classify_operational_error``) through + ``ai_repair_status`` so the frontend operational formatter can route + only operational kinds (quota / billing / auth) to user-visible + alerts. ``api_error_kinds`` aggregates counts by kind at the summary + level; ``error_records[i]["api_error_kind"]`` retains the per-record + kind for unit-level surfacing. """ counts = { "total": len(ai_repair_records), @@ -798,15 +806,20 @@ def _summarize_ai_repair_status( "unsupported_kind": 0, "error": 0, } + api_error_kinds = {"quota": 0, "billing": 0, "auth": 0, "other": 0} unsupported_records: list[dict] = [] error_records: list[dict] = [] for record in ai_repair_records: if record.get("error"): counts["error"] += 1 + kind = record.get("api_error_kind") + if kind in api_error_kinds: + api_error_kinds[kind] += 1 error_records.append({ "unit_index": record.get("unit_index"), "source_section_ids": list(record.get("source_section_ids") or []), "error": record.get("error"), + "api_error_kind": kind, }) continue apply_status = record.get("apply_status") or "" @@ -838,6 +851,7 @@ def _summarize_ai_repair_status( return { "status": status, "counts": counts, + "api_error_kinds": api_error_kinds, "unsupported_kind_records": unsupported_records, "error_records": error_records, "coverage_status": coverage_status, @@ -3588,6 +3602,114 @@ def _build_application_plan_unit( # ─── Main 
entry ──────────────────────────────────────────────── + +class Step0PreflightError(RuntimeError): + """IMP-92 u4 — Step 0 AI preflight fail-fast surface. + + Raised at boot when ``settings.ai_fallback_enabled`` is True and the + Anthropic API ping reveals a persistent setup problem (invalid API + key, invalid model ID, billing / permission denied). Transient errors + (429 / 5xx) do NOT fail boot — they are recorded as ``"transient"`` + in the Step 0 artifact and the pipeline proceeds; the in-pipeline + retry layer + u2 operational classifier handle them downstream. + """ + + +def _run_step0_ai_preflight() -> dict: + """IMP-92 u4 — Boot-time AI fallback preflight ping (gated). + + When ``settings.ai_fallback_enabled`` is False (default), returns + ``{"status": "skipped", "reason": "ai_fallback_disabled", ...}`` + without instantiating ``anthropic.Anthropic`` — preserves the PZ-1 + AI=0 normal path and the ``feedback_demo_env_toggle_policy`` + default-OFF contract (no API call on normal runs). + + When enabled, issues a single 1-token Anthropic ``messages.create`` + to validate the configured ``(ai_fallback_model, anthropic_api_key)`` + pair. Persistent setup errors raise ``Step0PreflightError`` so the + pipeline fails fast at boot rather than at first AI repair attempt. + Transient errors are recorded as ``"transient"`` and the pipeline + continues. + + Setup errors (fail-fast): + * ``anthropic.AuthenticationError`` (401) — invalid API key + * ``anthropic.PermissionDeniedError`` (403) — billing / permission + * ``anthropic.NotFoundError`` (404) — invalid model ID + * generic ``anthropic.APIStatusError`` (402) — billing / payment + required (Anthropic surfaces 402 without a typed subclass; + dispatched here by HTTP status code, mirroring u2 + ``classify_operational_error``). 
+ + Transient (record + continue): + * ``anthropic.RateLimitError`` (429) + * ``anthropic.InternalServerError`` (5xx) + * generic ``anthropic.APIStatusError`` with HTTP 429 / 5xx + """ + import anthropic + + from src.config import settings as _settings + + if not _settings.ai_fallback_enabled: + return { + "status": "skipped", + "reason": "ai_fallback_disabled", + "model": _settings.ai_fallback_model, + } + try: + client = anthropic.Anthropic( + api_key=_settings.anthropic_api_key, + timeout=_settings.ai_fallback_timeout_s, + ) + client.messages.create( + model=_settings.ai_fallback_model, + max_tokens=1, + messages=[{"role": "user", "content": "ping"}], + ) + except ( + anthropic.AuthenticationError, + anthropic.PermissionDeniedError, + anthropic.NotFoundError, + ) as exc: + raise Step0PreflightError( + f"Anthropic API preflight failed for model " + f"{_settings.ai_fallback_model!r}: " + f"{type(exc).__name__}: {exc}. " + "Check ANTHROPIC_API_KEY / ai_fallback_model in .env." + ) from exc + except (anthropic.RateLimitError, anthropic.InternalServerError) as exc: + return { + "status": "transient", + "model": _settings.ai_fallback_model, + "transient_error": f"{type(exc).__name__}: {exc}", + } + except anthropic.APIStatusError as exc: + # IMP-92 u4 — fall back to HTTP status code dispatch when the SDK + # surfaces a setup error as the generic ``APIStatusError`` instead + # of a typed subclass. Mirrors u2 ``classify_operational_error`` + # so HTTP 402 (Payment Required / billing) becomes a fail-fast + # Step0PreflightError, matching the issue body's explicit + # operational contract. 
+ status_code = getattr(exc, "status_code", None) + if status_code is None: + status_code = getattr(getattr(exc, "response", None), "status_code", None) + if status_code == 429 or (status_code is not None and 500 <= status_code < 600): + return { + "status": "transient", + "model": _settings.ai_fallback_model, + "transient_error": f"{type(exc).__name__}: {exc}", + } + raise Step0PreflightError( + f"Anthropic API preflight failed for model " + f"{_settings.ai_fallback_model!r}: " + f"HTTP {status_code} {type(exc).__name__}: {exc}. " + "Check ANTHROPIC_API_KEY / ai_fallback_model in .env." + ) from exc + return { + "status": "passed", + "model": _settings.ai_fallback_model, + } + + def run_phase_z2_mvp1( mdx_path: Path, run_id: Optional[str] = None, @@ -3629,6 +3751,10 @@ def run_phase_z2_mvp1( print(f"[Phase Z-2 MVP-1.5b] start — mdx={mdx_path.name}, run_id={run_id}") # ─── Step 0: 사전 준비 (precondition snapshot) ─── + # IMP-92 u4 — boot-time AI fallback preflight (gated on + # settings.ai_fallback_enabled; default OFF = skipped, no API call). + # Persistent setup errors raise Step0PreflightError before Step 1. + ai_preflight = _run_step0_ai_preflight() _write_step_artifact( run_dir, 0, "preconditions", data={ @@ -3639,6 +3765,7 @@ def run_phase_z2_mvp1( "frame_contracts_template_ids": sorted(load_frame_contracts().keys()), "v4_label_to_phase_z_status": V4_LABEL_TO_PHASE_Z_STATUS, "mvp1_allowed_statuses": sorted(MVP1_ALLOWED_STATUSES), + "ai_preflight": ai_preflight, }, step_status="partial", pipeline_path_connected=True, diff --git a/tests/phase_z2/test_pipeline_step0_preflight.py b/tests/phase_z2/test_pipeline_step0_preflight.py new file mode 100644 index 0000000..c98bb81 --- /dev/null +++ b/tests/phase_z2/test_pipeline_step0_preflight.py @@ -0,0 +1,214 @@ +"""IMP-92 u4 — Step 0 AI preflight unit tests. 
+ +Scope (Stage 2 plan, u4): + - ``settings.ai_fallback_enabled=False`` → preflight short-circuits to + ``"skipped"`` without instantiating ``anthropic.Anthropic`` (PZ-1 + AI=0 normal path + ``feedback_demo_env_toggle_policy`` default-OFF). + - ``settings.ai_fallback_enabled=True`` + valid (key, model) → preflight + returns ``"passed"`` after a 1-token ``messages.create`` ping. + - Persistent setup errors (Authentication / PermissionDenied / + NotFound) raise ``Step0PreflightError`` so boot fails fast. + - Transient errors (RateLimit / InternalServer) are recorded as + ``"transient"`` without failing boot. + +Cross-references: + - u1 default model literal: ``src/config.py:20`` + + ``tests/test_phase_z2_ai_fallback_config.py:5,31`` + - u2 SDK operational classifier: + ``src/phase_z2_ai_fallback/client.py:46`` + + ``tests/phase_z2_ai_fallback/test_step12.py`` + - u3 ``api_error_kind`` summary plumbing: + ``src/phase_z2_pipeline.py:_summarize_ai_repair_status`` + + ``tests/test_imp47b_failure_surface.py`` +""" +from __future__ import annotations + +from types import SimpleNamespace +from unittest.mock import MagicMock + +import anthropic +import httpx +import pytest + +from src import phase_z2_pipeline as pipeline_mod +from src.config import settings + + +@pytest.fixture(autouse=True) +def _restore_settings(): + snapshot = settings.model_dump() + yield + for key, value in snapshot.items(): + setattr(settings, key, value) + + +def _ok_response() -> SimpleNamespace: + return SimpleNamespace(content=[SimpleNamespace(text="")]) + + +def _status_error( + cls: type[anthropic.APIStatusError], + status_code: int, + message: str, +) -> anthropic.APIStatusError: + req = httpx.Request("POST", "https://api.anthropic.com/v1/messages") + return cls( + message=message, + response=httpx.Response(status_code, request=req), + body=None, + ) + + +def test_preflight_skipped_when_disabled(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr(settings, "ai_fallback_enabled", 
False) + spy = MagicMock() + monkeypatch.setattr(anthropic, "Anthropic", spy) + result = pipeline_mod._run_step0_ai_preflight() + assert result["status"] == "skipped" + assert result["reason"] == "ai_fallback_disabled" + assert result["model"] == settings.ai_fallback_model + spy.assert_not_called() + + +def test_preflight_passed_when_enabled_with_valid_credentials( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setattr(settings, "ai_fallback_enabled", True) + fake_client = MagicMock() + fake_client.messages.create.return_value = _ok_response() + monkeypatch.setattr(anthropic, "Anthropic", lambda **kwargs: fake_client) + result = pipeline_mod._run_step0_ai_preflight() + assert result == { + "status": "passed", + "model": settings.ai_fallback_model, + } + fake_client.messages.create.assert_called_once() + kwargs = fake_client.messages.create.call_args.kwargs + assert kwargs["model"] == settings.ai_fallback_model + assert kwargs["max_tokens"] == 1 + + +def test_preflight_fail_fast_on_invalid_api_key(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr(settings, "ai_fallback_enabled", True) + fake_client = MagicMock() + fake_client.messages.create.side_effect = _status_error( + anthropic.AuthenticationError, 401, "invalid x-api-key" + ) + monkeypatch.setattr(anthropic, "Anthropic", lambda **kwargs: fake_client) + with pytest.raises(pipeline_mod.Step0PreflightError) as ei: + pipeline_mod._run_step0_ai_preflight() + assert "AuthenticationError" in str(ei.value) + + +def test_preflight_fail_fast_on_invalid_model(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setattr(settings, "ai_fallback_enabled", True) + fake_client = MagicMock() + fake_client.messages.create.side_effect = _status_error( + anthropic.NotFoundError, 404, "model not found" + ) + monkeypatch.setattr(anthropic, "Anthropic", lambda **kwargs: fake_client) + with pytest.raises(pipeline_mod.Step0PreflightError) as ei: + pipeline_mod._run_step0_ai_preflight() + msg = 
str(ei.value) + assert "NotFoundError" in msg + assert settings.ai_fallback_model in msg + + +def test_preflight_fail_fast_on_billing_permission_denied( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setattr(settings, "ai_fallback_enabled", True) + fake_client = MagicMock() + fake_client.messages.create.side_effect = _status_error( + anthropic.PermissionDeniedError, 403, "billing required" + ) + monkeypatch.setattr(anthropic, "Anthropic", lambda **kwargs: fake_client) + with pytest.raises(pipeline_mod.Step0PreflightError) as ei: + pipeline_mod._run_step0_ai_preflight() + assert "PermissionDeniedError" in str(ei.value) + + +def test_preflight_transient_rate_limit_does_not_fail_boot( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setattr(settings, "ai_fallback_enabled", True) + fake_client = MagicMock() + fake_client.messages.create.side_effect = _status_error( + anthropic.RateLimitError, 429, "rate limited" + ) + monkeypatch.setattr(anthropic, "Anthropic", lambda **kwargs: fake_client) + result = pipeline_mod._run_step0_ai_preflight() + assert result["status"] == "transient" + assert result["model"] == settings.ai_fallback_model + assert "RateLimitError" in result["transient_error"] + + +def test_preflight_transient_internal_server_error_does_not_fail_boot( + monkeypatch: pytest.MonkeyPatch, +) -> None: + monkeypatch.setattr(settings, "ai_fallback_enabled", True) + fake_client = MagicMock() + fake_client.messages.create.side_effect = _status_error( + anthropic.InternalServerError, 500, "upstream 500" + ) + monkeypatch.setattr(anthropic, "Anthropic", lambda **kwargs: fake_client) + result = pipeline_mod._run_step0_ai_preflight() + assert result["status"] == "transient" + assert "InternalServerError" in result["transient_error"] + + +def test_preflight_fail_fast_on_generic_billing_402( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """IMP-92 u4 — HTTP 402 (Payment Required) surfaces as the generic + ``anthropic.APIStatusError`` (no typed 
subclass). The preflight MUST + dispatch by status code and raise ``Step0PreflightError`` so a + billing setup problem fails boot fast, matching the issue body's + operational contract. + """ + monkeypatch.setattr(settings, "ai_fallback_enabled", True) + fake_client = MagicMock() + fake_client.messages.create.side_effect = _status_error( + anthropic.APIStatusError, 402, "payment required" + ) + monkeypatch.setattr(anthropic, "Anthropic", lambda **kwargs: fake_client) + with pytest.raises(pipeline_mod.Step0PreflightError) as ei: + pipeline_mod._run_step0_ai_preflight() + msg = str(ei.value) + assert "402" in msg + assert settings.ai_fallback_model in msg + assert "Check ANTHROPIC_API_KEY / ai_fallback_model in .env." in msg + + +def test_preflight_generic_status_429_treated_as_transient( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """IMP-92 u4 — a generic ``APIStatusError`` with HTTP 429 must follow + the same transient policy as the typed ``RateLimitError`` branch. + """ + monkeypatch.setattr(settings, "ai_fallback_enabled", True) + fake_client = MagicMock() + fake_client.messages.create.side_effect = _status_error( + anthropic.APIStatusError, 429, "rate limited (generic)" + ) + monkeypatch.setattr(anthropic, "Anthropic", lambda **kwargs: fake_client) + result = pipeline_mod._run_step0_ai_preflight() + assert result["status"] == "transient" + assert "APIStatusError" in result["transient_error"] + + +def test_preflight_generic_status_5xx_treated_as_transient( + monkeypatch: pytest.MonkeyPatch, +) -> None: + """IMP-92 u4 — a generic ``APIStatusError`` with HTTP 5xx must follow + the same transient policy as the typed ``InternalServerError`` branch. 
+ """ + monkeypatch.setattr(settings, "ai_fallback_enabled", True) + fake_client = MagicMock() + fake_client.messages.create.side_effect = _status_error( + anthropic.APIStatusError, 503, "upstream 503 (generic)" + ) + monkeypatch.setattr(anthropic, "Anthropic", lambda **kwargs: fake_client) + result = pipeline_mod._run_step0_ai_preflight() + assert result["status"] == "transient" + assert "APIStatusError" in result["transient_error"] diff --git a/tests/phase_z2_ai_fallback/test_step12.py b/tests/phase_z2_ai_fallback/test_step12.py index 1ad16c6..8206a19 100644 --- a/tests/phase_z2_ai_fallback/test_step12.py +++ b/tests/phase_z2_ai_fallback/test_step12.py @@ -17,6 +17,9 @@ from dataclasses import dataclass, field from typing import Any from unittest.mock import MagicMock +import anthropic +import httpx + from src.phase_z2_ai_fallback import step12 as step12_mod from src.phase_z2_ai_fallback.schema import AiFallbackProposal, ProposalKind @@ -176,6 +179,9 @@ def test_router_exception_is_captured_per_record(monkeypatch): assert rec["ai_called"] is True assert rec["proposal"] is None assert rec["error"] == "RuntimeError: transient_boom" + # IMP-92 u2 — generic (non-Anthropic) exceptions classify as "other" + # so the frontend operational formatter stays silent for them. + assert rec["api_error_kind"] == "other" router.assert_called_once() @@ -405,6 +411,7 @@ def test_record_shape_contract_is_stable_with_u4_fields(monkeypatch): "skip_reason", "proposal", "error", + "api_error_kind", "cache_key", "fingerprints", } @@ -602,3 +609,146 @@ def test_mixed_units_router_receives_fingerprints_only_for_ai_eligible(monkeypat # Skipped records carry None. 
assert recs[0]["fingerprints"] is None assert recs[1]["fingerprints"] is None + + +# --------------------------------------------------------------------------- +# IMP-92 u2 — Anthropic SDK exception → api_error_kind classification +# --------------------------------------------------------------------------- +# Step 12 stamps each AI-called record with api_error_kind so the frontend +# operational alert formatter can render quota / billing / auth surfaces +# while keeping "other" failures silent (the #84 replacement-plan contract). +# Classification is type-based (no string parsing); only AI-eligible units +# that actually hit ``route_ai_fallback`` and raise can produce a non-None +# api_error_kind. Skipped units (not_provisional / non-AI route) retain +# api_error_kind=None alongside cache_key/fingerprints=None. + + +def _anthropic_status_error( + error_cls: type[anthropic.APIStatusError], status_code: int +) -> anthropic.APIStatusError: + """Construct an Anthropic SDK status error suitable for side_effect. + + The SDK error constructors require ``response`` and ``body`` kwargs; an + ``httpx.Response`` bound to a stub request is the minimum that satisfies + isinstance dispatch in ``classify_operational_error``. 
+ """ + request = httpx.Request("POST", "https://api.anthropic.com/v1/messages") + response = httpx.Response(status_code, request=request) + return error_cls("simulated", response=response, body=None) + + +def test_router_rate_limit_error_classifies_as_quota(monkeypatch): + """RateLimitError (HTTP 429) → api_error_kind='quota'.""" + err = _anthropic_status_error(anthropic.RateLimitError, 429) + router = MagicMock(side_effect=err) + monkeypatch.setattr(step12_mod, "route_ai_fallback", router) + recs = _call([_ai_unit()]) + rec = recs[0] + assert rec["ai_called"] is True + assert rec["api_error_kind"] == "quota" + assert rec["error"].startswith("RateLimitError: ") + + +def test_router_permission_denied_classifies_as_billing(monkeypatch): + """PermissionDeniedError (HTTP 403) → api_error_kind='billing'.""" + err = _anthropic_status_error(anthropic.PermissionDeniedError, 403) + router = MagicMock(side_effect=err) + monkeypatch.setattr(step12_mod, "route_ai_fallback", router) + recs = _call([_ai_unit()]) + rec = recs[0] + assert rec["ai_called"] is True + assert rec["api_error_kind"] == "billing" + assert rec["error"].startswith("PermissionDeniedError: ") + + +def test_router_payment_required_classifies_as_billing(monkeypatch): + """Generic APIStatusError with HTTP 402 → api_error_kind='billing'. + + The Anthropic SDK has no dedicated PaymentRequired subclass; a 402 + response surfaces as the base ``APIStatusError``. The issue body's + explicit operational contract requires 402 to render as billing, + so the classifier must fall through to ``status_code`` dispatch when + the typed subclass branches miss. 
+ """ + err = _anthropic_status_error(anthropic.APIStatusError, 402) + router = MagicMock(side_effect=err) + monkeypatch.setattr(step12_mod, "route_ai_fallback", router) + recs = _call([_ai_unit()]) + rec = recs[0] + assert rec["ai_called"] is True + assert rec["api_error_kind"] == "billing" + assert rec["error"].startswith("APIStatusError: ") + + +def test_router_authentication_error_classifies_as_auth(monkeypatch): + """AuthenticationError (HTTP 401) → api_error_kind='auth'.""" + err = _anthropic_status_error(anthropic.AuthenticationError, 401) + router = MagicMock(side_effect=err) + monkeypatch.setattr(step12_mod, "route_ai_fallback", router) + recs = _call([_ai_unit()]) + rec = recs[0] + assert rec["ai_called"] is True + assert rec["api_error_kind"] == "auth" + assert rec["error"].startswith("AuthenticationError: ") + + +def test_router_bad_request_classifies_as_other(monkeypatch): + """BadRequestError (HTTP 400) is non-operational → api_error_kind='other'.""" + err = _anthropic_status_error(anthropic.BadRequestError, 400) + router = MagicMock(side_effect=err) + monkeypatch.setattr(step12_mod, "route_ai_fallback", router) + recs = _call([_ai_unit()]) + rec = recs[0] + assert rec["ai_called"] is True + assert rec["api_error_kind"] == "other" + + +def test_router_internal_server_error_classifies_as_other(monkeypatch): + """InternalServerError (HTTP 5xx) is non-operational → api_error_kind='other'.""" + err = _anthropic_status_error(anthropic.InternalServerError, 500) + router = MagicMock(side_effect=err) + monkeypatch.setattr(step12_mod, "route_ai_fallback", router) + recs = _call([_ai_unit()]) + rec = recs[0] + assert rec["ai_called"] is True + assert rec["api_error_kind"] == "other" + + +def test_router_success_leaves_api_error_kind_none(monkeypatch): + """Successful proposal record keeps api_error_kind=None (no error to classify).""" + proposal = AiFallbackProposal( + proposal_kind=ProposalKind.PARTIAL_OVERRIDES, + payload={"slots": {"s": "x"}}, + 
rationale="r", + ) + router = MagicMock(return_value=proposal) + monkeypatch.setattr(step12_mod, "route_ai_fallback", router) + recs = _call([_ai_unit()]) + rec = recs[0] + assert rec["ai_called"] is True + assert rec["error"] is None + assert rec["api_error_kind"] is None + + +def test_skipped_records_keep_api_error_kind_none(monkeypatch): + """Non-AI-eligible records never see the router, so api_error_kind stays None.""" + monkeypatch.setattr(step12_mod, "route_ai_fallback", MagicMock(return_value=None)) + units = [ + FakeUnit(label="restructure", provisional=False), # not_provisional + FakeUnit(label="light_edit", provisional=True), # non-AI route + FakeUnit(label="reject", provisional=True), # legacy non-AI route + ] + recs = _call(units) + for rec in recs: + assert rec["api_error_kind"] is None + assert rec["error"] is None + + +def test_router_short_circuit_keeps_api_error_kind_none(monkeypatch): + """Router short-circuit (None return) is not an error path → api_error_kind=None.""" + router = MagicMock(return_value=None) + monkeypatch.setattr(step12_mod, "route_ai_fallback", router) + recs = _call([_ai_unit()]) + rec = recs[0] + assert rec["skip_reason"] == "router_short_circuit" + assert rec["api_error_kind"] is None diff --git a/tests/test_imp47b_failure_surface.py b/tests/test_imp47b_failure_surface.py index 8862214..a7ad4e8 100644 --- a/tests/test_imp47b_failure_surface.py +++ b/tests/test_imp47b_failure_surface.py @@ -26,13 +26,19 @@ def _record( apply_status: str | None = None, error: str | None = None, source_section_ids: list[str] | None = None, + api_error_kind: str | None = None, ) -> dict: - """Minimal Step 12 AI repair record stub — fields u8 reads.""" + """Minimal Step 12 AI repair record stub — fields u8 reads. + + IMP-92 u3 — ``api_error_kind`` is stamped by Step 12 (u2 classifier) + on the exception path; non-error paths leave it ``None``. 
+ """ return { "unit_index": unit_index, "source_section_ids": source_section_ids or [f"MOCK_S{unit_index}"], "apply_status": apply_status, "error": error, + "api_error_kind": api_error_kind, } @@ -45,7 +51,11 @@ _VIOLATED_COVERAGE = {"status": "violated", "dropped_section_ids": ["MOCK_S2"]} def test_empty_records_returns_ok_no_human_review(): """No AI work executed → status='ok', human_review_required=False. - The flag-off default (no provisional units) lands here.""" + The flag-off default (no provisional units) lands here. + + IMP-92 u3 — ``api_error_kinds`` aggregation is always present with + every kind initialised to 0 so the frontend operational formatter + can read the bucket structure unconditionally.""" result = _summarize_ai_repair_status([], _OK_COVERAGE) assert result["status"] == "ok" assert result["human_review_required"] is False @@ -53,6 +63,12 @@ def test_empty_records_returns_ok_no_human_review(): assert result["unsupported_kind_records"] == [] assert result["error_records"] == [] assert result["dropped_section_ids"] == [] + assert result["api_error_kinds"] == { + "quota": 0, + "billing": 0, + "auth": 0, + "other": 0, + } # ─── Case 2 : applied → status='applied', no human_review ─────────── @@ -102,7 +118,11 @@ def test_unsupported_kind_marks_human_review_required(): def test_gather_error_marks_status_error_with_records(): """``record['error']`` set means ``gather_step12_ai_repair_proposals`` caught a router exception (AI call / validator). status='error' - is the highest-priority failure axis.""" + is the highest-priority failure axis. 
+ + IMP-92 u3 — non-Anthropic exception path leaves ``api_error_kind`` + as ``None``; the summary retains ``None`` per-record and does not + increment any operational kind bucket.""" records = [_record( unit_index=2, error="ValueError: missing slot 'title'", @@ -117,8 +137,74 @@ def test_gather_error_marks_status_error_with_records(): "unit_index": 2, "source_section_ids": ["MOCK_S2"], "error": "ValueError: missing slot 'title'", + "api_error_kind": None, } ] + assert result["api_error_kinds"] == { + "quota": 0, + "billing": 0, + "auth": 0, + "other": 0, + } + + +# ─── IMP-92 u3 : api_error_kind propagation + aggregation ─────────── + + +def test_api_error_kind_quota_propagates_to_summary_and_record(): + """Step 12 (u2) stamps ``api_error_kind='quota'`` on a 429 + Anthropic exception path. u8 must surface that kind per-record + and increment the ``quota`` bucket in ``api_error_kinds``.""" + records = [_record( + unit_index=3, + error="RateLimitError: 429", + source_section_ids=["MOCK_S3"], + api_error_kind="quota", + )] + result = _summarize_ai_repair_status(records, _OK_COVERAGE) + assert result["status"] == "error" + assert result["human_review_required"] is True + assert result["error_records"] == [ + { + "unit_index": 3, + "source_section_ids": ["MOCK_S3"], + "error": "RateLimitError: 429", + "api_error_kind": "quota", + } + ] + assert result["api_error_kinds"] == { + "quota": 1, + "billing": 0, + "auth": 0, + "other": 0, + } + + +def test_api_error_kinds_aggregate_across_all_operational_axes(): + """Mixed batch — one of each operational kind (quota / billing / + auth / other). 
Aggregation must count each axis exactly once and + keep per-record kinds intact (order preserved).""" + records = [ + _record(unit_index=0, error="RateLimitError", api_error_kind="quota"), + _record(unit_index=1, error="PermissionDeniedError", api_error_kind="billing"), + _record(unit_index=2, error="AuthenticationError", api_error_kind="auth"), + _record(unit_index=3, error="BadRequestError", api_error_kind="other"), + ] + result = _summarize_ai_repair_status(records, _OK_COVERAGE) + assert result["status"] == "error" + assert result["counts"]["error"] == 4 + assert result["api_error_kinds"] == { + "quota": 1, + "billing": 1, + "auth": 1, + "other": 1, + } + assert [rec["api_error_kind"] for rec in result["error_records"]] == [ + "quota", + "billing", + "auth", + "other", + ] # ─── Case 5 : coverage violated → status='coverage_violated' ──────── diff --git a/tests/test_phase_z2_ai_fallback_config.py b/tests/test_phase_z2_ai_fallback_config.py index ee80048..0eac3ba 100644 --- a/tests/test_phase_z2_ai_fallback_config.py +++ b/tests/test_phase_z2_ai_fallback_config.py @@ -2,7 +2,7 @@ These defaults are the binding contract from Stage 2 plan (per-unit u1): - ai_fallback_enabled = False (master flag OFF; fallback path only) - - ai_fallback_model = "claude-opus-4-6-20250415" + - ai_fallback_model = "claude-opus-4-7" - ai_fallback_timeout_s = 60.0 - ai_fallback_max_retries = 3 - ai_fallback_backoff_base_s = 1.0 @@ -28,7 +28,7 @@ def test_ai_fallback_master_flag_default_off() -> None: def test_ai_fallback_model_default_locked() -> None: s = Settings() - assert s.ai_fallback_model == "claude-opus-4-6-20250415" + assert s.ai_fallback_model == "claude-opus-4-7" def test_ai_fallback_retry_timeout_backoff_defaults_locked() -> None: