feat(#92): IMP-92 u1~u5 AI fallback config validation (model ping + operational error classification)
Replaces #84 UI-noise removal plan with positive operational-alert contract. Five-axis stack lands together: (1) default model literal moved to current Opus-family ID, (2) Anthropic SDK error classifier mapping exceptions to quota/billing/auth/other, (3) api_error_kind plumbed through ai_repair_status summary + per-record retention, (4) Step 0 preflight ping gated under ai_fallback_enabled (default OFF preserved) with fail-fast on invalid model/key, (5) frontend formatter rewritten to surface only operational quota/billing/auth toasts (non-operational paths return null per feedback_auto_pipeline_first silent-pipeline policy). u1 - default model literal claude-opus-4-6-20250415 -> claude-opus-4-7 (src/config.py + tests/test_phase_z2_ai_fallback_config.py lock mirror) u2 - classify_operational_error type+status_code dispatch + Step 12 api_error_kind stamp on except path (src/phase_z2_ai_fallback/client.py + src/phase_z2_ai_fallback/step12.py + tests/phase_z2_ai_fallback/test_step12.py) u3 - _summarize_ai_repair_status aggregates api_error_kinds {quota,billing, auth,other}; error_records[i].api_error_kind retained per-record (src/phase_z2_pipeline.py + tests/test_imp47b_failure_surface.py) u4 - _run_step0_ai_preflight + Step0PreflightError; preflight only fires when ai_fallback_enabled=true; one-token ping; invalid key/model => setup failure before Step 1 (src/phase_z2_pipeline.py + tests/phase_z2/test_pipeline_step0_preflight.py NEW) u5 - AiRepairStatus.api_error_kinds? 
interface + formatAiRepairHumanReview Message rewritten: operational quota/billing/auth -> Korean copy verbatim from issue body (tie-break quota -> billing -> auth); validation/coverage_violated/unsupported_kind/generic-other/legacy payload -> null (Front/client/src/services/designAgentApi.ts + Front/client/tests/imp47b_human_review_toast.test.tsx) Guardrails respected: - feedback_demo_env_toggle_policy: default OFF preserved; preflight skipped when ai_fallback_enabled=false (test_preflight_skipped_when_disabled asserts anthropic.Anthropic() not called). - feedback_auto_pipeline_first: non-operational AI failures stay silent; only quota/billing/auth reach user toast. - feedback_ai_isolation_contract: AI remains fallback-only; no normal-path migration; MDX preserved. - project_imp46_carveout_caveat: cache_key/fingerprints fields untouched on every record; no overlap with #62 cache region. - feedback_no_hardcoding: zero MDX-sample-specific literals; classifier dispatch by SDK type, not by string parsing. - feedback_artifact_status_naming: operational toast scoped to alert axis, not overall PASS signal. Tests: - Targeted u1+u2+u3+u4: 63 passed - u5 vitest (Front/): 10/10 passed - tests/phase_z2_ai_fallback dir regression: 240 passed - tests/phase_z2 dir regression: 323 passed - IMP-92-adjacent (-k "imp47b or ai_fallback or preflight or step12 or step0"): 299 passed (808 deselected) - u1 baseline lock (test_client_mock.py): 8 passed Zero failures, zero regressions outside scope. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
214
tests/phase_z2/test_pipeline_step0_preflight.py
Normal file
214
tests/phase_z2/test_pipeline_step0_preflight.py
Normal file
@@ -0,0 +1,214 @@
|
||||
"""IMP-92 u4 — Step 0 AI preflight unit tests.
|
||||
|
||||
Scope (Stage 2 plan, u4):
|
||||
- ``settings.ai_fallback_enabled=False`` → preflight short-circuits to
|
||||
``"skipped"`` without instantiating ``anthropic.Anthropic`` (PZ-1
|
||||
AI=0 normal path + ``feedback_demo_env_toggle_policy`` default-OFF).
|
||||
- ``settings.ai_fallback_enabled=True`` + valid (key, model) → preflight
|
||||
returns ``"passed"`` after a 1-token ``messages.create`` ping.
|
||||
- Persistent setup errors (Authentication / PermissionDenied /
|
||||
NotFound) raise ``Step0PreflightError`` so boot fails fast.
|
||||
- Transient errors (RateLimit / InternalServer) are recorded as
|
||||
``"transient"`` without failing boot.
|
||||
|
||||
Cross-references:
|
||||
- u1 default model literal: ``src/config.py:20``
|
||||
+ ``tests/test_phase_z2_ai_fallback_config.py:5,31``
|
||||
- u2 SDK operational classifier:
|
||||
``src/phase_z2_ai_fallback/client.py:46``
|
||||
+ ``tests/phase_z2_ai_fallback/test_step12.py``
|
||||
- u3 ``api_error_kind`` summary plumbing:
|
||||
``src/phase_z2_pipeline.py:_summarize_ai_repair_status``
|
||||
+ ``tests/test_imp47b_failure_surface.py``
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from types import SimpleNamespace
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import anthropic
|
||||
import httpx
|
||||
import pytest
|
||||
|
||||
from src import phase_z2_pipeline as pipeline_mod
|
||||
from src.config import settings
|
||||
|
||||
|
||||
@pytest.fixture(autouse=True)
def _restore_settings():
    """Snapshot every ``settings`` field before a test and write it back after.

    Runs automatically for each test in this module so that mutations made to
    the imported ``settings`` object do not leak into later tests.
    """
    saved_fields = settings.model_dump()
    yield
    # Teardown: put back each field value captured before the test body ran.
    for field_name in saved_fields:
        setattr(settings, field_name, saved_fields[field_name])
|
||||
|
||||
|
||||
def _ok_response() -> SimpleNamespace:
|
||||
return SimpleNamespace(content=[SimpleNamespace(text="")])
|
||||
|
||||
|
||||
def _status_error(
    cls: type[anthropic.APIStatusError],
    status_code: int,
    message: str,
) -> anthropic.APIStatusError:
    """Instantiate an Anthropic SDK status error for use as a mock side effect.

    Args:
        cls: The ``APIStatusError`` class (or subclass) to instantiate.
        status_code: HTTP status carried by the stub ``httpx.Response``.
        message: Error message passed through to the SDK exception.

    Returns:
        An instance of ``cls`` bound to a stub POST request/response pair.
    """
    stub_request = httpx.Request("POST", "https://api.anthropic.com/v1/messages")
    stub_response = httpx.Response(status_code, request=stub_request)
    return cls(message=message, response=stub_response, body=None)
|
||||
|
||||
|
||||
def test_preflight_skipped_when_disabled(monkeypatch: pytest.MonkeyPatch) -> None:
    """With the fallback flag off, preflight reports ``skipped`` and builds no client."""
    client_ctor_spy = MagicMock()
    monkeypatch.setattr(settings, "ai_fallback_enabled", False)
    monkeypatch.setattr(anthropic, "Anthropic", client_ctor_spy)

    outcome = pipeline_mod._run_step0_ai_preflight()

    assert outcome["status"] == "skipped"
    assert outcome["reason"] == "ai_fallback_disabled"
    assert outcome["model"] == settings.ai_fallback_model
    # The SDK client constructor must never have been invoked.
    client_ctor_spy.assert_not_called()
|
||||
|
||||
|
||||
def test_preflight_passed_when_enabled_with_valid_credentials(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """Enabled fallback + successful ping: preflight returns ``passed``.

    Also pins the ping shape: exactly one ``messages.create`` call, using the
    configured model and ``max_tokens=1``.
    """
    monkeypatch.setattr(settings, "ai_fallback_enabled", True)
    stub_client = MagicMock()
    stub_client.messages.create.return_value = _ok_response()
    monkeypatch.setattr(anthropic, "Anthropic", lambda **kwargs: stub_client)

    outcome = pipeline_mod._run_step0_ai_preflight()

    expected = {"status": "passed", "model": settings.ai_fallback_model}
    assert outcome == expected

    create_mock = stub_client.messages.create
    create_mock.assert_called_once()
    sent_kwargs = create_mock.call_args.kwargs
    assert sent_kwargs["model"] == settings.ai_fallback_model
    assert sent_kwargs["max_tokens"] == 1
|
||||
|
||||
|
||||
def test_preflight_fail_fast_on_invalid_api_key(monkeypatch: pytest.MonkeyPatch) -> None:
    """A 401 ``AuthenticationError`` from the ping raises ``Step0PreflightError``."""
    auth_failure = _status_error(anthropic.AuthenticationError, 401, "invalid x-api-key")
    stub_client = MagicMock()
    stub_client.messages.create.side_effect = auth_failure
    monkeypatch.setattr(settings, "ai_fallback_enabled", True)
    monkeypatch.setattr(anthropic, "Anthropic", lambda **kwargs: stub_client)

    with pytest.raises(pipeline_mod.Step0PreflightError) as excinfo:
        pipeline_mod._run_step0_ai_preflight()

    # The SDK exception class name must be visible in the raised message.
    assert "AuthenticationError" in str(excinfo.value)
|
||||
|
||||
|
||||
def test_preflight_fail_fast_on_invalid_model(monkeypatch: pytest.MonkeyPatch) -> None:
    """A 404 ``NotFoundError`` from the ping raises ``Step0PreflightError``.

    The message must name both the SDK exception class and the configured model.
    """
    missing_model = _status_error(anthropic.NotFoundError, 404, "model not found")
    stub_client = MagicMock()
    stub_client.messages.create.side_effect = missing_model
    monkeypatch.setattr(settings, "ai_fallback_enabled", True)
    monkeypatch.setattr(anthropic, "Anthropic", lambda **kwargs: stub_client)

    with pytest.raises(pipeline_mod.Step0PreflightError) as excinfo:
        pipeline_mod._run_step0_ai_preflight()

    failure_text = str(excinfo.value)
    assert "NotFoundError" in failure_text
    assert settings.ai_fallback_model in failure_text
|
||||
|
||||
|
||||
def test_preflight_fail_fast_on_billing_permission_denied(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A 403 ``PermissionDeniedError`` from the ping raises ``Step0PreflightError``."""
    denied = _status_error(anthropic.PermissionDeniedError, 403, "billing required")
    stub_client = MagicMock()
    stub_client.messages.create.side_effect = denied
    monkeypatch.setattr(settings, "ai_fallback_enabled", True)
    monkeypatch.setattr(anthropic, "Anthropic", lambda **kwargs: stub_client)

    with pytest.raises(pipeline_mod.Step0PreflightError) as excinfo:
        pipeline_mod._run_step0_ai_preflight()

    assert "PermissionDeniedError" in str(excinfo.value)
|
||||
|
||||
|
||||
def test_preflight_transient_rate_limit_does_not_fail_boot(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A 429 ``RateLimitError`` is reported as ``transient`` instead of raising."""
    throttled = _status_error(anthropic.RateLimitError, 429, "rate limited")
    stub_client = MagicMock()
    stub_client.messages.create.side_effect = throttled
    monkeypatch.setattr(settings, "ai_fallback_enabled", True)
    monkeypatch.setattr(anthropic, "Anthropic", lambda **kwargs: stub_client)

    outcome = pipeline_mod._run_step0_ai_preflight()

    assert outcome["status"] == "transient"
    assert outcome["model"] == settings.ai_fallback_model
    assert "RateLimitError" in outcome["transient_error"]
|
||||
|
||||
|
||||
def test_preflight_transient_internal_server_error_does_not_fail_boot(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A 500 ``InternalServerError`` is reported as ``transient`` instead of raising."""
    upstream_failure = _status_error(anthropic.InternalServerError, 500, "upstream 500")
    stub_client = MagicMock()
    stub_client.messages.create.side_effect = upstream_failure
    monkeypatch.setattr(settings, "ai_fallback_enabled", True)
    monkeypatch.setattr(anthropic, "Anthropic", lambda **kwargs: stub_client)

    outcome = pipeline_mod._run_step0_ai_preflight()

    assert outcome["status"] == "transient"
    assert "InternalServerError" in outcome["transient_error"]
|
||||
|
||||
|
||||
def test_preflight_fail_fast_on_generic_billing_402(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """IMP-92 u4 — an untyped HTTP 402 must still fail boot fast.

    This test raises the base ``anthropic.APIStatusError`` with status 402
    (Payment Required), so the preflight has to dispatch on the status code
    and raise ``Step0PreflightError``. The message must carry the status
    code, the configured model, and the ``.env`` remediation hint.
    """
    payment_required = _status_error(anthropic.APIStatusError, 402, "payment required")
    stub_client = MagicMock()
    stub_client.messages.create.side_effect = payment_required
    monkeypatch.setattr(settings, "ai_fallback_enabled", True)
    monkeypatch.setattr(anthropic, "Anthropic", lambda **kwargs: stub_client)

    with pytest.raises(pipeline_mod.Step0PreflightError) as excinfo:
        pipeline_mod._run_step0_ai_preflight()

    failure_text = str(excinfo.value)
    assert "402" in failure_text
    assert settings.ai_fallback_model in failure_text
    assert "Check ANTHROPIC_API_KEY / ai_fallback_model in .env." in failure_text
|
||||
|
||||
|
||||
def test_preflight_generic_status_429_treated_as_transient(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """IMP-92 u4 — an untyped HTTP 429 is handled like ``RateLimitError``.

    The base ``APIStatusError`` carrying status 429 must yield a
    ``transient`` result rather than raising.
    """
    generic_throttle = _status_error(
        anthropic.APIStatusError, 429, "rate limited (generic)"
    )
    stub_client = MagicMock()
    stub_client.messages.create.side_effect = generic_throttle
    monkeypatch.setattr(settings, "ai_fallback_enabled", True)
    monkeypatch.setattr(anthropic, "Anthropic", lambda **kwargs: stub_client)

    outcome = pipeline_mod._run_step0_ai_preflight()

    assert outcome["status"] == "transient"
    assert "APIStatusError" in outcome["transient_error"]
|
||||
|
||||
|
||||
def test_preflight_generic_status_5xx_treated_as_transient(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """IMP-92 u4 — an untyped HTTP 5xx is handled like ``InternalServerError``.

    The base ``APIStatusError`` carrying status 503 must yield a
    ``transient`` result rather than raising.
    """
    generic_outage = _status_error(
        anthropic.APIStatusError, 503, "upstream 503 (generic)"
    )
    stub_client = MagicMock()
    stub_client.messages.create.side_effect = generic_outage
    monkeypatch.setattr(settings, "ai_fallback_enabled", True)
    monkeypatch.setattr(anthropic, "Anthropic", lambda **kwargs: stub_client)

    outcome = pipeline_mod._run_step0_ai_preflight()

    assert outcome["status"] == "transient"
    assert "APIStatusError" in outcome["transient_error"]
|
||||
@@ -17,6 +17,9 @@ from dataclasses import dataclass, field
|
||||
from typing import Any
|
||||
from unittest.mock import MagicMock
|
||||
|
||||
import anthropic
|
||||
import httpx
|
||||
|
||||
from src.phase_z2_ai_fallback import step12 as step12_mod
|
||||
from src.phase_z2_ai_fallback.schema import AiFallbackProposal, ProposalKind
|
||||
|
||||
@@ -176,6 +179,9 @@ def test_router_exception_is_captured_per_record(monkeypatch):
|
||||
assert rec["ai_called"] is True
|
||||
assert rec["proposal"] is None
|
||||
assert rec["error"] == "RuntimeError: transient_boom"
|
||||
# IMP-92 u2 — generic (non-Anthropic) exceptions classify as "other"
|
||||
# so the frontend operational formatter stays silent for them.
|
||||
assert rec["api_error_kind"] == "other"
|
||||
router.assert_called_once()
|
||||
|
||||
|
||||
@@ -405,6 +411,7 @@ def test_record_shape_contract_is_stable_with_u4_fields(monkeypatch):
|
||||
"skip_reason",
|
||||
"proposal",
|
||||
"error",
|
||||
"api_error_kind",
|
||||
"cache_key",
|
||||
"fingerprints",
|
||||
}
|
||||
@@ -602,3 +609,146 @@ def test_mixed_units_router_receives_fingerprints_only_for_ai_eligible(monkeypat
|
||||
# Skipped records carry None.
|
||||
assert recs[0]["fingerprints"] is None
|
||||
assert recs[1]["fingerprints"] is None
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# IMP-92 u2 — Anthropic SDK exception → api_error_kind classification
|
||||
# ---------------------------------------------------------------------------
|
||||
# Step 12 stamps each AI-called record with api_error_kind so the frontend
|
||||
# operational alert formatter can render quota / billing / auth surfaces
|
||||
# while keeping "other" failures silent (the #84 replacement-plan contract).
|
||||
# Classification is type-based (no string parsing); only AI-eligible units
|
||||
# that actually hit ``route_ai_fallback`` and raise can produce a non-None
|
||||
# api_error_kind. Skipped units (not_provisional / non-AI route) retain
|
||||
# api_error_kind=None alongside cache_key/fingerprints=None.
|
||||
|
||||
|
||||
def _anthropic_status_error(
    error_cls: type[anthropic.APIStatusError], status_code: int
) -> anthropic.APIStatusError:
    """Build an Anthropic SDK status error that can be used as a mock side effect.

    The SDK error constructors need ``response`` and ``body`` keyword
    arguments, so a bare ``httpx.Response`` attached to a stub POST request is
    supplied. The message is always the literal ``"simulated"``.

    Args:
        error_cls: The ``APIStatusError`` class (or subclass) to instantiate.
        status_code: HTTP status to place on the stub response.
    """
    stub_request = httpx.Request("POST", "https://api.anthropic.com/v1/messages")
    return error_cls(
        "simulated",
        response=httpx.Response(status_code, request=stub_request),
        body=None,
    )
|
||||
|
||||
|
||||
def test_router_rate_limit_error_classifies_as_quota(monkeypatch):
    """A router raising ``RateLimitError`` (HTTP 429) is stamped ``api_error_kind='quota'``."""
    failing_router = MagicMock(
        side_effect=_anthropic_status_error(anthropic.RateLimitError, 429)
    )
    monkeypatch.setattr(step12_mod, "route_ai_fallback", failing_router)

    record = _call([_ai_unit()])[0]

    assert record["ai_called"] is True
    assert record["api_error_kind"] == "quota"
    assert record["error"].startswith("RateLimitError: ")
|
||||
|
||||
|
||||
def test_router_permission_denied_classifies_as_billing(monkeypatch):
    """A router raising ``PermissionDeniedError`` (HTTP 403) is stamped ``'billing'``."""
    failing_router = MagicMock(
        side_effect=_anthropic_status_error(anthropic.PermissionDeniedError, 403)
    )
    monkeypatch.setattr(step12_mod, "route_ai_fallback", failing_router)

    record = _call([_ai_unit()])[0]

    assert record["ai_called"] is True
    assert record["api_error_kind"] == "billing"
    assert record["error"].startswith("PermissionDeniedError: ")
|
||||
|
||||
|
||||
def test_router_payment_required_classifies_as_billing(monkeypatch):
    """A base ``APIStatusError`` with HTTP 402 is stamped ``api_error_kind='billing'``.

    This test raises the base ``APIStatusError`` carrying status 402 rather
    than any typed subclass, so the classifier can only reach ``billing`` by
    falling back to ``status_code`` dispatch once the typed-subclass branches
    miss.
    """
    failing_router = MagicMock(
        side_effect=_anthropic_status_error(anthropic.APIStatusError, 402)
    )
    monkeypatch.setattr(step12_mod, "route_ai_fallback", failing_router)

    record = _call([_ai_unit()])[0]

    assert record["ai_called"] is True
    assert record["api_error_kind"] == "billing"
    assert record["error"].startswith("APIStatusError: ")
|
||||
|
||||
|
||||
def test_router_authentication_error_classifies_as_auth(monkeypatch):
    """A router raising ``AuthenticationError`` (HTTP 401) is stamped ``'auth'``."""
    failing_router = MagicMock(
        side_effect=_anthropic_status_error(anthropic.AuthenticationError, 401)
    )
    monkeypatch.setattr(step12_mod, "route_ai_fallback", failing_router)

    record = _call([_ai_unit()])[0]

    assert record["ai_called"] is True
    assert record["api_error_kind"] == "auth"
    assert record["error"].startswith("AuthenticationError: ")
|
||||
|
||||
|
||||
def test_router_bad_request_classifies_as_other(monkeypatch):
    """BadRequestError (HTTP 400) is non-operational → api_error_kind='other'.

    Like the sibling quota / billing / auth tests, this also pins the
    ``"<ExceptionClass>: "`` prefix of the captured ``error`` string so the
    "other" bucket cannot silently lose its diagnostic text.
    """
    err = _anthropic_status_error(anthropic.BadRequestError, 400)
    router = MagicMock(side_effect=err)
    monkeypatch.setattr(step12_mod, "route_ai_fallback", router)

    recs = _call([_ai_unit()])
    rec = recs[0]

    assert rec["ai_called"] is True
    assert rec["api_error_kind"] == "other"
    # Consistency with the other classification tests in this section.
    assert rec["error"].startswith("BadRequestError: ")
|
||||
|
||||
|
||||
def test_router_internal_server_error_classifies_as_other(monkeypatch):
    """InternalServerError (HTTP 5xx) is non-operational → api_error_kind='other'.

    Like the sibling quota / billing / auth tests, this also pins the
    ``"<ExceptionClass>: "`` prefix of the captured ``error`` string so the
    "other" bucket cannot silently lose its diagnostic text.
    """
    err = _anthropic_status_error(anthropic.InternalServerError, 500)
    router = MagicMock(side_effect=err)
    monkeypatch.setattr(step12_mod, "route_ai_fallback", router)

    recs = _call([_ai_unit()])
    rec = recs[0]

    assert rec["ai_called"] is True
    assert rec["api_error_kind"] == "other"
    # Consistency with the other classification tests in this section.
    assert rec["error"].startswith("InternalServerError: ")
|
||||
|
||||
|
||||
def test_router_success_leaves_api_error_kind_none(monkeypatch):
    """When the router returns a proposal, both ``error`` and ``api_error_kind`` stay ``None``."""
    ok_proposal = AiFallbackProposal(
        proposal_kind=ProposalKind.PARTIAL_OVERRIDES,
        payload={"slots": {"s": "x"}},
        rationale="r",
    )
    monkeypatch.setattr(
        step12_mod, "route_ai_fallback", MagicMock(return_value=ok_proposal)
    )

    record = _call([_ai_unit()])[0]

    assert record["ai_called"] is True
    assert record["error"] is None
    assert record["api_error_kind"] is None
|
||||
|
||||
|
||||
def test_skipped_records_keep_api_error_kind_none(monkeypatch):
    """Units that are not AI-eligible end up with ``api_error_kind`` and ``error`` both ``None``."""
    monkeypatch.setattr(step12_mod, "route_ai_fallback", MagicMock(return_value=None))
    not_provisional = FakeUnit(label="restructure", provisional=False)  # not_provisional
    non_ai_route = FakeUnit(label="light_edit", provisional=True)  # non-AI route
    legacy_non_ai_route = FakeUnit(label="reject", provisional=True)  # legacy non-AI route

    produced = _call([not_provisional, non_ai_route, legacy_non_ai_route])

    for record in produced:
        assert record["api_error_kind"] is None
        assert record["error"] is None
|
||||
|
||||
|
||||
def test_router_short_circuit_keeps_api_error_kind_none(monkeypatch):
    """A router returning ``None`` is a short-circuit, not an error: no kind is stamped."""
    monkeypatch.setattr(step12_mod, "route_ai_fallback", MagicMock(return_value=None))

    record = _call([_ai_unit()])[0]

    assert record["skip_reason"] == "router_short_circuit"
    assert record["api_error_kind"] is None
|
||||
|
||||
@@ -26,13 +26,19 @@ def _record(
|
||||
apply_status: str | None = None,
|
||||
error: str | None = None,
|
||||
source_section_ids: list[str] | None = None,
|
||||
api_error_kind: str | None = None,
|
||||
) -> dict:
|
||||
"""Minimal Step 12 AI repair record stub — fields u8 reads."""
|
||||
"""Minimal Step 12 AI repair record stub — fields u8 reads.
|
||||
|
||||
IMP-92 u3 — ``api_error_kind`` is stamped by Step 12 (u2 classifier)
|
||||
on the exception path; non-error paths leave it ``None``.
|
||||
"""
|
||||
return {
|
||||
"unit_index": unit_index,
|
||||
"source_section_ids": source_section_ids or [f"MOCK_S{unit_index}"],
|
||||
"apply_status": apply_status,
|
||||
"error": error,
|
||||
"api_error_kind": api_error_kind,
|
||||
}
|
||||
|
||||
|
||||
@@ -45,7 +51,11 @@ _VIOLATED_COVERAGE = {"status": "violated", "dropped_section_ids": ["MOCK_S2"]}
|
||||
|
||||
def test_empty_records_returns_ok_no_human_review():
|
||||
"""No AI work executed → status='ok', human_review_required=False.
|
||||
The flag-off default (no provisional units) lands here."""
|
||||
The flag-off default (no provisional units) lands here.
|
||||
|
||||
IMP-92 u3 — ``api_error_kinds`` aggregation is always present with
|
||||
every kind initialised to 0 so the frontend operational formatter
|
||||
can read the bucket structure unconditionally."""
|
||||
result = _summarize_ai_repair_status([], _OK_COVERAGE)
|
||||
assert result["status"] == "ok"
|
||||
assert result["human_review_required"] is False
|
||||
@@ -53,6 +63,12 @@ def test_empty_records_returns_ok_no_human_review():
|
||||
assert result["unsupported_kind_records"] == []
|
||||
assert result["error_records"] == []
|
||||
assert result["dropped_section_ids"] == []
|
||||
assert result["api_error_kinds"] == {
|
||||
"quota": 0,
|
||||
"billing": 0,
|
||||
"auth": 0,
|
||||
"other": 0,
|
||||
}
|
||||
|
||||
|
||||
# ─── Case 2 : applied → status='applied', no human_review ───────────
|
||||
@@ -102,7 +118,11 @@ def test_unsupported_kind_marks_human_review_required():
|
||||
def test_gather_error_marks_status_error_with_records():
|
||||
"""``record['error']`` set means ``gather_step12_ai_repair_proposals``
|
||||
caught a router exception (AI call / validator). status='error'
|
||||
is the highest-priority failure axis."""
|
||||
is the highest-priority failure axis.
|
||||
|
||||
IMP-92 u3 — this stub record carries no ``api_error_kind`` (the
|
||||
``_record`` default is ``None``); the summary retains ``None``
|
||||
per-record and does not increment any kind bucket."""
|
||||
records = [_record(
|
||||
unit_index=2,
|
||||
error="ValueError: missing slot 'title'",
|
||||
@@ -117,8 +137,74 @@ def test_gather_error_marks_status_error_with_records():
|
||||
"unit_index": 2,
|
||||
"source_section_ids": ["MOCK_S2"],
|
||||
"error": "ValueError: missing slot 'title'",
|
||||
"api_error_kind": None,
|
||||
}
|
||||
]
|
||||
assert result["api_error_kinds"] == {
|
||||
"quota": 0,
|
||||
"billing": 0,
|
||||
"auth": 0,
|
||||
"other": 0,
|
||||
}
|
||||
|
||||
|
||||
# ─── IMP-92 u3 : api_error_kind propagation + aggregation ───────────
|
||||
|
||||
|
||||
def test_api_error_kind_quota_propagates_to_summary_and_record():
    """A record stamped ``api_error_kind='quota'`` must survive summarisation.

    The summary has to echo the kind on the matching ``error_records`` entry
    and count it once in the ``quota`` bucket of ``api_error_kinds``.
    """
    quota_record = _record(
        unit_index=3,
        error="RateLimitError: 429",
        source_section_ids=["MOCK_S3"],
        api_error_kind="quota",
    )

    summary = _summarize_ai_repair_status([quota_record], _OK_COVERAGE)

    assert summary["status"] == "error"
    assert summary["human_review_required"] is True

    expected_error_records = [
        {
            "unit_index": 3,
            "source_section_ids": ["MOCK_S3"],
            "error": "RateLimitError: 429",
            "api_error_kind": "quota",
        }
    ]
    assert summary["error_records"] == expected_error_records

    expected_buckets = {"quota": 1, "billing": 0, "auth": 0, "other": 0}
    assert summary["api_error_kinds"] == expected_buckets
|
||||
|
||||
|
||||
def test_api_error_kinds_aggregate_across_all_operational_axes():
    """One record per kind (quota / billing / auth / other) is counted once each.

    Per-record kinds must also come back on ``error_records`` in input order.
    """
    mixed_batch = [
        _record(unit_index=0, error="RateLimitError", api_error_kind="quota"),
        _record(unit_index=1, error="PermissionDeniedError", api_error_kind="billing"),
        _record(unit_index=2, error="AuthenticationError", api_error_kind="auth"),
        _record(unit_index=3, error="BadRequestError", api_error_kind="other"),
    ]

    summary = _summarize_ai_repair_status(mixed_batch, _OK_COVERAGE)

    assert summary["status"] == "error"
    assert summary["counts"]["error"] == 4

    expected_buckets = {"quota": 1, "billing": 1, "auth": 1, "other": 1}
    assert summary["api_error_kinds"] == expected_buckets

    surfaced_kinds = [entry["api_error_kind"] for entry in summary["error_records"]]
    assert surfaced_kinds == ["quota", "billing", "auth", "other"]
|
||||
|
||||
|
||||
# ─── Case 5 : coverage violated → status='coverage_violated' ────────
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
These defaults are the binding contract from Stage 2 plan (per-unit u1):
|
||||
- ai_fallback_enabled = False (master flag OFF; fallback path only)
|
||||
- ai_fallback_model = "claude-opus-4-6-20250415"
|
||||
- ai_fallback_model = "claude-opus-4-7"
|
||||
- ai_fallback_timeout_s = 60.0
|
||||
- ai_fallback_max_retries = 3
|
||||
- ai_fallback_backoff_base_s = 1.0
|
||||
@@ -28,7 +28,7 @@ def test_ai_fallback_master_flag_default_off() -> None:
|
||||
|
||||
def test_ai_fallback_model_default_locked() -> None:
|
||||
s = Settings()
|
||||
assert s.ai_fallback_model == "claude-opus-4-6-20250415"
|
||||
assert s.ai_fallback_model == "claude-opus-4-7"
|
||||
|
||||
|
||||
def test_ai_fallback_retry_timeout_backoff_defaults_locked() -> None:
|
||||
|
||||
Reference in New Issue
Block a user