feat(#92): IMP-92 u1~u5 AI fallback config validation (model ping + operational error classification)
Replaces #84 UI-noise removal plan with positive operational-alert contract. Five-axis stack lands together: (1) default model literal moved to current Opus-family ID, (2) Anthropic SDK error classifier mapping exceptions to quota/billing/auth/other, (3) api_error_kind plumbed through ai_repair_status summary + per-record retention, (4) Step 0 preflight ping gated under ai_fallback_enabled (default OFF preserved) with fail-fast on invalid model/key, (5) frontend formatter rewritten to surface only operational quota/billing/auth toasts (non-operational paths return null per feedback_auto_pipeline_first silent-pipeline policy). u1 - default model literal claude-opus-4-6-20250415 -> claude-opus-4-7 (src/config.py + tests/test_phase_z2_ai_fallback_config.py lock mirror) u2 - classify_operational_error type+status_code dispatch + Step 12 api_error_kind stamp on except path (src/phase_z2_ai_fallback/client.py + src/phase_z2_ai_fallback/step12.py + tests/phase_z2_ai_fallback/test_step12.py) u3 - _summarize_ai_repair_status aggregates api_error_kinds {quota, billing, auth, other}; error_records[i].api_error_kind retained per-record (src/phase_z2_pipeline.py + tests/test_imp47b_failure_surface.py) u4 - _run_step0_ai_preflight + Step0PreflightError; preflight only fires when ai_fallback_enabled=true; one-token ping; invalid key/model => setup failure before Step 1 (src/phase_z2_pipeline.py + tests/phase_z2/test_pipeline_step0_preflight.py NEW) u5 - AiRepairStatus.api_error_kinds? 
interface + formatAiRepairHumanReview Message rewritten: operational quota/billing/auth -> Korean copy verbatim from issue body (tie-break quota -> billing -> auth); validation/coverage_violated/unsupported_kind/generic-other/legacy payload -> null (Front/client/src/services/designAgentApi.ts + Front/client/tests/imp47b_human_review_toast.test.tsx) Guardrails respected: - feedback_demo_env_toggle_policy: default OFF preserved; preflight skipped when ai_fallback_enabled=false (test_preflight_skipped_when_disabled asserts anthropic.Anthropic() not called). - feedback_auto_pipeline_first: non-operational AI failures stay silent; only quota/billing/auth reach user toast. - feedback_ai_isolation_contract: AI remains fallback-only; no normal-path migration; MDX preserved. - project_imp46_carveout_caveat: cache_key/fingerprints fields untouched on every record; no overlap with #62 cache region. - feedback_no_hardcoding: zero MDX-sample-specific literals; classifier dispatch by SDK type, not by string parsing. - feedback_artifact_status_naming: operational toast scoped to alert axis, not overall PASS signal. Tests: - Targeted u1+u2+u3+u4: 63 passed - u5 vitest (Front/): 10/10 passed - tests/phase_z2_ai_fallback dir regression: 240 passed - tests/phase_z2 dir regression: 323 passed - IMP-92-adjacent (-k "imp47b or ai_fallback or preflight or step12 or step0"): 299 passed (808 deselected) - u1 baseline lock (test_client_mock.py): 8 passed Zero failures, zero regressions outside scope. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -26,13 +26,19 @@ def _record(
|
||||
apply_status: str | None = None,
|
||||
error: str | None = None,
|
||||
source_section_ids: list[str] | None = None,
|
||||
api_error_kind: str | None = None,
|
||||
) -> dict:
|
||||
"""Minimal Step 12 AI repair record stub — fields u8 reads."""
|
||||
"""Minimal Step 12 AI repair record stub — fields u8 reads.
|
||||
|
||||
IMP-92 u3 — ``api_error_kind`` is stamped by Step 12 (u2 classifier)
|
||||
on the exception path; non-error paths leave it ``None``.
|
||||
"""
|
||||
return {
|
||||
"unit_index": unit_index,
|
||||
"source_section_ids": source_section_ids or [f"MOCK_S{unit_index}"],
|
||||
"apply_status": apply_status,
|
||||
"error": error,
|
||||
"api_error_kind": api_error_kind,
|
||||
}
|
||||
|
||||
|
||||
@@ -45,7 +51,11 @@ _VIOLATED_COVERAGE = {"status": "violated", "dropped_section_ids": ["MOCK_S2"]}
|
||||
|
||||
def test_empty_records_returns_ok_no_human_review():
|
||||
"""No AI work executed → status='ok', human_review_required=False.
|
||||
The flag-off default (no provisional units) lands here."""
|
||||
The flag-off default (no provisional units) lands here.
|
||||
|
||||
IMP-92 u3 — ``api_error_kinds`` aggregation is always present with
|
||||
every kind initialised to 0 so the frontend operational formatter
|
||||
can read the bucket structure unconditionally."""
|
||||
result = _summarize_ai_repair_status([], _OK_COVERAGE)
|
||||
assert result["status"] == "ok"
|
||||
assert result["human_review_required"] is False
|
||||
@@ -53,6 +63,12 @@ def test_empty_records_returns_ok_no_human_review():
|
||||
assert result["unsupported_kind_records"] == []
|
||||
assert result["error_records"] == []
|
||||
assert result["dropped_section_ids"] == []
|
||||
assert result["api_error_kinds"] == {
|
||||
"quota": 0,
|
||||
"billing": 0,
|
||||
"auth": 0,
|
||||
"other": 0,
|
||||
}
|
||||
|
||||
|
||||
# ─── Case 2 : applied → status='applied', no human_review ───────────
|
||||
@@ -102,7 +118,11 @@ def test_unsupported_kind_marks_human_review_required():
|
||||
def test_gather_error_marks_status_error_with_records():
|
||||
"""``record['error']`` set means ``gather_step12_ai_repair_proposals``
|
||||
caught a router exception (AI call / validator). status='error'
|
||||
is the highest-priority failure axis."""
|
||||
is the highest-priority failure axis.
|
||||
|
||||
IMP-92 u3 — non-Anthropic exception path leaves ``api_error_kind``
|
||||
as ``None``; the summary retains ``None`` per-record and does not
|
||||
increment any operational kind bucket."""
|
||||
records = [_record(
|
||||
unit_index=2,
|
||||
error="ValueError: missing slot 'title'",
|
||||
@@ -117,8 +137,74 @@ def test_gather_error_marks_status_error_with_records():
|
||||
"unit_index": 2,
|
||||
"source_section_ids": ["MOCK_S2"],
|
||||
"error": "ValueError: missing slot 'title'",
|
||||
"api_error_kind": None,
|
||||
}
|
||||
]
|
||||
assert result["api_error_kinds"] == {
|
||||
"quota": 0,
|
||||
"billing": 0,
|
||||
"auth": 0,
|
||||
"other": 0,
|
||||
}
|
||||
|
||||
|
||||
# ─── IMP-92 u3 : api_error_kind propagation + aggregation ───────────
|
||||
|
||||
|
||||
def test_api_error_kind_quota_propagates_to_summary_and_record():
    """A record carrying ``api_error_kind='quota'`` must surface in the summary.

    The summary is expected to report ``status='error'``, require human
    review, keep the kind on the per-record entry, and count exactly one
    hit in the ``quota`` bucket while every other bucket stays at zero.
    """
    quota_record = _record(
        unit_index=3,
        error="RateLimitError: 429",
        source_section_ids=["MOCK_S3"],
        api_error_kind="quota",
    )
    # Per-record view: the kind travels alongside the original error text.
    expected_error_records = [
        {
            "unit_index": 3,
            "source_section_ids": ["MOCK_S3"],
            "error": "RateLimitError: 429",
            "api_error_kind": "quota",
        }
    ]
    # Aggregate view: only the quota bucket is incremented.
    expected_buckets = {"quota": 1, "billing": 0, "auth": 0, "other": 0}

    summary = _summarize_ai_repair_status([quota_record], _OK_COVERAGE)

    assert summary["status"] == "error"
    assert summary["human_review_required"] is True
    assert summary["error_records"] == expected_error_records
    assert summary["api_error_kinds"] == expected_buckets
|
||||
|
||||
|
||||
def test_api_error_kinds_aggregate_across_all_operational_axes():
    """Mixed batch with one record per kind (quota / billing / auth / other).

    Each bucket must be counted exactly once, the error count must equal
    the batch size, and the per-record kinds must come back in input order.
    """
    # (error text, api_error_kind) pairs; list position doubles as unit_index.
    batch = [
        ("RateLimitError", "quota"),
        ("PermissionDeniedError", "billing"),
        ("AuthenticationError", "auth"),
        ("BadRequestError", "other"),
    ]
    records = [
        _record(unit_index=position, error=message, api_error_kind=kind)
        for position, (message, kind) in enumerate(batch)
    ]

    summary = _summarize_ai_repair_status(records, _OK_COVERAGE)

    assert summary["status"] == "error"
    assert summary["counts"]["error"] == 4
    assert summary["api_error_kinds"] == {
        "quota": 1,
        "billing": 1,
        "auth": 1,
        "other": 1,
    }
    # Order preservation: kinds on error_records mirror the input sequence.
    observed_kinds = [entry["api_error_kind"] for entry in summary["error_records"]]
    assert observed_kinds == ["quota", "billing", "auth", "other"]
|
||||
|
||||
|
||||
# ─── Case 5 : coverage violated → status='coverage_violated' ────────
|
||||
|
||||
Reference in New Issue
Block a user