"""IMP-47B u8 — slide_status.ai_repair_status surfacing tests. Scope (this slice): Helper ``_summarize_ai_repair_status(ai_repair_records, coverage_invariant)`` (src/phase_z2_pipeline.py) composes u4 gather ``error`` + u5 ``apply_status`` + u7 ``coverage_invariant`` into a single ``ai_repair_status`` axis attached to ``slide_status``. Failure-axis priority (highest → lowest): ``error`` > ``coverage_violated`` > ``unsupported_kind`` > ``applied`` > ``ok``. ``human_review_required`` flips True on the three failure axes for u11 frontend surfacing. The frontend reads ``slide_status.ai_repair_status`` to render a notification per the IMP-47B policy ("AI 호출 실패 / proposal validation 실패 / coverage 미달 → frontend notification"). u9~u13 are out of scope. The helper is pure (no IO, no AI call) so synthetic record / invariant dicts exercise every branch directly. """ from __future__ import annotations from src.phase_z2_pipeline import _summarize_ai_repair_status def _record( *, unit_index: int = 0, apply_status: str | None = None, error: str | None = None, source_section_ids: list[str] | None = None, api_error_kind: str | None = None, ) -> dict: """Minimal Step 12 AI repair record stub — fields u8 reads. IMP-92 u3 — ``api_error_kind`` is stamped by Step 12 (u2 classifier) on the exception path; non-error paths leave it ``None``. """ return { "unit_index": unit_index, "source_section_ids": source_section_ids or [f"MOCK_S{unit_index}"], "apply_status": apply_status, "error": error, "api_error_kind": api_error_kind, } _OK_COVERAGE = {"status": "ok", "dropped_section_ids": []} _VIOLATED_COVERAGE = {"status": "violated", "dropped_section_ids": ["MOCK_S2"]} # ─── Case 1 : empty pipeline → status='ok' ────────────────────────── def test_empty_records_returns_ok_no_human_review(): """No AI work executed → status='ok', human_review_required=False. The flag-off default (no provisional units) lands here. IMP-92 u3 — ``api_error_kinds`` aggregation is always present with every kind initialised to 0 so the frontend operational formatter can read the bucket structure unconditionally.""" result = _summarize_ai_repair_status([], _OK_COVERAGE) assert result["status"] == "ok" assert result["human_review_required"] is False assert result["counts"]["total"] == 0 assert result["unsupported_kind_records"] == [] assert result["error_records"] == [] assert result["dropped_section_ids"] == [] assert result["api_error_kinds"] == { "quota": 0, "billing": 0, "auth": 0, "other": 0, } # ─── Case 2 : applied → status='applied', no human_review ─────────── def test_applied_partial_overrides_marks_applied_no_human_review(): """Successful AI repair (PARTIAL_OVERRIDES applied) is the happy path. status='applied', no human_review surfacing.""" records = [_record(apply_status="applied:partial_overrides")] result = _summarize_ai_repair_status(records, _OK_COVERAGE) assert result["status"] == "applied" assert result["human_review_required"] is False assert result["counts"]["applied"] == 1 assert result["counts"]["error"] == 0 # ─── Case 3 : unsupported kind → status='unsupported_kind' ────────── def test_unsupported_kind_marks_human_review_required(): """u5 surfaces ``unsupported_kind_for_reject_route:`` for builder_options_patch / slot_mapping_proposal. u8 must classify as human_review_required so the frontend renders a notification.""" records = [ _record( unit_index=1, apply_status="unsupported_kind_for_reject_route:builder_options_patch", source_section_ids=["MOCK_S1"], ), ] result = _summarize_ai_repair_status(records, _OK_COVERAGE) assert result["status"] == "unsupported_kind" assert result["human_review_required"] is True assert result["counts"]["unsupported_kind"] == 1 assert result["unsupported_kind_records"] == [ { "unit_index": 1, "source_section_ids": ["MOCK_S1"], "apply_status": "unsupported_kind_for_reject_route:builder_options_patch", } ] # ─── Case 4 : gather error → status='error' (highest priority) ────── def test_gather_error_marks_status_error_with_records(): """``record['error']`` set means ``gather_step12_ai_repair_proposals`` caught a router exception (AI call / validator). status='error' is the highest-priority failure axis. IMP-92 u3 — non-Anthropic exception path leaves ``api_error_kind`` as ``None``; the summary retains ``None`` per-record and does not increment any operational kind bucket.""" records = [_record( unit_index=2, error="ValueError: missing slot 'title'", source_section_ids=["MOCK_S2"], )] result = _summarize_ai_repair_status(records, _OK_COVERAGE) assert result["status"] == "error" assert result["human_review_required"] is True assert result["counts"]["error"] == 1 assert result["error_records"] == [ { "unit_index": 2, "source_section_ids": ["MOCK_S2"], "error": "ValueError: missing slot 'title'", "api_error_kind": None, } ] assert result["api_error_kinds"] == { "quota": 0, "billing": 0, "auth": 0, "other": 0, } # ─── IMP-92 u3 : api_error_kind propagation + aggregation ─────────── def test_api_error_kind_quota_propagates_to_summary_and_record(): """Step 12 (u2) stamps ``api_error_kind='quota'`` on a 429 Anthropic exception path. u8 must surface that kind per-record and increment the ``quota`` bucket in ``api_error_kinds``.""" records = [_record( unit_index=3, error="RateLimitError: 429", source_section_ids=["MOCK_S3"], api_error_kind="quota", )] result = _summarize_ai_repair_status(records, _OK_COVERAGE) assert result["status"] == "error" assert result["human_review_required"] is True assert result["error_records"] == [ { "unit_index": 3, "source_section_ids": ["MOCK_S3"], "error": "RateLimitError: 429", "api_error_kind": "quota", } ] assert result["api_error_kinds"] == { "quota": 1, "billing": 0, "auth": 0, "other": 0, } def test_api_error_kinds_aggregate_across_all_operational_axes(): """Mixed batch — one of each operational kind (quota / billing / auth / other). Aggregation must count each axis exactly once and keep per-record kinds intact (order preserved).""" records = [ _record(unit_index=0, error="RateLimitError", api_error_kind="quota"), _record(unit_index=1, error="PermissionDeniedError", api_error_kind="billing"), _record(unit_index=2, error="AuthenticationError", api_error_kind="auth"), _record(unit_index=3, error="BadRequestError", api_error_kind="other"), ] result = _summarize_ai_repair_status(records, _OK_COVERAGE) assert result["status"] == "error" assert result["counts"]["error"] == 4 assert result["api_error_kinds"] == { "quota": 1, "billing": 1, "auth": 1, "other": 1, } assert [rec["api_error_kind"] for rec in result["error_records"]] == [ "quota", "billing", "auth", "other", ] # ─── Case 5 : coverage violated → status='coverage_violated' ──────── def test_coverage_violation_surfaces_dropped_sections(): """u7 coverage_invariant 'violated' means the AI repair dropped a section_id from the post-AI superset. dropped 절대 룰 — surface as human_review_required.""" records = [_record(apply_status="applied:partial_overrides")] result = _summarize_ai_repair_status(records, _VIOLATED_COVERAGE) assert result["status"] == "coverage_violated" assert result["human_review_required"] is True assert result["coverage_status"] == "violated" assert result["dropped_section_ids"] == ["MOCK_S2"] # ─── Case 6 : priority order — error > coverage > unsupported ─────── def test_error_dominates_over_coverage_and_unsupported(): """When multiple failure axes coexist, priority order is error > coverage_violated > unsupported_kind > applied > ok.""" records = [ _record(unit_index=0, error="RuntimeError"), _record(unit_index=1, apply_status="unsupported_kind_for_reject_route:slot_mapping_proposal"), _record(unit_index=2, apply_status="applied:partial_overrides"), ] result = _summarize_ai_repair_status(records, _VIOLATED_COVERAGE) assert result["status"] == "error" assert result["human_review_required"] is True assert result["counts"]["error"] == 1 assert result["counts"]["unsupported_kind"] == 1 assert result["counts"]["applied"] == 1 # ─── Case 7 : no_proposal + no_zone_match counted, not failure ────── def test_no_proposal_and_no_zone_match_do_not_trigger_human_review(): """Flag-off short-circuit, not_provisional, route_not_ai_adaptation, and B4-mismatch (no_zone_match) are structural skips — not AI failures. They count but do not flip human_review_required.""" records = [ _record(unit_index=0, apply_status="no_proposal"), _record(unit_index=1, apply_status="no_zone_match"), ] result = _summarize_ai_repair_status(records, _OK_COVERAGE) assert result["status"] == "ok" assert result["human_review_required"] is False assert result["counts"]["no_proposal"] == 1 assert result["counts"]["no_zone_match"] == 1