- u1: separate templates/phase_z2/catalog/v4_fallback_policy.yaml + load_v4_fallback_policy() loader (catalog pollution prevention — Codex #1 correction) - u2: dynamic effective max_rank in lookup_v4_match_with_fallback (3-variable ceiling min, Codex #2 correction: min(configured, len(judgments_full32))) + 3-tier usable predicate (status + catalog + optional capacity) + trace 8 fields (requested/default/configured_extended/ judgments_count/effective_extended_ceiling/effective_max_rank/usable_count/policy_applied) - u3: 2 production call site cleanup (max_rank=3 removed, HEAD baseline) + tracked Front/vite.config.ts PHASE_Z_MAX_RANK env retired + 4 regression scenarios verified: 32 passed (IMP-38 focused scope) — IMP-05 L4 dedup / L2 schema preserved, IMP-30 allow_provisional byte-identical, caller_override backward compat (tests) Stage cycle (#67, 7 round Claude + 5 round Codex): - Stage 1: Claude #1 -> Codex #1 YES + 5 corrections - Stage 2 r1+r2: Claude #2-#4 -> Codex #2 Q2 -> Codex #3 YES (4 round consensus LOCK 23195) - Stage 3 U1+U2+U3: Claude #5-#9 -> Codex #6 NO 4to3 correction -> Codex #7 YES -> Codex #8 YES - Stage 4: Claude #11 -> Codex #9 (anchor attribution nuance) -> Codex #10 readiness -> Codex #11 Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
160 lines
6.6 KiB
Python
160 lines
6.6 KiB
Python
"""IMP-38 U2 — dynamic effective max_rank + trace 8-field + 3-tier usable predicate.
|
|
|
|
Verify:
- max_rank=None (default) → policy applied (usable_count + effective_max_rank are determined by the policy)
- max_rank=int (caller override) → that value used as-is (backward compat)
- trace contains 8 IMP-38 fields + legacy "max_rank" alias
- usable_count >= threshold → default_max_rank (mdx03 normal case)
- usable_count < threshold → effective_extended_ceiling (mdx05-2 extended case)
- effective_extended_ceiling = min(configured, len(judgments_full32)) (Codex #2)
- IMP-30 allow_provisional byte-identical (provisional synthesized after chain_exhausted)

4-round consensus (#67):
- Codex #1: separate yaml (prevents catalog pollution)
- Codex #2: min(configured, len(judgments)) correction
- Codex #3: load_frame_contracts() shape unchanged
|
|
"""
|
|
from __future__ import annotations
|
|
|
|
import pytest
|
|
|
|
|
|
@pytest.fixture(autouse=True)
def _reset_policy_cache():
    """Clear the mapper's cached V4 fallback policy around every test.

    The module attribute ``_V4_FALLBACK_POLICY_CACHE`` on
    ``src.phase_z2_mapper`` is set to ``None`` both before the test body runs
    and again afterwards, so no test observes a value cached by another one.
    """
    import src.phase_z2_mapper as policy_module

    def _clear_cache() -> None:
        policy_module._V4_FALLBACK_POLICY_CACHE = None

    _clear_cache()  # setup: start from an empty cache
    yield
    _clear_cache()  # teardown: leave nothing behind for the next test
|
|
|
|
|
|
def _make_v4_section(judgments: list[dict]) -> dict:
    """Wrap *judgments* in a minimal V4 fixture.

    The result has the shape
    ``{"mdx_sections": {"sec-1": {"judgments_full32": judgments}}}``; the
    list is stored as-is (not copied) under the fixed section id ``"sec-1"``.
    """
    section_payload = {"judgments_full32": judgments}
    sections_by_id = {"sec-1": section_payload}
    return {"mdx_sections": sections_by_id}
|
|
|
|
|
|
def _judgment(template_id: str, label: str, confidence: float = 0.5, frame_id: int = 0) -> dict:
    """Build one V4 judgment entry for use in a ``judgments_full32`` list.

    Args:
        template_id: Template identifier stored under ``"template_id"``.
        label: Judgment label stored under ``"label"`` (e.g. ``"reject"``).
        confidence: Value stored under ``"confidence"``.
        frame_id: Explicit frame id. A falsy value (the default ``0``) means
            "derive an id in ``[0, 10000)`` from *template_id*".

    Returns:
        A dict with the keys ``template_id``, ``frame_id``, ``frame_number``
        (always ``0``), ``confidence`` and ``label``.
    """
    import zlib

    if not frame_id:
        # Built-in hash() of a str is salted per process (PYTHONHASHSEED), so
        # it would give different fixture ids on every test run. CRC32 is
        # stable across runs and platforms.
        frame_id = zlib.crc32(template_id.encode("utf-8")) % 10000

    return {
        "template_id": template_id,
        "frame_id": frame_id,
        "frame_number": 0,
        "confidence": confidence,
        "label": label,
    }
|
|
|
|
|
|
# ─── U2 Test: caller override (backward compat) ────────────────────
|
|
|
|
|
|
def test_caller_override_uses_explicit_max_rank():
    """An explicit ``max_rank=3`` is used as-is and reported as ``caller_override``."""
    from src.phase_z2_pipeline import lookup_v4_match_with_fallback

    section_payload = _make_v4_section(
        [_judgment(f"t{i}", "reject") for i in range(5)]
    )
    _, trace = lookup_v4_match_with_fallback(section_payload, "sec-1", max_rank=3)

    assert trace["policy_applied"] == "caller_override"
    # Both the IMP-38 field and the legacy "max_rank" alias must report 3.
    for rank_key in ("effective_max_rank", "max_rank"):
        assert trace[rank_key] == 3
|
|
|
|
|
|
def test_caller_override_max_rank_5_used_directly():
    """An explicit ``max_rank=5`` becomes ``effective_max_rank`` (policy ignored)."""
    from src.phase_z2_pipeline import lookup_v4_match_with_fallback

    ten_rejects = [_judgment(f"t{i}", "reject") for i in range(10)]
    _, trace = lookup_v4_match_with_fallback(
        _make_v4_section(ten_rejects),
        "sec-1",
        max_rank=5,
    )

    assert trace["policy_applied"] == "caller_override"
    assert trace["effective_max_rank"] == 5
|
|
|
|
|
|
# ─── U2 Test: 8 trace fields presence ──────────────────────────────
|
|
|
|
|
|
def test_trace_contains_8_imp38_fields():
    """The trace must expose all 8 IMP-38 fields plus the legacy ``max_rank`` alias."""
    from src.phase_z2_pipeline import lookup_v4_match_with_fallback

    imp38_fields = (
        "requested_max_rank",
        "default_max_rank",
        "configured_extended_max_rank",
        "judgments_count",
        "effective_extended_ceiling",
        "effective_max_rank",
        "usable_count",
        "policy_applied",
    )
    legacy_aliases = ("max_rank",)

    section_payload = _make_v4_section(
        [_judgment(f"t{i}", "reject") for i in range(3)]
    )
    _, trace = lookup_v4_match_with_fallback(section_payload, "sec-1")

    # Report every absent key at once instead of failing on the first one.
    missing = {*imp38_fields, *legacy_aliases} - set(trace.keys())
    assert not missing, f"missing IMP-38 trace fields: {missing}"
|
|
|
|
|
|
# ─── U2 Test: Codex #2 correction — min(configured, len(judgments_full32)) ──
|
|
|
|
|
|
def test_effective_extended_ceiling_is_min_of_configured_and_judgments_count():
    """Codex #2 LOCK — with fewer judgments than configured, the ceiling is judgments_count."""
    from src.phase_z2_pipeline import lookup_v4_match_with_fallback

    # Only five judgments: fewer than the configured extended max
    # (32 according to the original note on this test).
    judgment_total = 5
    section_payload = _make_v4_section(
        [_judgment(f"t{i}", "reject") for i in range(judgment_total)]
    )
    _, trace = lookup_v4_match_with_fallback(section_payload, "sec-1")

    assert trace["judgments_count"] == judgment_total
    # min(32, 5) == 5
    assert trace["effective_extended_ceiling"] == judgment_total
|
|
|
|
|
|
# ─── U2 Test: no_judgments path ──────────────────────────────────
|
|
|
|
|
|
def test_no_judgments_path():
    """An empty judgment list yields ``no_judgments`` and the default max rank."""
    from src.phase_z2_pipeline import lookup_v4_match_with_fallback

    _, trace = lookup_v4_match_with_fallback(_make_v4_section([]), "sec-1")

    expected_values = {
        "policy_applied": "no_judgments",
        "judgments_count": 0,
        # With nothing to rank, the effective rank is the configured default.
        "effective_max_rank": trace["default_max_rank"],
        "fallback_reason": "empty_v4_judgments",
    }
    for field, wanted in expected_values.items():
        assert trace[field] == wanted
|
|
|
|
|
|
# ─── U2 Test: no_v4_section ─────────────────────────────────────
|
|
|
|
|
|
def test_no_v4_section_path():
    """An unknown section id reports ``no_v4_section`` and still carries IMP-38 fields.

    Only ``policy_applied`` and ``effective_max_rank`` are spot-checked here,
    not the full set of 8 fields.
    """
    from src.phase_z2_pipeline import lookup_v4_match_with_fallback

    empty_v4 = {"mdx_sections": {}}
    _, trace = lookup_v4_match_with_fallback(empty_v4, "unknown-sec")

    assert trace["fallback_reason"] == "no_v4_section"
    # The IMP-38 fields must be present even though no section was found.
    for field in ("policy_applied", "effective_max_rank"):
        assert field in trace
|
|
|
|
|
|
# ─── U2 Test: chain_exhausted message reflects effective_max_rank ──
|
|
|
|
|
|
def test_chain_exhausted_message_includes_effective_max_rank():
    """On ``chain_exhausted``, ``fallback_reason`` must carry a recognised reason marker.

    The stated intent is that the message reflects the dynamic
    ``effective_max_rank`` rather than a hardcoded "1_to_3".

    NOTE(review): the assertion only checks that ``fallback_reason`` contains
    one of the known markers; it does not inspect the rank value inside the
    message. Tighten once the message format is confirmed.
    """
    from src.phase_z2_pipeline import lookup_v4_match_with_fallback

    # Three "reject" judgments whose template ids are assumed NOT to be
    # registered in the catalog.
    judgments = [_judgment(f"unregistered_t{i}", "reject") for i in range(3)]
    v4 = _make_v4_section(judgments)
    _match, trace = lookup_v4_match_with_fallback(v4, "sec-1", max_rank=3)

    if trace["selection_path"] != "chain_exhausted":
        # Skip rather than pass silently, so a run that never reached the
        # chain_exhausted scenario shows up in the test report.
        pytest.skip(
            "precondition not met: selection_path="
            f"{trace['selection_path']!r}, expected 'chain_exhausted'"
        )

    # Per the original note: first_skip_reason takes precedence when present,
    # otherwise the default message is used — hence several accepted markers.
    reason = trace["fallback_reason"]
    accepted_markers = ("no_auto_renderable", "phase_z_status", "no_contract")
    assert reason is not None and any(marker in reason for marker in accepted_markers)
|