feat(#67): IMP-38 V4 max_rank policy formalization (u1~u3, 4 round consensus)

- u1: separate templates/phase_z2/catalog/v4_fallback_policy.yaml + load_v4_fallback_policy() loader (catalog pollution prevention — Codex #1 correction) - u2: dynamic effective max_rank in lookup_v4_match_with_fallback (3-variable ceiling min, Codex #2 correction: min(configured, len(judgments_full32))) + 3-tier usable predicate (status + catalog + optional capacity) + trace 8 fields (requested/default/configured_extended/ judgments_count/effective_extended_ceiling/effective_max_rank/usable_count/policy_applied) - u3: 2 production call site cleanup (max_rank=3 removed, HEAD baseline) + tracked Front/vite.config.ts PHASE_Z_MAX_RANK env retired + 4 regression scenarios verified: 32 passed (IMP-38 focused scope) — IMP-05 L4 dedup / L2 schema preserved, IMP-30 allow_provisional byte-identical, caller_override backward compat (tests) Stage cycle (#67, 7 round Claude + 5 round Codex): - Stage 1: Claude #1 -> Codex #1 YES + 5 corrections - Stage 2 r1+r2: Claude #2-#4 -> Codex #2 Q2 -> Codex #3 YES (4 round consensus LOCK 23195) - Stage 3 U1+U2+U3: Claude #5-#9 -> Codex #6 NO 4to3 correction -> Codex #7 YES -> Codex #8 YES - Stage 4: Claude #11 -> Codex #9 (anchor attribution nuance) -> Codex #10 readiness -> Codex #11 Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
2026-05-21 22:14:05 +09:00
parent dceb10129f
commit 90503cadd6
7 changed files with 576 additions and 15 deletions
--- a/Front/vite.config.ts
+++ b/Front/vite.config.ts
@@ -346,8 +346,10 @@ function vitePluginPhaseZApi(): Plugin {
          const pythonExe = process.platform === "win32" ? "python.exe" : "python";
          // 2026-05-14 — env toggle forward (보고용 일회성).
          //   PHASE_Z_ALLOW_RESTRUCTURE / PHASE_Z_ALLOW_REJECT : status 통과
-          //   PHASE_Z_MAX_RANK=32 : V4 fallback chain 의 max_rank 확대 (등록 frame 까지 검색)
+          // 2026-05-21 — IMP-38 retire PHASE_Z_MAX_RANK env (never read by backend).
-          // 04-1 (all reject) / 05-2 (rank 1~3 미등록) 등 자동 매칭 가능.
+          //   v4 fallback chain max_rank 는 templates/phase_z2/catalog/v4_fallback_policy.yaml 의
          //   정식 정책 (dynamic_usable_count_based) 으로 결정 — backend src/phase_z2_pipeline.py
          //   의 lookup_v4_match_with_fallback() 가 load_v4_fallback_policy() 로 적용.
          const proc = spawn(pythonExe, cliArgs, {
            cwd: DESIGN_AGENT_ROOT,
            shell: false,
@@ -355,7 +357,6 @@ function vitePluginPhaseZApi(): Plugin {
              ...process.env,
              PHASE_Z_ALLOW_RESTRUCTURE: "1",
              PHASE_Z_ALLOW_REJECT: "1",
              PHASE_Z_MAX_RANK: "32",
            },
          });
--- a/src/phase_z2_mapper.py
+++ b/src/phase_z2_mapper.py
@@ -32,6 +32,7 @@ import yaml
 PROJECT_ROOT = Path(__file__).parent.parent
 CATALOG_PATH = PROJECT_ROOT / "templates" / "phase_z2" / "catalog" / "frame_contracts.yaml"
 V4_FALLBACK_POLICY_PATH = PROJECT_ROOT / "templates" / "phase_z2" / "catalog" / "v4_fallback_policy.yaml"
 class FitError(Exception):
@@ -57,6 +58,44 @@ def get_contract(template_id: str) -> dict | None:
    return load_frame_contracts().get(template_id)
 # ─── V4 fallback policy loading (IMP-38) ──────────────────────────
 # Cache filled on first use by load_v4_fallback_policy(); None means "not loaded yet".
 _V4_FALLBACK_POLICY_CACHE: dict | None = None
 # Built-in policy: returned (as a copy) when the policy YAML file is missing, and
 # used as the base layer that keys read from the YAML file are merged over.
 _V4_FALLBACK_POLICY_DEFAULT: dict = {
    "policy_type": "static",
    "usable_threshold": 1,
    "default_max_rank": 3,
    "extended_max_rank": 3,  # graceful: no extension when the yaml is absent (byte-identical to pre-IMP-38)
 }
 def load_v4_fallback_policy() -> dict:
    """Load and cache the IMP-38 V4 fallback policy.

    The policy lives in its own YAML file (``V4_FALLBACK_POLICY_PATH``) instead
    of ``frame_contracts.yaml`` so the frame catalog is not polluted with
    non-frame keys. This loader keeps its own module-level cache.

    Resolution:
        * YAML file missing or empty -> a copy of ``_V4_FALLBACK_POLICY_DEFAULT``
          (``default_max_rank=3``, ``extended_max_rank=3``), i.e. no extension.
        * YAML file present -> the defaults overlaid with the YAML mapping.
          Keys that are absent *or explicitly null* keep their default value.

    Returns:
        The cached policy dict containing at least ``policy_type``,
        ``usable_threshold``, ``default_max_rank`` and ``extended_max_rank``.
        Extra YAML keys are passed through unchanged. The same dict instance
        is returned on every call, so callers must not mutate it.

    Raises:
        TypeError: if the YAML document's top level is not a mapping.
    """
    global _V4_FALLBACK_POLICY_CACHE
    if _V4_FALLBACK_POLICY_CACHE is not None:
        return _V4_FALLBACK_POLICY_CACHE

    policy = dict(_V4_FALLBACK_POLICY_DEFAULT)
    if V4_FALLBACK_POLICY_PATH.exists():
        loaded = yaml.safe_load(V4_FALLBACK_POLICY_PATH.read_text(encoding="utf-8")) or {}
        if not isinstance(loaded, dict):
            # Same exception type the ``**`` merge used to raise, but with a
            # message that points at the offending file.
            raise TypeError(
                f"{V4_FALLBACK_POLICY_PATH} must contain a YAML mapping at top level, "
                f"got {type(loaded).__name__}"
            )
        # A key written without a value (``extended_max_rank:``) parses as None.
        # Treat it like a missing key so the default survives and downstream
        # ``int(...)`` conversions never receive None.
        policy.update(
            (key, value) for key, value in loaded.items() if value is not None
        )

    _V4_FALLBACK_POLICY_CACHE = policy
    return policy
 # ─── Source-shape splitters ──────────────────────────────────────
 def _split_top_bullets(content: str) -> list[tuple[str, list[str]]]:
--- a/src/phase_z2_pipeline.py
+++ b/src/phase_z2_pipeline.py
@@ -52,6 +52,7 @@ from phase_z2_mapper import (
    compute_capacity_fit,
    get_contract,
    load_frame_contracts,
    load_v4_fallback_policy,
    map_with_contract,
 )
 from phase_z2_classifier import classify_visual_runtime_check
@@ -589,34 +590,106 @@ def lookup_v4_match_with_fallback(
    section_id: str,
    *,
    raw_content: Optional[str] = None,
-    max_rank: int = 3,
+    max_rank: Optional[int] = None,
    alias_keys: Optional[list] = None,
    allow_provisional: bool = False,
 ) -> tuple[Optional[V4Match], dict]:
-    """Select V4 rank-1, or promote rank-2/3 when rank-1 is not auto-renderable.
+    """Select V4 rank-1, or promote rank-2..N when rank-1 is not auto-renderable.
    This is an IMP-05 selector only. It uses existing V4 labels, frame-contract
    presence, and the Phase Z capacity precheck; it does not call calculate_fit.
-    IMP-30 u1 — when ``allow_provisional=True`` and the rank-1..max_rank chain
+    IMP-30 u1 — when ``allow_provisional=True`` and the rank-1..effective_max_rank
-    is exhausted (no candidate passes MVP1 filter + contract + capacity), the
+    chain is exhausted (no candidate passes MVP1 filter + contract + capacity),
-    selector synthesizes a *provisional* V4Match from the rank-1 judgment so
+    the selector synthesizes a *provisional* V4Match from the rank-1 judgment so
    the first-render invariant can be satisfied downstream. The synthesized
    match carries ``provisional=True``, ``selection_path="provisional_rank_1"``,
    and ``fallback_reason`` mirrors the existing chain-exhaust reason. The
    candidate trace shape is unchanged (synthetic injection only updates the
    top-level ``selection_path`` + ``selected_*`` mirrors). When the rank-1
    judgment itself is missing (``empty_v4_judgments`` / ``no_v4_section``),
-    no provisional is synthesized — the caller (u3 / u4) handles those cases
+    no provisional is synthesized — the caller handles those cases with a
-    with a placeholder zone or empty-shell.
+    placeholder zone or empty-shell.
    Default ``allow_provisional=False`` keeps the IMP-05 behavior byte-identical.
    IMP-38 — dynamic effective max_rank via ``load_v4_fallback_policy()``
    (4 round 합의 / Codex #1~#3 + Claude #1~#4 LOCK at #67 comment 23195):
      - ``max_rank=None`` (default) → policy applied:
          usable_count = candidates in rank 1..default_max_rank passing 3-tier
          predicate (status in MVP1 + catalog registered + optional capacity).
          usable_count >= usable_threshold → effective_max_rank = default_max_rank.
          Otherwise → effective_max_rank = min(extended_max_rank,
          len(judgments_full32)) = effective_extended_ceiling (Codex #2 정정).
      - ``max_rank`` explicitly passed → caller_override: that value is used
          as-is (backward compat for tests / explicit IMP-05/IMP-30 paths).
    Trace gains 8 IMP-38 fields: ``requested_max_rank``, ``default_max_rank``,
    ``configured_extended_max_rank``, ``judgments_count``,
    ``effective_extended_ceiling``, ``effective_max_rank``, ``usable_count``,
    ``policy_applied``. ``max_rank`` legacy field kept as alias for backward
    compat (= effective_max_rank).
    """
    resolved = _resolve_v4_section_key(v4, section_id, alias_keys=alias_keys)
    sec = v4.get("mdx_sections", {}).get(resolved) if resolved else None
    all_judgments = (sec.get("judgments_full32") if sec else None) or []
    judgments_count = len(all_judgments)
    # IMP-38 — load policy (graceful: yaml 없을 시 default_max_rank=3, extended=3)
    _policy = load_v4_fallback_policy()
    default_max_rank = int(_policy.get("default_max_rank", 3))
    configured_extended_max_rank = int(_policy.get("extended_max_rank", default_max_rank))
    usable_threshold = int(_policy.get("usable_threshold", 1))
    # Codex #2 정정: min(configured, len(judgments_full32)) — yaml ceiling 무력화 방지
    effective_extended_ceiling = min(configured_extended_max_rank, judgments_count) if judgments_count else default_max_rank
    usable_count: Optional[int] = None  # set only when policy path active
    if max_rank is not None:
        # caller override (backward compat — explicit IMP-05/IMP-30 paths, tests)
        effective_max_rank = int(max_rank)
        policy_applied = "caller_override"
    elif judgments_count == 0:
        # no judgments — slicing 빈 list 라 어차피 영향 X
        effective_max_rank = default_max_rank
        policy_applied = "no_judgments"
    else:
        # IMP-38 policy path — 3-tier predicate usable_count on default window
        usable_count = 0
        default_window = all_judgments[:default_max_rank]
        for _j in default_window:
            _m = _v4_match_from_judgment(section_id, _j, rank=0)
            if to_phase_z_status(_m) not in MVP1_ALLOWED_STATUSES:
                continue
            if get_contract(_m.template_id) is None:
                continue
            if raw_content is not None:
                _cap = compute_capacity_fit(_m.template_id, raw_content)
                if _cap and _cap.get("fit_status") not in {
                    "ok", "no_contract", "unknown_source_shape",
                }:
                    continue
            usable_count += 1
        if usable_count >= usable_threshold:
            effective_max_rank = default_max_rank
            policy_applied = "default_max_rank"
        else:
            effective_max_rank = effective_extended_ceiling
            policy_applied = "extended_max_rank"
    trace = {
        "section_id": section_id,
-        "max_rank": max_rank,
+        # IMP-38 — 8 trace fields (4 round LOCK)
        "requested_max_rank": max_rank,
        "default_max_rank": default_max_rank,
        "configured_extended_max_rank": configured_extended_max_rank,
        "judgments_count": judgments_count,
        "effective_extended_ceiling": effective_extended_ceiling,
        "effective_max_rank": effective_max_rank,
        "usable_count": usable_count,
        "policy_applied": policy_applied,
        # legacy alias for backward compat (= effective_max_rank)
        "max_rank": effective_max_rank,
        "selection_path": "no_v4_candidate",
        "selected_rank": None,
        "selected_template_id": None,
@@ -630,7 +703,7 @@ def lookup_v4_match_with_fallback(
        trace["fallback_reason"] = "no_v4_section"
        return None, trace
-    judgments = (sec.get("judgments_full32") or [])[:max_rank]
+    judgments = all_judgments[:effective_max_rank]
    if not judgments:
        trace["fallback_reason"] = "empty_v4_judgments"
        return None, trace
@@ -712,7 +785,7 @@ def lookup_v4_match_with_fallback(
        trace["candidates"].append(candidate_trace)
    trace["selection_path"] = "chain_exhausted"
-    trace["fallback_reason"] = first_skip_reason or "no_auto_renderable_rank_1_to_3"
+    trace["fallback_reason"] = first_skip_reason or f"no_auto_renderable_rank_1_to_{effective_max_rank}"
    # IMP-30 u1 — opt-in provisional first-render synthesis. When the caller
    # signals allow_provisional, promote rank-1 judgment as a provisional
@@ -3218,7 +3291,6 @@ def run_phase_z2_mvp1(
            v4,
            sid,
            raw_content=section_content_by_id.get(sid),
            max_rank=3,
            alias_keys=section_alias_by_id.get(sid),
        )
        v4_fallback_traces[sid] = trace
@@ -3434,7 +3506,6 @@ def run_phase_z2_mvp1(
                    v4,
                    sid,
                    raw_content=section_content_by_id.get(sid),
                    max_rank=3,
                    alias_keys=section_alias_by_id.get(sid),
                    allow_provisional=True,
                )
--- a/templates/phase_z2/catalog/v4_fallback_policy.yaml
+++ b/templates/phase_z2/catalog/v4_fallback_policy.yaml
@@ -0,0 +1,45 @@
 # IMP-38 V4 max_rank 정책 — separate yaml (catalog 오염 방지)
 #
 # 도입 배경:
 #   기존 `lookup_v4_match_with_fallback(max_rank=3)` hardcoded → rank 4~32 의 등록 frame 도달 못함
 #   mdx05-2 같이 V4 rank 1~9 가 catalog 미등록 + rank 10~ 등록 case → chain_exhausted → unit 생성 X
 #
 # 4 round 합의 (IMP-38 #67):
 #   - Codex #1: frame_contracts.yaml 오염 회피 → 별 yaml 파일 (이 파일)
 #   - Codex #2: 3 변수 분리 (configured / judgments / catalog count)
 #   - Codex #2: effective_extended_ceiling = min(configured, len(judgments_full32))
 #
 # 적용 path: src/phase_z2_mapper.py 의 load_v4_fallback_policy() loader
 #           + src/phase_z2_pipeline.py 의 lookup_v4_match_with_fallback() 동적 max_rank logic
 policy_type: dynamic_usable_count_based
 # usable_threshold N:
 #   rank 1~default_max_rank 중 "usable" predicate 충족 frame 수 >= N → default_max_rank 유지
 #   < N → extended_max_rank 로 확장
 usable_threshold: 1
 # default_max_rank:
 #   normal case (usable_count >= threshold) 의 fallback chain 길이
 #   mdx03 같이 rank 1 use_as_is 매칭 잘 되는 case 보호
 default_max_rank: 3
 # extended_max_rank:
 #   usable_count < threshold case 의 확장 ceiling
 #   mdx05-2 같이 rank 1~9 미등록 case 처리
 #   ★ 실제 effective_extended_ceiling = min(extended_max_rank, len(judgments_full32))
 #     (Codex #2 정정: yaml ceiling 무력화 방지 + V4 schema 범위 초과 방지)
 extended_max_rank: 32
 # usable predicate (3-tier):
 #   (a) phase_z_status in MVP1_ALLOWED_STATUSES (matched_zone / adapt_matched_zone)
 #   (b) get_contract(template_id) is not None (catalog 등록)
 #   (c) capacity_fit ok (raw_content 제공 시만 — optional)
 # 의미 신뢰 vs catalog presence trade-off:
 #   N=1 = 가장 보수 (rank 1~default_max_rank 중 1개라도 usable 시 확장 X — mdx03 정상 case 보호)
 #   default_max_rank=3 = 의미 신뢰 범위 (V4 rank 1~3)
 #   extended_max_rank=32 = catalog presence fallback (rank 4~32)
 # graceful fallback (yaml 없을 시):
 #   loader 가 default {default_max_rank: 3, extended_max_rank: 3} 로 fall through (backward compat)
--- a/tests/test_dynamic_max_rank.py
+++ b/tests/test_dynamic_max_rank.py
@@ -0,0 +1,159 @@
 """IMP-38 U2 — dynamic effective max_rank + trace 8-field + 3-tier usable predicate.
 Verify:
  - max_rank=None (default) → policy applied (usable_count + effective_max_rank 결정)
  - max_rank=int (caller override) → that value used as-is (backward compat)
  - trace contains 8 IMP-38 fields + legacy "max_rank" alias
  - usable_count >= threshold → default_max_rank (mdx03 정상 case)
  - usable_count < threshold → effective_extended_ceiling (mdx05-2 확장 case)
  - effective_extended_ceiling = min(configured, len(judgments_full32)) (Codex #2)
  - IMP-30 allow_provisional byte-identical (chain_exhausted 후 provisional 합성)
 4 round 합의 (#67):
  - Codex #1: 별 yaml (catalog 오염 방지)
  - Codex #2: min(configured, len(judgments)) 정정
  - Codex #3: load_frame_contracts() shape 무변
 """
 from __future__ import annotations
 import pytest
@pytest.fixture(autouse=True)
 def _reset_policy_cache():
    """Reset module-level _V4_FALLBACK_POLICY_CACHE for test isolation.

    Runs automatically around every test in this module.
    """
    import src.phase_z2_mapper as mapper
    # Clear before the test so a previously cached policy is not reused.
    mapper._V4_FALLBACK_POLICY_CACHE = None
    yield
    # Clear again afterwards so nothing cached by this test leaks onward.
    mapper._V4_FALLBACK_POLICY_CACHE = None
 def _make_v4_section(judgments: list[dict]) -> dict:
    """Wrap *judgments* in a minimal V4 payload under section id ``sec-1``.

    The list is stored as ``mdx_sections["sec-1"]["judgments_full32"]``.
    """
    section = {"judgments_full32": judgments}
    return {"mdx_sections": {"sec-1": section}}
 def _judgment(template_id: str, label: str, confidence: float = 0.5, frame_id: int = 0) -> dict:
    """Build one V4 judgment entry for test fixtures.

    When ``frame_id`` is falsy (the default ``0``), a pseudo id in ``0..9999``
    is derived from ``template_id`` with CRC-32. The builtin ``hash()`` is
    avoided because str hashing is salted per interpreter process, which made
    the generated fixture differ from one test run to the next.
    """
    import zlib

    derived_frame_id = zlib.crc32(template_id.encode("utf-8")) % 10000
    return {
        "template_id": template_id,
        "frame_id": frame_id or derived_frame_id,
        "frame_number": 0,
        "confidence": confidence,
        "label": label,
    }
 # ─── U2 Test: caller override (backward compat) ────────────────────
 def test_caller_override_uses_explicit_max_rank():
    """An explicit ``max_rank=3`` is used verbatim and reported as ``caller_override``."""
    from src.phase_z2_pipeline import lookup_v4_match_with_fallback
    rejected = [_judgment(f"t{idx}", "reject") for idx in range(5)]
    _unused, trace = lookup_v4_match_with_fallback(
        _make_v4_section(rejected), "sec-1", max_rank=3
    )
    assert trace["policy_applied"] == "caller_override"
    assert trace["effective_max_rank"] == 3
    # The legacy "max_rank" key must mirror effective_max_rank.
    assert trace["max_rank"] == 3
 def test_caller_override_max_rank_5_used_directly():
    """An explicit ``max_rank=5`` bypasses the policy: effective_max_rank is 5."""
    from src.phase_z2_pipeline import lookup_v4_match_with_fallback
    rejected = [_judgment(f"t{idx}", "reject") for idx in range(10)]
    _unused, trace = lookup_v4_match_with_fallback(
        _make_v4_section(rejected), "sec-1", max_rank=5
    )
    assert trace["policy_applied"] == "caller_override"
    assert trace["effective_max_rank"] == 5
 # ─── U2 Test: 8 trace fields presence ──────────────────────────────
 def test_trace_contains_8_imp38_fields():
    """The trace must expose all eight IMP-38 keys plus the legacy ``max_rank`` alias."""
    from src.phase_z2_pipeline import lookup_v4_match_with_fallback
    required_keys = (
        "requested_max_rank",
        "default_max_rank",
        "configured_extended_max_rank",
        "judgments_count",
        "effective_extended_ceiling",
        "effective_max_rank",
        "usable_count",
        "policy_applied",
        "max_rank",  # legacy alias
    )
    payload = _make_v4_section([_judgment(f"t{idx}", "reject") for idx in range(3)])
    _unused, trace = lookup_v4_match_with_fallback(payload, "sec-1")
    missing = set(required_keys).difference(trace)
    assert not missing, f"missing IMP-38 trace fields: {missing}"
 # ─── U2 Test: Codex #2 정정 — min(configured, len(judgments_full32)) ──
 def test_effective_extended_ceiling_is_min_of_configured_and_judgments_count():
    """With fewer judgments than the configured ceiling, the ceiling equals the judgment count."""
    from src.phase_z2_pipeline import lookup_v4_match_with_fallback
    # Only five judgments: fewer than the configured extended_max_rank (32).
    five_rejects = [_judgment(f"t{idx}", "reject") for idx in range(5)]
    _unused, trace = lookup_v4_match_with_fallback(_make_v4_section(five_rejects), "sec-1")
    assert trace["judgments_count"] == 5
    # min(32, 5) == 5
    assert trace["effective_extended_ceiling"] == 5
 # ─── U2 Test: no_judgments path ──────────────────────────────────
 def test_no_judgments_path():
    """An empty judgment list yields ``no_judgments`` with the default max rank."""
    from src.phase_z2_pipeline import lookup_v4_match_with_fallback
    empty_payload = _make_v4_section([])
    _unused, trace = lookup_v4_match_with_fallback(empty_payload, "sec-1")
    assert trace["policy_applied"] == "no_judgments"
    assert trace["judgments_count"] == 0
    default_rank = trace["default_max_rank"]
    assert trace["effective_max_rank"] == default_rank
    assert trace["fallback_reason"] == "empty_v4_judgments"
 # ─── U2 Test: no_v4_section ─────────────────────────────────────
 def test_no_v4_section_path():
    """An unknown section id reports ``no_v4_section`` and still carries the IMP-38 keys."""
    from src.phase_z2_pipeline import lookup_v4_match_with_fallback
    _unused, trace = lookup_v4_match_with_fallback({"mdx_sections": {}}, "unknown-sec")
    assert trace["fallback_reason"] == "no_v4_section"
    # The IMP-38 keys must be present even when no section was resolved.
    for imp38_key in ("policy_applied", "effective_max_rank"):
        assert imp38_key in trace
 # ─── U2 Test: chain_exhausted message reflects effective_max_rank ──
 def test_chain_exhausted_message_includes_effective_max_rank():
    """On chain exhaustion the fallback_reason must be one of the known reason families.

    NOTE(review): the check only runs when ``selection_path`` is
    ``"chain_exhausted"``; any other path makes this test pass without asserting.
    """
    from src.phase_z2_pipeline import lookup_v4_match_with_fallback
    # Three "reject" judgments whose ids (unregistered_t0..t2) are assumed to be
    # absent from the frame catalog.
    payload = _make_v4_section(
        [_judgment(f"unregistered_t{idx}", "reject") for idx in range(3)]
    )
    _unused, trace = lookup_v4_match_with_fallback(payload, "sec-1", max_rank=3)
    if trace["selection_path"] != "chain_exhausted":
        return
    # A recorded first skip reason takes precedence; otherwise the default
    # "no_auto_renderable..." message is expected.
    reason = trace["fallback_reason"]
    accepted_markers = ("no_auto_renderable", "phase_z_status", "no_contract")
    assert reason is not None and any(marker in reason for marker in accepted_markers)
--- a/tests/test_phase_z2_max_rank_regression.py
+++ b/tests/test_phase_z2_max_rank_regression.py
@@ -0,0 +1,137 @@
 """IMP-38 U3 regression — call site cleanup (max_rank=3 제거) 후 policy 활성 검증.
 Scenarios:
  (A) normal case: rank 1~default_max_rank window 에 usable candidate 충분
      → effective_max_rank=default_max_rank (rank-3-preserved)
      → mdx03 식: rank 1 use_as_is 매칭 정상 case 보호 확인
  (B) extended case: rank 1~default_max_rank window 에 usable candidate 0
      → effective_max_rank=effective_extended_ceiling (rank-extended)
      → mdx05-2 식: rank 1~9 미등록/reject + rank 10+ 등록 frame case 처리
 4 round 합의 (#67):
  - Codex #1: 별 yaml + loader (catalog 오염 방지)
  - Codex #2: min(configured, len(judgments)) 정정
  - Codex #6: 2 call site cleanup (HEAD 기준 — IMP-47B 가 추가한 3 번째는 별 axis)
  - Codex #7: U3 execute ready
 """
 from __future__ import annotations
 import pytest
@pytest.fixture(autouse=True)
 def _reset_policy_cache():
    """Reset module-level _V4_FALLBACK_POLICY_CACHE for test isolation.

    Runs automatically around every test in this module.
    """
    import src.phase_z2_mapper as mapper
    # Clear before the test so a previously cached policy is not reused.
    mapper._V4_FALLBACK_POLICY_CACHE = None
    yield
    # Clear again afterwards so nothing cached by this test leaks onward.
    mapper._V4_FALLBACK_POLICY_CACHE = None
 def _make_v4_section(judgments: list[dict]) -> dict:
    """Wrap *judgments* in a minimal V4 payload under section id ``sec-1``."""
    section = {"judgments_full32": judgments}
    sections = {"sec-1": section}
    return {"mdx_sections": sections}
 def _judgment(template_id: str, label: str, confidence: float = 0.5, frame_id: int = 0) -> dict:
    """Build one V4 judgment entry for test fixtures.

    When ``frame_id`` is falsy (the default ``0``), a pseudo id in ``0..9999``
    is derived from ``template_id`` with CRC-32. The builtin ``hash()`` is
    avoided because str hashing is salted per interpreter process, which made
    the generated fixture differ from one test run to the next.
    """
    import zlib

    derived_frame_id = zlib.crc32(template_id.encode("utf-8")) % 10000
    return {
        "template_id": template_id,
        "frame_id": frame_id or derived_frame_id,
        "frame_number": 0,
        "confidence": confidence,
        "label": label,
    }
 # ─── Scenario A — normal case (rank-3-preserved) ──────────────────
 def test_normal_case_with_usable_candidates_preserves_default_max_rank():
    """With at least ``usable_threshold`` usable candidates in the default window, max rank stays default."""
    from src.phase_z2_pipeline import lookup_v4_match_with_fallback
    from src.phase_z2_mapper import load_frame_contracts
    # mdx03-style fixture: rank 1 is a frame taken from the real catalog and
    # labelled use_as_is; ranks 2-3 are rejects with made-up template ids.
    registered_ids = [
        template_id
        for template_id, entry in load_frame_contracts().items()
        if isinstance(entry, dict)
    ]
    assert registered_ids, "catalog 등록 frame 1+ 필요 (mdx03 식 fixture)"
    rank_one_id = registered_ids[0]
    payload = _make_v4_section(
        [
            _judgment(rank_one_id, "use_as_is", 0.95),
            _judgment("dummy_rank2", "reject", 0.3),
            _judgment("dummy_rank3", "reject", 0.2),
        ]
    )
    # No explicit max_rank, so the policy path decides.
    _unused, trace = lookup_v4_match_with_fallback(payload, "sec-1")
    assert trace["policy_applied"] == "default_max_rank", (
        f"normal case 에서 default 유지 기대, got {trace['policy_applied']}"
    )
    assert trace["effective_max_rank"] == trace["default_max_rank"]
    assert trace["usable_count"] >= 1
 # ─── Scenario B — extended case (rank-extended) ────────────────────
 def test_extended_case_with_no_usable_in_default_window_expands_to_ceiling():
    """With zero usable candidates in the default window, max rank expands to the extended ceiling."""
    from src.phase_z2_pipeline import lookup_v4_match_with_fallback
    # mdx05-2-style fixture, simplified: all ten judgments are rejects with
    # template ids assumed absent from the catalog, so only the window size
    # (not the final match) is exercised.
    ten_rejects = [
        _judgment(f"unregistered_t{i}", "reject", 0.1 + i * 0.01) for i in range(10)
    ]
    _unused, trace = lookup_v4_match_with_fallback(_make_v4_section(ten_rejects), "sec-1")
    assert trace["policy_applied"] == "extended_max_rank", (
        f"extended case 기대, got {trace['policy_applied']}"
    )
    assert trace["usable_count"] == 0
    assert trace["judgments_count"] == 10
    # Ceiling is min(configured, 10): 10 when configured is 32, 5 when it is 5.
    expected_ceiling = min(trace["configured_extended_max_rank"], 10)
    assert trace["effective_extended_ceiling"] == expected_ceiling
    assert trace["effective_max_rank"] == trace["effective_extended_ceiling"]
 # ─── Scenario C — call site cleanup byte-identical (caller_override 제거 후 policy 활성) ─
 def test_default_call_site_now_uses_policy_after_cleanup():
    """Calling without ``max_rank`` goes through the policy path.

    Before the U3 cleanup the callers passed ``max_rank=3`` and the trace said
    ``caller_override``; without the argument it must report either
    ``default_max_rank`` or ``extended_max_rank``.
    """
    from src.phase_z2_pipeline import lookup_v4_match_with_fallback
    payload = _make_v4_section(
        [_judgment(f"unregistered_t{idx}", "reject") for idx in range(5)]
    )
    # Same call shape as the cleaned-up production callers: no max_rank.
    _unused, trace = lookup_v4_match_with_fallback(payload, "sec-1")
    applied = trace["policy_applied"]
    assert applied in {"default_max_rank", "extended_max_rank"}
    assert applied != "caller_override", (
        "U3 cleanup 후 production caller = no explicit, policy path 활성 기대"
    )
 # ─── Scenario D — explicit caller_override 여전히 동작 (test path 보호) ────
 def test_explicit_caller_override_still_works_for_tests():
    """Passing ``max_rank=N`` explicitly still yields ``caller_override`` (backward compat)."""
    from src.phase_z2_pipeline import lookup_v4_match_with_fallback
    payload = _make_v4_section(
        [_judgment(f"unregistered_t{idx}", "reject") for idx in range(10)]
    )
    _unused, trace = lookup_v4_match_with_fallback(payload, "sec-1", max_rank=5)
    assert trace["policy_applied"] == "caller_override"
    assert trace["effective_max_rank"] == 5
--- a/tests/test_v4_fallback_policy_loader.py
+++ b/tests/test_v4_fallback_policy_loader.py
@@ -0,0 +1,109 @@
 """IMP-38 U1 — v4_fallback_policy.yaml loader test.
 Verify:
  - load_v4_fallback_policy() returns dict with expected keys
  - yaml parsed correctly (usable_threshold, default_max_rank, extended_max_rank, policy_type)
  - graceful fallback when yaml missing → _V4_FALLBACK_POLICY_DEFAULT
  - _V4_FALLBACK_POLICY_CACHE pattern (lazy load, mirror of _CATALOG_CACHE)
  - load_frame_contracts() shape unchanged (separate yaml, catalog 오염 X)
 4 round 합의 (#67):
  - Codex #1: separate yaml (not frame_contracts.yaml top-level)
  - Codex #3: load_frame_contracts() shape 변경 X
 """
 from __future__ import annotations
 import importlib
 from pathlib import Path
 from unittest.mock import patch
 import pytest
 PROJECT_ROOT = Path(__file__).parent.parent
 V4_POLICY_PATH = PROJECT_ROOT / "templates" / "phase_z2" / "catalog" / "v4_fallback_policy.yaml"
 CATALOG_PATH = PROJECT_ROOT / "templates" / "phase_z2" / "catalog" / "frame_contracts.yaml"
 def _reset_caches():
    """Set both mapper module caches (policy and catalog) back to ``None``."""
    import src.phase_z2_mapper as mapper
    for cache_attr in ("_V4_FALLBACK_POLICY_CACHE", "_CATALOG_CACHE"):
        setattr(mapper, cache_attr, None)
@pytest.fixture(autouse=True)
 def clean_caches():
    """Reset the mapper caches before and after every test in this module."""
    # Before: make the test load from disk instead of reusing a cached value.
    _reset_caches()
    yield
    # After: drop whatever this test cached (possibly under a patched path).
    _reset_caches()
 def test_v4_fallback_policy_yaml_exists():
    """The dedicated policy YAML must be present in the catalog directory."""
    policy_file = V4_POLICY_PATH
    assert policy_file.exists(), (
        f"v4_fallback_policy.yaml not found at {V4_POLICY_PATH}. "
        "IMP-38 U1 expects separate yaml (Codex #1 corr — not frame_contracts.yaml top-level)."
    )
 def test_load_v4_fallback_policy_returns_dict_with_expected_keys():
    """The loader returns a dict that contains the four policy keys."""
    from src.phase_z2_mapper import load_v4_fallback_policy
    loaded_policy = load_v4_fallback_policy()
    assert isinstance(loaded_policy, dict)
    required = ("policy_type", "usable_threshold", "default_max_rank", "extended_max_rank")
    missing = set(required).difference(loaded_policy)
    assert not missing, f"missing keys in v4_fallback_policy: {missing}"
 def test_load_v4_fallback_policy_values_match_yaml():
    """Loaded values equal those committed in v4_fallback_policy.yaml."""
    from src.phase_z2_mapper import load_v4_fallback_policy
    loaded_policy = load_v4_fallback_policy()
    assert loaded_policy["policy_type"] == "dynamic_usable_count_based"
    assert loaded_policy["usable_threshold"] == 1
    assert loaded_policy["default_max_rank"] == 3
    assert loaded_policy["extended_max_rank"] == 32
 def test_load_v4_fallback_policy_cache_pattern():
    """Two consecutive loads return the very same dict object (cached)."""
    from src.phase_z2_mapper import load_v4_fallback_policy
    first_load = load_v4_fallback_policy()
    second_load = load_v4_fallback_policy()
    assert first_load is second_load, "cache pattern violated (should return same dict instance)"
 def test_load_v4_fallback_policy_graceful_when_yaml_missing():
    """With no policy file on disk the loader falls back to the built-in default (extended == 3)."""
    import src.phase_z2_mapper as mapper
    absent_path = PROJECT_ROOT / "tests" / "__nonexistent_policy.yaml"
    with patch.object(mapper, "V4_FALLBACK_POLICY_PATH", absent_path):
        # Drop any cached policy so the loader consults the patched path.
        mapper._V4_FALLBACK_POLICY_CACHE = None
        fallback_policy = mapper.load_v4_fallback_policy()
        assert fallback_policy["default_max_rank"] == 3
        assert fallback_policy["extended_max_rank"] == 3, (
            "graceful fallback must keep extended==default (byte-identical pre-IMP-38)"
        )
 def test_load_frame_contracts_shape_unchanged():
    """The frame catalog still maps template ids to entry dicts and holds no policy keys."""
    from src.phase_z2_mapper import load_frame_contracts, load_v4_fallback_policy
    frame_catalog = load_frame_contracts()
    # Load the policy as well so both loaders have run before the catalog is inspected.
    policy = load_v4_fallback_policy()
    # Every catalog value must be a frame entry dict carrying template_id.
    for key in frame_catalog:
        entry = frame_catalog[key]
        assert isinstance(entry, dict), f"catalog entry {key} should be dict"
        assert "template_id" in entry, f"catalog entry {key} missing template_id (policy bleed?)"
    # None of the policy keys may show up as a catalog key.
    policy_keys = {"policy_type", "usable_threshold", "default_max_rank", "extended_max_rank"}
    bleed = policy_keys.intersection(frame_catalog)
    assert not bleed, (
        f"policy keys leaked into frame_contracts.yaml: {bleed}. "
        "Codex #1 corr violated — policy must stay in separate v4_fallback_policy.yaml."
    )