feat(#68): IMP-39 u1~u8 ranking_sort_policy single-source + backend↔frontend label-priority mirror

u1: templates/phase_z2/catalog/ranking_sort_policy.yaml — single-source policy (label_priority asc {use_as_is:0, light_edit:1, restructure:2, reject:3} + confidence desc + v4_rank asc tie-break). u2: src/phase_z2_pipeline.py — apply_ranking_sort helper + lookup_v4_match_with_fallback applies policy AFTER IMP-38 raw-window selection (raw default_window + usable_count preserved on RAW all_judgments). u3: src/phase_z2_pipeline.py — _build_application_plan_unit forwards ranking_sort_policy + sorted_candidate_evidence into Step 9 payload. u4: Front/client/src/services/designAgentApi.ts — frame_candidates builder reads unit.sorted_candidate_evidence + unit.ranking_sort_policy first; local LABEL_PRIORITY retained only on warn-fallback path. u5: tests/test_ranking_sort_policy.py — pure permutation coverage (sample-agnostic). u6: tests/phase_z2/test_label_priority_synthetic.py + fixtures/ranking_sort_policy/ synthetic_divergence.yaml — low-conf use_as_is behind high-conf restructure. u7: tests/phase_z2/test_imp39_mdx04_env_toggle_e2e.py — samples/mdx_batch/04.mdx with AI_FALLBACK_ENABLED=off; backend selected_v4_rank == frontend frame_candidates[0]. u8: tests/phase_z2/test_imp39_corpus_audit.py — real corpus sweep over tests/matching/v4_full32_result.yaml (10 MDX sections); section IDs loaded dynamically (RULE 0 / RULE 7 sample-agnostic). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
2026-05-24 17:12:07 +09:00
parent 2e3747c5ab
commit 028042aaa9
8 changed files with 1536 additions and 12 deletions
--- a/src/phase_z2_pipeline.py
+++ b/src/phase_z2_pipeline.py
@@ -108,6 +108,12 @@ ASSETS_SOURCE_BASE = PROJECT_ROOT / "figma_to_html_agent" / "blocks"
 V4_RESULT_PATH = PROJECT_ROOT / "tests" / "matching" / "v4_full32_result.yaml"
 RUNS_DIR = PROJECT_ROOT / "data" / "runs"

+# IMP-39 (#68) u1 — single-source ranking sort policy yaml.
+# Loader + apply_ranking_sort helper below `to_phase_z_status`.
+RANKING_SORT_POLICY_PATH = (
+    PROJECT_ROOT / "templates" / "phase_z2" / "catalog" / "ranking_sort_policy.yaml"
+)
+
 # V4 label → Phase Z status (§ 7.4 매트릭스)
 V4_LABEL_TO_PHASE_Z_STATUS = {
    "use_as_is": "matched_zone",
@@ -210,6 +216,106 @@ def to_phase_z_status(match: V4Match) -> str:
    return V4_LABEL_TO_PHASE_Z_STATUS.get(match.label, "unknown")


+# ─── IMP-39 (#68) u1 — single-source ranking sort policy ──────────
+#
+# Single source of (label_priority, tie-break) ordering shared by:
+#   - backend `lookup_v4_match_with_fallback` selector loop (wired in u2)
+#   - Step 9 `_build_application_plan_unit` payload (wired in u3)
+#   - frontend `designAgentApi.ts` candidate builder (wired in u4)
+#
+# u1 itself is additive only (yaml + loader + helper); the selector wiring
+# and behavior change land in u2/u3 (see list above). The default fallback
+# matches the yaml, so a missing-file boot keeps the file-loaded ordering.
+
+_RANKING_SORT_POLICY_DEFAULT: dict = {
+    "policy_type": "deterministic_label_priority_then_confidence",
+    "label_priority": {  # lower value sorts first in apply_ranking_sort
+        "use_as_is": 0,
+        "light_edit": 1,
+        "restructure": 2,
+        "reject": 3,
+    },
+    "unknown_label_priority": 99,  # priority for labels missing from label_priority
+    "tie_break_axes": ["confidence_desc", "v4_rank_asc"],  # apply_ranking_sort hard-codes this order
+}
+
+_RANKING_SORT_POLICY_CACHE: Optional[dict] = None  # filled on first load_ranking_sort_policy() call
+
+
+def load_ranking_sort_policy() -> dict:
+    """IMP-39 u1 — ranking sort policy loader (separate yaml, additive).
+
+    Returns dict with keys: policy_type, label_priority (dict),
+    unknown_label_priority (int), tie_break_axes (list[str]).
+    Graceful fallback: when the yaml file is missing, a shallow copy of
+    _RANKING_SORT_POLICY_DEFAULT is returned — backward-compat boot-safe.
+    Cache: module-level (mirrors `load_v4_fallback_policy` pattern); every
+    call returns the same dict object, and its nested values may be shared
+    with the default dict, so callers should treat the result as read-only.
+    """
+    global _RANKING_SORT_POLICY_CACHE
+    if _RANKING_SORT_POLICY_CACHE is None:
+        if RANKING_SORT_POLICY_PATH.exists():
+            loaded = (
+                yaml.safe_load(RANKING_SORT_POLICY_PATH.read_text(encoding="utf-8"))
+                or {}
+            )
+            # top-level merge only: each yaml key replaces the default value wholesale
+            merged = dict(_RANKING_SORT_POLICY_DEFAULT)
+            for k, v in loaded.items():  # NOTE(review): assumes yaml top level is a mapping
+                merged[k] = v
+            _RANKING_SORT_POLICY_CACHE = merged
+        else:
+            _RANKING_SORT_POLICY_CACHE = dict(_RANKING_SORT_POLICY_DEFAULT)
+    return _RANKING_SORT_POLICY_CACHE
+
+
+def apply_ranking_sort(
+    records: list,
+    *,
+    policy: Optional[dict] = None,
+    label_key: str = "label",
+    confidence_key: str = "confidence",
+    v4_rank_key: str = "v4_rank",
+) -> list:
+    """IMP-39 u1 — stable sort by (label_priority asc, confidence desc, v4_rank asc).
+
+    Shared ordering primitive — backend selector / Step 9 payload / frontend
+    mirror invariant. Sample-agnostic; no hardcoded sample IDs.
+
+    Args:
+        records: list of dicts (selector loop, trace candidates) OR V4Match
+            objects. dict records are read with `.get`, others with `getattr`.
+        policy: optional explicit policy dict; defaults to `load_ranking_sort_policy()`.
+        label_key / confidence_key / v4_rank_key: per-record field names.
+
+    Returns:
+        NEW list — input is not mutated. A missing or None field is treated
+        as unknown-label priority / confidence=0.0 / v4_rank=10**9, so such
+        records sort toward the end on that axis in a deterministic way.
+    """
+    pol = policy if policy is not None else load_ranking_sort_policy()
+    priority_map: dict = pol.get("label_priority", {}) or {}
+    unknown_priority: int = int(pol.get("unknown_label_priority", 99))
+
+    def _get(rec, key):
+        if isinstance(rec, dict):
+            return rec.get(key)
+        return getattr(rec, key, None)
+
+    def _key(rec):
+        label = _get(rec, label_key)
+        conf = _get(rec, confidence_key)
+        v4_rank = _get(rec, v4_rank_key)
+        label_pri = priority_map.get(label, unknown_priority)
+        conf_val = float(conf) if conf is not None else 0.0
+        # key order is fixed here; the policy's tie_break_axes entry is not consulted
+        rank_val = int(v4_rank) if v4_rank is not None else 10**9
+        return (label_pri, -conf_val, rank_val)  # -conf_val → confidence desc under asc sort
+
+    return sorted(records, key=_key)
+
+
 def _b4_mapper_source_enabled() -> bool:
    """IMP-89 89-a u1 — PHASE_Z_B4_MAPPER_SOURCE env flag reader (default OFF).

@@ -1065,6 +1171,30 @@ def lookup_v4_match_with_fallback(
        trace["fallback_reason"] = "empty_v4_judgments"
        return None, trace

+    # IMP-39 (#68) u2 — apply single-source ranking sort policy to the selected
+    # window AFTER IMP-38 raw-window calc (default_window / usable_count above
+    # remain RAW all_judgments-based — no silent interaction with fallback
+    # expansion). Selection order now follows
+    #   (label_priority asc, confidence desc, v4_rank asc)
+    # so backend selected rank-1 matches frontend frame_candidates[0]
+    # (designAgentApi.ts:578-597 LABEL_PRIORITY + confidence-desc mirror).
+    # `v4_rank_key="v4_full_rank"` reads the RAW V4 confidence-rank from each
+    # judgment dict for tie-break (yaml: tie_break_axes=[confidence_desc,
+    # v4_rank_asc]). Input list is NOT mutated (apply_ranking_sort returns a
+    # new list). Trace fields (sorted_candidate_evidence / ranking_sort_policy)
+    # are forwarded through Step 9 payload in u3.
+    ranking_sort_policy = load_ranking_sort_policy()
+    judgments = apply_ranking_sort(
+        judgments,
+        policy=ranking_sort_policy,
+        label_key="label",
+        confidence_key="confidence",
+        v4_rank_key="v4_full_rank",
+    )
+    trace["ranking_sort_policy_applied"] = ranking_sort_policy.get(
+        "policy_type", "deterministic_label_priority_then_confidence"
+    )
+
    first_skip_reason: Optional[str] = None
    # IMP-05 L4 dedup (Codex #14 ordering — Claude #16 placement precision) :
    # first occurrence claims template_id for the chain regardless of decision
@@ -3937,6 +4067,18 @@ def _build_application_plan_unit(
    - IMP-06 additive plan fields (position / assignment_source / section_
      assignment_override / replaced_auto_unit / skipped_collided_auto_units /
      skipped_reason) — None / False / [] when no override CLI used.
+
+    IMP-39 u3 (issue #68) additive fields :
+    - ``ranking_sort_policy``       : full policy dict from
+      ``load_ranking_sort_policy()`` (cached). Forwards the single-source
+      ordering contract (label_priority map + tie_break_axes) to the Step 9
+      payload so the frontend (``designAgentApi.ts``) can mirror the backend
+      sort without re-implementing the policy locally. u4 wires consumption.
+    - ``sorted_candidate_evidence`` : explicit alias of the policy-sorted
+      ``selection_trace["candidates"]`` list. Identical contents to
+      ``candidate_evidence`` (u2 sorted the underlying ``judgments`` window
+      before the selector loop appended ``trace["candidates"]``), but the
+      explicit name documents the post-u2 contract for the frontend.
    """
    unit_id = "+".join(unit.source_section_ids)

@@ -3945,6 +4087,14 @@ def _build_application_plan_unit(
    application_status = "ok" if has_v4 else "no_v4_candidate"
    current_default = unit.frame_template_id if has_v4 else None

+    # IMP-39 u3 (issue #68) — forward the single-source ranking policy to the
+    # Step 9 per-unit payload. ``load_ranking_sort_policy()`` is module-cached
+    # (``_RANKING_SORT_POLICY_CACHE``), so the per-unit call is O(1) after
+    # first invocation. The full policy dict (not just ``policy_type``) is
+    # forwarded so the frontend can mirror label_priority + tie_break_axes
+    # without re-declaring the contract locally.
+    ranking_sort_policy = load_ranking_sort_policy()
+
    # IMP-06 blocker-fix (Codex #13 Blocker 3 / #16) — plan-aware additive
    # fields. additive = pre-IMP-06 readers (no override CLI used) see
    # position=None / assignment_source=None / section_assignment_override
@@ -4006,6 +4156,12 @@ def _build_application_plan_unit(
        "replaced_auto_unit": plan_replaced_auto,
        "skipped_collided_auto_units": plan_skipped_collided,
        "skipped_reason": plan_skipped_reason,
+        # IMP-39 u3 (issue #68) — single-source ranking policy forwarded to
+        # frontend so backend selector "rank 1" and frontend
+        # ``frame_candidates[0]`` share one ordering contract. Additive only;
+        # pre-u3 readers ignore both keys.
+        "ranking_sort_policy": ranking_sort_policy,
+        "sorted_candidate_evidence": selection_trace.get("candidates", []),
    }