feat(#68): IMP-39 u1~u8 ranking_sort_policy single-source + backend↔frontend label-priority mirror
Some checks failed
Multi-MDX Regression (IMP-91) / multi-mdx-regression (push) Failing after 23s
Some checks failed
Multi-MDX Regression (IMP-91) / multi-mdx-regression (push) Failing after 23s
u1: templates/phase_z2/catalog/ranking_sort_policy.yaml — single-source policy
(label_priority asc {use_as_is:0, light_edit:1, restructure:2, reject:3}
+ confidence desc + v4_rank asc tie-break).
u2: src/phase_z2_pipeline.py — apply_ranking_sort helper + lookup_v4_match_with_fallback
applies policy AFTER IMP-38 raw-window selection (raw default_window + usable_count
preserved on RAW all_judgments).
u3: src/phase_z2_pipeline.py — _build_application_plan_unit forwards ranking_sort_policy
+ sorted_candidate_evidence into Step 9 payload.
u4: Front/client/src/services/designAgentApi.ts — frame_candidates builder reads
unit.sorted_candidate_evidence + unit.ranking_sort_policy first; local LABEL_PRIORITY
retained only on warn-fallback path.
u5: tests/test_ranking_sort_policy.py — pure permutation coverage (sample-agnostic).
u6: tests/phase_z2/test_label_priority_synthetic.py + fixtures/ranking_sort_policy/
synthetic_divergence.yaml — low-conf use_as_is behind high-conf restructure.
u7: tests/phase_z2/test_imp39_mdx04_env_toggle_e2e.py — samples/mdx_batch/04.mdx with
AI_FALLBACK_ENABLED=off; backend selected_v4_rank == frontend frame_candidates[0].
u8: tests/phase_z2/test_imp39_corpus_audit.py — real corpus sweep over
tests/matching/v4_full32_result.yaml (10 MDX sections); section IDs loaded
dynamically (RULE 0 / RULE 7 sample-agnostic).
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -108,6 +108,12 @@ ASSETS_SOURCE_BASE = PROJECT_ROOT / "figma_to_html_agent" / "blocks"
|
||||
V4_RESULT_PATH = PROJECT_ROOT / "tests" / "matching" / "v4_full32_result.yaml"
|
||||
RUNS_DIR = PROJECT_ROOT / "data" / "runs"
|
||||
|
||||
# IMP-39 (#68) u1 — single-source ranking sort policy yaml.
# Loader + apply_ranking_sort helper below `to_phase_z_status`.
# This is the file `load_ranking_sort_policy()` reads; when it does not exist
# the loader uses the in-module `_RANKING_SORT_POLICY_DEFAULT` instead.
RANKING_SORT_POLICY_PATH = (
    PROJECT_ROOT / "templates" / "phase_z2" / "catalog" / "ranking_sort_policy.yaml"
)
|
||||
|
||||
# V4 label → Phase Z status (§ 7.4 matrix)
|
||||
V4_LABEL_TO_PHASE_Z_STATUS = {
|
||||
"use_as_is": "matched_zone",
|
||||
@@ -210,6 +216,106 @@ def to_phase_z_status(match: V4Match) -> str:
|
||||
return V4_LABEL_TO_PHASE_Z_STATUS.get(match.label, "unknown")
|
||||
|
||||
|
||||
# ─── IMP-39 (#68) u1 — single-source ranking sort policy ──────────
|
||||
#
|
||||
# Single source of (label_priority, tie-break) ordering shared by:
|
||||
# - backend `lookup_v4_match_with_fallback` selector loop (wired in u2)
|
||||
# - Step 9 `_build_application_plan_unit` payload (wired in u3)
|
||||
# - frontend `designAgentApi.ts` candidate builder (wired in u4)
|
||||
#
|
||||
# u1 scope = additive only (yaml + loader + helper). No selector wiring,
|
||||
# no behavior change. Default-fallback matches yaml so missing-file boot
|
||||
# keeps deterministic ordering identical to the file-loaded policy.
|
||||
|
||||
# Built-in policy used by `load_ranking_sort_policy()` when the YAML file is
# missing, and as the base that YAML top-level keys are merged over.
_RANKING_SORT_POLICY_DEFAULT: dict = {
    "policy_type": "deterministic_label_priority_then_confidence",
    # Primary sort axis: `apply_ranking_sort` sorts ascending on this number,
    # so a smaller value ranks earlier.
    "label_priority": {
        "use_as_is": 0,
        "light_edit": 1,
        "restructure": 2,
        "reject": 3,
    },
    # Priority given to any label that is not a key of `label_priority`.
    "unknown_label_priority": 99,
    # NOTE(review): `apply_ranking_sort` as written does not read this key —
    # its (confidence desc, v4_rank asc) tie-break order is hard-coded — so
    # this list describes the contract rather than driving it.
    "tie_break_axes": ["confidence_desc", "v4_rank_asc"],
}


# Memo slot for `load_ranking_sort_policy()`; stays None until the first load.
_RANKING_SORT_POLICY_CACHE: Optional[dict] = None
|
||||
|
||||
|
||||
def load_ranking_sort_policy() -> dict:
    """IMP-39 u1 — load the ranking sort policy (separate yaml, additive).

    The policy is built by copying ``_RANKING_SORT_POLICY_DEFAULT`` and then
    overriding it with the top-level keys found in
    ``RANKING_SORT_POLICY_PATH``. The merge is shallow: a key present in the
    yaml replaces the default value for that key wholesale.

    Graceful fallback (backward-compatible, boot-safe):
      - yaml file missing            -> defaults only.
      - yaml file empty / null       -> defaults only.
      - yaml root is not a mapping   -> defaults only, with a logged warning
        (previously this crashed on ``.items()``).

    Cache: module-level (``_RANKING_SORT_POLICY_CACHE``); the file is read at
    most once per process and the same dict object is returned on every call,
    so callers should treat the result as read-only.

    Returns:
        dict containing at least the default keys: policy_type,
        label_priority (dict), unknown_label_priority (int),
        tie_break_axes (list[str]).
    """
    global _RANKING_SORT_POLICY_CACHE
    if _RANKING_SORT_POLICY_CACHE is None:
        # Function-scope imports keep this block self-contained.
        import copy
        import logging

        # Deep copy so the cached policy never aliases the nested
        # label_priority dict / tie_break_axes list of the default.
        merged = copy.deepcopy(_RANKING_SORT_POLICY_DEFAULT)
        if RANKING_SORT_POLICY_PATH.exists():
            loaded = yaml.safe_load(
                RANKING_SORT_POLICY_PATH.read_text(encoding="utf-8")
            )
            if isinstance(loaded, dict):
                # Partial yaml: keys it omits keep their default value.
                merged.update(loaded)
            elif loaded:
                # A list / scalar root cannot be merged; keep defaults rather
                # than crash at boot, but make the misconfiguration visible.
                logging.getLogger(__name__).warning(
                    "ranking sort policy %s: expected a mapping at the yaml "
                    "root, got %s; using built-in defaults",
                    RANKING_SORT_POLICY_PATH,
                    type(loaded).__name__,
                )
        _RANKING_SORT_POLICY_CACHE = merged
    return _RANKING_SORT_POLICY_CACHE
|
||||
|
||||
|
||||
def apply_ranking_sort(
    records: list,
    *,
    policy: Optional[dict] = None,
    label_key: str = "label",
    confidence_key: str = "confidence",
    v4_rank_key: str = "v4_rank",
) -> list:
    """IMP-39 u1 — stable sort by (label_priority asc, confidence desc, v4_rank asc).

    Shared ordering primitive — backend selector / Step 9 payload / frontend
    mirror invariant. Sample-agnostic; no hardcoded sample IDs.

    Args:
        records: list of dicts OR objects exposing the fields as attributes.
            Dicts are read with ``.get``; anything else with ``getattr``.
        policy: optional explicit policy dict; defaults to
            ``load_ranking_sort_policy()``. Only ``label_priority`` and
            ``unknown_label_priority`` are read here.
        label_key / confidence_key / v4_rank_key: per-record field names.

    Returns:
        NEW list — input is not mutated. Records whose fields are missing (or
        None) are sorted as: label -> ``unknown_label_priority``,
        confidence -> 0.0, v4_rank -> 10**9, so they sink deterministically.
        A NaN confidence is treated like a missing one (0.0): NaN does not
        order consistently against other floats, which would make the result
        depend on input order.

    Raises:
        ValueError / TypeError: if a present confidence or v4_rank value
            cannot be converted with ``float()`` / ``int()``.
    """
    pol = policy if policy is not None else load_ranking_sort_policy()
    priority_map: dict = pol.get("label_priority", {}) or {}
    # An explicit null in the policy falls back to 99 instead of int(None).
    raw_unknown = pol.get("unknown_label_priority", 99)
    unknown_priority: int = 99 if raw_unknown is None else int(raw_unknown)

    def _get(rec, key):
        if isinstance(rec, dict):
            return rec.get(key)
        return getattr(rec, key, None)

    def _key(rec):
        label = _get(rec, label_key)
        conf = _get(rec, confidence_key)
        v4_rank = _get(rec, v4_rank_key)
        label_pri = priority_map.get(label, unknown_priority)
        conf_val = float(conf) if conf is not None else 0.0
        if conf_val != conf_val:
            # NaN is the only float unequal to itself; treat it as missing.
            conf_val = 0.0
        rank_val = int(v4_rank) if v4_rank is not None else 10**9
        # confidence desc → negate for the ascending sort key
        return (label_pri, -conf_val, rank_val)

    return sorted(records, key=_key)
|
||||
|
||||
|
||||
def _b4_mapper_source_enabled() -> bool:
|
||||
"""IMP-89 89-a u1 — PHASE_Z_B4_MAPPER_SOURCE env flag reader (default OFF).
|
||||
|
||||
@@ -1065,6 +1171,30 @@ def lookup_v4_match_with_fallback(
|
||||
trace["fallback_reason"] = "empty_v4_judgments"
|
||||
return None, trace
|
||||
|
||||
# IMP-39 (#68) u2 — apply single-source ranking sort policy to the selected
|
||||
# window AFTER IMP-38 raw-window calc (default_window / usable_count above
|
||||
# remain RAW all_judgments-based — no silent interaction with fallback
|
||||
# expansion). Selection order now follows
|
||||
# (label_priority asc, confidence desc, v4_rank asc)
|
||||
# so backend selected rank-1 matches frontend frame_candidates[0]
|
||||
# (designAgentApi.ts:578-597 LABEL_PRIORITY + confidence-desc mirror).
|
||||
# `v4_rank_key="v4_full_rank"` reads the RAW V4 confidence-rank from each
|
||||
# judgment dict for tie-break (yaml: tie_break_axes=[confidence_desc,
|
||||
# v4_rank_asc]). Input list is NOT mutated (apply_ranking_sort returns a
|
||||
# new list). Trace fields (sorted_candidate_evidence / ranking_sort_policy)
|
||||
# are forwarded through Step 9 payload in u3.
|
||||
ranking_sort_policy = load_ranking_sort_policy()
|
||||
judgments = apply_ranking_sort(
|
||||
judgments,
|
||||
policy=ranking_sort_policy,
|
||||
label_key="label",
|
||||
confidence_key="confidence",
|
||||
v4_rank_key="v4_full_rank",
|
||||
)
|
||||
trace["ranking_sort_policy_applied"] = ranking_sort_policy.get(
|
||||
"policy_type", "deterministic_label_priority_then_confidence"
|
||||
)
|
||||
|
||||
first_skip_reason: Optional[str] = None
|
||||
# IMP-05 L4 dedup (Codex #14 ordering — Claude #16 placement precision) :
|
||||
# first occurrence claims template_id for the chain regardless of decision
|
||||
@@ -3937,6 +4067,18 @@ def _build_application_plan_unit(
|
||||
- IMP-06 additive plan fields (position / assignment_source / section_
|
||||
assignment_override / replaced_auto_unit / skipped_collided_auto_units /
|
||||
skipped_reason) — None / False / [] when no override CLI used.
|
||||
|
||||
IMP-39 u3 (issue #68) additive fields :
|
||||
- ``ranking_sort_policy`` : full policy dict from
|
||||
``load_ranking_sort_policy()`` (cached). Forwards the single-source
|
||||
ordering contract (label_priority map + tie_break_axes) to the Step 9
|
||||
payload so the frontend (``designAgentApi.ts``) can mirror the backend
|
||||
sort without re-implementing the policy locally. u4 wires consumption.
|
||||
- ``sorted_candidate_evidence`` : explicit alias of the policy-sorted
|
||||
``selection_trace["candidates"]`` list. Identical contents to
|
||||
``candidate_evidence`` (u2 sorted the underlying ``judgments`` window
|
||||
before the selector loop appended ``trace["candidates"]``), but the
|
||||
explicit name documents the post-u2 contract for the frontend.
|
||||
"""
|
||||
unit_id = "+".join(unit.source_section_ids)
|
||||
|
||||
@@ -3945,6 +4087,14 @@ def _build_application_plan_unit(
|
||||
application_status = "ok" if has_v4 else "no_v4_candidate"
|
||||
current_default = unit.frame_template_id if has_v4 else None
|
||||
|
||||
# IMP-39 u3 (issue #68) — forward the single-source ranking policy to the
|
||||
# Step 9 per-unit payload. ``load_ranking_sort_policy()`` is module-cached
|
||||
# (``_RANKING_SORT_POLICY_CACHE``), so the per-unit call is O(1) after
|
||||
# first invocation. The full policy dict (not just ``policy_type``) is
|
||||
# forwarded so the frontend can mirror label_priority + tie_break_axes
|
||||
# without re-declaring the contract locally.
|
||||
ranking_sort_policy = load_ranking_sort_policy()
|
||||
|
||||
# IMP-06 blocker-fix (Codex #13 Blocker 3 / #16) — plan-aware additive
|
||||
# fields. additive = pre-IMP-06 readers (no override CLI used) see
|
||||
# position=None / assignment_source=None / section_assignment_override
|
||||
@@ -4006,6 +4156,12 @@ def _build_application_plan_unit(
|
||||
"replaced_auto_unit": plan_replaced_auto,
|
||||
"skipped_collided_auto_units": plan_skipped_collided,
|
||||
"skipped_reason": plan_skipped_reason,
|
||||
# IMP-39 u3 (issue #68) — single-source ranking policy forwarded to
|
||||
# frontend so backend selector "rank 1" and frontend
|
||||
# ``frame_candidates[0]`` share one ordering contract. Additive only;
|
||||
# pre-u3 readers ignore both keys.
|
||||
"ranking_sort_policy": ranking_sort_policy,
|
||||
"sorted_candidate_evidence": selection_trace.get("candidates", []),
|
||||
}
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user