feat(#68): IMP-39 u1~u8 ranking_sort_policy single-source + backend↔frontend label-priority mirror
Some checks failed
Multi-MDX Regression (IMP-91) / multi-mdx-regression (push) Failing after 23s
Some checks failed
Multi-MDX Regression (IMP-91) / multi-mdx-regression (push) Failing after 23s
u1: templates/phase_z2/catalog/ranking_sort_policy.yaml — single-source policy
(label_priority asc {use_as_is:0, light_edit:1, restructure:2, reject:3}
+ confidence desc + v4_rank asc tie-break).
u2: src/phase_z2_pipeline.py — apply_ranking_sort helper + lookup_v4_match_with_fallback
applies policy AFTER IMP-38 raw-window selection (raw default_window + usable_count
preserved on RAW all_judgments).
u3: src/phase_z2_pipeline.py — _build_application_plan_unit forwards ranking_sort_policy
+ sorted_candidate_evidence into Step 9 payload.
u4: Front/client/src/services/designAgentApi.ts — frame_candidates builder reads
unit.sorted_candidate_evidence + unit.ranking_sort_policy first; local LABEL_PRIORITY
retained only on warn-fallback path.
u5: tests/test_ranking_sort_policy.py — pure permutation coverage (sample-agnostic).
u6: tests/phase_z2/test_label_priority_synthetic.py + fixtures/ranking_sort_policy/
synthetic_divergence.yaml — low-conf use_as_is behind high-conf restructure.
u7: tests/phase_z2/test_imp39_mdx04_env_toggle_e2e.py — samples/mdx_batch/04.mdx with
AI_FALLBACK_ENABLED=off; backend selected_v4_rank == frontend frame_candidates[0].
u8: tests/phase_z2/test_imp39_corpus_audit.py — real corpus sweep over
tests/matching/v4_full32_result.yaml (10 MDX sections); section IDs loaded
dynamically (RULE 0 / RULE 7 sample-agnostic).
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -565,6 +565,13 @@ export async function loadRun(runId: string): Promise<LoadRunResult> {
|
||||
// Sort priority = label (use_as_is > light_edit > restructure > reject) + confidence desc.
|
||||
// When every candidate is reject, only confidence desc applies (explicit user requirement).
|
||||
const TOP_N_FRAMES = 6;
|
||||
// IMP-39 u4 (issue #68) — local LABEL_PRIORITY is now a documentation
|
||||
// mirror of templates/phase_z2/catalog/ranking_sort_policy.yaml (u1).
|
||||
// Primary ordering arrives pre-sorted from the backend selector
|
||||
// (src/phase_z2_pipeline.py lookup_v4_match_with_fallback :1186-1196 +
|
||||
// _build_application_plan_unit u3 payload fields). This constant is read
|
||||
// ONLY on the warn-fallback path below (legacy fixtures pre-u3 / payload
|
||||
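// missing). Kept verbatim so the fallback label order matches the u1/u2 contract (the fallback comparator applies label priority + confidence desc only; the v4_rank tie-break is not re-applied here).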
|
||||
const LABEL_PRIORITY: Record<string, number> = {
|
||||
use_as_is: 0,
|
||||
light_edit: 1,
|
||||
@@ -576,9 +583,6 @@ export async function loadRun(runId: string): Promise<LoadRunResult> {
|
||||
// 2) unit.v4_all_judgments (pre-IMP-05 audit array)
|
||||
// 3) unit.v4_candidates (legacy minimal)
|
||||
// fallback_chain alias is intentionally NOT read (Stage 2 guardrail).
|
||||
const candidateEvidence = Array.isArray(unit.candidate_evidence)
|
||||
? unit.candidate_evidence
|
||||
: [];
|
||||
const candidateMap = new Map<string, any>();
|
||||
const pushCandidate = (c: any) => {
|
||||
if (!c) return;
|
||||
@@ -586,15 +590,64 @@ export async function loadRun(runId: string): Promise<LoadRunResult> {
|
||||
if (!key) return;
|
||||
if (!candidateMap.has(key)) candidateMap.set(key, c);
|
||||
};
|
||||
candidateEvidence.forEach(pushCandidate);
|
||||
(unit.v4_all_judgments ?? []).forEach(pushCandidate);
|
||||
(unit.v4_candidates ?? []).forEach(pushCandidate);
|
||||
const rawSource = Array.from(candidateMap.values());
|
||||
const v4Source = [...rawSource].sort((a: any, b: any) => {
|
||||
const lp = (LABEL_PRIORITY[a.label] ?? 99) - (LABEL_PRIORITY[b.label] ?? 99);
|
||||
if (lp !== 0) return lp;
|
||||
return (b.confidence ?? 0) - (a.confidence ?? 0);
|
||||
});
|
||||
|
||||
// IMP-39 u4 (issue #68) — primary path: consume the backend Step 9
|
||||
// payload as the single source of ordering truth.
|
||||
// • ``unit.sorted_candidate_evidence`` = policy-sorted selector trace
|
||||
// (src/phase_z2_pipeline.py :4163, alias of selection_trace[
|
||||
// "candidates"] sorted by u2 at :1186-1196). Same IMP-05 L2 schema
|
||||
// consumed below (template_id, label, confidence, frame_number,
|
||||
// frame_id, rank, catalog_registered, capacity_fit, route_hint, ...).
|
||||
// • ``unit.ranking_sort_policy`` = full single-source policy dict
|
||||
// (policy_type / label_priority / unknown_label_priority /
|
||||
// tie_break_axes) forwarded for telemetry + fallback parity check.
|
||||
// When both are present we feed sorted_candidate_evidence through the
|
||||
// existing dedup map (first occurrence wins, mirrors backend
|
||||
// ``seen_template_ids`` semantics at :1204-1236) and SKIP the local
|
||||
// re-sort — backend "rank 1" then equals frontend frame_candidates[0]
|
||||
// by construction (Stage 1 root-cause fix).
|
||||
const sortedCandidateEvidence: any[] | null = Array.isArray(
|
||||
unit.sorted_candidate_evidence,
|
||||
)
|
||||
? unit.sorted_candidate_evidence
|
||||
: null;
|
||||
const rankingSortPolicy = unit.ranking_sort_policy ?? null;
|
||||
const backendPolicyPayloadPresent =
|
||||
sortedCandidateEvidence !== null &&
|
||||
sortedCandidateEvidence.length > 0 &&
|
||||
rankingSortPolicy !== null;
|
||||
|
||||
let v4Source: any[];
|
||||
if (backendPolicyPayloadPresent) {
|
||||
sortedCandidateEvidence!.forEach(pushCandidate);
|
||||
v4Source = Array.from(candidateMap.values());
|
||||
} else {
|
||||
// IMP-39 u4 — warn-fallback path. Legacy fixtures predating u3 (or
|
||||
// any code path that strips the payload) lack the backend-sorted
|
||||
// evidence; ordering then derives from local LABEL_PRIORITY mirror.
|
||||
// Warning surfaces drift in dev console without hard-failing the UI
|
||||
// (graceful: production sample audit deck remains renderable).
|
||||
if (typeof console !== "undefined" && typeof console.warn === "function") {
|
||||
console.warn(
|
||||
`[IMP-39 u4] unit ${unit.unit_id ?? "<unknown>"}: backend payload ` +
|
||||
"missing ranking_sort_policy / sorted_candidate_evidence — " +
|
||||
"falling back to local LABEL_PRIORITY (legacy fixture path).",
|
||||
);
|
||||
}
|
||||
const candidateEvidence = Array.isArray(unit.candidate_evidence)
|
||||
? unit.candidate_evidence
|
||||
: [];
|
||||
candidateEvidence.forEach(pushCandidate);
|
||||
(unit.v4_all_judgments ?? []).forEach(pushCandidate);
|
||||
(unit.v4_candidates ?? []).forEach(pushCandidate);
|
||||
const rawSource = Array.from(candidateMap.values());
|
||||
v4Source = [...rawSource].sort((a: any, b: any) => {
|
||||
const lp =
|
||||
(LABEL_PRIORITY[a.label] ?? 99) - (LABEL_PRIORITY[b.label] ?? 99);
|
||||
if (lp !== 0) return lp;
|
||||
return (b.confidence ?? 0) - (a.confidence ?? 0);
|
||||
});
|
||||
}
|
||||
// ─── IMP-41 u4 — application_candidates enrichment (issue #70) ───────────
|
||||
// Backend Step 9 emits `unit.application_candidates[]` (src/phase_z2_pipeline.py
|
||||
// _application_candidates_for_unit, :3071-3092) one entry per v4 candidate with
|
||||
|
||||
Reference in New Issue
Block a user