feat(#68): IMP-39 u1~u8 ranking_sort_policy single-source + backend↔frontend label-priority mirror
Some checks failed
Multi-MDX Regression (IMP-91) / multi-mdx-regression (push) Failing after 23s

u1: templates/phase_z2/catalog/ranking_sort_policy.yaml — single-source policy
    (label_priority asc {use_as_is:0, light_edit:1, restructure:2, reject:3}
    + confidence desc + v4_rank asc tie-break).
u2: src/phase_z2_pipeline.py — apply_ranking_sort helper + lookup_v4_match_with_fallback
    applies policy AFTER IMP-38 raw-window selection (raw default_window + usable_count
    preserved on RAW all_judgments).
u3: src/phase_z2_pipeline.py — _build_application_plan_unit forwards ranking_sort_policy
    + sorted_candidate_evidence into Step 9 payload.
u4: Front/client/src/services/designAgentApi.ts — frame_candidates builder reads
    unit.sorted_candidate_evidence + unit.ranking_sort_policy first; local LABEL_PRIORITY
    retained only on warn-fallback path.
u5: tests/test_ranking_sort_policy.py — pure permutation coverage (sample-agnostic).
u6: tests/phase_z2/test_label_priority_synthetic.py + fixtures/ranking_sort_policy/
    synthetic_divergence.yaml — low-conf use_as_is behind high-conf restructure.
u7: tests/phase_z2/test_imp39_mdx04_env_toggle_e2e.py — samples/mdx_batch/04.mdx with
    AI_FALLBACK_ENABLED=off; backend selected_v4_rank == frontend frame_candidates[0].
u8: tests/phase_z2/test_imp39_corpus_audit.py — real corpus sweep over
    tests/matching/v4_full32_result.yaml (10 MDX sections); section IDs loaded
    dynamically (RULE 0 / RULE 7 sample-agnostic).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
2026-05-24 17:12:07 +09:00
parent 2e3747c5ab
commit 028042aaa9
8 changed files with 1536 additions and 12 deletions

View File

@@ -565,6 +565,13 @@ export async function loadRun(runId: string): Promise<LoadRunResult> {
// Sort priority = label (use_as_is > light_edit > restructure > reject) + confidence desc.
// When every candidate is reject, only confidence desc applies (explicit user requirement).
const TOP_N_FRAMES = 6;
// IMP-39 u4 (issue #68) — local LABEL_PRIORITY mirrors the label_priority
// map of templates/phase_z2/catalog/ranking_sort_policy.yaml (u1).
// Primary ordering arrives pre-sorted from the backend selector
// (src/phase_z2_pipeline.py lookup_v4_match_with_fallback :1186-1196 +
// _build_application_plan_unit u3 payload fields). This constant is read
// ONLY on the warn-fallback path below (legacy fixtures pre-u3 / payload
// missing) and must be kept in sync with u1 by hand.
// NOTE: the fallback comparator applies label priority + confidence desc
// only; it has no v4_rank tie-break, so candidates that tie on both keep
// their dedup-map insertion order (Array.prototype.sort is stable).
const LABEL_PRIORITY: Record<string, number> = {
use_as_is: 0,
light_edit: 1,
@@ -576,9 +583,6 @@ export async function loadRun(runId: string): Promise<LoadRunResult> {
// 2) unit.v4_all_judgments (pre-IMP-05 audit array)
// 3) unit.v4_candidates (legacy minimal)
// fallback_chain alias is intentionally NOT read (Stage 2 guardrail).
const candidateEvidence = Array.isArray(unit.candidate_evidence)
? unit.candidate_evidence
: [];
const candidateMap = new Map<string, any>();
const pushCandidate = (c: any) => {
if (!c) return;
@@ -586,15 +590,64 @@ export async function loadRun(runId: string): Promise<LoadRunResult> {
if (!key) return;
if (!candidateMap.has(key)) candidateMap.set(key, c);
};
candidateEvidence.forEach(pushCandidate);
(unit.v4_all_judgments ?? []).forEach(pushCandidate);
(unit.v4_candidates ?? []).forEach(pushCandidate);
const rawSource = Array.from(candidateMap.values());
const v4Source = [...rawSource].sort((a: any, b: any) => {
const lp = (LABEL_PRIORITY[a.label] ?? 99) - (LABEL_PRIORITY[b.label] ?? 99);
if (lp !== 0) return lp;
return (b.confidence ?? 0) - (a.confidence ?? 0);
});
// IMP-39 u4 (issue #68) — primary path: consume the backend Step 9
// payload as the single source of ordering truth.
// • ``unit.sorted_candidate_evidence`` = policy-sorted selector trace
// (src/phase_z2_pipeline.py :4163, alias of selection_trace[
// "candidates"] sorted by u2 at :1186-1196). Same IMP-05 L2 schema
// consumed below (template_id, label, confidence, frame_number,
// frame_id, rank, catalog_registered, capacity_fit, route_hint, ...).
// • ``unit.ranking_sort_policy`` = full single-source policy dict
// (policy_type / label_priority / unknown_label_priority /
// tie_break_axes) forwarded for telemetry + fallback parity check.
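// When both are present and sorted_candidate_evidence is non-empty, we
// feed it through the existing dedup map (first occurrence wins, mirrors
// backend ``seen_template_ids`` semantics at :1204-1236) and SKIP the
// local re-sort — backend "rank 1" then equals frontend
// frame_candidates[0] by construction (Stage 1 root-cause fix).
// On this path unit.candidate_evidence / unit.v4_all_judgments /
// unit.v4_candidates are NOT merged in; only the backend-sorted list is used.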
const sortedCandidateEvidence: any[] | null = Array.isArray(
unit.sorted_candidate_evidence,
)
? unit.sorted_candidate_evidence
: null;
const rankingSortPolicy = unit.ranking_sort_policy ?? null;
const backendPolicyPayloadPresent =
sortedCandidateEvidence !== null &&
sortedCandidateEvidence.length > 0 &&
rankingSortPolicy !== null;
let v4Source: any[];
if (backendPolicyPayloadPresent) {
sortedCandidateEvidence!.forEach(pushCandidate);
v4Source = Array.from(candidateMap.values());
} else {
// IMP-39 u4 — warn-fallback path. Legacy fixtures predating u3 (or
// any code path that strips the payload) lack the backend-sorted
// evidence; ordering then derives from the local LABEL_PRIORITY mirror.
// This path is also taken when sorted_candidate_evidence is an empty
// array or ranking_sort_policy is null/undefined.
// The warning surfaces drift in the dev console without hard-failing the
// UI (graceful: production sample audit deck remains renderable).
if (typeof console !== "undefined" && typeof console.warn === "function") {
console.warn(
`[IMP-39 u4] unit ${unit.unit_id ?? "<unknown>"}: backend payload ` +
"missing ranking_sort_policy / sorted_candidate_evidence — " +
"falling back to local LABEL_PRIORITY (legacy fixture path).",
);
}
const candidateEvidence = Array.isArray(unit.candidate_evidence)
? unit.candidate_evidence
: [];
candidateEvidence.forEach(pushCandidate);
(unit.v4_all_judgments ?? []).forEach(pushCandidate);
(unit.v4_candidates ?? []).forEach(pushCandidate);
const rawSource = Array.from(candidateMap.values());
v4Source = [...rawSource].sort((a: any, b: any) => {
const lp =
(LABEL_PRIORITY[a.label] ?? 99) - (LABEL_PRIORITY[b.label] ?? 99);
if (lp !== 0) return lp;
return (b.confidence ?? 0) - (a.confidence ?? 0);
});
}
// ─── IMP-41 u4 — application_candidates enrichment (issue #70) ───────────
// Backend Step 9 emits `unit.application_candidates[]` (src/phase_z2_pipeline.py
// _application_candidates_for_unit, :3071-3092) one entry per v4 candidate with