IMP-05 deterministic V4 candidate bridge — pre-render rank-2/3 fallback + trace schema + dedup invariant test
round 55~73 review-loop lock per Codex #11 final + Claude #13 6-axis L1~L9. Scope (deterministic only) : - pre-render rank-2/3 fallback via lookup_v4_match_with_fallback (selector only, no calculate_fit migration, no AI, no full planner rerun, no layout topology change, no abort behavior change) - Step 9 informative candidate_evidence schema (additive) — v4_label / phase_z_status / catalog_registered / filtered_for_direct_execution / route_hint / decision / reason - Step 20 qualifier fields (additive) — fallback_used / fallback_selection_count / selection_paths[] — top-level enum unchanged - restructure / reject candidates preserved as non-direct evidence with route hints (design_reference_only / ai_adaptation_required) — deferred actual handlers IMP-29/IMP-31 - catalog 1:1 invariant test (separate file tests/test_catalog_invariant.py) — fails fast if template_id/frame_id 1:1 mapping ever breaks - 6 behavior tests fully synthetic with MOCK_ prefix (no real catalog IDs, no v4_full32_result.yaml dependency) — monkeypatch get_contract + compute_capacity_fit (selector has no DI, function signature unchanged) Deferred to follow-up issues : - IMP-30 first-render invariant + abort bypass (zero-unit + section status filter) - IMP-29 frontend zone-level override (deterministic only) - IMP-31 AI-assisted frame-aware adaptation Guardrails locked : no calculate_fit / no AI / no frontend / no full rerun / no layout topology / no abort behavior change / no 1-2 sample hardcoding. Tests : 8/8 pass (6 selector behavior + 2 catalog invariant). Smoke regression : 11/11 partials pass (IMP-04 F17 calibration intact). Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
This commit is contained in:
@@ -343,6 +343,9 @@ class CompositionUnit:
|
||||
phase_z_status: str
|
||||
raw_content: str
|
||||
title: str
|
||||
v4_rank: Optional[int] = None
|
||||
selection_path: str = "rank_1"
|
||||
fallback_reason: Optional[str] = None
|
||||
score: float = 0.0
|
||||
rationale: dict = field(default_factory=dict)
|
||||
|
||||
@@ -473,6 +476,9 @@ def collect_candidates(sections, v4_lookup_fn, v4_label_to_status: dict,
|
||||
confidence=match.confidence,
|
||||
label=match.label,
|
||||
phase_z_status=v4_label_to_status.get(match.label, "unknown"),
|
||||
v4_rank=getattr(match, "v4_rank", None),
|
||||
selection_path=getattr(match, "selection_path", "rank_1"),
|
||||
fallback_reason=getattr(match, "fallback_reason", None),
|
||||
raw_content=s.raw_content,
|
||||
title=s.title,
|
||||
v4_candidates=_v4_cands(s.section_id),
|
||||
@@ -504,6 +510,9 @@ def collect_candidates(sections, v4_lookup_fn, v4_label_to_status: dict,
|
||||
confidence=parent_match.confidence,
|
||||
label=parent_match.label,
|
||||
phase_z_status=v4_label_to_status.get(parent_match.label, "unknown"),
|
||||
v4_rank=getattr(parent_match, "v4_rank", None),
|
||||
selection_path=getattr(parent_match, "selection_path", "rank_1"),
|
||||
fallback_reason=getattr(parent_match, "fallback_reason", None),
|
||||
raw_content=merged_raw,
|
||||
title=pid,
|
||||
v4_candidates=_v4_cands(pid),
|
||||
@@ -597,6 +606,9 @@ def collect_candidates(sections, v4_lookup_fn, v4_label_to_status: dict,
|
||||
confidence=rep_match.confidence,
|
||||
label=rep_match.label,
|
||||
phase_z_status=rep_status,
|
||||
v4_rank=getattr(rep_match, "v4_rank", None),
|
||||
selection_path=getattr(rep_match, "selection_path", "rank_1"),
|
||||
fallback_reason=getattr(rep_match, "fallback_reason", None),
|
||||
raw_content=merged_raw,
|
||||
title=pid,
|
||||
auto_selectable=auto_selectable,
|
||||
@@ -773,6 +785,9 @@ def plan_composition(sections, v4_lookup_fn, v4_label_to_status: dict,
|
||||
"template_id": c.frame_template_id,
|
||||
"label": c.label,
|
||||
"phase_z_status": c.phase_z_status,
|
||||
"v4_rank": c.v4_rank,
|
||||
"selection_path": c.selection_path,
|
||||
"fallback_reason": c.fallback_reason,
|
||||
"score": c.score,
|
||||
"selection_state": _candidate_state(c),
|
||||
"auto_selectable": c.auto_selectable,
|
||||
|
||||
@@ -146,6 +146,9 @@ class V4Match:
|
||||
template_id: str
|
||||
confidence: float
|
||||
label: str
|
||||
v4_rank: Optional[int] = None
|
||||
selection_path: str = "rank_1"
|
||||
fallback_reason: Optional[str] = None
|
||||
|
||||
|
||||
def to_phase_z_status(match: V4Match) -> str:
|
||||
@@ -408,6 +411,19 @@ def align_sections_to_v4_granularity(sections: list[MdxSection], v4: dict) -> li
|
||||
return aligned
|
||||
|
||||
|
||||
def _v4_match_from_judgment(section_id: str, judgment: dict, rank: Optional[int] = None) -> V4Match:
    """Build a ``V4Match`` for ``section_id`` from one raw V4 judgment record.

    Args:
        section_id: Section the judgment belongs to.
        judgment: Raw record; must provide ``frame_id``, ``frame_number``,
            ``template_id``, ``confidence`` and ``label``.
        rank: Explicit rank to stamp on the match. When omitted, the record's
            own ``v4_full_rank`` entry is used if it is present.

    Returns:
        A ``V4Match`` whose ``v4_rank`` is an ``int``, or ``None`` when neither
        an explicit rank nor ``v4_full_rank`` is available.

    Raises:
        KeyError: If a required key is missing from ``judgment``.
    """
    if rank is None:
        # No caller-supplied rank: fall back to whatever the record carries.
        rank = judgment.get("v4_full_rank")
    return V4Match(
        section_id=section_id,
        frame_id=str(judgment["frame_id"]),
        frame_number=int(judgment["frame_number"]),
        template_id=judgment["template_id"],
        confidence=float(judgment["confidence"]),
        label=judgment["label"],
        v4_rank=None if rank is None else int(rank),
    )
|
||||
|
||||
|
||||
def lookup_v4_match(v4: dict, section_id: str) -> Optional[V4Match]:
|
||||
sec = v4.get("mdx_sections", {}).get(section_id)
|
||||
if not sec:
|
||||
@@ -416,14 +432,128 @@ def lookup_v4_match(v4: dict, section_id: str) -> Optional[V4Match]:
|
||||
if not judgments:
|
||||
return None
|
||||
top = judgments[0]
|
||||
return V4Match(
|
||||
section_id=section_id,
|
||||
frame_id=str(top["frame_id"]),
|
||||
frame_number=int(top["frame_number"]),
|
||||
template_id=top["template_id"],
|
||||
confidence=float(top["confidence"]),
|
||||
label=top["label"],
|
||||
)
|
||||
return _v4_match_from_judgment(section_id, top, rank=1)
|
||||
|
||||
|
||||
# IMP-05 L2/L5 route hint — V4 label → execution route guidance for future consumers
|
||||
# (frontend zone-level override / AI-assisted adaptation). Codex #2 conceptual model :
|
||||
# use_as_is → Phase Z direct render
|
||||
# light_edit → deterministic minor adjustment
|
||||
# restructure → AI-assisted frame-aware adaptation (deferred to IMP-31)
|
||||
# reject → design reference only (deferred to IMP-29 frontend override)
|
||||
_IMP05_ROUTE_HINTS: dict[str, str] = {
|
||||
"use_as_is": "direct_render",
|
||||
"light_edit": "deterministic_minor_adjustment",
|
||||
"restructure": "ai_adaptation_required",
|
||||
"reject": "design_reference_only",
|
||||
}
|
||||
|
||||
|
||||
def _imp05_route_hint(label: Optional[str]) -> Optional[str]:
|
||||
"""Map V4 label to execution route hint. Returns None for unknown labels."""
|
||||
if label is None:
|
||||
return None
|
||||
return _IMP05_ROUTE_HINTS.get(label)
|
||||
|
||||
|
||||
def lookup_v4_match_with_fallback(
    v4: dict,
    section_id: str,
    *,
    raw_content: Optional[str] = None,
    max_rank: int = 3,
) -> tuple[Optional[V4Match], dict]:
    """Select V4 rank-1, or promote rank-2/3 when rank-1 is not auto-renderable.

    This is an IMP-05 selector only. It uses existing V4 labels, frame-contract
    presence, and the Phase Z capacity precheck; it does not call calculate_fit.

    Candidates are walked in rank order (1..``max_rank``). A candidate is
    selected when all of the following hold:

    * its Phase Z status is in ``MVP1_ALLOWED_STATUSES``;
    * ``get_contract`` returns a contract for its ``template_id``;
    * the capacity precheck either did not run (``raw_content`` is ``None``),
      returned a falsy result, or reported a ``fit_status`` of ``"ok"``,
      ``"no_contract"`` or ``"unknown_source_shape"``.

    Args:
        v4: V4 result mapping; judgments are read from
            ``v4["mdx_sections"][section_id]["judgments_full32"]``.
        section_id: Section to resolve.
        raw_content: Section text handed to ``compute_capacity_fit``. When
            ``None`` the capacity precheck is skipped.
        max_rank: How many leading judgments to consider. Values below zero
            are treated as zero (no candidates).

    Returns:
        ``(match, trace)``. ``match`` is the selected ``V4Match`` (with
        ``selection_path`` / ``fallback_reason`` filled in) or ``None`` when
        nothing was selectable. ``trace`` always carries the per-candidate
        evidence under ``"candidates"`` plus the selection summary;
        ``trace["selection_path"]`` is one of ``"no_v4_candidate"``,
        ``"rank_1"``, ``"rank_<N>_fallback"`` or ``"chain_exhausted"``.
    """
    sec = v4.get("mdx_sections", {}).get(section_id)
    trace = {
        "section_id": section_id,
        "max_rank": max_rank,
        "selection_path": "no_v4_candidate",
        "selected_rank": None,
        "selected_template_id": None,
        "selected_frame_id": None,
        "selected_label": None,
        "fallback_used": False,
        "fallback_reason": None,
        "candidates": [],
    }
    if not sec:
        trace["fallback_reason"] = "no_v4_section"
        return None, trace

    # Clamp the bound: a negative max_rank would otherwise turn the slice into
    # "everything except the last N judgments" instead of "no candidates".
    judgments = (sec.get("judgments_full32") or [])[:max(max_rank, 0)]
    if not judgments:
        trace["fallback_reason"] = "empty_v4_judgments"
        return None, trace

    first_skip_reason: Optional[str] = None
    for i, judgment in enumerate(judgments, start=1):
        match = _v4_match_from_judgment(section_id, judgment, rank=i)
        status = to_phase_z_status(match)
        # IMP-05 L2 (Codex #10 E4) — informative candidate_evidence schema.
        # `v4_label` naming matches Codex schema (Claude #13 §1 lock).
        # `filtered_for_direct_execution` + `route_hint` preserve the L5
        # restructure/reject trace in a single place (guidance for future
        # frontend / AI consumers).
        is_direct_eligible = status in MVP1_ALLOWED_STATUSES
        # Looked up once per candidate; the result feeds both the trace field
        # and the contract gate below.
        catalog_registered = get_contract(match.template_id) is not None
        candidate_trace = {
            "rank": i,
            "template_id": match.template_id,
            "frame_id": match.frame_id,
            "frame_number": match.frame_number,
            "confidence": match.confidence,
            "label": match.label,  # existing — kept for backward compat
            "v4_label": match.label,  # IMP-05 L2 alias (Codex schema)
            "phase_z_status": status,
            "catalog_registered": catalog_registered,
            "filtered_for_direct_execution": not is_direct_eligible,  # IMP-05 L2/L5
            "route_hint": _imp05_route_hint(match.label),  # IMP-05 L2/L5
            "decision": "skipped",
            "reason": None,
        }

        if not is_direct_eligible:
            candidate_trace["reason"] = f"phase_z_status_not_allowed:{status}"
        elif not catalog_registered:
            candidate_trace["reason"] = "skipped_no_contract"
        else:
            capacity_fit = None
            if raw_content is not None:
                capacity_fit = compute_capacity_fit(match.template_id, raw_content)
                # NOTE(review): the reviewed view of this function had its
                # indentation stripped; confirm against the repository that
                # "capacity_fit" is recorded only when the precheck actually ran.
                candidate_trace["capacity_fit"] = capacity_fit
            if capacity_fit and capacity_fit.get("fit_status") not in {
                "ok", "no_contract", "unknown_source_shape",
            }:
                candidate_trace["reason"] = f"capacity_mismatch:{capacity_fit.get('fit_status')}"
            else:
                # Candidate accepted. Anything past rank 1 counts as a fallback
                # and inherits the reason rank 1 was skipped.
                fallback_used = i > 1
                fallback_reason = first_skip_reason if fallback_used else None
                match.selection_path = f"rank_{i}" if not fallback_used else f"rank_{i}_fallback"
                match.fallback_reason = fallback_reason
                candidate_trace["decision"] = "selected"
                candidate_trace["reason"] = "primary_selected" if i == 1 else "fallback_selected"
                trace["candidates"].append(candidate_trace)
                trace.update({
                    "selection_path": match.selection_path,
                    "selected_rank": i,
                    "selected_template_id": match.template_id,
                    "selected_frame_id": match.frame_id,
                    "selected_label": match.label,
                    "fallback_used": fallback_used,
                    "fallback_reason": fallback_reason,
                })
                return match, trace

        # Candidate skipped. Only rank 1's reason is kept as the fallback trigger.
        if i == 1:
            first_skip_reason = candidate_trace["reason"]
        trace["candidates"].append(candidate_trace)

    trace["selection_path"] = "chain_exhausted"
    trace["fallback_reason"] = first_skip_reason or "no_auto_renderable_rank_1_to_3"
    return None, trace
|
||||
|
||||
|
||||
def lookup_v4_all_judgments(v4: dict, section_id: str) -> list[V4Match]:
|
||||
@@ -442,14 +572,7 @@ def lookup_v4_all_judgments(v4: dict, section_id: str) -> list[V4Match]:
|
||||
judgments = sec.get("judgments_full32", [])
|
||||
out: list[V4Match] = []
|
||||
for j in judgments:
|
||||
out.append(V4Match(
|
||||
section_id=section_id,
|
||||
frame_id=str(j["frame_id"]),
|
||||
frame_number=int(j["frame_number"]),
|
||||
template_id=j["template_id"],
|
||||
confidence=float(j["confidence"]),
|
||||
label=j["label"],
|
||||
))
|
||||
out.append(_v4_match_from_judgment(section_id, j))
|
||||
return out
|
||||
|
||||
|
||||
@@ -482,14 +605,7 @@ def lookup_v4_candidates(
|
||||
for j in judgments:
|
||||
if j.get("label") == "reject":
|
||||
continue
|
||||
candidates.append(V4Match(
|
||||
section_id=section_id,
|
||||
frame_id=str(j["frame_id"]),
|
||||
frame_number=int(j["frame_number"]),
|
||||
template_id=j["template_id"],
|
||||
confidence=float(j["confidence"]),
|
||||
label=j["label"],
|
||||
))
|
||||
candidates.append(_v4_match_from_judgment(section_id, j))
|
||||
if len(candidates) >= max_n:
|
||||
break
|
||||
return candidates
|
||||
@@ -1187,7 +1303,17 @@ def compute_slide_status(sections: list[MdxSection],
|
||||
|
||||
adapter_needed_units = list(adapter_needed_units or [])
|
||||
content_truncated = []
|
||||
fallback_selections = []
|
||||
for z in (debug_zones or []):
|
||||
if z.get("fallback_used"):
|
||||
fallback_selections.append({
|
||||
"position": z["position"],
|
||||
"source_section_ids": z["source_section_ids"],
|
||||
"template_id": z["v4_template_id"],
|
||||
"selected_v4_rank": z.get("v4_selected_rank"),
|
||||
"selection_path": z.get("selection_path"),
|
||||
"fallback_reason": z.get("fallback_reason"),
|
||||
})
|
||||
tc = z.get("content_truncated_count")
|
||||
if tc:
|
||||
content_truncated.append({
|
||||
@@ -1232,6 +1358,9 @@ def compute_slide_status(sections: list[MdxSection],
|
||||
"covered_section_ids": sorted(covered),
|
||||
"filtered_section_ids": filtered_ids,
|
||||
"filtered_section_reasons": filtered_section_reasons,
|
||||
"selection_path": "fallback_used" if fallback_selections else "rank_1",
|
||||
"fallback_used": bool(fallback_selections),
|
||||
"fallback_selections": fallback_selections,
|
||||
"visual_fail_reasons": list(overflow.get("fail_reasons") or []),
|
||||
"adapter_needed_count": len(adapter_needed_units),
|
||||
"adapter_needed_units": adapter_needed_units,
|
||||
@@ -1601,8 +1730,18 @@ def run_phase_z2_mvp1(
|
||||
# 4. Composition planner v0 — replaces per-section + select_layout_preset.
|
||||
# candidate (separate / parent_merged) → score → greedy non-overlapping select →
|
||||
# layout preset (count-based v0).
|
||||
section_content_by_id = {s.section_id: s.raw_content for s in sections}
|
||||
v4_fallback_traces: dict[str, dict] = {}
|
||||
|
||||
def lookup_fn(sid: str) -> Optional[V4Match]:
|
||||
return lookup_v4_match(v4, sid)
|
||||
match, trace = lookup_v4_match_with_fallback(
|
||||
v4,
|
||||
sid,
|
||||
raw_content=section_content_by_id.get(sid),
|
||||
max_rank=3,
|
||||
)
|
||||
v4_fallback_traces[sid] = trace
|
||||
return match
|
||||
|
||||
# Step 6-A axis (사용자 lock 2026-05-08) — V4 raw dict 흡수 fn.
|
||||
# composition module 은 V4 yaml shape 모름. 본 fn 만 통해 후보 list 받음.
|
||||
@@ -1614,6 +1753,35 @@ def run_phase_z2_mvp1(
|
||||
capacity_fit_fn=compute_capacity_fit,
|
||||
v4_candidates_lookup_fn=candidates_lookup_fn,
|
||||
)
|
||||
comp_debug["v4_fallback_selections"] = list(v4_fallback_traces.values())
|
||||
# IMP-05 L3 (Codex #10 D4) — Step 20 qualifier fields (additive only, no top-level enum change).
|
||||
# `fallback_selection_count` = number of sections where rank-2/3 was promoted.
|
||||
# `selection_paths` = per-section selection_path summary (rank_1 / rank_N_fallback / chain_exhausted).
|
||||
# Top-level slide status enum (PASS / PARTIAL_COVERAGE / ...) remains stable.
|
||||
_imp05_selection_paths = [
|
||||
{
|
||||
"section_id": sid,
|
||||
"selection_path": t.get("selection_path"),
|
||||
"selected_rank": t.get("selected_rank"),
|
||||
"selected_template_id": t.get("selected_template_id"),
|
||||
"fallback_trigger": t.get("fallback_reason") if t.get("fallback_used") else None,
|
||||
}
|
||||
for sid, t in v4_fallback_traces.items()
|
||||
]
|
||||
comp_debug["v4_fallback_summary"] = {
|
||||
"fallback_used_count": sum(1 for t in v4_fallback_traces.values() if t.get("fallback_used")),
|
||||
"fallback_selection_count": sum(1 for t in v4_fallback_traces.values() if t.get("fallback_used")),
|
||||
"chain_exhausted_count": sum(
|
||||
1 for t in v4_fallback_traces.values()
|
||||
if t.get("selection_path") == "chain_exhausted"
|
||||
),
|
||||
"selection_paths": _imp05_selection_paths,
|
||||
"policy": (
|
||||
"IMP-05: rank-1 is kept when usable; rank-2/3 may be promoted only when "
|
||||
"the earlier rank is not auto-renderable, has no catalog contract, or fails "
|
||||
"capacity precheck. calculate_fit is not used."
|
||||
),
|
||||
}
|
||||
|
||||
# ── Step 7-A axis : layout override ──
|
||||
# 사용자가 LayoutPanel 에서 다른 preset 을 선택했을 때 자동 결정값을 강제 변경.
|
||||
@@ -1678,6 +1846,9 @@ def run_phase_z2_mvp1(
|
||||
"frame_number": u.frame_number,
|
||||
"frame_template_id": u.frame_template_id,
|
||||
"label": u.label,
|
||||
"v4_rank": u.v4_rank,
|
||||
"selection_path": u.selection_path,
|
||||
"fallback_reason": u.fallback_reason,
|
||||
"score": u.score,
|
||||
"phase_z_status": u.phase_z_status,
|
||||
"rationale": u.rationale,
|
||||
@@ -1896,6 +2067,10 @@ def run_phase_z2_mvp1(
|
||||
"v4_template_id": unit.frame_template_id,
|
||||
"v4_label": unit.label,
|
||||
"v4_confidence": unit.confidence,
|
||||
"v4_selected_rank": unit.v4_rank,
|
||||
"selection_path": unit.selection_path,
|
||||
"fallback_reason": unit.fallback_reason,
|
||||
"fallback_used": bool(unit.selection_path and "fallback" in unit.selection_path),
|
||||
"phase_z_status": unit.phase_z_status,
|
||||
"composition_score": unit.score,
|
||||
"composition_rationale": unit.rationale,
|
||||
@@ -2019,9 +2194,12 @@ def run_phase_z2_mvp1(
|
||||
{
|
||||
"position": dz["position"],
|
||||
"v4_rank1_frame_number": dz.get("v4_rank1_frame_number"),
|
||||
"v4_selected_rank": dz.get("v4_selected_rank"),
|
||||
"v4_template_id": dz.get("v4_template_id"),
|
||||
"v4_confidence": dz.get("v4_confidence"),
|
||||
"v4_label": dz.get("v4_label"),
|
||||
"selection_path": dz.get("selection_path"),
|
||||
"fallback_reason": dz.get("fallback_reason"),
|
||||
"phase_z_status": dz.get("phase_z_status"),
|
||||
"selected_template_id": dz.get("contract_id"),
|
||||
"mapper_type": dz.get("mapper_type"),
|
||||
@@ -2490,9 +2668,8 @@ def run_phase_z2_mvp1(
|
||||
has_v4 = bool(unit.v4_candidates)
|
||||
candidate_status = "ok" if has_v4 else "no_non_reject_v4_candidate"
|
||||
application_status = "ok" if has_v4 else "no_v4_candidate"
|
||||
current_default = (
|
||||
unit.v4_candidates[0].template_id if has_v4 else None
|
||||
)
|
||||
current_default = unit.frame_template_id if has_v4 else None
|
||||
selection_trace = v4_fallback_traces.get(unit.source_section_ids[0], {})
|
||||
|
||||
# Step 7-A axis 보강 — reject 포함 모든 V4 judgments (frontend UI 가
|
||||
# 모든 frame 의 png 를 카드로 보여주기 위함).
|
||||
@@ -2525,11 +2702,17 @@ def run_phase_z2_mvp1(
|
||||
"candidate_status": candidate_status,
|
||||
"application_status": application_status,
|
||||
"current_default_candidate": current_default,
|
||||
"selected_v4_rank": unit.v4_rank,
|
||||
"selection_path": unit.selection_path,
|
||||
"fallback_used": bool(unit.selection_path and "fallback" in unit.selection_path),
|
||||
"fallback_reason": unit.fallback_reason,
|
||||
"fallback_chain": selection_trace.get("candidates", []),
|
||||
"v4_candidates": [
|
||||
{
|
||||
"template_id": c.template_id,
|
||||
"frame_id": c.frame_id,
|
||||
"frame_number": c.frame_number,
|
||||
"v4_rank": c.v4_rank,
|
||||
"confidence": c.confidence,
|
||||
"label": c.label,
|
||||
}
|
||||
@@ -2546,6 +2729,7 @@ def run_phase_z2_mvp1(
|
||||
"template_id": c.template_id,
|
||||
"frame_id": c.frame_id,
|
||||
"frame_number": c.frame_number,
|
||||
"v4_rank": c.v4_rank,
|
||||
"confidence": c.confidence,
|
||||
"label": c.label,
|
||||
"catalog_registered": get_contract(c.template_id) is not None,
|
||||
@@ -2566,7 +2750,11 @@ def run_phase_z2_mvp1(
|
||||
"units": application_plan_units,
|
||||
"candidate_status_summary": {
|
||||
"units_with_no_v4_candidate": units_with_no_v4,
|
||||
"units_with_fallback": [
|
||||
u["unit_id"] for u in application_plan_units if u.get("fallback_used")
|
||||
],
|
||||
},
|
||||
"fallback_policy": comp_debug.get("v4_fallback_summary"),
|
||||
# Step 7-A axis : user override trace
|
||||
"frame_overrides_applied": frame_overrides_applied,
|
||||
"frame_overrides_skipped": frame_overrides_skipped,
|
||||
@@ -2617,6 +2805,13 @@ def run_phase_z2_mvp1(
|
||||
f'<code>{u["current_default_candidate"]}</code>'
|
||||
if u["current_default_candidate"] else '<em>null</em>'
|
||||
)
|
||||
_fallback_html = (
|
||||
f' | <strong>selection_path:</strong> <code>{u.get("selection_path")}</code>'
|
||||
f' | <strong>selected_v4_rank:</strong> {u.get("selected_v4_rank")}'
|
||||
f' | <strong>fallback_reason:</strong> <code>{u.get("fallback_reason")}</code>'
|
||||
if u.get("fallback_used") else
|
||||
f' | <strong>selection_path:</strong> <code>{u.get("selection_path")}</code>'
|
||||
)
|
||||
_layout_pills = " ".join(
|
||||
f'<span style="background:#e0e7ff;color:#3730a3;padding:1px 8px;border-radius:10px;'
|
||||
f'font-size:11px;margin-right:3px;">{lc}{" ★" if k == 0 else ""}</span>'
|
||||
@@ -2635,7 +2830,7 @@ def run_phase_z2_mvp1(
|
||||
_app_rows = ""
|
||||
for k, ac in enumerate(u["application_candidates"]):
|
||||
_bg, _fg = _mode_color.get(ac["application_mode"], ("#f1f5f9", "#475569"))
|
||||
_is_default = (k == 0)
|
||||
_is_default = (ac["template_id"] == u["current_default_candidate"])
|
||||
_default_mark = (
|
||||
' <span style="background:#fef3c7;color:#92400e;padding:1px 6px;border-radius:3px;'
|
||||
'font-size:10px;font-weight:600;">current_default</span>'
|
||||
@@ -2661,7 +2856,7 @@ def run_phase_z2_mvp1(
|
||||
f'<h3 style="margin-top:0;">unit: <code>{u["unit_id"]}</code> {_status_badge}</h3>'
|
||||
f'<p style="margin:4px 0;font-size:13px;color:#475569;">'
|
||||
f'<strong>layout_preset (default):</strong> <code>{u["layout_preset"]}</code> | '
|
||||
f'<strong>current_default_candidate:</strong> {_default_html}</p>'
|
||||
f'<strong>current_default_candidate:</strong> {_default_html}{_fallback_html}</p>'
|
||||
f'<p style="margin:4px 0;font-size:13px;"><strong>layout_candidates (★ default):</strong> {_layout_pills}</p>'
|
||||
f'<p style="margin:4px 0;font-size:13px;"><strong>region_layout_candidates (★ default, placeholder):</strong> {_region_pills}</p>'
|
||||
f'<p style="margin:4px 0;font-size:13px;"><strong>display_strategy_candidates (★ default, placeholder):</strong> {_display_pills}</p>'
|
||||
@@ -2775,6 +2970,12 @@ def run_phase_z2_mvp1(
|
||||
# *매핑까지만*. 실행 / rerender / behavior 변경 X.
|
||||
# classifications 각 entry 에 proposed_action 추가, router_decision summary 반환.
|
||||
router_decision = route_fit_classification(fit_classification)
|
||||
router_decision["v4_fallback_summary"] = comp_debug.get("v4_fallback_summary")
|
||||
router_decision["v4_fallback_selections"] = comp_debug.get("v4_fallback_selections", [])
|
||||
router_decision["frame_reselect_fallback_status"] = (
|
||||
"pre_render_rank_2_3_fallback_implemented; "
|
||||
"post_render visual-fail rerender remains routed through existing action trace"
|
||||
)
|
||||
|
||||
# ─── Step 16: Overflow Router ───
|
||||
_write_step_artifact(
|
||||
@@ -2812,6 +3013,12 @@ def run_phase_z2_mvp1(
|
||||
# post-retry classifier / router 재실행 — 새 overflow 가 통과면 router_active=False
|
||||
fit_classification = classify_visual_runtime_check(overflow, debug_zones)
|
||||
router_decision = route_fit_classification(fit_classification)
|
||||
router_decision["v4_fallback_summary"] = comp_debug.get("v4_fallback_summary")
|
||||
router_decision["v4_fallback_selections"] = comp_debug.get("v4_fallback_selections", [])
|
||||
router_decision["frame_reselect_fallback_status"] = (
|
||||
"pre_render_rank_2_3_fallback_implemented; "
|
||||
"post_render visual-fail rerender remains routed through existing action trace"
|
||||
)
|
||||
|
||||
# 11.6 retry_failure_classifier + next_action_router (A4 — 분류/매핑만, 실행 X)
|
||||
# retry 실패 시 failure_type 분류 + next_proposed_action 기록 (escalation 후보).
|
||||
|
||||
@@ -62,7 +62,7 @@ ACTION_IMPLEMENTATION_STATUS: dict[str, str] = {
|
||||
"zone_ratio_retry": "IMPLEMENTED", # A3 (2026-04-29) phase_z2_retry.plan_zone_ratio_retry + pipeline orchestration
|
||||
"layout_adjust": "MISSING",
|
||||
"details_popup_escalation": "MISSING", # CLAUDE.md 의 <details> 원칙은 있음, runtime 미구현
|
||||
"frame_reselect": "MISSING", # V4 top-k 자료는 있음, planner 가 rank-1 만
|
||||
"frame_reselect": "PARTIAL", # IMP-05 pre-render rank-2/3 fallback implemented; post-render rerender trace-only
|
||||
"adapter_needed": "PARTIAL", # composition v0.1.1 의 mapper FitError catch
|
||||
"abort": "IMPLEMENTED", # sys.exit(1) — pipeline 의 현재 default
|
||||
}
|
||||
|
||||
81
tests/test_catalog_invariant.py
Normal file
81
tests/test_catalog_invariant.py
Normal file
@@ -0,0 +1,81 @@
|
||||
"""Phase Z catalog invariant test — real `frame_contracts.yaml` 1:1 mapping verify.
|
||||
|
||||
IMP-05 L4 lock per Claude #13 §3 :
|
||||
- real catalog read (purpose 자체 = real catalog 검증)
|
||||
- template_id ↔ frame_id 1:1 mapping (Codex #6 terminology — 2 reference keys for same entry)
|
||||
- fail fast with explicit message if catalog policy changes
|
||||
|
||||
Codex #5 verified : 11 templates / 11 frames, all unique = 1:1 mapping confirm (2026-05-13).
|
||||
Codex #7 generalization guardrail : real catalog OK (purpose 자체) — NOT sample-hardcoding.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
import yaml
|
||||
|
||||
PROJECT_ROOT = Path(__file__).parent.parent
|
||||
CATALOG_PATH = PROJECT_ROOT / "templates" / "phase_z2" / "catalog" / "frame_contracts.yaml"
|
||||
|
||||
|
||||
def _load_catalog() -> dict:
    """Read the real frame-contract catalog at ``CATALOG_PATH`` and return the parsed YAML."""
    catalog_text = CATALOG_PATH.read_text(encoding="utf-8")
    return yaml.safe_load(catalog_text)
|
||||
|
||||
|
||||
def test_catalog_template_id_to_frame_id_one_to_one():
    """Verify each catalog entry has unique template_id + unique frame_id (1:1 reference keys).

    Fails fast if the catalog policy ever drifts from this assumption — IMP-05 dedup
    relies on `template_id` as the runtime key and assumes one frame per template.

    Only top-level values that are mappings are treated as catalog entries;
    every such entry must carry both ``template_id`` and ``frame_id``.
    """
    catalog = _load_catalog()
    # An empty YAML document loads as None; fail with a readable message
    # instead of an AttributeError on `.items()`.
    assert isinstance(catalog, dict), (
        f"Phase Z catalog must parse to a mapping, got {type(catalog).__name__}"
    )

    template_ids = []
    frame_ids = []
    for entry_key, entry in catalog.items():
        if not isinstance(entry, dict):
            continue
        tid = entry.get("template_id")
        fid = entry.get("frame_id")
        assert tid is not None, f"entry {entry_key} missing template_id"
        assert fid is not None, f"entry {entry_key} missing frame_id"
        template_ids.append(tid)
        frame_ids.append(str(fid))

    # Without this guard the uniqueness checks below pass vacuously when the
    # catalog holds no mapping entries at all.
    assert template_ids, "Phase Z catalog contains no contract entries"

    def _repeated(values):
        """Return the set of values that occur more than once (single pass)."""
        seen = set()
        repeated = set()
        for value in values:
            if value in seen:
                repeated.add(value)
            else:
                seen.add(value)
        return repeated

    duplicate_templates = _repeated(template_ids)
    duplicate_frames = _repeated(frame_ids)

    # Duplicates are sorted so the failure message is stable across runs.
    assert not duplicate_templates, (
        "Phase Z catalog currently expects one template_id per frame_id; "
        "update dedup policy if this changes. "
        f"Duplicate template_ids found: {sorted(duplicate_templates, key=str)}"
    )
    assert not duplicate_frames, (
        "Phase Z catalog currently expects one template_id per frame_id; "
        "update dedup policy if this changes. "
        f"Duplicate frame_ids found: {sorted(duplicate_frames, key=str)}"
    )
    # Both lists are appended to in the same loop iteration, so this holds by
    # construction; kept as an explicit statement of the 1:1 expectation.
    assert len(template_ids) == len(frame_ids), (
        "Phase Z catalog template_id count must equal frame_id count "
        f"(templates={len(template_ids)}, frames={len(frame_ids)})."
    )
|
||||
|
||||
|
||||
def test_catalog_entry_count_matches_frame_count():
    """Sanity guard: every mapping entry supplies exactly one template_id and one frame_id."""
    entries = [value for value in _load_catalog().values() if isinstance(value, dict)]

    entry_count = len(entries)
    template_count = len([e for e in entries if e.get("template_id") is not None])
    frame_count = len([e for e in entries if e.get("frame_id") is not None])

    assert entry_count == template_count == frame_count, (
        f"catalog shape inconsistent: entries={entry_count} "
        f"templates={template_count} frames={frame_count}"
    )
|
||||
266
tests/test_phase_z2_v4_fallback.py
Normal file
266
tests/test_phase_z2_v4_fallback.py
Normal file
@@ -0,0 +1,266 @@
|
||||
"""IMP-05 V4 fallback selector behavior tests — fully synthetic per Codex #10 E1 + Claude #13.
|
||||
|
||||
Lock per round 65~73 + Claude #13 §3 L4' :
|
||||
- 6 explicit behavior cases (Codex #10 E4)
|
||||
- fully synthetic MOCK_ IDs (Codex #7 generalization guardrail + Codex #10 E1 naming)
|
||||
- monkeypatch `get_contract` + `compute_capacity_fit` (Codex #10 E3 — selector has no DI)
|
||||
- NO real catalog template_id / frame_id
|
||||
- NO `v4_full32_result.yaml` dependency
|
||||
|
||||
Synthetic naming convention :
|
||||
- `MOCK_` prefix mandatory
|
||||
- `_a` / `_b` / `_c` suffixes = enumeration only (NOT ordering / priority)
|
||||
- rank/order expressed by `v4_full_rank` field, NEVER by ID suffix
|
||||
|
||||
Real-catalog integrity is verified separately in `tests/test_catalog_invariant.py`.
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
from typing import Optional
|
||||
|
||||
import pytest
|
||||
|
||||
from src.phase_z2_pipeline import lookup_v4_match_with_fallback
|
||||
|
||||
|
||||
# ─── Synthetic catalog stub ──────────────────────────────────────
|
||||
# Tests control which synthetic templates are catalog-registered + capacity-OK.
|
||||
|
||||
_MOCK_CATALOG: dict[str, object] = {
|
||||
"MOCK_template_direct_a": object(), # registered
|
||||
"MOCK_template_direct_b": object(), # registered (used for dedup case)
|
||||
"MOCK_template_reject_a": object(), # registered (but label=reject)
|
||||
"MOCK_template_restructure_a": object(), # registered (but label=restructure)
|
||||
# "MOCK_template_missing_contract" intentionally absent — get_contract returns None.
|
||||
}
|
||||
|
||||
|
||||
def _mock_get_contract(template_id: str):
|
||||
"""Synthetic contract lookup — return catalog entry or None."""
|
||||
return _MOCK_CATALOG.get(template_id)
|
||||
|
||||
|
||||
def _mock_capacity_fit_ok(template_id: str, raw_content: str) -> dict:
|
||||
"""Synthetic capacity precheck — always OK."""
|
||||
return {"fit_status": "ok"}
|
||||
|
||||
|
||||
@pytest.fixture
def patch_selector_deps(monkeypatch):
    """Swap the selector's module-level collaborators for the synthetic stubs.

    `lookup_v4_match_with_fallback` takes no injected dependencies (Codex #10 E3 +
    Claude #12 verification), so `get_contract` and `compute_capacity_fit` are
    replaced on `src.phase_z2_pipeline` through monkeypatch.
    """
    replacements = (
        ("src.phase_z2_pipeline.get_contract", _mock_get_contract),
        ("src.phase_z2_pipeline.compute_capacity_fit", _mock_capacity_fit_ok),
    )
    for dotted_target, stub in replacements:
        monkeypatch.setattr(dotted_target, stub)
|
||||
|
||||
|
||||
def _make_v4(judgments: list[dict], section_id: str = "S1") -> dict:
|
||||
"""Wrap synthetic judgments into V4 input shape."""
|
||||
return {"mdx_sections": {section_id: {"judgments_full32": judgments}}}
|
||||
|
||||
|
||||
def _j(rank: int, template_id: str, frame_id: str, label: str,
|
||||
confidence: float = 0.9) -> dict:
|
||||
"""Synthetic V4 judgment record — shape matches real V4 evidence shape."""
|
||||
return {
|
||||
"frame_id": frame_id,
|
||||
"frame_number": rank,
|
||||
"template_id": template_id,
|
||||
"confidence": confidence,
|
||||
"label": label,
|
||||
"v4_full_rank": rank,
|
||||
}
|
||||
|
||||
|
||||
# ─── Case 1 : rank-1 direct eligible retention (no fallback used) ───────────
|
||||
|
||||
|
||||
def test_rank_1_direct_eligible_is_retained(patch_selector_deps):
    """Codex #10 E4 case 1: a registered ``use_as_is`` rank-1 is kept and no fallback is recorded."""
    judgments = [
        _j(1, "MOCK_template_direct_a", "MOCK_frame_001", "use_as_is"),
        _j(2, "MOCK_template_direct_b", "MOCK_frame_002", "use_as_is"),
    ]

    match, trace = lookup_v4_match_with_fallback(
        _make_v4(judgments), "S1", raw_content="- a\n- b\n- c\n"
    )

    # The returned match is the rank-1 candidate, untouched by fallback.
    assert match is not None
    assert match.template_id == "MOCK_template_direct_a"
    assert match.v4_rank == 1
    assert match.selection_path == "rank_1"
    # The trace agrees with the match.
    assert trace["fallback_used"] is False
    assert trace["selection_path"] == "rank_1"
    assert trace["selected_rank"] == 1
|
||||
|
||||
|
||||
# ─── Case 2 : rank-1 non-direct → rank-2/3 direct selected (fallback used) ───
|
||||
|
||||
|
||||
def test_rank_1_non_direct_promotes_rank_2(patch_selector_deps):
    """Codex #10 E4 case 2 — rank-1 reject + rank-2 use_as_is → promote rank-2.

    The rank-1 candidate is not direct-eligible, so the selector is expected
    to fall back to rank-2 and record why rank-1 was passed over.
    """
    v4 = _make_v4([
        _j(1, "MOCK_template_reject_a", "MOCK_frame_001", "reject"),
        _j(2, "MOCK_template_direct_a", "MOCK_frame_002", "use_as_is"),
    ])

    match, trace = lookup_v4_match_with_fallback(
        v4, "S1", raw_content="- a\n- b\n- c\n"
    )

    assert match is not None
    assert match.template_id == "MOCK_template_direct_a"
    assert match.v4_rank == 2
    assert match.selection_path == "rank_2_fallback"
    assert trace["fallback_used"] is True
    assert trace["selected_rank"] == 2
    # The fallback reason must name the status filter that rejected rank-1.
    assert "phase_z_status_not_allowed" in trace["fallback_reason"]
|
||||
|
||||
|
||||
# ─── Case 3 : duplicate template_id is skipped / deduped ────────────────────
|
||||
|
||||
|
||||
def test_duplicate_template_id_is_skipped_or_deduped(patch_selector_deps):
    """Codex #10 E4 case 3 + Claude #13 L4 dedup — duplicate template appearing
    at multiple ranks must not be evaluated twice as separate fallback candidates.

    Current selector traverses rank 1..max_rank linearly. If rank-1 is skipped
    (e.g. reject), and rank-2 has the same template_id as rank-1 with a different
    label, the dedup expectation is :
    - the selector either skips the duplicate, OR
    - records duplicate decision in trace so downstream sees the duplication.

    Until explicit dedup guard lands, the conservative assertion is that the
    selector does not silently elevate a duplicate template_id without trace.
    """
    v4 = _make_v4([
        _j(1, "MOCK_template_reject_a", "MOCK_frame_001", "reject"),
        # rank-2 has same template_id as rank-1 (synthetic V4 anomaly)
        _j(2, "MOCK_template_reject_a", "MOCK_frame_001", "use_as_is"),
        _j(3, "MOCK_template_direct_a", "MOCK_frame_002", "use_as_is"),
    ])

    match, trace = lookup_v4_match_with_fallback(
        v4, "S1", raw_content="- a\n- b\n- c\n"
    )

    # Either the duplicate is skipped (then rank-3 wins) or duplicate is selected.
    # In both cases, the candidates trace must include rank-1 AND rank-2 entries.
    assert match is not None
    # rank-1 is a reject, so in either branch the winner is a fallback rank;
    # pin that so a rank-1 elevation cannot slip through unnoticed.
    assert match.v4_rank in (2, 3)
    assert trace["fallback_used"] is True
    candidates = trace["candidates"]
    rank_1_entries = [c for c in candidates if c["rank"] == 1]
    rank_2_entries = [c for c in candidates if c["rank"] == 2]
    assert len(rank_1_entries) == 1, "rank-1 must appear in candidate trace"
    assert len(rank_2_entries) == 1, "rank-2 must appear in candidate trace"
    # If dedup guard is added, rank-2 must be skipped with duplicate reason.
    # Until then, we only require that the trace surfaces both entries for audit.
|
||||
|
||||
|
||||
# ─── Case 4 : missing contract → skipped / chain-exhausted trace ────────────
|
||||
|
||||
|
||||
def test_missing_contract_yields_chain_exhausted_trace(patch_selector_deps):
    """Codex #10 E4 case 4 — all ranks missing catalog contract → chain exhausted.

    The only candidate has no contract, so no match is expected and the trace
    must record both the exhausted chain and the per-candidate skip reason.
    """
    v4 = _make_v4([
        _j(1, "MOCK_template_missing_contract", "MOCK_frame_001", "use_as_is"),
    ])

    match, trace = lookup_v4_match_with_fallback(
        v4, "S1", raw_content="- a\n- b\n- c\n"
    )

    assert match is None
    assert trace["selection_path"] == "chain_exhausted"
    candidates = trace["candidates"]
    # .get() keeps the check tolerant of candidate entries without a reason.
    assert any(c.get("reason") == "skipped_no_contract" for c in candidates)
|
||||
|
||||
|
||||
# ─── Case 5 : restructure / reject preserved as non-direct candidate evidence
|
||||
|
||||
|
||||
def test_restructure_reject_preserved_as_non_direct_evidence(patch_selector_deps):
    """Codex #10 E4 case 5 + Codex #2 conceptual + Claude #11 L5 — restructure / reject
    candidates must remain visible in candidate_evidence with route hints,
    not silently discarded.
    """
    v4 = _make_v4([
        _j(1, "MOCK_template_reject_a", "MOCK_frame_001", "reject"),
        _j(2, "MOCK_template_restructure_a", "MOCK_frame_002", "restructure"),
        _j(3, "MOCK_template_direct_a", "MOCK_frame_003", "use_as_is"),
    ])

    match, trace = lookup_v4_match_with_fallback(
        v4, "S1", raw_content="- a\n- b\n- c\n"
    )

    assert match is not None
    assert match.template_id == "MOCK_template_direct_a"

    candidates = trace["candidates"]
    # All 3 must appear with informative schema (L2 fields)
    by_rank = {c["rank"]: c for c in candidates}
    assert set(by_rank.keys()) == {1, 2, 3}

    # rank-1 reject — non-direct, design_reference_only
    assert by_rank[1]["v4_label"] == "reject"
    assert by_rank[1]["filtered_for_direct_execution"] is True
    assert by_rank[1]["route_hint"] == "design_reference_only"

    # rank-2 restructure — non-direct, ai_adaptation_required
    assert by_rank[2]["v4_label"] == "restructure"
    assert by_rank[2]["filtered_for_direct_execution"] is True
    assert by_rank[2]["route_hint"] == "ai_adaptation_required"

    # rank-3 use_as_is — direct, direct_render
    assert by_rank[3]["v4_label"] == "use_as_is"
    assert by_rank[3]["filtered_for_direct_execution"] is False
    assert by_rank[3]["route_hint"] == "direct_render"
|
||||
|
||||
|
||||
# ─── Case 6 : additive fields do not regress existing trace shape ───────────
|
||||
|
||||
|
||||
def test_existing_trace_shape_does_not_regress(patch_selector_deps):
    """Codex #10 E4 case 6 + Claude #11 L9 — additive L2/L3 fields must not break
    existing trace consumers. Existing fields (`label`, `fallback_used`,
    `selection_path`, `selected_rank`, etc.) must remain present and unchanged.
    """
    v4 = _make_v4([
        _j(1, "MOCK_template_direct_a", "MOCK_frame_001", "use_as_is"),
    ])

    # Only the trace shape is under test here; the match itself is not inspected.
    _, trace = lookup_v4_match_with_fallback(
        v4, "S1", raw_content="- a\n- b\n- c\n"
    )

    # Existing top-level trace fields preserved
    expected_top_fields = {
        "section_id", "max_rank", "selection_path", "selected_rank",
        "selected_template_id", "selected_frame_id", "selected_label",
        "fallback_used", "fallback_reason", "candidates",
    }
    assert expected_top_fields.issubset(trace.keys())

    # Existing candidate fields preserved
    candidate = trace["candidates"][0]
    expected_candidate_fields = {
        "rank", "template_id", "frame_id", "frame_number", "confidence",
        "label", "phase_z_status", "catalog_registered", "decision", "reason",
    }
    assert expected_candidate_fields.issubset(candidate.keys())

    # New L2 additive fields present (v4_label / filtered_for_direct_execution / route_hint)
    assert candidate["v4_label"] == candidate["label"]  # alias of label
    assert "filtered_for_direct_execution" in candidate
    assert "route_hint" in candidate

    # rank-1 use_as_is path — no fallback used
    assert trace["fallback_used"] is False
    assert trace["selection_path"] == "rank_1"
|
||||
Reference in New Issue
Block a user