Files
C.E.L_Slide_test2/src/phase_z2_composition.py
kyeongmin e7848b602d Add Phase Z runtime foundation
- add visual fit classifier, router, retry, and failure routing modules
- add composition planner and catalog-driven mapper
- add Phase Z pipeline orchestration and architecture docs
2026-05-04 08:21:28 +09:00

572 lines
23 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Phase Z-2 Composition Planner v0.
Pipeline 의 빠진 layer = MDX 덩어리들을 *최종 zone unit* 으로 묶는 결정 layer.
위치 :
parse_mdx → align_sections_to_v4_granularity → [본 모듈] → render
원칙 (절대 룰) :
- 특정 MDX / frame / section 하드코딩 X (예: "04-2 면" / "F16 이면")
- 모든 결정 = catalog 메타 + V4 evidence parametric
- 같은 코드가 MDX 02/03/04/05/06... 모두 처리 — 결과는 케이스마다 다름
- drilling 결과 = 입력 (재료), composition planner 결과 = 출력 (zone units)
- slide-level layout = zone 까지만 나눔. zone 내부 분할은 frame partial 책임
8 layout preset vocabulary :
L1 single / L2 horizontal-2 / L3 vertical-2
L4 top-1-bottom-2 / L5 top-2-bottom-1
L6 left-1-right-2 / L7 left-2-right-1
L8 grid-2x2
"""
from __future__ import annotations
from dataclasses import dataclass, field
from typing import Optional
# ─── 8 Layout Preset Vocabulary ────────────────────────────────
# Layout preset table, keyed by preset name.
# Every entry carries the same six keys, in this order:
#   zones      - number of zones in the layout
#   topology   - short tag naming the arrangement
#   positions  - zone names, listed in row-major order of first appearance
#   css_areas / css_cols / css_rows - grid strings for the zones
#     (presumably fed to CSS grid-template-areas / -columns / -rows by the
#     renderer; verify against the consumer)
LAYOUT_PRESETS: dict[str, dict] = {
    # One zone covering the whole slide.
    "single": dict(
        zones=1,
        topology="single",
        positions=["primary"],
        css_areas='"primary"',
        css_cols="1fr",
        css_rows="1fr",
    ),
    # Two zones stacked as rows (the dividing line is horizontal).
    "horizontal-2": dict(
        zones=2,
        topology="rows",
        positions=["top", "bottom"],
        css_areas='"top" "bottom"',
        css_cols="1fr",
        css_rows="1fr 1fr",
    ),
    # Two zones side by side as columns (the dividing line is vertical).
    "vertical-2": dict(
        zones=2,
        topology="cols",
        positions=["left", "right"],
        css_areas='"left right"',
        css_cols="1fr 1fr",
        css_rows="1fr",
    ),
    # Full-width top zone above two bottom zones.
    "top-1-bottom-2": dict(
        zones=3,
        topology="T",
        positions=["top", "bottom-left", "bottom-right"],
        css_areas='"top top" "bottom-left bottom-right"',
        css_cols="1fr 1fr",
        css_rows="1fr 1fr",
    ),
    # Two top zones above a full-width bottom zone.
    "top-2-bottom-1": dict(
        zones=3,
        topology="inverted-T",
        positions=["top-left", "top-right", "bottom"],
        css_areas='"top-left top-right" "bottom bottom"',
        css_cols="1fr 1fr",
        css_rows="1fr 1fr",
    ),
    # Full-height left zone beside two stacked right zones.
    "left-1-right-2": dict(
        zones=3,
        topology="side-T-left",
        positions=["left", "right-top", "right-bottom"],
        css_areas='"left right-top" "left right-bottom"',
        css_cols="1fr 1fr",
        css_rows="1fr 1fr",
    ),
    # Two stacked left zones beside a full-height right zone.
    "left-2-right-1": dict(
        zones=3,
        topology="side-T-right",
        positions=["left-top", "right", "left-bottom"],
        css_areas='"left-top right" "left-bottom right"',
        css_cols="1fr 1fr",
        css_rows="1fr 1fr",
    ),
    # Four equal zones in a 2x2 grid.
    "grid-2x2": dict(
        zones=4,
        topology="2x2",
        positions=["top-left", "top-right", "bottom-left", "bottom-right"],
        css_areas='"top-left top-right" "bottom-left bottom-right"',
        css_cols="1fr 1fr",
        css_rows="1fr 1fr",
    ),
}
# ─── CompositionUnit ────────────────────────────────────────────
@dataclass
class CompositionUnit:
    """One zone candidate within a slide: MDX section(s) plus the frame matched to them.

    source_section_ids : one id for a single section, two or more for a merge.
    merge_type :
        - "single"                 : a single section
        - "parent_merged"          : the parent itself has a V4 entry (v0)
        - "parent_merged_inferred" : no parent V4 entry; inferred from child
                                     evidence (v0.1)
    frame_* / confidence / label : copied as-is from the V4 match (nothing is
        hardcoded from catalog metadata).
    phase_z_status  : Phase Z status that the V4 label maps to.
    raw_content / title : text and title carried by the candidate.
    score           : overall score (0.0 until the candidate is scored).
    rationale       : score breakdown, kept for traceability.
    auto_selectable : False excludes the candidate from automatic selection.
    filter_reasons  : why auto_selectable is False.
    notes           : informational signals that do not affect auto_selectable.
    """
    source_section_ids: list[str]
    merge_type: str
    frame_template_id: str
    frame_id: str
    frame_number: int
    confidence: float
    label: str # use_as_is / light_edit / restructure / reject
    phase_z_status: str
    raw_content: str
    title: str
    score: float = 0.0
    rationale: dict = field(default_factory=dict)
    # Automatic-pipeline state (no review/UI concept here - only the automatic
    # decision plus an explicit record of why a candidate was excluded).
    # auto_selectable=False removes the candidate from the automatic selection
    # step; filter_reasons holds the reasons.
    # Example: W1/W2/W3 on a parent_merged_inferred candidate (representative
    # status / all children reject / majority not auto-renderable).
    # User/AI review is handled in a separate layer (interactive editor); this
    # dataclass only carries the completed automatic decision.
    auto_selectable: bool = True
    filter_reasons: list[str] = field(default_factory=list)
    # Informational signals, independent of auto_selectable; reserved for
    # future scoring axes.
    # Example: "children disagree on rank-1 template_id" /
    # "minority of children non-auto-renderable"
    notes: list[str] = field(default_factory=list)
# ─── Heading Tree ──────────────────────────────────────────────
def derive_parent_id(section_id: str) -> Optional[str]:
    """Return the parent id implied by ``section_id``, or ``None`` if it has none.

    The id is cut at its first "-" into an MDX prefix and a suffix. When the
    suffix contains a ".", the parent is the prefix plus the part of the
    suffix before the first "."; otherwise there is no parent.

    Examples (illustrations of the key convention, not special cases):
        - "04-2.1"   -> "04-2"  (decimal suffix stripped)
        - "04-2.1.3" -> "04-2"  (everything after the first "." is dropped)
        - "04-1"     -> None    (top level, no parent)
        - "04"       -> None    (no "-" at all)
    """
    mdx_id, dash, suffix = section_id.partition("-")
    if not dash or "." not in suffix:
        return None
    parent_suffix, _, _ = suffix.partition(".")
    return f"{mdx_id}-{parent_suffix}"
def build_heading_tree(sections) -> dict:
    """Index sections by id and attach each one to its parent, if present.

    Returns ``{section_id: {"section": <section>, "children": [child ids]}}``.
    A section is listed under its parent only when the id produced by
    ``derive_parent_id`` is itself one of the given sections. ``sections`` is
    iterated twice, so it must be re-iterable (e.g. a list).
    """
    tree = {}
    for sec in sections:
        tree[sec.section_id] = {"section": sec, "children": []}

    for sec in sections:
        parent_id = derive_parent_id(sec.section_id)
        parent_node = tree.get(parent_id) if parent_id else None
        if parent_node is not None:
            parent_node["children"].append(sec.section_id)
    return tree
# ─── Candidate Generation ──────────────────────────────────────
def _apply_capacity_fit(candidate: CompositionUnit, capacity_fit_fn) -> None:
    """Record the capacity-fit verdict on ``candidate`` and demote it on a mismatch.

    Does nothing when ``capacity_fit_fn`` is None. Otherwise the function is
    called with ``(candidate.frame_template_id, candidate.raw_content)`` and
    the dict it returns is stored under ``candidate.rationale["capacity_fit"]``.

    A ``fit_status`` of "ok", "no_contract" or "unknown_source_shape" leaves
    the candidate's selectability untouched (the original note says
    "no_contract" is rejected separately by the catalog-only mapper). Any
    other status sets ``auto_selectable=False`` and appends a
    ``"C1: capacity mismatch ..."`` entry to ``filter_reasons``, built from
    the verdict's ``fit_status`` and ``mismatch_reason``.
    """
    if capacity_fit_fn is None:
        return

    verdict = capacity_fit_fn(candidate.frame_template_id, candidate.raw_content)
    candidate.rationale["capacity_fit"] = verdict

    harmless = {"ok", "no_contract", "unknown_source_shape"}
    if verdict["fit_status"] not in harmless:
        # A mismatch would lose content silently or fail in the mapper, so the
        # candidate is taken out of automatic selection up front.
        candidate.auto_selectable = False
        reason = f"C1: capacity mismatch ({verdict['fit_status']}) — {verdict['mismatch_reason']}"
        candidate.filter_reasons.append(reason)
def _make_candidate(section_ids, merge_type, match, status, raw_content, title,
                    capacity_fit_fn, filter_reasons=None, notes=None) -> CompositionUnit:
    """Build one CompositionUnit from a V4 match, then apply the optional capacity check."""
    reasons = list(filter_reasons) if filter_reasons else []
    unit = CompositionUnit(
        source_section_ids=list(section_ids),
        merge_type=merge_type,
        frame_template_id=match.template_id,
        frame_id=match.frame_id,
        frame_number=match.frame_number,
        confidence=match.confidence,
        label=match.label,
        phase_z_status=status,
        raw_content=raw_content,
        title=title,
        auto_selectable=not reasons,
        filter_reasons=reasons,
        notes=list(notes) if notes else [],
    )
    _apply_capacity_fit(unit, capacity_fit_fn)
    return unit


def _inferred_merge_signals(rep_match, rep_status, child_labels, child_template_ids,
                            status_of, auto_renderable_statuses):
    """Return ``(filter_reasons, notes)`` for one parent_merged_inferred candidate.

    Weak signals (any of them blocks automatic selection):
        W1 : the representative's status is not auto-renderable
        W2 : every child is labelled "reject"
        W3 : more than half of the children have a non-auto-renderable label
    Informational notes (never block selection):
        N1 : the children's rank-1 template_ids differ
        N2 : a minority (at most half) of the children is non-auto-renderable
    """
    non_auto = [
        label for label in child_labels
        if status_of(label) not in auto_renderable_statuses
    ]
    non_auto_labels = sorted(set(non_auto))
    n_children = len(child_labels)
    n_not_auto = len(non_auto)

    filter_reasons: list[str] = []
    notes: list[str] = []

    if rep_status not in auto_renderable_statuses:
        filter_reasons.append(
            f"W1: representative status '{rep_status}' (label={rep_match.label}) "
            f"not in auto_renderable_statuses={sorted(auto_renderable_statuses)}."
        )
    if all(label == "reject" for label in child_labels):
        filter_reasons.append(
            "W2: all children labeled 'reject' — merge has no fit basis."
        )
    if n_not_auto * 2 > n_children:
        filter_reasons.append(
            f"W3: majority of children ({n_not_auto}/{n_children}) have "
            f"non-auto-renderable labels {non_auto_labels}."
        )

    if len(child_template_ids) > 1:
        notes.append(
            f"N1: children's rank-1 template_id differs ({child_template_ids}). "
            f"representative='{rep_match.template_id}' (highest child confidence). "
            f"top-k / family compatibility 평가는 future axis."
        )
    if 0 < n_not_auto <= n_children // 2:
        # Only claim the merge proceeds when no weak signal fired; otherwise
        # the note would contradict filter_reasons.
        if filter_reasons:
            outcome = "merge is not auto-selectable (see filter_reasons)."
        else:
            outcome = "representative is auto-renderable, merge proceeds."
        notes.append(
            f"N2: minority ({n_not_auto}/{n_children}) of children non-auto-renderable "
            f"({non_auto_labels}). {outcome}"
        )
    return filter_reasons, notes


def collect_candidates(sections, v4_lookup_fn, v4_label_to_status: dict,
                       auto_renderable_statuses: Optional[set[str]] = None,
                       capacity_fit_fn=None):
    """Generate composition candidates.

    Candidate types, emitted in this order:
        1. single                 : one per section that has a V4 match
        2. parent_merged          : the parent id itself has a V4 match and at
                                    least two child sections exist
        3. parent_merged_inferred : the parent has no V4 match; the
                                    representative frame is taken from the
                                    child match with the highest confidence
                                    (at least two matched children required)

    No section id, template id or frame is hardcoded: every decision comes
    from ``derive_parent_id``, the V4 evidence, ``v4_label_to_status`` and the
    injected functions.

    Args:
        sections: section objects exposing ``section_id``, ``raw_content`` and
            ``title``; iterated more than once.
        v4_lookup_fn: ``(section_id) -> match | None``. It is called at most
            once per id within a single invocation.
        v4_label_to_status: V4 label -> Phase Z status mapping. Labels that
            are missing map to "unknown" everywhere in this function.
        auto_renderable_statuses: statuses allowed to render automatically
            (input to W1/W3/N2). ``None`` means the empty set.
        capacity_fit_fn: optional ``(template_id, content) -> fit dict``. When
            given it is applied to every candidate; a mismatch sets
            ``auto_selectable=False``.

    Returns:
        list[CompositionUnit]
    """
    if auto_renderable_statuses is None:
        auto_renderable_statuses = set()

    lookup_cache: dict = {}

    def lookup(section_id):
        # Memoised so each id hits the injected lookup once per call.
        if section_id not in lookup_cache:
            lookup_cache[section_id] = v4_lookup_fn(section_id)
        return lookup_cache[section_id]

    def status_of(label):
        # Single resolver so unit status, W1 and W3/N2 always agree.
        return v4_label_to_status.get(label, "unknown")

    candidates = []

    # 1. single: one candidate per section with a V4 match.
    for section in sections:
        match = lookup(section.section_id)
        if match is None:
            continue
        candidates.append(_make_candidate(
            [section.section_id], "single", match, status_of(match.label),
            section.raw_content, section.title, capacity_fit_fn,
        ))

    # Group child sections under the parent id derived from their own id.
    parent_to_children: dict[str, list] = {}
    for section in sections:
        pid = derive_parent_id(section.section_id)
        if pid:
            parent_to_children.setdefault(pid, []).append(section)

    # 2. parent_merged: the parent id itself is matched in V4.
    for pid, children in parent_to_children.items():
        parent_match = lookup(pid)
        if parent_match is None:
            continue  # handled by step 3
        if len(children) < 2:
            continue  # nothing to merge
        candidates.append(_make_candidate(
            [child.section_id for child in children], "parent_merged",
            parent_match, status_of(parent_match.label),
            "\n\n".join(child.raw_content for child in children),
            pid, capacity_fit_fn,
        ))

    # 3. parent_merged_inferred: no parent match, infer from child evidence.
    for pid, children in parent_to_children.items():
        if lookup(pid) is not None:
            continue  # already handled by step 2
        if len(children) < 2:
            continue

        # Only children that have a V4 match count as evidence.
        child_matches = [
            (child, match)
            for child, match in ((child, lookup(child.section_id)) for child in children)
            if match is not None
        ]
        if len(child_matches) < 2:
            continue  # at least two pieces of child evidence are required

        # Representative = child match with the highest confidence (the first
        # one wins a tie).
        _, rep_match = max(child_matches, key=lambda pair: pair[1].confidence)
        rep_status = status_of(rep_match.label)

        filter_reasons, notes = _inferred_merge_signals(
            rep_match,
            rep_status,
            [match.label for _, match in child_matches],
            sorted({match.template_id for _, match in child_matches}),
            status_of,
            auto_renderable_statuses,
        )
        candidates.append(_make_candidate(
            [child.section_id for child, _ in child_matches],
            "parent_merged_inferred", rep_match, rep_status,
            "\n\n".join(child.raw_content for child, _ in child_matches),
            pid, capacity_fit_fn,
            filter_reasons=filter_reasons, notes=notes,
        ))

    return candidates
# ─── Scoring ───────────────────────────────────────────────────
# v0 label weights: V4 label -> score multiplier.
# To be extended when further axes (cardinality_fit / hierarchy_coherence /
# density) are added.
V0_LABEL_WEIGHT = dict(
    use_as_is=1.0,
    light_edit=0.7,
    restructure=0.4,
    reject=0.0,
)


def score_candidate(c: CompositionUnit) -> CompositionUnit:
    """Score ``c`` in place as ``confidence * label_weight`` and return it.

    The weight comes from ``V0_LABEL_WEIGHT``; a label that is not listed
    there weighs 0.0. The breakdown is merged into ``c.rationale`` without
    dropping entries that are already present (for example a previously
    stored "capacity_fit").

    ``hierarchy_coherence`` and ``density_score`` are written as ``None``
    placeholders for axes that are not computed yet.
    """
    weight = V0_LABEL_WEIGHT.get(c.label, 0.0)
    compat = c.confidence * weight
    c.score = compat

    # update() keeps whatever earlier steps stored in the rationale.
    c.rationale.update(
        frame_compat=round(compat, 4),
        confidence=c.confidence,
        label=c.label,
        label_weight=weight,
        merge_type=c.merge_type,
        # placeholders for future axes
        hierarchy_coherence=None,
        density_score=None,
    )
    return c
# ─── Selection ─────────────────────────────────────────────────
def select_composition_units(candidates, allowed_statuses: set[str]) -> list[CompositionUnit]:
    """Greedily pick non-overlapping candidates, best score first.

    Steps:
        1. Score every candidate (``score_candidate`` updates it in place).
        2. Keep candidates whose ``phase_z_status`` is in ``allowed_statuses``
           and whose ``auto_selectable`` is true.
        3. Order by ``(score, number of source sections)``, both descending,
           so that on equal score the candidate covering more sections wins.
           The sort is stable: full ties keep their input order.
        4. Walk the ordered list and skip any candidate that shares a section
           with one already chosen.

    Candidates with ``auto_selectable=False`` are never returned here.
    """
    eligible = []
    for cand in candidates:
        unit = score_candidate(cand)
        if unit.phase_z_status in allowed_statuses and unit.auto_selectable:
            eligible.append(unit)

    ranked = sorted(
        eligible,
        key=lambda unit: (unit.score, len(unit.source_section_ids)),
        reverse=True,
    )

    chosen = []
    taken = set()
    for unit in ranked:
        if taken.isdisjoint(unit.source_section_ids):
            chosen.append(unit)
            taken.update(unit.source_section_ids)
    return chosen
# ─── Layout Preset Selection ───────────────────────────────────
def select_layout_preset(units: list[CompositionUnit]) -> Optional[str]:
    """Pick the default layout preset name from the number of units (v0).

        0 units -> None
        1 unit  -> "single"
        2 units -> "horizontal-2"   (default among the 2-zone layouts)
        3 units -> "top-1-bottom-2" (default among the 3-zone layouts)
        4 units -> "grid-2x2"

    v0 limitation: only the count is considered, so no aspect or
    content-weight signal can select another 2-zone or 3-zone variant.

    Raises:
        ValueError: for more than four units.
    """
    n = len(units)
    if n == 0:
        return None

    default_by_count = {
        1: "single",
        2: "horizontal-2",
        3: "top-1-bottom-2",
        4: "grid-2x2",
    }
    preset = default_by_count.get(n)
    if preset is None:
        raise ValueError(
            f"Composition v0 : layout for {n} units not supported (max 4). "
            "Larger counts require split-into-multiple-slides decision (future)."
        )
    return preset
# ─── Public entry — composition pipeline ───────────────────────
def plan_composition(sections, v4_lookup_fn, v4_label_to_status: dict,
                     allowed_statuses: set[str],
                     capacity_fit_fn=None) -> tuple[list[CompositionUnit], Optional[str], dict]:
    """Composition planner v0.2 entry point.

    Collects candidates, scores them, selects non-overlapping units, picks a
    layout preset from the unit count, and builds a debug report.

    v0.2: when ``capacity_fit_fn`` is given, every candidate is capacity
    checked up front; a mismatch sets ``auto_selectable=False`` with a
    ``"C1: ..."`` filter reason.
    v0.1 / v0.1.1 (unchanged): parent_merged_inferred candidates are produced
    even without a parent V4 entry; the decision is fully automatic through
    ``auto_selectable`` + ``filter_reasons``; selection is score-descending
    with a coverage tiebreak.

    Args:
        sections: section objects passed through to ``collect_candidates``.
        v4_lookup_fn: ``(section_id) -> match | None``.
        v4_label_to_status: V4 label -> Phase Z status mapping.
        allowed_statuses: statuses eligible for automatic selection; also used
            as the auto-renderable set during candidate collection.
        capacity_fit_fn: optional ``(template_id, content) -> fit dict``.

    Returns:
        ``(units, layout_preset, debug)``: the selected units, the preset
        name (``None`` when nothing was selected), and a dict describing every
        candidate with its selection state, filter reasons, notes and
        capacity fit.

    Raises:
        ValueError: propagated from ``select_layout_preset`` when more than
            four units are selected.
    """
    candidates = collect_candidates(
        sections, v4_lookup_fn, v4_label_to_status,
        auto_renderable_statuses=allowed_statuses,
        capacity_fit_fn=capacity_fit_fn,
    )
    scored_all = [score_candidate(c) for c in candidates]
    units = select_composition_units(candidates, allowed_statuses)
    preset = select_layout_preset(units)

    # Identity, not dataclass equality: a field-identical duplicate that lost
    # the coverage conflict must not be reported as "selected".
    selected_ids = {id(u) for u in units}

    def _candidate_state(c: CompositionUnit) -> str:
        if id(c) in selected_ids:
            return "selected"
        if c.phase_z_status not in allowed_statuses:
            return "filtered_status"  # V4 label -> status not auto-renderable
        if not c.auto_selectable:
            # The filter_reasons prefix separates capacity from weak signals.
            if any(r.startswith("C") for r in c.filter_reasons):
                return "filtered_capacity"  # C1 (capacity mismatch)
            return "filtered_weak"  # W1/W2/W3 (parent_merged_inferred only)
        return "filtered_lost"  # viable, but displaced by a coverage conflict

    candidates_summary = [
        {
            # Lists are copied so the report does not alias candidate state.
            "source_section_ids": list(c.source_section_ids),
            "merge_type": c.merge_type,
            "template_id": c.frame_template_id,
            "label": c.label,
            "phase_z_status": c.phase_z_status,
            "score": c.score,
            "selection_state": _candidate_state(c),
            "auto_selectable": c.auto_selectable,
            "filter_reasons": list(c.filter_reasons),
            "notes": list(c.notes),
            "capacity_fit": c.rationale.get("capacity_fit"),
        }
        for c in scored_all
    ]
    merge_candidates = [
        s for s in candidates_summary
        if s["merge_type"] in {"parent_merged", "parent_merged_inferred"}
    ]
    capacity_mismatches = [
        s for s in candidates_summary
        if s["selection_state"] == "filtered_capacity"
    ]
    viable_auto_count = sum(
        1 for c in scored_all
        if c.phase_z_status in allowed_statuses and c.auto_selectable
    )

    debug = {
        "planner_version": "v0.2",
        "selection_rule": (
            "score desc, then source_section_ids count desc (coverage tiebreak). "
            "filter = phase_z_status ∉ allowed_statuses OR auto_selectable=False. "
            "auto_selectable=False 사유 : C1 (capacity mismatch — silent truncate / FitError 차단), "
            "W1 (rep not auto-renderable), W2 (all children reject), W3 (majority children non-auto-renderable)."
        ),
        "candidates_total": len(scored_all),
        "candidates_viable_auto": viable_auto_count,
        "candidates_summary": candidates_summary,
        "merge_candidates": merge_candidates,
        "capacity_mismatches": capacity_mismatches,
        "selected_units_count": len(units),
        "layout_preset": preset,
        "layout_preset_rationale": (
            f"v0 count-based: {len(units)} units → {preset}"
            if preset else "no viable units"
        ),
    }
    return units, preset, debug