"""Phase Z-2 Composition Planner v0. Pipeline 의 빠진 layer = MDX 덩어리들을 *최종 zone unit* 으로 묶는 결정 layer. 위치 : parse_mdx → align_sections_to_v4_granularity → [본 모듈] → render 원칙 (절대 룰) : - 특정 MDX / frame / section 하드코딩 X (예: "04-2 면" / "F16 이면") - 모든 결정 = catalog 메타 + V4 evidence parametric - 같은 코드가 MDX 02/03/04/05/06... 모두 처리 — 결과는 케이스마다 다름 - drilling 결과 = 입력 (재료), composition planner 결과 = 출력 (zone units) - slide-level layout = zone 까지만 나눔. zone 내부 분할은 frame partial 책임 8 layout preset vocabulary : L1 single / L2 horizontal-2 / L3 vertical-2 L4 top-1-bottom-2 / L5 top-2-bottom-1 L6 left-1-right-2 / L7 left-2-right-1 L8 grid-2x2 """ from __future__ import annotations from dataclasses import dataclass, field from typing import Optional # ─── 8 Layout Preset Vocabulary ──────────────────────────────── LAYOUT_PRESETS: dict[str, dict] = { "single": { "zones": 1, "topology": "single", "positions": ["primary"], "css_areas": '"primary"', "css_cols": "1fr", "css_rows": "1fr", }, "horizontal-2": { "zones": 2, "topology": "rows", "positions": ["top", "bottom"], "css_areas": '"top" "bottom"', "css_cols": "1fr", "css_rows": "1fr 1fr", }, "vertical-2": { "zones": 2, "topology": "cols", "positions": ["left", "right"], "css_areas": '"left right"', "css_cols": "1fr 1fr", "css_rows": "1fr", }, "top-1-bottom-2": { "zones": 3, "topology": "T", "positions": ["top", "bottom-left", "bottom-right"], "css_areas": '"top top" "bottom-left bottom-right"', "css_cols": "1fr 1fr", "css_rows": "1fr 1fr", }, "top-2-bottom-1": { "zones": 3, "topology": "inverted-T", "positions": ["top-left", "top-right", "bottom"], "css_areas": '"top-left top-right" "bottom bottom"', "css_cols": "1fr 1fr", "css_rows": "1fr 1fr", }, "left-1-right-2": { "zones": 3, "topology": "side-T-left", "positions": ["left", "right-top", "right-bottom"], "css_areas": '"left right-top" "left right-bottom"', "css_cols": "1fr 1fr", "css_rows": "1fr 1fr", }, "left-2-right-1": { "zones": 3, "topology": "side-T-right", "positions": ["left-top", "right", "left-bottom"], "css_areas": '"left-top right" "left-bottom right"', "css_cols": "1fr 1fr", "css_rows": "1fr 1fr", }, "grid-2x2": { "zones": 4, "topology": "2x2", "positions": ["top-left", "top-right", "bottom-left", "bottom-right"], "css_areas": '"top-left top-right" "bottom-left bottom-right"', "css_cols": "1fr 1fr", "css_rows": "1fr 1fr", }, } # ─── CompositionUnit ──────────────────────────────────────────── @dataclass class CompositionUnit: """Slide 내 1 zone 후보 = MDX section(s) + 매칭된 frame. source_section_ids : 1 개 = single, 2+ = merged merge_type : - "single" : 단일 section - "parent_merged" : parent V4 entry 존재 (v0) - "parent_merged_inferred" : parent V4 entry 없음, child evidence 로 추론 (v0.1) frame_* : V4 evidence 그대로 (catalog 메타 X 하드코딩 X) score : 종합 점수 rationale : score breakdown 추적 review_required : True 면 자동 선택 X — debug 에만 노출, 사용자/AI 검토 후 별도 path (light_edit / restructure / AI restructuring) 로 처리 review_reasons : 왜 review_required 가 True 인지 (자가검증용 — child label mix / template_id 불일치 / cardinality 불호환 등) """ source_section_ids: list[str] merge_type: str frame_template_id: str frame_id: str frame_number: int confidence: float label: str # use_as_is / light_edit / restructure / reject phase_z_status: str raw_content: str title: str score: float = 0.0 rationale: dict = field(default_factory=dict) # 자동 파이프라인 단계 상태 (review/UI 개념 X — 현재는 자동 결정 + 명확한 실패 기록만) # auto_selectable=False 면 자동 선택 단계에서 제외. filter_reasons 가 그 이유. # 예: parent_merged_inferred 의 W1/W2/W3 (rep status / all reject / majority not-auto-renderable) # 사용자/AI 검토는 별 layer (interactive editor) 에서 처리. 본 dataclass 는 자동 결정 완결. auto_selectable: bool = True filter_reasons: list[str] = field(default_factory=list) # informational signals — auto_selectable 여부와 무관. future axis 가 점수화할 영역. # 예: "children disagree on rank-1 template_id" / "minority of children non-auto-renderable" notes: list[str] = field(default_factory=list) # ─── Heading Tree ────────────────────────────────────────────── def derive_parent_id(section_id: str) -> Optional[str]: """section_id 에서 parent 도출 — V4 키 컨벤션 기반. 예시 (코멘트, 룰 X) : - "04-2.1" → "04-2" (decimal suffix → strip) - "04-1" → None (top-level, no parent) - "04" → None """ parts = section_id.split("-", 1) if len(parts) != 2: return None mdx_id, suffix = parts if "." in suffix: parent_suffix = suffix.split(".")[0] return f"{mdx_id}-{parent_suffix}" return None def build_heading_tree(sections) -> dict: """Section list → tree {section_id: {section, children}}.""" tree = {s.section_id: {"section": s, "children": []} for s in sections} for s in sections: parent = derive_parent_id(s.section_id) if parent and parent in tree: tree[parent]["children"].append(s.section_id) return tree # ─── Candidate Generation ────────────────────────────────────── def _apply_capacity_fit(candidate: CompositionUnit, capacity_fit_fn) -> None: """capacity_fit_fn 결과를 candidate 의 rationale + auto_selectable + filter_reasons 에 반영. fit_status 가 'ok' / 'no_contract' / 'unknown_source_shape' 이면 auto_selectable 영향 X (no_contract 는 catalog-only mapper 가 별도로 ValueError 처리). 그 외 (strict_mismatch / exceeds_max / below_min / exceeds_truncate) 는 silent loss 또는 mapper FitError 가 발생할 후보 → auto_selectable=False + filter_reasons 'C1: ...'. """ if capacity_fit_fn is None: return fit = capacity_fit_fn(candidate.frame_template_id, candidate.raw_content) candidate.rationale["capacity_fit"] = fit if fit["fit_status"] in {"ok", "no_contract", "unknown_source_shape"}: return candidate.auto_selectable = False candidate.filter_reasons.append( f"C1: capacity mismatch ({fit['fit_status']}) — {fit['mismatch_reason']}" ) def collect_candidates(sections, v4_lookup_fn, v4_label_to_status: dict, auto_renderable_statuses: Optional[set[str]] = None, capacity_fit_fn=None): """Generate composition candidates. v0.1 candidate types : 1. single : per leaf section (V4 entry 필수) 2. parent_merged : parent 자체에 V4 entry 존재 (parent 가 직접 매칭됨) 3. parent_merged_inferred : parent V4 없음. child evidence 로 representative template_id 추론 원칙 : - 특정 section_id / template_id / frame 하드코딩 X - 모든 결정 = derive_parent_id() + V4 evidence + v4_label_to_status mapping + 주입된 fn (파라메트릭) Args: sections : align 결과 v4_lookup_fn : (section_id) → V4Match | None v4_label_to_status : V4 label → Phase Z status mapping auto_renderable_statuses : 자동 렌더 허용 status set (W1/W3 판정 입력) capacity_fit_fn : Optional (template_id, content) → fit dict. 제공되면 모든 candidate 에 적용 — capacity mismatch 시 auto_selectable=False (silent truncate / mapper FitError 사전 차단). Returns: list[CompositionUnit] """ if auto_renderable_statuses is None: auto_renderable_statuses = set() candidates = [] # 1. Separate for s in sections: match = v4_lookup_fn(s.section_id) if match is None: continue c = CompositionUnit( source_section_ids=[s.section_id], merge_type="single", frame_template_id=match.template_id, frame_id=match.frame_id, frame_number=match.frame_number, confidence=match.confidence, label=match.label, phase_z_status=v4_label_to_status.get(match.label, "unknown"), raw_content=s.raw_content, title=s.title, ) _apply_capacity_fit(c, capacity_fit_fn) candidates.append(c) # parent → children 그룹화 parent_to_children: dict[str, list] = {} for s in sections: pid = derive_parent_id(s.section_id) if pid: parent_to_children.setdefault(pid, []).append(s) # 2. parent_merged (parent 자체가 V4 에 매칭된 경우) for pid, children in parent_to_children.items(): parent_match = v4_lookup_fn(pid) if parent_match is None: continue # branch 3 가 처리 if len(children) < 2: continue # merge 의미 없음 merged_raw = "\n\n".join(c.raw_content for c in children) c_pm = CompositionUnit( source_section_ids=[c.section_id for c in children], merge_type="parent_merged", frame_template_id=parent_match.template_id, frame_id=parent_match.frame_id, frame_number=parent_match.frame_number, confidence=parent_match.confidence, label=parent_match.label, phase_z_status=v4_label_to_status.get(parent_match.label, "unknown"), raw_content=merged_raw, title=pid, ) _apply_capacity_fit(c_pm, capacity_fit_fn) candidates.append(c_pm) # 3. parent_merged_inferred (v0.1) — parent V4 없음, child evidence 기반 for pid, children in parent_to_children.items(): if v4_lookup_fn(pid) is not None: continue # branch 2 가 이미 처리 if len(children) < 2: continue # children 중 V4 매칭 있는 것들만 evidence 로 사용 child_matches: list[tuple] = [] for c in children: m = v4_lookup_fn(c.section_id) if m is not None: child_matches.append((c, m)) if len(child_matches) < 2: continue # 최소 2 child evidence 필요 # representative = 가장 confidence 높은 child match (v0.1.1 단순 룰) # 향후 axes : top-k convergence, template family agreement, cardinality_fit 등 rep_child, rep_match = max(child_matches, key=lambda cm: cm[1].confidence) # 자동 선택 가능 여부 = auto_selectable. default True (strong inferred merge). # 다음 weak 신호 중 하나라도 있으면 auto_selectable=False (filter_reasons 에 사유) : # W1 : representative status 가 auto-renderable 아님 → 자동 렌더 자체가 막힘 # W2 : 모든 child 가 reject → merge 의미 자체가 없음 # W3 : auto-renderable 아닌 child label 이 majority (>50%) # informational notes (auto_selectable 영향 X, future axis 점수화 영역) : # N1 : children 의 rank-1 template_id 가 서로 다름 → top-k / family compat # N2 : non-auto-renderable child label 이 일부 (소수) 존재 rep_status = v4_label_to_status.get(rep_match.label, "unknown") child_labels = [m.label for _, m in child_matches] child_template_ids_unique = sorted({m.template_id for _, m in child_matches}) n_children = len(child_matches) n_not_auto = sum( 1 for l in child_labels if v4_label_to_status.get(l) not in auto_renderable_statuses ) filter_reasons: list[str] = [] notes: list[str] = [] if rep_status not in auto_renderable_statuses: filter_reasons.append( f"W1: representative status '{rep_status}' (label={rep_match.label}) " f"not in auto_renderable_statuses={sorted(auto_renderable_statuses)}." ) if all(l == "reject" for l in child_labels): filter_reasons.append( "W2: all children labeled 'reject' — merge has no fit basis." ) if n_children > 0 and n_not_auto * 2 > n_children: non_auto_labels = sorted({ l for l in child_labels if v4_label_to_status.get(l) not in auto_renderable_statuses }) filter_reasons.append( f"W3: majority of children ({n_not_auto}/{n_children}) have " f"non-auto-renderable labels {non_auto_labels}." ) if len(child_template_ids_unique) > 1: notes.append( f"N1: children's rank-1 template_id differs ({child_template_ids_unique}). " f"representative='{rep_match.template_id}' (highest child confidence). " f"top-k / family compatibility 평가는 future axis." ) if 0 < n_not_auto <= n_children // 2: non_auto_labels_minority = sorted({ l for l in child_labels if v4_label_to_status.get(l) not in auto_renderable_statuses }) notes.append( f"N2: minority ({n_not_auto}/{n_children}) of children non-auto-renderable " f"({non_auto_labels_minority}). representative is auto-renderable, merge proceeds." ) auto_selectable = len(filter_reasons) == 0 merged_raw = "\n\n".join(c.raw_content for c, _ in child_matches) c_inf = CompositionUnit( source_section_ids=[c.section_id for c, _ in child_matches], merge_type="parent_merged_inferred", frame_template_id=rep_match.template_id, frame_id=rep_match.frame_id, frame_number=rep_match.frame_number, confidence=rep_match.confidence, label=rep_match.label, phase_z_status=rep_status, raw_content=merged_raw, title=pid, auto_selectable=auto_selectable, filter_reasons=filter_reasons, notes=notes, ) _apply_capacity_fit(c_inf, capacity_fit_fn) candidates.append(c_inf) return candidates # ─── Scoring ─────────────────────────────────────────────────── # v0 label weights — V4 label → score multiplier. # 향후 axes 추가 (cardinality_fit / hierarchy_coherence / density) 시 확장. V0_LABEL_WEIGHT = { "use_as_is": 1.0, "light_edit": 0.7, "restructure": 0.4, "reject": 0.0, } def score_candidate(c: CompositionUnit) -> CompositionUnit: """v0 scoring : confidence × label_weight. 추후 추가될 axes (rationale 에 자리만 잡아둠) : - cardinality_fit : item_count vs frame ideal/min/max - hierarchy_coherence : merge_type 적합도 - density_score : content 밀도 vs zone 크기 """ label_weight = V0_LABEL_WEIGHT.get(c.label, 0.0) frame_compat = c.confidence * label_weight c.score = frame_compat # 기존 rationale 보존 (예: collect_candidates 가 넣은 capacity_fit) c.rationale.update({ "frame_compat": round(frame_compat, 4), "confidence": c.confidence, "label": c.label, "label_weight": label_weight, "merge_type": c.merge_type, # placeholders for future axes "hierarchy_coherence": None, "density_score": None, }) return c # ─── Selection ───────────────────────────────────────────────── def select_composition_units(candidates, allowed_statuses: set[str]) -> list[CompositionUnit]: """Greedy non-overlapping selection by score, with coverage tiebreak. 1. 모든 candidate 점수 매김 2. filter : - phase_z_status ∈ allowed_statuses - auto_selectable=True (W1/W2/W3 신호 통과) 3. 정렬 키 = (score desc, source_section_ids 수 desc) — 동점이면 더 많은 section 을 cover 하는 후보 우선. parent_merged_inferred 가 같은 점수의 single 후보를 *coverage 우위* 로 이김. 4. greedy : 이미 covered 된 section 을 가진 후보는 skip 5. 최종 선택 = covered set 채워나감 auto_selectable=False candidate 는 자동 선택 X. debug 의 candidates_summary 에는 남음. UI/editor layer 에서 사용자가 별도 처리 가능 (현 v0 범위 X). """ scored = [score_candidate(c) for c in candidates] viable = [ c for c in scored if c.phase_z_status in allowed_statuses and c.auto_selectable ] viable.sort(key=lambda c: (c.score, len(c.source_section_ids)), reverse=True) selected = [] covered = set() for c in viable: if any(sid in covered for sid in c.source_section_ids): continue selected.append(c) covered.update(c.source_section_ids) return selected # ─── Layout Preset Selection ─────────────────────────────────── def select_layout_preset(units: list[CompositionUnit]) -> Optional[str]: """v0 : count-based default selection. 1 unit → single 2 units → horizontal-2 (default. vertical-2 는 aspect signal 추가 시 분기) 3 units → top-1-bottom-2 (default. 다른 3-zone variant 는 content-weight signal 추가 시 분기) 4 units → grid-2x2 v0 한계 : - aspect / content-weight 신호 미반영 → 2 units 는 항상 horizontal, 3 units 는 항상 top-1-bottom-2 - 향후 unit.raw_content 기반 weight 산정 시 정교화 """ n = len(units) if n == 0: return None if n == 1: return "single" if n == 2: return "horizontal-2" if n == 3: return "top-1-bottom-2" if n == 4: return "grid-2x2" raise ValueError( f"Composition v0 : layout for {n} units not supported (max 4). " "Larger counts require split-into-multiple-slides decision (future)." ) # ─── Public entry — composition pipeline ─────────────────────── def plan_composition(sections, v4_lookup_fn, v4_label_to_status: dict, allowed_statuses: set[str], capacity_fit_fn=None) -> tuple[list[CompositionUnit], Optional[str], dict]: """Composition planner v0.2 entry. v0.2 변경 : - capacity_fit_fn 주입 시 모든 candidate 에 capacity 사전 검사 (silent truncate / mapper FitError 사전 차단). 불일치 시 auto_selectable=False + filter_reason 'C1: ...'. v0.1 / v0.1.1 동작 (유지) : - parent_merged_inferred candidate 생성 (parent V4 없어도) - review 개념 X. auto_selectable + filter_reasons 만으로 자동 결정 - selection : score desc + coverage 우세 tiebreak Returns: units : 자동 선택된 composition units layout_preset : 8 vocabulary 중 하나 (또는 None) debug : 후보 전체 + capacity_fit + filter_reasons + preset 결정 근거 """ candidates = collect_candidates( sections, v4_lookup_fn, v4_label_to_status, auto_renderable_statuses=allowed_statuses, capacity_fit_fn=capacity_fit_fn, ) scored_all = [score_candidate(c) for c in candidates] units = select_composition_units(candidates, allowed_statuses) preset = select_layout_preset(units) def _candidate_state(c: CompositionUnit) -> str: if c in units: return "selected" if c.phase_z_status not in allowed_statuses: return "filtered_status" # V4 label → status not auto-renderable if not c.auto_selectable: # filter_reasons prefix 로 capacity 와 weak 구분 if any(r.startswith("C") for r in c.filter_reasons): return "filtered_capacity" # C1 (capacity mismatch) return "filtered_weak" # W1/W2/W3 (parent_merged_inferred only) return "filtered_lost" # viable 였지만 coverage 충돌로 밀림 candidates_summary = [ { "source_section_ids": c.source_section_ids, "merge_type": c.merge_type, "template_id": c.frame_template_id, "label": c.label, "phase_z_status": c.phase_z_status, "score": c.score, "selection_state": _candidate_state(c), "auto_selectable": c.auto_selectable, "filter_reasons": list(c.filter_reasons), "notes": list(c.notes), "capacity_fit": c.rationale.get("capacity_fit"), } for c in scored_all ] merge_candidates = [ s for s in candidates_summary if s["merge_type"] in {"parent_merged", "parent_merged_inferred"} ] capacity_mismatches = [ s for s in candidates_summary if s["selection_state"] == "filtered_capacity" ] debug = { "planner_version": "v0.2", "selection_rule": ( "score desc, then source_section_ids count desc (coverage tiebreak). " "filter = phase_z_status ∉ allowed_statuses OR auto_selectable=False. " "auto_selectable=False 사유 : C1 (capacity mismatch — silent truncate / FitError 차단), " "W1 (rep not auto-renderable), W2 (all children reject), W3 (majority children non-auto-renderable)." ), "candidates_total": len(scored_all), "candidates_viable_auto": len([ c for c in scored_all if c.phase_z_status in allowed_statuses and c.auto_selectable ]), "candidates_summary": candidates_summary, "merge_candidates": merge_candidates, "capacity_mismatches": capacity_mismatches, "selected_units_count": len(units), "layout_preset": preset, "layout_preset_rationale": ( f"v0 count-based: {len(units)} units → {preset}" if preset else "no viable units" ), } return units, preset, debug