"""Phase Z-2 Composition Planner v0. Pipeline 의 빠진 layer = MDX 덩어리들을 *최종 zone unit* 으로 묶는 결정 layer. 위치 : parse_mdx → align_sections_to_v4_granularity → [본 모듈] → render 원칙 (절대 룰) : - 특정 MDX / frame / section 하드코딩 X (예: "04-2 면" / "F16 이면") - 모든 결정 = catalog 메타 + V4 evidence parametric - 같은 코드가 MDX 02/03/04/05/06... 모두 처리 — 결과는 케이스마다 다름 - drilling 결과 = 입력 (재료), composition planner 결과 = 출력 (zone units) - slide-level layout = zone 까지만 나눔. zone 내부 분할은 frame partial 책임 8 layout preset vocabulary : L1 single / L2 horizontal-2 / L3 vertical-2 L4 top-1-bottom-2 / L5 top-2-bottom-1 L6 left-1-right-2 / L7 left-2-right-1 L8 grid-2x2 """ from __future__ import annotations import re from dataclasses import dataclass, field from pathlib import Path from typing import Optional import yaml # ─── 8 Layout Preset Vocabulary — catalog-loaded (사용자 lock 2026-05-07) ─── # # Source of truth = templates/phase_z2/layouts/layouts.yaml (사람이 보고 추가/수정 가능). # 코드 hardcoded dict 폐기 (Step 7-A catalog 화). logic 변경 X — backward compat. # # catalog 의 추가 필드 (render_ready / default_selection / candidate_when) 는 # 기존 사용처에서 무시됨 — Step 7-B (multiple 후보) / Step 9 (layout × frame # fit eval) 진입 시 입력. _LAYOUTS_CATALOG_PATH = ( Path(__file__).resolve().parent.parent / "templates" / "phase_z2" / "layouts" / "layouts.yaml" ) def load_layout_presets() -> dict[str, dict]: """Load 8 layout presets from catalog. backward compat: returns same dict shape as old hardcoded LAYOUT_PRESETS — keys = layout id (single / horizontal-2 / ...), each value contains zones / topology / positions / css_areas / css_cols / css_rows. Additional fields (render_ready / default_selection / candidate_when) ignored by existing callers, consumed by Step 7-B / Step 9 (별 axis). """ with open(_LAYOUTS_CATALOG_PATH, encoding="utf-8") as f: return yaml.safe_load(f) or {} LAYOUT_PRESETS: dict[str, dict] = load_layout_presets() def select_layout_candidates(unit_count: int) -> list[str]: """Return layout id candidates matching given unit_count. Step 7-B (사용자 lock 2026-05-07) — multiple 후보 generation. Args: unit_count: Final layout placement unit count (Step 4 output). = section_count + promoted lead_orphans 등. NOT raw MDX section count — Step 2 raw section count 가 아님. Returns: List of layout ids matching candidate_when.unit_count. Sort order: 1. default_selection: true 먼저 (catalog 정의 순서) 2. default_selection: false 그 다음 (catalog 정의 순서) Layouts with render_ready: false 는 제외. Raises: ValueError: if unit_count < 1 or > 4 (current catalog scope). Note: 호출처 박힘 (Step 7-conn 2026-05-08) — phase_z2_pipeline.py 의 step07 artifact 가 본 함수 결과 기록 (passive). 기존 select_layout_preset() 은 default 결정 그대로. 후보 평가 / auto decision 은 Step 9 v1 (별 axis). """ if unit_count < 1 or unit_count > 4: raise ValueError( f"unit_count {unit_count} out of catalog scope [1, 4]" ) defaults: list[str] = [] alternatives: list[str] = [] for layout_id, spec in LAYOUT_PRESETS.items(): if not spec.get("render_ready", False): continue cw = spec.get("candidate_when") or {} if cw.get("unit_count") != unit_count: continue if spec.get("default_selection", False): defaults.append(layout_id) else: alternatives.append(layout_id) return defaults + alternatives # ─── Region Layout Catalog — Step 8-B-1 (사용자 lock 2026-05-07) ──────── # # Source = templates/phase_z2/regions/region_layouts.yaml (SPEC §2.5). # load 함수 + select_region_layout_candidates(). # 호출처 박힘 (Step 8-conn 2026-05-08) — phase_z2_pipeline.py 의 step08 artifact 가 # 본 함수 결과 기록 (placeholder signals: region_count=1, Step 3/4 부재 종속). _REGION_LAYOUTS_CATALOG_PATH = ( Path(__file__).resolve().parent.parent / "templates" / "phase_z2" / "regions" / "region_layouts.yaml" ) def load_region_layouts() -> dict[str, dict]: """Load Internal Region layout catalog (SPEC §2.5, 6 entry). Returns same dict shape as catalog yaml. Step 7-A 와 같은 패턴 — source of truth = yaml, code 는 read 만. """ with open(_REGION_LAYOUTS_CATALOG_PATH, encoding="utf-8") as f: return yaml.safe_load(f) or {} REGION_LAYOUTS: dict[str, dict] = load_region_layouts() def select_region_layout_candidates( region_count: int, content_type_mix: Optional[list[str]] = None, details_presence: bool = False, role_pattern: Optional[str] = None, ratio_asymmetric: bool = False, flow_type: Optional[str] = None, has_visual_element: bool = False, large_table: bool = False, long_text: bool = False, ) -> list[str]: """Return Internal Region layout candidates per SPEC §2.5 decision tree. Step 8-B-1 (사용자 lock 2026-05-07) — 후보 generation 함수. Step 7-B 와 다른 점: SPEC §2.5 는 *순차 결정 트리* (첫 매칭 채택). Step 7-B 는 단순 매칭 (unit_count 같은 모든 entry). Decision rule (sequential, first match wins) — catalog 와 1:1 일치: 1. region_count == 1 -> region-single 2. details_presence / large_table / long_text -> region-preview-details 3. region_count == 4 AND flow_type == 'parallel_4' -> region-grid-2x2 4. region_count == 2 AND role_pattern == 'primary_supporting' AND ratio_asymmetric -> region-main-support 5. region_count == 2 AND has_visual_element -> region-horizontal-split 6. fallback (위 미매칭) -> region-vertical-stack Sort: region_count == 1 -> [region-single] (fallback X) region_count >= 2 -> [매칭, region-vertical-stack] 또는 [region-vertical-stack] Raises: ValueError: region_count < 1 or > 4 (SPEC §2.5 vocabulary scope). Note: 호출처 박힘 (Step 8-conn 2026-05-08) — phase_z2_pipeline.py 의 step08 artifact 가 본 함수 결과 기록. 현재 placeholder signals (region_count=1, content_type= "text_block") 종속 — 실제 신호 활성화는 Step 3/4 별 axis. Step 9 v0 (application_plan) 가 본 후보 list 를 application_candidates 로 해석. """ if region_count < 1 or region_count > 4: raise ValueError( f"region_count {region_count} out of catalog scope [1, 4]" ) fallback = "region-vertical-stack" # 1. region_count == 1 if region_count == 1: return ["region-single"] # 2. details_presence / large_table / long_text if details_presence or large_table or long_text: match = "region-preview-details" # 3. region_count == 4 + parallel_4 elif region_count == 4 and flow_type == "parallel_4": match = "region-grid-2x2" # 4. region_count == 2 + role_pattern primary_supporting + ratio_asymmetric elif ( region_count == 2 and role_pattern == "primary_supporting" and ratio_asymmetric ): match = "region-main-support" # 5. region_count == 2 + visual element elif region_count == 2 and has_visual_element: match = "region-horizontal-split" # 6. fallback else: return [fallback] # 매칭됨 + fallback (단 매칭 == fallback 인 경우 1개만) if match == fallback: return [fallback] return [match, fallback] # ─── Display Strategy Catalog — Step 8-B-2 (사용자 lock 2026-05-07) ──── # # Source = templates/phase_z2/regions/display_strategies.yaml (4 entry). # load 함수 + select_display_strategy_candidates(). # 호출처 박힘 (Step 8-conn 2026-05-08) — phase_z2_pipeline.py 의 step08 artifact 가 # 본 함수 결과 기록 (placeholder signals: content_type="text_block", Step 3/4 부재 종속). _DISPLAY_STRATEGIES_CATALOG_PATH = ( Path(__file__).resolve().parent.parent / "templates" / "phase_z2" / "regions" / "display_strategies.yaml" ) def load_display_strategies() -> dict[str, dict]: """Load display strategy catalog (4 entry). Returns same dict shape as catalog yaml. Step 7-A / 8-B-1 와 같은 패턴 — source of truth = yaml, code 는 read 만. """ with open(_DISPLAY_STRATEGIES_CATALOG_PATH, encoding="utf-8") as f: return yaml.safe_load(f) or {} DISPLAY_STRATEGIES: dict[str, dict] = load_display_strategies() _KNOWN_CONTENT_TYPES = frozenset({ "text_block", "table", "image", "details", "decorative_element", }) def select_display_strategy_candidates( content_type: str, long_text: bool = False, large_table: bool = False, fits_in_region: Optional[bool] = None, ) -> list[str]: """Return display strategy candidates per catalog (display_strategies.yaml). Step 8-B-2 (사용자 lock 2026-05-07) — 후보 generation 함수. display_strategies.yaml 만 본다 (region_layouts / frame 은 Step 9 axis). Hard filter (catalog 박힌 절대 제약 — applies_to / forbidden_for): - content_type 이 strategy.applies_to 에 있어야 후보 - content_type 이 strategy.forbidden_for 에 있으면 자동 제외 - 핵심 user lock: text_block / table / image / details 는 dropped 절대 X (catalog forbidden_for 에 박혀 있음 — 원문 무손실 보존) Ranking (content_type + fit signal): decorative_element -> [inline_full, dropped] image -> [inline_full] text_block / table / details long_text / large_table / fits_in_region == False -> [inline_preview_with_details, details_only, inline_full] 그 외 -> [inline_full, inline_preview_with_details, details_only] Note: - fits_in_region 은 가벼운 hint 만. 실제 overflow 판단은 Step 9/14/17 axis. - dropped 는 decorative_element 의 후순위 (공간 부족 신호 전엔 일단 보여주기). Raises: ValueError: content_type 이 catalog scope 밖 (text_block / table / image / details / decorative_element 외). Note: 호출처 박힘 (Step 8-conn 2026-05-08) — phase_z2_pipeline.py 의 step08 artifact 가 본 함수 결과 기록. 현재 placeholder signal (content_type="text_block") 종속 — 실제 신호 활성화는 Step 3/4 별 axis. Step 9 v0 (application_plan) 가 본 후보 list 를 application_candidates 의 display_strategy axis 로 해석. """ if content_type not in _KNOWN_CONTENT_TYPES: raise ValueError( f"content_type {content_type!r} out of catalog scope " f"(known: {sorted(_KNOWN_CONTENT_TYPES)})" ) # Hard filter — applies_to / forbidden_for (catalog 직독) eligible = set() for name, meta in DISPLAY_STRATEGIES.items(): applies_to = meta.get("applies_to") or [] forbidden_for = meta.get("forbidden_for") or [] if content_type in applies_to and content_type not in forbidden_for: eligible.add(name) # Ranking — content_type + fit signal if content_type == "decorative_element": order = ["inline_full", "dropped"] else: escalate = long_text or large_table or fits_in_region is False if escalate: order = [ "inline_preview_with_details", "details_only", "inline_full", ] else: order = [ "inline_full", "inline_preview_with_details", "details_only", ] return [s for s in order if s in eligible] # ─── CompositionUnit ──────────────────────────────────────────── @dataclass class CompositionUnit: """Slide 내 1 zone 후보 = MDX section(s) + 매칭된 frame. source_section_ids : 1 개 = single, 2+ = merged merge_type : - "single" : 단일 section - "parent_merged" : parent V4 entry 존재 (v0) - "parent_merged_inferred" : parent V4 entry 없음, child evidence 로 추론 (v0.1) frame_* : V4 evidence 그대로 (catalog 메타 X 하드코딩 X) score : 종합 점수 rationale : score breakdown 추적 review_required : True 면 자동 선택 X — debug 에만 노출, 사용자/AI 검토 후 별도 path (light_edit / restructure / AI restructuring) 로 처리 review_reasons : 왜 review_required 가 True 인지 (자가검증용 — child label mix / template_id 불일치 / cardinality 불호환 등) """ source_section_ids: list[str] merge_type: str frame_template_id: str frame_id: str frame_number: int confidence: float label: str # use_as_is / light_edit / restructure / reject phase_z_status: str raw_content: str title: str v4_rank: Optional[int] = None selection_path: str = "rank_1" fallback_reason: Optional[str] = None score: float = 0.0 rationale: dict = field(default_factory=dict) # 자동 파이프라인 단계 상태 (review/UI 개념 X — 현재는 자동 결정 + 명확한 실패 기록만) # auto_selectable=False 면 자동 선택 단계에서 제외. filter_reasons 가 그 이유. # 예: parent_merged_inferred 의 W1/W2/W3 (rep status / all reject / majority not-auto-renderable) # 사용자/AI 검토는 별 layer (interactive editor) 에서 처리. 본 dataclass 는 자동 결정 완결. auto_selectable: bool = True filter_reasons: list[str] = field(default_factory=list) # informational signals — auto_selectable 여부와 무관. future axis 가 점수화할 영역. # 예: "children disagree on rank-1 template_id" / "minority of children non-auto-renderable" notes: list[str] = field(default_factory=list) # Step 6-A axis 추가 (사용자 lock 2026-05-08). # V4 후보 list (V4Match-shape duck typed — composition module 은 V4Match dataclass 미import, # circular dep 회피). 각 entry attrs : template_id / frame_id / frame_number / confidence / label. # list 순서 = V4 rank (candidates[0] = rank-1 non-reject — 단일 frame_template_id / # frame_id / label / confidence 와 일치, backward compat lock). # 0 길이 = "no_non_reject_v4_candidate" 신호 (Step 9 application_plan input). v4_candidates: list = field(default_factory=list) # IMP-30 u2 — provisional first-render flag. True when the V4Match # backing this unit was synthesized via lookup_v4_match_with_fallback # (allow_provisional=True) after chain_exhausted, or when u3 inserts # a last-resort provisional fill for an uncovered section. Carried as # data (not re-derived from label/selection_path downstream) so the # render path / status / zone template can surface "needs adaptation" # uniformly. Default False keeps non-provisional units byte-identical. provisional: bool = False # ─── Heading Tree ────────────────────────────────────────────── def derive_parent_id(section_id: str) -> Optional[str]: """Section id -> parent id derivation by V4 key convention. IMP-08 B-3 : canonical ordinal `${parent}-sub-${n}` recognised first; legacy decimal `04-2.1` kept as fallback alias path. Examples (illustrative, not rules) : - "03-1-sub-2" -> "03-1" (canonical ordinal, IMP-08) - "04-2.1" -> "04-2" (decimal suffix, legacy V4 key style) - "04-1" -> None (top-level, no parent) - "04" -> None """ m = re.fullmatch(r"(.+?)-sub-(\d+)", section_id) if m: return m.group(1) parts = section_id.split("-", 1) if len(parts) != 2: return None mdx_id, suffix = parts if "." in suffix: parent_suffix = suffix.split(".")[0] return f"{mdx_id}-{parent_suffix}" return None def build_heading_tree(sections) -> dict: """Section list → tree {section_id: {section, children}}.""" tree = {s.section_id: {"section": s, "children": []} for s in sections} for s in sections: parent = derive_parent_id(s.section_id) if parent and parent in tree: tree[parent]["children"].append(s.section_id) return tree # ─── Candidate Generation ────────────────────────────────────── def _apply_capacity_fit(candidate: CompositionUnit, capacity_fit_fn) -> None: """capacity_fit_fn 결과를 candidate 의 rationale + auto_selectable + filter_reasons 에 반영. fit_status 가 'ok' / 'no_contract' / 'unknown_source_shape' 이면 auto_selectable 영향 X (no_contract 는 catalog-only mapper 가 별도로 ValueError 처리). 그 외 (strict_mismatch / exceeds_max / below_min / exceeds_truncate) 는 silent loss 또는 mapper FitError 가 발생할 후보 → auto_selectable=False + filter_reasons 'C1: ...'. """ if capacity_fit_fn is None: return fit = capacity_fit_fn(candidate.frame_template_id, candidate.raw_content) candidate.rationale["capacity_fit"] = fit if fit["fit_status"] in {"ok", "no_contract", "unknown_source_shape"}: return candidate.auto_selectable = False candidate.filter_reasons.append( f"C1: capacity mismatch ({fit['fit_status']}) — {fit['mismatch_reason']}" ) def collect_candidates(sections, v4_lookup_fn, v4_label_to_status: dict, auto_renderable_statuses: Optional[set[str]] = None, capacity_fit_fn=None, v4_candidates_lookup_fn=None): """Generate composition candidates. v0.1 candidate types : 1. single : per leaf section (V4 entry 필수) 2. parent_merged : parent 자체에 V4 entry 존재 (parent 가 직접 매칭됨) 3. parent_merged_inferred : parent V4 없음. child evidence 로 representative template_id 추론 원칙 : - 특정 section_id / template_id / frame 하드코딩 X - 모든 결정 = derive_parent_id() + V4 evidence + v4_label_to_status mapping + 주입된 fn (파라메트릭) Args: sections : align 결과 v4_lookup_fn : (section_id) → V4Match | None (rank-1 only, 기존 호환) v4_label_to_status : V4 label → Phase Z status mapping auto_renderable_statuses : 자동 렌더 허용 status set (W1/W3 판정 입력) capacity_fit_fn : Optional (template_id, content) → fit dict. 제공되면 모든 candidate 에 적용 — capacity mismatch 시 auto_selectable=False (silent truncate / mapper FitError 사전 차단). v4_candidates_lookup_fn : Optional (section_id) → list[V4Match]. Step 6-A axis (사용자 lock 2026-05-08). non-reject max-N 후보 list. 제공되면 모든 candidate 에 v4_candidates 필드 채움. None 이면 v4_candidates = [] (backward compat). 본 fn 이 V4 raw dict 구조를 흡수 — composition module 은 V4 yaml shape 모름. Returns: list[CompositionUnit] """ if auto_renderable_statuses is None: auto_renderable_statuses = set() def _v4_cands(section_id: str) -> list: # v4_candidates_lookup_fn 미제공 시 빈 list (backward compat). return v4_candidates_lookup_fn(section_id) if v4_candidates_lookup_fn else [] candidates = [] # 1. Separate for s in sections: match = v4_lookup_fn(s.section_id) if match is None: continue c = CompositionUnit( source_section_ids=[s.section_id], merge_type="single", frame_template_id=match.template_id, frame_id=match.frame_id, frame_number=match.frame_number, confidence=match.confidence, label=match.label, phase_z_status=v4_label_to_status.get(match.label, "unknown"), v4_rank=getattr(match, "v4_rank", None), selection_path=getattr(match, "selection_path", "rank_1"), fallback_reason=getattr(match, "fallback_reason", None), raw_content=s.raw_content, title=s.title, v4_candidates=_v4_cands(s.section_id), provisional=getattr(match, "provisional", False), ) _apply_capacity_fit(c, capacity_fit_fn) candidates.append(c) # parent → children 그룹화 parent_to_children: dict[str, list] = {} for s in sections: pid = derive_parent_id(s.section_id) if pid: parent_to_children.setdefault(pid, []).append(s) # 2. parent_merged (parent 자체가 V4 에 매칭된 경우) for pid, children in parent_to_children.items(): parent_match = v4_lookup_fn(pid) if parent_match is None: continue # branch 3 가 처리 if len(children) < 2: continue # merge 의미 없음 merged_raw = "\n\n".join(c.raw_content for c in children) c_pm = CompositionUnit( source_section_ids=[c.section_id for c in children], merge_type="parent_merged", frame_template_id=parent_match.template_id, frame_id=parent_match.frame_id, frame_number=parent_match.frame_number, confidence=parent_match.confidence, label=parent_match.label, phase_z_status=v4_label_to_status.get(parent_match.label, "unknown"), v4_rank=getattr(parent_match, "v4_rank", None), selection_path=getattr(parent_match, "selection_path", "rank_1"), fallback_reason=getattr(parent_match, "fallback_reason", None), raw_content=merged_raw, title=pid, v4_candidates=_v4_cands(pid), provisional=getattr(parent_match, "provisional", False), ) _apply_capacity_fit(c_pm, capacity_fit_fn) candidates.append(c_pm) # 3. parent_merged_inferred (v0.1) — parent V4 없음, child evidence 기반 for pid, children in parent_to_children.items(): if v4_lookup_fn(pid) is not None: continue # branch 2 가 이미 처리 if len(children) < 2: continue # children 중 V4 매칭 있는 것들만 evidence 로 사용 child_matches: list[tuple] = [] for c in children: m = v4_lookup_fn(c.section_id) if m is not None: child_matches.append((c, m)) if len(child_matches) < 2: continue # 최소 2 child evidence 필요 # representative = 가장 confidence 높은 child match (v0.1.1 단순 룰) # 향후 axes : top-k convergence, template family agreement, cardinality_fit 등 rep_child, rep_match = max(child_matches, key=lambda cm: cm[1].confidence) # 자동 선택 가능 여부 = auto_selectable. default True (strong inferred merge). # 다음 weak 신호 중 하나라도 있으면 auto_selectable=False (filter_reasons 에 사유) : # W1 : representative status 가 auto-renderable 아님 → 자동 렌더 자체가 막힘 # W2 : 모든 child 가 reject → merge 의미 자체가 없음 # W3 : auto-renderable 아닌 child label 이 majority (>50%) # informational notes (auto_selectable 영향 X, future axis 점수화 영역) : # N1 : children 의 rank-1 template_id 가 서로 다름 → top-k / family compat # N2 : non-auto-renderable child label 이 일부 (소수) 존재 rep_status = v4_label_to_status.get(rep_match.label, "unknown") child_labels = [m.label for _, m in child_matches] child_template_ids_unique = sorted({m.template_id for _, m in child_matches}) n_children = len(child_matches) n_not_auto = sum( 1 for l in child_labels if v4_label_to_status.get(l) not in auto_renderable_statuses ) filter_reasons: list[str] = [] notes: list[str] = [] if rep_status not in auto_renderable_statuses: filter_reasons.append( f"W1: representative status '{rep_status}' (label={rep_match.label}) " f"not in auto_renderable_statuses={sorted(auto_renderable_statuses)}." ) if all(l == "reject" for l in child_labels): filter_reasons.append( "W2: all children labeled 'reject' — merge has no fit basis." ) if n_children > 0 and n_not_auto * 2 > n_children: non_auto_labels = sorted({ l for l in child_labels if v4_label_to_status.get(l) not in auto_renderable_statuses }) filter_reasons.append( f"W3: majority of children ({n_not_auto}/{n_children}) have " f"non-auto-renderable labels {non_auto_labels}." ) if len(child_template_ids_unique) > 1: notes.append( f"N1: children's rank-1 template_id differs ({child_template_ids_unique}). " f"representative='{rep_match.template_id}' (highest child confidence). " f"top-k / family compatibility 평가는 future axis." ) if 0 < n_not_auto <= n_children // 2: non_auto_labels_minority = sorted({ l for l in child_labels if v4_label_to_status.get(l) not in auto_renderable_statuses }) notes.append( f"N2: minority ({n_not_auto}/{n_children}) of children non-auto-renderable " f"({non_auto_labels_minority}). representative is auto-renderable, merge proceeds." ) auto_selectable = len(filter_reasons) == 0 merged_raw = "\n\n".join(c.raw_content for c, _ in child_matches) c_inf = CompositionUnit( source_section_ids=[c.section_id for c, _ in child_matches], merge_type="parent_merged_inferred", frame_template_id=rep_match.template_id, frame_id=rep_match.frame_id, frame_number=rep_match.frame_number, confidence=rep_match.confidence, label=rep_match.label, phase_z_status=rep_status, v4_rank=getattr(rep_match, "v4_rank", None), selection_path=getattr(rep_match, "selection_path", "rank_1"), fallback_reason=getattr(rep_match, "fallback_reason", None), raw_content=merged_raw, title=pid, auto_selectable=auto_selectable, filter_reasons=filter_reasons, notes=notes, # rep_child 의 V4 후보 list (rep_match 와 같은 출처, frame_* 와 일관). v4_candidates=_v4_cands(rep_child.section_id), # IMP-30 u2 — rep_match drives frame selection so its provisional # flag flows here. If a non-rep child match is provisional but the # rep is not, this unit is not provisional (the rep frame is real). provisional=getattr(rep_match, "provisional", False), ) _apply_capacity_fit(c_inf, capacity_fit_fn) candidates.append(c_inf) return candidates # ─── Scoring ─────────────────────────────────────────────────── # v0 label weights — V4 label → score multiplier. # 향후 axes 추가 (cardinality_fit / hierarchy_coherence / density) 시 확장. V0_LABEL_WEIGHT = { "use_as_is": 1.0, "light_edit": 0.7, "restructure": 0.4, "reject": 0.0, } def score_candidate(c: CompositionUnit) -> CompositionUnit: """v0 scoring : confidence × label_weight. 추후 추가될 axes (rationale 에 자리만 잡아둠) : - cardinality_fit : item_count vs frame ideal/min/max - hierarchy_coherence : merge_type 적합도 - density_score : content 밀도 vs zone 크기 """ label_weight = V0_LABEL_WEIGHT.get(c.label, 0.0) frame_compat = c.confidence * label_weight c.score = frame_compat # 기존 rationale 보존 (예: collect_candidates 가 넣은 capacity_fit) c.rationale.update({ "frame_compat": round(frame_compat, 4), "confidence": c.confidence, "label": c.label, "label_weight": label_weight, "merge_type": c.merge_type, # placeholders for future axes "hierarchy_coherence": None, "density_score": None, }) return c # ─── Selection ───────────────────────────────────────────────── def select_composition_units( candidates, allowed_statuses: set[str], *, all_section_ids: Optional[list[str]] = None, allow_provisional_fill: bool = False, ) -> list[CompositionUnit]: """Greedy non-overlapping selection by score, with coverage tiebreak. 1. 모든 candidate 점수 매김 2. filter : - phase_z_status ∈ allowed_statuses - auto_selectable=True (W1/W2/W3 신호 통과) 3. 정렬 키 = (score desc, source_section_ids 수 desc) — 동점이면 더 많은 section 을 cover 하는 후보 우선. parent_merged_inferred 가 같은 점수의 single 후보를 *coverage 우위* 로 이김. 4. greedy : 이미 covered 된 section 을 가진 후보는 skip 5. 최종 선택 = covered set 채워나감 auto_selectable=False candidate 는 자동 선택 X. debug 의 candidates_summary 에는 남음. UI/editor layer 에서 사용자가 별도 처리 가능 (현 v0 범위 X). IMP-30 u3 — last-resort provisional fill (opt-in via allow_provisional_fill): After the normal greedy pass, sections in ``all_section_ids`` that are still uncovered are filled with the highest-score *provisional* candidate (``c.provisional == True``) that includes at least one uncovered section and does not collide with already-covered ones. A provisional candidate's backing V4Match was synthesized via ``lookup_v4_match_with_fallback(allow_provisional=True)`` (IMP-30 u1) after chain_exhausted; its ``phase_z_status`` is therefore typically *outside* ``allowed_statuses`` (extract_matched_zone / fallback_candidate), which is why it gets filtered out of the normal greedy pass. The fill preserves first-render invariant for sections whose rank-1~3 are all restructure/reject. Default ``allow_provisional_fill=False`` keeps pre-u3 behavior byte-identical (IMP-05 regression guard). Args: candidates: full candidate pool from collect_candidates(). allowed_statuses: phase_z_status set considered auto-renderable. all_section_ids: ordered section id list (only consulted when allow_provisional_fill=True; required for coverage check). allow_provisional_fill: opt-in for last-resort provisional fill. """ scored = [score_candidate(c) for c in candidates] viable = [ c for c in scored if c.phase_z_status in allowed_statuses and c.auto_selectable ] viable.sort(key=lambda c: (c.score, len(c.source_section_ids)), reverse=True) selected = [] covered = set() for c in viable: if any(sid in covered for sid in c.source_section_ids): continue selected.append(c) covered.update(c.source_section_ids) # IMP-30 u3 — last-resort provisional fill (opt-in, default off). # Honors first-render invariant by surfacing chain_exhausted sections as # provisional zones instead of dropping them. Skip reasons on # non-provisional filtered candidates are preserved (not mutated here). if allow_provisional_fill and all_section_ids: uncovered = {sid for sid in all_section_ids if sid not in covered} if uncovered: provisional_pool = [ c for c in scored if c.provisional and any(sid in uncovered for sid in c.source_section_ids) ] provisional_pool.sort( key=lambda c: (c.score, len(c.source_section_ids)), reverse=True, ) for c in provisional_pool: if any(sid in covered for sid in c.source_section_ids): continue selected.append(c) covered.update(c.source_section_ids) return selected # ─── Layout Preset Selection ─────────────────────────────────── def select_layout_preset(units: list[CompositionUnit]) -> Optional[str]: """v0 : count-based default selection. 1 unit → single 2 units → horizontal-2 (default. vertical-2 는 aspect signal 추가 시 분기) 3 units → top-1-bottom-2 (default. 다른 3-zone variant 는 content-weight signal 추가 시 분기) 4 units → grid-2x2 v0 한계 : - aspect / content-weight 신호 미반영 → 2 units 는 항상 horizontal, 3 units 는 항상 top-1-bottom-2 - 향후 unit.raw_content 기반 weight 산정 시 정교화 """ n = len(units) if n == 0: return None if n == 1: return "single" if n == 2: return "horizontal-2" if n == 3: return "top-1-bottom-2" if n == 4: return "grid-2x2" raise ValueError( f"Composition v0 : layout for {n} units not supported (max 4). " "Larger counts require split-into-multiple-slides decision (future)." ) # ─── Public entry — composition pipeline ─────────────────────── def plan_composition(sections, v4_lookup_fn, v4_label_to_status: dict, allowed_statuses: set[str], capacity_fit_fn=None, v4_candidates_lookup_fn=None, *, allow_provisional_fill: bool = False) -> tuple[list[CompositionUnit], Optional[str], dict]: """Composition planner v0.2 entry. v0.2 변경 : - capacity_fit_fn 주입 시 모든 candidate 에 capacity 사전 검사 (silent truncate / mapper FitError 사전 차단). 불일치 시 auto_selectable=False + filter_reason 'C1: ...'. Step 6-A axis (사용자 lock 2026-05-08) : - v4_candidates_lookup_fn 주입 시 모든 CompositionUnit 에 v4_candidates 채움. logic 변화 X — 단일 frame_template_id / frame_id / label / confidence 는 그대로. runtime 결과 무변. Step 9 application_plan input 위한 schema 확장. IMP-30 u3 — last-resort provisional fill (opt-in, default off): ``allow_provisional_fill`` is plumbed to select_composition_units(). When True, uncovered sections receive a provisional fill from candidates whose backing V4Match was synthesized via ``allow_provisional=True`` (IMP-30 u1). ``_candidate_state`` returns ``selected_provisional`` for those filled units so the debug summary distinguishes greedy selections from provisional fills. Default False keeps IMP-05 behavior identical. v0.1 / v0.1.1 동작 (유지) : - parent_merged_inferred candidate 생성 (parent V4 없어도) - review 개념 X. auto_selectable + filter_reasons 만으로 자동 결정 - selection : score desc + coverage 우세 tiebreak Returns: units : 자동 선택된 composition units layout_preset : 8 vocabulary 중 하나 (또는 None) debug : 후보 전체 + capacity_fit + filter_reasons + preset 결정 근거 """ candidates = collect_candidates( sections, v4_lookup_fn, v4_label_to_status, auto_renderable_statuses=allowed_statuses, capacity_fit_fn=capacity_fit_fn, v4_candidates_lookup_fn=v4_candidates_lookup_fn, ) scored_all = [score_candidate(c) for c in candidates] units = select_composition_units( candidates, allowed_statuses, all_section_ids=[s.section_id for s in sections] if allow_provisional_fill else None, allow_provisional_fill=allow_provisional_fill, ) preset = select_layout_preset(units) def _candidate_state(c: CompositionUnit) -> str: if c in units: # IMP-30 u3 — provisional-fill units surface as a distinct state so # downstream debug consumers can tell greedy selection apart from # last-resort fill. unit.provisional flows from u1 (V4Match # synthesis) → u2 (CompositionUnit propagation). if c.provisional: return "selected_provisional" return "selected" if c.phase_z_status not in allowed_statuses: return "filtered_status" # V4 label → status not auto-renderable if not c.auto_selectable: # filter_reasons prefix 로 capacity 와 weak 구분 if any(r.startswith("C") for r in c.filter_reasons): return "filtered_capacity" # C1 (capacity mismatch) return "filtered_weak" # W1/W2/W3 (parent_merged_inferred only) return "filtered_lost" # viable 였지만 coverage 충돌로 밀림 candidates_summary = [ { "source_section_ids": c.source_section_ids, "merge_type": c.merge_type, "template_id": c.frame_template_id, "label": c.label, "phase_z_status": c.phase_z_status, "v4_rank": c.v4_rank, "selection_path": c.selection_path, "fallback_reason": c.fallback_reason, "score": c.score, "selection_state": _candidate_state(c), "auto_selectable": c.auto_selectable, "filter_reasons": list(c.filter_reasons), "notes": list(c.notes), "capacity_fit": c.rationale.get("capacity_fit"), } for c in scored_all ] merge_candidates = [ s for s in candidates_summary if s["merge_type"] in {"parent_merged", "parent_merged_inferred"} ] capacity_mismatches = [ s for s in candidates_summary if s["selection_state"] == "filtered_capacity" ] debug = { "planner_version": "v0.2", "selection_rule": ( "score desc, then source_section_ids count desc (coverage tiebreak). " "filter = phase_z_status ∉ allowed_statuses OR auto_selectable=False. " "auto_selectable=False 사유 : C1 (capacity mismatch — silent truncate / FitError 차단), " "W1 (rep not auto-renderable), W2 (all children reject), W3 (majority children non-auto-renderable)." ), "candidates_total": len(scored_all), "candidates_viable_auto": len([ c for c in scored_all if c.phase_z_status in allowed_statuses and c.auto_selectable ]), "candidates_summary": candidates_summary, "merge_candidates": merge_candidates, "capacity_mismatches": capacity_mismatches, "selected_units_count": len(units), "layout_preset": preset, "layout_preset_rationale": ( f"v0 count-based: {len(units)} units → {preset}" if preset else "no viable units" ), } return units, preset, debug