"""Phase Z-2 Internal Region planner (B2 v0 — dormant module).

Layer A planner of SPEC v1 §2: ContentObject[] → InternalRegion[] + region_layout.

v0 minimal scope:
  - Supported cases: text_block only / text_block + transform_table (two cases).
  - 3-way decision: whole + split (group merge is not supported — separate axis).
  - Topology vocabulary emitted: only the two entries `region-single` and
    `region-vertical-stack`.
  - SPEC v1 §2.5 algorithm: only rule 1 and rule 6 are implemented
    (rules 2~5 are explicitly deferred).
  - Role assignment: type based (text_block → primary / transform_table → supporting).
  - Split decision: by distinct content type (only one type → a single region).
  - When any ContentObject carries a ``source_shape_index``, regions are grouped
    positionally by that index instead (Option 1 path).
  - frame_match_strategy: kind="frame_match" / frame_id=None
    (deciding the frame is the responsibility of Step 9 / B4).
  - Dormant — not connected to the runtime path
    (pipeline / composition / mapper are untouched).

Responsibility boundary:
  - B2 = region creation (split / role / ratio / topology).
  - Step 9 / B4 = frame compatibility / frame selection / display strategy.
  - Compatibility decisions based on accepted_content_types are *not* B2's job.

``frame_contracts`` argument:
  - Kept in the signature as a future hook. In v0 it is *not used to decide
    the output*.
  - Reserved as the hook point for a later display_only activation / B4
    integration.

Verification:
  - Dormancy: MDX 03 final.html SHA stays canonical (runtime path not connected).
  - Correctness: ``__main__`` self-test (one text-only case + one
    text+transform case).
"""

from __future__ import annotations

from dataclasses import dataclass, field
from typing import Any, Optional

# The B2 v0 input contract is B1's ContentObject (phase_z2_content_extractor).
# Both modules are dormant — a layer-agnostic dependency unrelated to the
# runtime path.
from phase_z2_content_extractor import ContentObject


# ─── Constants (B2 v0 lock) ──────────────────────────────────────

# Size-proxy conversion factor for transform_table (SPEC v1 §2.4 size proxy).
# One pair is treated as the equivalent of 1.5 lines (heuristic, used for
# content-based ratio estimation). Refinement is left to a future axis
# (visual_hints / content density signal).
_PAIR_HEIGHT_FACTOR = 1.5

# Role per supported content type:
#   - text_block      : role=primary,    size = size_estimate.line_count
#   - transform_table : role=supporting, size = size_estimate.rows × _PAIR_HEIGHT_FACTOR
_TYPE_ROLE: dict[str, str] = {
    "text_block": "primary",
    "transform_table": "supporting",
}

# Role given to a content type that is not registered in _TYPE_ROLE.
_FALLBACK_ROLE = "primary"

# Type priority used when ordering regions — primary first, supporting next.
# Only B2 v0 types are registered; secondary / reference may be added by a
# future axis.
_TYPE_ORDER_PRIORITY: dict[str, int] = {
    "text_block": 0,  # primary
    "transform_table": 1,  # supporting
}

# Sort key for types missing from _TYPE_ORDER_PRIORITY (they sort last).
_UNREGISTERED_TYPE_PRIORITY = 99

# Number of decimals kept in InternalRegion.ratio_estimate.
_RATIO_DECIMALS = 4


# ─── Output schema (SPEC v1 §2.1 + §2.5) ─────────────────────────

@dataclass
class InternalRegion:
    """SPEC v1 §2.1 Internal Region entity schema.

    Fields:
        region_id: id unique within the zone (e.g. '{section_id}.region-1').
        role: 'primary' | 'supporting' (B2 v0 uses only these two).
        content_type: 'text_block' | 'transform_table' (v0 supported types).
        ratio_estimate: share of the zone (normalized so the shares sum to
            1.0, before rounding to 4 decimals).
        content_unit_ids: ids of the content objects bundled into this region.
        frame_match_strategy: {kind, frame_id, display_strategy} — fixed in
            B2 v0 to kind="frame_match" / frame_id=None /
            display_strategy="inline_full". The actual frame decision belongs
            to Step 9 / B4.
        source_shape_index: positional index from the B1 source_shape split
            (Option 1, optional).
    """

    region_id: str
    role: str
    content_type: str
    ratio_estimate: float
    content_unit_ids: list[str]
    frame_match_strategy: dict[str, Any]
    source_shape_index: Optional[int] = None


@dataclass
class RegionLayout:
    """SPEC v1 §2.5 region_layout — the spatial arrangement of regions in a zone.

    Vocabulary emitted by B2 v0:
        - 'region-single'         (rule 1 — region_count == 1)
        - 'region-vertical-stack' (rule 6 fallback — everything else)

    Rules 2~5 (region-preview-details / region-grid-2x2 / region-main-support /
    region-horizontal-split) are defined by the SPEC but deferred in B2 v0.
    """

    region_layout_type: str  # 'region-single' | 'region-vertical-stack'
    region_order: list[str]  # region_id values in placement order
    region_placement: str  # 'single' | 'vertical'


@dataclass
class ZoneRegionPlan:
    """Output of B2 v0 — the region split of one zone plus its layout.

    Fields:
        internal_regions: list[InternalRegion]
        region_layout: RegionLayout, or None for an empty plan.
    """

    internal_regions: list[InternalRegion] = field(default_factory=list)
    region_layout: Optional[RegionLayout] = None


# ─── Helpers ─────────────────────────────────────────────────────

def _size_proxy(obj: ContentObject) -> float:
    """Return the spatial size proxy of a content object (SPEC v1 §2.4).

    text_block      : line_count
    transform_table : rows × _PAIR_HEIGHT_FACTOR
    anything else   : 0 (type not supported by B2 v0)

    A missing size_estimate, a missing key, or a key whose value is None all
    count as 0 instead of raising, so such objects reach the equal-split
    fallback of the ratio computation.
    """
    size_estimate = obj.size_estimate or {}
    if obj.type == "text_block":
        return float(size_estimate.get("line_count") or 0)
    if obj.type == "transform_table":
        return float(size_estimate.get("rows") or 0) * _PAIR_HEIGHT_FACTOR
    return 0.0


def _group_by_type_preserving_order(
    content_objects: list[ContentObject],
) -> dict[str, list[ContentObject]]:
    """Group content objects by type, keeping first-appearance order.

    Relies on dict insertion order for the order guarantee.
    """
    groups: dict[str, list[ContentObject]] = {}
    for obj in content_objects:
        groups.setdefault(obj.type, []).append(obj)
    return groups


def _normalized_ratios(sizes: dict[Any, float]) -> dict[Any, float]:
    """Convert per-group size proxies into rounded shares of the zone.

    When the sizes add up to zero or less (e.g. every proxy is 0 because the
    content is empty), every group receives an equal share instead. Shares are
    rounded to _RATIO_DECIMALS, so their sum can differ from 1.0 by a rounding
    error. Key order of the input is preserved.
    """
    total = sum(sizes.values())
    if total <= 0:
        equal_share = 1.0 / max(len(sizes), 1)
        sizes = {key: equal_share for key in sizes}
        total = sum(sizes.values()) or 1.0
    return {key: round(size / total, _RATIO_DECIMALS) for key, size in sizes.items()}


def _default_frame_match_strategy() -> dict[str, Any]:
    """Return a fresh copy of the fixed B2 v0 frame_match_strategy.

    A new dict is built on every call so regions never share one mutable
    object.
    """
    return {
        "kind": "frame_match",
        "frame_id": None,  # decided by Step 9 / B4
        "display_strategy": "inline_full",  # v0 default
    }


def _make_region(
    section_id: str,
    ordinal: int,
    content_type: str,
    ratio: float,
    objs: list[ContentObject],
    source_shape_index: Optional[int] = None,
) -> InternalRegion:
    """Build one InternalRegion; the role is derived from the content type."""
    return InternalRegion(
        region_id=f"{section_id}.region-{ordinal}",
        # Unregistered types fall back to the primary role.
        role=_TYPE_ROLE.get(content_type, _FALLBACK_ROLE),
        content_type=content_type,
        ratio_estimate=ratio,
        content_unit_ids=[o.id for o in objs],
        frame_match_strategy=_default_frame_match_strategy(),
        source_shape_index=source_shape_index,
    )


def _assemble_plan(regions: list[InternalRegion]) -> ZoneRegionPlan:
    """Wrap regions in a ZoneRegionPlan with the SPEC v1 §2.5 layout.

    Only rule 1 and rule 6 are implemented:
        - rule 1          : exactly one region → region-single
        - rule 6 fallback : otherwise          → region-vertical-stack
    Rules 2~5 are deferred in B2 v0 (defined by the SPEC, not implemented):
        - rule 2 region-preview-details  : details_presence path not implemented
        - rule 3 region-grid-2x2         : 4 regions not supported
        - rule 4 region-main-support     : role-asymmetric trigger not implemented
        - rule 5 region-horizontal-split : visual element type not supported

    No regions at all yields an empty plan (region_layout=None) rather than a
    stack layout that contains nothing.
    """
    if not regions:
        return ZoneRegionPlan()

    if len(regions) == 1:
        layout_type, placement = "region-single", "single"
    else:
        layout_type, placement = "region-vertical-stack", "vertical"

    return ZoneRegionPlan(
        internal_regions=regions,
        region_layout=RegionLayout(
            region_layout_type=layout_type,
            region_order=[region.region_id for region in regions],
            region_placement=placement,
        ),
    )


# ─── Option 1 source_shape-aware planner ─────────────────────────

def _plan_by_source_shape_index(
    content_objects: list[ContentObject],
    section_id: str,
) -> ZoneRegionPlan:
    """Group regions *positionally* by source_shape_index (Option 1).

    Objects sharing a source_shape_index form one region, and regions are
    emitted in ascending index order. This aligns cardinality with the
    mapper's split_source — e.g. the 3 top_bullets of F13 become 3 regions
    that map 1:1, positionally, onto the mapper's pillar_1/2/3.

    Args:
        content_objects: objects to plan; those without an index are skipped.
        section_id: prefix used when generating region ids.

    Returns:
        The plan for the indexed objects, or an empty plan when none of the
        objects carries an index.
    """
    groups: dict[int, list[ContentObject]] = {}
    for obj in content_objects:
        if obj.source_shape_index is None:
            # NOTE(review): objects without an index are left out of the plan
            # entirely when they are mixed with indexed ones — confirm that
            # this is intended and not silent content loss.
            continue
        groups.setdefault(obj.source_shape_index, []).append(obj)

    ordered_indices = sorted(groups)

    # Size proxy per positional region = sum over the objects it contains.
    ratios = _normalized_ratios(
        {sidx: sum(_size_proxy(o) for o in groups[sidx]) for sidx in ordered_indices}
    )

    regions = [
        _make_region(
            section_id,
            ordinal,
            # role / content_type come from the first object of the group
            # (the Option 1 pilot assumes homogeneous text_block groups).
            groups[sidx][0].type,
            ratios[sidx],
            groups[sidx],
            source_shape_index=sidx,
        )
        for ordinal, sidx in enumerate(ordered_indices, start=1)
    ]
    return _assemble_plan(regions)


# ─── Public entry ────────────────────────────────────────────────

def plan_internal_regions(
    content_objects: list[ContentObject],
    frame_contracts: Optional[list[dict[str, Any]]] = None,  # v0 unused, future hook
    section_id: str = "",
) -> ZoneRegionPlan:
    """ContentObject[] → ZoneRegionPlan (region split + topology + ratio + role).

    B2 v0 algorithm:
        1. Group content_objects by type (first-appearance order preserved).
        2. The number of distinct types decides region_count (split decision).
        3. Per region:
            - role = type based (_TYPE_ROLE)
            - ratio_estimate = size-proxy sum of the type / overall sum
              (normalized to 1.0, then rounded to 4 decimals)
            - frame_match_strategy = {kind: 'frame_match', frame_id: None,
              display_strategy: 'inline_full'} (Step 9 / B4 territory)
        4. Topology vocabulary — SPEC v1 §2.5:
            - rule 1          : region_count == 1 → region-single
            - rules 2~5       : *deferred* (SPEC definition only)
            - rule 6 fallback : otherwise → region-vertical-stack
        5. region_order = type priority order (primary → supporting).

    If any object carries a source_shape_index, the Option 1 positional path
    is taken instead of steps 1~5.

    Args:
        content_objects: list[ContentObject] — output of the B1 v0 extractor.
        frame_contracts: unused in v0 (future hook); kept in the signature but
            never consulted when deciding the output.
        section_id: prefix used when generating region ids.

    Returns:
        ZoneRegionPlan (the plan of a single zone).

    Note:
        - frame_contracts is ignored — v0 makes *no frame compatibility
          decision*; that belongs to Step 9 / B4.
        - Empty content_objects → empty plan (region_layout=None); callers
          are advised to guard against this beforehand.
    """
    if not content_objects:
        return ZoneRegionPlan()

    # Option 1 source_shape-aware path: as soon as any ContentObject carries a
    # source_shape_index, group positionally (same index → one region) to stay
    # aligned with the cardinality of the mapper's split_source.
    if any(o.source_shape_index is not None for o in content_objects):
        return _plan_by_source_shape_index(content_objects, section_id)

    # 1. Group by type.
    groups = _group_by_type_preserving_order(content_objects)

    # 2. Size-proxy sum per type, normalized into shares of the zone.
    ratios = _normalized_ratios(
        {ctype: sum(_size_proxy(o) for o in objs) for ctype, objs in groups.items()}
    )

    # 3. Build regions in type-priority order. sorted() is stable, so
    #    unregistered types keep their first-appearance order at the end.
    sorted_types = sorted(
        groups,
        key=lambda t: _TYPE_ORDER_PRIORITY.get(t, _UNREGISTERED_TYPE_PRIORITY),
    )
    regions = [
        _make_region(section_id, ordinal, ctype, ratios[ctype], groups[ctype])
        for ordinal, ctype in enumerate(sorted_types, start=1)
    ]

    # 4 + 5. Topology (rule 1 / rule 6) and region_order (= sorted_types order).
    return _assemble_plan(regions)


# ─── Self-test (B2 v0 correctness verification) ──────────────────

def _run_self_test() -> None:
    """v0 unit test: one text-only case + one text+transform case.

    Covers verification (b) correctness of the scope lock — checks that the
    planner is accurate. Uses fixed inputs only; MDX 01/02/04 are not used.
    """
    # ─── Test 1 : text-only (1 ContentObject) ────────────────────
    text_obj = ContentObject(
        id="test-1.text-1",
        type="text_block",
        role="summary",
        raw_payload="* 본문\n * nested",
        size_estimate={"line_count": 6},
        type_specific={
            "format": "nested_list",
            "bullet_count": 1,
            "max_indent_level": 1,
            "has_emphasis": False,
        },
    )
    plan1 = plan_internal_regions([text_obj], section_id="test-1")

    assert plan1.region_layout is not None
    assert plan1.region_layout.region_layout_type == "region-single", (
        f"text-only → region-single 기대, got {plan1.region_layout.region_layout_type}"
    )
    assert plan1.region_layout.region_placement == "single"
    assert len(plan1.internal_regions) == 1, (
        f"1 region 기대, got {len(plan1.internal_regions)}"
    )
    r = plan1.internal_regions[0]
    assert r.region_id == "test-1.region-1"
    assert r.role == "primary", f"text-only role=primary 기대, got {r.role}"
    assert r.content_type == "text_block"
    assert r.ratio_estimate == 1.0, f"단일 region ratio=1.0 기대, got {r.ratio_estimate}"
    assert r.content_unit_ids == ["test-1.text-1"]
    assert r.frame_match_strategy["kind"] == "frame_match"
    assert r.frame_match_strategy["frame_id"] is None, "B2 v0 frame_id=None lock"
    assert r.frame_match_strategy["display_strategy"] == "inline_full"
    assert plan1.region_layout.region_order == ["test-1.region-1"]
    print("[OK] Test 1 (text-only) passed.")

    # ─── Test 2 : text + transform_table (2 ContentObject) ────────
    text_obj2 = ContentObject(
        id="test-2.text-1",
        type="text_block",
        role="summary",
        raw_payload="* 본문",
        size_estimate={"line_count": 6},
        type_specific={
            "format": "bullet_list",
            "bullet_count": 1,
            "max_indent_level": 0,
            "has_emphasis": False,
        },
    )
    transform_obj = ContentObject(
        id="test-2.transform-1",
        type="transform_table",
        role="summary",
        raw_payload="| AS-IS | ➜ | TO-BE |\n|---|---|---|\n| a | ➜ | b |\n| c | ➜ | d |",
        size_estimate={"rows": 2},
        type_specific={
            "pair_count": 2,
            "arrow_glyph": "➜",
            "rows": [
                {"from": "a", "arrow": "➜", "to": "b"},
                {"from": "c", "arrow": "➜", "to": "d"},
            ],
        },
    )
    plan2 = plan_internal_regions([text_obj2, transform_obj], section_id="test-2")

    assert plan2.region_layout is not None
    assert plan2.region_layout.region_layout_type == "region-vertical-stack", (
        f"text+transform → region-vertical-stack (rule 6 fallback) 기대, got {plan2.region_layout.region_layout_type}"
    )
    assert plan2.region_layout.region_placement == "vertical"
    assert len(plan2.internal_regions) == 2, (
        f"2 region 기대, got {len(plan2.internal_regions)}"
    )

    # region_order = primary first (text), supporting second (transform)
    assert plan2.region_layout.region_order == ["test-2.region-1", "test-2.region-2"]

    # text region (region-1, primary)
    text_r = plan2.internal_regions[0]
    assert text_r.region_id == "test-2.region-1"
    assert text_r.role == "primary"
    assert text_r.content_type == "text_block"
    # ratio : 6 / (6 + 2*1.5) = 6/9 ≈ 0.667
    expected_text_ratio = 6.0 / (6.0 + 2.0 * 1.5)
    assert abs(text_r.ratio_estimate - expected_text_ratio) < 0.001, (
        f"text ratio {expected_text_ratio:.4f} 기대, got {text_r.ratio_estimate}"
    )
    assert text_r.content_unit_ids == ["test-2.text-1"]
    assert text_r.frame_match_strategy["kind"] == "frame_match"
    assert text_r.frame_match_strategy["frame_id"] is None

    # transform region (region-2, supporting)
    tr_r = plan2.internal_regions[1]
    assert tr_r.region_id == "test-2.region-2"
    assert tr_r.role == "supporting", (
        f"transform_table role=supporting 기대, got {tr_r.role}"
    )
    assert tr_r.content_type == "transform_table"
    expected_tr_ratio = (2.0 * 1.5) / (6.0 + 2.0 * 1.5)
    assert abs(tr_r.ratio_estimate - expected_tr_ratio) < 0.001, (
        f"transform ratio {expected_tr_ratio:.4f} 기대, got {tr_r.ratio_estimate}"
    )
    assert tr_r.content_unit_ids == ["test-2.transform-1"]
    assert tr_r.frame_match_strategy["frame_id"] is None

    # ratio sum normalizes to 1.0 (within rounding tolerance)
    ratio_sum = text_r.ratio_estimate + tr_r.ratio_estimate
    assert abs(ratio_sum - 1.0) < 0.01, f"ratio sum=1.0 기대, got {ratio_sum}"

    # frame_contracts must be unused — calling with None and with a contract
    # list has to produce the same result.
    plan2_with_contracts = plan_internal_regions(
        [text_obj2, transform_obj],
        frame_contracts=[{"template_id": "dummy", "accepted_content_types": ["text_block"]}],
        section_id="test-2",
    )
    assert plan2_with_contracts.region_layout.region_layout_type == plan2.region_layout.region_layout_type
    assert len(plan2_with_contracts.internal_regions) == len(plan2.internal_regions)
    # Dataclass equality compares every field, so this pins the whole plan.
    assert plan2_with_contracts == plan2
    print("[OK] Test 2 (text+transform, vertical-stack, ratio 6:3) passed.")

    print("\n=== B2 v0 self-test PASS ===")


if __name__ == "__main__":
    _run_self_test()