Files
C.E.L_Slide_test2/src/phase_z2_internal_region_planner.py
kyeongmin 761a43da5e Add Phase Z B4 source-shape-aware placement
- enable B1/B2/B4 source-shape-aware F13 placement behind env flag
- align F13 placement_trace with mapper top_bullets cardinality
- preserve canonical render output when flag is off
2026-05-07 05:26:57 +09:00

443 lines
18 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""Phase Z-2 Internal Region planner (B2 v0 — dormant module).
SPEC v1 §2 의 Layer A planner — ContentObject[] → InternalRegion[] + region_layout.
v0 minimal :
- 지원 case : text_block only / text_block + transform_table 2 가지
- 3-way decision : whole + split (group merge 미지원 — 별 axis)
- topology vocabulary 출력 : `region-single` + `region-vertical-stack` 2 entry 만
- SPEC v1 §2.5 algorithm = rule 1 + rule 6 만 구현 (rules 2~5 명시 deferred)
- role 할당 : type 기반 (text_block → primary / transform_table → supporting)
- split 결정 : distinct content type 기준 (같은 type 만 → single region)
- frame_match_strategy : kind="frame_match" / frame_id=None (Step 9 / B4 책임)
- dormant — runtime path 미연결 (pipeline / composition / mapper 미터치)
책임 boundary :
- B2 = region 생성 (split / role / ratio / topology)
- Step 9 / B4 = frame compatibility / frame selection / display strategy
- accepted_content_types 기반 compatibility 판단은 B2 책임 *아님*
frame_contracts 인자 :
- signature 에 둠 (future hook). v0 에서는 *output 결정에 사용 X*.
- 향후 display_only 활성화 / B4 통합 시 hook 자리.
검증 :
- dormancy : MDX 03 final.html SHA = canonical 유지 (runtime path 미연결)
- correctness : __main__ self-test (text-only 1 case + text+transform 1 case)
"""
from __future__ import annotations
from dataclasses import dataclass, field
from typing import Any, Optional
# B2 v0 input contract = B1 의 ContentObject (phase_z2_content_extractor).
# 두 module 모두 dormant — runtime path 와 무관한 *layer-agnostic 의존*.
from phase_z2_content_extractor import ContentObject
# ─── Constants (B2 v0 lock) ──────────────────────────────────────
# Conversion factor for the transform_table size proxy (SPEC v1 §2.4 size proxy).
# One pair counts as the equivalent of 1.5 text lines (a heuristic used only to
# derive content-based ratios).
# Refinement is left to a future axis (visual_hints / content density signal).
_PAIR_HEIGHT_FACTOR = 1.5
# ─── Output schema (SPEC v1 §2.1 + §2.5) ─────────────────────────
@dataclass
class InternalRegion:
    """One internal region of a zone (SPEC v1 §2.1 Internal Region entity schema).

    Attributes:
        region_id: Identifier unique within the zone, for example
            ``'{section_id}.region-1'``.
        role: ``'primary'`` or ``'supporting'``; B2 v0 assigns only these two.
        content_type: ``'text_block'`` or ``'transform_table'`` (the types
            B2 v0 supports).
        ratio_estimate: Share of the zone attributed to this region; shares
            are normalised so that they add up to approximately 1.0.
        content_unit_ids: Ids of the content objects bundled into this region.
        frame_match_strategy: Mapping with the keys ``kind``, ``frame_id`` and
            ``display_strategy``. B2 v0 always emits ``kind="frame_match"``,
            ``frame_id=None`` and ``display_strategy="inline_full"``; choosing
            the actual frame is the responsibility of Step 9 / B4.
        source_shape_index: Positional index coming from the B1 source_shape
            split (Option 1). Optional; defaults to ``None``.
    """

    region_id: str
    role: str
    content_type: str
    ratio_estimate: float
    content_unit_ids: list[str]
    frame_match_strategy: dict
    source_shape_index: Optional[int] = None
@dataclass
class RegionLayout:
    """Spatial arrangement of the regions inside one zone (SPEC v1 §2.5 region_layout).

    B2 v0 emits only two layout types:
      - ``'region-single'``         : rule 1, used when there is exactly one region.
      - ``'region-vertical-stack'`` : rule 6 fallback, used in every other case.

    Rules 2-5 (region-preview-details / region-grid-2x2 / region-main-support /
    region-horizontal-split) are defined in the SPEC but deferred in B2 v0.
    """

    # 'region-single' | 'region-vertical-stack'
    region_layout_type: str
    # region ids listed in placement order
    region_order: list[str]
    # 'single' | 'vertical'
    region_placement: str
@dataclass
class ZoneRegionPlan:
    """Planner output for a single zone: its regions plus their layout.

    Attributes:
        internal_regions: The regions the zone was divided into. Defaults to
            a fresh empty list for every instance.
        region_layout: How those regions are arranged. Defaults to ``None``,
            which is what an empty plan carries.
    """

    internal_regions: list[InternalRegion] = field(default_factory=list)
    region_layout: Optional[RegionLayout] = None
# ─── Helpers ─────────────────────────────────────────────────────
# Role assigned to each content type supported by B2 v0, together with the size
# proxy that _size_proxy derives for that type:
# - text_block      : role=primary,    size = size_estimate.line_count
# - transform_table : role=supporting, size = size_estimate.rows × _PAIR_HEIGHT_FACTOR
_TYPE_ROLE: dict[str, str] = {
    "text_block": "primary",
    "transform_table": "supporting",
}
def _size_proxy(obj: ContentObject) -> float:
    """Return the spatial size proxy of a content object (SPEC v1 §2.4).

    - ``text_block``      : ``size_estimate["line_count"]`` (0 when missing)
    - ``transform_table`` : ``size_estimate["rows"]`` (0 when missing)
      multiplied by ``_PAIR_HEIGHT_FACTOR``
    - any other type      : ``0.0`` (not supported by B2 v0)
    """
    kind = obj.type
    # Unsupported types never touch size_estimate at all.
    if kind not in ("text_block", "transform_table"):
        return 0.0

    estimate = obj.size_estimate
    if kind == "text_block":
        return float(estimate.get("line_count", 0))
    return float(estimate.get("rows", 0)) * _PAIR_HEIGHT_FACTOR
def _group_by_type_preserving_order(
    content_objects: list[ContentObject],
) -> dict[str, list[ContentObject]]:
    """Bucket content objects by their ``type``.

    Keys appear in the order each type is first seen (dicts keep insertion
    order), and the objects inside a bucket keep their input order.
    """
    buckets: dict[str, list[ContentObject]] = {}
    for item in content_objects:
        kind = item.type
        if kind in buckets:
            buckets[kind].append(item)
        else:
            buckets[kind] = [item]
    return buckets
# Type priority used when deciding region_order: primary first, supporting next.
# Only the B2 v0 types are registered; a future axis may add secondary / reference.
_TYPE_ORDER_PRIORITY: dict[str, int] = {
    "text_block": 0,  # primary
    "transform_table": 1,  # supporting
}
# ─── Option 1 source_shape-aware planner ─────────────────────────
def _plan_by_source_shape_index(
    content_objects: list[ContentObject],
    section_id: str,
) -> ZoneRegionPlan:
    """Build a positional region plan keyed by ``source_shape_index`` (Option 1).

    Objects that share a ``source_shape_index`` form one region. Regions are
    emitted in ascending index order and numbered from 1, so the region count
    equals the number of distinct indices. Per the original design note this
    keeps the plan aligned 1:1 with the mapper's split_source (e.g. three F13
    top_bullets -> three regions matching pillar_1/2/3).

    Args:
        content_objects: Content objects to plan; only those whose
            ``source_shape_index`` is not ``None`` take part.
        section_id: Prefix used to build each ``region_id``.

    Returns:
        A ZoneRegionPlan with one region per distinct index, or an empty
        ZoneRegionPlan (``region_layout=None``) when no object carries an index.
    """
    groups: dict[int, list[ContentObject]] = {}
    for obj in content_objects:
        # NOTE(review): objects without an index are left out of every region.
        # Confirm this is intended when indexed and un-indexed objects are mixed.
        if obj.source_shape_index is None:
            continue
        groups.setdefault(obj.source_shape_index, []).append(obj)

    if not groups:
        # Nothing positional to plan. Return the same empty plan that
        # plan_internal_regions uses for empty input instead of a
        # 'region-vertical-stack' layout that contains zero regions.
        return ZoneRegionPlan()

    sorted_indices = sorted(groups)

    # Size proxy per positional region (sum over its members), then ratios.
    index_sizes: dict[int, float] = {
        idx: sum(_size_proxy(o) for o in groups[idx]) for idx in sorted_indices
    }
    total_size = sum(index_sizes.values())
    if total_size <= 0:
        # Every proxy is zero: fall back to an equal split across regions.
        equal_share = 1.0 / len(sorted_indices)
        index_sizes = {idx: equal_share for idx in sorted_indices}
        total_size = sum(index_sizes.values())

    regions: list[InternalRegion] = []
    for ord_idx, sidx in enumerate(sorted_indices, start=1):
        objs = groups[sidx]
        # Role and content_type come from the first object of the group
        # (the Option 1 pilot assumes homogeneous text_block groups).
        ctype = objs[0].type
        regions.append(
            InternalRegion(
                region_id=f"{section_id}.region-{ord_idx}",
                role=_TYPE_ROLE.get(ctype, "primary"),
                content_type=ctype,
                ratio_estimate=round(index_sizes[sidx] / total_size, 4),
                content_unit_ids=[o.id for o in objs],
                frame_match_strategy={
                    "kind": "frame_match",
                    "frame_id": None,
                    "display_strategy": "inline_full",
                },
                source_shape_index=sidx,
            )
        )

    # Topology: rule 1 for a single region, rule 6 fallback otherwise.
    if len(regions) == 1:
        layout_type = "region-single"
        placement = "single"
    else:
        layout_type = "region-vertical-stack"
        placement = "vertical"

    return ZoneRegionPlan(
        internal_regions=regions,
        region_layout=RegionLayout(
            region_layout_type=layout_type,
            region_order=[r.region_id for r in regions],
            region_placement=placement,
        ),
    )
# ─── Public entry ────────────────────────────────────────────────
def plan_internal_regions(
    content_objects: list[ContentObject],
    frame_contracts: Optional[list[dict[str, Any]]] = None,  # v0 unused, future hook
    section_id: str = "",
) -> ZoneRegionPlan:
    """Turn a list of content objects into a ZoneRegionPlan.

    The plan covers region split, role, ratio and topology. B2 v0 algorithm:
      1. Group the objects by type, keeping first-appearance order.
      2. One region per distinct type (this is the split decision).
      3. Per region:
           - role from ``_TYPE_ROLE`` (unknown types fall back to 'primary'),
           - ratio_estimate = size proxy of the type / total, rounded to 4 places,
           - frame_match_strategy fixed to kind='frame_match', frame_id=None,
             display_strategy='inline_full' (frame choice belongs to Step 9 / B4).
      4. Topology per SPEC v1 §2.5:
           - rule 1: exactly one region -> 'region-single',
           - rules 2-5: deferred (defined in the SPEC, not implemented in v0),
           - rule 6 fallback: anything else -> 'region-vertical-stack'.
      5. region_order follows type priority (primary before supporting).

    When at least one object has a non-None ``source_shape_index`` the whole
    input is delegated to ``_plan_by_source_shape_index`` instead.

    Args:
        content_objects: Content objects produced by the B1 v0 extractor.
        frame_contracts: Not used in v0; kept in the signature as a future
            hook and never consulted when building the output.
        section_id: Prefix used to build each ``region_id``.

    Returns:
        The plan for one zone. Empty input yields an empty plan with
        ``region_layout=None``; callers are advised to filter that case out
        beforehand.
    """
    if not content_objects:
        return ZoneRegionPlan()

    # Option 1: any object carrying a source_shape_index switches the whole
    # input to positional grouping.
    uses_source_shape = False
    for item in content_objects:
        if item.source_shape_index is not None:
            uses_source_shape = True
            break
    if uses_source_shape:
        return _plan_by_source_shape_index(content_objects, section_id)

    # Steps 1-2: group by type and total the size proxy of each group.
    by_type = _group_by_type_preserving_order(content_objects)
    size_of: dict[str, float] = {
        kind: sum(_size_proxy(member) for member in members)
        for kind, members in by_type.items()
    }
    grand_total = sum(size_of.values())
    if grand_total <= 0:
        # Every proxy is zero (e.g. empty content): split the zone equally.
        fallback_share = 1.0 / max(len(by_type), 1)
        size_of = dict.fromkeys(by_type, fallback_share)
        grand_total = sum(size_of.values()) or 1.0

    # Step 3: one region per type, ordered by type priority (stable sort, so
    # unregistered types keep their first-appearance order after the known ones).
    ordered_kinds = list(by_type)
    ordered_kinds.sort(key=lambda kind: _TYPE_ORDER_PRIORITY.get(kind, 99))
    planned: list[InternalRegion] = [
        InternalRegion(
            region_id=f"{section_id}.region-{position}",
            role=_TYPE_ROLE.get(kind, "primary"),  # unsupported type -> primary
            content_type=kind,
            ratio_estimate=round(size_of[kind] / grand_total, 4),
            content_unit_ids=[member.id for member in by_type[kind]],
            frame_match_strategy={
                "kind": "frame_match",
                "frame_id": None,  # decided by Step 9 / B4
                "display_strategy": "inline_full",  # v0 default
            },
        )
        for position, kind in enumerate(ordered_kinds, start=1)
    ]

    # Step 4: topology. Rules 2-5 are deferred in B2 v0:
    #   - rule 2 region-preview-details  : details_presence path not implemented
    #   - rule 3 region-grid-2x2         : four regions not supported
    #   - rule 4 region-main-support     : asymmetric role trigger not implemented
    #   - rule 5 region-horizontal-split : visual element type not supported
    if len(planned) == 1:
        layout_type, placement = "region-single", "single"  # rule 1
    else:
        layout_type, placement = "region-vertical-stack", "vertical"  # rule 6

    # Step 5: region_order mirrors the priority-sorted region list.
    return ZoneRegionPlan(
        internal_regions=planned,
        region_layout=RegionLayout(
            region_layout_type=layout_type,
            region_order=[region.region_id for region in planned],
            region_placement=placement,
        ),
    )
# ─── Self-test (B2 v0 correctness 검증) ─────────────────────────
def _run_self_test():
    """Run the v0 correctness checks: one text-only case and one text+transform case.

    Covers the "correctness" half of the scope-lock verification. Uses fixed
    inline inputs only (MDX 01/02/04 are not used). Prints a progress line per
    test and raises AssertionError on the first failed expectation.
    """
    # ─── Test 1 : text-only (1 ContentObject) ────────────────────
    text_obj = ContentObject(
        id="test-1.text-1",
        type="text_block",
        role="summary",
        raw_payload="* 본문\n * nested",
        size_estimate={"line_count": 6},
        type_specific={"format": "nested_list", "bullet_count": 1, "max_indent_level": 1, "has_emphasis": False},
    )
    plan1 = plan_internal_regions([text_obj], section_id="test-1")
    assert plan1.region_layout is not None
    assert plan1.region_layout.region_layout_type == "region-single", \
        f"text-only → region-single 기대, got {plan1.region_layout.region_layout_type}"
    assert plan1.region_layout.region_placement == "single"
    assert len(plan1.internal_regions) == 1, f"1 region 기대, got {len(plan1.internal_regions)}"
    r = plan1.internal_regions[0]
    assert r.region_id == "test-1.region-1"
    assert r.role == "primary", f"text-only role=primary 기대, got {r.role}"
    assert r.content_type == "text_block"
    assert r.ratio_estimate == 1.0, f"단일 region ratio=1.0 기대, got {r.ratio_estimate}"
    assert r.content_unit_ids == ["test-1.text-1"]
    assert r.frame_match_strategy["kind"] == "frame_match"
    assert r.frame_match_strategy["frame_id"] is None, "B2 v0 frame_id=None lock"
    assert r.frame_match_strategy["display_strategy"] == "inline_full"
    assert plan1.region_layout.region_order == ["test-1.region-1"]
    print("[OK] Test 1 (text-only) passed.")

    # ─── Test 2 : text + transform_table (2 ContentObject) ────────
    text_obj2 = ContentObject(
        id="test-2.text-1",
        type="text_block",
        role="summary",
        raw_payload="* 본문",
        size_estimate={"line_count": 6},
        type_specific={"format": "bullet_list", "bullet_count": 1, "max_indent_level": 0, "has_emphasis": False},
    )
    # NOTE(review): arrow_glyph / arrow are empty strings here although
    # raw_payload uses '➜' — confirm whether a glyph was lost from this fixture.
    transform_obj = ContentObject(
        id="test-2.transform-1",
        type="transform_table",
        role="summary",
        raw_payload="| AS-IS | ➜ | TO-BE |\n|---|---|---|\n| a | ➜ | b |\n| c | ➜ | d |",
        size_estimate={"rows": 2},
        type_specific={"pair_count": 2, "arrow_glyph": "", "rows": [
            {"from": "a", "arrow": "", "to": "b"},
            {"from": "c", "arrow": "", "to": "d"},
        ]},
    )
    plan2 = plan_internal_regions([text_obj2, transform_obj], section_id="test-2")
    assert plan2.region_layout is not None
    assert plan2.region_layout.region_layout_type == "region-vertical-stack", \
        f"text+transform → region-vertical-stack (rule 6 fallback) 기대, got {plan2.region_layout.region_layout_type}"
    assert plan2.region_layout.region_placement == "vertical"
    assert len(plan2.internal_regions) == 2, f"2 region 기대, got {len(plan2.internal_regions)}"
    # region_order = primary first (text), supporting second (transform)
    assert plan2.region_layout.region_order == ["test-2.region-1", "test-2.region-2"]

    # text region (region-1, primary)
    text_r = plan2.internal_regions[0]
    assert text_r.region_id == "test-2.region-1"
    assert text_r.role == "primary"
    assert text_r.content_type == "text_block"
    # ratio : 6 / (6 + 2*1.5) = 6/9 ≈ 0.667
    expected_text_ratio = 6.0 / (6.0 + 2.0 * 1.5)
    assert abs(text_r.ratio_estimate - expected_text_ratio) < 0.001, \
        f"text ratio {expected_text_ratio:.4f} 기대, got {text_r.ratio_estimate}"
    assert text_r.content_unit_ids == ["test-2.text-1"]
    assert text_r.frame_match_strategy["kind"] == "frame_match"
    assert text_r.frame_match_strategy["frame_id"] is None

    # transform region (region-2, supporting)
    tr_r = plan2.internal_regions[1]
    assert tr_r.region_id == "test-2.region-2"
    assert tr_r.role == "supporting", f"transform_table role=supporting 기대, got {tr_r.role}"
    assert tr_r.content_type == "transform_table"
    expected_tr_ratio = (2.0 * 1.5) / (6.0 + 2.0 * 1.5)
    assert abs(tr_r.ratio_estimate - expected_tr_ratio) < 0.001, \
        f"transform ratio {expected_tr_ratio:.4f} 기대, got {tr_r.ratio_estimate}"
    assert tr_r.content_unit_ids == ["test-2.transform-1"]
    assert tr_r.frame_match_strategy["frame_id"] is None

    # The ratios are normalised, so they must add up to (about) 1.0.
    ratio_sum = text_r.ratio_estimate + tr_r.ratio_estimate
    assert abs(ratio_sum - 1.0) < 0.01, f"ratio sum=1.0 기대, got {ratio_sum}"

    # frame_contracts must be ignored: calling with contracts has to produce
    # the same plan as calling without them.
    plan2_with_contracts = plan_internal_regions(
        [text_obj2, transform_obj],
        frame_contracts=[{"template_id": "dummy", "accepted_content_types": ["text_block"]}],
        section_id="test-2",
    )
    assert plan2_with_contracts.region_layout.region_layout_type == plan2.region_layout.region_layout_type
    assert len(plan2_with_contracts.internal_regions) == len(plan2.internal_regions)
    # Dataclass equality compares every region field and the layout, so this
    # catches any influence of frame_contracts, not just count/layout changes.
    assert plan2_with_contracts == plan2, "frame_contracts must not change the plan"
    print("[OK] Test 2 (text+transform, vertical-stack, ratio 6:3) passed.")
    print("\n=== B2 v0 self-test PASS ===")
if __name__ == "__main__":
    # Executed as a script: run the built-in B2 v0 self-test.
    _run_self_test()