From 3f843d73f728307a7fef41ee014e39cf06e66810 Mon Sep 17 00:00:00 2001 From: kyeongmin Date: Mon, 4 May 2026 09:34:49 +0900 Subject: [PATCH] Add Phase Z Layer A placement planner - add dormant placement planner integrating B1 / B2 / B3 - region 1:1 sub_zone mapping with narrowest-accepts-first heuristic - frame selection by accepted_content_types coverage + declaration order --- src/phase_z2_placement_planner.py | 387 ++++++++++++++++++++++++++++++ 1 file changed, 387 insertions(+) create mode 100644 src/phase_z2_placement_planner.py diff --git a/src/phase_z2_placement_planner.py b/src/phase_z2_placement_planner.py new file mode 100644 index 0000000..c123e3a --- /dev/null +++ b/src/phase_z2_placement_planner.py @@ -0,0 +1,387 @@ +"""Phase Z-2 Placement planner (B4 v0 — dormant module). + +SPEC v1 §4 의 2-stage placement (Layer A → Layer B) 통합 module. + +v0 minimal : + - 지원 case : text_block only / text_block + transform_table 2 가지 (B1/B2 정합) + - dormant — runtime path 미연결 (pipeline / composition / mapper / partial / yaml 미터치) + - Stage A = B2 plan_internal_regions() *호출만* (logic 중복 X) + - frame 선택 = accepted_content_types cover + frame_contracts 입력 순서 first + - Stage B 매핑 단위 = region 1:1 sub_zone (단순화) + - sub_zone 선택 = narrowest accepts first + declaration order tie-break (deadlock 방지) + - cardinality.strict 초과 → rejection / under-fill → 허용 + - display_strategy = 모두 inline_full / overflow_buffer = [] + - partial_target_path = sub_zone 으로부터 *읽어서 보존만* (실제 marker 미적용) + - F13 multi-pillar distribution 미지원 (1 ContentObject per region 만) + - V4 rank / multi-frame ranking / display_only path 미활성 + +책임 boundary : + - B4 = Stage A wrapping + frame 선택 + Stage B (region 1:1 sub_zone) 매핑 + - 별 axis = display_only path / preview·details 활성 / partial template marker / + telemetry 연동 / runtime pipeline 연결 + +검증 : + - dormancy : MDX 03 final.html SHA = canonical 유지 (runtime path 미연결) + - correctness : __main__ self-test (text-only 1 case + text+transform 1 case) +""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from typing import Any, Optional + +# B4 v0 input contract = B1 (ContentObject) + B2 (InternalRegion / ZoneRegionPlan). +# 세 module 모두 dormant — runtime path 와 무관한 layer-agnostic 의존. +from phase_z2_content_extractor import ContentObject +from phase_z2_internal_region_planner import ( + InternalRegion, + ZoneRegionPlan, + plan_internal_regions, +) + + +# ─── Output schema (SPEC v1 §4.1) ──────────────────────────────── + + +@dataclass +class SlotAssignment: + """SPEC v1 §4.3 Stage B 의 slot_assignment. + + region 의 content_unit 이 frame 의 어느 Frame Slot 으로 가는지 명시. + partial_target_path = B3 catalog 의 sub_zone.partial_target_path 그대로 보존 + (실제 marker 적용은 별 axis B5 영역). + """ + + region_id: str + content_unit_id: str + frame_slot_id: str + partial_target_path: str + display_strategy: str # v0 = "inline_full" 만 + + +@dataclass +class PlacementPlan: + """B4 v0 의 출력 — section 의 *전체 placement* 결과 (Stage A + Stage B 통합). + + Fields : + section_id : section 식별자 (region_id prefix 와 일관) + selected_frame_id : frame.frame_id (or None — frame cover 실패 시) + selected_template_id : frame.template_id (or None) + internal_regions : Stage A 결과 (B2 planner 출력 그대로) + slot_assignments : Stage B 결과 (region.content_unit → Frame Slot) + overflow_buffer : v0 = 빈 list (preview/details path 미활성) + rejection : 매칭 안 된 / cardinality 초과 등 — 자동 렌더 X 신호 + """ + + section_id: str + selected_frame_id: Optional[str] = None + selected_template_id: Optional[str] = None + internal_regions: list[InternalRegion] = field(default_factory=list) + slot_assignments: list[SlotAssignment] = field(default_factory=list) + overflow_buffer: list[dict] = field(default_factory=list) + rejection: list[dict] = field(default_factory=list) + + +# ─── Frame selection ───────────────────────────────────────────── + + +def _select_frame( + content_objects: list[ContentObject], + frame_contracts: list[dict[str, Any]], +) -> Optional[dict[str, Any]]: + """frame_contracts 중 *content_type_set 을 모두 cover* 하는 첫 frame. + + rule (B4 v0 lock) : + 1. content_type_set = {obj.type for obj in content_objects} + 2. frame_contract.accepted_content_types ⊇ content_type_set 인 후보 모음 + 3. frame_contracts 입력 순서 (= YAML declaration order) 첫 entry 선택 + + Returns : + frame_contract dict 또는 None (cover 가능 frame 없음) + """ + content_type_set = {obj.type for obj in content_objects} + for fc in frame_contracts: + accepted = set(fc.get("accepted_content_types") or []) + if content_type_set <= accepted: # ⊇ check + return fc + return None + + +# ─── Sub_zone assignment (Stage B) ─────────────────────────────── + + +def _assign_region_to_sub_zone( + region: InternalRegion, + frame_sub_zones: list[dict[str, Any]], + assigned_sub_zone_ids: set[str], +) -> Optional[dict[str, Any]]: + """region 에 매칭할 sub_zone 선택 (B4 v0 narrowest-first heuristic). + + rule (B4 v0 lock — F29 deadlock 방지) : + 1. not-yet-assigned 중 region.content_type 을 accepts 하는 후보 수집 + 2. 후보 중 accepts list 가장 *좁은* sub_zone 우선 + 3. 동률이면 declaration order (Python sort 의 stability 활용) + + 예 (F29) : + region.content_type = text_block + candidates = [process_column(accepts=[text,transform], size 2), + product_column(accepts=[text], size 1)] + → product_column 선택 (narrowest) + + region.content_type = transform_table (이후 호출, product_column 이미 assigned) + candidates = [process_column] 만 + → process_column 선택 + + Returns : + sub_zone dict 또는 None (compatible 후보 없음) + """ + candidates: list[dict[str, Any]] = [] + for sz in frame_sub_zones: + if sz["id"] in assigned_sub_zone_ids: + continue + accepts = sz.get("accepts") or [] + if region.content_type in accepts: + candidates.append(sz) + + if not candidates: + return None + + # narrowest first — accepts size 작을수록 우선. Python sort stable → 동률은 declaration order 보존. + candidates.sort(key=lambda sz: len(sz.get("accepts") or [])) + return candidates[0] + + +# ─── Public entry ──────────────────────────────────────────────── + + +def plan_placement( + content_objects: list[ContentObject], + frame_contracts: list[dict[str, Any]], + section_id: str = "", +) -> PlacementPlan: + """ContentObject[] + frame_contracts → PlacementPlan (Stage A + Stage B 통합). + + v0 algorithm : + 1. Stage A = B2 plan_internal_regions() 호출 → internal_regions 획득 + 2. frame 선택 : accepted_content_types cover + 입력 순서 first + - cover 실패 시 → rejection + early return + 3. selected_frame 의 sub_zones 읽음 (B3 catalog) + 4. Stage B (region 1:1 sub_zone 매핑) : + - 각 region 마다 narrowest-accepts first + declaration order sub_zone 선택 + - region.content_unit_ids 를 sub_zone.cardinality 와 비교 + - count > strict → rejection 추가 / SlotAssignment 미생성 + - count ≤ strict → SlotAssignment 생성 (under-fill 허용) + - 매칭 sub_zone 없는 region → rejection 추가 + 5. display_strategy = inline_full 모두 / overflow_buffer = [] (v0) + + Args : + content_objects : list[ContentObject] — B1 v0 extractor 출력 + frame_contracts : list[dict] — frame_contracts.yaml 의 contract dict list + (YAML declaration order = list 순서로 입력 권고) + section_id : region_id / 결과 식별자 prefix + + Returns : + PlacementPlan + """ + plan = PlacementPlan(section_id=section_id) + + if not content_objects: + return plan + + # 1. Stage A — B2 호출 (logic 중복 X) + zone_plan: ZoneRegionPlan = plan_internal_regions( + content_objects=content_objects, + frame_contracts=frame_contracts, + section_id=section_id, + ) + plan.internal_regions = list(zone_plan.internal_regions) + + # 2. frame 선택 + selected_frame = _select_frame(content_objects, frame_contracts) + if selected_frame is None: + plan.rejection.append({ + "reason": "no_frame_covers_content_types", + "content_types": sorted({o.type for o in content_objects}), + }) + return plan + + plan.selected_template_id = selected_frame.get("template_id") + fid = selected_frame.get("frame_id") + plan.selected_frame_id = str(fid) if fid is not None else None + + # 3. selected_frame 의 sub_zones (B3 catalog 의 Frame Slot 선언) + sub_zones = list(selected_frame.get("sub_zones") or []) + + # 4. Stage B — region 1:1 sub_zone 매핑 + assigned_sub_zone_ids: set[str] = set() + + for region in plan.internal_regions: + sub_zone = _assign_region_to_sub_zone(region, sub_zones, assigned_sub_zone_ids) + if sub_zone is None: + plan.rejection.append({ + "reason": "no_compatible_sub_zone", + "region_id": region.region_id, + "region_content_type": region.content_type, + }) + continue + assigned_sub_zone_ids.add(sub_zone["id"]) + + # cardinality 검사 (v0 = strict only) + cardinality = sub_zone.get("cardinality") or {} + strict = cardinality.get("strict") + unit_count = len(region.content_unit_ids) + if strict is not None and unit_count > strict: + plan.rejection.append({ + "reason": "cardinality_strict_exceeded", + "region_id": region.region_id, + "frame_slot_id": sub_zone["id"], + "cardinality_strict": strict, + "unit_count": unit_count, + }) + continue + + # SlotAssignment 생성 (under-fill 허용 — strict 보다 적어도 OK) + partial_path = sub_zone.get("partial_target_path") or "" + for content_unit_id in region.content_unit_ids: + plan.slot_assignments.append( + SlotAssignment( + region_id=region.region_id, + content_unit_id=content_unit_id, + frame_slot_id=sub_zone["id"], + partial_target_path=partial_path, + display_strategy="inline_full", # v0 default + ) + ) + + # 5. v0 = overflow_buffer 미활성 (빈 list 그대로) + return plan + + +# ─── Self-test (B4 v0 correctness 검증) ───────────────────────── + + +def _run_self_test(): + """v0 unit test : Test 1 (text-only → F13) + Test 2 (text+transform → F29). + + fixed input + 실제 frame_contracts.yaml 로드해서 검증. + YAML declaration order = F13 / F29 / F16. + """ + import yaml + from pathlib import Path + + PROJECT_ROOT = Path(__file__).parent.parent + catalog_path = PROJECT_ROOT / "templates" / "phase_z2" / "catalog" / "frame_contracts.yaml" + catalog = yaml.safe_load(catalog_path.read_text(encoding="utf-8")) + # YAML 의 top-level dict — Python 3.7+ insertion-order 보존. declaration order = F13/F29/F16. + frame_contracts = list(catalog.values()) + + # ─── Test 1 : 1 text_block → F13 → pillar_1 ───────────────── + text_obj = ContentObject( + id="t1.text-1", + type="text_block", + role="summary", + raw_payload="* 본문", + size_estimate={"line_count": 6}, + type_specific={ + "format": "bullet_list", "bullet_count": 1, + "max_indent_level": 0, "has_emphasis": False, + }, + ) + plan1 = plan_placement([text_obj], frame_contracts, section_id="t1") + + # frame 선택 = F13 (declaration order first, content_type_set={text_block} cover) + assert plan1.selected_template_id == "three_parallel_requirements", \ + f"Test 1 frame=F13 기대, got {plan1.selected_template_id}" + assert plan1.selected_frame_id == "1171281190", \ + f"Test 1 frame_id=1171281190 기대, got {plan1.selected_frame_id}" + + # 1 region (region-single) + assert len(plan1.internal_regions) == 1, \ + f"Test 1 1 region 기대, got {len(plan1.internal_regions)}" + + # 1 SlotAssignment — region 1 → pillar_1 (declaration order tie-break, 모두 size=1) + assert len(plan1.slot_assignments) == 1, \ + f"Test 1 slot_assignments=1 기대, got {len(plan1.slot_assignments)}" + sa = plan1.slot_assignments[0] + assert sa.frame_slot_id == "pillar_1", \ + f"Test 1 sub_zone=pillar_1 기대, got {sa.frame_slot_id}" + assert sa.region_id == "t1.region-1" + assert sa.content_unit_id == "t1.text-1" + assert sa.display_strategy == "inline_full" + assert "f13b__col" in sa.partial_target_path, \ + f"Test 1 partial_target_path 보존 기대, got {sa.partial_target_path}" + + # under-fill 허용 — pillar_2 / pillar_3 미할당, rejection 0 + assert len(plan1.rejection) == 0, f"Test 1 rejection=0 기대, got {plan1.rejection}" + assert plan1.overflow_buffer == [] + print("[OK] Test 1 (text-only → F13 → pillar_1) passed.") + + # ─── Test 2 : 1 text + 1 transform → F29 → product_column / process_column ─ + text_obj2 = ContentObject( + id="t2.text-1", + type="text_block", + role="summary", + raw_payload="* 본문", + size_estimate={"line_count": 6}, + type_specific={ + "format": "bullet_list", "bullet_count": 1, + "max_indent_level": 0, "has_emphasis": False, + }, + ) + transform_obj = ContentObject( + id="t2.transform-1", + type="transform_table", + role="summary", + raw_payload="| AS-IS | ➜ | TO-BE |\n|---|---|---|\n| a | ➜ | b |", + size_estimate={"rows": 1}, + type_specific={ + "pair_count": 1, "arrow_glyph": "➜", + "rows": [{"from": "a", "arrow": "➜", "to": "b"}], + }, + ) + plan2 = plan_placement([text_obj2, transform_obj], frame_contracts, section_id="t2") + + # frame 선택 = F29 (transform_table 수용 유일) + assert plan2.selected_template_id == "process_product_two_way", \ + f"Test 2 frame=F29 기대, got {plan2.selected_template_id}" + + # 2 regions (text=primary / transform=supporting) + assert len(plan2.internal_regions) == 2, \ + f"Test 2 2 regions 기대, got {len(plan2.internal_regions)}" + + # 2 SlotAssignments + assert len(plan2.slot_assignments) == 2, \ + f"Test 2 slot_assignments=2 기대, got {len(plan2.slot_assignments)}" + + # narrowest-first 검증 (F29 deadlock 방지 핵심) : + # text region → product_column (accepts=[text], size 1, narrowest) + # transform region → process_column (accepts=[text+transform], size 2, 남은 candidate) + text_sa = next( + (sa for sa in plan2.slot_assignments if sa.content_unit_id == "t2.text-1"), + None, + ) + transform_sa = next( + (sa for sa in plan2.slot_assignments if sa.content_unit_id == "t2.transform-1"), + None, + ) + + assert text_sa is not None, "text content_unit SlotAssignment 존재 기대" + assert text_sa.frame_slot_id == "product_column", \ + f"Test 2 text → product_column (narrowest) 기대, got {text_sa.frame_slot_id}" + assert text_sa.region_id == "t2.region-1" + + assert transform_sa is not None, "transform content_unit SlotAssignment 존재 기대" + assert transform_sa.frame_slot_id == "process_column", \ + f"Test 2 transform → process_column 기대, got {transform_sa.frame_slot_id}" + assert transform_sa.region_id == "t2.region-2" + + # rejection 없음 / overflow_buffer 빈 list (under-fill 허용) + assert len(plan2.rejection) == 0, f"Test 2 rejection=0 기대, got {plan2.rejection}" + assert plan2.overflow_buffer == [] + print("[OK] Test 2 (text+transform → F29 → product_column / process_column) passed.") + + print("\n=== B4 v0 self-test PASS ===") + + +if __name__ == "__main__": + _run_self_test()