Add Phase Z B4 source-shape-aware placement
- enable B1/B2/B4 source-shape-aware F13 placement behind env flag
- align F13 placement_trace with mapper top_bullets cardinality
- preserve canonical render output when flag is off
This commit is contained in:
@@ -26,6 +26,7 @@ from __future__ import annotations
|
||||
|
||||
import re
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Optional
|
||||
|
||||
# B1 v0 helper 처리 정직 기록 (기존 보고 정정 — 2026-04-30) :
|
||||
# - `phase_z2_mapper` 미수정. 기존 mapper helper (`_extract_markdown_table` 등) move /
|
||||
@@ -46,12 +47,14 @@ class ContentObject:
|
||||
"""SPEC v1 §1.1 base schema. v0 = text_block + transform_table 만 지원.
|
||||
|
||||
Fields :
|
||||
id : section 내 unique id (예: '03-2.transform-1' / '03-2.text-1')
|
||||
type : "text_block" | "transform_table"
|
||||
role : v0 = "summary" 만 (정밀화는 별 axis)
|
||||
raw_payload : 원본 markdown (자름 / 변형 X — 원문 보존 룰)
|
||||
size_estimate : type 별 (line_count / rows 등)
|
||||
type_specific : type 별 detail (SPEC v1 §1.2)
|
||||
id : section 내 unique id (예: '03-2.transform-1' / '03-2.text-1')
|
||||
type : "text_block" | "transform_table"
|
||||
role : v0 = "summary" 만 (정밀화는 별 axis)
|
||||
raw_payload : 원본 markdown (자름 / 변형 X — 원문 보존 룰)
|
||||
size_estimate : type 별 (line_count / rows 등)
|
||||
type_specific : type 별 detail (SPEC v1 §1.2)
|
||||
source_shape_index : positional index within source_shape (Option 1, optional)
|
||||
source_shape_kind : "top_bullets" | "h3_subsections" | ... (Option 1, optional)
|
||||
"""
|
||||
|
||||
id: str
|
||||
@@ -60,6 +63,8 @@ class ContentObject:
|
||||
raw_payload: str
|
||||
size_estimate: dict = field(default_factory=dict)
|
||||
type_specific: dict = field(default_factory=dict)
|
||||
source_shape_index: Optional[int] = None
|
||||
source_shape_kind: Optional[str] = None
|
||||
|
||||
|
||||
# ─── Transform table extraction ─────────────────────────────────
|
||||
@@ -187,7 +192,7 @@ def _detect_text_block_specific(content: str) -> tuple[dict, int]:
|
||||
# ─── Public entry ───────────────────────────────────────────────
|
||||
|
||||
|
||||
def extract_content_objects(section) -> list[ContentObject]:
|
||||
def extract_content_objects(section, source_shape: Optional[str] = None) -> list[ContentObject]:
|
||||
"""MDX section.raw_content → typed content_object list (SPEC v1 §1).
|
||||
|
||||
v0 minimal :
|
||||
@@ -196,15 +201,45 @@ def extract_content_objects(section) -> list[ContentObject]:
|
||||
- 미지원 type (table / image / diagram / details) = 무시 (별 axis)
|
||||
- 원문 (raw_payload) = 자름 / 변형 X (원문 보존 룰)
|
||||
|
||||
Option 1 (source_shape-aware) :
|
||||
- source_shape="top_bullets" : raw_content 를 mapper.split_source 로 N units 분할 →
|
||||
unit 별 ContentObject 1 개 (text_block) with source_shape_index=i / source_shape_kind="top_bullets"
|
||||
- source_shape=None 또는 미지원 값 (h3_subsections 등) : 기존 legacy 동작
|
||||
|
||||
Args :
|
||||
section : MdxSection-like 객체 (section_id, raw_content 필드 필요)
|
||||
section : MdxSection-like 객체 (section_id, raw_content 필드 필요)
|
||||
source_shape : "top_bullets" 시 source_shape-aware 분기. None 이면 legacy.
|
||||
|
||||
Returns :
|
||||
list[ContentObject] — 0 ~ 2 개 (content 비어 있으면 0, transform-only 면 1, mixed 면 2)
|
||||
list[ContentObject] — legacy 0~2 / top_bullets N (bullet 수)
|
||||
"""
|
||||
content = section.raw_content
|
||||
section_id = section.section_id
|
||||
|
||||
if source_shape == "top_bullets":
|
||||
from phase_z2_mapper import split_source
|
||||
units = split_source("top_bullets", content)
|
||||
objects: list[ContentObject] = []
|
||||
for i, unit in enumerate(units):
|
||||
unit_text = unit if isinstance(unit, str) else str(unit)
|
||||
if not unit_text.strip():
|
||||
continue
|
||||
text_specific, line_count = _detect_text_block_specific(unit_text)
|
||||
objects.append(
|
||||
ContentObject(
|
||||
id=f"{section_id}.text-{i + 1}",
|
||||
type="text_block",
|
||||
role="summary",
|
||||
raw_payload=unit_text.strip(),
|
||||
size_estimate={"line_count": line_count},
|
||||
type_specific=text_specific,
|
||||
source_shape_index=i,
|
||||
source_shape_kind="top_bullets",
|
||||
)
|
||||
)
|
||||
return objects
|
||||
|
||||
# legacy path (source_shape=None 또는 미지원 값)
|
||||
objects: list[ContentObject] = []
|
||||
|
||||
# 1. transform_table 추출 시도 (3-col with arrow)
|
||||
|
||||
Reference in New Issue
Block a user