Phase W: - weight 비율 초기 배정 (space_allocator header 높이 반영) - block_assembler 공통 조립 함수 (filled/assembled 통합) - filled → Selenium 측정 → context 저장 - sidebar overflow 확장 + body 재배분 - sub_layouts 사전 계산 (이미지 누락 해결) Phase V': - 팝업 링크 우측상단 배치 (인라인 → position:absolute) - 표 내용 Kei 판단 (공란 크기 계산 → 행/열 산출 → Kei 요약) - 출처 라벨 삭제 + 이미지 아래 캡션 배치 - after 공란 제거 (결론 바로 위까지 body/sidebar 채움) 추가: - V-10 bold 키워드: 기계적 추출 → Kei 문맥 판단 - ** 마크다운 → <strong> 변환 - [이미지:] 마커 제거 (bold 변환 전 처리) - grid-template-rows AFTER 크기 반영 (Sonnet final) - assemble_stage2 CSS font-size override, white-space fix - 하드코딩 전수 검토 완료 - 본심 여러 topic 텍스트 합침 Phase X 계획 문서 작성 (동적 역할 구조) Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
291 lines
9.8 KiB
Python
291 lines
9.8 KiB
Python
from __future__ import annotations
|
|
|
|
import argparse
|
|
import asyncio
|
|
import json
|
|
import sys
|
|
from pathlib import Path
|
|
|
|
# Resolve the directory one level above this script's own folder and put it at
# the front of sys.path (once) so the `src.*` imports below can be resolved
# when this file is executed directly.
ROOT = Path(__file__).resolve().parents[1]
if not any(entry == str(ROOT) for entry in sys.path):
    sys.path.insert(0, str(ROOT))
|
|
|
|
from src.block_reference import select_and_generate_references
|
|
from src.config import settings
|
|
from src.content_verifier import generate_with_retry
|
|
from src.design_director import LAYOUT_PRESETS, select_preset
|
|
from src.image_utils import embed_images, get_image_sizes
|
|
from src.mdx_normalizer import normalize_mdx_content
|
|
from src.pipeline_context import (
|
|
Analysis,
|
|
BlockReference,
|
|
ContainerInfo,
|
|
DesignBudget,
|
|
FontHierarchy,
|
|
NormalizedContent,
|
|
PageStructure,
|
|
PipelineContext,
|
|
Topic,
|
|
create_context,
|
|
)
|
|
from src.renderer import render_slide_from_html
|
|
from src.slide_measurer import capture_slide_screenshot, measure_rendered_heights
|
|
from src.space_allocator import (
|
|
ContainerSpec as LegacyContainerSpec,
|
|
calculate_container_specs,
|
|
calculate_design_budget,
|
|
calculate_dynamic_ratio,
|
|
calculate_font_hierarchy,
|
|
)
|
|
|
|
|
|
def _load_json(path: Path) -> dict:
    """Read *path* as UTF-8 (a leading BOM is tolerated) and parse it as JSON.

    Args:
        path: Location of the JSON document.

    Returns:
        The value produced by ``json.loads`` for the file's text.
    """
    # 'utf-8-sig' strips a byte-order mark when present and otherwise decodes
    # exactly like plain UTF-8.
    text = path.read_text(encoding='utf-8-sig')
    return json.loads(text)
|
|
|
|
|
|
def _build_context(content: str, base_path: str, stage1a: dict, stage1b: dict) -> PipelineContext:
    """Create a pipeline context and seed it from the content and stage-1 results.

    Args:
        content: Raw source text; handed to ``create_context`` and to
            ``normalize_mdx_content``.
        base_path: Forwarded unchanged to ``create_context``.
        stage1a: Stage-1A result. Read keys: ``analysis`` (``core_message``,
            ``title``, and optional ``total_pages`` defaulting to 1),
            ``page_structure`` and ``topics``.
        stage1b: Stage-1B result. Read key: ``concepts``, whose items each
            carry a ``topic_id``.

    Returns:
        The context with ``normalized``, ``analysis``, ``page_structure`` and
        ``topics`` filled in.
    """
    ctx = create_context(content, base_path)

    # Copy the normalizer's output field by field into the typed model.
    parsed = normalize_mdx_content(content)
    normalized_fields = ('clean_text', 'title', 'images', 'popups', 'tables', 'sections')
    ctx.normalized = NormalizedContent(**{name: parsed[name] for name in normalized_fields})

    analysis_src = stage1a['analysis']
    ctx.analysis = Analysis(
        core_message=analysis_src['core_message'],
        title=analysis_src['title'],
        total_pages=analysis_src.get('total_pages', 1),
    )
    ctx.page_structure = PageStructure(roles=stage1a['page_structure'])

    # Stage-1B entries are looked up by topic id and layered on top of the
    # matching stage-1A topic; topics without a match are used as they are.
    refinements = {concept['topic_id']: concept for concept in stage1b['concepts']}
    ctx.topics = [
        Topic(**{**topic, **refinements.get(topic['id'], {})})
        for topic in stage1a['topics']
    ]
    return ctx
|
|
|
|
|
|
def _stage_1_5a(ctx: PipelineContext) -> PipelineContext:
    """Derive fonts, layout preset, container specs and image metadata.

    Sets ``font_hierarchy``, ``container_ratio``, ``preset_name``, ``preset``,
    ``containers`` and ``slide_images`` on *ctx*, and replaces ``ctx.analysis``
    with a copy that also carries ``image_sizes``.

    Args:
        ctx: Context whose ``raw_content``, ``base_path``, ``page_structure``,
            ``topics`` and ``analysis`` are already populated.

    Returns:
        The same context object, updated in place.
    """
    image_sizes = get_image_sizes(ctx.raw_content, ctx.base_path)

    # Only roles whose page-structure entry is a dict contribute a text length.
    role_text_lengths = {
        role: len(ctx.get_role_content(role))
        for role, info in ctx.page_structure.roles.items()
        if isinstance(info, dict)
    }

    fonts = calculate_font_hierarchy(role_text_lengths)
    ctx.font_hierarchy = FontHierarchy(
        key_msg=fonts.get('핵심', 14.0),
        core=fonts.get('본심', 12.0),
        bg=fonts.get('배경', 11.0),
        sidebar=fonts.get('첨부', 10.0),
    )
    ctx.container_ratio = calculate_dynamic_ratio(role_text_lengths, fonts)

    preset_name = select_preset({
        'topics': [topic.model_dump() for topic in ctx.topics],
        'page_structure': ctx.page_structure.roles,
    })
    ctx.preset_name = preset_name
    # An unknown preset name falls back to an empty preset.
    ctx.preset = LAYOUT_PRESETS.get(preset_name, {})

    specs = calculate_container_specs(
        page_structure=ctx.page_structure.roles,
        topics=[topic.model_dump() for topic in ctx.topics],
        preset=ctx.preset,
        slide_width=settings.slide_width,
        slide_height=settings.slide_height,
    )
    spec_fields = (
        'role',
        'zone',
        'topic_ids',
        'weight',
        'height_px',
        'width_px',
        'max_height_cost',
        'block_constraints',
    )
    containers = {}
    for role, spec in specs.items():
        containers[role] = ContainerInfo(**{name: getattr(spec, name) for name in spec_fields})
    ctx.containers = containers

    # One record per measured image. The path is joined onto base_path when one
    # is set; the ratio is width / height rounded to 2 places, with the
    # divisor clamped to at least 1 so a zero height cannot divide by zero.
    ctx.slide_images = [
        {
            'path': str(Path(ctx.base_path) / key if ctx.base_path else Path(key)),
            'width': meta.get('width', 0),
            'height': meta.get('height', 0),
            'ratio': round(meta.get('width', 1) / max(1, meta.get('height', 1)), 2),
            'topic_id': meta.get('topic_id'),
            'b64': '',
        }
        for key, meta in (image_sizes or {}).items()
    ]
    ctx.analysis = ctx.analysis.model_copy(update={'image_sizes': image_sizes or {}})
    return ctx
|
|
|
|
|
|
def _stage_1_7(ctx: PipelineContext) -> PipelineContext:
    """Pick a block reference per role and store the typed result on *ctx*.

    Args:
        ctx: Context with ``topics``, ``containers`` and ``page_structure`` set.

    Returns:
        The same context with ``references`` mapping each role to a
        ``BlockReference``.
    """
    selected = select_and_generate_references(
        topics=[topic.model_dump() for topic in ctx.topics],
        containers=ctx.containers,
        page_structure=ctx.page_structure.roles,
    )

    # Every returned entry must provide all of these keys.
    reference_fields = (
        'block_id',
        'variant',
        'visual_type',
        'schema_info',
        'design_reference_html',
    )
    references = {}
    for role, entry in selected.items():
        references[role] = BlockReference(**{name: entry[name] for name in reference_fields})
    ctx.references = references
    return ctx
|
|
|
|
|
|
def _stage_1_5b(ctx: PipelineContext) -> PipelineContext:
    """Attach a design budget to every container.

    For each container the budget is computed from its pixel size, the schema
    of the role's block reference (an empty dict when the role has none) and
    the font size chosen for the role.

    Args:
        ctx: Context with ``containers``, ``references`` and ``font_hierarchy``.

    Returns:
        The same context; ``containers`` is replaced by copies that carry a
        ``design_budget``.
    """
    # Role name -> FontHierarchy attribute; unlisted roles use 'core'.
    role_to_font_attr = {'본심': 'core', '배경': 'bg', '첨부': 'sidebar', '결론': 'core'}
    budget_fields = (
        'available_height_px',
        'available_width_px',
        'max_circle_diameter',
        'max_img_width',
        'max_img_height',
        'fits',
    )

    with_budgets = {}
    for role, container in ctx.containers.items():
        reference = ctx.references.get(role)
        schema_info = {} if not reference else reference.schema_info
        font_attr = role_to_font_attr.get(role, 'core')
        budget = calculate_design_budget(
            container_height_px=container.height_px,
            container_width_px=container.width_px,
            block_schema=schema_info,
            # 12.0 is used when the hierarchy lacks the attribute.
            font_size=getattr(ctx.font_hierarchy, font_attr, 12.0),
        )
        design_budget = DesignBudget(**{name: budget[name] for name in budget_fields})
        with_budgets[role] = container.model_copy(update={'design_budget': design_budget})

    ctx.containers = with_budgets
    return ctx
|
|
|
|
|
|
async def _stage_2(ctx: PipelineContext) -> PipelineContext:
    """Generate the slide HTML via ``generate_with_retry``.

    Builds the analysis payload (including a ``phase_t`` section with fonts,
    container ratio, references and design budgets), converts the containers
    to legacy container specs, and stores the first element of the generator's
    two-item result in ``ctx.generated_html``. The second element is discarded.

    Args:
        ctx: Context populated by the earlier stages.

    Returns:
        The same context with ``generated_html`` set.
    """
    analysis = ctx.analysis
    payload = {
        'topics': [topic.model_dump() for topic in ctx.topics],
        'page_structure': ctx.page_structure.roles,
        'core_message': analysis.core_message,
        'title': analysis.title,
        'total_pages': analysis.total_pages,
        'image_sizes': analysis.image_sizes,
    }

    # generate_with_retry receives the space_allocator spec type, so each
    # ContainerInfo is copied field by field into a LegacyContainerSpec.
    spec_fields = (
        'role',
        'zone',
        'topic_ids',
        'weight',
        'height_px',
        'width_px',
        'max_height_cost',
        'block_constraints',
    )
    legacy_specs = {}
    for role, container in ctx.containers.items():
        legacy_specs[role] = LegacyContainerSpec(
            **{name: getattr(container, name) for name in spec_fields}
        )

    phase_t = {
        'font_hierarchy': ctx.font_hierarchy.model_dump(),
        'container_ratio': ctx.container_ratio,
    }
    phase_t['references'] = {
        role: reference.model_dump() for role, reference in ctx.references.items()
    }
    # Containers without a budget are reported as an empty dict.
    phase_t['design_budgets'] = {
        role: container.design_budget.model_dump() if container.design_budget else {}
        for role, container in ctx.containers.items()
    }
    payload['phase_t'] = phase_t

    generated_html, _ = await generate_with_retry(
        content=ctx.raw_content,
        analysis=payload,
        container_specs=legacy_specs,
        preset=ctx.preset,
        images=ctx.slide_images,
    )
    ctx.generated_html = generated_html
    return ctx
|
|
|
|
|
|
def _stage_3(ctx: PipelineContext) -> PipelineContext:
    """Render the generated HTML into the final slide markup.

    Args:
        ctx: Context with ``generated_html``, ``preset``, ``topics``,
            ``page_structure`` and ``analysis`` available.

    Returns:
        The same context with ``rendered_html`` set; when ``base_path`` is
        non-empty the rendered HTML is additionally passed through
        ``embed_images``.
    """
    render_meta = {
        'topics': [topic.model_dump() for topic in ctx.topics],
        'page_structure': ctx.page_structure.roles,
        'core_message': ctx.analysis.core_message,
        'title': ctx.analysis.title,
    }
    ctx.rendered_html = render_slide_from_html(ctx.generated_html, render_meta, ctx.preset)

    # Without a base path the image-embedding step is skipped.
    if not ctx.base_path:
        return ctx

    ctx.rendered_html = embed_images(ctx.rendered_html, ctx.base_path)
    return ctx
|
|
|
|
|
|
def _stage_4_lite(ctx: PipelineContext) -> PipelineContext:
    """Measure the rendered slide, capture a screenshot and score the result.

    Args:
        ctx: Context with ``rendered_html`` set.

    Returns:
        The same context with ``measurement``, ``screenshot_b64`` (empty string
        when the capture returns a falsy value) and ``quality_score`` set. The
        score is 60 when any measured zone reports ``overflowed`` and 100
        otherwise.
    """
    ctx.measurement = measure_rendered_heights(ctx.rendered_html)
    ctx.screenshot_b64 = capture_slide_screenshot(ctx.rendered_html) or ''

    zones = ctx.measurement.get('zones', {})
    has_overflow = any(zone.get('overflowed') for zone in zones.values())
    ctx.quality_score = 60 if has_overflow else 100
    return ctx
|
|
|
|
|
|
async def main() -> None:
    """Run the pipeline from the command line and write its artifacts.

    Reads the source content and the two stage-1 JSON files, runs the stages
    in order (1.5a, 1.7, 1.5b, 2, 3, 4-lite) and writes four files into the
    output directory: ``generated_html.json``, ``final.html``,
    ``measurement.json`` and ``context.json`` (the context dump omits
    ``screenshot_b64`` and ``rendered_html``).
    """
    parser = argparse.ArgumentParser(
        description='Run the slide pipeline from precomputed stage-1 results.',
    )
    parser.add_argument('--input', required=True, help='path to the source content file')
    parser.add_argument(
        '--stage1a',
        required=True,
        help='stage-1A JSON (analysis, page_structure, topics)',
    )
    parser.add_argument('--stage1b', required=True, help='stage-1B JSON (concepts)')
    parser.add_argument(
        '--base-path',
        default='',
        help='base directory used to resolve image paths (default: none)',
    )
    parser.add_argument('--output-dir', required=True, help='directory for the output files')
    args = parser.parse_args()

    # Decode with 'utf-8-sig', like the JSON inputs: a byte-order mark at the
    # start of the file would otherwise end up as a stray U+FEFF at the start
    # of the content. Files without a BOM decode exactly as with plain UTF-8.
    content = Path(args.input).read_text(encoding='utf-8-sig')
    stage1a = _load_json(Path(args.stage1a))
    stage1b = _load_json(Path(args.stage1b))

    ctx = _build_context(content, args.base_path, stage1a, stage1b)
    ctx = _stage_1_5a(ctx)
    ctx = _stage_1_7(ctx)
    ctx = _stage_1_5b(ctx)
    ctx = await _stage_2(ctx)
    ctx = _stage_3(ctx)
    ctx = _stage_4_lite(ctx)

    out_dir = Path(args.output_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    def write_json(name: str, payload: object) -> None:
        # ensure_ascii=False keeps non-ASCII text readable in the output.
        (out_dir / name).write_text(
            json.dumps(payload, ensure_ascii=False, indent=2),
            encoding='utf-8',
        )

    write_json('generated_html.json', ctx.generated_html)
    (out_dir / 'final.html').write_text(ctx.rendered_html, encoding='utf-8')
    write_json('measurement.json', ctx.measurement)
    (out_dir / 'context.json').write_text(
        ctx.model_dump_json(indent=2, exclude={'screenshot_b64', 'rendered_html'}),
        encoding='utf-8',
    )
|
|
|
|
|
|
# Script entry point: only when executed directly (not on import), run the
# async pipeline driver to completion on a fresh event loop.
if __name__ == '__main__':
    asyncio.run(main())
|
|
|
|
|