', 1)[0]
bullets = []
for line in block.splitlines():
stripped = line.strip()
if stripped.startswith('* '):
content = _plain_text(stripped[2:])
if content and '[??' not in content:
bullets.append(content)
return bullets
def _extract_image_src_from_raw(raw: str) -> str:
m = re.search(r'!\[[^\]]*\]\(([^\)]+)\)', raw)
return m.group(1).strip() if m else ''
def _extract_caption_from_raw(raw: str) -> str:
m = re.search(r'\*\[[^\]]+\][^*]+\*', raw)
if m:
return _plain_text(m.group(0))
return 'relation diagram'
def _parse_comparison_rows_from_raw(raw: str) -> list[tuple[str, str, str]]:
    """Parse the three-column comparison table out of *raw*.

    The table is read from the second block returned by ``_details_blocks``.
    Its columns are laid out as ``DX | axis | BIM``; each returned tuple is
    reordered to ``(axis, dx, bim)`` with every cell passed through
    ``_plain_text``.

    Skipped lines:
      * lines that do not start with ``|``;
      * rows that do not split into exactly three cells;
      * the delimiter row (every cell is hyphens with optional leading or
        trailing colon, e.g. ``---``, ``:---``, ``---:``, ``:---:``);
      * the header row whose outer cells read ``DX`` and ``BIM``.

    Returns an empty list when fewer than two detail blocks are present.
    """
    blocks = _details_blocks(raw)
    if len(blocks) < 2:
        return []

    rows: list[tuple[str, str, str]] = []
    for line in blocks[1].splitlines():
        stripped = line.strip()
        if not stripped.startswith('|'):
            continue
        cells = [cell.strip() for cell in stripped.strip('|').split('|')]
        if len(cells) != 3:
            continue
        # A delimiter row is recognised only when *all* cells are delimiter
        # cells, independent of the alignment markers used per column.  The
        # previous per-column prefix checks missed rows such as
        # ``|---|---|:---:|`` and dropped data rows with a ``---`` cell.
        if all(re.fullmatch(r':?-+:?', cell) for cell in cells):
            continue
        dx, axis, bim = (_plain_text(cell) for cell in cells)
        if dx == 'DX' and bim == 'BIM':
            continue
        rows.append((axis, dx, bim))
    return rows
def _conclusion_from_raw(raw: str) -> str:
    """Return the conclusion text taken from the first ``:::note[...]`` block.

    The body between ``:::note[label]`` and the closing ``:::`` is scanned
    line by line; the first line that (after stripping) starts with ``'* '``
    wins and its text after the bullet marker is returned.  If there is no
    bullet line, the whole body is used; if there is no note block, the body
    is the empty string.  In every case the result goes through
    ``_plain_text``.
    """
    note = re.search(r':::note\[[^\]]+\](.*?):::', raw, flags=re.S)
    body = '' if note is None else note.group(1)

    first_bullet = next(
        (
            text[2:]
            for text in map(str.strip, body.splitlines())
            if text.startswith('* ')
        ),
        None,
    )
    if first_bullet is not None:
        return _plain_text(first_bullet)
    return _plain_text(body)
# Return the string used for the relation visual.
# A truthy ``image_src`` selects the f-string branch; otherwise the
# parenthesised concatenation of plain string literals is returned.
# NOTE(review): the string literals below look truncated in this copy of the
# file (presumably markup was lost) - confirm their contents, including how
# ``caption`` is used, against the original before editing.
# NOTE(review): _build_stage2_retry_html replaces the substring
# "height:220px" in this function's result, so the full literals presumably
# contain that text - verify.
def _relation_visual(image_src: str, caption: str) -> str:
if image_src:
return f'
'
return (
''
'
'
)
# Build the stage-2 retry payload directly from ``ctx.raw_content``.
# Returns a dict with the keys 'body_html', 'sidebar_html', 'footer_html'
# and 'reasoning'.
# ``retry_plan`` is not referenced anywhere in the visible body.
# NOTE(review): many string literals in this function look truncated or empty
# in this copy of the file (presumably markup was lost).  Several locals
# (e.g. problem_title, evidence_title, conclusion_text, the *_items_html
# strings) have no visible use and are presumably interpolated in the missing
# text - confirm against the original before editing.
def _build_stage2_retry_html(ctx: PipelineContext, retry_plan: dict) -> dict:
raw = ctx.raw_content or ''
# Topics 1-5 provide the section titles; each title falls back to a fixed
# English label when the topic or its title is missing/empty.
problem_topic = _topic(ctx, 1)
definitions_topic = _topic(ctx, 2)
relation_topic = _topic(ctx, 3)
evidence_topic = _topic(ctx, 4)
comparison_topic = _topic(ctx, 5)
problem_title = problem_topic.title if problem_topic and problem_topic.title else 'Problem'
definitions_title = definitions_topic.title if definitions_topic and definitions_topic.title else 'Definitions'
relation_title = relation_topic.title if relation_topic and relation_topic.title else 'Relationship'
evidence_title = evidence_topic.title if evidence_topic and evidence_topic.title else 'Evidence'
comparison_title = comparison_topic.title if comparison_topic and comparison_topic.title else 'Comparison'
# Extract content from the raw text, capping the number of items per section.
problem_bullets = _problem_bullets_from_raw(raw)[:2]
evidence_bullets = _evidence_bullets_from_raw(raw)[:4]
definition_sections = _definition_sections_from_raw(raw)[:3]
relation_bullets = _relation_bullets_from_raw(raw)[:4]
comparison_rows = _parse_comparison_rows_from_raw(raw)
# NOTE(review): the preferred axis labels are '?' placeholders here, which
# looks like an encoding loss of non-ASCII labels - verify the intended values.
preferred_axes = ['??', '????', '???', '???']
# Rows whose axis (first tuple element) is preferred come first; if fewer than
# four were found, top up with the remaining rows in source order, skipping
# axes already taken.  The result is capped at four rows.
picked_rows = [row for row in comparison_rows if row[0] in preferred_axes]
if len(picked_rows) < 4:
seen = {row[0] for row in picked_rows}
for row in comparison_rows:
if row[0] not in seen:
picked_rows.append(row)
seen.add(row[0])
if len(picked_rows) >= 4:
break
picked_rows = picked_rows[:4]
# The image reference is cleared when the candidate file does not exist under
# ctx.base_path; the trailing else also clears it (presumably the branch for a
# missing image_src or base_path - indentation is not visible in this copy).
# NOTE(review): '/' is rewritten to a backslash before joining, which looks
# Windows-specific; on POSIX pathlib treats a backslash as an ordinary
# character, so the existence check would presumably fail - confirm.
image_src = _extract_image_src_from_raw(raw)
if image_src and ctx.base_path:
candidate = Path(ctx.base_path) / image_src.lstrip('/\\').replace('/', '\\')
if not candidate.exists():
image_src = ''
else:
image_src = ''
image_caption = _extract_caption_from_raw(raw)
conclusion_text = _conclusion_from_raw(raw)
# Character counts drive the layout: the sidebar/main split is 34%/66% when
# the definition bodies are at least as long as the relation bullets and
# 31%/69% otherwise; the relation visual is 210px tall when the intro text
# exceeds 320 characters and 230px otherwise.
intro_len = sum(len(x) for x in problem_bullets + evidence_bullets)
defs_len = sum(len(s['body']) for s in definition_sections)
relation_len = sum(len(x) for x in relation_bullets)
sidebar_width = '34%' if defs_len >= relation_len else '31%'
main_width = '66%' if defs_len >= relation_len else '69%'
relation_visual_height = '210px' if intro_len > 320 else '230px'
# Each bullet is passed through _trim_visible_copy with a per-section
# floor/ceiling and the results are concatenated.
problem_items_html = ''.join(
f'{_trim_visible_copy(item, floor=90, ceiling=220)}'
for item in problem_bullets
)
evidence_items_html = ''.join(
f'{_trim_visible_copy(item, floor=80, ceiling=180)}'
for item in evidence_bullets
)
relation_items_html = ''.join(
f'{_trim_visible_copy(item, floor=80, ceiling=210)}'
for item in relation_bullets
)
# One card per definition section, built from its 'title' and trimmed 'body'.
definition_cards_html = ''
for section in definition_sections:
definition_cards_html += (
''
f'
{section["title"]}
'
f'
{_trim_visible_copy(section["body"], floor=120, ceiling=250)}
'
'
'
)
# One entry per picked comparison row, laid out as dx | axis | bim.
comparison_rows_html = ''
for axis, dx, bim in picked_rows:
comparison_rows_html += (
''
f'| {_trim_visible_copy(dx, floor=55, ceiling=120)} | '
f'{axis} | '
f'{_trim_visible_copy(bim, floor=55, ceiling=120)} | '
'
'
)
intro_html = (
''
)
# The relation section embeds the visual from _relation_visual, with its
# "height:220px" text swapped for the computed relation_visual_height.
relation_html = (
f''
'
'
f'{_relation_visual(image_src, image_caption).replace("height:220px", f"height:{relation_visual_height}")}'
f'
{image_caption}
'
'
'
'
'
f'
{relation_title}
'
f'
'
'
'
'
'
)
comparison_html = (
''
f'
{comparison_title}
'
f'
'
'
'
)
# The body is the intro, relation and comparison sections in that order.
body_html = (
''
f'{intro_html}'
f'{relation_html}'
f'{comparison_html}'
'
'
)
# The sidebar carries the definitions title followed by the definition cards.
sidebar_html = (
''
f'
{definitions_title}
{definition_cards_html}
'
'
'
)
footer_html = (
''
)
# 'reasoning' records the regrouping and the chosen main/sidebar widths.
return {
'body_html': body_html,
'sidebar_html': sidebar_html,
'footer_html': footer_html,
'reasoning': f"retry regrouping by content importance: intro(problem+evidence), body(relation+comparison), sidebar(definitions), widths {main_width}/{sidebar_width}",
}
async def _stage_2(ctx: PipelineContext, retry_plan: dict | None = None) -> PipelineContext:
    """Run stage 2: generate slide HTML and record its verification results.

    Builds the analysis payload and per-role container specs from *ctx*,
    awaits ``generate_with_retry``, stores the resulting HTML on
    ``ctx.generated_html``, writes ``stage_2_verification.json`` into the run
    directory, saves the ``'stage_2'`` snapshot and returns *ctx*.

    When *retry_plan* is truthy the generated HTML is replaced by the output
    of ``_build_stage2_retry_html``.
    """
    analysis_dict = {
        'topics': [topic.model_dump() for topic in ctx.topics],
        'page_structure': ctx.page_structure.roles,
        'core_message': ctx.analysis.core_message,
        'title': ctx.analysis.title,
        'total_pages': ctx.analysis.total_pages,
        'image_sizes': ctx.analysis.image_sizes,
    }

    # One legacy spec per container role, copied field by field.
    container_specs_dict = {}
    for role, info in ctx.containers.items():
        container_specs_dict[role] = LegacyContainerSpec(
            role=info.role,
            zone=info.zone,
            topic_ids=info.topic_ids,
            weight=info.weight,
            height_px=info.height_px,
            width_px=info.width_px,
            max_height_cost=info.max_height_cost,
            block_constraints=info.block_constraints,
        )

    # Extra design data travels under the 'phase_t' key of the analysis payload.
    phase_t = {}
    phase_t['font_hierarchy'] = ctx.font_hierarchy.model_dump()
    phase_t['container_ratio'] = ctx.container_ratio
    references = {}
    for role, refs in ctx.references.items():
        references[role] = [item.model_dump() for item in refs]
    phase_t['references'] = references
    design_budgets = {}
    for role, info in ctx.containers.items():
        if info.design_budget:
            design_budgets[role] = info.design_budget.model_dump()
        else:
            design_budgets[role] = {}
    phase_t['design_budgets'] = design_budgets
    analysis_dict['phase_t'] = phase_t

    generated, verification = await generate_with_retry(
        content=ctx.raw_content,
        analysis=analysis_dict,
        container_specs=container_specs_dict,
        preset=ctx.preset,
        images=ctx.slide_images,
    )

    # NOTE(review): with a retry plan the generation result above is discarded,
    # yet the verification written below still describes that discarded
    # result - confirm this is intended.
    if retry_plan:
        generated = _build_stage2_retry_html(ctx, retry_plan)
    ctx.generated_html = generated

    verification_path = ctx.get_run_dir() / 'stage_2_verification.json'
    summary = {}
    for area, result in verification.items():
        summary[area] = {
            'passed': result.passed,
            'score': result.score,
            'errors': result.errors,
        }
    _write_json(verification_path, summary)

    ctx.save_snapshot('stage_2')
    return ctx
def _stage_3(ctx: PipelineContext) -> PipelineContext:
    """Run stage 3: render the generated HTML into the final slide document.

    The rendered output is stored on ``ctx.rendered_html``.  When
    ``ctx.base_path`` is set, the rendered HTML is additionally passed through
    ``embed_images`` with that base path.  Saves the ``'stage_3'`` snapshot
    and returns *ctx*.
    """
    topics_payload = [topic.model_dump() for topic in ctx.topics]
    slide_meta = {
        'topics': topics_payload,
        'page_structure': ctx.page_structure.roles,
        'core_message': ctx.analysis.core_message,
        'title': ctx.analysis.title,
    }

    ctx.rendered_html = render_slide_from_html(
        ctx.generated_html,
        slide_meta,
        ctx.preset,
    )
    if ctx.base_path:
        ctx.rendered_html = embed_images(ctx.rendered_html, ctx.base_path)

    ctx.save_snapshot('stage_3')
    return ctx
def _stage_4(ctx: PipelineContext) -> PipelineContext:
    """Run stage 4: measure the rendered slide, capture it, and score it.

    Stores the measurement and the screenshot (empty string when the capture
    returns a falsy value) on *ctx*.  The quality score is 60 if any entry
    under the measurement's ``'zones'`` reports a truthy ``'overflowed'``
    value and 100 otherwise.  Saves the ``'stage_4'`` snapshot and returns
    *ctx*.
    """
    ctx.measurement = measure_rendered_heights(ctx.rendered_html)

    screenshot = capture_slide_screenshot(ctx.rendered_html)
    ctx.screenshot_b64 = screenshot or ''

    has_overflow = False
    for zone in ctx.measurement.get('zones', {}).values():
        if zone.get('overflowed'):
            has_overflow = True
            break
    ctx.quality_score = 60 if has_overflow else 100

    ctx.save_snapshot('stage_4')
    return ctx
async def main() -> None:
    """Command-line driver: run every pipeline stage and write the artifacts.

    Reads the input document and the stage-1a/1b JSON files named on the
    command line, runs stages 0 through 4 in order, and writes the generated
    HTML, the final HTML, the measurement, the screenshots (when one was
    captured) and two copies of the serialized context into the output
    directory.
    """
    parser = argparse.ArgumentParser()
    for required_flag in ('--input', '--stage1a', '--stage1b'):
        parser.add_argument(required_flag, required=True)
    parser.add_argument('--base-path', default='')
    parser.add_argument('--output-dir', required=True)
    args = parser.parse_args()

    content = Path(args.input).read_text(encoding='utf-8')
    stage1a = _load_json(Path(args.stage1a))
    stage1b_path = Path(args.stage1b)
    stage1b = _load_json(stage1b_path)
    # The retry plan is loaded from the same path as the stage-1b data.
    retry_plan = _load_retry_plan(stage1b_path)

    out_dir = Path(args.output_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    ctx = create_context(content, args.base_path)
    ctx.run_dir = str(out_dir)

    ctx = _stage_0(ctx)
    ctx = _stage_1a(ctx, stage1a)
    ctx = _stage_1b(ctx, stage1b)
    for stage in (_stage_1_5a, _stage_1_7, _stage_1_5b):
        ctx = stage(ctx)
    ctx = await _stage_2(ctx, retry_plan=retry_plan or None)
    ctx = _stage_3(ctx)
    ctx = _stage_4(ctx)

    def write_json_artifact(filename, payload):
        # Pretty-printed UTF-8 JSON with non-ASCII characters kept as-is.
        (out_dir / filename).write_text(
            json.dumps(payload, ensure_ascii=False, indent=2),
            encoding='utf-8',
        )

    write_json_artifact('generated_html.json', ctx.generated_html)
    (out_dir / 'final.html').write_text(ctx.rendered_html, encoding='utf-8')
    write_json_artifact('measurement.json', ctx.measurement)

    if ctx.screenshot_b64:
        screenshot_bytes = base64.b64decode(ctx.screenshot_b64)
        for filename in ('final-screenshot-current.png', 'final-screenshot.png'):
            (out_dir / filename).write_bytes(screenshot_bytes)

    # The context is written twice under two names, omitting the bulky fields.
    for filename in ('context.json', 'final_context.json'):
        (out_dir / filename).write_text(
            ctx.model_dump_json(indent=2, exclude={'screenshot_b64', 'rendered_html'}),
            encoding='utf-8',
        )
# Script entry point: when the file is executed directly, run the async
# ``main`` driver to completion on a fresh event loop.
if __name__ == '__main__':
asyncio.run(main())