From b2a49f55ef74ce3973135fb39b4593f838271c8e Mon Sep 17 00:00:00 2001 From: kyeongmin Date: Tue, 7 Apr 2026 08:28:10 +0900 Subject: [PATCH] =?UTF-8?q?Type=20B'=20=EC=B6=94=EA=B0=80:=2003=EB=B2=88?= =?UTF-8?q?=20MDX=EC=9A=A9=20=EB=A0=88=EC=9D=B4=EC=95=84=EC=9B=83=20(?= =?UTF-8?q?=ED=91=9C=20=EB=A0=8C=EB=8D=94=EB=A7=81=20+=20=EB=B6=88?= =?UTF-8?q?=EB=A6=BF=20=EC=A0=84=EC=9A=A9=20=ED=95=98=EB=8B=A8)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - block_assembler: _assemble_slide_html_type_b_prime 추가 - 하단 좌: normalized.tables를 표로 렌더링 (셀 중복 불릿 제거) - 하단 우: 불릿만 (table_summaries 미사용) - pipeline: layout_template 체크를 in ("B", "B'")로 확장 - 결과: 03번 표가 표로 렌더링됨, 하단 우에 잘못된 표 제거 Co-Authored-By: Claude Opus 4.6 (1M context) --- src/block_assembler.py | 410 +++++++++++++++++++++++++++++++++++++++++ src/pipeline.py | 10 +- 2 files changed, 415 insertions(+), 5 deletions(-) diff --git a/src/block_assembler.py b/src/block_assembler.py index 544cf12..ff44048 100644 --- a/src/block_assembler.py +++ b/src/block_assembler.py @@ -371,6 +371,8 @@ def assemble_slide_html(ctx: "PipelineContext", title_text: str = "") -> str: """ if ctx.analysis.layout_template == "B": return _assemble_slide_html_type_b(ctx, title_text) + if ctx.analysis.layout_template == "B'": + return _assemble_slide_html_type_b_prime(ctx, title_text) return _assemble_slide_html_type_a(ctx, title_text) @@ -873,3 +875,411 @@ body{{background:#e5e5e5;padding:10px;font-family:'Pretendard Variable','Noto Sa {footer_html} """ + + +def _assemble_slide_html_type_b_prime(ctx: "PipelineContext", title_text: str = "") -> str: + """유형 B' 전체 슬라이드 조립: 상단(세로 카드) + 하단 2분할 + 결론. (03번용) + + assemble_stage2._assemble_type_b의 로직을 PipelineContext 기반으로 통합. + filled/after 파이프라인에서 호출되어 Selenium 측정 가능한 HTML 생성. + """ + from src.fit_verifier import _load_design_tokens + tokens = _load_design_tokens() + pad = tokens["spacing_page"] + header_h = tokens.get("header_height", 66) + gap_block = tokens["spacing_block"] + gap_small = tokens["spacing_small"] + slide_w = tokens.get("slide_width", 1280) + slide_h = tokens.get("slide_height", 720) + inner_w = slide_w - pad * 2 + + ps = ctx.page_structure.roles + enh = ctx.enhancement_result or {} + bold_kw = enh.get("bold_keywords", {}) if isinstance(enh.get("bold_keywords"), dict) else {} + font_h = ctx.font_hierarchy + font_size = font_h.core + title = title_text or ctx.analysis.title or "" + core_message = ctx.analysis.core_message or "" + slide_images = ctx.slide_images or [] + norm_sections = ctx.normalized.sections or [] + + # Kei 에스컬레이션 결정: popup 대상 역할 수집 + kei_decisions = enh.get("kei_decisions", []) + popup_roles = set() + for d in kei_decisions: + if d.get("action") == "popup": + popup_roles.add(d.get("role", "")) + + # ── zone별 역할 분류 ── + top_role = None + bottom_left_role = None + bottom_right_role = None + footer_role = None + + for role_name, info in ps.items(): + if not isinstance(info, dict): + continue + zone = info.get("zone", "") + if zone == "top": + top_role = (role_name, info) + elif zone == "bottom_left": + bottom_left_role = (role_name, info) + elif zone == "bottom_right": + bottom_right_role = (role_name, info) + elif zone == "footer": + footer_role = (role_name, info) + + # ── 좌표 계산 (containers에서 동적으로) ── + footer_ci = ctx.containers.get(footer_role[0]) if footer_role else None + footer_h_px = footer_ci.height_px if footer_ci else 53 + ft_top = slide_h - pad - footer_h_px + + top_ci = ctx.containers.get(top_role[0]) if top_role else None + top_h = top_ci.height_px if top_ci else 200 + top_top = pad + header_h + gap_block + + # 이미지: block_constraints 또는 slide_images에서 판단 + img_constraints = top_ci.block_constraints if top_ci else {} + img_w = img_constraints.get("img_width_px", 0) + has_image = img_constraints.get("has_image", False) + # block_constraints에 has_image가 없어도 slide_images에 b64가 있으면 사용 + if not has_image and slide_images: + has_image = any(img.get("b64") for img in slide_images) + if has_image and img_w <= 0: + # 이미지 폭: top_h * ratio, 최대 45% + first_img = next((img for img in slide_images if img.get("b64")), None) + if first_img: + img_ratio = first_img.get("ratio", 1) + img_w = min(int(top_h * img_ratio), int(inner_w * 0.45)) + + img_h = 0 + img_html = "" + if has_image and slide_images: + for img in slide_images: + b64 = img.get("b64", "") + if b64: + img_ratio = img.get("ratio", 1) + img_h = int(img_w / img_ratio) if img_ratio > 0 else top_h + img_html = f'' + break + + # 하단 + bottom_top = top_top + top_h + gap_small + + # V'-4: 결론 바로 위까지 채움 + fit = ctx.fit_result or {} + redist = fit.get("redistribution", {}) + column_bottom = ft_top - gap_block + bottom_h = column_bottom - bottom_top + bottom_col_w = (inner_w - gap_block) // 2 + + # ── 유틸 ── + def _bold(text: str, role: str) -> str: + for kw in bold_kw.get(role, []): + if kw in text: + text = text.replace(kw, f"{kw}") + return text + + # ── 상단 조립: normalized.sections에서 직접 가져오기 ── + top_html = "" + if top_role: + rn = top_role[0] + topic_title_from_section = "" + top_contents = [] + for s in norm_sections: + if s.get("level") == 3: + break # level=3(소목차) 나오면 상단 끝 + if not topic_title_from_section and s.get("title"): + topic_title_from_section = s["title"] + content = s.get("content", "") + if content: + if s.get("title") and s["title"] != topic_title_from_section: + top_contents.append(f"### {s['title']}") + top_contents.append(content) + all_text = "\n".join(top_contents) + all_text_clean = re.sub(r'\*\*(.+?)\*\*', r'\1', all_text) + + # 팝업 분리 + popup_titles = [] + content_lines = [] + for line in all_text_clean.split("\n"): + stripped = line.strip() + if not stripped: + continue + popup_match = re.search(r'\[팝업:\s*([^\]]+)\]', stripped) + if popup_match: + popup_titles.append(popup_match.group(1)) + continue + if re.search(r'\[이미지:', stripped) or re.match(r'^!\[', stripped): + continue + content_lines.append(stripped) + + popup_html = _popup_links_html(popup_titles, font_size) + + # 소제목(### 또는 D1:) + 불릿(D2:)을 카드형으로 분리 + sections = [] + current_section = ("", []) + for line in content_lines: + if line.startswith("### ") or line.startswith("###"): + if current_section[0] or current_section[1]: + sections.append(current_section) + current_section = (line.lstrip("# ").strip(), []) + elif re.match(r'^D1:\s*', line): + # D1 = 1단 불릿 = 소제목 (카드 제목) + title_text = re.sub(r'^D1:\s*', '', line).lstrip("• ") + if current_section[0] or current_section[1]: + sections.append(current_section) + current_section = (_bold(title_text, rn), []) + elif re.match(r'^D[2-9]:\s*', line): + # D2+ = 하위 불릿 = 본문 + clean = re.sub(r'^D[2-9]:\s*', '', line).lstrip("• ") + if clean.startswith("출처:"): + continue + current_section[1].append(_bold(clean, rn)) + else: + clean = line.lstrip("• ") + if clean.startswith("출처:"): + continue + current_section[1].append(_bold(clean, rn)) + if current_section[0] or current_section[1]: + sections.append(current_section) + + # 카드형 HTML + _card_colors = [ + ("linear-gradient(135deg, #1a365d, #2d3748)", "#e2e8f0"), + ("linear-gradient(135deg, #1e3a2f, #2d4a3e)", "#e2e8f0"), + ("linear-gradient(135deg, #3b1f2b, #4a2d3b)", "#e2e8f0"), + ("linear-gradient(135deg, #2d2b55, #3d3b65)", "#e2e8f0"), + ] + card_pad = int(font_size * 0.6) + card_gap = max(3, int(font_size * 0.4)) + indent_body = int(font_size * 1.2) + + bullets = "" + if len(sections) > 1 and sections[0][0]: + for ci, (sec_title, sec_items) in enumerate(sections): + bg, text_color = _card_colors[ci % len(_card_colors)] + items_html = "".join( + f'
' + f'• {item}
' + for item in sec_items + ) + if sec_title: + bullets += ( + f'
' + f'
{_bold(sec_title, rn)}
' + f'{items_html}
\n' + ) + else: + bullets += items_html + else: + for _, sec_items in sections: + for item in sec_items: + bullets += ( + f'
' + f'• {item}
\n' + ) + + # 이미지 캡션 + img_caption = "" + norm_images = ctx.normalized.images or [] + if norm_images: + img_caption = norm_images[0].get("alt", "") + if not img_caption: + for line in all_text.split("\n"): + stripped = line.strip().lstrip("• ") + if stripped.startswith("출처:"): + img_caption = re.sub(r'^출처:\s*', '', stripped) + break + caption_html = f'
{img_caption}
' if img_caption else "" + + # 이미지 블록 + img_block = "" + if has_image and img_html: + img_block = ( + f'
' + f'
{img_html}
' + f'{caption_html}
' + ) + + topic_title = _bold(topic_title_from_section or rn, rn) + + top_html = ( + f'
' + f'{popup_html}' + f'
{topic_title}
' + f'
' + f'
{bullets}
' + f'{img_block}
' + ) + + # ── 하단: normalized.sections에서 직접 매핑 ── + bottom_title = "" + sub_sections_from_norm = [] + found_level3 = False + for s in norm_sections: + if s.get("level") == 3: + found_level3 = True + sub_sections_from_norm.append((s.get("title", ""), s.get("content", ""))) + # 하단 대목차: level=3 바로 앞의 level=2 + for s in norm_sections: + if s.get("level") == 2: + idx = norm_sections.index(s) + if idx + 1 < len(norm_sections) and norm_sections[idx + 1].get("level") == 3: + bottom_title = s.get("title", "") + break + + bl_indent = int(font_size * 1.2) + + # 하단 좌측 — B': normalized.tables가 있으면 표로 렌더링 + norm_tables = ctx.normalized.tables or [] + bl_html = "" + if sub_sections_from_norm and bottom_left_role: + rn = bottom_left_role[0] + sub_title, sub_content = sub_sections_from_norm[0] + sub_content = re.sub(r'\*\*(.+?)\*\*', r'', sub_content) + + # 표 렌더링 (normalized.tables에서) + table_html_bl = "" + if norm_tables: + for table_data in norm_tables: + headers = table_data.get("headers", []) + rows = table_data.get("rows", []) + col_count = len(headers) + if col_count > 0 and rows: + header_cells = "".join( + f'
{c}
' + for c in headers + ) + rows_html = "" + for ri, row in enumerate(rows): + bg = "#f8fafc" if ri % 2 == 0 else "#fff" + cells = "" + for ci_idx, cell in enumerate(row): + cell_clean = re.sub(r'\*\*(.+?)\*\*', r'', str(cell)) + c_color = "#1e40af" if ci_idx == 0 else "#475569" + c_weight = "600" if ci_idx == 0 else "400" + cells += f'
{cell_clean}
' + rows_html += f'
{cells}
\n' + + table_html_bl = ( + f'
' + f'
{header_cells}
' + f'{rows_html}
' + ) + + # 불릿: 표 셀과 중복되는 텍스트 제외 + table_cell_texts = set() + for td in norm_tables: + for h in td.get("headers", []): + table_cell_texts.add(h.strip().lstrip("*").rstrip("*")) + for row in td.get("rows", []): + for cell in row: + table_cell_texts.add(str(cell).strip().lstrip("*").rstrip("*")) + + bul = "" + for line in sub_content.split("\n"): + stripped = line.strip() + if not stripped: + continue + depth = 1 + dm = re.match(r'^D(\d+):\s*', stripped) + if dm: + depth = int(dm.group(1)) + stripped = re.sub(r'^D\d+:\s*', '', stripped) + clean = stripped.lstrip("- ").lstrip("• ") + clean_plain = re.sub(r'<[^>]+>', '', clean).strip() + if clean_plain in table_cell_texts or clean_plain == "➠": + continue + if clean: + clean = _bold(clean, rn) + _pad = bl_indent * depth + fs = font_size if depth == 1 else font_size - 1 + weight = "font-weight:600;" if depth == 1 else "" + bul += f'
• {clean}
\n' + + bl_html = ( + f'
' + f'
{_bold(sub_title, rn)}
' + f'{table_html_bl}' + f'
{bul}
' + ) + + # 하단 우측 — B': 불릿만 (table_summaries 사용 안 함) + br_html = "" + if bottom_right_role and len(sub_sections_from_norm) > 1: + rn = bottom_right_role[0] + sub_title_br, sub_content_br = sub_sections_from_norm[1] + sub_content_br = re.sub(r'\*\*(.+?)\*\*', r'', sub_content_br) + + bul = "" + for line in sub_content_br.split("\n"): + stripped = line.strip() + if not stripped: + continue + depth = 1 + dm = re.match(r'^D(\d+):\s*', stripped) + if dm: + depth = int(dm.group(1)) + stripped = re.sub(r'^D\d+:\s*', '', stripped) + clean = stripped.lstrip("- ").lstrip("• ") + if clean: + clean = _bold(clean, rn) + _pad = bl_indent * depth + fs = font_size if depth == 1 else font_size - 1 + weight = "font-weight:600;" if depth == 1 else "" + bul += f'
• {clean}
\n' + + br_html = ( + f'
' + f'
{_bold(sub_title_br, rn)}
' + f'
{bul}
' + ) + + + # ── 결론 ── + footer_html = "" + if footer_role: + rn = footer_role[0] + footer_html = ( + f'
' + f'
{_bold(core_message, rn)}
' + ) + + # ── HTML 조립 ── + _color_palette = ["#2563eb", "#16a34a", "#d97706", "#7c3aed"] + + return f""" + +
+ +
{title}
+ +
+상단 ({inner_w}x{top_h}px) +{top_html}
+ +
+
{_bold(bottom_title, "")}
+
+
+{bl_html}
+
+
+{br_html}
+
+ + + +
""" diff --git a/src/pipeline.py b/src/pipeline.py index b9a8e14..3cd7fd5 100644 --- a/src/pipeline.py +++ b/src/pipeline.py @@ -332,7 +332,7 @@ async def generate_slide( ) # Phase X-B: 유형에 따라 컨테이너 생성 분기 - if context.analysis.layout_template == "B": + if context.analysis.layout_template in ("B", "B'"): from src.space_allocator import build_containers_type_b container_specs = build_containers_type_b( page_structure=context.page_structure.roles, @@ -578,7 +578,7 @@ async def generate_slide( # Type B: zone 간 재배분 (top↔bottom) # redistribute는 같은 zone 내에서만 동작하므로, Type B는 zone 간 여유를 수동 이전 - if context.analysis.layout_template == "B": + if context.analysis.layout_template in ("B", "B'"): deficit_roles = [(r, rf.shortfall_px) for r, rf in fit_analysis.roles.items() if rf.shortfall_px > 0] surplus_roles = [(r, abs(rf.shortfall_px)) for r, rf in fit_analysis.roles.items() if rf.shortfall_px < -8] if deficit_roles and surplus_roles: @@ -808,7 +808,7 @@ async def generate_slide( # X'-6: 본문 표 요약 (유형 B — normalized.tables가 있으면) table_summaries = {} norm_tables = context.normalized.tables or [] - if norm_tables and context.analysis.layout_template == "B": + if norm_tables and context.analysis.layout_template in ("B", "B'"): from src.kei_client import call_kei_summarize_popup for ti, table_data in enumerate(norm_tables): headers = table_data.get("headers", []) @@ -925,7 +925,7 @@ async def generate_slide( async def stage_2(context: PipelineContext) -> dict: # Phase X-BX': Type B는 code_assembled 직접 사용, Sonnet 재구성 스킵 - if context.analysis.layout_template == "B": + if context.analysis.layout_template in ("B", "B'"): from src.block_assembler import assemble_slide_html generated = assemble_slide_html(context) logger.info("[Stage 2] Type B: code_assembled 직접 사용 (Sonnet 스킵)") @@ -994,7 +994,7 @@ async def generate_slide( async def stage_3(context: PipelineContext) -> dict: # Phase X-BX': Type B는 Stage 2에서 이미 완전한 HTML → renderer 스킵 - if context.analysis.layout_template == "B": + if context.analysis.layout_template in ("B", "B'"): logger.info("[Stage 3] Type B: renderer 스킵 (generated_html 직접 사용)") return {"rendered_html": context.generated_html}