diff --git a/PLAN.md b/PLAN.md index cd29226..706e97b 100644 --- a/PLAN.md +++ b/PLAN.md @@ -393,6 +393,36 @@ P2-E (누락기능) ── 병렬 │ --- +## Phase Y: MDX 외부 컴포넌트 인라인 삽입 + +> 근거: MDX에서 `import ... from '*.astro'`로 불러오는 외부 컴포넌트(표, 다이어그램 등)가 파이프라인에서 누락됨. import문은 제거되고 `<Foo />` 같은 태그는 사라져서 콘텐츠 손실 발생. + +### Y-1: import문 파싱 — 컴포넌트명:파일경로 매핑 +- **파일:** `src/mdx_normalizer.py` +- **내용:** `import Foo from '../../components/foo.astro'` → `{"Foo": 절대경로}` 매핑 추출 +- **의존성:** base_path (MDX 원본 파일 위치, pipeline.py에서 전달) +- **완료 기준:** import문에서 컴포넌트명→절대경로 dict 반환 + +### Y-2: .astro 파일 파싱 — HTML + CSS 추출 +- **파일:** `src/mdx_normalizer.py` +- **내용:** .astro 파일에서 `---` frontmatter 제거, HTML 본문 + `<style>` 반환 + +### Y-3: 셀프클로징 태그 교체 — 인라인 삽입 +- **파일:** `src/mdx_normalizer.py` +- **내용:** `<Foo />` 태그를 Y-2에서 추출한 HTML+CSS로 교체 +- **의존성:** Y-1, Y-2 +- **완료 기준:** MDX 정규화 결과에 외부 컴포넌트 HTML이 인라인으로 포함 + +### Y-4: Astro 특수 문법 정리 +- **파일:** `src/mdx_normalizer.py` +- **내용:** Astro의 멀티라인 태그(`텍스트` 줄바꿈 패턴), `style="letter-spacing: -0.9px"` 등 인라인 스타일 정리 +- **의존성:** Y-2 +- **완료 기준:** 추출된 HTML이 브라우저에서 정상 렌더링 + +--- + ## 의존 관계 ``` diff --git a/scripts/run_from_stage1b.py b/scripts/run_from_stage1b.py index c89834a..08d434c 100644 --- a/scripts/run_from_stage1b.py +++ b/scripts/run_from_stage1b.py @@ -22,13 +22,8 @@ async def main(run_dir: str): # Stage 1B context 로드 ctx_json = json.loads((run / "stage_1b_context.json").read_text(encoding="utf-8")) - # MDX 원본: samples에서 직접 읽기 (최신 원본 사용) - samples_dir = Path(__file__).parent.parent / "samples" - mdx_file = samples_dir / "mdx" / "01. 
건설산업 DX의 올바른 이해(0127).mdx" - if mdx_file.exists(): - raw_content = mdx_file.read_text(encoding="utf-8") - else: - raw_content = ctx_json.get("raw_content", "") + # MDX 원본: context에서 가져옴 (어떤 MDX든 대응) + raw_content = ctx_json.get("raw_content", "") # Stage 1A 결과를 manual_layout으로 전달 (Stage 1A 스킵) # page_structure가 {"roles": {...}} 형태이면 roles 안쪽을 직접 전달 @@ -41,9 +36,11 @@ async def main(run_dir: str): "page_structure": ps, "core_message": ctx_json.get("analysis", {}).get("core_message", ""), "title": ctx_json.get("analysis", {}).get("title", ""), + "layout_template": ctx_json.get("analysis", {}).get("layout_template", "A"), } - print(f"=== Stage 1B 데이터 고정: {run.name} ===") + layout = manual_layout.get("layout_template", "A") + print(f"=== Stage 1B 데이터 고정: {run.name} (유형 {layout}) ===") print(f" topics: {len(ctx_json['topics'])}개") for t in ctx_json["topics"]: print(f" 꼭지{t['id']}: {t['title']} (st={len(t.get('structured_text',''))}자)") @@ -51,8 +48,8 @@ async def main(run_dir: str): # pipeline.py의 generate_slide() 호출 from src.pipeline import generate_slide - # 이미지 base_path: samples/images/ - base_path = str(samples_dir / "images") + # 이미지 base_path: context에서 가져옴 + base_path = ctx_json.get("base_path", "") async for event in generate_slide(raw_content, manual_layout=manual_layout, base_path=base_path): ev_type = event.get("event", "") ev_data = event.get("data", "") diff --git a/src/block_assembler.py b/src/block_assembler.py index 22132de..10b2331 100644 --- a/src/block_assembler.py +++ b/src/block_assembler.py @@ -367,7 +367,15 @@ def assemble_slide_html(ctx: "PipelineContext", title_text: str = "") -> str: """전체 슬라이드를 조립하여 HTML 반환. filled, assembled, stage_2 모두 이 함수를 호출. + layout_template에 따라 유형 A/B 분기. 
""" + if ctx.analysis.layout_template == "B": + return _assemble_slide_html_type_b(ctx, title_text) + return _assemble_slide_html_type_a(ctx, title_text) + + +def _assemble_slide_html_type_a(ctx: "PipelineContext", title_text: str = "") -> str: + """유형 A 전체 슬라이드 조립 (기존 코드 그대로).""" from src.fit_verifier import _load_design_tokens tokens = _load_design_tokens() pad = tokens["spacing_page"] @@ -443,3 +451,378 @@ body{{background:#e5e5e5;padding:10px;font-family:'Pretendard Variable','Noto Sa {role_htmls.get("결론", "")} """ + + +def _assemble_slide_html_type_b(ctx: "PipelineContext", title_text: str = "") -> str: + """유형 B 전체 슬라이드 조립: 상단(top+이미지) + 하단 2분할 + 결론. + + assemble_stage2._assemble_type_b의 로직을 PipelineContext 기반으로 통합. + filled/after 파이프라인에서 호출되어 Selenium 측정 가능한 HTML 생성. + """ + from src.fit_verifier import _load_design_tokens + tokens = _load_design_tokens() + pad = tokens["spacing_page"] + header_h = tokens.get("header_height", 66) + gap_block = tokens["spacing_block"] + gap_small = tokens["spacing_small"] + slide_w = tokens.get("slide_width", 1280) + slide_h = tokens.get("slide_height", 720) + inner_w = slide_w - pad * 2 + + ps = ctx.page_structure.roles + enh = ctx.enhancement_result or {} + bold_kw = enh.get("bold_keywords", {}) if isinstance(enh.get("bold_keywords"), dict) else {} + font_h = ctx.font_hierarchy + font_size = font_h.core + title = title_text or ctx.analysis.title or "" + core_message = ctx.analysis.core_message or "" + slide_images = ctx.slide_images or [] + norm_sections = ctx.normalized.sections or [] + + # ── zone별 역할 분류 ── + top_role = None + bottom_left_role = None + bottom_right_role = None + footer_role = None + + for role_name, info in ps.items(): + if not isinstance(info, dict): + continue + zone = info.get("zone", "") + if zone == "top": + top_role = (role_name, info) + elif zone == "bottom_left": + bottom_left_role = (role_name, info) + elif zone == "bottom_right": + bottom_right_role = (role_name, info) + elif zone == "footer": + 
footer_role = (role_name, info) + + # ── 좌표 계산 (containers에서 동적으로) ── + footer_ci = ctx.containers.get(footer_role[0]) if footer_role else None + footer_h_px = footer_ci.height_px if footer_ci else 53 + ft_top = slide_h - pad - footer_h_px + + top_ci = ctx.containers.get(top_role[0]) if top_role else None + top_h = top_ci.height_px if top_ci else 200 + top_top = pad + header_h + gap_block + + # 이미지: block_constraints 또는 slide_images에서 판단 + img_constraints = top_ci.block_constraints if top_ci else {} + img_w = img_constraints.get("img_width_px", 0) + has_image = img_constraints.get("has_image", False) + # block_constraints에 has_image가 없어도 slide_images에 b64가 있으면 사용 + if not has_image and slide_images: + has_image = any(img.get("b64") for img in slide_images) + if has_image and img_w <= 0: + # 이미지 폭: top_h * ratio, 최대 45% + first_img = next((img for img in slide_images if img.get("b64")), None) + if first_img: + img_ratio = first_img.get("ratio", 1) + img_w = min(int(top_h * img_ratio), int(inner_w * 0.45)) + + img_h = 0 + img_html = "" + if has_image and slide_images: + for img in slide_images: + b64 = img.get("b64", "") + if b64: + img_ratio = img.get("ratio", 1) + img_h = int(img_w / img_ratio) if img_ratio > 0 else top_h + img_html = f'' + break + + # 하단 + bottom_top = top_top + top_h + gap_small + + # V'-4: 결론 바로 위까지 채움 + fit = ctx.fit_result or {} + redist = fit.get("redistribution", {}) + column_bottom = ft_top - gap_block + bottom_h = column_bottom - bottom_top + bottom_col_w = (inner_w - gap_block) // 2 + + # ── 유틸 ── + def _bold(text: str, role: str) -> str: + for kw in bold_kw.get(role, []): + if kw in text: + text = text.replace(kw, f"{kw}") + return text + + # ── 상단 조립: normalized.sections에서 직접 가져오기 ── + top_html = "" + if top_role: + rn = top_role[0] + topic_title_from_section = "" + top_contents = [] + for s in norm_sections: + if s.get("level") == 3: + break # level=3(소목차) 나오면 상단 끝 + if not topic_title_from_section and s.get("title"): + 
topic_title_from_section = s["title"] + content = s.get("content", "") + if content: + if s.get("title") and s["title"] != topic_title_from_section: + top_contents.append(f"### {s['title']}") + top_contents.append(content) + all_text = "\n".join(top_contents) + all_text_clean = re.sub(r'\*\*(.+?)\*\*', r'\1', all_text) + + # 팝업 분리 + popup_titles = [] + content_lines = [] + for line in all_text_clean.split("\n"): + stripped = line.strip() + if not stripped: + continue + popup_match = re.search(r'\[팝업:\s*([^\]]+)\]', stripped) + if popup_match: + popup_titles.append(popup_match.group(1)) + continue + if re.search(r'\[이미지:', stripped): + continue + content_lines.append(stripped) + + popup_html = _popup_links_html(popup_titles, font_size) + + # 소제목(###) + 불릿을 카드형으로 분리 + sections = [] + current_section = ("", []) + for line in content_lines: + if line.startswith("### ") or line.startswith("###"): + if current_section[0] or current_section[1]: + sections.append(current_section) + current_section = (line.lstrip("# ").strip(), []) + else: + clean = line.lstrip("• ") + if clean.startswith("출처:"): + continue + current_section[1].append(_bold(clean, rn)) + if current_section[0] or current_section[1]: + sections.append(current_section) + + # 카드형 HTML + _card_colors = [ + ("linear-gradient(135deg, #1a365d, #2d3748)", "#e2e8f0"), + ("linear-gradient(135deg, #1e3a2f, #2d4a3e)", "#e2e8f0"), + ("linear-gradient(135deg, #3b1f2b, #4a2d3b)", "#e2e8f0"), + ("linear-gradient(135deg, #2d2b55, #3d3b65)", "#e2e8f0"), + ] + card_pad = int(font_size * 0.6) + card_gap = max(3, int(font_size * 0.4)) + indent_body = int(font_size * 1.2) + + bullets = "" + if len(sections) > 1 and sections[0][0]: + for ci, (sec_title, sec_items) in enumerate(sections): + bg, text_color = _card_colors[ci % len(_card_colors)] + items_html = "".join( + f'
' + f'• {item}
' + for item in sec_items + ) + if sec_title: + bullets += ( + f'
' + f'
{_bold(sec_title, rn)}
' + f'{items_html}
\n' + ) + else: + bullets += items_html + else: + for _, sec_items in sections: + for item in sec_items: + bullets += ( + f'
' + f'• {item}
\n' + ) + + # 이미지 캡션 + img_caption = "" + norm_images = ctx.normalized.images or [] + if norm_images: + img_caption = norm_images[0].get("alt", "") + if not img_caption: + for line in all_text.split("\n"): + stripped = line.strip().lstrip("• ") + if stripped.startswith("출처:"): + img_caption = re.sub(r'^출처:\s*', '', stripped) + break + caption_html = f'
{img_caption}
' if img_caption else "" + + # 이미지 블록 + img_block = "" + if has_image and img_html: + img_block = ( + f'
' + f'
{img_html}
' + f'{caption_html}
' + ) + + topic_title = _bold(topic_title_from_section or rn, rn) + + top_html = ( + f'
' + f'{popup_html}' + f'
{topic_title}
' + f'
' + f'
{bullets}
' + f'{img_block}
' + ) + + # ── 하단: normalized.sections에서 직접 매핑 ── + bottom_title = "" + sub_sections_from_norm = [] + found_level3 = False + for s in norm_sections: + if s.get("level") == 3: + found_level3 = True + sub_sections_from_norm.append((s.get("title", ""), s.get("content", ""))) + # 하단 대목차: level=3 바로 앞의 level=2 + for s in norm_sections: + if s.get("level") == 2: + idx = norm_sections.index(s) + if idx + 1 < len(norm_sections) and norm_sections[idx + 1].get("level") == 3: + bottom_title = s.get("title", "") + break + + bl_indent = int(font_size * 1.2) + + # 하단 좌측 + bl_html = "" + if sub_sections_from_norm and bottom_left_role: + rn = bottom_left_role[0] + sub_title, sub_content = sub_sections_from_norm[0] + sub_content = re.sub(r'\*\*(.+?)\*\*', r'\1', sub_content) + + bul = "" + for line in sub_content.split("\n"): + stripped = line.strip() + if not stripped: + continue + clean = stripped.lstrip("- ").lstrip("• ") + clean = _bold(clean, rn) + bul += f'
• {clean}
\n' + + bl_html = ( + f'
' + f'
{_bold(sub_title, rn)}
' + f'
{bul}
' + ) + + # 하단 우측 + 표 요약 + br_html = "" + if bottom_right_role and len(sub_sections_from_norm) > 1: + rn = bottom_right_role[0] + sub_title_br, sub_content_br = sub_sections_from_norm[1] + sub_content_br = re.sub(r'\*\*(.+?)\*\*', r'\1', sub_content_br) + + # 팝업 링크 + popup_link_title = f"{sub_title_br} 바로가기" + popup_html_br = ( + f'
' + f'[{popup_link_title} →]
' + ) + + # 불릿 + table_summaries = enh.get("table_summaries", {}) + bul = "" + if not table_summaries: + for line in sub_content_br.split("\n"): + clean = line.strip().lstrip("- ").lstrip("• ") + if clean: + clean = _bold(clean, rn) + bul += f'
• {clean}
\n' + + # 표 요약 HTML + table_html_br = "" + for ts_key, ts_data in table_summaries.items(): + fmt = ts_data.get("format", "text") + if fmt == "table": + cols = ts_data.get("columns", []) + data = ts_data.get("data", []) + col_count = len(cols) + if col_count > 0 and data: + header_cells = "".join( + f'
{c}
' + for c in cols + ) + rows_html = "" + for ri, row in enumerate(data): + bg = "#f8fafc" if ri % 2 == 0 else "#fff" + cells = "" + for ci_idx, cell in enumerate(row): + c_color = "#1e40af" if ci_idx == 0 else "#475569" + c_weight = "600" if ci_idx == 0 else "400" + cells += f'
{_bold(str(cell), rn)}
' + rows_html += f'
{cells}
\n' + table_html_br = ( + f'
' + f'
{header_cells}
' + f'{rows_html}
' + ) + elif fmt == "bullets": + items = ts_data.get("items", []) + table_html_br = "".join( + f'
• {_bold(str(item), rn)}
' + for item in items + ) + elif fmt == "text": + table_html_br = f'
{_bold(str(ts_data.get("summary", "")), rn)}
' + + br_html = ( + f'
' + f'{popup_html_br}' + f'
{_bold(sub_title_br, rn)}
' + f'
{bul}
' + f'{table_html_br}
' + ) + + # ── 결론 ── + footer_html = "" + if footer_role: + rn = footer_role[0] + footer_html = ( + f'
' + f'
{_bold(core_message, rn)}
' + ) + + # ── HTML 조립 ── + _color_palette = ["#2563eb", "#16a34a", "#d97706", "#7c3aed"] + + return f""" + +
+ +
{title}
+ +
+상단 ({inner_w}x{top_h}px) +{top_html}
+ +
+
{_bold(bottom_title, "")}
+
+
+하단좌 ({bottom_col_w}px) +{bl_html}
+
+하단우 ({bottom_col_w}px) +{br_html}
+
+ + + +
""" diff --git a/src/fit_verifier.py b/src/fit_verifier.py index 6cdb8c6..8776a62 100644 --- a/src/fit_verifier.py +++ b/src/fit_verifier.py @@ -501,10 +501,18 @@ def redistribute( """부족 영역에 여유 영역의 공간을 재배분. 같은 zone 내에서만 재배분 가능 (body 안의 배경↔본심). + 유형 B: containers의 zone 속성에서 동적으로 매핑. """ zone_roles: dict[str, list[str]] = {} for role in analysis.roles: - zone = ROLE_ZONE_MAP.get(role, "body") + # containers에 zone 정보가 있으면 그걸 사용, 없으면 ROLE_ZONE_MAP fallback + ci = containers.get(role) + if ci is not None: + zone = ci.get("zone") if isinstance(ci, dict) else getattr(ci, "zone", None) + else: + zone = None + if not zone: + zone = ROLE_ZONE_MAP.get(role, "body") if zone not in zone_roles: zone_roles[zone] = [] zone_roles[zone].append(role) diff --git a/src/pipeline.py b/src/pipeline.py index 7ce8674..9e35406 100644 --- a/src/pipeline.py +++ b/src/pipeline.py @@ -1088,6 +1088,34 @@ async def generate_slide( # markdown bold → HTML bold clean_content = _re.sub(r'\*\*(.+?)\*\*', r'\1', clean_content) + # 콘텐츠 유형 감지: 테이블 vs 리스트 + has_table = " @@ -1125,30 +1153,7 @@ h1 {{ color: #64748b; margin-bottom: 20px; }} -table {{ - border-collapse: collapse; - width: 100%; - margin: 16px 0; - font-size: 13px; -}} -th {{ - background: var(--color-primary); - color: #ffffff; - font-weight: 700; - padding: 10px 14px; - text-align: center; - border: 1px solid #334155; -}} -td {{ - padding: 8px 14px; - border: 1px solid var(--color-border); - vertical-align: top; - line-height: 1.5; -}} -tr:nth-child(even) {{ background: var(--color-bg-subtle); }} -td:first-child {{ font-weight: 600; background: #f1f5f9; }} -ul {{ padding-left: 20px; margin: 8px 0; }} -li {{ margin-bottom: 4px; font-size: 13px; }} +{content_css} strong {{ color: var(--color-primary); }} .source {{ font-size: 11px; diff --git a/src/renderer.py b/src/renderer.py index d4c2213..ac2c28b 100644 --- a/src/renderer.py +++ b/src/renderer.py @@ -547,9 +547,24 @@ def render_slide_from_html( _tokens = _ldt() _header_h = 
_tokens.get("header_height", 66) _gap_small = _tokens["spacing_small"] - _bg_h = int(redist.get("배경", containers.get("배경", {}).get("height_px", 0))) - _core_h = int(redist.get("본심", containers.get("본심", {}).get("height_px", 0))) - _footer_h = int(redist.get("결론", containers.get("결론", {}).get("height_px", 0))) + # zone 기반으로 body/footer 높이를 동적 탐색 (유형 A: 배경+본심, 유형 B: zone별) + def _find_h(role_name, zone_name=None): + """redist → containers 순으로 높이 탐색. role_name 없으면 zone으로 fallback.""" + h = redist.get(role_name, 0) + if h: + return int(h) + ci = containers.get(role_name, {}) + if ci: + return int(ci.get("height_px", 0)) + if zone_name: + for _r, _c in containers.items(): + if isinstance(_c, dict) and _c.get("zone") == zone_name: + return int(redist.get(_r, _c.get("height_px", 0))) + return 0 + + _bg_h = _find_h("배경") + _core_h = _find_h("본심") + _footer_h = _find_h("결론", "footer") _body_row_h = _bg_h + _core_h + _gap_small if _bg_h and _core_h else 0 if _body_row_h > 0 and _footer_h > 0: grid_rows = f"auto {_body_row_h}px {_footer_h}px"