X' 핵심 수정: MDX sections에서 직접 텍스트 가져오기 + normalizer ### 지원

핵심 변경:
- mdx_normalizer: ### (h3) 소목차도 section으로 분리 (기존에는 ## 만 분리)
- _assemble_type_b: Kei structured_text 대신 normalized.sections에서 직접 텍스트를 가져옴
- 대목차/소목차 계층 구조 그대로 반영

결과:
- 슬라이드 제목: 원본 MDX frontmatter 그대로
- 대목차: "DX 기반 Process 혁신에 따른 주체별 기대효과"
- 소목차 좌: "업무 수행 과정(Process)의 변화"
- 소목차 우: "DX 시행 주체별 기대효과" + 팝업 링크 + Kei 요약 표
- 캡션: normalized.images alt text

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-06 12:22:09 +09:00
parent 6b17f448eb
commit 3719704d75
2 changed files with 80 additions and 75 deletions
--- a/scripts/assemble_stage2.py
+++ b/scripts/assemble_stage2.py
@@ -677,15 +677,19 @@ def _assemble_type_b(run: Path, ctx: dict):
    bottom_h = column_bottom - bottom_top
    bottom_col_w = (inner_w - gap_block) // 2
-    # ── 역할별 HTML 조립 ──
+    # ── normalized.sections에서 직접 텍스트 가져오기 ──
    norm_sections = ctx.get("normalized", {}).get("sections", [])
    font_size = fh.get("core", 12)
-    # 상단 (텍스트 + 이미지 나란히)
+    # 상단 (텍스트 + 이미지 나란히) — sections[0] 사용
    top_html = ""
    if top_role:
        rn, info = top_role
        tids = info.get("topic_ids", [])
-        all_text = "\n".join(get_text(topic_map.get(tid, {})) for tid in tids if topic_map.get(tid))
+        # MDX 원본 sections에서 직접 가져오기 (Kei structured_text 대신)
        top_section = norm_sections[0] if norm_sections else {}
        all_text = top_section.get("content", "")
        topic_title_from_section = top_section.get("title", "")
        # 마크다운 bold → HTML
        all_text_clean = re.sub(r'\*\*(.+?)\*\*', r'<strong>\1</strong>', all_text)
@@ -792,9 +796,8 @@ def _assemble_type_b(run: Path, ctx: dict):
                f'{caption_html}</div>'
            )
-        # 제목
+        # 제목 — MDX 원본 section 제목 사용
-        primary_topic = topic_map.get(tids[0], {}) if tids else {}
+        topic_title = bold(topic_title_from_section or rn, rn)
        topic_title = bold(primary_topic.get("title", ""), rn)
        # X'-4: 상단 컨테이너 — 내용을 전체 높이에 균등 배분
        top_html = (
@@ -807,75 +810,69 @@ def _assemble_type_b(run: Path, ctx: dict):
            f'{img_block}</div></div>'
        )
-    # 하단 좌측
+    # 하단: normalized.sections에서 직접 매핑
-    bl_html = ""
+    # sections 구조: [level=2 상단, level=2 하단대목차, level=3 하단좌, level=3 하단우, ...]
-    if bottom_left_role:
+    # 하단 대목차 = level=2 두 번째
-        rn, info = bottom_left_role
+    # 하단 소목차들 = level=3
-        tids = info.get("topic_ids", [])
+    bottom_title = ""
-        all_text = "\n".join(get_text(topic_map.get(tid, {})) for tid in tids if topic_map.get(tid))
+    sub_sections_from_norm = []  # [(제목, content)]
-        all_text = re.sub(r'\*\*(.+?)\*\*', r'<strong>\1</strong>', all_text)
+    for s in norm_sections[1:]:  # 상단 제외
        if s["level"] == 2:
            bottom_title = s.get("title", "")
        elif s["level"] == 3:
            sub_sections_from_norm.append((s.get("title", ""), s.get("content", "")))
        primary_topic = topic_map.get(tids[0], {}) if tids else {}
        topic_title = bold(primary_topic.get("title", ""), rn)
        # X'-2: 들여쓰기 계층 (소제목+불릿)
    bl_indent = int(font_size * 1.2)
    # 하단 좌측 = 첫 번째 소목차 (level=3)
    bl_html = ""
    if sub_sections_from_norm and bottom_left_role:
        rn = bottom_left_role[0]
        sub_title, sub_content = sub_sections_from_norm[0]
        sub_content = re.sub(r'\*\*(.+?)\*\*', r'<strong>\1</strong>', sub_content)
        bullets = ""
-        for line in all_text.split("\n"):
+        for line in sub_content.split("\n"):
            stripped = line.strip()
            if not stripped or re.search(r'\[팝업:|\[이미지:', stripped):
                continue
            if stripped.startswith("### "):
                # 소제목
                sub_title = stripped.lstrip("# ").strip()
                bullets += f'<div style="font-weight:700;font-size:{font_size}px;color:#1e40af;margin-top:{int(font_size*0.4)}px;">{bold(sub_title, rn)}</div>\n'
            else:
                clean = stripped.lstrip("• ")
                clean = bold(clean, rn)
                bullets += f'<div style="padding-left:{bl_indent}px;font-size:{font_size}px;margin-bottom:1px;">• {clean}</div>\n'
        bl_html = (
            f'<div style="height:100%;padding:{gap_small}px;box-sizing:border-box;">'
            f'<div style="font-weight:700;font-size:{font_size+1}px;color:#1a365d;margin-bottom:4px;">{topic_title}</div>'
            f'<div style="line-height:1.55;color:#333;">{bullets}</div></div>'
        )
    # 하단 우측
    br_html = ""
    if bottom_right_role:
        rn, info = bottom_right_role
        tids = info.get("topic_ids", [])
        all_text = "\n".join(get_text(topic_map.get(tid, {})) for tid in tids if topic_map.get(tid))
        all_text = re.sub(r'\*\*(.+?)\*\*', r'<strong>\1</strong>', all_text)
        primary_topic = topic_map.get(tids[0], {}) if tids else {}
        topic_title = bold(primary_topic.get("title", ""), rn)
        # 팝업 분리
        popup_titles_br = []
        content_lines_br = []
        for line in all_text.split("\n"):
            stripped = line.strip()
            if not stripped:
                continue
-            popup_match = re.search(r'\[팝업:\s*([^\]]+)\]', stripped)
+            clean = stripped.lstrip("- ").lstrip("• ")
            if popup_match:
                popup_titles_br.append(popup_match.group(1))
                continue
            if re.search(r'\[이미지:', stripped):
                continue
            content_lines_br.append(stripped)
        popup_html_br = ""
        if popup_titles_br:
            links = " ".join(f'<span style="color:#2563eb;font-size:{font_size-2}px;cursor:pointer;">[{t}→]</span>' for t in popup_titles_br)
            popup_html_br = f'<div style="position:absolute;top:4px;right:8px;text-align:right;z-index:1;">{links}</div>'
        bullets = ""
        for line in content_lines_br:
            clean = line.lstrip("• ")
            clean = bold(clean, rn)
-            bullets += f'<div class="bl" style="font-size:{font_size}px;"><span class="bl-m">•</span><span class="bl-t">{clean}</span></div>\n'
+            bullets += f'<div style="padding-left:{bl_indent}px;font-size:{font_size}px;margin-bottom:2px;">• {clean}</div>\n'
        bl_html = (
            f'<div style="height:100%;padding:{gap_small}px;box-sizing:border-box;">'
            f'<div style="font-weight:700;font-size:{font_size+1}px;color:#1a365d;margin-bottom:4px;">{bold(sub_title, rn)}</div>'
            f'<div style="line-height:1.55;color:#333;">{bullets}</div></div>'
        )
    # 하단 우측 = 두 번째 소목차 (level=3) + 표 요약
    br_html = ""
    if bottom_right_role and len(sub_sections_from_norm) > 1:
        rn = bottom_right_role[0]
        sub_title_br, sub_content_br = sub_sections_from_norm[1]
        sub_content_br = re.sub(r'\*\*(.+?)\*\*', r'<strong>\1</strong>', sub_content_br)
        content_lines_br = [l.strip() for l in sub_content_br.split("\n") if l.strip()]
        # 팝업 링크 — 소목차 제목으로 팝업 링크 생성
        popup_html_br = ""
        popup_link_title = f"{sub_title_br} 바로가기"
        popup_html_br = (
            f'<div style="position:absolute;top:4px;right:8px;text-align:right;z-index:1;">'
            f'<span style="color:#2563eb;font-size:{font_size-2}px;cursor:pointer;">[{popup_link_title} →]</span></div>'
        )
        # 불릿 — table_summaries가 있으면 표 데이터는 Kei 요약으로 대체되므로 불릿은 간략하게
        table_summaries = enh.get("table_summaries", {})
        bullets = ""
        if not table_summaries:
            # 표 요약 없으면 content 그대로
            for line in content_lines_br:
                clean = line.strip().lstrip("- ").lstrip("• ")
                if clean:
                    clean = bold(clean, rn)
                    bullets += f'<div style="padding-left:{bl_indent}px;font-size:{font_size}px;margin-bottom:2px;">• {clean}</div>\n'
        # X'-6: 본문 표 요약이 있으면 하단 우측에 추가
        table_summaries = enh.get("table_summaries", {})
@@ -918,7 +915,7 @@ def _assemble_type_b(run: Path, ctx: dict):
            f'<div style="position:relative;height:100%;padding:{gap_small}px;box-sizing:border-box;'
            f'display:flex;flex-direction:column;">'
            f'{popup_html_br}'
-            f'<div style="font-weight:700;font-size:{font_size+1}px;color:#1a365d;margin-bottom:4px;">{topic_title}</div>'
+            f'<div style="font-weight:700;font-size:{font_size+1}px;color:#1a365d;margin-bottom:4px;">{bold(sub_title_br, rn)}</div>'
            f'<div style="font-size:{font_size}px;line-height:1.55;color:#333;flex:1;">{bullets}</div>'
            f'{table_html_br}</div>'
        )
@@ -951,11 +948,15 @@ body{{background:#e5e5e5;padding:10px;font-family:'Pretendard Variable','Noto Sa
 <div style="position:absolute;left:{pad}px;top:{top_top}px;width:{inner_w}px;height:{top_h}px;border:2px solid {_color_palette[0]};border-radius:6px;overflow:hidden;">
 {top_html}</div>
-<div style="position:absolute;left:{pad}px;top:{bottom_top}px;width:{bottom_col_w}px;height:{bottom_h}px;border:2px solid {_color_palette[1]};border-radius:6px;overflow:hidden;">
+<div style="position:absolute;left:{pad}px;top:{bottom_top}px;width:{inner_w}px;height:{bottom_h}px;">
 <div style="font-weight:700;font-size:{font_size+1}px;color:#1a365d;margin-bottom:4px;">{bold(bottom_title, "")}</div>
 <div style="display:flex;gap:{gap_block}px;height:calc(100% - {int(font_size*1.5 + 4)}px);">
 <div style="flex:1;border:2px solid {_color_palette[1]};border-radius:6px;overflow:hidden;">
 {bl_html}</div>
-<div style="position:absolute;left:{pad + bottom_col_w + gap_block}px;top:{bottom_top}px;width:{bottom_col_w}px;height:{bottom_h}px;border:2px solid {_color_palette[2]};border-radius:6px;overflow:hidden;">
+<div style="flex:1;border:2px solid {_color_palette[2]};border-radius:6px;overflow:hidden;">
 {br_html}</div>
 </div></div>
 <div style="position:absolute;left:{pad}px;top:{ft_top}px;width:{inner_w}px;height:{footer_h}px;border-radius:8px;overflow:hidden;">
 {footer_html}</div>
--- a/src/mdx_normalizer.py
+++ b/src/mdx_normalizer.py
@@ -229,15 +229,18 @@ def _extract_structure(text: str) -> dict[str, Any]:
    current_section_title = ""
    current_section_lines = []
    current_section_level = 2
    def _flush_section():
-        nonlocal current_section_title, current_section_lines
+        nonlocal current_section_title, current_section_lines, current_section_level
        if current_section_title:
            sections.append({
-                "level": 2,
+                "level": current_section_level,
                "title": current_section_title,
                "content": "\n".join(current_section_lines).strip(),
            })
            current_section_lines = []
            current_section_level = 2
    for i, token in enumerate(tokens):
        # 이미지 추출 (inline children)
@@ -283,12 +286,13 @@ def _extract_structure(text: str) -> dict[str, Any]:
            if table["headers"] or table["rows"]:
                tables.append(table)
-        # 섹션 추출 (## 기준)
+        # 섹션 추출 (## 및 ### 기준 — 대목차/소목차 모두)
-        if token.type == "heading_open" and token.tag == "h2":
+        if token.type == "heading_open" and token.tag in ("h2", "h3"):
            _flush_section()
            # 다음 토큰이 inline (제목 텍스트)
            if i + 1 < len(tokens) and tokens[i + 1].type == "inline":
                current_section_title = tokens[i + 1].content
                current_section_level = 2 if token.tag == "h2" else 3
        elif current_section_title and token.type in ("paragraph_open", "bullet_list_open",
                                                       "ordered_list_open", "fence"):
            # 섹션 내용 수집 — inline 토큰의 content만