From 3719704d75c05ce1043285f19adefd510561327c Mon Sep 17 00:00:00 2001
From: kyeongmin <b24009@hanmaceng.co.kr>
Date: Mon, 6 Apr 2026 12:22:09 +0900
Subject: [PATCH] =?UTF-8?q?X'=20=ED=95=B5=EC=8B=AC=20=EC=88=98=EC=A0=95:?=
 =?UTF-8?q?=20MDX=20sections=EC=97=90=EC=84=9C=20=EC=A7=81=EC=A0=91=20?=
 =?UTF-8?q?=ED=85=8D=EC=8A=A4=ED=8A=B8=20=EA=B0=80=EC=A0=B8=EC=98=A4?=
 =?UTF-8?q?=EA=B8=B0=20+=20normalizer=20###=20=EC=A7=80=EC=9B=90?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

핵심 변경:
- mdx_normalizer: ### (h3) 소목차도 section으로 분리 (기존에는 ##만 분리)
- _assemble_type_b: Kei structured_text 대신 normalized.sections에서 직접 텍스트 가져오기
- 대목차/소목차 계층 구조 그대로 반영

결과:
- 슬라이드 제목: 원본 MDX frontmatter 그대로
- 대목차: "DX 기반 Process 혁신에 따른 주체별 기대효과"
- 소목차 좌: "업무 수행 과정(Process)의 변화"
- 소목차 우: "DX 시행 주체별 기대효과" + 팝업 링크 + Kei 요약 표
- 캡션: normalized.images alt text

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 scripts/assemble_stage2.py | 143 +++++++++++++++++++------------------
 src/mdx_normalizer.py      |  12 ++--
 2 files changed, 80 insertions(+), 75 deletions(-)
diff --git a/scripts/assemble_stage2.py b/scripts/assemble_stage2.py
index 2658e25..9da4b50 100644
--- a/scripts/assemble_stage2.py
+++ b/scripts/assemble_stage2.py
@@ -677,15 +677,19 @@ def _assemble_type_b(run: Path, ctx: dict):
     bottom_h = column_bottom - bottom_top
     bottom_col_w = (inner_w - gap_block) // 2
 
-    # ── 역할별 HTML 조립 ──
+    # ── normalized.sections에서 직접 텍스트 가져오기 ──
+    norm_sections = ctx.get("normalized", {}).get("sections", [])
     font_size = fh.get("core", 12)
 
-    # 상단 (텍스트 + 이미지 나란히)
+    # 상단 (텍스트 + 이미지 나란히) — sections[0] 사용
     top_html = ""
     if top_role:
         rn, info = top_role
         tids = info.get("topic_ids", [])
-        all_text = "\n".join(get_text(topic_map.get(tid, {})) for tid in tids if topic_map.get(tid))
+        # MDX 원본 sections에서 직접 가져오기 (Kei structured_text 대신)
+        top_section = norm_sections[0] if norm_sections else {}
+        all_text = top_section.get("content", "")
+        topic_title_from_section = top_section.get("title", "")
         # 마크다운 bold → HTML
         all_text_clean = re.sub(r'\*\*(.+?)\*\*', r'<strong>\1</strong>', all_text)
 
@@ -792,9 +796,8 @@ def _assemble_type_b(run: Path, ctx: dict):
                 f'{caption_html}</div>'
             )
 
-        # 제목
-        primary_topic = topic_map.get(tids[0], {}) if tids else {}
-        topic_title = bold(primary_topic.get("title", ""), rn)
+        # 제목 — MDX 원본 section 제목 사용
+        topic_title = bold(topic_title_from_section or rn, rn)
 
         # X'-4: 상단 컨테이너 — 내용을 전체 높이에 균등 배분
         top_html = (
@@ -807,75 +810,69 @@ def _assemble_type_b(run: Path, ctx: dict):
             f'{img_block}</div></div>'
         )
 
-    # 하단 좌측
+    # 하단: normalized.sections에서 직접 매핑
+    # sections 구조: [level=2 상단, level=2 하단대목차, level=3 하단좌, level=3 하단우, ...]
+    # 하단 대목차 = level=2 두 번째
+    # 하단 소목차들 = level=3
+    bottom_title = ""
+    sub_sections_from_norm = []  # [(제목, content)]
+    for s in norm_sections[1:]:  # 상단 제외
+        if s["level"] == 2:
+            bottom_title = s.get("title", "")
+        elif s["level"] == 3:
+            sub_sections_from_norm.append((s.get("title", ""), s.get("content", "")))
+
+    bl_indent = int(font_size * 1.2)
+
+    # 하단 좌측 = 첫 번째 소목차 (level=3)
     bl_html = ""
-    if bottom_left_role:
-        rn, info = bottom_left_role
-        tids = info.get("topic_ids", [])
-        all_text = "\n".join(get_text(topic_map.get(tid, {})) for tid in tids if topic_map.get(tid))
-        all_text = re.sub(r'\*\*(.+?)\*\*', r'<strong>\1</strong>', all_text)
+    if sub_sections_from_norm and bottom_left_role:
+        rn = bottom_left_role[0]
+        sub_title, sub_content = sub_sections_from_norm[0]
+        sub_content = re.sub(r'\*\*(.+?)\*\*', r'<strong>\1</strong>', sub_content)
 
-        primary_topic = topic_map.get(tids[0], {}) if tids else {}
-        topic_title = bold(primary_topic.get("title", ""), rn)
-
-        # X'-2: 들여쓰기 계층 (소제목+불릿)
-        bl_indent = int(font_size * 1.2)
         bullets = ""
-        for line in all_text.split("\n"):
-            stripped = line.strip()
-            if not stripped or re.search(r'\[팝업:|\[이미지:', stripped):
-                continue
-            if stripped.startswith("### "):
-                # 소제목
-                sub_title = stripped.lstrip("# ").strip()
-                bullets += f'<div style="font-weight:700;font-size:{font_size}px;color:#1e40af;margin-top:{int(font_size*0.4)}px;">{bold(sub_title, rn)}</div>\n'
-            else:
-                clean = stripped.lstrip("• ")
-                clean = bold(clean, rn)
-                bullets += f'<div style="padding-left:{bl_indent}px;font-size:{font_size}px;margin-bottom:1px;">• {clean}</div>\n'
-
-        bl_html = (
-            f'<div style="height:100%;padding:{gap_small}px;box-sizing:border-box;">'
-            f'<div style="font-weight:700;font-size:{font_size+1}px;color:#1a365d;margin-bottom:4px;">{topic_title}</div>'
-            f'<div style="line-height:1.55;color:#333;">{bullets}</div></div>'
-        )
-
-    # 하단 우측
-    br_html = ""
-    if bottom_right_role:
-        rn, info = bottom_right_role
-        tids = info.get("topic_ids", [])
-        all_text = "\n".join(get_text(topic_map.get(tid, {})) for tid in tids if topic_map.get(tid))
-        all_text = re.sub(r'\*\*(.+?)\*\*', r'<strong>\1</strong>', all_text)
-
-        primary_topic = topic_map.get(tids[0], {}) if tids else {}
-        topic_title = bold(primary_topic.get("title", ""), rn)
-
-        # 팝업 분리
-        popup_titles_br = []
-        content_lines_br = []
-        for line in all_text.split("\n"):
+        for line in sub_content.split("\n"):
             stripped = line.strip()
             if not stripped:
                 continue
-            popup_match = re.search(r'\[팝업:\s*([^\]]+)\]', stripped)
-            if popup_match:
-                popup_titles_br.append(popup_match.group(1))
-                continue
-            if re.search(r'\[이미지:', stripped):
-                continue
-            content_lines_br.append(stripped)
-
-        popup_html_br = ""
-        if popup_titles_br:
-            links = " ".join(f'<span style="color:#2563eb;font-size:{font_size-2}px;cursor:pointer;">[{t}→]</span>' for t in popup_titles_br)
-            popup_html_br = f'<div style="position:absolute;top:4px;right:8px;text-align:right;z-index:1;">{links}</div>'
-
-        bullets = ""
-        for line in content_lines_br:
-            clean = line.lstrip("• ")
+            clean = stripped.lstrip("- ").lstrip("• ")
             clean = bold(clean, rn)
-            bullets += f'<div class="bl" style="font-size:{font_size}px;"><span class="bl-m">•</span><span class="bl-t">{clean}</span></div>\n'
+            bullets += f'<div style="padding-left:{bl_indent}px;font-size:{font_size}px;margin-bottom:2px;">• {clean}</div>\n'
+
+        bl_html = (
+            f'<div style="height:100%;padding:{gap_small}px;box-sizing:border-box;">'
+            f'<div style="font-weight:700;font-size:{font_size+1}px;color:#1a365d;margin-bottom:4px;">{bold(sub_title, rn)}</div>'
+            f'<div style="line-height:1.55;color:#333;">{bullets}</div></div>'
+        )
+
+    # 하단 우측 = 두 번째 소목차 (level=3) + 표 요약
+    br_html = ""
+    if bottom_right_role and len(sub_sections_from_norm) > 1:
+        rn = bottom_right_role[0]
+        sub_title_br, sub_content_br = sub_sections_from_norm[1]
+        sub_content_br = re.sub(r'\*\*(.+?)\*\*', r'<strong>\1</strong>', sub_content_br)
+
+        content_lines_br = [l.strip() for l in sub_content_br.split("\n") if l.strip()]
+
+        # 팝업 링크 — 소목차 제목으로 팝업 링크 생성
+        popup_html_br = ""
+        popup_link_title = f"{sub_title_br} 바로가기"
+        popup_html_br = (
+            f'<div style="position:absolute;top:4px;right:8px;text-align:right;z-index:1;">'
+            f'<span style="color:#2563eb;font-size:{font_size-2}px;cursor:pointer;">[{popup_link_title} →]</span></div>'
+        )
+
+        # 불릿 — table_summaries가 있으면 표 데이터는 Kei 요약으로 대체되므로 불릿은 간략하게
+        table_summaries = enh.get("table_summaries", {})
+        bullets = ""
+        if not table_summaries:
+            # 표 요약 없으면 content 그대로
+            for line in content_lines_br:
+                clean = line.strip().lstrip("- ").lstrip("• ")
+                if clean:
+                    clean = bold(clean, rn)
+                    bullets += f'<div style="padding-left:{bl_indent}px;font-size:{font_size}px;margin-bottom:2px;">• {clean}</div>\n'
 
         # X'-6: 본문 표 요약이 있으면 하단 우측에 추가
         table_summaries = enh.get("table_summaries", {})
@@ -918,7 +915,7 @@ def _assemble_type_b(run: Path, ctx: dict):
             f'<div style="position:relative;height:100%;padding:{gap_small}px;box-sizing:border-box;'
             f'display:flex;flex-direction:column;">'
             f'{popup_html_br}'
-            f'<div style="font-weight:700;font-size:{font_size+1}px;color:#1a365d;margin-bottom:4px;">{topic_title}</div>'
+            f'<div style="font-weight:700;font-size:{font_size+1}px;color:#1a365d;margin-bottom:4px;">{bold(sub_title_br, rn)}</div>'
             f'<div style="font-size:{font_size}px;line-height:1.55;color:#333;flex:1;">{bullets}</div>'
             f'{table_html_br}</div>'
         )
@@ -951,11 +948,15 @@ body{{background:#e5e5e5;padding:10px;font-family:'Pretendard Variable','Noto Sa
 <div style="position:absolute;left:{pad}px;top:{top_top}px;width:{inner_w}px;height:{top_h}px;border:2px solid {_color_palette[0]};border-radius:6px;overflow:hidden;">
 {top_html}</div>
 
-<div style="position:absolute;left:{pad}px;top:{bottom_top}px;width:{bottom_col_w}px;height:{bottom_h}px;border:2px solid {_color_palette[1]};border-radius:6px;overflow:hidden;">
+<div style="position:absolute;left:{pad}px;top:{bottom_top}px;width:{inner_w}px;height:{bottom_h}px;">
+<div style="font-weight:700;font-size:{font_size+1}px;color:#1a365d;margin-bottom:4px;">{bold(bottom_title, "")}</div>
+<div style="display:flex;gap:{gap_block}px;height:calc(100% - {int(font_size*1.5 + 4)}px);">
+<div style="flex:1;border:2px solid {_color_palette[1]};border-radius:6px;overflow:hidden;">
 {bl_html}</div>
 
-<div style="position:absolute;left:{pad + bottom_col_w + gap_block}px;top:{bottom_top}px;width:{bottom_col_w}px;height:{bottom_h}px;border:2px solid {_color_palette[2]};border-radius:6px;overflow:hidden;">
+<div style="flex:1;border:2px solid {_color_palette[2]};border-radius:6px;overflow:hidden;">
 {br_html}</div>
+</div></div>
 
 <div style="position:absolute;left:{pad}px;top:{ft_top}px;width:{inner_w}px;height:{footer_h}px;border-radius:8px;overflow:hidden;">
 {footer_html}</div>
diff --git a/src/mdx_normalizer.py b/src/mdx_normalizer.py
index 6556276..3236761 100644
--- a/src/mdx_normalizer.py
+++ b/src/mdx_normalizer.py
@@ -229,15 +229,18 @@ def _extract_structure(text: str) -> dict[str, Any]:
     current_section_title = ""
     current_section_lines = []
 
+    current_section_level = 2
+
     def _flush_section():
-        nonlocal current_section_title, current_section_lines
+        nonlocal current_section_title, current_section_lines, current_section_level
         if current_section_title:
             sections.append({
-                "level": 2,
+                "level": current_section_level,
                 "title": current_section_title,
                 "content": "\n".join(current_section_lines).strip(),
             })
             current_section_lines = []
+            current_section_level = 2
 
     for i, token in enumerate(tokens):
         # 이미지 추출 (inline children)
@@ -283,12 +286,13 @@ def _extract_structure(text: str) -> dict[str, Any]:
             if table["headers"] or table["rows"]:
                 tables.append(table)
 
-        # 섹션 추출 (## 기준)
-        if token.type == "heading_open" and token.tag == "h2":
+        # 섹션 추출 (## 및 ### 기준 — 대목차/소목차 모두)
+        if token.type == "heading_open" and token.tag in ("h2", "h3"):
             _flush_section()
             # 다음 토큰이 inline (제목 텍스트)
             if i + 1 < len(tokens) and tokens[i + 1].type == "inline":
                 current_section_title = tokens[i + 1].content
+                current_section_level = 2 if token.tag == "h2" else 3
         elif current_section_title and token.type in ("paragraph_open", "bullet_list_open",
                                                        "ordered_list_open", "fence"):
             # 섹션 내용 수집 — inline 토큰의 content만