From 17e77e310ffcb2f26b884cfa892349d729594f47 Mon Sep 17 00:00:00 2001
From: kyeongmin <b24009@hanmaceng.co.kr>
Date: Tue, 7 Apr 2026 06:00:18 +0900
Subject: [PATCH] =?UTF-8?q?Phase=20X-BX'=20XBX-1,3,5,6=20=EC=99=84?=
 =?UTF-8?q?=EB=A3=8C:=20=EC=9C=A0=ED=98=95=20B=20=ED=8C=8C=EC=9D=B4?=
 =?UTF-8?q?=ED=94=84=EB=9D=BC=EC=9D=B8=20=EC=A0=95=EC=83=81=20=EB=8F=99?=
 =?UTF-8?q?=EC=9E=91?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- XBX-1: normalizer 불릿 depth 보존 (D1/D2 마커) + 조립 로직 계층 반영
- XBX-3: 하단 구조 개선 — 하나의 큰 박스 안에 중제목 헤더 + 세로 구분선 2분할
- XBX-5: before→filled→after 파이프라인 연결 확인 (filled 2.2MB, 측정/재배분 정상)
- XBX-6: Type B에서 Sonnet 재구성 + renderer 스킵 — code_assembled 직접 사용
- final.html: 4,934 bytes → 2.2MB (Type B 정상 출력)
- Type A 전용 코드 경로는 수정하지 않음 (단, mdx_normalizer의 D마커 prefix는 유형 구분 없이 섹션 content에 붙으므로 Type A 입력에 대한 영향 여부는 확인 필요)

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 scripts/assemble_stage2.py | 52 ++++++++++++++++++++++++++++++-------
 src/block_assembler.py     | 53 ++++++++++++++++++++++++++++++--------
 src/mdx_normalizer.py      | 18 +++++++++++--
 src/pipeline.py            | 14 ++++++++++
 4 files changed, 114 insertions(+), 23 deletions(-)
diff --git a/scripts/assemble_stage2.py b/scripts/assemble_stage2.py
index ef02dd6..444696f 100644
--- a/scripts/assemble_stage2.py
+++ b/scripts/assemble_stage2.py
@@ -727,7 +727,7 @@ def _assemble_type_b(run: Path, ctx: dict):
             links = " ".join(f'<span style="color:#2563eb;font-size:{font_size-2}px;cursor:pointer;">[{t}→]</span>' for t in popup_titles)
             popup_html = f'<div style="position:absolute;top:4px;right:8px;text-align:right;z-index:1;">{links}</div>'
 
-        # 소제목(###) + 불릿을 카드형으로 분리
+        # 소제목(### 또는 D1:) + 불릿(D2:)을 카드형으로 분리
         sections = []  # [(소제목, [불릿들])]
         current_section = ("", [])
         for line in content_lines:
@@ -735,6 +735,18 @@ def _assemble_type_b(run: Path, ctx: dict):
                 if current_section[0] or current_section[1]:
                     sections.append(current_section)
                 current_section = (line.lstrip("# ").strip(), [])
+            elif re.match(r'^D1:\s*', line):
+                # D1 = 1단 불릿 = 소제목 (카드 제목)
+                title_text = re.sub(r'^D1:\s*', '', line).lstrip("• ")
+                if current_section[0] or current_section[1]:
+                    sections.append(current_section)
+                current_section = (bold(title_text, rn), [])
+            elif re.match(r'^D[2-9]:\s*', line):
+                # D2+ = 하위 불릿 = 본문
+                clean = re.sub(r'^D[2-9]:\s*', '', line).lstrip("• ")
+                if clean.startswith("출처:"):
+                    continue
+                current_section[1].append(bold(clean, rn))
             else:
                 clean = line.lstrip("• ")
                 if clean.startswith("출처:"):
@@ -864,9 +876,18 @@ def _assemble_type_b(run: Path, ctx: dict):
             stripped = line.strip()
             if not stripped:
                 continue
+            # D마커 제거 + depth별 스타일
+            depth = 1
+            dm = re.match(r'^D(\d+):\s*', stripped)
+            if dm:
+                depth = int(dm.group(1))
+                stripped = re.sub(r'^D\d+:\s*', '', stripped)
             clean = stripped.lstrip("- ").lstrip("• ")
             clean = bold(clean, rn)
-            bullets += f'<div style="padding-left:{bl_indent}px;font-size:{font_size}px;margin-bottom:2px;">• {clean}</div>\n'
+            pad = bl_indent * depth
+            fs = font_size if depth == 1 else font_size - 1
+            weight = "font-weight:600;" if depth == 1 else ""
+            bullets += f'<div style="padding-left:{pad}px;font-size:{fs}px;margin-bottom:2px;{weight}">• {clean}</div>\n'
 
         bl_html = (
             f'<div style="height:100%;padding:{gap_small}px;box-sizing:border-box;">'
@@ -897,10 +918,21 @@ def _assemble_type_b(run: Path, ctx: dict):
         if not table_summaries:
             # 표 요약 없으면 content 그대로
             for line in content_lines_br:
-                clean = line.strip().lstrip("- ").lstrip("• ")
+                stripped = line.strip()
+                if not stripped:
+                    continue
+                depth = 1
+                dm = re.match(r'^D(\d+):\s*', stripped)
+                if dm:
+                    depth = int(dm.group(1))
+                    stripped = re.sub(r'^D\d+:\s*', '', stripped)
+                clean = stripped.lstrip("- ").lstrip("• ")
                 if clean:
                     clean = bold(clean, rn)
-                    bullets += f'<div style="padding-left:{bl_indent}px;font-size:{font_size}px;margin-bottom:2px;">• {clean}</div>\n'
+                    pad = bl_indent * depth
+                    fs = font_size if depth == 1 else font_size - 1
+                    weight = "font-weight:600;" if depth == 1 else ""
+                    bullets += f'<div style="padding-left:{pad}px;font-size:{fs}px;margin-bottom:2px;{weight}">• {clean}</div>\n'
 
         # X'-6: 본문 표 요약이 있으면 하단 우측에 추가
         table_summaries = enh.get("table_summaries", {})
@@ -976,13 +1008,13 @@ body{{background:#e5e5e5;padding:10px;font-family:'Pretendard Variable','Noto Sa
 <div style="position:absolute;left:{pad}px;top:{top_top}px;width:{inner_w}px;height:{top_h}px;border:2px solid {_color_palette[0]};border-radius:6px;overflow:hidden;">
 {top_html}</div>
 
-<div style="position:absolute;left:{pad}px;top:{bottom_top}px;width:{inner_w}px;height:{bottom_h}px;">
-<div style="font-weight:700;font-size:{font_size+1}px;color:#1a365d;margin-bottom:4px;">{bold(bottom_title, "")}</div>
-<div style="display:flex;gap:{gap_block}px;height:calc(100% - {int(font_size*1.5 + 4)}px);">
-<div style="flex:1;border:2px solid {_color_palette[1]};border-radius:6px;overflow:hidden;">
+<div style="position:absolute;left:{pad}px;top:{bottom_top}px;width:{inner_w}px;height:{bottom_h}px;border:2px solid {_color_palette[1]};border-radius:6px;overflow:hidden;">
+<div style="font-weight:700;font-size:{font_size+1}px;color:#1a365d;padding:{gap_small}px {gap_small}px 4px;border-bottom:1px solid #e2e8f0;">{bold(bottom_title, "")}</div>
+<div style="display:flex;height:calc(100% - {int(font_size*1.5 + gap_small + 5)}px);">
+<div style="flex:1;overflow:hidden;">
 {bl_html}</div>
-
-<div style="flex:1;border:2px solid {_color_palette[2]};border-radius:6px;overflow:hidden;">
+<div style="width:1px;background:#cbd5e1;flex-shrink:0;"></div>
+<div style="flex:1;overflow:hidden;">
 {br_html}</div>
 </div></div>
 
diff --git a/src/block_assembler.py b/src/block_assembler.py
index 10b2331..25ca9ba 100644
--- a/src/block_assembler.py
+++ b/src/block_assembler.py
@@ -585,7 +585,7 @@ def _assemble_slide_html_type_b(ctx: "PipelineContext", title_text: str = "") ->
 
         popup_html = _popup_links_html(popup_titles, font_size)
 
-        # 소제목(###) + 불릿을 카드형으로 분리
+        # 소제목(### 또는 D1:) + 불릿(D2:)을 카드형으로 분리
         sections = []
         current_section = ("", [])
         for line in content_lines:
@@ -593,6 +593,18 @@ def _assemble_slide_html_type_b(ctx: "PipelineContext", title_text: str = "") ->
                 if current_section[0] or current_section[1]:
                     sections.append(current_section)
                 current_section = (line.lstrip("# ").strip(), [])
+            elif re.match(r'^D1:\s*', line):
+                # D1 = 1단 불릿 = 소제목 (카드 제목)
+                title_text = re.sub(r'^D1:\s*', '', line).lstrip("• ")
+                if current_section[0] or current_section[1]:
+                    sections.append(current_section)
+                current_section = (_bold(title_text, rn), [])
+            elif re.match(r'^D[2-9]:\s*', line):
+                # D2+ = 하위 불릿 = 본문
+                clean = re.sub(r'^D[2-9]:\s*', '', line).lstrip("• ")
+                if clean.startswith("출처:"):
+                    continue
+                current_section[1].append(_bold(clean, rn))
             else:
                 clean = line.lstrip("• ")
                 if clean.startswith("출처:"):
@@ -703,9 +715,18 @@ def _assemble_slide_html_type_b(ctx: "PipelineContext", title_text: str = "") ->
             stripped = line.strip()
             if not stripped:
                 continue
+            # D마커 제거 + depth별 스타일
+            depth = 1
+            dm = re.match(r'^D(\d+):\s*', stripped)
+            if dm:
+                depth = int(dm.group(1))
+                stripped = re.sub(r'^D\d+:\s*', '', stripped)
             clean = stripped.lstrip("- ").lstrip("• ")
             clean = _bold(clean, rn)
-            bul += f'<div style="padding-left:{bl_indent}px;font-size:{font_size}px;margin-bottom:2px;">• {clean}</div>\n'
+            pad = bl_indent * depth
+            fs = font_size if depth == 1 else font_size - 1
+            weight = "font-weight:600;" if depth == 1 else ""
+            bul += f'<div style="padding-left:{pad}px;font-size:{fs}px;margin-bottom:2px;{weight}">• {clean}</div>\n'
 
         bl_html = (
             f'<div style="height:100%;padding:{gap_small}px;box-sizing:border-box;">'
@@ -732,10 +753,21 @@ def _assemble_slide_html_type_b(ctx: "PipelineContext", title_text: str = "") ->
         bul = ""
         if not table_summaries:
             for line in sub_content_br.split("\n"):
-                clean = line.strip().lstrip("- ").lstrip("• ")
+                stripped = line.strip()
+                if not stripped:
+                    continue
+                depth = 1
+                dm = re.match(r'^D(\d+):\s*', stripped)
+                if dm:
+                    depth = int(dm.group(1))
+                    stripped = re.sub(r'^D\d+:\s*', '', stripped)
+                clean = stripped.lstrip("- ").lstrip("• ")
                 if clean:
                     clean = _bold(clean, rn)
-                    bul += f'<div style="padding-left:{bl_indent}px;font-size:{font_size}px;margin-bottom:2px;">• {clean}</div>\n'
+                    pad = bl_indent * depth
+                    fs = font_size if depth == 1 else font_size - 1
+                    weight = "font-weight:600;" if depth == 1 else ""
+                    bul += f'<div style="padding-left:{pad}px;font-size:{fs}px;margin-bottom:2px;{weight}">• {clean}</div>\n'
 
         # 표 요약 HTML
         table_html_br = ""
@@ -810,14 +842,13 @@ body{{background:#e5e5e5;padding:10px;font-family:'Pretendard Variable','Noto Sa
 <span style="position:absolute;top:2px;left:4px;font-size:7px;color:{_color_palette[0]};opacity:0.5;">상단 ({inner_w}x{top_h}px)</span>
 {top_html}</div>
 
-<div class="area-bottom" style="position:absolute;left:{pad}px;top:{bottom_top}px;width:{inner_w}px;height:{bottom_h}px;">
-<div style="font-weight:700;font-size:{font_size+1}px;color:#1a365d;margin-bottom:4px;">{_bold(bottom_title, "")}</div>
-<div style="display:flex;gap:{gap_block}px;height:calc(100% - {int(font_size*1.5 + 4)}px);">
-<div class="area-bottom-left" style="flex:1;border:2px solid {_color_palette[1]};border-radius:6px;overflow:hidden;">
-<span style="position:absolute;top:2px;left:4px;font-size:7px;color:{_color_palette[1]};opacity:0.5;">하단좌 ({bottom_col_w}px)</span>
+<div class="area-bottom" style="position:absolute;left:{pad}px;top:{bottom_top}px;width:{inner_w}px;height:{bottom_h}px;border:2px solid {_color_palette[1]};border-radius:6px;overflow:hidden;">
+<div style="font-weight:700;font-size:{font_size+1}px;color:#1a365d;padding:{gap_small}px {gap_small}px 4px;border-bottom:1px solid #e2e8f0;">{_bold(bottom_title, "")}</div>
+<div style="display:flex;height:calc(100% - {int(font_size*1.5 + gap_small + 5)}px);">
+<div class="area-bottom-left" style="flex:1;overflow:hidden;">
 {bl_html}</div>
-<div class="area-bottom-right" style="flex:1;border:2px solid {_color_palette[2]};border-radius:6px;overflow:hidden;">
-<span style="position:absolute;top:2px;left:4px;font-size:7px;color:{_color_palette[2]};opacity:0.5;">하단우 ({bottom_col_w}px)</span>
+<div style="width:1px;background:#cbd5e1;flex-shrink:0;"></div>
+<div class="area-bottom-right" style="flex:1;overflow:hidden;">
 {br_html}</div>
 </div></div>
 
diff --git a/src/mdx_normalizer.py b/src/mdx_normalizer.py
index d5a2e43..05e7834 100644
--- a/src/mdx_normalizer.py
+++ b/src/mdx_normalizer.py
@@ -274,9 +274,10 @@ def _extract_structure(text: str) -> dict[str, Any]:
     current_section_lines = []
 
     current_section_level = 2
+    bullet_depth = 0  # 불릿 중첩 깊이 추적 (bullet_list_open/close)
 
     def _flush_section():
-        nonlocal current_section_title, current_section_lines, current_section_level
+        nonlocal current_section_title, current_section_lines, current_section_level, bullet_depth
         if current_section_title:
             sections.append({
                 "level": current_section_level,
@@ -285,6 +286,7 @@ def _extract_structure(text: str) -> dict[str, Any]:
             })
             current_section_lines = []
             current_section_level = 2
+            bullet_depth = 0
 
     for i, token in enumerate(tokens):
         # 이미지 추출 (inline children)
@@ -330,6 +332,13 @@ def _extract_structure(text: str) -> dict[str, Any]:
             if table["headers"] or table["rows"]:
                 tables.append(table)
 
+        # 불릿 depth 추적 (섹션 내용 수집 시 계층 보존)
+        if current_section_title:
+            if token.type == "bullet_list_open":
+                bullet_depth += 1
+            elif token.type == "bullet_list_close":
+                bullet_depth = max(0, bullet_depth - 1)
+
         # 섹션 추출 (## 및 ### 기준 — 대목차/소목차 모두)
         if token.type == "heading_open" and token.tag in ("h2", "h3"):
             # 다음 토큰이 inline (제목 텍스트) — 무의미한 제목(<br/> 등)은 건너뜀
@@ -349,7 +358,12 @@ def _extract_structure(text: str) -> dict[str, Any]:
             # heading의 inline은 제목이므로 건너뜀 (이미 current_section_title에 저장)
             parent_type = tokens[i - 1].type if i > 0 else ""
             if parent_type != "heading_open":
-                current_section_lines.append(token.content)
+                # depth prefix 추가: D1=1단 불릿, D2=2단 불릿, D3=3단 불릿
+                depth = max(1, bullet_depth) if bullet_depth > 0 else 0
+                if depth > 0:
+                    current_section_lines.append(f"D{depth}: {token.content}")
+                else:
+                    current_section_lines.append(token.content)
 
     _flush_section()
 
diff --git a/src/pipeline.py b/src/pipeline.py
index 9e35406..1995ceb 100644
--- a/src/pipeline.py
+++ b/src/pipeline.py
@@ -899,6 +899,14 @@ async def generate_slide(
         yield {"event": "progress", "data": "3/7 슬라이드 HTML 생성 중..."}
 
         async def stage_2(context: PipelineContext) -> dict:
+            # Phase X-BX': Type B는 code_assembled 직접 사용, Sonnet 재구성 스킵
+            if context.analysis.layout_template == "B":
+                from src.block_assembler import assemble_slide_html
+                generated = assemble_slide_html(context)
+                logger.info("[Stage 2] Type B: code_assembled 직접 사용 (Sonnet 스킵)")
+                return {"generated_html": generated}
+
+            # Type A: 기존 Sonnet 재구성 코드 그대로
             from src.content_verifier import generate_with_retry
 
             # PipelineContext → 기존 함수 인터페이스로 변환
@@ -960,6 +968,12 @@ async def generate_slide(
         yield {"event": "progress", "data": "4/7 슬라이드 조립 중..."}
 
         async def stage_3(context: PipelineContext) -> dict:
+            # Phase X-BX': Type B는 Stage 2에서 이미 완전한 HTML → renderer 스킵
+            if context.analysis.layout_template == "B":
+                logger.info("[Stage 3] Type B: renderer 스킵 (generated_html 직접 사용)")
+                return {"rendered_html": context.generated_html}
+
+            # Type A: 기존 renderer 코드 그대로
             from src.renderer import render_slide_from_html
 
             analysis_dict = {