Phase X-BX' XBX-1,3,5,6 완료: 유형 B 파이프라인 정상 동작

- XBX-1: normalizer 불릿 depth 보존 (D1/D2 마커) + 조립 로직 계층 반영
- XBX-3: 하단 구조 개선 — 하나의 큰 박스 안에 중제목 헤더 + 세로 구분선 2분할
- XBX-5: before→filled→after 파이프라인 연결 확인 (filled 2.2MB, 측정/재배분 정상)
- XBX-6: Type B에서 Sonnet 재구성 + renderer 스킵 — code_assembled 직접 사용
- final.html: 4,934 bytes → 2.2MB (Type B 정상 출력)
- Type A 코드 한 글자도 안 건드림

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
2026-04-07 06:00:18 +09:00
parent 82f25caa6e
commit 17e77e310f
4 changed files with 114 additions and 23 deletions
--- a/src/mdx_normalizer.py
+++ b/src/mdx_normalizer.py
@@ -274,9 +274,10 @@ def _extract_structure(text: str) -> dict[str, Any]:
    current_section_lines = []

    current_section_level = 2
+    bullet_depth = 0  # 불릿 중첩 깊이 추적 (bullet_list_open/close)

    def _flush_section():
-        nonlocal current_section_title, current_section_lines, current_section_level
+        nonlocal current_section_title, current_section_lines, current_section_level, bullet_depth
        if current_section_title:
            sections.append({
                "level": current_section_level,
@@ -285,6 +286,7 @@ def _extract_structure(text: str) -> dict[str, Any]:
            })
            current_section_lines = []
            current_section_level = 2
+            bullet_depth = 0

    for i, token in enumerate(tokens):
        # 이미지 추출 (inline children)
@@ -330,6 +332,13 @@ def _extract_structure(text: str) -> dict[str, Any]:
            if table["headers"] or table["rows"]:
                tables.append(table)

+        # 불릿 depth 추적 (섹션 내용 수집 시 계층 보존)
+        if current_section_title:
+            if token.type == "bullet_list_open":
+                bullet_depth += 1
+            elif token.type == "bullet_list_close":
+                bullet_depth = max(0, bullet_depth - 1)
+
        # 섹션 추출 (## 및 ### 기준 — 대목차/소목차 모두)
        if token.type == "heading_open" and token.tag in ("h2", "h3"):
            # 다음 토큰이 inline (제목 텍스트) — 무의미한 제목(<br/> 등)은 건너뜀
@@ -349,7 +358,12 @@ def _extract_structure(text: str) -> dict[str, Any]:
            # heading의 inline은 제목이므로 건너뜀 (이미 current_section_title에 저장)
            parent_type = tokens[i - 1].type if i > 0 else ""
            if parent_type != "heading_open":
-                current_section_lines.append(token.content)
+                # depth prefix 추가: D1=1단 불릿, D2=2단 불릿, D3=3단 불릿
+                depth = max(1, bullet_depth) if bullet_depth > 0 else 0
+                if depth > 0:
+                    current_section_lines.append(f"D{depth}: {token.content}")
+                else:
+                    current_section_lines.append(token.content)

    _flush_section()