From 17e77e310ffcb2f26b884cfa892349d729594f47 Mon Sep 17 00:00:00 2001 From: kyeongmin Date: Tue, 7 Apr 2026 06:00:18 +0900 Subject: [PATCH] =?UTF-8?q?Phase=20X-BX'=20XBX-1,3,5,6=20=EC=99=84?= =?UTF-8?q?=EB=A3=8C:=20=EC=9C=A0=ED=98=95=20B=20=ED=8C=8C=EC=9D=B4?= =?UTF-8?q?=ED=94=84=EB=9D=BC=EC=9D=B8=20=EC=A0=95=EC=83=81=20=EB=8F=99?= =?UTF-8?q?=EC=9E=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - XBX-1: normalizer 불릿 depth 보존 (D1/D2 마커) + 조립 로직 계층 반영 - XBX-3: 하단 구조 개선 — 하나의 큰 박스 안에 중제목 헤더 + 세로 구분선 2분할 - XBX-5: before→filled→after 파이프라인 연결 확인 (filled 2.2MB, 측정/재배분 정상) - XBX-6: Type B에서 Sonnet 재구성 + renderer 스킵 — code_assembled 직접 사용 - final.html: 4,934 bytes → 2.2MB (Type B 정상 출력) - Type A 코드 한 글자도 안 건드림 Co-Authored-By: Claude Opus 4.6 (1M context) --- scripts/assemble_stage2.py | 52 ++++++++++++++++++++++++++++++------- src/block_assembler.py | 53 ++++++++++++++++++++++++++++++-------- src/mdx_normalizer.py | 18 +++++++++++-- src/pipeline.py | 14 ++++++++++ 4 files changed, 114 insertions(+), 23 deletions(-) diff --git a/scripts/assemble_stage2.py b/scripts/assemble_stage2.py index ef02dd6..444696f 100644 --- a/scripts/assemble_stage2.py +++ b/scripts/assemble_stage2.py @@ -727,7 +727,7 @@ def _assemble_type_b(run: Path, ctx: dict): links = " ".join(f'[{t}→]' for t in popup_titles) popup_html = f'
{links}
' - # 소제목(###) + 불릿을 카드형으로 분리 + # 소제목(### 또는 D1:) + 불릿(D2:)을 카드형으로 분리 sections = [] # [(소제목, [불릿들])] current_section = ("", []) for line in content_lines: @@ -735,6 +735,18 @@ def _assemble_type_b(run: Path, ctx: dict): if current_section[0] or current_section[1]: sections.append(current_section) current_section = (line.lstrip("# ").strip(), []) + elif re.match(r'^D1:\s*', line): + # D1 = 1단 불릿 = 소제목 (카드 제목) + title_text = re.sub(r'^D1:\s*', '', line).lstrip("• ") + if current_section[0] or current_section[1]: + sections.append(current_section) + current_section = (bold(title_text, rn), []) + elif re.match(r'^D[2-9]:\s*', line): + # D2+ = 하위 불릿 = 본문 + clean = re.sub(r'^D[2-9]:\s*', '', line).lstrip("• ") + if clean.startswith("출처:"): + continue + current_section[1].append(bold(clean, rn)) else: clean = line.lstrip("• ") if clean.startswith("출처:"): @@ -864,9 +876,18 @@ def _assemble_type_b(run: Path, ctx: dict): stripped = line.strip() if not stripped: continue + # D마커 제거 + depth별 스타일 + depth = 1 + dm = re.match(r'^D(\d+):\s*', stripped) + if dm: + depth = int(dm.group(1)) + stripped = re.sub(r'^D\d+:\s*', '', stripped) clean = stripped.lstrip("- ").lstrip("• ") clean = bold(clean, rn) - bullets += f'
• {clean}
\n' + pad = bl_indent * depth + fs = font_size if depth == 1 else font_size - 1 + weight = "font-weight:600;" if depth == 1 else "" + bullets += f'
• {clean}
\n' bl_html = ( f'
' @@ -897,10 +918,21 @@ def _assemble_type_b(run: Path, ctx: dict): if not table_summaries: # 표 요약 없으면 content 그대로 for line in content_lines_br: - clean = line.strip().lstrip("- ").lstrip("• ") + stripped = line.strip() + if not stripped: + continue + depth = 1 + dm = re.match(r'^D(\d+):\s*', stripped) + if dm: + depth = int(dm.group(1)) + stripped = re.sub(r'^D\d+:\s*', '', stripped) + clean = stripped.lstrip("- ").lstrip("• ") if clean: clean = bold(clean, rn) - bullets += f'
• {clean}
\n' + pad = bl_indent * depth + fs = font_size if depth == 1 else font_size - 1 + weight = "font-weight:600;" if depth == 1 else "" + bullets += f'
• {clean}
\n' # X'-6: 본문 표 요약이 있으면 하단 우측에 추가 table_summaries = enh.get("table_summaries", {}) @@ -976,13 +1008,13 @@ body{{background:#e5e5e5;padding:10px;font-family:'Pretendard Variable','Noto Sa
{top_html}
-
-
{bold(bottom_title, "")}
-
-
+
+
{bold(bottom_title, "")}
+
+
{bl_html}
- -
+
+
{br_html}
diff --git a/src/block_assembler.py b/src/block_assembler.py index 10b2331..25ca9ba 100644 --- a/src/block_assembler.py +++ b/src/block_assembler.py @@ -585,7 +585,7 @@ def _assemble_slide_html_type_b(ctx: "PipelineContext", title_text: str = "") -> popup_html = _popup_links_html(popup_titles, font_size) - # 소제목(###) + 불릿을 카드형으로 분리 + # 소제목(### 또는 D1:) + 불릿(D2:)을 카드형으로 분리 sections = [] current_section = ("", []) for line in content_lines: @@ -593,6 +593,18 @@ def _assemble_slide_html_type_b(ctx: "PipelineContext", title_text: str = "") -> if current_section[0] or current_section[1]: sections.append(current_section) current_section = (line.lstrip("# ").strip(), []) + elif re.match(r'^D1:\s*', line): + # D1 = 1단 불릿 = 소제목 (카드 제목) + title_text = re.sub(r'^D1:\s*', '', line).lstrip("• ") + if current_section[0] or current_section[1]: + sections.append(current_section) + current_section = (_bold(title_text, rn), []) + elif re.match(r'^D[2-9]:\s*', line): + # D2+ = 하위 불릿 = 본문 + clean = re.sub(r'^D[2-9]:\s*', '', line).lstrip("• ") + if clean.startswith("출처:"): + continue + current_section[1].append(_bold(clean, rn)) else: clean = line.lstrip("• ") if clean.startswith("출처:"): @@ -703,9 +715,18 @@ def _assemble_slide_html_type_b(ctx: "PipelineContext", title_text: str = "") -> stripped = line.strip() if not stripped: continue + # D마커 제거 + depth별 스타일 + depth = 1 + dm = re.match(r'^D(\d+):\s*', stripped) + if dm: + depth = int(dm.group(1)) + stripped = re.sub(r'^D\d+:\s*', '', stripped) clean = stripped.lstrip("- ").lstrip("• ") clean = _bold(clean, rn) - bul += f'
• {clean}
\n' + pad = bl_indent * depth + fs = font_size if depth == 1 else font_size - 1 + weight = "font-weight:600;" if depth == 1 else "" + bul += f'
• {clean}
\n' bl_html = ( f'
' @@ -732,10 +753,21 @@ def _assemble_slide_html_type_b(ctx: "PipelineContext", title_text: str = "") -> bul = "" if not table_summaries: for line in sub_content_br.split("\n"): - clean = line.strip().lstrip("- ").lstrip("• ") + stripped = line.strip() + if not stripped: + continue + depth = 1 + dm = re.match(r'^D(\d+):\s*', stripped) + if dm: + depth = int(dm.group(1)) + stripped = re.sub(r'^D\d+:\s*', '', stripped) + clean = stripped.lstrip("- ").lstrip("• ") if clean: clean = _bold(clean, rn) - bul += f'
• {clean}
\n' + pad = bl_indent * depth + fs = font_size if depth == 1 else font_size - 1 + weight = "font-weight:600;" if depth == 1 else "" + bul += f'
• {clean}
\n' # 표 요약 HTML table_html_br = "" @@ -810,14 +842,13 @@ body{{background:#e5e5e5;padding:10px;font-family:'Pretendard Variable','Noto Sa 상단 ({inner_w}x{top_h}px) {top_html}
-
-
{_bold(bottom_title, "")}
-
-
-하단좌 ({bottom_col_w}px) +
+
{_bold(bottom_title, "")}
+
+
{bl_html}
-
-하단우 ({bottom_col_w}px) +
+
{br_html}
diff --git a/src/mdx_normalizer.py b/src/mdx_normalizer.py index d5a2e43..05e7834 100644 --- a/src/mdx_normalizer.py +++ b/src/mdx_normalizer.py @@ -274,9 +274,10 @@ def _extract_structure(text: str) -> dict[str, Any]: current_section_lines = [] current_section_level = 2 + bullet_depth = 0 # 불릿 중첩 깊이 추적 (bullet_list_open/close) def _flush_section(): - nonlocal current_section_title, current_section_lines, current_section_level + nonlocal current_section_title, current_section_lines, current_section_level, bullet_depth if current_section_title: sections.append({ "level": current_section_level, @@ -285,6 +286,7 @@ def _extract_structure(text: str) -> dict[str, Any]: }) current_section_lines = [] current_section_level = 2 + bullet_depth = 0 for i, token in enumerate(tokens): # 이미지 추출 (inline children) @@ -330,6 +332,13 @@ def _extract_structure(text: str) -> dict[str, Any]: if table["headers"] or table["rows"]: tables.append(table) + # 불릿 depth 추적 (섹션 내용 수집 시 계층 보존) + if current_section_title: + if token.type == "bullet_list_open": + bullet_depth += 1 + elif token.type == "bullet_list_close": + bullet_depth = max(0, bullet_depth - 1) + # 섹션 추출 (## 및 ### 기준 — 대목차/소목차 모두) if token.type == "heading_open" and token.tag in ("h2", "h3"): # 다음 토큰이 inline (제목 텍스트) — 무의미한 제목(
등)은 건너뜀 @@ -349,7 +358,12 @@ def _extract_structure(text: str) -> dict[str, Any]: # heading의 inline은 제목이므로 건너뜀 (이미 current_section_title에 저장) parent_type = tokens[i - 1].type if i > 0 else "" if parent_type != "heading_open": - current_section_lines.append(token.content) + # depth prefix 추가: D1=1단 불릿, D2=2단 불릿, D3=3단 불릿 + depth = max(1, bullet_depth) if bullet_depth > 0 else 0 + if depth > 0: + current_section_lines.append(f"D{depth}: {token.content}") + else: + current_section_lines.append(token.content) _flush_section() diff --git a/src/pipeline.py b/src/pipeline.py index 9e35406..1995ceb 100644 --- a/src/pipeline.py +++ b/src/pipeline.py @@ -899,6 +899,14 @@ async def generate_slide( yield {"event": "progress", "data": "3/7 슬라이드 HTML 생성 중..."} async def stage_2(context: PipelineContext) -> dict: + # Phase X-BX': Type B는 code_assembled 직접 사용, Sonnet 재구성 스킵 + if context.analysis.layout_template == "B": + from src.block_assembler import assemble_slide_html + generated = assemble_slide_html(context) + logger.info("[Stage 2] Type B: code_assembled 직접 사용 (Sonnet 스킵)") + return {"generated_html": generated} + + # Type A: 기존 Sonnet 재구성 코드 그대로 from src.content_verifier import generate_with_retry # PipelineContext → 기존 함수 인터페이스로 변환 @@ -960,6 +968,12 @@ async def generate_slide( yield {"event": "progress", "data": "4/7 슬라이드 조립 중..."} async def stage_3(context: PipelineContext) -> dict: + # Phase X-BX': Type B는 Stage 2에서 이미 완전한 HTML → renderer 스킵 + if context.analysis.layout_template == "B": + logger.info("[Stage 3] Type B: renderer 스킵 (generated_html 직접 사용)") + return {"rendered_html": context.generated_html} + + # Type A: 기존 renderer 코드 그대로 from src.renderer import render_slide_from_html analysis_dict = {