diff --git a/app.py b/app.py
index 81ab3ce..27c2e80 100644
--- a/app.py
+++ b/app.py
@@ -13,8 +13,13 @@ from flask import Flask, render_template, request, jsonify, Response, session
 from datetime import datetime
 import io
 import re
+from flask import send_file
+from datetime import datetime
+import tempfile
+from converters.pipeline.router import process_document
 from api_config import API_KEYS
 
+
 app = Flask(__name__)
 app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024  # 16MB max
 app.config['SECRET_KEY'] = os.environ.get('SECRET_KEY', 'geulbeot-light-secret-key-v2')
@@ -81,7 +86,6 @@ def get_refine_prompt():
 
 위 피드백을 반영하여 수정된 완전한 HTML을 출력하세요."""
 
-
 # ============== API 호출 함수 ==============
 
 def call_claude(system_prompt, user_message, max_tokens=8000):
@@ -479,6 +483,45 @@ def hwp_script():
     """HWP 변환 스크립트 안내"""
     return render_template('hwp_guide.html')
 
+@app.route('/generate-report', methods=['POST'])
+def generate_report_api():
+    """Generate a report through the router pipeline.
+
+    Reads a JSON object from the request body. ``content`` is required;
+    ``folder_path``, ``cover``, ``toc``, ``divider`` and ``instruction`` are
+    optional and are forwarded to ``process_document`` as options.
+
+    Returns:
+        The router result as JSON when it reports success; otherwise a JSON
+        ``{'error': ...}`` payload with status 400 (missing or non-text
+        content) or 500 (processing failure).
+    """
+    try:
+        # silent=True yields None for a missing/malformed JSON body instead of
+        # raising, so bad requests are answered with 400 below rather than 500.
+        data = request.get_json(silent=True)
+        if not isinstance(data, dict):
+            data = {}
+
+        # HTML content (read from a folder or entered directly)
+        content = data.get('content', '')
+        if not isinstance(content, str) or not content.strip():
+            return jsonify({'error': '내용이 비어있습니다.'}), 400
+
+        # Options
+        options = {
+            'folder_path': data.get('folder_path', ''),
+            'cover': data.get('cover', False),
+            'toc': data.get('toc', False),
+            'divider': data.get('divider', False),
+            'instruction': data.get('instruction', '')
+        }
+
+        # Hand over to the router
+        result = process_document(content, options)
+        if result.get('success'):
+            return jsonify(result)
+        return jsonify({'error': result.get('error', '처리 실패')}), 500
+
+    except Exception as e:
+        # Keep the full traceback in the server log; expose it to the client
+        # only in debug mode so internal paths and code are not leaked.
+        app.logger.exception('generate-report failed')
+        payload = {'error': str(e)}
+        if app.debug:
+            import traceback
+            payload['trace'] = traceback.format_exc()
+        return jsonify(payload), 500
+
+@app.route('/assets/<path:filename>')
+def serve_assets(filename):
+    """Serve a file from the local assets folder.
+
+    The folder defaults to the original hard-coded location and can be
+    overridden with the ``ASSETS_DIR`` environment variable.
+
+    Args:
+        filename: Path of the requested asset, relative to the assets folder.
+    """
+    # Local import keeps this block self-contained.
+    from flask import send_from_directory
+
+    assets_dir = os.environ.get(
+        'ASSETS_DIR',
+        r"D:\for python\geulbeot-light\geulbeot-light\output\assets",
+    )
+    # send_from_directory joins the path safely and answers 404 for anything
+    # that would escape assets_dir; send_file(os.path.join(...)) would happily
+    # follow "../" segments in the URL.
+    return send_from_directory(assets_dir, filename)
+
 
 @app.route('/health')
 def health():
@@ -486,6 +529,50 @@ def health():
     return jsonify({'status': 'healthy', 'version': '2.0.0'})
 
 
+# ===== HWP 변환 =====
+@app.route('/export-hwp', methods=['POST'])
+def export_hwp():
+    """Convert posted HTML to an HWP file and return it as a download.
+
+    Expects a JSON object with ``html`` (required) and ``doc_type``
+    (``'briefing'`` by default; any other value selects the generic converter).
+
+    Returns:
+        The generated ``.hwp`` file as an attachment, or a JSON
+        ``{'error': ...}`` payload with status 400 (no HTML) or 500
+        (converter missing or conversion failed).
+    """
+    import shutil
+
+    work_dir = None
+    converter = None
+    try:
+        data = request.get_json(silent=True)
+        if not isinstance(data, dict):
+            data = {}
+        html_content = data.get('html', '')
+        doc_type = data.get('doc_type', 'briefing')
+
+        if not html_content or not isinstance(html_content, str):
+            return jsonify({'error': 'HTML 내용이 없습니다'}), 400
+
+        # A private directory per request: fixed file names in the shared temp
+        # folder would let concurrent requests overwrite each other's files.
+        work_dir = tempfile.mkdtemp(prefix='geulbeot_')
+        html_path = os.path.join(work_dir, 'geulbeot_temp.html')
+        hwp_path = os.path.join(work_dir, 'geulbeot_output.hwp')
+
+        # Save the HTML
+        with open(html_path, 'w', encoding='utf-8') as f:
+            f.write(html_content)
+
+        # Import and run the converter
+        if doc_type == 'briefing':
+            from converters.html_to_hwp_briefing import HtmlToHwpConverter
+        else:
+            from converters.html_to_hwp import HtmlToHwpConverter
+
+        converter = HtmlToHwpConverter(visible=False)
+        converter.convert(html_path, hwp_path)
+        # Close before reading the output so the file is no longer in use.
+        converter.close()
+        converter = None
+
+        # Load the result into memory so the work directory can be removed
+        # before the response is streamed.
+        with open(hwp_path, 'rb') as f:
+            payload = io.BytesIO(f.read())
+
+        # Send the file
+        return send_file(
+            payload,
+            as_attachment=True,
+            download_name=f'report_{datetime.now().strftime("%Y%m%d_%H%M%S")}.hwp',
+            mimetype='application/x-hwp'
+        )
+
+    except ImportError as e:
+        return jsonify({'error': f'pyhwpx 필요: {str(e)}'}), 500
+    except Exception as e:
+        return jsonify({'error': str(e)}), 500
+    finally:
+        # Reached on every path: release the HWP instance if conversion
+        # failed midway, then drop the per-request files.
+        if converter is not None:
+            converter.close()
+        if work_dir is not None:
+            shutil.rmtree(work_dir, ignore_errors=True)
+
+
 if __name__ == '__main__':
     port = int(os.environ.get('PORT', 5000))
     debug = os.environ.get('FLASK_DEBUG', 'False').lower() == 'true'
diff --git a/converters/__init__.py b/converters/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/converters/html_to_hwp.py b/converters/html_to_hwp.py
new file mode 100644
index 0000000..0c143d8
--- /dev/null
+++ b/converters/html_to_hwp.py
@@ -0,0 +1,573 @@
+# -*- coding: utf-8 -*-
+"""
+HTML → HWP 변환기 v11
+
+✅ 이미지: sizeoption=0 (원본 크기) 또는 width/height 지정
+✅ 페이지번호: ctrl 코드 방식으로 수정
+✅ 나머지는 v10 유지
+
+pip install pyhwpx beautifulsoup4 pillow
+"""
+
+from pyhwpx import Hwp
+from bs4 import BeautifulSoup, NavigableString
+import os, re
+
+# PIL 선택적 import (이미지 크기 확인용)
+try:
+    from PIL import Image
+    HAS_PIL = True
+except ImportError:
+    HAS_PIL = False
+    print("[알림] PIL 없음 - 이미지 원본 크기로 삽입")
+
+class Config:
+    """Page layout constants; the converter passes them through ``_mm``, i.e. millimetres."""
+
+    # Page margins
+    MARGIN_LEFT = 20
+    MARGIN_RIGHT = 20
+    MARGIN_TOP = 20
+    MARGIN_BOTTOM = 15
+    # Header / footer heights
+    HEADER_LEN = 10
+    FOOTER_LEN = 10
+    MAX_IMAGE_WIDTH = 150  # mm (maximum image width)
+
+class StyleParser:
+    """Resolve font size, colour and weight for an element from built-in tag/class defaults."""
+
+    def __init__(self):
+        # Defaults keyed by tag name or CSS class name; class entries are
+        # applied after tag entries in get_element_style().
+        self.class_styles = {
+            'h1': {'font-size': '20pt', 'color': '#008000'},
+            'h2': {'font-size': '16pt', 'color': '#03581d'},
+            'h3': {'font-size': '13pt', 'color': '#228B22'},
+            'p': {'font-size': '11pt', 'color': '#333333'},
+            'li': {'font-size': '11pt', 'color': '#333333'},
+            'th': {'font-size': '9pt', 'color': '#006400'},
+            'td': {'font-size': '9.5pt', 'color': '#333333'},
+            'toc-lvl-1': {'font-size': '13pt', 'font-weight': '900', 'color': '#006400'},
+            'toc-lvl-2': {'font-size': '11pt', 'color': '#333333'},
+            'toc-lvl-3': {'font-size': '10pt', 'color': '#666666'},
+        }
+
+    def get_element_style(self, elem):
+        """Return the merged style dict for *elem*: tag defaults first, then each known class."""
+        style = {}
+        tag = elem.name if hasattr(elem, 'name') else None
+        if tag and tag in self.class_styles:
+            style.update(self.class_styles[tag])
+        classes = elem.get('class', []) if hasattr(elem, 'get') else []
+        for cls in classes:
+            if cls in self.class_styles:
+                style.update(self.class_styles[cls])
+        return style
+
+    def parse_size(self, s):
+        """Return the first number found in *s* as a float, or 11 when there is none.
+
+        The pattern requires at least one digit, so inputs such as ``"..."``
+        fall back to 11 instead of raising ``ValueError`` in ``float()``.
+        """
+        m = re.search(r'(\d*\.?\d+)', str(s)) if s else None
+        return float(m.group(1)) if m else 11
+
+    def parse_color(self, c):
+        """Normalise a CSS colour to ``#RRGGBB`` (upper case).
+
+        Accepts ``#rrggbb``, the ``#rgb`` shorthand and ``rgb()/rgba()``;
+        anything else, including empty input, yields ``'#000000'``.
+        """
+        if not c:
+            return '#000000'
+        c = str(c).strip().lower()
+        if re.match(r'^#[0-9a-f]{6}$', c):
+            return c.upper()
+        if re.match(r'^#[0-9a-f]{3}$', c):
+            # Shorthand: each digit is doubled (#abc -> #AABBCC).
+            return '#' + ''.join(ch * 2 for ch in c[1:]).upper()
+        m = re.search(r'rgb[a]?\s*\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)', c)
+        if not m:
+            return '#000000'
+        # Clamp so an out-of-range component cannot produce a 7+ digit string.
+        r, g, b = (min(255, int(v)) for v in m.groups())
+        return f'#{r:02X}{g:02X}{b:02X}'
+
+    def is_bold(self, style):
+        """Return True when the style's ``font-weight`` is one of bold/700/800/900."""
+        return style.get('font-weight', '') in ['bold', '700', '800', '900']
+
+
+class HtmlToHwpConverter:
+    def __init__(self, visible=True):
+        """Start an HWP instance and reset the per-conversion state.
+
+        Args:
+            visible: Forwarded to ``Hwp(visible=...)``.
+        """
+        self.hwp = Hwp(visible=visible)
+        # Helpers
+        self.cfg, self.sp = Config(), StyleParser()
+        # Conversion state; convert() resets these again for every run.
+        self.base_path = ""
+        self.image_count = 0
+        self.is_first_h1 = True
+    
+    def _mm(self, mm):
+        """Convert millimetres to HWP units via ``Hwp.MiliToHwpUnit``."""
+        return self.hwp.MiliToHwpUnit(mm)
+    def _pt(self, pt):
+        """Convert points to HWP units via ``Hwp.PointToHwpUnit``."""
+        return self.hwp.PointToHwpUnit(pt)
+    def _rgb(self, c):
+        """Turn a ``#RRGGBB`` string into an HWP colour; inputs with fewer than six digits give black."""
+        digits = c.lstrip('#')
+        if len(digits) < 6:
+            return self.hwp.RGBColor(0, 0, 0)
+        red, green, blue = (int(digits[i:i + 2], 16) for i in (0, 2, 4))
+        return self.hwp.RGBColor(red, green, blue)
+    
+    def _setup_page(self):
+        """Apply the margins and header/footer heights from ``self.cfg`` to the page.
+
+        Best-effort: a failure is reported as a warning and the document keeps
+        whatever page setup it already had.
+        """
+        try:
+            self.hwp.HAction.GetDefault("PageSetup", self.hwp.HParameterSet.HSecDef.HSet)
+            s = self.hwp.HParameterSet.HSecDef
+            s.PageDef.LeftMargin = self._mm(self.cfg.MARGIN_LEFT)
+            s.PageDef.RightMargin = self._mm(self.cfg.MARGIN_RIGHT)
+            s.PageDef.TopMargin = self._mm(self.cfg.MARGIN_TOP)
+            s.PageDef.BottomMargin = self._mm(self.cfg.MARGIN_BOTTOM)
+            s.PageDef.HeaderLen = self._mm(self.cfg.HEADER_LEN)
+            s.PageDef.FooterLen = self._mm(self.cfg.FOOTER_LEN)
+            self.hwp.HAction.Execute("PageSetup", s.HSet)
+        except Exception as e:
+            # Catch Exception rather than everything so Ctrl-C still
+            # propagates, and report the reason like the header helpers do.
+            print(f"    [경고] 페이지 설정: {e}")
+    
+    def _create_header(self, right_text=""):
+        """Create the page header and write *right_text* right-aligned in it.
+
+        With an empty *right_text* the header is created but left empty.
+        Failures are printed as a warning and otherwise ignored.
+        """
+        print(f"  → 머리말 생성: {right_text if right_text else '(초기화)'}")
+        try:
+            # Run the HeaderFooter action; this file uses CtrlType 0 in the
+            # header helpers and 1 in the footer helper.
+            self.hwp.HAction.GetDefault("HeaderFooter", self.hwp.HParameterSet.HHeaderFooter.HSet)
+            self.hwp.HParameterSet.HHeaderFooter.HSet.SetItem("HeaderFooterStyle", 0)
+            self.hwp.HParameterSet.HHeaderFooter.HSet.SetItem("HeaderFooterCtrlType", 0)
+            self.hwp.HAction.Execute("HeaderFooter", self.hwp.HParameterSet.HHeaderFooter.HSet)
+            
+            # Right-aligned, 9pt, dark grey
+            self.hwp.HAction.Run("ParagraphShapeAlignRight")
+            self._set_font(9, False, '#333333')
+            if right_text:
+                self.hwp.insert_text(right_text)
+            
+            # Leave the header editing area
+            self.hwp.HAction.Run("CloseEx")
+        except Exception as e:
+            print(f"    [경고] 머리말: {e}")
+    
+    # ═══════════════════════════════════════════════════════════════
+    # 꼬리말 - 페이지 번호 (수정)
+    # ═══════════════════════════════════════════════════════════════
+    def _create_footer(self, left_text=""):
+        """Create the page footer with *left_text* and request a bottom-right page number.
+
+        NOTE(review): unlike the header helpers this method has no try/except,
+        so any automation error here propagates to the caller of convert().
+        """
+        print(f"  → 꼬리말: {left_text}")
+        
+        # 1. Open the footer
+        self.hwp.HAction.GetDefault("HeaderFooter", self.hwp.HParameterSet.HHeaderFooter.HSet)
+        self.hwp.HParameterSet.HHeaderFooter.HSet.SetItem("HeaderFooterStyle", 0)
+        self.hwp.HParameterSet.HHeaderFooter.HSet.SetItem("HeaderFooterCtrlType", 1)
+        self.hwp.HAction.Execute("HeaderFooter", self.hwp.HParameterSet.HHeaderFooter.HSet)
+        
+        # 2. Left-aligned title, 8pt
+        self.hwp.HAction.Run("ParagraphShapeAlignLeft")
+        self._set_font(8, False, '#666666')
+        self.hwp.insert_text(left_text)
+        
+        # 3. Close the footer
+        self.hwp.HAction.Run("CloseEx")
+        
+        # 4. Page number (bottom right)
+        self.hwp.HAction.GetDefault("PageNumPos", self.hwp.HParameterSet.HPageNumPos.HSet)
+        self.hwp.HParameterSet.HPageNumPos.DrawPos = self.hwp.PageNumPosition("BottomRight")
+        self.hwp.HAction.Execute("PageNumPos", self.hwp.HParameterSet.HPageNumPos.HSet)
+
+    def _new_section_with_header(self, header_text):
+        """Start a new section, then set its header to *header_text* (right-aligned).
+
+        Failures are printed as a warning and otherwise ignored.
+        """
+        print(f"    → 새 구역 머리말: {header_text}")
+        try:
+            self.hwp.HAction.Run("BreakSection")
+            
+            # Same HeaderFooter call as _create_header (CtrlType 0)
+            self.hwp.HAction.GetDefault("HeaderFooter", self.hwp.HParameterSet.HHeaderFooter.HSet)
+            self.hwp.HParameterSet.HHeaderFooter.HSet.SetItem("HeaderFooterStyle", 0)
+            self.hwp.HParameterSet.HHeaderFooter.HSet.SetItem("HeaderFooterCtrlType", 0)
+            self.hwp.HAction.Execute("HeaderFooter", self.hwp.HParameterSet.HHeaderFooter.HSet)
+            
+            # Clear whatever is selected by SelectAll before writing the new text
+            # (presumably the header carried over from the previous section — TODO confirm)
+            self.hwp.HAction.Run("SelectAll")
+            self.hwp.HAction.Run("Delete")
+            
+            # Right-aligned, 9pt, dark grey
+            self.hwp.HAction.Run("ParagraphShapeAlignRight")
+            self._set_font(9, False, '#333333')
+            self.hwp.insert_text(header_text)
+            
+            self.hwp.HAction.Run("CloseEx")
+        except Exception as e:
+            print(f"    [경고] 구역 머리말: {e}")
+
+
+    def _set_font(self, size=11, bold=False, color='#000000'):
+        """Set the current character format: 맑은 고딕 face with the given size, weight and ``#RRGGBB`` colour."""
+        text_color = self._rgb(color)
+        self.hwp.set_font(
+            FaceName='맑은 고딕',
+            Height=size,
+            Bold=bold,
+            TextColor=text_color,
+        )
+    
+    def _set_para(self, align='justify', lh=170, left=0, indent=0, before=0, after=0):
+        """Set alignment and paragraph shape for the current paragraph.
+
+        Args:
+            align: 'left', 'center', 'right' or 'justify'; other values leave
+                the alignment untouched.
+            lh: Value written to ``LineSpacing`` with ``LineSpaceType`` 0
+                (presumably a percentage — TODO confirm).
+            left: Left margin, converted with ``_mm``.
+            indent: First-line indent, converted with ``_mm``.
+            before: Space before the paragraph, converted with ``_pt``.
+            after: Space after the paragraph, converted with ``_pt``.
+        """
+        acts = {'left':'ParagraphShapeAlignLeft','center':'ParagraphShapeAlignCenter',
+                'right':'ParagraphShapeAlignRight','justify':'ParagraphShapeAlignJustify'}
+        if align in acts: self.hwp.HAction.Run(acts[align])
+        try:
+            self.hwp.HAction.GetDefault("ParagraphShape", self.hwp.HParameterSet.HParaShape.HSet)
+            p = self.hwp.HParameterSet.HParaShape
+            p.LineSpaceType, p.LineSpacing = 0, lh
+            p.LeftMargin = self._mm(left)
+            p.IndentMargin = self._mm(indent)
+            p.SpaceBeforePara = self._pt(before)
+            p.SpaceAfterPara = self._pt(after)
+            p.BreakNonLatinWord = 0
+            self.hwp.HAction.Execute("ParagraphShape", p.HSet)
+        except Exception as e:
+            # Still best-effort, but no longer silent and no longer swallowing
+            # KeyboardInterrupt/SystemExit.
+            print(f"    [경고] 문단 모양: {e}")
+    
+    def _set_cell_bg(self, color):
+        """Fill the current table cell with *color* (``#RRGGBB``); failures only produce a warning."""
+        try:
+            self.hwp.HAction.GetDefault("CellBorderFill", self.hwp.HParameterSet.HCellBorderFill.HSet)
+            p = self.hwp.HParameterSet.HCellBorderFill
+            p.FillAttr.type = self.hwp.BrushType("NullBrush|WinBrush")
+            p.FillAttr.WinBrushFaceStyle = self.hwp.HatchStyle("None")
+            p.FillAttr.WinBrushHatchColor = self._rgb('#000000')
+            p.FillAttr.WinBrushFaceColor = self._rgb(color)
+            p.FillAttr.WindowsBrush = 1
+            self.hwp.HAction.Execute("CellBorderFill", p.HSet)
+        except Exception as e:
+            # Best-effort styling: report instead of hiding the failure, and
+            # let KeyboardInterrupt/SystemExit through.
+            print(f"    [경고] 셀 배경: {e}")
+    
+    def _underline_box(self, text, size=14, color='#008000'):
+        """Write *text* in a 1x1 table that shows only a coloured bottom border.
+
+        If any automation step fails, the text is written as a plain bold
+        paragraph instead.
+
+        Args:
+            text: Heading text.
+            size: Font size, converted with ``_pt``.
+            color: ``#RRGGBB`` colour used for the text and the bottom border.
+        """
+        try:
+            # 1x1 table, 168mm x 10mm
+            self.hwp.HAction.GetDefault("TableCreate", self.hwp.HParameterSet.HTableCreation.HSet)
+            t = self.hwp.HParameterSet.HTableCreation
+            t.Rows, t.Cols, t.WidthType, t.HeightType = 1, 1, 0, 0
+            t.WidthValue, t.HeightValue = self._mm(168), self._mm(10)
+            self.hwp.HAction.Execute("TableCreate", t.HSet)
+            self.hwp.HAction.GetDefault("InsertText", self.hwp.HParameterSet.HInsertText.HSet)
+            self.hwp.HParameterSet.HInsertText.Text = text
+            self.hwp.HAction.Execute("InsertText", self.hwp.HParameterSet.HInsertText.HSet)
+            # Select the cell and set size/colour on its text
+            self.hwp.HAction.Run("TableCellBlock")
+            self.hwp.HAction.GetDefault("CharShape", self.hwp.HParameterSet.HCharShape.HSet)
+            self.hwp.HParameterSet.HCharShape.Height = self._pt(size)
+            self.hwp.HParameterSet.HCharShape.TextColor = self._rgb(color)
+            self.hwp.HAction.Execute("CharShape", self.hwp.HParameterSet.HCharShape.HSet)
+            # Remove the top, right and left borders
+            self.hwp.HAction.GetDefault("CellBorder", self.hwp.HParameterSet.HCellBorderFill.HSet)
+            c = self.hwp.HParameterSet.HCellBorderFill
+            c.BorderTypeTop = self.hwp.HwpLineType("None")
+            c.BorderTypeRight = self.hwp.HwpLineType("None")
+            c.BorderTypeLeft = self.hwp.HwpLineType("None")
+            self.hwp.HAction.Execute("CellBorder", c.HSet)
+            # Colour and thicken the bottom border
+            self.hwp.HAction.GetDefault("CellBorder", self.hwp.HParameterSet.HCellBorderFill.HSet)
+            c = self.hwp.HParameterSet.HCellBorderFill
+            c.BorderColorBottom = self._rgb(color)
+            c.BorderWidthBottom = self.hwp.HwpLineWidth("0.4mm")
+            self.hwp.HAction.Execute("CellBorder", c.HSet)
+            # Leave the table and continue at the end of the document
+            self.hwp.HAction.Run("Cancel")
+            self.hwp.HAction.Run("CloseEx")
+            self.hwp.HAction.Run("MoveDocEnd")
+        except Exception as e:
+            # Narrowed from a bare except so Ctrl-C is not turned into the
+            # fallback, and the reason for falling back is visible.
+            print(f"    [경고] 밑줄 상자: {e}")
+            self._set_font(size, True, color)
+            self.hwp.insert_text(text)
+            self.hwp.BreakPara()
+    
+    def _update_header(self, new_title):
+        """Replace the header text with *new_title* (right-aligned).
+
+        Failures are printed as a warning and otherwise ignored.
+        NOTE(review): no caller is visible in this part of the file — confirm it is still used.
+        """
+        try:
+            # Enter edit mode on the existing header
+            self.hwp.HAction.GetDefault("HeaderFooter", self.hwp.HParameterSet.HHeaderFooter.HSet)
+            self.hwp.HParameterSet.HHeaderFooter.HSet.SetItem("HeaderFooterStyle", 2)  # edit mode
+            self.hwp.HParameterSet.HHeaderFooter.HSet.SetItem("HeaderFooterCtrlType", 0)
+            self.hwp.HAction.Execute("HeaderFooter", self.hwp.HParameterSet.HHeaderFooter.HSet)
+            
+            # Delete the existing content
+            self.hwp.HAction.Run("SelectAll")
+            self.hwp.HAction.Run("Delete")
+            
+            # Insert the new content
+            self.hwp.HAction.Run("ParagraphShapeAlignRight")
+            self._set_font(9, False, '#333333')
+            self.hwp.insert_text(new_title)
+            
+            self.hwp.HAction.Run("CloseEx")
+        except Exception as e:
+            print(f"    [경고] 머리말 업데이트: {e}")
+
+    def _insert_heading(self, elem):
+        """Write an ``<h1>``/``<h2>``/``<h3>`` element.
+
+        h1: sets the page header to the heading text (the first h1 creates the
+        header, later ones open a new section) and draws the underlined title
+        box. h2/h3: a bold line prefixed with "■ " or "▸ ".
+        """
+        level = int(elem.name[1]) if elem.name in ('h1', 'h2', 'h3') else 1
+        text = elem.get_text(strip=True)
+        style = self.sp.get_element_style(elem)
+        size = self.sp.parse_size(style.get('font-size', '14pt'))
+        color = self.sp.parse_color(style.get('color', '#008000'))
+
+        if level == 1:
+            if not self.is_first_h1:
+                self._new_section_with_header(text)
+            else:
+                self._create_header(text)
+                self.is_first_h1 = False
+
+            self._set_para('left', 130, before=0, after=0)
+            self._underline_box(text, size, color)
+            self.hwp.BreakPara()
+            self._set_para('left', 130, before=0, after=15)
+            self.hwp.BreakPara()
+            return
+
+        # Only levels 2 and 3 remain here.
+        if level == 2:
+            self._set_para('left', 150, before=20, after=8)
+            bullet = "■ "
+        else:
+            self._set_para('left', 140, left=3, before=12, after=5)
+            bullet = "▸ "
+        self._set_font(size, True, color)
+        self.hwp.insert_text(bullet + text)
+        self.hwp.BreakPara()
+    
+    def _insert_paragraph(self, elem):
+        """Write a ``<p>`` element as one justified paragraph.
+
+        Paragraphs without text are skipped. When the paragraph contains
+        ``<b>``/``<strong>``, it is written run by run so that only the text
+        inside those tags is bold; otherwise the whole text is written with the
+        weight taken from the element style.
+        """
+        txt = elem.get_text(strip=True)
+        if not txt:
+            return
+        st = self.sp.get_element_style(elem)
+        sz = self.sp.parse_size(st.get('font-size', '11pt'))
+        cl = self.sp.parse_color(st.get('color', '#333333'))
+        self._set_para('justify', 170, left=0, indent=3, before=0, after=3)
+
+        if elem.find(['b', 'strong']):
+            # Walk every text node rather than only the direct children, so
+            # text inside other inline tags (<span>, <a>, <em>, bold nested in
+            # a wrapper, ...) is written instead of being dropped.
+            for node in elem.descendants:
+                if not isinstance(node, NavigableString):
+                    continue
+                piece = str(node)
+                # Bold when a <b>/<strong> lies between this text and the <p>.
+                bold = False
+                for ancestor in node.parents:
+                    if ancestor is elem:
+                        break
+                    if ancestor.name in ('b', 'strong'):
+                        bold = True
+                        break
+                # Whitespace-only plain runs are skipped; bold text is written verbatim.
+                if bold:
+                    if not piece:
+                        continue
+                elif not piece.strip():
+                    continue
+                self._set_font(sz, bold, cl)
+                self.hwp.insert_text(piece)
+        else:
+            self._set_font(sz, self.sp.is_bold(st), cl)
+            self.hwp.insert_text(txt)
+        self.hwp.BreakPara()
+    
+    def _insert_list(self, elem):
+        """Write the direct ``<li>`` children of *elem*, one paragraph each.
+
+        Items of an ``<ol>`` are numbered ("1. "), all others get "• ". Items
+        with a ``toc-*`` class are left-aligned with a per-level margin; other
+        items are justified with a hanging indent after the marker.
+        """
+        ordered = elem.name == 'ol'
+        # (class, (left margin, space before)) checked in this order
+        toc_layout = (
+            ('toc-lvl-1', (0, 8)),
+            ('toc-lvl-2', (7, 3)),
+            ('toc-lvl-3', (14, 1)),
+        )
+        for index, item in enumerate(elem.find_all('li', recursive=False)):
+            item_style = self.sp.get_element_style(item)
+            classes = item.get('class', [])
+            text = item.get_text(strip=True)
+            in_toc = any('toc-' in name for name in classes)
+
+            for level_class, layout in toc_layout:
+                if level_class in classes:
+                    margin, space_before = layout
+                    break
+            else:
+                margin, space_before = 4, 2
+
+            marker = f"{index + 1}. " if ordered else "• "
+            size = self.sp.parse_size(item_style.get('font-size', '11pt'))
+            color = self.sp.parse_color(item_style.get('color', '#333333'))
+            bold = self.sp.is_bold(item_style)
+
+            alignment = 'left' if in_toc else 'justify'
+            self._set_para(alignment, 170, left=margin, indent=0, before=space_before, after=1)
+            self._set_font(size, bold, color)
+            if in_toc:
+                self.hwp.insert_text(marker + text)
+            else:
+                self.hwp.insert_text(marker)
+                # Hanging indent: following lines align with the text after the marker
+                self.hwp.HAction.Run("ParagraphShapeIndentAtCaret")
+                self.hwp.insert_text(text)
+            self.hwp.BreakPara()
+    
+    def _insert_table(self, table_elem):
+        """Write an HTML ``<table>`` as an HWP table of the same grid size.
+
+        First pass: flatten the rows into a rectangular grid of cell texts,
+        recording which grid positions are covered by a colspan/rowspan.
+        Second pass: create the table and fill it cell by cell, moving right
+        after each cell. Covered positions are left empty; no cells are merged
+        here.
+        """
+        rows_data, cell_styles, occupied, max_cols = [], {}, {}, 0
+        for ri, tr in enumerate(table_elem.find_all('tr')):
+            row, ci = [], 0
+            for cell in tr.find_all(['td','th']):
+                # Skip grid positions already claimed by a span from an earlier cell
+                while (ri,ci) in occupied: row.append(""); ci+=1
+                txt = cell.get_text(strip=True)
+                cs, rs = int(cell.get('colspan',1)), int(cell.get('rowspan',1))
+                # Every <th> and every cell of the first row is styled as a header
+                cell_styles[(ri,ci)] = {'is_header': cell.name=='th' or ri==0}
+                row.append(txt)
+                # Mark all positions covered by this cell except its own origin
+                for dr in range(rs):
+                    for dc in range(cs):
+                        if dr>0 or dc>0: occupied[(ri+dr,ci+dc)] = True
+                for _ in range(cs-1): row.append("")
+                ci += cs
+            rows_data.append(row)
+            max_cols = max(max_cols, len(row))
+        # Pad short rows so the grid is rectangular
+        for row in rows_data:
+            while len(row) < max_cols: row.append("")
+        
+        rc = len(rows_data)
+        if rc == 0 or max_cols == 0: return
+        print(f"    표: {rc}행 × {max_cols}열")
+        
+        self._set_para('left', 130, before=5, after=0)
+        self.hwp.create_table(rc, max_cols, treat_as_char=True)
+        
+        for ri, row in enumerate(rows_data):
+            for ci in range(max_cols):
+                # Covered position: step over it without writing
+                if (ri,ci) in occupied: self.hwp.HAction.Run("MoveRight"); continue
+                txt = row[ci] if ci < len(row) else ""
+                hdr = cell_styles.get((ri,ci),{}).get('is_header', False)
+                if hdr: self._set_cell_bg('#E8F5E9')
+                self.hwp.HAction.Run("ParagraphShapeAlignCenter")
+                self._set_font(9 if hdr else 9.5, hdr, '#006400' if hdr else '#333333')
+                self.hwp.insert_text(str(txt))
+                # No move after the very last cell
+                if not (ri==rc-1 and ci==max_cols-1): self.hwp.HAction.Run("MoveRight")
+        
+        # Leave the table and add a spacing paragraph after it
+        self.hwp.HAction.Run("Cancel")
+        self.hwp.HAction.Run("CloseEx")
+        self.hwp.HAction.Run("MoveDocEnd")
+        self._set_para('left', 130, before=5, after=5)
+        self.hwp.BreakPara()
+    
+    # ═══════════════════════════════════════════════════════════════
+    # 이미지 삽입 - sizeoption 수정 ★
+    # ═══════════════════════════════════════════════════════════════
+    def _insert_image(self, src, caption=""):
+        """Insert the image at *src* as a centred paragraph, with an optional caption.
+
+        Relative paths are resolved against ``self.base_path``. When the file
+        does not exist, a grey placeholder line is written instead. Insertion
+        is tried three ways in turn: original size, explicit size computed
+        with PIL (capped at ``Config.MAX_IMAGE_WIDTH``), then the
+        ``insert_picture`` defaults.
+
+        Args:
+            src: Image path taken from the HTML ``src`` attribute.
+            caption: Caption text; written only when the image was inserted.
+        """
+        # Guard first: an empty or None src is not an image, so it must not be
+        # counted, logged, or passed to os.path.basename().
+        if not src:
+            return
+
+        self.image_count += 1
+        print(f"    📷 이미지 #{self.image_count}: {os.path.basename(src)}")
+
+        # Relative path -> absolute path
+        if not os.path.isabs(src):
+            full_path = os.path.normpath(os.path.join(self.base_path, src))
+        else:
+            full_path = src
+
+        if not os.path.exists(full_path):
+            print(f"       ❌ 파일 없음: {full_path}")
+            self._set_font(9, False, '#999999')
+            self._set_para('center', 130)
+            self.hwp.insert_text(f"[이미지 없음: {os.path.basename(src)}]")
+            self.hwp.BreakPara()
+            return
+
+        try:
+            self._set_para('center', 130, before=5, after=3)
+
+            inserted = False
+
+            # Method 1: sizeoption=0 (original size)
+            try:
+                self.hwp.insert_picture(full_path, sizeoption=0)
+                inserted = True
+                print(f"       ✅ 삽입 성공 (원본 크기)")
+            except Exception as e1:
+                # Report why the next method is being tried.
+                print(f"       ⚠ 원본 크기 삽입 실패: {e1}")
+
+            # Method 2: sizeoption=1 with explicit width/height
+            if not inserted and HAS_PIL:
+                try:
+                    with Image.open(full_path) as img:
+                        w_px, h_px = img.size
+                    # px -> mm at 96 DPI
+                    w_mm = w_px * 25.4 / 96
+                    h_mm = h_px * 25.4 / 96
+                    # Cap the width, keeping the aspect ratio
+                    if w_mm > self.cfg.MAX_IMAGE_WIDTH:
+                        ratio = self.cfg.MAX_IMAGE_WIDTH / w_mm
+                        w_mm = self.cfg.MAX_IMAGE_WIDTH
+                        h_mm = h_mm * ratio
+
+                    # NOTE(review): confirm whether insert_picture expects
+                    # width/height in mm or in HWP units; if mm, the _mm()
+                    # conversion here oversizes the picture.
+                    self.hwp.insert_picture(full_path, sizeoption=1,
+                                           width=self._mm(w_mm), height=self._mm(h_mm))
+                    inserted = True
+                    print(f"       ✅ 삽입 성공 ({w_mm:.0f}×{h_mm:.0f}mm)")
+                except Exception as e2:
+                    print(f"       ⚠ 지정 크기 삽입 실패: {e2}")
+
+            # Method 3: defaults
+            if not inserted:
+                try:
+                    self.hwp.insert_picture(full_path)
+                    inserted = True
+                    print(f"       ✅ 삽입 성공 (기본)")
+                except Exception as e3:
+                    print(f"       ❌ 삽입 실패: {e3}")
+                    self._set_font(9, False, '#FF0000')
+                    self.hwp.insert_text(f"[이미지 오류: {os.path.basename(src)}]")
+
+            self.hwp.BreakPara()
+
+            if caption and inserted:
+                self._set_font(9.5, True, '#666666')
+                self._set_para('center', 130, before=0, after=5)
+                self.hwp.insert_text(caption)
+                self.hwp.BreakPara()
+
+        except Exception as e:
+            print(f"       ❌ 오류: {e}")
+    
+    def _insert_highlight_box(self, elem):
+        """Write the text of *elem* inside a 1x1 table with a light green background.
+
+        Elements without text are skipped. Only the plain text is kept; any
+        markup inside the element is flattened by ``get_text``.
+        """
+        txt = elem.get_text(strip=True)
+        if not txt: return
+        self._set_para('left', 130, before=5, after=0)
+        self.hwp.create_table(1, 1, treat_as_char=True)
+        self._set_cell_bg('#E2ECE2')
+        self._set_font(11, False, '#333333')
+        self.hwp.insert_text(txt)
+        # Leave the table and add a spacing paragraph after it
+        self.hwp.HAction.Run("Cancel")
+        self.hwp.HAction.Run("CloseEx")
+        self.hwp.HAction.Run("MoveDocEnd")
+        self._set_para('left', 130, before=0, after=5)
+        self.hwp.BreakPara()
+    
+    def _process(self, elem):
+        """Dispatch one parsed HTML node to the matching insert helper.
+
+        Text nodes and script/style/template/noscript/head elements are
+        ignored. Container tags are walked recursively; any other tag that has
+        no handler is skipped together with its children.
+        """
+        if isinstance(elem, NavigableString):
+            return
+        tag = elem.name
+        if not tag or tag in ('script', 'style', 'template', 'noscript', 'head'):
+            return
+
+        if tag == 'img':
+            self._insert_image(elem.get('src', ''))
+            return
+        if tag == 'figure':
+            picture = elem.find('img')
+            if picture:
+                caption_node = elem.find('figcaption')
+                caption = caption_node.get_text(strip=True) if caption_node else ""
+                self._insert_image(picture.get('src', ''), caption)
+            return
+
+        handlers = {
+            'h1': self._insert_heading,
+            'h2': self._insert_heading,
+            'h3': self._insert_heading,
+            'p': self._insert_paragraph,
+            'table': self._insert_table,
+            'ul': self._insert_list,
+            'ol': self._insert_list,
+        }
+        handler = handlers.get(tag)
+        if handler is not None:
+            handler(elem)
+            return
+
+        # The highlight-box class is checked before generic container handling.
+        if 'highlight-box' in elem.get('class', []):
+            self._insert_highlight_box(elem)
+            return
+
+        if tag in ('div', 'section', 'article', 'main', 'body', 'html', 'span'):
+            for child in elem.children:
+                self._process(child)
+    
+    def convert(self, html_path, output_path):
+        """Convert the HTML file at *html_path* into an HWP document saved at *output_path*.
+
+        The footer text is the part of ``<title>`` before the first ":". When
+        the document has an element with id ``raw-container``, its
+        ``box-cover``, ``box-toc``, ``box-summary`` and ``box-content`` parts
+        are written in that order; otherwise the whole ``<body>`` is processed.
+
+        Args:
+            html_path: Path of the UTF-8 HTML input; its directory becomes the
+                base for relative image paths.
+            output_path: Path passed to ``Hwp.SaveAs``.
+        """
+        print("="*60)
+        print("HTML → HWP 변환기 v11")
+        print("  ✓ 이미지: sizeoption 수정")
+        print("  ✓ 페이지번호: 다중 방법 시도")
+        print("="*60)
+        
+        # Reset per-conversion state
+        self.base_path = os.path.dirname(os.path.abspath(html_path))
+        self.is_first_h1 = True
+        self.image_count = 0
+        
+        print(f"\n입력: {html_path}")
+        print(f"출력: {output_path}\n")
+        
+        with open(html_path, 'r', encoding='utf-8') as f:
+            soup = BeautifulSoup(f.read(), 'html.parser')
+        
+        title_tag = soup.find('title')
+        if title_tag:
+            full_title = title_tag.get_text(strip=True)
+            footer_title = full_title.split(':')[0].strip()  # text before the first ":"
+        else:
+            footer_title = ""
+
+        self.hwp.FileNew()
+        self._setup_page()
+        self._create_footer(footer_title)
+
+        raw = soup.find(id='raw-container')
+        if raw:
+            cover = raw.find(id='box-cover')
+            if cover:
+                print("  → 표지")
+                for ch in cover.children: self._process(ch)
+                self.hwp.HAction.Run("BreakPage")
+            toc = raw.find(id='box-toc')
+            if toc:
+                print("  → 목차")
+                # Reset so the next h1 creates the header instead of a new section
+                self.is_first_h1 = True
+                self._underline_box("목 차", 20, '#008000')
+                self.hwp.BreakPara(); self.hwp.BreakPara()
+                self._insert_list(toc.find('ul') or toc)
+                self.hwp.HAction.Run("BreakPage")
+            summary = raw.find(id='box-summary')
+            if summary:
+                print("  → 요약")
+                self.is_first_h1 = True
+                self._process(summary)
+                self.hwp.HAction.Run("BreakPage")
+            content = raw.find(id='box-content')
+            if content:
+                print("  → 본문")
+                self.is_first_h1 = True
+                self._process(content)
+        else:
+            # No raw-container: process the body (or the whole document)
+            self._process(soup.find('body') or soup)
+        
+        self.hwp.SaveAs(output_path)
+        print(f"\n✅ 저장: {output_path}")
+        print(f"   이미지: {self.image_count}개 처리")
+    
+    def close(self):
+        """Quit the HWP instance; a failure is reported as a warning, never raised."""
+        try:
+            self.hwp.Quit()
+        except Exception as e:
+            # Shutdown stays best-effort, but KeyboardInterrupt/SystemExit are
+            # no longer swallowed and the reason is visible.
+            print(f"    [경고] HWP 종료: {e}")
+
+
+def main():
+    """Convert the sample report with a visible HWP window (manual test entry point)."""
+    html_path = r"D:\for python\survey_test\output\generated\report.html"
+    output_path = r"D:\for python\survey_test\output\generated\report_v12.hwp"
+
+    conv = None
+    try:
+        conv = HtmlToHwpConverter(visible=True)
+        conv.convert(html_path, output_path)
+        input("\nEnter를 누르면 HWP가 닫힙니다...")  # optional pause before closing
+    except Exception as e:
+        print(f"\n[에러] {e}")
+        import traceback; traceback.print_exc()
+    finally:
+        # Close HWP on every path; previously a failed conversion left the
+        # HWP instance running.
+        if conv is not None:
+            conv.close()
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/converters/html_to_hwp_briefing.py b/converters/html_to_hwp_briefing.py
new file mode 100644
index 0000000..0c143d8
--- /dev/null
+++ b/converters/html_to_hwp_briefing.py
@@ -0,0 +1,573 @@
+# -*- coding: utf-8 -*-
+"""
+HTML → HWP 변환기 v11
+
+✅ 이미지: sizeoption=0 (원본 크기) 또는 width/height 지정
+✅ 페이지번호: ctrl 코드 방식으로 수정
+✅ 나머지는 v10 유지
+
+pip install pyhwpx beautifulsoup4 pillow
+"""
+
+from pyhwpx import Hwp
+from bs4 import BeautifulSoup, NavigableString
+import os, re
+
+# PIL 선택적 import (이미지 크기 확인용)
+try:
+    from PIL import Image
+    HAS_PIL = True
+except ImportError:
+    HAS_PIL = False
+    print("[알림] PIL 없음 - 이미지 원본 크기로 삽입")
+
+class Config:
+    """Page-layout constants read by the converter."""
+
+    # Page margins (converted with the converter's mm helper before use).
+    MARGIN_LEFT = 20
+    MARGIN_RIGHT = 20
+    MARGIN_TOP = 20
+    MARGIN_BOTTOM = 15
+    # Header and footer lengths.
+    HEADER_LEN = 10
+    FOOTER_LEN = 10
+    # mm (maximum image width)
+    MAX_IMAGE_WIDTH = 150
+
+class StyleParser:
+    """Resolve built-in tag/class styles and parse CSS size and colour values."""
+
+    def __init__(self):
+        # Built-in style table, keyed by tag name or CSS class name.
+        self.class_styles = {
+            'h1': {'font-size': '20pt', 'color': '#008000'},
+            'h2': {'font-size': '16pt', 'color': '#03581d'},
+            'h3': {'font-size': '13pt', 'color': '#228B22'},
+            'p': {'font-size': '11pt', 'color': '#333333'},
+            'li': {'font-size': '11pt', 'color': '#333333'},
+            'th': {'font-size': '9pt', 'color': '#006400'},
+            'td': {'font-size': '9.5pt', 'color': '#333333'},
+            'toc-lvl-1': {'font-size': '13pt', 'font-weight': '900', 'color': '#006400'},
+            'toc-lvl-2': {'font-size': '11pt', 'color': '#333333'},
+            'toc-lvl-3': {'font-size': '10pt', 'color': '#666666'},
+        }
+
+    def get_element_style(self, elem):
+        """Return the merged style dict for *elem*.
+
+        The tag's entry is applied first and then each class entry in order,
+        so class styles override tag styles.
+        """
+        style = {}
+        tag = elem.name if hasattr(elem, 'name') else None
+        if tag and tag in self.class_styles:
+            style.update(self.class_styles[tag])
+        classes = elem.get('class', []) if hasattr(elem, 'get') else []
+        if isinstance(classes, str):
+            # A plain string would otherwise be iterated character by character.
+            classes = classes.split()
+        for cls in classes or []:
+            if cls in self.class_styles:
+                style.update(self.class_styles[cls])
+        return style
+
+    def parse_size(self, s):
+        """Return the first number found in *s* as a float, or 11 if there is none."""
+        m = re.search(r'([\d.]+)', str(s)) if s else None
+        return float(m.group(1)) if m else 11
+
+    def parse_color(self, c):
+        """Normalise a CSS colour to upper-case ``#RRGGBB``.
+
+        Accepts ``#rrggbb``, the ``#rgb`` shorthand and ``rgb()/rgba()``
+        notation (components above 255 are clamped). Anything else, including
+        empty input, yields ``'#000000'``.
+        """
+        if not c:
+            return '#000000'
+        c = str(c).strip().lower()
+        if re.match(r'^#[0-9a-f]{6}$', c):
+            return c.upper()
+        short = re.match(r'^#([0-9a-f])([0-9a-f])([0-9a-f])$', c)
+        if short:
+            # Expand "#abc" to "#AABBCC".
+            return '#' + ''.join(d * 2 for d in short.groups()).upper()
+        m = re.search(r'rgba?\s*\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)', c)
+        if m:
+            # Clamp so each component always formats as exactly two hex digits.
+            r, g, b = (min(int(v), 255) for v in m.groups())
+            return f'#{r:02X}{g:02X}{b:02X}'
+        return '#000000'
+
+    def is_bold(self, style):
+        """Return True when the style's ``font-weight`` is bold, 700, 800 or 900."""
+        return style.get('font-weight', '') in ['bold', '700', '800', '900']
+
+
+class HtmlToHwpConverter:
+    def __init__(self, visible=True):
+        """Start an HWP instance and initialise conversion state.
+
+        Args:
+            visible: Forwarded to ``Hwp(visible=...)``.
+        """
+        self.hwp = Hwp(visible=visible)
+        self.cfg, self.sp = Config(), StyleParser()
+        # Directory used to resolve relative image paths; set by convert().
+        self.base_path = ""
+        # is_first_h1 selects between creating the header and opening a new section.
+        self.is_first_h1, self.image_count = True, 0
+    
+    def _mm(self, mm):
+        """Convert *mm* with ``Hwp.MiliToHwpUnit`` and return the result."""
+        hwp_units = self.hwp.MiliToHwpUnit(mm)
+        return hwp_units
+    def _pt(self, pt):
+        """Convert *pt* with ``Hwp.PointToHwpUnit`` and return the result."""
+        hwp_units = self.hwp.PointToHwpUnit(pt)
+        return hwp_units
+    def _rgb(self, c):
+        """Turn a ``#RRGGBB`` string into the value returned by ``Hwp.RGBColor``.
+
+        Strings with fewer than six characters after the leading ``#`` map to
+        black.
+        """
+        digits = c.lstrip('#')
+        if len(digits) < 6:
+            return self.hwp.RGBColor(0, 0, 0)
+        red, green, blue = (int(digits[i:i + 2], 16) for i in (0, 2, 4))
+        return self.hwp.RGBColor(red, green, blue)
+    
+    def _setup_page(self):
+        """Apply the margins and header/footer lengths from ``self.cfg``.
+
+        Best-effort: a failure is printed as a warning and the method returns
+        normally, leaving whatever page setup the document already has.
+        """
+        try:
+            self.hwp.HAction.GetDefault("PageSetup", self.hwp.HParameterSet.HSecDef.HSet)
+            s = self.hwp.HParameterSet.HSecDef
+            s.PageDef.LeftMargin = self._mm(self.cfg.MARGIN_LEFT)
+            s.PageDef.RightMargin = self._mm(self.cfg.MARGIN_RIGHT)
+            s.PageDef.TopMargin = self._mm(self.cfg.MARGIN_TOP)
+            s.PageDef.BottomMargin = self._mm(self.cfg.MARGIN_BOTTOM)
+            s.PageDef.HeaderLen = self._mm(self.cfg.HEADER_LEN)
+            s.PageDef.FooterLen = self._mm(self.cfg.FOOTER_LEN)
+            self.hwp.HAction.Execute("PageSetup", s.HSet)
+        except Exception as e:
+            # Report the failure instead of hiding it; interpreter-exit
+            # exceptions (KeyboardInterrupt, SystemExit) are no longer caught.
+            print(f"    [경고] 페이지 설정: {e}")
+    
+    def _create_header(self, right_text=""):
+        """Run the "HeaderFooter" action and write right-aligned header text.
+
+        Args:
+            right_text: Text inserted into the header; nothing is inserted
+                when it is empty.
+
+        Any exception is caught and printed as a warning.
+        """
+        print(f"  → 머리말 생성: {right_text if right_text else '(초기화)'}")
+        try:
+            # CtrlType 0 is the value this class uses for headers; the footer
+            # helper passes 1.
+            self.hwp.HAction.GetDefault("HeaderFooter", self.hwp.HParameterSet.HHeaderFooter.HSet)
+            self.hwp.HParameterSet.HHeaderFooter.HSet.SetItem("HeaderFooterStyle", 0)
+            self.hwp.HParameterSet.HHeaderFooter.HSet.SetItem("HeaderFooterCtrlType", 0)
+            self.hwp.HAction.Execute("HeaderFooter", self.hwp.HParameterSet.HHeaderFooter.HSet)
+            
+            # Right-aligned, 9pt, dark grey.
+            self.hwp.HAction.Run("ParagraphShapeAlignRight")
+            self._set_font(9, False, '#333333')
+            if right_text:
+                self.hwp.insert_text(right_text)
+            
+            # Leave the header editing area.
+            self.hwp.HAction.Run("CloseEx")
+        except Exception as e:
+            print(f"    [경고] 머리말: {e}")
+    
+    # ═══════════════════════════════════════════════════════════════
+    # 꼬리말 - 페이지 번호 (수정)
+    # ═══════════════════════════════════════════════════════════════
+    def _create_footer(self, left_text=""):
+        """Create a footer with left-aligned text and add a page number.
+
+        Args:
+            left_text: Text written into the footer.
+
+        Unlike the header helpers, this method has no try/except, so an error
+        from HWP propagates to the caller.
+        """
+        print(f"  → 꼬리말: {left_text}")
+        
+        # 1. Open the footer
+        self.hwp.HAction.GetDefault("HeaderFooter", self.hwp.HParameterSet.HHeaderFooter.HSet)
+        self.hwp.HParameterSet.HHeaderFooter.HSet.SetItem("HeaderFooterStyle", 0)
+        self.hwp.HParameterSet.HHeaderFooter.HSet.SetItem("HeaderFooterCtrlType", 1)
+        self.hwp.HAction.Execute("HeaderFooter", self.hwp.HParameterSet.HHeaderFooter.HSet)
+        
+        # 2. Left-align and write the title at 8pt
+        self.hwp.HAction.Run("ParagraphShapeAlignLeft")
+        self._set_font(8, False, '#666666')
+        self.hwp.insert_text(left_text)
+        
+        # 3. Close the footer
+        self.hwp.HAction.Run("CloseEx")
+        
+        # 4. Page number (bottom right)
+        self.hwp.HAction.GetDefault("PageNumPos", self.hwp.HParameterSet.HPageNumPos.HSet)
+        self.hwp.HParameterSet.HPageNumPos.DrawPos = self.hwp.PageNumPosition("BottomRight")
+        self.hwp.HAction.Execute("PageNumPos", self.hwp.HParameterSet.HPageNumPos.HSet)
+
+    def _new_section_with_header(self, header_text):
+        """Start a new section, then set its header text.
+
+        Args:
+            header_text: Text written right-aligned into the header.
+
+        Any exception is caught and printed as a warning.
+        """
+        print(f"    → 새 구역 머리말: {header_text}")
+        try:
+            self.hwp.HAction.Run("BreakSection")
+            
+            # Same "HeaderFooter" parameters as _create_header (CtrlType 0).
+            self.hwp.HAction.GetDefault("HeaderFooter", self.hwp.HParameterSet.HHeaderFooter.HSet)
+            self.hwp.HParameterSet.HHeaderFooter.HSet.SetItem("HeaderFooterStyle", 0)
+            self.hwp.HParameterSet.HHeaderFooter.HSet.SetItem("HeaderFooterCtrlType", 0)
+            self.hwp.HAction.Execute("HeaderFooter", self.hwp.HParameterSet.HHeaderFooter.HSet)
+            
+            # Clear whatever is selected by SelectAll before writing the new
+            # text (presumably the previous header content; TODO confirm).
+            self.hwp.HAction.Run("SelectAll")
+            self.hwp.HAction.Run("Delete")
+            
+            self.hwp.HAction.Run("ParagraphShapeAlignRight")
+            self._set_font(9, False, '#333333')
+            self.hwp.insert_text(header_text)
+            
+            self.hwp.HAction.Run("CloseEx")
+        except Exception as e:
+            print(f"    [경고] 구역 머리말: {e}")
+
+
+    def _set_font(self, size=11, bold=False, color='#000000'):
+        """Set the current font to 맑은 고딕 with the given size, weight and colour."""
+        text_color = self._rgb(color)
+        self.hwp.set_font(FaceName='맑은 고딕', Height=size, Bold=bold, TextColor=text_color)
+    
+    def _set_para(self, align='justify', lh=170, left=0, indent=0, before=0, after=0):
+        """Apply alignment and paragraph-shape settings at the caret.
+
+        Args:
+            align: 'left', 'center', 'right' or 'justify'; other values leave
+                the alignment untouched.
+            lh: Value assigned to ``LineSpacing`` (with ``LineSpaceType`` 0).
+            left: Left margin, converted with ``_mm``.
+            indent: Indent margin, converted with ``_mm``.
+            before: Space before the paragraph, converted with ``_pt``.
+            after: Space after the paragraph, converted with ``_pt``.
+
+        Failures while applying the paragraph shape are ignored (best-effort).
+        """
+        acts = {'left':'ParagraphShapeAlignLeft','center':'ParagraphShapeAlignCenter',
+                'right':'ParagraphShapeAlignRight','justify':'ParagraphShapeAlignJustify'}
+        if align in acts: self.hwp.HAction.Run(acts[align])
+        try:
+            self.hwp.HAction.GetDefault("ParagraphShape", self.hwp.HParameterSet.HParaShape.HSet)
+            p = self.hwp.HParameterSet.HParaShape
+            p.LineSpaceType, p.LineSpacing = 0, lh
+            p.LeftMargin = self._mm(left)
+            p.IndentMargin = self._mm(indent)
+            p.SpaceBeforePara = self._pt(before)
+            p.SpaceAfterPara = self._pt(after)
+            p.BreakNonLatinWord = 0
+            self.hwp.HAction.Execute("ParagraphShape", p.HSet)
+        except Exception:
+            # Still best-effort, but KeyboardInterrupt/SystemExit now propagate.
+            pass
+    
+    def _set_cell_bg(self, color):
+        """Fill the current table cell with *color* via the "CellBorderFill" action.
+
+        Args:
+            color: ``#RRGGBB`` string used as the brush face colour.
+
+        Failures are ignored (best-effort).
+        """
+        try:
+            self.hwp.HAction.GetDefault("CellBorderFill", self.hwp.HParameterSet.HCellBorderFill.HSet)
+            p = self.hwp.HParameterSet.HCellBorderFill
+            p.FillAttr.type = self.hwp.BrushType("NullBrush|WinBrush")
+            p.FillAttr.WinBrushFaceStyle = self.hwp.HatchStyle("None")
+            p.FillAttr.WinBrushHatchColor = self._rgb('#000000')
+            p.FillAttr.WinBrushFaceColor = self._rgb(color)
+            p.FillAttr.WindowsBrush = 1
+            self.hwp.HAction.Execute("CellBorderFill", p.HSet)
+        except Exception:
+            # Still best-effort, but KeyboardInterrupt/SystemExit now propagate.
+            pass
+    
+    def _underline_box(self, text, size=14, color='#008000'):
+        """Write *text* in a 1x1 table that only shows a coloured bottom border.
+
+        Args:
+            text: Text placed in the cell.
+            size: Font size passed through ``_pt``.
+            color: ``#RRGGBB`` colour for both the text and the bottom border.
+
+        If any step of the table approach fails, the text is inserted as a
+        plain bold paragraph instead.
+        """
+        try:
+            # 1x1 table, 168mm x 10mm.
+            self.hwp.HAction.GetDefault("TableCreate", self.hwp.HParameterSet.HTableCreation.HSet)
+            t = self.hwp.HParameterSet.HTableCreation
+            t.Rows, t.Cols, t.WidthType, t.HeightType = 1, 1, 0, 0
+            t.WidthValue, t.HeightValue = self._mm(168), self._mm(10)
+            self.hwp.HAction.Execute("TableCreate", t.HSet)
+            self.hwp.HAction.GetDefault("InsertText", self.hwp.HParameterSet.HInsertText.HSet)
+            self.hwp.HParameterSet.HInsertText.Text = text
+            self.hwp.HAction.Execute("InsertText", self.hwp.HParameterSet.HInsertText.HSet)
+            # Select the cell, then set character size and colour.
+            self.hwp.HAction.Run("TableCellBlock")
+            self.hwp.HAction.GetDefault("CharShape", self.hwp.HParameterSet.HCharShape.HSet)
+            self.hwp.HParameterSet.HCharShape.Height = self._pt(size)
+            self.hwp.HParameterSet.HCharShape.TextColor = self._rgb(color)
+            self.hwp.HAction.Execute("CharShape", self.hwp.HParameterSet.HCharShape.HSet)
+            # First pass: remove the top, right and left borders.
+            self.hwp.HAction.GetDefault("CellBorder", self.hwp.HParameterSet.HCellBorderFill.HSet)
+            c = self.hwp.HParameterSet.HCellBorderFill
+            c.BorderTypeTop = self.hwp.HwpLineType("None")
+            c.BorderTypeRight = self.hwp.HwpLineType("None")
+            c.BorderTypeLeft = self.hwp.HwpLineType("None")
+            self.hwp.HAction.Execute("CellBorder", c.HSet)
+            # Second pass: colour and thicken the bottom border.
+            self.hwp.HAction.GetDefault("CellBorder", self.hwp.HParameterSet.HCellBorderFill.HSet)
+            c = self.hwp.HParameterSet.HCellBorderFill
+            c.BorderColorBottom = self._rgb(color)
+            c.BorderWidthBottom = self.hwp.HwpLineWidth("0.4mm")
+            self.hwp.HAction.Execute("CellBorder", c.HSet)
+            self.hwp.HAction.Run("Cancel")
+            self.hwp.HAction.Run("CloseEx")
+            self.hwp.HAction.Run("MoveDocEnd")
+        except Exception:
+            # Fallback for ordinary failures only; a KeyboardInterrupt or
+            # SystemExit must not be turned into "insert plain text".
+            self._set_font(size, True, color)
+            self.hwp.insert_text(text)
+            self.hwp.BreakPara()
+    
+    def _update_header(self, new_title):
+        """Update the header text.
+
+        Args:
+            new_title: Text written right-aligned into the header after the
+                current selection (SelectAll) is deleted.
+
+        Any exception is caught and printed as a warning.
+        """
+        try:
+            # Enter edit mode for the existing header
+            self.hwp.HAction.GetDefault("HeaderFooter", self.hwp.HParameterSet.HHeaderFooter.HSet)
+            self.hwp.HParameterSet.HHeaderFooter.HSet.SetItem("HeaderFooterStyle", 2)  # edit mode
+            self.hwp.HParameterSet.HHeaderFooter.HSet.SetItem("HeaderFooterCtrlType", 0)
+            self.hwp.HAction.Execute("HeaderFooter", self.hwp.HParameterSet.HHeaderFooter.HSet)
+            
+            # Delete the existing content
+            self.hwp.HAction.Run("SelectAll")
+            self.hwp.HAction.Run("Delete")
+            
+            # Insert the new content
+            self.hwp.HAction.Run("ParagraphShapeAlignRight")
+            self._set_font(9, False, '#333333')
+            self.hwp.insert_text(new_title)
+            
+            self.hwp.HAction.Run("CloseEx")
+        except Exception as e:
+            print(f"    [경고] 머리말 업데이트: {e}")
+
+    def _insert_heading(self, elem):
+        """Insert an h1/h2/h3 heading using the size and colour from the style table.
+
+        h1: the first one (``is_first_h1``) creates the header with the heading
+        text, later ones start a new section with that header; the text is then
+        drawn with ``_underline_box``. h2 is prefixed with "■ " and h3 with
+        "▸ ", both bold.
+        """
+        # Heading level from the tag name; anything else is treated as level 1.
+        lv = int(elem.name[1]) if elem.name in ['h1','h2','h3'] else 1
+        txt = elem.get_text(strip=True)
+        st = self.sp.get_element_style(elem)
+        sz = self.sp.parse_size(st.get('font-size','14pt'))
+        cl = self.sp.parse_color(st.get('color','#008000'))
+        
+        if lv == 1:
+            if self.is_first_h1:
+                self._create_header(txt)
+                self.is_first_h1 = False
+            else:
+                self._new_section_with_header(txt)
+            
+            self._set_para('left', 130, before=0, after=0)
+            self._underline_box(txt, sz, cl)
+            self.hwp.BreakPara()
+            self._set_para('left', 130, before=0, after=15)
+            self.hwp.BreakPara()
+        elif lv == 2:
+            self._set_para('left', 150, before=20, after=8)
+            self._set_font(sz, True, cl)
+            self.hwp.insert_text("■ " + txt)
+            self.hwp.BreakPara()
+        elif lv == 3:
+            self._set_para('left', 140, left=3, before=12, after=5)
+            self._set_font(sz, True, cl)
+            self.hwp.insert_text("▸ " + txt)
+            self.hwp.BreakPara()
+    
+    def _insert_paragraph(self, elem):
+        """Insert a <p> element as one justified paragraph.
+
+        Paragraphs without text are skipped. When the paragraph contains
+        ``<b>``/``<strong>``, its direct children are written one by one so the
+        bold runs keep their weight; otherwise the whole text is written in a
+        single run.
+        """
+        txt = elem.get_text(strip=True)
+        if not txt:
+            return
+        st = self.sp.get_element_style(elem)
+        sz = self.sp.parse_size(st.get('font-size','11pt'))
+        cl = self.sp.parse_color(st.get('color','#333333'))
+        self._set_para('justify', 170, left=0, indent=3, before=0, after=3)
+
+        if elem.find(['b','strong']):
+            for ch in elem.children:
+                if isinstance(ch, NavigableString):
+                    if str(ch).strip():
+                        self._set_font(sz, False, cl)
+                        self.hwp.insert_text(str(ch))
+                    continue
+                seg = ch.get_text()
+                if not seg:
+                    continue
+                # Other inline tags (span, a, em, ...) used to be dropped here,
+                # losing their text; they are now written in the regular weight.
+                self._set_font(sz, ch.name in ['b','strong'], cl)
+                self.hwp.insert_text(seg)
+        else:
+            self._set_font(sz, self.sp.is_bold(st), cl)
+            self.hwp.insert_text(txt)
+        self.hwp.BreakPara()
+    
+    def _insert_list(self, elem):
+        """Insert the direct <li> children of *elem*, one paragraph per item.
+
+        Items of an <ol> are prefixed "N. ", all others with a bullet. Items
+        whose class contains 'toc-' are written left-aligned in one piece;
+        other items are justified with a hanging indent set after the prefix.
+        """
+        lt = elem.name
+        for i, li in enumerate(elem.find_all('li', recursive=False)):
+            st = self.sp.get_element_style(li)
+            cls = li.get('class', [])
+            txt = li.get_text(strip=True)
+            is_toc = any('toc-' in c for c in cls)
+            
+            # Left margin / space-before per TOC level; defaults for plain items.
+            if 'toc-lvl-1' in cls: left, bef = 0, 8
+            elif 'toc-lvl-2' in cls: left, bef = 7, 3
+            elif 'toc-lvl-3' in cls: left, bef = 14, 1
+            else: left, bef = 4, 2
+            
+            pf = f"{i+1}. " if lt == 'ol' else "• "
+            sz = self.sp.parse_size(st.get('font-size','11pt'))
+            cl = self.sp.parse_color(st.get('color','#333333'))
+            bd = self.sp.is_bold(st)
+            
+            if is_toc:
+                self._set_para('left', 170, left=left, indent=0, before=bef, after=1)
+                self._set_font(sz, bd, cl)
+                self.hwp.insert_text(pf + txt)
+                self.hwp.BreakPara()
+            else:
+                self._set_para('justify', 170, left=left, indent=0, before=bef, after=1)
+                self._set_font(sz, bd, cl)
+                self.hwp.insert_text(pf)
+                # Set the indent at the caret, i.e. right after the prefix.
+                self.hwp.HAction.Run("ParagraphShapeIndentAtCaret")
+                self.hwp.insert_text(txt)
+                self.hwp.BreakPara()
+    
+    def _insert_table(self, table_elem):
+        """Create an HWP table from an HTML <table> and fill it cell by cell.
+
+        The HTML rows are first flattened into a rectangular grid of strings.
+        Grid positions covered by a colspan/rowspan are recorded in
+        ``occupied`` and left empty; no cell-merge action is issued here.
+        Cells that are <th> or sit in the first row are styled as headers.
+        """
+        rows_data, cell_styles, occupied, max_cols = [], {}, {}, 0
+        for ri, tr in enumerate(table_elem.find_all('tr')):
+            row, ci = [], 0
+            for cell in tr.find_all(['td','th']):
+                # Skip grid positions already claimed by a span from an earlier cell.
+                while (ri,ci) in occupied: row.append(""); ci+=1
+                txt = cell.get_text(strip=True)
+                cs, rs = int(cell.get('colspan',1)), int(cell.get('rowspan',1))
+                cell_styles[(ri,ci)] = {'is_header': cell.name=='th' or ri==0}
+                row.append(txt)
+                # Mark every position of the span except the anchor cell itself.
+                for dr in range(rs):
+                    for dc in range(cs):
+                        if dr>0 or dc>0: occupied[(ri+dr,ci+dc)] = True
+                for _ in range(cs-1): row.append("")
+                ci += cs
+            rows_data.append(row)
+            max_cols = max(max_cols, len(row))
+        # Pad short rows so the grid is rectangular.
+        for row in rows_data:
+            while len(row) < max_cols: row.append("")
+        
+        rc = len(rows_data)
+        if rc == 0 or max_cols == 0: return
+        print(f"    표: {rc}행 × {max_cols}열")
+        
+        self._set_para('left', 130, before=5, after=0)
+        self.hwp.create_table(rc, max_cols, treat_as_char=True)
+        
+        # Walk the grid row by row, moving right after each cell.
+        for ri, row in enumerate(rows_data):
+            for ci in range(max_cols):
+                if (ri,ci) in occupied: self.hwp.HAction.Run("MoveRight"); continue
+                txt = row[ci] if ci < len(row) else ""
+                hdr = cell_styles.get((ri,ci),{}).get('is_header', False)
+                if hdr: self._set_cell_bg('#E8F5E9')
+                self.hwp.HAction.Run("ParagraphShapeAlignCenter")
+                self._set_font(9 if hdr else 9.5, hdr, '#006400' if hdr else '#333333')
+                self.hwp.insert_text(str(txt))
+                # No MoveRight after the final cell of the table.
+                if not (ri==rc-1 and ci==max_cols-1): self.hwp.HAction.Run("MoveRight")
+        
+        # Same exit sequence as the other table helpers, then a spacer paragraph.
+        self.hwp.HAction.Run("Cancel")
+        self.hwp.HAction.Run("CloseEx")
+        self.hwp.HAction.Run("MoveDocEnd")
+        self._set_para('left', 130, before=5, after=5)
+        self.hwp.BreakPara()
+    
+    # ═══════════════════════════════════════════════════════════════
+    # 이미지 삽입 - sizeoption 수정 ★
+    # ═══════════════════════════════════════════════════════════════
+    def _insert_image(self, src, caption=""):
+        """Insert the image at *src*, plus an optional caption, at the caret.
+
+        Relative paths are resolved against ``self.base_path``. A missing file
+        produces a grey placeholder line. Insertion is attempted three ways in
+        turn: original size, explicit width/height (requires PIL), and finally
+        the ``insert_picture`` defaults.
+
+        Args:
+            src: Image path from the HTML ``src`` attribute. Empty or ``None``
+                is ignored without counting or logging an image.
+            caption: Text written under a successfully inserted image.
+        """
+        if not src:
+            # Checked before counting/logging so that an <img> without a source
+            # is not reported as processed and never reaches os.path.basename.
+            return
+
+        self.image_count += 1
+        print(f"    📷 이미지 #{self.image_count}: {os.path.basename(src)}")
+
+        # Relative path -> absolute path
+        if not os.path.isabs(src):
+            full_path = os.path.normpath(os.path.join(self.base_path, src))
+        else:
+            full_path = src
+
+        if not os.path.exists(full_path):
+            print(f"       ❌ 파일 없음: {full_path}")
+            self._set_font(9, False, '#999999')
+            self._set_para('center', 130)
+            self.hwp.insert_text(f"[이미지 없음: {os.path.basename(src)}]")
+            self.hwp.BreakPara()
+            return
+
+        try:
+            self._set_para('center', 130, before=5, after=3)
+
+            # sizeoption=0: original size
+            # sizeoption=1: explicit size (width and height are passed)
+            # if neither works, retry without sizeoption
+
+            inserted = False
+
+            # Method 1: sizeoption=0 (original size)
+            try:
+                self.hwp.insert_picture(full_path, sizeoption=0)
+                inserted = True
+                print(f"       ✅ 삽입 성공 (원본 크기)")
+            except Exception:
+                # Deliberately silent: the next method is tried instead.
+                pass
+
+            # Method 2: explicit width/height
+            if not inserted and HAS_PIL:
+                try:
+                    with Image.open(full_path) as img:
+                        w_px, h_px = img.size
+                    # px -> mm conversion (assuming 96 DPI)
+                    w_mm = w_px * 25.4 / 96
+                    h_mm = h_px * 25.4 / 96
+                    # Cap the width, keeping the aspect ratio
+                    if w_mm > self.cfg.MAX_IMAGE_WIDTH:
+                        ratio = self.cfg.MAX_IMAGE_WIDTH / w_mm
+                        w_mm = self.cfg.MAX_IMAGE_WIDTH
+                        h_mm = h_mm * ratio
+
+                    # NOTE(review): values are converted to HWP units here; confirm
+                    # against the pyhwpx docs which unit width/height expect.
+                    self.hwp.insert_picture(full_path, sizeoption=1,
+                                           width=self._mm(w_mm), height=self._mm(h_mm))
+                    inserted = True
+                    print(f"       ✅ 삽입 성공 ({w_mm:.0f}×{h_mm:.0f}mm)")
+                except Exception:
+                    # Deliberately silent: the default insertion is tried next.
+                    pass
+
+            # Method 3: defaults
+            if not inserted:
+                try:
+                    self.hwp.insert_picture(full_path)
+                    inserted = True
+                    print(f"       ✅ 삽입 성공 (기본)")
+                except Exception as e3:
+                    print(f"       ❌ 삽입 실패: {e3}")
+                    self._set_font(9, False, '#FF0000')
+                    self.hwp.insert_text(f"[이미지 오류: {os.path.basename(src)}]")
+
+            self.hwp.BreakPara()
+
+            if caption and inserted:
+                self._set_font(9.5, True, '#666666')
+                self._set_para('center', 130, before=0, after=5)
+                self.hwp.insert_text(caption)
+                self.hwp.BreakPara()
+
+        except Exception as e:
+            print(f"       ❌ 오류: {e}")
+    
+    def _insert_highlight_box(self, elem):
+        """Insert the text of *elem* inside a shaded 1x1 table.
+
+        Elements without text are skipped. The element's text is flattened
+        with ``get_text``, so inner markup is not reproduced.
+        """
+        txt = elem.get_text(strip=True)
+        if not txt: return
+        self._set_para('left', 130, before=5, after=0)
+        self.hwp.create_table(1, 1, treat_as_char=True)
+        self._set_cell_bg('#E2ECE2')
+        self._set_font(11, False, '#333333')
+        self.hwp.insert_text(txt)
+        # Same exit sequence as _insert_table, then a spacer paragraph.
+        self.hwp.HAction.Run("Cancel")
+        self.hwp.HAction.Run("CloseEx")
+        self.hwp.HAction.Run("MoveDocEnd")
+        self._set_para('left', 130, before=0, after=5)
+        self.hwp.BreakPara()
+    
+    def _process(self, elem):
+        """Dispatch one parsed HTML node to the matching insert helper.
+
+        Text nodes and script/style/template/noscript/head are ignored.
+        Container tags are walked recursively; any other tag is dropped.
+        """
+        if isinstance(elem, NavigableString):
+            return
+        tag = elem.name
+        if not tag or tag in ('script', 'style', 'template', 'noscript', 'head'):
+            return
+
+        if tag == 'figure':
+            picture = elem.find('img')
+            if picture:
+                cap_node = elem.find('figcaption')
+                caption = cap_node.get_text(strip=True) if cap_node else ""
+                self._insert_image(picture.get('src', ''), caption)
+            return
+        if tag == 'img':
+            self._insert_image(elem.get('src', ''))
+            return
+
+        if tag in ('h1', 'h2', 'h3'):
+            self._insert_heading(elem)
+            return
+        if tag == 'p':
+            self._insert_paragraph(elem)
+            return
+        if tag == 'table':
+            self._insert_table(elem)
+            return
+        if tag in ('ul', 'ol'):
+            self._insert_list(elem)
+            return
+        # The highlight-box class wins over generic container handling.
+        if 'highlight-box' in elem.get('class', []):
+            self._insert_highlight_box(elem)
+            return
+        if tag in ('div', 'section', 'article', 'main', 'body', 'html', 'span'):
+            for child in elem.children:
+                self._process(child)
+    
+    def convert(self, html_path, output_path):
+        """Convert the HTML file at *html_path* and save it to *output_path*.
+
+        Args:
+            html_path: HTML file, read as UTF-8. Its directory becomes the base
+                for relative image paths.
+            output_path: Path passed to ``Hwp.SaveAs``.
+
+        When the document has an element with id 'raw-container', its
+        box-cover, box-toc, box-summary and box-content children are processed
+        in that order; otherwise the whole <body> is processed.
+        """
+        print("="*60)
+        print("HTML → HWP 변환기 v11")
+        print("  ✓ 이미지: sizeoption 수정")
+        print("  ✓ 페이지번호: 다중 방법 시도")
+        print("="*60)
+        
+        # Reset per-conversion state.
+        self.base_path = os.path.dirname(os.path.abspath(html_path))
+        self.is_first_h1 = True
+        self.image_count = 0
+        
+        print(f"\n입력: {html_path}")
+        print(f"출력: {output_path}\n")
+        
+        with open(html_path, 'r', encoding='utf-8') as f:
+            soup = BeautifulSoup(f.read(), 'html.parser')
+        
+        # The footer text is the part of <title> before the first ':'.
+        title_tag = soup.find('title')
+        if title_tag:
+            full_title = title_tag.get_text(strip=True)
+            footer_title = full_title.split(':')[0].strip()  # text before ":"
+        else:
+            footer_title = ""
+
+        self.hwp.FileNew()
+        self._setup_page()
+        self._create_footer(footer_title)
+
+        raw = soup.find(id='raw-container')
+        if raw:
+            cover = raw.find(id='box-cover')
+            if cover:
+                print("  → 표지")
+                for ch in cover.children: self._process(ch)
+                self.hwp.HAction.Run("BreakPage")
+            toc = raw.find(id='box-toc')
+            if toc:
+                print("  → 목차")
+                # is_first_h1 is reset before each of the following parts.
+                self.is_first_h1 = True
+                self._underline_box("목 차", 20, '#008000')
+                self.hwp.BreakPara(); self.hwp.BreakPara()
+                # Falls back to the box itself when it contains no <ul>.
+                self._insert_list(toc.find('ul') or toc)
+                self.hwp.HAction.Run("BreakPage")
+            summary = raw.find(id='box-summary')
+            if summary:
+                print("  → 요약")
+                self.is_first_h1 = True
+                self._process(summary)
+                self.hwp.HAction.Run("BreakPage")
+            content = raw.find(id='box-content')
+            if content:
+                print("  → 본문")
+                self.is_first_h1 = True
+                self._process(content)
+        else:
+            self._process(soup.find('body') or soup)
+        
+        self.hwp.SaveAs(output_path)
+        print(f"\n✅ 저장: {output_path}")
+        print(f"   이미지: {self.image_count}개 처리")
+    
+    def close(self):
+        """Quit the HWP application, ignoring ordinary failures.
+
+        Cleanup is best-effort: an error raised by ``Quit`` is suppressed.
+        Only ``Exception`` subclasses are swallowed, so ``KeyboardInterrupt``
+        and ``SystemExit`` still propagate to the caller.
+        """
+        try:
+            self.hwp.Quit()
+        except Exception:
+            pass
+
+
+def main():
+    """Convert the hard-coded development report HTML into an HWP file.
+
+    Errors are printed together with a traceback instead of being re-raised.
+    The converter is closed in ``finally`` so that a failed conversion does
+    not leave the HWP application running.
+    """
+    html_path = r"D:\for python\survey_test\output\generated\report.html"
+    output_path = r"D:\for python\survey_test\output\generated\report_v12.hwp"
+
+    conv = None
+    try:
+        conv = HtmlToHwpConverter(visible=True)
+        conv.convert(html_path, output_path)
+        input("\nEnter를 누르면 HWP가 닫힙니다...")  # optional pause before HWP is closed
+    except Exception as e:
+        print(f"\n[에러] {e}")
+        import traceback; traceback.print_exc()
+    finally:
+        # conv stays None when the constructor itself failed.
+        if conv is not None:
+            conv.close()
+
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/converters/pipeline/__init__.py b/converters/pipeline/__init__.py
new file mode 100644
index 0000000..d698245
--- /dev/null
+++ b/converters/pipeline/__init__.py
@@ -0,0 +1 @@
+from .router import process_document, is_long_document
diff --git a/converters/pipeline/router.py b/converters/pipeline/router.py
new file mode 100644
index 0000000..ef41136
--- /dev/null
+++ b/converters/pipeline/router.py
@@ -0,0 +1,139 @@
+# -*- coding: utf-8 -*-
+"""
+router.py
+
+기능:
+- HTML 입력의 분량을 판단하여 적절한 파이프라인으로 분기
+- 긴 문서 (5000자 이상): RAG 파이프라인 (step3→4→5→6→7→8→9)
+- 짧은 문서 (5000자 미만): 직접 생성 (step7→8→9)
+"""
+
+import re
+import os
+from typing import Dict, Any
+
+# Threshold used to decide whether a document counts as long
+LONG_DOC_THRESHOLD = 5000  # 5000 characters or more means a long document
+
+# Image assets path (fixed for development) - the r prefix is required!
+ASSETS_BASE_PATH = r"D:\for python\geulbeot-light\geulbeot-light\output\assets"
+
+def count_characters(html_content: str) -> int:
+    """Count the characters of plain text in *html_content*.
+
+    Tags are removed, HTML entities are decoded so that e.g. ``&amp;`` counts
+    as a single character, and every run of whitespace collapses to one space.
+
+    Args:
+        html_content: HTML source string.
+
+    Returns:
+        Length of the normalised text.
+    """
+    from html import unescape
+
+    # Strip tags first so that a decoded "&lt;" is never mistaken for markup.
+    text_only = re.sub(r'<[^>]+>', '', html_content)
+    text_only = unescape(text_only)
+    # Collapse whitespace (including the non-breaking space from &nbsp;).
+    text_only = ' '.join(text_only.split())
+    return len(text_only)
+
+
+def is_long_document(html_content: str) -> bool:
+    """Return True when the text length reaches ``LONG_DOC_THRESHOLD``."""
+    return count_characters(html_content) >= LONG_DOC_THRESHOLD
+
+def convert_image_paths(html_content: str) -> str:
+    """Rewrite relative ``assets/`` image sources to server paths.
+
+    ``src="assets/xxx.png"`` becomes ``src="/assets/xxx.png"``; both double-
+    and single-quoted attributes are handled. Sources that do not start with
+    ``assets/`` (URLs, already-rooted paths) are left untouched.
+
+    Args:
+        html_content: HTML source string.
+
+    Returns:
+        The HTML with rewritten ``src`` attributes.
+    """
+    # The absolute-path variant that used to follow the first return statement
+    # was unreachable and has been removed.
+    return re.sub(r'''src=(["'])assets/''', r'src=\1/assets/', html_content)
+
+def run_short_pipeline(html_content: str, options: dict) -> Dict[str, Any]:
+    """
+    Short-document pipeline (fewer than 5000 characters).
+
+    Currently only rewrites image paths; *options* is not read yet.
+    Any exception is reported through the returned dict instead of raised.
+    """
+    try:
+        # Image path conversion
+        converted = convert_image_paths(html_content)
+        length = count_characters(html_content)
+    except Exception as exc:
+        return dict(success=False, error=str(exc), pipeline='short')
+
+    # TODO: wire up step7, step8, step9
+    return dict(success=True, pipeline='short', char_count=length, html=converted)
+
+
+def run_long_pipeline(html_content: str, options: dict) -> Dict[str, Any]:
+    """
+    Long-document pipeline (5000 characters or more).
+
+    Currently only rewrites image paths; *options* is not read yet.
+    Any exception is reported through the returned dict instead of raised.
+    """
+    try:
+        # Image path conversion
+        converted = convert_image_paths(html_content)
+        length = count_characters(html_content)
+    except Exception as exc:
+        return dict(success=False, error=str(exc), pipeline='long')
+
+    # TODO: run step3 through step9 in sequence
+    return dict(success=True, pipeline='long', char_count=length, html=converted)
+
+
+def process_document(content: str, options: dict = None) -> Dict[str, Any]:
+    """
+    Main router function.
+    - Dispatches to the long or short pipeline depending on text length.
+
+    Args:
+        content: HTML string
+        options: extra options (page_option, instruction, ...)
+
+    Returns:
+        {'success': bool, 'html': str, 'pipeline': str, ...}
+    """
+    options = {} if options is None else options
+
+    # Guard clause: empty or whitespace-only input is rejected up front.
+    if not (content and content.strip()):
+        return {
+            'success': False,
+            'error': '내용이 비어있습니다.'
+        }
+
+    char_count = count_characters(content)
+
+    pipeline = run_long_pipeline if is_long_document(content) else run_short_pipeline
+    result = pipeline(content, options)
+
+    # Common fields added to every pipeline result
+    result.update(char_count=char_count, threshold=LONG_DOC_THRESHOLD)
+    return result
\ No newline at end of file
diff --git a/converters/pipeline/step1_convert.py b/converters/pipeline/step1_convert.py
new file mode 100644
index 0000000..a3b57b6
--- /dev/null
+++ b/converters/pipeline/step1_convert.py
@@ -0,0 +1,784 @@
+"""
+측량/GIS/드론 관련 자료 PDF 변환 및 정리 시스템
+- 모든 파일 형식을 PDF로 변환
+- DWG 파일: DWG TrueView를 사용한 자동 PDF 변환
+- 동영상 파일: Whisper를 사용한 음성→텍스트 변환 후 PDF 생성
+- 원본 경로와 변환 파일 경로를 엑셀로 관리
+"""
+
+import os
+import shutil
+from pathlib import Path
+from datetime import datetime
+import openpyxl
+from openpyxl.styles import Font, PatternFill, Alignment
+import win32com.client
+import pythoncom
+from PIL import Image
+import subprocess
+import json
+
+class SurveyingFileConverter:
+    def _dbg(self, msg):
+        """Print ``msg`` only when the instance's ``debug`` attribute is truthy.
+
+        A missing ``debug`` attribute counts as disabled.
+        """
+        if not getattr(self, "debug", False):
+            return
+        print(msg)
+
+    def _ensure_ffmpeg_on_path(self):
+        """Make an ``ffmpeg`` executable discoverable on ``PATH``.
+
+        If ``ffmpeg`` is already on ``PATH`` its location is stored in
+        ``self.ffmpeg_exe``. Otherwise the binary shipped with
+        ``imageio_ffmpeg`` is copied to ``<output_dir>/tools_ffmpeg/ffmpeg.exe``
+        and that folder is prepended to ``PATH``.
+
+        Returns:
+            True when ``ffmpeg`` can be resolved afterwards, False otherwise
+            (including when ``imageio_ffmpeg`` is unavailable or any step fails).
+        """
+        import os
+        import shutil
+        from pathlib import Path
+
+        system_ffmpeg = shutil.which("ffmpeg")
+        self._dbg(f"DEBUG ffmpeg which before: {system_ffmpeg}")
+        if system_ffmpeg:
+            self.ffmpeg_exe = system_ffmpeg
+            return True
+
+        try:
+            import imageio_ffmpeg
+
+            bundled = Path(imageio_ffmpeg.get_ffmpeg_exe())
+            self._dbg(f"DEBUG imageio ffmpeg exe: {bundled}")
+            self._dbg(f"DEBUG imageio ffmpeg exists: {bundled.exists()}")
+            if not bundled.exists():
+                return False
+
+            # Keep a copy named ffmpeg.exe so a lookup by the plain name
+            # "ffmpeg" succeeds once its folder is on PATH.
+            tools_dir = Path(self.output_dir) / "tools_ffmpeg"
+            tools_dir.mkdir(parents=True, exist_ok=True)
+            local_copy = tools_dir / "ffmpeg.exe"
+            if not local_copy.exists():
+                shutil.copyfile(str(bundled), str(local_copy))
+
+            os.environ["PATH"] = os.pathsep.join(
+                [str(tools_dir), os.environ.get("PATH", "")]
+            )
+
+            resolved = shutil.which("ffmpeg")
+            self._dbg(f"DEBUG ffmpeg which after: {resolved}")
+            if not resolved:
+                return False
+
+            self.ffmpeg_exe = resolved
+            return True
+
+        except Exception as e:
+            self._dbg(f"DEBUG ensure ffmpeg error: {e}")
+            return False
+
+
+    def __init__(self, source_dir, output_dir, debug=True):
+        """Set up a converter for one source tree and one output tree.
+
+        Creates ``output_dir`` if needed, tries to make ffmpeg available,
+        and probes for a DWG TrueView installation.
+
+        Args:
+            source_dir: Root folder whose files will be converted.
+            output_dir: Root folder that receives the generated PDFs.
+            debug: When true, ``_dbg`` prints diagnostic lines. Defaults to
+                True, which matches the previous hard-coded behaviour.
+        """
+        self.source_dir = Path(source_dir)
+        self.output_dir = Path(output_dir)
+        self.output_dir.mkdir(parents=True, exist_ok=True)
+
+        # Must be set before _ensure_ffmpeg_on_path(), which logs via _dbg()
+        # and assigns ffmpeg_exe.
+        self.debug = bool(debug)
+        self.ffmpeg_exe = None
+        ok = self._ensure_ffmpeg_on_path()
+        self._dbg(f"DEBUG ensure_ffmpeg_on_path result: {ok}")
+
+        # One dict per processed file; consumed by create_excel_report().
+        self.conversion_log = []
+
+        # Domain vocabulary passed to Whisper as initial_prompt; run() fills
+        # it from domain.txt when that file exists.
+        self.domain_terms = ""
+
+        # Candidate names tried in order when registering the HWP
+        # "FilePathCheckDLL" security module.
+        self.hwp_security_modules = [
+            "FilePathCheckerModuleExample",
+            "SecurityModule",
+            ""
+        ]
+
+        # Supported file extensions, grouped by conversion strategy.
+        self.image_extensions = {'.jpg', '.jpeg', '.png', '.bmp', '.gif', '.tiff', '.tif', '.webp'}
+        self.office_extensions = {'.doc', '.docx', '.xls', '.xlsx', '.ppt', '.pptx', '.hwp', '.hwpx'}
+        self.video_extensions = {'.mp4', '.avi', '.mov', '.wmv', '.flv', '.mkv', '.m4v'}
+        self.text_extensions = {'.txt', '.csv', '.log', '.md'}
+        self.pdf_extension = {'.pdf'}
+        self.dwg_extensions = {'.dwg', '.dxf'}
+
+        # DWG TrueView executable, or None when no known install path exists.
+        self.trueview_path = self._find_trueview()
+        
+    def _find_trueview(self):
+        """Locate an installed DWG TrueView executable.
+
+        Probes a fixed list of default install locations in order.
+
+        Returns:
+            The first candidate path that exists, or None if none does.
+        """
+        candidates = (
+            r"C:\Program Files\Autodesk\DWG TrueView 2025\dwgviewr.exe",
+            r"C:\Program Files\Autodesk\DWG TrueView 2024\dwgviewr.exe",
+            r"C:\Program Files\Autodesk\DWG TrueView 2023\dwgviewr.exe",
+            r"C:\Program Files (x86)\Autodesk\DWG TrueView 2025\dwgviewr.exe",
+            r"C:\Program Files (x86)\Autodesk\DWG TrueView 2024\dwgviewr.exe",
+        )
+        return next((exe for exe in candidates if Path(exe).exists()), None)
+        
+    def get_all_files(self):
+        """Return every regular file under ``self.source_dir``, recursively.
+
+        Directories are skipped; entries come back in ``rglob`` order.
+        """
+        return [entry for entry in self.source_dir.rglob('*') if entry.is_file()]
+        
+    def extract_audio_from_video(self, video_path, audio_output_path):
+        """Extract the audio track of a video into a WAV file using ffmpeg.
+
+        ffmpeg is asked for 16-bit PCM, 16 kHz, mono output
+        (``-acodec pcm_s16le -ar 16000 -ac 1``) with video disabled (``-vn``);
+        an existing output file is overwritten (``-y``).
+
+        Args:
+            video_path: Path of the source video.
+            audio_output_path: Path of the WAV file to create.
+
+        Returns:
+            True when ffmpeg exits with status 0. False on a non-zero exit
+            status, when the 300 second timeout expires, or on any other
+            error (for example, no ffmpeg binary available).
+        """
+        try:
+            # Prefer the binary already located by _ensure_ffmpeg_on_path().
+            # Only fall back to the imageio_ffmpeg bundle when none was
+            # recorded, so a system-wide ffmpeg works without that package.
+            ffmpeg_exe = getattr(self, "ffmpeg_exe", None)
+            if not ffmpeg_exe:
+                import imageio_ffmpeg
+
+                ffmpeg_exe = imageio_ffmpeg.get_ffmpeg_exe()
+            ffmpeg_exe = str(ffmpeg_exe)
+
+            self._dbg(f"DEBUG extract ffmpeg_exe: {ffmpeg_exe}")
+            self._dbg(f"DEBUG extract ffmpeg_exe exists: {Path(ffmpeg_exe).exists()}")
+            self._dbg(f"DEBUG extract input exists: {Path(video_path).exists()}")
+            self._dbg(f"DEBUG extract out path: {audio_output_path}")
+
+            cmd = [
+                ffmpeg_exe,
+                "-i", str(video_path),
+                "-vn",
+                "-acodec", "pcm_s16le",
+                "-ar", "16000",
+                "-ac", "1",
+                "-y",
+                str(audio_output_path),
+            ]
+            self._dbg("DEBUG extract cmd: " + " ".join(cmd))
+
+            # errors="replace": ffmpeg's log may contain bytes that are not
+            # valid in the locale encoding (e.g. non-ASCII file names). A
+            # decode error must not turn a successful extraction into a
+            # reported failure.
+            result = subprocess.run(
+                cmd,
+                capture_output=True,
+                timeout=300,
+                check=True,
+                text=True,
+                errors="replace",
+            )
+            self._dbg(f"DEBUG extract returncode: {result.returncode}")
+            self._dbg(f"DEBUG extract stderr tail: {(result.stderr or '')[-300:]}")
+            return True
+
+        except subprocess.CalledProcessError as e:
+            self._dbg(f"DEBUG extract CalledProcessError returncode: {e.returncode}")
+            self._dbg(f"DEBUG extract stderr tail: {(e.stderr or '')[-300:]}")
+            return False
+        except Exception as e:
+            # Includes subprocess.TimeoutExpired and a missing executable.
+            self._dbg(f"DEBUG extract exception: {e}")
+            return False
+    
+    def transcribe_audio_with_whisper(self, audio_path):
+        """Transcribe a Korean audio file to text with Whisper.
+
+        The "medium" Whisper model is loaded on first use and cached on the
+        instance, so processing several videos does not reload it for every
+        file. ``self.domain_terms`` (when non-empty) is passed as Whisper's
+        ``initial_prompt``. The raw text is post-processed by
+        ``clean_transcript``.
+
+        Args:
+            audio_path: Path of the audio file to transcribe.
+
+        Returns:
+            The cleaned transcript, or a bracketed Korean status message when
+            the audio file is missing/empty or recognition fails.
+        """
+        try:
+            self._ensure_ffmpeg_on_path()
+
+            ffmpeg_path = shutil.which("ffmpeg")
+            self._dbg(f"DEBUG whisper ffmpeg which: {ffmpeg_path}")
+
+            # If ffmpeg still cannot be resolved by name but a binary was
+            # recorded earlier, expose its folder on PATH.
+            if not ffmpeg_path and self.ffmpeg_exe:
+                os.environ["PATH"] = (
+                    str(Path(self.ffmpeg_exe).parent) + os.pathsep + os.environ.get("PATH", "")
+                )
+
+            audio_file = Path(audio_path)
+            self._dbg(f"DEBUG whisper audio exists: {audio_file.exists()}")
+            self._dbg(f"DEBUG whisper audio size: {audio_file.stat().st_size if audio_file.exists() else 'NA'}")
+
+            if not audio_file.exists() or audio_file.stat().st_size == 0:
+                return "[오디오 파일이 비어있거나 존재하지 않음]"
+
+            import whisper
+
+            # Loading the model is by far the most expensive step; keep it
+            # for subsequent calls instead of reloading per file.
+            model = getattr(self, "_whisper_model", None)
+            if model is None:
+                model = whisper.load_model("medium")
+                self._whisper_model = model
+
+            # domain_terms is used as the initial prompt when available.
+            result = model.transcribe(
+                str(audio_path),
+                language="ko",
+                task="transcribe",
+                initial_prompt=self.domain_terms if self.domain_terms else None,
+                condition_on_previous_text=True,
+            )
+
+            # Post-process: drop repeated sentences and stray foreign text.
+            return self.clean_transcript(result["text"])
+
+        except Exception as e:
+            import traceback
+            self._dbg(f"DEBUG whisper traceback: {traceback.format_exc()}")
+            return f"[음성 인식 실패: {str(e)}]"
+
+    def clean_transcript(self, text):
+        """Post-process a Whisper transcript: strip stray scripts and repeats.
+
+        Steps:
+          1. Remove runs of 3+ Latin letters, Japanese kana and CJK
+             ideographs (treated as hallucinated output).
+          2. Collapse whitespace.
+          3. Drop sentences that repeat an earlier sentence.
+
+        Sentences are split only at ``.``, ``!`` or ``?`` followed by
+        whitespace. A period inside a number such as ``3.14`` therefore does
+        not split the text, and the closing punctuation of each kept
+        sentence is preserved.
+
+        Args:
+            text: Raw transcript text.
+
+        Returns:
+            The cleaned transcript as a single line.
+        """
+        import re
+
+        # 1. Remove English / Japanese / Chinese-character noise.
+        text = re.sub(r'[A-Za-z]{3,}', '', text)
+        text = re.sub(r'[\u3040-\u309F\u30A0-\u30FF]+', '', text)
+        text = re.sub(r'[\u4E00-\u9FFF]+', '', text)
+
+        # 2. Normalise whitespace first so duplicates compare equal.
+        text = re.sub(r'\s+', ' ', text).strip()
+
+        # 3. Keep the first occurrence of every sentence. Terminal
+        #    punctuation is ignored when comparing.
+        seen = set()
+        unique_sentences = []
+        for sentence in re.split(r'(?<=[.!?])\s+', text):
+            sentence = sentence.strip()
+            key = sentence.rstrip('.!?').strip()
+            if key and key not in seen:
+                seen.add(key)
+                unique_sentences.append(sentence)
+
+        return ' '.join(unique_sentences)
+
+    def get_video_transcript(self, video_path):
+        """Build a text report containing the speech transcript of a video.
+
+        The audio is extracted to ``<video stem>_temp_audio.wav`` next to the
+        video, transcribed, and the temporary file is always removed
+        afterwards, including when extraction or transcription fails.
+
+        Args:
+            video_path: ``Path`` of the video file.
+
+        Returns:
+            A multi-line string with a file header followed by the
+            transcript. On failure, the basic file information followed by a
+            bracketed Korean error note.
+        """
+        temp_audio = None
+        try:
+            # Temporary audio file path.
+            temp_audio = video_path.parent / f"{video_path.stem}_temp_audio.wav"
+
+            # 1. Extract the audio track.
+            if not self.extract_audio_from_video(video_path, temp_audio):
+                return self.get_basic_file_info(video_path) + "\n\n[오디오 추출 실패]"
+            if (not temp_audio.exists()) or temp_audio.stat().st_size == 0:
+                return self.get_basic_file_info(video_path) + "\n\n[오디오 파일 생성 실패]"
+
+            # 2. Speech recognition with Whisper.
+            transcript = self.transcribe_audio_with_whisper(temp_audio)
+
+            # 3. Format the result.
+            stat = video_path.stat()
+            lines = [
+                "동영상 파일 음성 전사 (Speech-to-Text)",
+                "=" * 60,
+                f"파일명: {video_path.name}",
+                f"경로: {video_path}",
+                f"파일 크기: {self.format_file_size(stat.st_size)}",
+                f"생성일: {datetime.fromtimestamp(stat.st_ctime).strftime('%Y-%m-%d %H:%M:%S')}",
+                "",
+                "=" * 60,
+                "음성 내용:",
+                "=" * 60,
+                "",
+                transcript,
+            ]
+            return "\n".join(lines)
+
+        except Exception as e:
+            return self.get_basic_file_info(video_path) + f"\n\n[음성 인식 오류: {str(e)}]"
+        finally:
+            # Best-effort cleanup on every path, so a partially written WAV
+            # is not left beside the source video.
+            if temp_audio is not None:
+                try:
+                    if temp_audio.exists():
+                        temp_audio.unlink()
+                except OSError as cleanup_error:
+                    self._dbg(f"DEBUG temp audio cleanup failed: {cleanup_error}")
+    
+    def convert_dwg_to_pdf_trueview(self, dwg_path, pdf_path):
+        """Convert a DWG/DXF drawing to PDF by driving DWG TrueView.
+
+        A script file ``<stem>_plot.scr`` is written next to the drawing and
+        passed to TrueView with ``/b``. The script file is removed afterwards
+        on every path (success, failure, timeout or exception).
+
+        Args:
+            dwg_path: ``Path`` of the drawing.
+            pdf_path: ``Path`` where the PDF is expected to appear.
+
+        Returns:
+            ``(True, "성공")`` if ``pdf_path`` exists after the run, otherwise
+            ``(False, <reason>)``.
+        """
+        if not self.trueview_path:
+            return False, "DWG TrueView가 설치되지 않음"
+
+        script_path = None
+        try:
+            # NOTE(review): the command tokens are concatenated with no
+            # separators or newlines; confirm this is the script text
+            # TrueView actually expects.
+            script_content = f"""_-EXPORT_PDF{pdf_path}_Y"""
+            script_path = dwg_path.parent / f"{dwg_path.stem}_plot.scr"
+            with open(script_path, 'w') as f:
+                f.write(script_content)
+
+            # Launch TrueView on the drawing with the script.
+            cmd = [
+                self.trueview_path,
+                str(dwg_path.absolute()),
+                "/b", str(script_path.absolute()),
+                "/nologo"
+            ]
+            subprocess.run(cmd, timeout=120, capture_output=True)
+
+            # Success is judged only by the presence of the PDF.
+            if pdf_path.exists():
+                return True, "성공"
+            return False, "PDF 생성 실패"
+
+        except subprocess.TimeoutExpired:
+            return False, "변환 시간 초과"
+        except Exception as e:
+            return False, f"DWG 변환 실패: {str(e)}"
+        finally:
+            # Remove the temporary script even when the run timed out or
+            # raised; a missing or locked file is ignored.
+            if script_path is not None:
+                try:
+                    script_path.unlink()
+                except OSError:
+                    pass
+    
+    def get_basic_file_info(self, file_path):
+        """Return a short multi-line description of a file.
+
+        The text lists the file name, path, human-readable size, and the
+        ``st_ctime`` / ``st_mtime`` timestamps formatted as
+        ``YYYY-MM-DD HH:MM:SS``.
+        """
+        info = file_path.stat()
+        stamp_format = '%Y-%m-%d %H:%M:%S'
+        size_text = self.format_file_size(info.st_size)
+        created = datetime.fromtimestamp(info.st_ctime).strftime(stamp_format)
+        modified = datetime.fromtimestamp(info.st_mtime).strftime(stamp_format)
+
+        return "\n".join([
+            "파일 정보",
+            "=" * 60,
+            f"파일명: {file_path.name}",
+            f"경로: {file_path}",
+            f"파일 크기: {size_text}",
+            f"생성일: {created}",
+            f"수정일: {modified}",
+        ])
+    
+    def format_file_size(self, size_bytes):
+        """Format a byte count with two decimals and a B/KB/MB/GB/TB unit.
+
+        The value is divided by 1024 until it drops below 1024 or the TB
+        unit is reached, so anything of 1024 TB or more is still shown in TB.
+        """
+        units = ('B', 'KB', 'MB', 'GB', 'TB')
+        scaled = size_bytes
+        step = 0
+        # "not (x < 1024)" instead of "x >= 1024" keeps the comparison
+        # identical to the original for every input.
+        while step < len(units) - 1 and not (scaled < 1024.0):
+            scaled /= 1024.0
+            step += 1
+        return f"{scaled:.2f} {units[step]}"
+    
+    def convert_image_to_pdf(self, image_path, output_path):
+        """Convert an image file to a single-page PDF.
+
+        Images with transparency or a palette (modes ``RGBA``, ``LA``, ``P``)
+        are composited onto a white background using their alpha channel;
+        other non-RGB modes are converted to RGB. The source file is closed
+        once the PDF has been written.
+
+        Args:
+            image_path: Path of the source image.
+            output_path: Path of the PDF to write.
+
+        Returns:
+            ``(True, "성공")`` on success, otherwise
+            ``(False, "이미지 변환 실패: <error>")``.
+        """
+        try:
+            # The context manager releases the file handle even on failure.
+            with Image.open(image_path) as img:
+                if img.mode in ('RGBA', 'LA', 'P'):
+                    # Normalise to RGBA so every one of these modes, LA
+                    # included, provides an alpha band to use as paste mask.
+                    rgba = img.convert('RGBA')
+                    page = Image.new('RGB', rgba.size, (255, 255, 255))
+                    page.paste(rgba, mask=rgba.split()[-1])
+                elif img.mode != 'RGB':
+                    page = img.convert('RGB')
+                else:
+                    page = img
+
+                page.save(output_path, 'PDF', resolution=100.0)
+            return True, "성공"
+        except Exception as e:
+            return False, f"이미지 변환 실패: {str(e)}"
+    
+    def convert_office_to_pdf(self, file_path, output_path):
+        """Convert an office document to PDF by dispatching on its extension.
+
+        COM is initialised for the duration of the call and uninitialised
+        afterwards. HWP, Word, Excel and PowerPoint files are forwarded to
+        their dedicated converter.
+
+        Returns:
+            The ``(success, message)`` tuple of the selected converter, or
+            ``(False, <reason>)`` for an unsupported extension or an error.
+        """
+        pythoncom.CoInitialize()
+        try:
+            suffix = file_path.suffix.lower()
+            handlers = (
+                ({'.hwp', '.hwpx'}, self.convert_hwp_to_pdf),
+                ({'.doc', '.docx'}, self.convert_word_to_pdf),
+                ({'.xls', '.xlsx'}, self.convert_excel_to_pdf),
+                ({'.ppt', '.pptx'}, self.convert_ppt_to_pdf),
+            )
+            for suffixes, handler in handlers:
+                if suffix in suffixes:
+                    return handler(file_path, output_path)
+            return False, "지원하지 않는 Office 형식"
+
+        except Exception as e:
+            return False, f"Office 변환 실패: {str(e)}"
+        finally:
+            pythoncom.CoUninitialize()
+    
+    def convert_word_to_pdf(self, file_path, output_path):
+        """Convert a Word document to PDF through Word COM automation.
+
+        The document and the Word application are closed in a ``finally``
+        block, so a failing ``Open``/``SaveAs`` does not leave a hidden Word
+        process running.
+
+        Returns:
+            ``(True, "성공")`` on success, otherwise
+            ``(False, "Word 변환 실패: <error>")``.
+        """
+        word = None
+        doc = None
+        try:
+            word = win32com.client.Dispatch("Word.Application")
+            word.Visible = False
+            doc = word.Documents.Open(str(file_path.absolute()))
+            doc.SaveAs(str(output_path.absolute()), FileFormat=17)  # 17 = PDF
+            return True, "성공"
+        except Exception as e:
+            return False, f"Word 변환 실패: {str(e)}"
+        finally:
+            # Best-effort shutdown; a cleanup error must not mask the result.
+            if doc is not None:
+                try:
+                    doc.Close(False)  # False = do not save changes
+                except Exception as cleanup_error:
+                    self._dbg(f"DEBUG word close failed: {cleanup_error}")
+            if word is not None:
+                try:
+                    word.Quit()
+                except Exception as cleanup_error:
+                    self._dbg(f"DEBUG word quit failed: {cleanup_error}")
+    
+    def convert_excel_to_pdf(self, file_path, output_path):
+        """Convert an Excel workbook to PDF, fitting each sheet to one page width.
+
+        Every worksheet gets 1 cm margins, fit-to-one-page-wide scaling, and
+        landscape orientation when its used range has more columns than rows.
+        The workbook is closed without saving, because the page setup changes
+        are only needed for the export. Excel is shut down in a ``finally``
+        block so a failure does not leave a hidden Excel process running.
+
+        Returns:
+            ``(True, "성공")`` on success, otherwise
+            ``(False, "Excel 변환 실패: <error>")``.
+        """
+        excel = None
+        wb = None
+        try:
+            excel = win32com.client.Dispatch("Excel.Application")
+            excel.Visible = False
+            # Suppress modal prompts that nobody could answer while hidden.
+            excel.DisplayAlerts = False
+            wb = excel.Workbooks.Open(str(file_path.absolute()))
+
+            # Page setup for every sheet.
+            for ws in wb.Worksheets:
+                ws.PageSetup.Zoom = False  # disable fixed zoom so FitToPages applies
+                ws.PageSetup.FitToPagesWide = 1  # fit the width to one page
+                ws.PageSetup.FitToPagesTall = False  # height follows the content
+
+                # Small margins (CentimetersToPoints converts cm to points).
+                ws.PageSetup.LeftMargin = excel.CentimetersToPoints(1)
+                ws.PageSetup.RightMargin = excel.CentimetersToPoints(1)
+                ws.PageSetup.TopMargin = excel.CentimetersToPoints(1)
+                ws.PageSetup.BottomMargin = excel.CentimetersToPoints(1)
+
+                # Orientation: landscape when there are more columns than rows.
+                used_range = ws.UsedRange
+                if used_range.Columns.Count > used_range.Rows.Count:
+                    ws.PageSetup.Orientation = 2  # xlLandscape
+                else:
+                    ws.PageSetup.Orientation = 1  # xlPortrait
+
+            # Export as PDF.
+            wb.ExportAsFixedFormat(0, str(output_path.absolute()))  # 0 = PDF
+            return True, "성공"
+        except Exception as e:
+            return False, f"Excel 변환 실패: {str(e)}"
+        finally:
+            # Best-effort shutdown; a cleanup error must not mask the result.
+            if wb is not None:
+                try:
+                    # The PageSetup edits marked the workbook as modified;
+                    # close without saving so the source file stays untouched
+                    # and no save prompt can block.
+                    wb.Close(False)
+                except Exception as cleanup_error:
+                    self._dbg(f"DEBUG excel close failed: {cleanup_error}")
+            if excel is not None:
+                try:
+                    excel.Quit()
+                except Exception as cleanup_error:
+                    self._dbg(f"DEBUG excel quit failed: {cleanup_error}")
+            
+
+    def convert_ppt_to_pdf(self, file_path, output_path):
+        """Convert a PowerPoint presentation to PDF through COM automation.
+
+        The presentation and the PowerPoint application are closed in a
+        ``finally`` block, so a failing ``Open``/``SaveAs`` does not leave a
+        PowerPoint process running.
+
+        Returns:
+            ``(True, "성공")`` on success, otherwise
+            ``(False, "PowerPoint 변환 실패: <error>")``.
+        """
+        ppt = None
+        presentation = None
+        try:
+            ppt = win32com.client.Dispatch("PowerPoint.Application")
+            ppt.Visible = True
+            presentation = ppt.Presentations.Open(str(file_path.absolute()))
+            presentation.SaveAs(str(output_path.absolute()), 32)  # 32 = PDF
+            return True, "성공"
+        except Exception as e:
+            return False, f"PowerPoint 변환 실패: {str(e)}"
+        finally:
+            # Best-effort shutdown; a cleanup error must not mask the result.
+            if presentation is not None:
+                try:
+                    presentation.Close()
+                except Exception as cleanup_error:
+                    self._dbg(f"DEBUG ppt close failed: {cleanup_error}")
+            if ppt is not None:
+                try:
+                    ppt.Quit()
+                except Exception as cleanup_error:
+                    self._dbg(f"DEBUG ppt quit failed: {cleanup_error}")
+    
+    def convert_hwp_to_pdf(self, file_path, output_path):
+        """Convert a HWP/HWPX document to PDF through the HWP COM object.
+
+        Args:
+            file_path: Path of the source document.
+            output_path: Path of the PDF to write; its parent folder is
+                created if missing.
+
+        Returns:
+            ``(True, "성공")`` when a non-empty PDF exists afterwards,
+            otherwise ``(False, <reason>)``.
+        """
+        hwp = None
+        try:
+            output_path.parent.mkdir(parents=True, exist_ok=True)
+
+            # Try the gencache (generated wrapper) dispatch first and fall
+            # back to a plain Dispatch if that raises.
+            try:
+                hwp = win32com.client.gencache.EnsureDispatch("HWPFrame.HwpObject")
+            except Exception:
+                hwp = win32com.client.Dispatch("HWPFrame.HwpObject")
+
+            registered = False
+            last_reg_error = None
+
+            # Register the "FilePathCheckDLL" security module, trying each
+            # candidate name in order and stopping at the first that does
+            # not raise. The last error is kept for the failure message.
+            for module_name in getattr(self, "hwp_security_modules", [""]):
+                try:
+                    hwp.RegisterModule("FilePathCheckDLL", module_name)
+                    registered = True
+                    break
+                except Exception as e:
+                    last_reg_error = e
+
+            if not registered:
+                return False, f"HWP 보안 모듈 등록 실패: {last_reg_error}"
+
+            hwp.Open(str(file_path.absolute()), "", "")
+
+            # Run the "FileSaveAsPdf" action with the output file name and
+            # the "PDF" format set on the HFileOpenSave parameter set.
+            hwp.HAction.GetDefault("FileSaveAsPdf", hwp.HParameterSet.HFileOpenSave.HSet)
+            hwp.HParameterSet.HFileOpenSave.filename = str(output_path.absolute())
+            hwp.HParameterSet.HFileOpenSave.Format = "PDF"
+            hwp.HAction.Execute("FileSaveAsPdf", hwp.HParameterSet.HFileOpenSave.HSet)
+
+            # The return value of Execute is not checked; success is judged
+            # by a non-empty output file.
+            if output_path.exists() and output_path.stat().st_size > 0:
+                return True, "성공"
+            return False, "PDF 생성 확인 실패"
+
+        except Exception as e:
+            return False, f"HWP 변환 실패: {str(e)}"
+        finally:
+            # Best-effort shutdown: every step is attempted and its errors
+            # are ignored so cleanup never masks the conversion result.
+            try:
+                if hwp:
+                    try:
+                        # NOTE(review): Clear(1) presumably discards the open
+                        # document without saving; confirm against the HWP
+                        # automation reference.
+                        hwp.Clear(1)
+                    except Exception:
+                        pass
+                    try:
+                        hwp.Quit()
+                    except Exception:
+                        pass
+            except Exception:
+                pass
+
+
+
+    def convert_text_to_pdf(self, text_path, output_path):
+        """Render a UTF-8 text file as an A4 PDF using reportlab.
+
+        Lines are wrapped to the printable width using the measured width of
+        the active font, and a page break is checked before every drawn line.
+        Very long source lines (for example a transcript stored as a single
+        line) therefore continue on following pages instead of running off
+        the bottom or the right edge of the page.
+
+        Args:
+            text_path: Path of the text file (read as UTF-8; undecodable
+                bytes are ignored).
+            output_path: Path of the PDF to write.
+
+        Returns:
+            ``(True, "성공")`` on success, otherwise
+            ``(False, "텍스트 변환 실패: <error>")``.
+        """
+        try:
+            from reportlab.lib.pagesizes import A4
+            from reportlab.pdfgen import canvas
+            from reportlab.pdfbase import pdfmetrics
+            from reportlab.pdfbase.ttfonts import TTFont
+
+            # Register a Korean-capable font if it can be found; otherwise
+            # fall back to the built-in Helvetica.
+            try:
+                pdfmetrics.registerFont(TTFont('Malgun', 'malgun.ttf'))
+                font_name = 'Malgun'
+            except Exception:
+                font_name = 'Helvetica'
+
+            with open(text_path, 'r', encoding='utf-8', errors='ignore') as f:
+                content = f.read()
+
+            c = canvas.Canvas(str(output_path), pagesize=A4)
+            width, height = A4
+
+            font_size = 10
+            margin = 50
+            line_height = 14
+            max_width = width - 2 * margin
+
+            c.setFont(font_name, font_size)
+            y = height - margin
+
+            def wrap(line):
+                """Split one source line into pieces no wider than max_width."""
+                pieces = []
+                current = ''
+                current_width = 0.0
+                for ch in line:
+                    ch_width = pdfmetrics.stringWidth(ch, font_name, font_size)
+                    if current and current_width + ch_width > max_width:
+                        pieces.append(current)
+                        current = ''
+                        current_width = 0.0
+                    current += ch
+                    current_width += ch_width
+                # Always emit the remainder, so an empty source line still
+                # produces one (blank) output line.
+                pieces.append(current)
+                return pieces
+
+            for line in content.split('\n'):
+                for piece in wrap(line):
+                    # Check before every drawn line, not once per source line.
+                    if y < margin:
+                        c.showPage()
+                        c.setFont(font_name, font_size)
+                        y = height - margin
+                    c.drawString(margin, y, piece)
+                    y -= line_height
+
+            c.save()
+            return True, "성공"
+        except Exception as e:
+            return False, f"텍스트 변환 실패: {str(e)}"
+    
+    def process_file(self, file_path):
+        """Convert one source file to PDF and record the outcome.
+
+        The output keeps the source folder structure below
+        ``self.output_dir`` and is normally named ``<stem>.pdf``. If an
+        earlier file in this run already produced that path (for example
+        ``plan.dwg`` and ``plan.pdf`` in the same folder), the extension is
+        appended to the name (``plan_pdf.pdf``) so the earlier result is not
+        overwritten.
+
+        Args:
+            file_path: ``Path`` of a file located under ``self.source_dir``.
+
+        Returns:
+            ``(success, message)``. The same information is appended to
+            ``self.conversion_log``.
+        """
+        ext = file_path.suffix.lower()
+
+        # Mirror the source folder structure in the output tree.
+        relative_path = file_path.relative_to(self.source_dir)
+        output_subdir = self.output_dir / relative_path.parent
+        output_subdir.mkdir(parents=True, exist_ok=True)
+
+        # Pick an output name that no earlier successful conversion in this
+        # run has used (failed entries store an empty path).
+        claimed = {log['변환 PDF 경로'] for log in self.conversion_log}
+        output_pdf = output_subdir / f"{file_path.stem}.pdf"
+        suffix_tag = ext.lstrip('.') or 'noext'
+        attempt = 0
+        while str(output_pdf) in claimed:
+            attempt += 1
+            tail = f"_{suffix_tag}" if attempt == 1 else f"_{suffix_tag}_{attempt}"
+            output_pdf = output_subdir / f"{file_path.stem}{tail}.pdf"
+
+        success = False
+        message = ""
+
+        try:
+            # Already a PDF: copy as-is.
+            if ext in self.pdf_extension:
+                shutil.copy2(file_path, output_pdf)
+                success = True
+                message = "PDF 복사 완료"
+
+            # DWG/DXF drawings.
+            elif ext in self.dwg_extensions:
+                success, message = self.convert_dwg_to_pdf_trueview(file_path, output_pdf)
+
+            # Images.
+            elif ext in self.image_extensions:
+                success, message = self.convert_image_to_pdf(file_path, output_pdf)
+
+            # Office documents.
+            elif ext in self.office_extensions:
+                success, message = self.convert_office_to_pdf(file_path, output_pdf)
+
+            # Videos: speech-to-text first, then render the text as PDF.
+            elif ext in self.video_extensions:
+                transcript_text = self.get_video_transcript(file_path)
+
+                # The transcript text file is kept next to the PDF for
+                # reference.
+                temp_txt = output_subdir / f"{file_path.stem}_transcript.txt"
+                with open(temp_txt, 'w', encoding='utf-8') as f:
+                    f.write(transcript_text)
+
+                success, message = self.convert_text_to_pdf(temp_txt, output_pdf)
+
+                if success:
+                    message = "성공 (음성 인식 완료)"
+
+            # Plain text files.
+            elif ext in self.text_extensions:
+                success, message = self.convert_text_to_pdf(file_path, output_pdf)
+
+            else:
+                message = f"지원하지 않는 파일 형식: {ext}"
+
+        except Exception as e:
+            message = f"처리 중 오류: {str(e)}"
+
+        # Record the outcome for the Excel report.
+        self.conversion_log.append({
+            '원본 경로': str(file_path),
+            '파일명': file_path.name,
+            '파일 형식': ext,
+            '변환 PDF 경로': str(output_pdf) if success else "",
+            '상태': "성공" if success else "실패",
+            '메시지': message,
+            '처리 시간': datetime.now().strftime('%Y-%m-%d %H:%M:%S')
+        })
+
+        return success, message
+    
+    def create_excel_report(self, excel_path):
+        """Write ``self.conversion_log`` to an Excel workbook.
+
+        The workbook has two sheets: a per-file result table (with the status
+        cell coloured green or red) and a summary sheet with totals, the
+        success rate, the source/output folders and the completion time.
+
+        Args:
+            excel_path: Path of the ``.xlsx`` file to write.
+        """
+        book = openpyxl.Workbook()
+        sheet = book.active
+        sheet.title = "변환 결과"
+
+        # Shared styles.
+        header_fill = PatternFill(start_color="366092", end_color="366092", fill_type="solid")
+        header_font = Font(bold=True, color="FFFFFF")
+        ok_fill = PatternFill(start_color="C6EFCE", end_color="C6EFCE", fill_type="solid")
+        ok_font = Font(color="006100")
+        bad_fill = PatternFill(start_color="FFC7CE", end_color="FFC7CE", fill_type="solid")
+        bad_font = Font(color="9C0006")
+
+        # Header row.
+        headers = ['번호', '원본 경로', '파일명', '파일 형식', '변환 PDF 경로', '상태', '메시지', '처리 시간']
+        for col, title in enumerate(headers, start=1):
+            head_cell = sheet.cell(row=1, column=col, value=title)
+            head_cell.fill = header_fill
+            head_cell.font = header_font
+            head_cell.alignment = Alignment(horizontal='center', vertical='center')
+
+        # One row per log entry, starting at row 2.
+        leading_fields = ('원본 경로', '파일명', '파일 형식', '변환 PDF 경로')
+        for row, entry in enumerate(self.conversion_log, start=2):
+            sheet.cell(row=row, column=1, value=row - 1)
+            for col, key in enumerate(leading_fields, start=2):
+                sheet.cell(row=row, column=col, value=entry[key])
+
+            # Colour the status cell by outcome.
+            status_cell = sheet.cell(row=row, column=6, value=entry['상태'])
+            succeeded = entry['상태'] == "성공"
+            status_cell.fill = ok_fill if succeeded else bad_fill
+            status_cell.font = ok_font if succeeded else bad_font
+
+            sheet.cell(row=row, column=7, value=entry['메시지'])
+            sheet.cell(row=row, column=8, value=entry['처리 시간'])
+
+        # Column width: longest cell text plus padding, capped at 50.
+        for column in sheet.columns:
+            longest = max((len(str(cell.value)) for cell in column), default=0)
+            letter = column[0].column_letter
+            sheet.column_dimensions[letter].width = min(longest + 2, 50)
+
+        # Summary sheet.
+        summary = book.create_sheet(title="요약")
+
+        total_files = len(self.conversion_log)
+        success_count = sum(1 for entry in self.conversion_log if entry['상태'] == "성공")
+        fail_count = total_files - success_count
+        if total_files > 0:
+            rate_text = f"{(success_count/total_files*100):.1f}%"
+        else:
+            rate_text = "0%"
+
+        summary_rows = [
+            ['항목', '값'],
+            ['총 파일 수', total_files],
+            ['변환 성공', success_count],
+            ['변환 실패', fail_count],
+            ['성공률', rate_text],
+            ['', ''],
+            ['원본 폴더', str(self.source_dir)],
+            ['출력 폴더', str(self.output_dir)],
+            ['작업 완료 시간', datetime.now().strftime('%Y-%m-%d %H:%M:%S')]
+        ]
+
+        for r, row_values in enumerate(summary_rows, start=1):
+            for col, value in enumerate(row_values, start=1):
+                cell = summary.cell(row=r, column=col, value=value)
+                if r == 1:
+                    cell.fill = header_fill
+                    cell.font = header_font
+                cell.alignment = Alignment(horizontal='center' if col == 1 else 'left')
+
+        summary.column_dimensions['A'].width = 20
+        summary.column_dimensions['B'].width = 60
+
+        # Save and report.
+        book.save(excel_path)
+        print(f"\n엑셀 보고서 생성 완료: {excel_path}")
+    
+    def run(self):
+        """Run the whole conversion job and write the Excel report.
+
+        Order of work:
+          1. Convert every non-video file.
+          2. Load ``domain.txt`` from the parent of the source folder (if it
+             exists) into ``self.domain_terms``.
+          3. Convert the video files (speech-to-text), which use those terms.
+          4. Write the Excel report into the output folder and print totals.
+        """
+        print(f"작업 시작: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
+        print(f"원본 폴더: {self.source_dir}")
+        print(f"출력 폴더: {self.output_dir}")
+
+        # Report whether DWG TrueView was found.
+        if self.trueview_path:
+            print(f"DWG TrueView 발견: {self.trueview_path}")
+        else:
+            print("경고: DWG TrueView를 찾을 수 없습니다. DWG 파일 변환이 불가능합니다.")
+
+        print("-" * 80)
+
+        # Collect all files.
+        all_files = self.get_all_files()
+        total_files = len(all_files)
+
+        # Split into videos and everything else.
+        video_files = []
+        other_files = []
+
+        for file_path in all_files:
+            if file_path.suffix.lower() in self.video_extensions:
+                video_files.append(file_path)
+            else:
+                other_files.append(file_path)
+
+        print(f"\n총 {total_files}개 파일 발견")
+        print(f"  - 문서/이미지 등: {len(other_files)}개")
+        print(f"  - 동영상: {len(video_files)}개")
+        print("\n[1단계] 문서 파일 변환 시작...\n")
+
+        # Step 1: documents first.
+        for idx, file_path in enumerate(other_files, 1):
+            print(f"[{idx}/{len(other_files)}] {file_path.name} 처리 중...", end=' ')
+            success, message = self.process_file(file_path)
+            print(f"{'✓' if success else '✗'} {message}")
+
+        # Step 2: load domain.txt from the parent of the source folder.
+        domain_path = self.source_dir.parent / "domain.txt"
+        if domain_path.exists():
+            self.domain_terms = domain_path.read_text(encoding='utf-8')
+            print(f"\n[2단계] 도메인 용어 사전 로드 완료: {domain_path}")
+            print(f"  - 용어 수: 약 {len(self.domain_terms.split())}개 단어")
+        else:
+            print(f"\n[2단계] 도메인 용어 사전 없음: {domain_path}")
+            print("  - 기본 음성 인식으로 진행합니다.")
+
+        # Step 3: videos.
+        if video_files:
+            print(f"\n[3단계] 동영상 음성 인식 시작...\n")
+            for idx, file_path in enumerate(video_files, 1):
+                print(f"[{idx}/{len(video_files)}] {file_path.name} 처리 중...", end=' ')
+                success, message = self.process_file(file_path)
+                print(f"{'✓' if success else '✗'} {message}")
+
+        # Write the Excel report.
+        excel_path = self.output_dir / f"변환_결과_{datetime.now().strftime('%Y%m%d_%H%M%S')}.xlsx"
+        self.create_excel_report(excel_path)
+
+        # Final summary.
+        success_count = sum(1 for log in self.conversion_log if log['상태'] == "성공")
+        # Compute the rate separately so the "성공률:" label is printed even
+        # when no files were found (previously only a bare "0%" was shown).
+        success_rate = f"{(success_count/total_files*100):.1f}%" if total_files > 0 else "0%"
+        print("\n" + "=" * 80)
+        print(f"작업 완료!")
+        print(f"총 파일: {total_files}개")
+        print(f"성공: {success_count}개")
+        print(f"실패: {total_files - success_count}개")
+        print(f"성공률: {success_rate}")
+        print("=" * 80)
+
+if __name__ == "__main__":
+    # Hard-coded source and output folders used when this module is run as
+    # a script.
+    SOURCE_DIR = r"D:\for python\테스트 중(측량)\측량_GIS_드론 관련 자료들"
+    OUTPUT_DIR = r"D:\for python\테스트 중(측량)\추출"
+    
+    # Build the converter and run the full job.
+    converter = SurveyingFileConverter(SOURCE_DIR, OUTPUT_DIR)
+    converter.run()
\ No newline at end of file
diff --git a/converters/pipeline/step2_extract.py b/converters/pipeline/step2_extract.py
new file mode 100644
index 0000000..be4d6d6
--- /dev/null
+++ b/converters/pipeline/step2_extract.py
@@ -0,0 +1,789 @@
+# -*- coding: utf-8 -*-
+"""
+extract_1_v2.py
+
+PDF에서 텍스트(md)와 이미지(png)를 추출
+- 하위 폴더 구조 유지
+- 이미지 메타데이터 JSON 생성 (폴더경로, 파일명, 페이지, 위치, 캡션 등)
+"""
+
+import fitz  # PyMuPDF
+import os
+import re
+import json
+import numpy as np
+from pathlib import Path
+from datetime import datetime
+from PIL import Image
+import io
+
+# ===== OCR settings (optional) =====
+try:
+    import pytesseract
+    pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
+    TESSERACT_AVAILABLE = True  # only the Python package import is checked here
+except ImportError:
+    TESSERACT_AVAILABLE = False
+    print("[INFO] pytesseract 미설치 - 텍스트 잘림 필터 비활성화")
+
+# ===== Path settings =====
+BASE_DIR = Path(r"D:\for python\survey_test\extract")        # location of the source PDFs
+OUTPUT_BASE = Path(r"D:\for python\survey_test\process")   # location of the output
+
+CAPTION_PATTERN = re.compile(  # caption starts such as "<그림 3>", "Figure 2-1", "fig. 04"
+    r'^\s*(?:[<\[\(\{]\s*)?(그림|figure|fig)\s*\.?\s*(?:[<\[\(\{]\s*)?0*\d+(?:\s*[-–]\s*\d+)?',
+    re.IGNORECASE
+)
+
+
+def get_figure_rects(page):
+    """
+    Find figure regions by clustering vector drawings and images around caption text blocks.
+    Returns a list of dicts with keys 'rect', 'caption_index', 'caption_rect', 'caption_text'.
+    """
+    drawings = page.get_drawings()
+
+    blocks = page.get_text("blocks")
+    captions = []
+    # Every text block matching CAPTION_PATTERN seeds one cluster.
+    for i, b in enumerate(blocks):
+        text = b[4]
+        if CAPTION_PATTERN.search(text):
+            captions.append({'rect': fitz.Rect(b[:4]), 'index': i, 'text': text, 'drawings': []})
+
+    if not captions:
+        return []
+    # Skip drawings taller than 1/3 page but under 5 units wide, or wider than 90% of the page.
+    filtered_drawings_rects = []
+    for d in drawings:
+        r = d["rect"]
+        if r.height > page.rect.height / 3 and r.width < 5:
+            continue
+        if r.width > page.rect.width * 0.9:
+            continue
+        filtered_drawings_rects.append(r)
+    # Image blocks (type 1) covering at least 1% of the page are candidates as well.
+    page_area = page.rect.get_area()
+    img_rects = []
+    for b in page.get_text("dict")["blocks"]:
+        if b.get("type") == 1:
+            ir = fitz.Rect(b["bbox"])
+            if ir.get_area() < page_area * 0.01:
+                continue
+            img_rects.append(ir)
+
+    remaining_drawings = filtered_drawings_rects + img_rects
+    caption_clusters = {cap['index']: [cap['rect']] for cap in captions}
+    # True when a text block of 20+ chars lies inside the r1|r2 bounding box without touching r1 or r2.
+    def is_text_between(r1, r2, text_blocks):
+        if r1.intersects(r2):
+            return False
+        union = r1 | r2
+        for b in text_blocks:
+            b_rect = fitz.Rect(b[:4])
+            text_content = b[4]
+            if len(text_content.strip()) < 20: 
+                continue
+            if not b_rect.intersects(union):
+                continue
+            if b_rect.intersects(r1) or b_rect.intersects(r2):
+                continue
+            return True
+        return False
+    # Repeat passes until no remaining rect can be attached to a cluster.
+    changed = True
+    while changed:
+        changed = False
+        to_remove = []
+        
+        for d_rect in remaining_drawings:
+            best_cluster_key = None
+            min_dist = float('inf')
+            
+            for cap_index, cluster_rects in caption_clusters.items():
+                for r in cluster_rects:
+                    dist = 0
+                    if d_rect.intersects(r):
+                        dist = 0
+                    else:
+                        x_dist = 0
+                        if d_rect.x1 < r.x0: x_dist = r.x0 - d_rect.x1
+                        elif d_rect.x0 > r.x1: x_dist = d_rect.x0 - r.x1
+                        
+                        y_dist = 0
+                        if d_rect.y1 < r.y0: y_dist = r.y0 - d_rect.y1
+                        elif d_rect.y0 > r.y1: y_dist = d_rect.y0 - r.y1
+                        
+                        if x_dist < 150 and y_dist < 150:  # gap must be under 150 on both axes
+                            dist = max(x_dist, y_dist) + 0.1  # +0.1 keeps a non-touching rect above 0
+                        else:
+                            dist = float('inf')
+                    
+                    if dist < min_dist:
+                         if not is_text_between(r, d_rect, blocks):
+                             min_dist = dist
+                             best_cluster_key = cap_index
+                
+                if min_dist == 0:  # already intersects a cluster member; stop searching
+                    break
+            
+            if best_cluster_key is not None and min_dist < 150:
+                caption_clusters[best_cluster_key].append(d_rect)  # attached rects can anchor later ones
+                to_remove.append(d_rect)
+                changed = True
+        
+        for r in to_remove:
+            remaining_drawings.remove(r)
+            
+    figure_regions = []
+    
+    for cap in captions:
+        cluster_rects = caption_clusters[cap['index']]
+        content_rects = cluster_rects[1:]  # index 0 is the caption rect itself
+        
+        if not content_rects:
+            continue
+            
+        union_rect = content_rects[0]
+        for r in content_rects[1:]:
+            union_rect = union_rect | r
+        # Pad the bounding box by 5 units, clamped to the page width/height.
+        union_rect.x0 = max(0, union_rect.x0 - 5)
+        union_rect.x1 = min(page.rect.width, union_rect.x1 + 5)
+        union_rect.y0 = max(0, union_rect.y0 - 5)
+        union_rect.y1 = min(page.rect.height, union_rect.y1 + 5)
+        
+        cap_rect = cap['rect']
+        # Caption centre above the region centre: keep the top below the caption; else keep the bottom above it.
+        if cap_rect.y0 + cap_rect.height/2 < union_rect.y0 + union_rect.height/2:
+             if union_rect.y0 < cap_rect.y1: union_rect.y0 = cap_rect.y1 + 2 
+        else:
+             if union_rect.y1 > cap_rect.y0: union_rect.y1 = cap_rect.y0 - 2 
+             
+        area = union_rect.get_area()
+        page_area = page.rect.get_area()
+        # Drop regions smaller than 1% of the page area.
+        if area < page_area * 0.01:
+            continue
+        # Drop strips thinner than 20 units that span more than 60% of the page.
+        if union_rect.height < 20 and union_rect.width > page.rect.width * 0.6:
+            continue
+        if union_rect.width < 20 and union_rect.height > page.rect.height * 0.6:
+            continue
+
+        text_blocks = page.get_text("blocks")
+        text_count = 0
+        # Count text blocks of 5+ characters that intersect the region.
+        for b in text_blocks:
+            b_rect = fitz.Rect(b[:4])
+            if not b_rect.intersects(union_rect):
+                continue
+            text = b[4].strip()
+            if len(text) < 5:
+                continue
+            text_count += 1
+
+        if text_count < 0:  # NOTE(review): a count is never negative, so this never skips a region
+            continue
+
+        figure_regions.append({
+            'rect': union_rect,
+            'caption_index': cap['index'],
+            'caption_rect': cap['rect'],
+            'caption_text': cap['text'].strip()  # ★ keep the caption text
+        })
+
+    return figure_regions
+
+
+def pixmap_metrics(pix):
+    """Return (non-white ratio, edge ratio, grayscale variance) for an RGB(A) pixmap."""
+    channels = 4 if pix.alpha else 3
+    rgb = np.frombuffer(pix.samples, dtype=np.uint8).reshape(pix.height, pix.width, channels)[:, :, :3]
+    red, green, blue = rgb[:, :, 0], rgb[:, :, 1], rgb[:, :, 2]
+    gray = (0.299 * red + 0.587 * green + 0.114 * blue).astype(np.uint8)
+    nonwhite_ratio = float(1.0 - (gray > 245).mean())  # gray levels above 245 count as white
+    signed = gray.astype(np.int16)  # signed copy so the differences cannot wrap around
+    grad_x = np.abs(np.diff(signed, axis=1))
+    grad_y = np.abs(np.diff(signed, axis=0))
+    # Crop both gradients to the shared (height-1, width-1) grid before thresholding.
+    edge_ratio = float(((grad_x[:-1, :] + grad_y[:, :-1]) > 40).mean())
+    return nonwhite_ratio, edge_ratio, float(gray.var())
+
+
+def keep_figure(pix):
+    """Return (keep, nonwhite_ratio, edge_ratio, var); keep is False for near-blank clips."""
+    metrics = pixmap_metrics(pix)
+    nonwhite_ratio, edge_ratio, var = metrics
+    nearly_blank = nonwhite_ratio < 0.004
+    faint_and_flat = nonwhite_ratio < 0.012 and edge_ratio < 0.004 and var < 20
+    return (not (nearly_blank or faint_and_flat), *metrics)
+
+
+# ===== 추가 이미지 필터 함수들 (v2.1) =====
+
+def pix_to_pil(pix):
+    """Convert a PyMuPDF Pixmap to a PIL Image by way of in-memory PNG bytes."""
+    png_buffer = io.BytesIO(pix.tobytes("png"))
+    return Image.open(png_buffer)
+
+
+def has_cut_text_at_boundary(pix, margin=5):
+    """
+    Detect whether OCR-recognised text touches the border of the image.
+
+    A text box that comes within ``margin`` pixels of an image edge is taken
+    as a sign that the crop cut through text.
+
+    Args:
+        pix: PyMuPDF Pixmap
+        margin: tolerance in pixels measured from each border (default 5px)
+
+    Returns:
+        bool: True if a text of two or more characters reaches a border;
+        False if none does, if pytesseract is unavailable, or if the OCR
+        step raises
+    """
+    if not TESSERACT_AVAILABLE:
+        return False  # no OCR, so this filter is switched off
+
+    try:
+        img = pix_to_pil(pix)
+        width, height = img.size
+
+        # OCR result as parallel lists: 'text', 'left', 'top', 'width', 'height'.
+        ocr = pytesseract.image_to_data(img, lang='kor+eng', output_type=pytesseract.Output.DICT)
+
+        for idx, raw_text in enumerate(ocr['text']):
+            if len(str(raw_text).strip()) < 2:  # too short to be meaningful
+                continue
+
+            left, top = ocr['left'][idx], ocr['top'][idx]
+            box_w, box_h = ocr['width'][idx], ocr['height'][idx]
+            right, bottom = left + box_w, top + box_h
+
+            # Checked in the order left, right, top, bottom. The top test
+            # additionally requires the box to be under 30% of the image
+            # height.
+            touches_border = (
+                left <= margin
+                or right >= width - margin
+                or (top <= margin and box_h < height * 0.3)
+                or bottom >= height - margin
+            )
+            if touches_border:
+                return True
+
+        return False
+
+    except Exception:
+        # OCR failed: let the image pass this filter (the image is kept).
+        return False
+
+
+def is_decorative_background(pix, edge_threshold=0.02, color_var_threshold=500):
+    """
+    Detect a decorative image such as a plain or gradient background behind text.
+
+    The verdict uses only the pixel statistics from pixmap_metrics(): an image
+    is decorative when it has few edges (not a complex chart or photo) and a
+    low grayscale variance.
+
+    Args:
+        pix: PyMuPDF Pixmap
+        edge_threshold: edge-ratio limit (default 0.02 = 2%)
+        color_var_threshold: grayscale-variance limit
+
+    Returns:
+        bool: True if the image looks decorative; False otherwise, or when
+        the metrics cannot be computed
+    """
+    try:
+        _nonwhite_ratio, edge_ratio, var = pixmap_metrics(pix)
+
+        # NOTE(review): the earlier version ran Tesseract OCR at this point when
+        # both limits were met, but it returned True whether or not any text
+        # was found and hid OCR errors behind a bare ``except:``. The OCR pass
+        # therefore cost time without affecting the result and was removed.
+        # If "filter only when text is present" was the intent, that rule
+        # still has to be defined and added explicitly.
+        has_few_edges = edge_ratio < edge_threshold
+        has_low_variance = var < color_var_threshold
+
+        # Both conditions are required; this is the same outcome the OCR
+        # variant produced for every input.
+        return bool(has_few_edges and has_low_variance)
+
+    except Exception:
+        # Best effort: if the pixmap cannot be analysed, do not filter it.
+        # (Only Exception is caught, so KeyboardInterrupt and SystemExit
+        # propagate.)
+        return False
+
+
+def is_header_footer_region(rect, page_rect, height_threshold=0.12):
+    """
+    Report whether ``rect`` looks like a header or footer strip.
+
+    A strip is a region shorter than 15% of the page height that starts inside
+    the top band or ends inside the bottom band of the page. Each band is
+    ``height_threshold`` of the page height.
+
+    Args:
+        rect: image region (fitz.Rect)
+        page_rect: rectangle of the whole page (fitz.Rect); only its height is read
+        height_threshold: fraction of the page height used for each band (default 12%)
+
+    Returns:
+        bool: True for a header/footer strip, otherwise False
+    """
+    page_height = page_rect.height
+
+    # A region of at least 15% of the page height is never a strip, so the
+    # band position does not need to be looked at.
+    is_strip = rect.height < page_height * 0.15
+    if not is_strip:
+        return False
+
+    # Band limits are measured from y = 0 at the top of the page.
+    header_limit = page_height * height_threshold
+    footer_limit = page_height * (1 - height_threshold)
+
+    # Header: starts above the top limit. Footer: ends below the bottom limit.
+    return rect.y0 < header_limit or rect.y1 > footer_limit
+
+
+def should_filter_image(pix, rect, page_rect):
+    """
+    Run the image filters in order and report the first one that rejects the image.
+
+    Args:
+        pix: PyMuPDF Pixmap of the candidate image
+        rect: region of the image on the page
+        page_rect: rectangle of the whole page
+
+    Returns:
+        tuple: (True, reason) with reason "header_footer", "cut_text" or
+        "decorative_background"; (False, None) when every filter passes
+    """
+    # The geometric header/footer test runs before the pixel- and OCR-based ones.
+    # Lambdas defer each check so later filters are skipped after a match.
+    checks = (
+        ("header_footer", lambda: is_header_footer_region(rect, page_rect)),
+        ("cut_text", lambda: has_cut_text_at_boundary(pix)),
+        ("decorative_background", lambda: is_decorative_background(pix)),
+    )
+    for reason, rejects in checks:
+        if rejects():
+            return True, reason
+
+    return False, None
+
+
+def extract_pdf_content(pdf_path, output_md_path, img_dir, metadata):
+    """
+    Extract a PDF's text and figures into a Markdown file plus PNG images.
+    
+    Args:
+        pdf_path: path of the PDF file
+        output_md_path: path of the Markdown file to write
+        img_dir: folder that receives the extracted images
+        metadata: dict providing 'pdf_name', 'relative_folder' and 'full_path'
+    
+    Returns:
+        image_metadata_list: list with one metadata dict per saved image
+    """
+    os.makedirs(img_dir, exist_ok=True)
+    
+    image_metadata_list = []  # ★ metadata for every saved image
+    
+    # Both handles are context-managed so the PDF is closed even when a page
+    # fails midway (a trailing doc.close() was skipped whenever an exception occurred).
+    with fitz.open(pdf_path) as doc, open(output_md_path, "w", encoding="utf-8") as md_file:
+        total_pages = len(doc)
+        # ★ the metadata header (front matter) is written first
+        md_file.write(f"---\n")
+        md_file.write(f"source_pdf: {metadata['pdf_name']}\n")
+        md_file.write(f"source_folder: {metadata['relative_folder']}\n")
+        md_file.write(f"total_pages: {total_pages}\n")
+        md_file.write(f"extracted_at: {datetime.now().isoformat()}\n")
+        md_file.write(f"---\n\n")
+        md_file.write(f"# {metadata['pdf_name']}\n\n")
+        
+        for page_num, page in enumerate(doc):
+            md_file.write(f"\n## Page {page_num + 1}\n\n")
+            img_rel_dir = os.path.basename(img_dir)
+            
+            figure_regions = get_figure_rects(page)
+            
+            kept_figures = []
+            for fig in figure_regions:
+                rect = fig['rect']
+                pix_preview = page.get_pixmap(clip=rect, dpi=100, colorspace=fitz.csRGB)
+                ok, *_ = keep_figure(pix_preview)  # low-dpi preview decides before the 150-dpi render
+                if not ok:
+                    continue
+
+                pix = page.get_pixmap(clip=rect, dpi=150, colorspace=fitz.csRGB)
+                
+                # ★ extra filters (v2.1)
+                should_filter, _reason = should_filter_image(pix, rect, page.rect)
+                if should_filter:
+                    continue
+                
+                img_name = f"p{page_num + 1:03d}_fig{len(kept_figures):02d}.png"
+                img_path = os.path.join(img_dir, img_name)
+                pix.save(img_path)
+
+                fig['img_path'] = os.path.join(img_rel_dir, img_name).replace("\\", "/")
+                fig['img_name'] = img_name
+                kept_figures.append(fig)
+                
+                # ★ collect the image metadata
+                image_metadata_list.append({
+                    "image_file": img_name,
+                    "image_path": str(Path(img_dir) / img_name),
+                    "type": "figure",
+                    "source_pdf": metadata['pdf_name'],
+                    "source_folder": metadata['relative_folder'],
+                    "full_path": metadata['full_path'],
+                    "page": page_num + 1,
+                    "total_pages": total_pages,
+                    "caption": fig.get('caption_text', ''),
+                    "rect": {
+                        "x0": round(rect.x0, 2),
+                        "y0": round(rect.y0, 2),
+                        "x1": round(rect.x1, 2),
+                        "y1": round(rect.y1, 2)
+                    }
+                })
+
+            figure_regions = kept_figures
+
+            caption_present = any(
+                CAPTION_PATTERN.search((tb[4] or "")) for tb in page.get_text("blocks")
+            )
+            uncaptioned_idx = 0
+
+            items = []
+
+            def inside_any_figure(block_rect, figures):
+                for fig in figures:
+                    intersect = block_rect & fig["rect"]
+                    if intersect.get_area() > 0.5 * block_rect.get_area():
+                        return True
+                return False
+
+            def is_full_width_rect(r, page_rect):
+                return r.width >= page_rect.width * 0.78
+
+            def figure_anchor_rect(fig, page_rect):
+                cap = fig["caption_rect"]
+                rect = fig["rect"]
+                if cap.y0 >= rect.y0:
+                    y = max(0.0, cap.y0 - 0.02)
+                else:
+                    y = min(page_rect.height - 0.02, cap.y1 + 0.02)
+                return fitz.Rect(cap.x0, y, cap.x1, y + 0.02)
+
+            for fig in figure_regions:
+                anchor = figure_anchor_rect(fig, page.rect)
+                md = (
+                    f"\n![{fig.get('caption_text', 'Figure')}]({fig['img_path']})\n"
+                    f"*{fig.get('caption_text', '')}*\n\n"
+                )
+                items.append({
+                    "kind": "figure",
+                    "rect": anchor,
+                    "kind_order": 0,
+                    "md": md,
+                })
+
+            raw_blocks = page.get_text("dict")["blocks"]
+
+            for block in raw_blocks:
+                block_rect = fitz.Rect(block["bbox"])
+
+                if block.get("type") == 0:
+                    if inside_any_figure(block_rect, figure_regions):
+                        continue
+                    items.append({
+                        "kind": "text",
+                        "rect": block_rect,
+                        "kind_order": 2,
+                        "block": block,
+                    })
+                    continue
+
+                if block.get("type") == 1:
+                    if inside_any_figure(block_rect, figure_regions):
+                        continue
+                    if caption_present:
+                        continue
+
+                    page_area = page.rect.get_area()
+                    if block_rect.get_area() < page_area * 0.005:
+                        continue
+
+                    ratio = block_rect.width / max(1.0, block_rect.height)
+                    if ratio < 0.25 or ratio > 4.0:
+                        continue
+
+                    pix_preview = page.get_pixmap(
+                        clip=block_rect, dpi=80, colorspace=fitz.csRGB
+                    )
+                    ok, *_ = keep_figure(pix_preview)
+                    if not ok:
+                        continue
+
+                    pix = page.get_pixmap(
+                        clip=block_rect, dpi=150, colorspace=fitz.csRGB
+                    )
+                    
+                    # ★ extra filters (v2.1)
+                    should_filter, _reason = should_filter_image(pix, block_rect, page.rect)
+                    if should_filter:
+                        continue
+                    
+                    img_name = f"p{page_num + 1:03d}_photo{uncaptioned_idx:02d}.png"
+                    img_path = os.path.join(img_dir, img_name)
+                    pix.save(img_path)
+
+                    rel = os.path.join(img_rel_dir, img_name).replace("\\", "/")
+                    r = block_rect
+                    md = (
+                        f'\n![Photo]({rel})\n'
+                        f'*Page {page_num + 1} Photo*\n\n'
+                    )
+
+                    items.append({
+                        "kind": "raster",
+                        "rect": block_rect,
+                        "kind_order": 1,
+                        "md": md,
+                    })
+                    
+                    # ★ metadata for an image without a caption
+                    image_metadata_list.append({
+                        "image_file": img_name,
+                        "image_path": str(Path(img_dir) / img_name),
+                        "type": "photo",
+                        "source_pdf": metadata['pdf_name'],
+                        "source_folder": metadata['relative_folder'],
+                        "full_path": metadata['full_path'],
+                        "page": page_num + 1,
+                        "total_pages": total_pages,
+                        "caption": "",
+                        "rect": {
+                            "x0": round(r.x0, 2),
+                            "y0": round(r.y0, 2),
+                            "x1": round(r.x1, 2),
+                            "y1": round(r.y1, 2)
+                        }
+                    })
+
+                    uncaptioned_idx += 1
+                    continue
+
+            # Sort the items into reading order
+            text_items = [it for it in items if it["kind"] == "text"]
+            page_w = page.rect.width
+            mid = page_w / 2.0
+
+            candidates = []
+            for it in text_items:
+                r = it["rect"]
+                if is_full_width_rect(r, page.rect):
+                    continue
+                if r.width < page_w * 0.2:
+                    continue
+                candidates.append(it)
+
+            left = [it for it in candidates if it["rect"].x0 < mid * 0.95]
+            right = [it for it in candidates if it["rect"].x0 > mid * 1.05]
+            two_cols = len(left) >= 3 and len(right) >= 3
+
+            col_y0 = None
+            col_y1 = None
+            seps = []
+
+            if two_cols and left and right:
+                col_y0 = min(
+                    min(it["rect"].y0 for it in left),
+                    min(it["rect"].y0 for it in right),
+                )
+                col_y1 = max(
+                    max(it["rect"].y1 for it in left),
+                    max(it["rect"].y1 for it in right),
+                )
+                for it in text_items:
+                    r = it["rect"]
+                    if col_y0 < r.y0 < col_y1 and is_full_width_rect(r, page.rect):
+                        seps.append(r.y0)
+                seps = sorted(set(seps))
+
+            def seg_index(y0, separators):
+                if not separators:
+                    return 0
+                n = 0
+                for s in separators:
+                    if y0 >= s:
+                        n += 1
+                    else:
+                        break
+                return n
+
+            def order_key(it):
+                r = it["rect"]
+                if not two_cols:
+                    return (r.y0, r.x0, it["kind_order"])
+                if col_y0 is not None and r.y1 <= col_y0:
+                    return (0, r.y0, r.x0, it["kind_order"])
+                if col_y1 is not None and r.y0 >= col_y1:
+                    return (2, r.y0, r.x0, it["kind_order"])
+                seg = seg_index(r.y0, seps)
+                if is_full_width_rect(r, page.rect):
+                    col = 2
+                else:
+                    col = 0 if r.x0 < mid else 1
+                return (1, seg, col, r.y0, r.x0, it["kind_order"])
+
+            items.sort(key=order_key)
+
+            for it in items:
+                if it["kind"] in ("figure", "raster"):
+                    md_file.write(it["md"])
+                    continue
+
+                block = it["block"]
+                for line in block.get("lines", []):
+                    for span in line.get("spans", []):
+                        md_file.write(span.get("text", "") + " ")
+                    md_file.write("\n")
+                md_file.write("\n")
+
+    # doc and md_file have already been closed by the with-block above.
+    return image_metadata_list
+
+
+def process_all_pdfs():
+    """
+    Recursively process every PDF found under BASE_DIR and
+    write the results to OUTPUT_BASE with the same folder structure.
+    """
+    # Create the output folder
+    OUTPUT_BASE.mkdir(parents=True, exist_ok=True)
+    
+    # Image metadata collected across all PDFs
+    all_image_metadata = []
+    
+    # Processing statistics
+    stats = {
+        "total_pdfs": 0,
+        "success": 0,
+        "failed": 0,
+        "total_images": 0
+    }
+    
+    # Failure log
+    failed_files = []
+    
+    print(f"=" * 60)
+    print(f"PDF 추출 시작")
+    print(f"원본 폴더: {BASE_DIR}")
+    print(f"출력 폴더: {OUTPUT_BASE}")
+    print(f"=" * 60)
+    
+    # Find all PDF files
+    pdf_files = list(BASE_DIR.rglob("*.pdf"))
+    stats["total_pdfs"] = len(pdf_files)
+    
+    print(f"\n총 {len(pdf_files)}개 PDF 발견\n")
+    
+    for idx, pdf_path in enumerate(pdf_files, 1):
+        try:
+            # Path relative to BASE_DIR
+            relative_path = pdf_path.relative_to(BASE_DIR)
+            relative_folder = str(relative_path.parent)
+            if relative_folder == ".":
+                relative_folder = ""
+            
+            pdf_name = pdf_path.name
+            pdf_stem = pdf_path.stem
+            
+            # Output paths (folder structure is preserved)
+            output_folder = OUTPUT_BASE / relative_path.parent
+            output_folder.mkdir(parents=True, exist_ok=True)
+            
+            output_md = output_folder / f"{pdf_stem}.md"
+            img_folder = output_folder / f"{pdf_stem}_img"
+            
+            # Metadata handed to extract_pdf_content
+            metadata = {
+                "pdf_name": pdf_name,
+                "pdf_stem": pdf_stem,
+                "relative_folder": relative_folder,
+                "full_path": str(relative_path),
+            }
+            
+            print(f"[{idx}/{len(pdf_files)}] {relative_path}")
+            
+            # Process the PDF
+            image_metas = extract_pdf_content(
+                str(pdf_path),
+                str(output_md),
+                str(img_folder),
+                metadata
+            )
+            
+            all_image_metadata.extend(image_metas)
+            stats["success"] += 1
+            stats["total_images"] += len(image_metas)
+            
+            print(f"    ✓ 완료 (이미지 {len(image_metas)}개)")
+            
+        except Exception as e:  # a failing PDF is recorded and the batch continues
+            stats["failed"] += 1
+            failed_files.append({
+                "file": str(pdf_path),
+                "error": str(e)
+            })
+            print(f"    ✗ 실패: {e}")
+    
+    # Save the metadata of all images
+    meta_output_path = OUTPUT_BASE / "image_metadata.json"
+    with open(meta_output_path, "w", encoding="utf-8") as f:
+        json.dump(all_image_metadata, f, ensure_ascii=False, indent=2)
+    
+    # Save the processing summary
+    summary = {
+        "processed_at": datetime.now().isoformat(),
+        "source_dir": str(BASE_DIR),
+        "output_dir": str(OUTPUT_BASE),
+        "statistics": stats,
+        "failed_files": failed_files
+    }
+    
+    summary_path = OUTPUT_BASE / "extraction_summary.json"
+    with open(summary_path, "w", encoding="utf-8") as f:
+        json.dump(summary, f, ensure_ascii=False, indent=2)
+    
+    # Print the results
+    print(f"\n" + "=" * 60)
+    print(f"추출 완료!")
+    print(f"=" * 60)
+    print(f"총 PDF: {stats['total_pdfs']}개")
+    print(f"성공: {stats['success']}개")
+    print(f"실패: {stats['failed']}개")
+    print(f"추출된 이미지: {stats['total_images']}개")
+    print(f"\n이미지 메타데이터: {meta_output_path}")
+    print(f"처리 요약: {summary_path}")
+    
+    if failed_files:
+        print(f"\n실패한 파일:")
+        for f in failed_files:
+            print(f"  - {f['file']}: {f['error']}")
+
+
+if __name__ == "__main__":  # run the batch extraction only when executed as a script
+    process_all_pdfs()
\ No newline at end of file
diff --git a/converters/pipeline/step3_domain.py b/converters/pipeline/step3_domain.py
new file mode 100644
index 0000000..e01a87a
--- /dev/null
+++ b/converters/pipeline/step3_domain.py
@@ -0,0 +1,265 @@
+# -*- coding: utf-8 -*-
+"""
+domain_prompt.py
+
+기능:
+- D:\\test\\report 아래의 pdf/xlsx/png/txt/md 파일들의
+  파일명과 내용 일부를 샘플링한다.
+- 이 샘플을 기반으로, 문서 묶음의 분야/업무 맥락을 파악하고
+  "너는 ~~ 분야의 전문가이다. 나는 ~~를 하고 싶다..." 형식의
+  도메인 전용 시스템 프롬프트를 자동 생성한다.
+- 결과는 output/context/domain_prompt.txt 로 저장된다.
+
+이 domain_prompt.txt 내용은 이후 모든 GPT 호출(system role)에 공통으로 붙여 사용할 수 있다.
+"""
+
+import os
+import sys
+import json
+from pathlib import Path
+
+import pdfplumber
+import fitz  # PyMuPDF
+from PIL import Image
+import pytesseract
+import pandas as pd
+from openai import OpenAI
+import pytesseract
+from api_config import API_KEYS
+pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"
+
+# ===== Path settings =====
+DATA_ROOT   = Path(r"D:\for python\survey_test\extract")  # folder walked for input files
+OUTPUT_ROOT = Path(r"D:\for python\survey_test\output")
+CONTEXT_DIR = OUTPUT_ROOT / "context"  # domain_prompt.txt is written here
+LOG_DIR     = OUTPUT_ROOT / "logs"
+
+for d in [OUTPUT_ROOT, CONTEXT_DIR, LOG_DIR]:  # created as an import-time side effect
+    d.mkdir(parents=True, exist_ok=True)
+
+# ===== OpenAI settings (the key value itself is read from api_config.API_KEYS) =====
+OPENAI_API_KEY = API_KEYS.get('GPT_API_KEY', '')
+GPT_MODEL      = "gpt-5-2025-08-07"
+
+client = OpenAI(api_key=OPENAI_API_KEY)
+
+# ===== OCR settings =====
+OCR_LANG = "kor+eng"
+
+SKIP_DIR_NAMES = {"System Volume Information", "$RECYCLE.BIN", ".git", "__pycache__"}  # pruned from os.walk
+
+
+def log(msg: str):  # echo msg to stdout and append it to the log file
+    print(msg, flush=True)
+    with (LOG_DIR / "domain_prompt_log.txt").open("a", encoding="utf-8") as f:  # append mode
+        f.write(msg + "\n")
+
+
+def safe_rel(p: Path) -> str:  # p relative to DATA_ROOT, as a string
+    try:
+        return str(p.relative_to(DATA_ROOT))
+    except Exception:  # relative_to() failed for this path
+        return str(p)  # fall back to the path as given
+
+
+def ocr_image(img_path: Path) -> str:  # OCR text of an image file, or "" on failure
+    try:
+        return pytesseract.image_to_string(Image.open(img_path), lang=OCR_LANG).strip()
+    except Exception as e:  # best effort: any failure is logged as a warning
+        log(f"[WARN] OCR 실패: {safe_rel(img_path)} | {e}")
+        return ""
+
+
+def sample_from_pdf(p: Path, max_chars: int = 1000) -> str:
+    """Return up to ``max_chars`` characters of text taken from the first three pages of a PDF."""
+    pieces, collected = [], 0
+    try:
+        with pdfplumber.open(str(p)) as pdf:
+            for page in pdf.pages[:3]:
+                page_text = page.extract_text() or ""
+                if page_text:
+                    pieces.append(page_text)
+                    collected += len(page_text)
+                if collected >= max_chars:
+                    break
+    except Exception as e:
+        log(f"[WARN] PDF 샘플 추출 실패: {safe_rel(p)} | {e}")
+    return "\n".join(pieces)[:max_chars]
+
+
+def sample_from_xlsx(p: Path, max_chars: int = 1000) -> str:
+    """Return up to ``max_chars`` chars: file name, then columns and 5 rows of the first 3 sheets."""
+    texts = [f"[파일명] {p.name}"]
+    try:
+        # The context manager closes the workbook handle, which was previously left open.
+        with pd.ExcelFile(str(p)) as xls:
+            for sheet_name in xls.sheet_names[:3]:
+                try:
+                    df = xls.parse(sheet_name)
+                except Exception as e:
+                    log(f"[WARN] 시트 로딩 실패: {safe_rel(p)} | {sheet_name} | {e}")
+                    continue
+                texts.append(f"\n[시트] {sheet_name}")
+                texts.append("컬럼: " + ", ".join(map(str, df.columns)))
+                texts.append(df.head(5).to_string(index=False))
+                if sum(len(x) for x in texts) >= max_chars:
+                    break
+    except Exception as e:
+        log(f"[WARN] XLSX 샘플 추출 실패: {safe_rel(p)} | {e}")
+    return "\n".join(texts)[:max_chars]
+
+
+def sample_from_text_file(p: Path, max_chars: int = 1000) -> str:
+    """Return the first ``max_chars`` characters of a UTF-8 (or, failing that, CP949) text file."""
+    try:  # strict decode: with errors="ignore" UTF-8 never failed, so CP949 was never tried
+        return p.read_text(encoding="utf-8")[:max_chars]
+    except UnicodeDecodeError:
+        return p.read_text(encoding="cp949", errors="ignore")[:max_chars]
+
+
+def gather_file_samples(
+    max_files_per_type: int = 100,
+    max_total_samples: int = 300,
+    max_chars_per_sample: int = 1000,
+):
+    """Walk DATA_ROOT and return (relative names of all files, capped list of text samples)."""
+    file_names = []
+    samples = []
+
+    count_pdf = 0
+    count_xlsx = 0
+    count_img = 0
+    count_txt = 0
+
+    for root, dirs, files in os.walk(DATA_ROOT):
+        dirs[:] = [d for d in dirs if d not in SKIP_DIR_NAMES and not d.startswith(".")]
+        cur_dir = Path(root)
+
+        for fname in files:
+            fpath = cur_dir / fname
+            ext = fpath.suffix.lower()
+
+            # Every file name is collected; only the sample extraction is capped
+            file_names.append(safe_rel(fpath))
+
+            if len(samples) >= max_total_samples:
+                continue
+            # A per-type counter only advances when a non-empty sample was kept.
+            try:
+                if ext == ".pdf" and count_pdf < max_files_per_type:
+                    s = sample_from_pdf(fpath, max_chars=max_chars_per_sample)
+                    if s.strip():
+                        samples.append(f"[PDF] {safe_rel(fpath)}\n{s}")
+                        count_pdf += 1
+                    continue
+
+                if ext in {".xlsx", ".xls"} and count_xlsx < max_files_per_type:
+                    s = sample_from_xlsx(fpath, max_chars=max_chars_per_sample)
+                    if s.strip():
+                        samples.append(f"[XLSX] {safe_rel(fpath)}\n{s}")
+                        count_xlsx += 1
+                    continue
+
+                if ext in {".png", ".jpg", ".jpeg"} and count_img < max_files_per_type:
+                    s = ocr_image(fpath)
+                    if s.strip():
+                        samples.append(f"[IMG] {safe_rel(fpath)}\n{s[:max_chars_per_sample]}")
+                        count_img += 1
+                    continue
+
+                if ext in {".txt", ".md"} and count_txt < max_files_per_type:
+                    s = sample_from_text_file(fpath, max_chars=max_chars_per_sample)
+                    if s.strip():
+                        samples.append(f"[TEXT] {safe_rel(fpath)}\n{s}")
+                        count_txt += 1
+                    continue
+
+            except Exception as e:  # a failing file is logged and skipped
+                log(f"[WARN] 샘플 추출 실패: {safe_rel(fpath)} | {e}")
+                continue
+
+    return file_names, samples
+
+
+def build_domain_prompt():
+    """
+    Send the file names and content samples to GPT, save the reply
+    (a '너는 ~~ 분야의 전문가이다...' style system prompt) to domain_prompt.txt and return it.
+    """
+    log("도메인 프롬프트 생성을 위한 샘플 수집 중...")
+    file_names, samples = gather_file_samples()
+
+    if not file_names and not samples:
+        log("파일 샘플이 없어 도메인 프롬프트를 생성할 수 없습니다.")
+        sys.exit(1)  # terminates the process, not just this function
+
+    file_names_text = "\n".join(file_names[:80])  # only the first 80 names are sent
+    sample_text = "\n\n".join(samples[:30])  # only the first 30 samples are sent
+
+    prompt = f"""
+다음은 한 기업의 '이슈 리포트 및 시스템 관련 자료'로 추정되는 파일들의 목록과,
+각 파일에서 일부 추출한 내용 샘플이다.
+
+[파일명 목록]
+{file_names_text}
+
+[내용 샘플]
+{sample_text}
+
+위 자료를 바탕으로 다음을 수행하라.
+
+1) 이 문서 묶음이 어떤 산업, 업무, 분야에 대한 것인지,
+   핵심 키워드를 포함해 2~3줄 정도로 설명하라.
+
+2) 이후, 이 문서들을 다루는 AI에게 사용할 "프롬프트 머리말"을 작성하라.
+   이 머리말은 모든 후속 프롬프트 앞에 항상 붙일 예정이며,
+   다음 조건을 만족해야 한다.
+
+   - 첫 문단: "너는 ~~ 분야의 전문가이다." 형식으로, 이 문서 묶음의 분야와 역할을 정의한다.
+   - 두 번째 문단 이후: "나는 ~~을 하고 싶다.", "우리는 ~~ 의 문제를 분석하고 개선방안을 찾고자 한다." 등
+     사용자가 AI에게 요구하는 전반적 목적과 관점을 정리한다.
+   - 총 5~7줄 정도의 한국어 문장으로 작성한다.
+   - 이후에 붙을 프롬프트(청킹, 요약, RAG, 보고서 작성 등)와 자연스럽게 연결될 수 있도록,
+     역할(role), 목적, 기준(추측 금지, 사실 기반, 근거 명시 등)을 모두 포함한다.
+
+출력 형식:
+- 설명과 머리말을 한 번에 출력하되,
+  별도의 마크다운 없이 순수 텍스트로만 작성하라.
+- 이 출력 전체를 domain_prompt.txt에 그대로 저장할 것이다.
+"""
+
+    resp = client.chat.completions.create(
+        model=GPT_MODEL,
+        messages=[
+            {
+                "role": "system",
+                "content": "너는 문서 묶음의 분야를 식별하고, 그에 맞는 AI 시스템 프롬프트와 컨텍스트를 설계하는 컨설턴트이다."
+            },
+            {
+                "role": "user",
+                "content": prompt
+            }
+        ],
+    )
+
+    content = (resp.choices[0].message.content or "").strip()  # a missing reply becomes ""
+    out_path = CONTEXT_DIR / "domain_prompt.txt"
+    out_path.write_text(content, encoding="utf-8")
+
+    log(f"도메인 프롬프트 생성 완료: {out_path}")
+    return content
+
+
+def main():  # build domain_prompt.txt unless the file already exists
+    log("=== 도메인 프롬프트 생성 시작 ===")
+    out_path = CONTEXT_DIR / "domain_prompt.txt"
+    if out_path.exists():  # an existing file is never overwritten; the user is told to delete it
+        log(f"이미 domain_prompt.txt가 존재합니다: {out_path}")
+        log("기존 파일을 사용하려면 종료하고, 재생성이 필요하면 파일을 삭제한 뒤 다시 실행하십시오.")
+    else:
+        build_domain_prompt()
+    log("=== 도메인 프롬프트 작업 종료 ===")
+
+
+if __name__ == "__main__":  # run main() only when executed as a script
+    main()
diff --git a/converters/pipeline/step4_chunk.py b/converters/pipeline/step4_chunk.py
new file mode 100644
index 0000000..9680692
--- /dev/null
+++ b/converters/pipeline/step4_chunk.py
@@ -0,0 +1,357 @@
+# -*- coding: utf-8 -*-
+"""
+chunk_and_summary_v2.py
+
+기능:
+- 정리중 폴더 아래의 .md 파일들을 대상으로
+  1) domain_prompt.txt 기반 GPT 의미 청킹
+  2) 청크별 요약 생성
+  3) 청크 내 이미지 참조 보존
+  4) JSON 저장 (원문+청크+요약+이미지)
+  5) RAG용 *_chunks.json 저장
+
+전제:
+- extract_1_v2.py 실행 후 .md 파일들이 존재할 것
+- step1_domainprompt.py 실행 후 domain_prompt.txt가 존재할 것
+"""
+
+import os
+import sys
+import json
+import re
+from pathlib import Path
+from datetime import datetime
+
+from openai import OpenAI
+from api_config import API_KEYS
+
+# ===== Paths =====
+DATA_ROOT      = Path(r"D:\for python\survey_test\process")  # NOTE(review): hard-coded, machine-specific Windows path
+OUTPUT_ROOT    = Path(r"D:\for python\survey_test\output")
+
+TEXT_DIR       = OUTPUT_ROOT / "text"
+JSON_DIR       = OUTPUT_ROOT / "json"
+RAG_DIR        = OUTPUT_ROOT / "rag"
+CONTEXT_DIR    = OUTPUT_ROOT / "context"
+LOG_DIR        = OUTPUT_ROOT / "logs"
+
+for d in [TEXT_DIR, JSON_DIR, RAG_DIR, CONTEXT_DIR, LOG_DIR]:
+    d.mkdir(parents=True, exist_ok=True)  # executed at import time, not only when run as a script
+
+# ===== OpenAI settings =====
+OPENAI_API_KEY = API_KEYS.get('GPT_API_KEY', '')
+GPT_MODEL      = "gpt-5-2025-08-07"
+
+client = OpenAI(api_key=OPENAI_API_KEY)
+
+# ===== Directory names skipped while walking DATA_ROOT =====
+SKIP_DIR_NAMES = {"System Volume Information", "$RECYCLE.BIN", ".git", "__pycache__", "output"}
+
+# ===== Markdown image reference pattern: ![alt](path) =====
+IMAGE_PATTERN = re.compile(r'!\[([^\]]*)\]\(([^)]+)\)')
+
+
+def log(msg: str):  # Print msg and append it, prefixed with an [HH:MM:SS] timestamp, to chunk_and_summary_log.txt.
+    print(msg, flush=True)
+    with (LOG_DIR / "chunk_and_summary_log.txt").open("a", encoding="utf-8") as f:  # reopened in append mode on every call
+        f.write(f"[{datetime.now().strftime('%H:%M:%S')}] {msg}\n")
+
+
+def load_domain_prompt() -> str:  # Return the stripped text of CONTEXT_DIR/domain_prompt.txt; exit with status 1 if it is missing.
+    p = CONTEXT_DIR / "domain_prompt.txt"
+    if not p.exists():
+        log(f"domain_prompt.txt가 없습니다: {p}")
+        log("먼저 step1_domainprompt.py를 실행해야 합니다.")
+        sys.exit(1)  # terminates the whole process instead of raising to the caller
+    return p.read_text(encoding="utf-8", errors="ignore").strip()  # undecodable bytes are dropped silently
+
+
+def safe_rel(p: Path) -> str:
+    """Return p relative to DATA_ROOT as a string, or str(p) unchanged when that is not possible."""
+    try:
+        return str(p.relative_to(DATA_ROOT))
+    except Exception:  # e.g. p is not located under DATA_ROOT
+        return str(p)
+
+
+def extract_text_md(p: Path) -> str:
+    """Read a markdown file as UTF-8; if it is not valid UTF-8, re-read it as CP949 (dropping undecodable bytes)."""
+    try:
+        return p.read_text(encoding="utf-8")  # strict on purpose: with errors="ignore" this never raised, so the fallback was dead code
+    except UnicodeDecodeError:  # I/O errors (missing file, permissions) propagate to the caller unchanged
+        return p.read_text(encoding="cp949", errors="ignore")
+
+
+def find_images_in_text(text: str) -> list:
+    """Return one {"alt", "path"} dict per markdown image reference (![alt](path)) found in text, in order of appearance."""
+    found = ({"alt": hit.group(1), "path": hit.group(2)} for hit in IMAGE_PATTERN.finditer(text))
+    return list(found)
+
+
+def semantic_chunk(domain_prompt: str, text: str, source_name: str):
+    """Split text into semantic chunks via GPT_MODEL and return them as a list; uses fallback_chunk when the reply is unusable."""
+    if not text.strip():
+        return []
+
+    # Text shorter than 500 characters is returned as a single chunk without calling the API
+    if len(text) < 500:
+        return [{
+            "title": "전체 내용",
+            "keywords": "",
+            "content": text
+        }]
+
+    user_prompt = f"""
+아래 문서를 의미 단위(문단/항목/섹션 등)로 분리하고,
+각 청크는 title / keywords / content 를 포함한 JSON 배열로 출력하라.
+
+규칙:
+1. 추측 금지, 문서 내용 기반으로만 분리
+2. 이미지 참조(![...](...))는 관련 텍스트와 같은 청크에 포함
+3. 각 청크는 최소 100자 이상
+4. keywords는 쉼표로 구분된 핵심 키워드 3~5개
+
+문서:
+{text[:12000]}
+
+JSON 배열만 출력하라. 다른 설명 없이.
+"""
+    # NOTE(review): only text[:12000] is placed in the prompt above; confirm whether longer documents lose their tail on the success path.
+    try:
+        resp = client.chat.completions.create(
+            model=GPT_MODEL,
+            messages=[
+                {"role": "system", "content": domain_prompt + "\n\n너는 의미 기반 청킹 전문가이다. JSON 배열만 출력한다."},
+                {"role": "user", "content": user_prompt},
+            ],
+        )
+        data = resp.choices[0].message.content.strip()  # a None content raises here and lands in the generic except below
+        
+        # Try to parse the reply as JSON
+        # Unwrap a ```json ... ``` (or bare ``` ... ```) fenced block first
+        if "```json" in data:
+            data = data.split("```json")[1].split("```")[0].strip()
+        elif "```" in data:
+            data = data.split("```")[1].split("```")[0].strip()
+        
+        if data.startswith("["):
+            return json.loads(data)  # returned as parsed; the shape of the elements is not validated here
+            
+    except json.JSONDecodeError as e:
+        log(f"[WARN] JSON 파싱 실패 ({source_name}): {e}")
+    except Exception as e:
+        log(f"[WARN] semantic_chunk API 실패 ({source_name}): {e}")
+
+    # Fallback: page/section based splitting (also reached when the reply does not start with "[")
+    log(f"[INFO] Fallback 청킹 적용: {source_name}")
+    return fallback_chunk(text)
+
+
+def fallback_chunk(text: str) -> list:
+    """Rule-based chunking used when GPT chunking fails: split on "## Page N" lines, otherwise on runs of blank lines."""
+    chunks = []
+    
+    # Prefer splitting on page separators
+    if "## Page " in text:
+        pages = re.split(r'\n## Page \d+\n', text)
+        for i, page_content in enumerate(pages):
+            if page_content.strip():
+                chunks.append({
+                    "title": f"Page {i+1}",  # NOTE(review): position in the split result, not the number in the heading; text before the first heading becomes "Page 1"
+                    "keywords": "",
+                    "content": page_content.strip()
+                })
+    else:
+        # Split on two or more blank lines (three or more consecutive newlines)
+        sections = re.split(r'\n{3,}', text)
+        for i, section in enumerate(sections):
+            if section.strip() and len(section.strip()) > 50:  # sections of 50 characters or fewer are dropped
+                chunks.append({
+                    "title": f"섹션 {i+1}",
+                    "keywords": "",
+                    "content": section.strip()
+                })
+    
+    # Nothing survived the filters: return the whole text as one chunk
+    if not chunks:
+        chunks.append({
+            "title": "전체 내용",
+            "keywords": "",
+            "content": text.strip()
+        })
+    
+    return chunks
+
+
+def summary_chunk(domain_prompt: str, text: str, limit: int = 300) -> str:
+    """Return a summary of text of about `limit` characters from GPT_MODEL; short text is returned as is, and on API failure the first `limit` characters are returned."""
+    if not text.strip():
+        return ""
+    
+    # Strip markdown image references so only prose is summarised
+    text_only = IMAGE_PATTERN.sub('', text).strip()
+    
+    if len(text_only) < 100:  # too short to be worth an API call
+        return text_only
+    
+    prompt = f"""
+아래 텍스트를 {limit}자 이내로 사실 기반으로 요약하라.
+추측 금지, 고유명사와 수치는 보존.
+
+{text_only[:8000]}
+"""
+    try:
+        resp = client.chat.completions.create(
+            model=GPT_MODEL,
+            messages=[
+                {"role": "system", "content": domain_prompt + "\n\n너는 사실만 요약하는 전문가이다."},
+                {"role": "user", "content": prompt},
+            ],
+        )
+        return resp.choices[0].message.content.strip()  # the limit is only requested in the prompt, not enforced on the reply
+    except Exception as e:
+        log(f"[WARN] summary 실패: {e}")
+        return text_only[:limit]
+
+
+def save_chunk_files(src: Path, text: str, domain_prompt: str) -> int:
+    """
+    Semantic chunking -> per-chunk summary -> JSON output for one source file.
+    NOTE(review): output names use src.stem only, so same-named files in different folders overwrite each other.
+    Returns:
+        Number of chunks produced (0 when chunking returned nothing).
+    """
+    stem = src.stem
+    folder_ctx = safe_rel(src.parent)
+    
+    # Save the original text
+    (TEXT_DIR / f"{stem}_text.txt").write_text(text, encoding="utf-8", errors="ignore")
+    
+    # Semantic chunking
+    chunks = semantic_chunk(domain_prompt, text, src.name)
+    
+    if not chunks:
+        log(f"[WARN] 청크 없음: {src.name}")
+        return 0
+    
+    rag_items = []
+    
+    for idx, ch in enumerate(chunks, start=1):
+        content = ch.get("content", "")
+        
+        # Generate the summary (one model call per chunk unless the chunk is short)
+        summ = summary_chunk(domain_prompt, content, 300)
+        
+        # Image references contained in this chunk
+        images_in_chunk = find_images_in_text(content)
+        
+        rag_items.append({
+            "source": src.name,
+            "source_path": safe_rel(src),
+            "chunk": idx,
+            "total_chunks": len(chunks),
+            "title": ch.get("title", ""),
+            "keywords": ch.get("keywords", ""),
+            "text": content,
+            "summary": summ,
+            "folder_context": folder_ctx,
+            "images": images_in_chunk,
+            "has_images": len(images_in_chunk) > 0
+        })
+    
+    # Save the JSON
+    (JSON_DIR / f"{stem}.json").write_text(
+        json.dumps(rag_items, ensure_ascii=False, indent=2),
+        encoding="utf-8"
+    )
+    
+    # Save the RAG JSON (same payload as above, written under RAG_DIR as <stem>_chunks.json)
+    (RAG_DIR / f"{stem}_chunks.json").write_text(
+        json.dumps(rag_items, ensure_ascii=False, indent=2),
+        encoding="utf-8"
+    )
+    
+    return len(chunks)
+
+
+def main():  # Entry point: chunk and summarise every .md file under DATA_ROOT, then write run statistics.
+    log("=" * 60)
+    log("청킹/요약 파이프라인 시작")
+    log(f"데이터 폴더: {DATA_ROOT}")
+    log(f"출력 폴더: {OUTPUT_ROOT}")
+    log("=" * 60)
+    
+    # Load the domain prompt (exits the process when the file is missing)
+    domain_prompt = load_domain_prompt()
+    log(f"도메인 프롬프트 로드 완료 ({len(domain_prompt)}자)")
+    
+    # Run counters
+    stats = {"docs": 0, "chunks": 0, "images": 0, "errors": 0}
+    
+    # Collect the .md files
+    md_files = []
+    for root, dirs, files in os.walk(DATA_ROOT):
+        dirs[:] = [d for d in dirs if d not in SKIP_DIR_NAMES and not d.startswith(".")]  # in-place edit prunes the walk
+        for fname in files:
+            if fname.lower().endswith(".md"):
+                md_files.append(Path(root) / fname)
+    
+    log(f"\n총 {len(md_files)}개 .md 파일 발견\n")
+    
+    for idx, fpath in enumerate(md_files, 1):
+        try:
+            rel_path = safe_rel(fpath)
+            log(f"[{idx}/{len(md_files)}] {rel_path}")
+            
+            # Read the text
+            text = extract_text_md(fpath)
+            
+            if not text.strip():
+                log(f"    ⚠ 빈 파일, 스킵")
+                continue
+            
+            # Count image references in the whole file
+            images = find_images_in_text(text)
+            stats["images"] += len(images)
+            
+            # Chunk, summarise and save
+            chunk_count = save_chunk_files(fpath, text, domain_prompt)
+            
+            stats["docs"] += 1
+            stats["chunks"] += chunk_count
+            
+            log(f"    ✓ {chunk_count}개 청크, {len(images)}개 이미지")
+            
+        except Exception as e:  # one failing file is counted and logged; the loop continues with the next file
+            stats["errors"] += 1
+            log(f"    ✗ 오류: {e}")
+    
+    # Save the overall statistics
+    summary = {
+        "processed_at": datetime.now().isoformat(),
+        "data_root": str(DATA_ROOT),
+        "output_root": str(OUTPUT_ROOT),
+        "statistics": stats
+    }
+    
+    (LOG_DIR / "chunk_summary_stats.json").write_text(
+        json.dumps(summary, ensure_ascii=False, indent=2),
+        encoding="utf-8"
+    )
+    
+    # Print the results
+    log("\n" + "=" * 60)
+    log("청킹/요약 완료!")
+    log("=" * 60)
+    log(f"처리된 문서: {stats['docs']}개")
+    log(f"생성된 청크: {stats['chunks']}개")
+    log(f"포함된 이미지: {stats['images']}개")
+    log(f"오류: {stats['errors']}개")
+    log(f"\n결과 저장 위치:")
+    log(f"  - 원문: {TEXT_DIR}")
+    log(f"  - JSON: {JSON_DIR}")
+    log(f"  - RAG: {RAG_DIR}")
+
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/converters/pipeline/step5_rag.py b/converters/pipeline/step5_rag.py
new file mode 100644
index 0000000..30ef48e
--- /dev/null
+++ b/converters/pipeline/step5_rag.py
@@ -0,0 +1,141 @@
+# -*- coding: utf-8 -*-
+"""
+build_rag.py
+
+기능:
+- chunk_and_summary.py 에서 생성된 output/rag/*_chunks.json 파일들을 읽어서
+  text + summary 를 임베딩(text-embedding-3-small)한다.
+- FAISS IndexFlatIP 인덱스를 구축하여
+  output/rag/faiss.index, meta.json, vectors.npy 를 생성한다.
+"""
+
+import os
+import sys
+import json
+from pathlib import Path
+
+import numpy as np
+import faiss
+from openai import OpenAI
+from api_config import API_KEYS
+
+# ===== Path settings =====
+DATA_ROOT   = Path(r"D:\for python\survey_test\process")  # NOTE(review): hard-coded, machine-specific Windows path
+OUTPUT_ROOT = Path(r"D:\for python\survey_test\output")
+RAG_DIR     = OUTPUT_ROOT / "rag"
+LOG_DIR     = OUTPUT_ROOT / "logs"
+
+for d in [RAG_DIR, LOG_DIR]:
+    d.mkdir(parents=True, exist_ok=True)  # executed at import time
+
+# ===== OpenAI settings (structure kept) =====
+OPENAI_API_KEY = API_KEYS.get('GPT_API_KEY', '')
+GPT_MODEL      = "gpt-5-2025-08-07"
+EMBED_MODEL    = "text-embedding-3-small"
+
+client = OpenAI(api_key=OPENAI_API_KEY)
+
+
+def log(msg: str):  # Print msg and append it (without a timestamp) to build_rag_log.txt.
+    print(msg, flush=True)
+    with (LOG_DIR / "build_rag_log.txt").open("a", encoding="utf-8") as f:  # reopened in append mode on every call
+        f.write(msg + "\n")
+
+
+def embed_texts(texts):
+    """Embed texts with EMBED_MODEL in batches of 96 and stack the returned vectors into one float32 matrix."""
+    if not texts:
+        return np.zeros((0, 1536), dtype="float32")  # empty (0, 1536) matrix when there is nothing to embed
+    batch_size = 96
+    vectors = []
+    # One embeddings request per batch; rows are appended in the order the API returns them.
+    for start in range(0, len(texts), batch_size):
+        resp = client.embeddings.create(model=EMBED_MODEL, input=texts[start:start + batch_size])
+        vectors.extend(np.array(item.embedding, dtype="float32") for item in resp.data)
+    return np.vstack(vectors)
+
+
+def _build_embed_input(u: dict) -> str:
+    """
+    Build the string that gets embedded for one chunk record.
+
+    Uses u["text"] followed by at most 1,000 characters of u["summary"]
+    (or whichever of the two is present), collapses every whitespace run
+    into a single space and caps the result at 4,000 characters.
+    Returns "" when both fields are missing or blank.
+    """
+    body = (u.get("text") or "").strip()
+    digest = (u.get("summary") or "").strip()
+
+    # The "요약: " label is only added when both parts exist.
+    if body and digest:
+        combined = f"{body}\n\n요약: {digest[:1000]}"
+    else:
+        combined = body or digest
+
+    # split()/join normalises newlines and tabs as well as repeated spaces;
+    # slicing is a no-op for strings that are already short enough.
+    return " ".join(combined.split())[:4000]
+
+
+def build_faiss_index():  # Embed every usable chunk from RAG_DIR/*_chunks.json and write faiss.index, meta.json and vectors.npy.
+    docs = []
+    metas = []
+
+    rag_files = list(RAG_DIR.glob("*_chunks.json"))
+    if not rag_files:
+        log("RAG 파일(*_chunks.json)이 없습니다. 먼저 chunk_and_summary.py를 실행해야 합니다.")
+        sys.exit(1)
+
+    for f in rag_files:
+        try:
+            units = json.loads(f.read_text(encoding="utf-8", errors="ignore"))
+        except Exception as e:  # an unreadable file is logged and skipped; the others are still indexed
+            log(f"[WARN] RAG 파일 읽기 실패: {f.name} | {e}")
+            continue
+
+        for u in units:
+            embed_input = _build_embed_input(u)
+            if not embed_input:
+                continue
+            if len(embed_input) < 40:  # inputs shorter than 40 characters are not indexed
+                continue
+            docs.append(embed_input)
+            metas.append({  # docs and metas are appended together so position i refers to the same chunk in both
+                "source": u.get("source", ""),
+                "chunk": int(u.get("chunk", 0)),
+                "folder_context": u.get("folder_context", "")
+            })
+
+    if not docs:
+        log("임베딩할 텍스트가 없습니다.")
+        sys.exit(1)
+
+    log(f"임베딩 대상 텍스트 수: {len(docs)}")
+
+    E = embed_texts(docs)
+    if E.shape[0] != len(docs):  # NOTE(review): only a warning; index rows and meta.json entries would no longer line up
+        log(f"[WARN] 임베딩 수 불일치: E={E.shape[0]}, docs={len(docs)}")
+
+    faiss.normalize_L2(E)  # L2-normalised in place before the vectors go into the inner-product index
+    index = faiss.IndexFlatIP(E.shape[1])
+    index.add(E)
+
+    np.save(str(RAG_DIR / "vectors.npy"), E)
+    (RAG_DIR / "meta.json").write_text(
+        json.dumps(metas, ensure_ascii=False, indent=2),
+        encoding="utf-8"
+    )
+    faiss.write_index(index, str(RAG_DIR / "faiss.index"))
+
+    log(f"FAISS 인덱스 구축 완료: 벡터 수={len(metas)}")
+
+
+def main():  # Entry point: log start/end markers around build_faiss_index().
+    log("=== FAISS RAG 인덱스 구축 시작 ===")
+    build_faiss_index()  # may call sys.exit(1), in which case the end marker below is not logged
+    log("=== FAISS RAG 인덱스 구축 종료 ===")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/converters/pipeline/step6_corpus.py b/converters/pipeline/step6_corpus.py
new file mode 100644
index 0000000..d3e33d0
--- /dev/null
+++ b/converters/pipeline/step6_corpus.py
@@ -0,0 +1,232 @@
+# -*- coding: utf-8 -*-
+"""
+make_corpus_v2.py
+
+기능:
+- output/rag/*_chunks.json 에서 모든 청크의 summary를 모아
+- AI가 CEL 목적(교육+자사솔루션 홍보)에 맞게 압축 정리
+- 중복은 빈도 표시, 희귀하지만 중요한 건 [핵심] 표시
+- 결과를 output/context/corpus.txt 로 저장
+
+전제:
+- chunk_and_summary.py 실행 후 *_chunks.json 들이 존재해야 한다.
+- domain_prompt.txt가 존재해야 한다.
+"""
+
+import os
+import sys
+import json
+from pathlib import Path
+from datetime import datetime
+
+from openai import OpenAI
+from api_config import API_KEYS
+
+# ===== Path settings =====
+DATA_ROOT   = Path(r"D:\for python\survey_test\process")  # NOTE(review): hard-coded, machine-specific Windows path
+OUTPUT_ROOT = Path(r"D:\for python\survey_test\output")
+RAG_DIR     = OUTPUT_ROOT / "rag"
+CONTEXT_DIR = OUTPUT_ROOT / "context"
+LOG_DIR     = OUTPUT_ROOT / "logs"
+
+for d in [RAG_DIR, CONTEXT_DIR, LOG_DIR]:
+    d.mkdir(parents=True, exist_ok=True)  # executed at import time
+
+# ===== OpenAI settings =====
+OPENAI_API_KEY = API_KEYS.get('GPT_API_KEY', '')
+GPT_MODEL      = "gpt-5-2025-08-07"
+
+client = OpenAI(api_key=OPENAI_API_KEY)
+
+# ===== Compression settings =====
+BATCH_SIZE = 80  # number of summaries handled per request
+MAX_CHARS_PER_BATCH = 3000  # character budget requested for each compressed batch
+MAX_FINAL_CHARS = 8000  # character budget requested for the final corpus
+
+
+def log(msg: str):  # Print msg and append it, prefixed with an [HH:MM:SS] timestamp, to make_corpus_log.txt.
+    print(msg, flush=True)
+    with (LOG_DIR / "make_corpus_log.txt").open("a", encoding="utf-8") as f:  # reopened in append mode on every call
+        f.write(f"[{datetime.now().strftime('%H:%M:%S')}] {msg}\n")
+
+
+def load_domain_prompt() -> str:  # Return the stripped text of CONTEXT_DIR/domain_prompt.txt; exit with status 1 if it is missing.
+    p = CONTEXT_DIR / "domain_prompt.txt"
+    if not p.exists():
+        log("domain_prompt.txt가 없습니다. 먼저 step1을 실행해야 합니다.")
+        sys.exit(1)  # terminates the whole process instead of raising to the caller
+    return p.read_text(encoding="utf-8", errors="ignore").strip()  # undecodable bytes are dropped silently
+
+
+def load_all_summaries() -> list:
+    """Collect every chunk summary, tagged with its source (and keywords when present), from RAG_DIR/*_chunks.json."""
+    summaries = []
+    rag_files = sorted(RAG_DIR.glob("*_chunks.json"))  # sorted so the collection order is stable between runs
+    
+    if not rag_files:
+        log("RAG 파일(*_chunks.json)이 없습니다. 먼저 chunk_and_summary.py를 실행해야 합니다.")
+        sys.exit(1)
+
+    for f in rag_files:
+        try:
+            units = json.loads(f.read_text(encoding="utf-8", errors="ignore"))
+        except Exception as e:  # an unreadable file is logged and skipped
+            log(f"[WARN] RAG 파일 읽기 실패: {f.name} | {e}")
+            continue
+
+        for u in units:
+            summ = (u.get("summary") or "").strip()
+            source = (u.get("source") or "").strip()
+            keywords = (u.get("keywords") or "")
+            
+            if summ:  # chunks without a summary contribute nothing
+                # Entry format: "[source] summary (키워드: keywords)"
+                entry = f"[{source}] {summ}"
+                if keywords:
+                    entry += f" (키워드: {keywords})"
+                summaries.append(entry)
+
+    return summaries
+
+
+def compress_batch(domain_prompt: str, batch: list, batch_num: int, total_batches: int) -> str:
+    """Ask GPT_MODEL to condense one batch of summaries; on failure return the first 10 entries of the batch joined by newlines."""
+    
+    batch_text = "\n".join([f"{i+1}. {s}" for i, s in enumerate(batch)])
+    
+    prompt = f"""
+아래는 문서에서 추출한 요약 {len(batch)}개이다. (배치 {batch_num}/{total_batches})
+
+[요약 목록]
+{batch_text}
+
+다음 기준으로 이 요약들을 압축 정리하라:
+
+1) 중복/유사 내용: 하나로 통합하되, 여러 문서에서 언급되면 "(N회 언급)" 표시
+2) domain_prompt에 명시된 핵심 솔루션/시스템: 반드시 보존하고 [솔루션] 표시
+3) domain_prompt의 목적에 중요한 내용 우선 보존:
+   - 해당 분야의 기초 개념
+   - 기존 방식의 한계점과 문제점
+   - 새로운 기술/방식의 장점
+4) 단순 나열/절차만 있는 내용: 과감히 축약
+5) 희귀하지만 핵심적인 인사이트: [핵심] 표시
+
+출력 형식:
+- 주제별로 그룹핑
+- 각 항목은 1~2문장으로 간결하게
+- 전체 {MAX_CHARS_PER_BATCH}자 이내
+- 마크다운 없이 순수 텍스트로
+"""
+    
+    try:
+        resp = client.chat.completions.create(
+            model=GPT_MODEL,
+            messages=[
+                {"role": "system", "content": domain_prompt + "\n\n너는 문서 요약을 주제별로 압축 정리하는 전문가이다."},
+                {"role": "user", "content": prompt}
+            ]
+        )
+        result = resp.choices[0].message.content.strip()  # the character budget is only requested in the prompt, not enforced
+        log(f"    배치 {batch_num}/{total_batches} 압축 완료 ({len(result)}자)")
+        return result
+    except Exception as e:
+        log(f"[ERROR] 배치 {batch_num} 압축 실패: {e}")
+        # On failure return part of the original batch; NOTE(review): entries after the tenth are dropped from the corpus
+        return "\n".join(batch[:10])
+
+
+def merge_compressed_parts(domain_prompt: str, parts: list) -> str:
+    """Merge the per-batch results into the final corpus with one GPT_MODEL call; a single part is returned unchanged."""
+    
+    if len(parts) == 1:  # nothing to merge, so no API call is made
+        return parts[0]
+    
+    all_parts = "\n\n---\n\n".join([f"[파트 {i+1}]\n{p}" for i, p in enumerate(parts)])
+    
+    prompt = f"""
+아래는 대량의 문서 요약을 배치별로 압축한 결과이다.
+이것을 최종 corpus로 통합하라.
+
+[배치별 압축 결과]
+{all_parts}
+
+통합 기준:
+1) 파트 간 중복 내용 제거 및 통합
+2) domain_prompt에 명시된 목적과 흐름에 맞게 재구성
+3) [솔루션], [핵심], (N회 언급) 표시는 유지
+4) 전체 {MAX_FINAL_CHARS}자 이내
+
+출력: 주제별로 정리된 최종 corpus (마크다운 없이)
+"""
+    
+    try:
+        resp = client.chat.completions.create(
+            model=GPT_MODEL,
+            messages=[
+                {"role": "system", "content": domain_prompt + "\n\n너는 CEL 교육 콘텐츠 기획을 위한 corpus를 설계하는 전문가이다."},
+                {"role": "user", "content": prompt}
+            ]
+        )
+        return resp.choices[0].message.content.strip()
+    except Exception as e:  # on failure the unmerged parts are returned, separated by blank lines
+        log(f"[ERROR] 최종 통합 실패: {e}")
+        return "\n\n".join(parts)
+
+
+def main():  # Entry point: collect all summaries, compress them batch by batch, merge, and write corpus_raw.txt and corpus.txt.
+    log("=" * 60)
+    log("corpus 생성 시작 (AI 압축 버전)")
+    log("=" * 60)
+    
+    # Load the domain prompt (exits the process when the file is missing)
+    domain_prompt = load_domain_prompt()
+    log(f"도메인 프롬프트 로드 완료 ({len(domain_prompt)}자)")
+    
+    # Collect every summary
+    summaries = load_all_summaries()
+    if not summaries:
+        log("summary가 없습니다. corpus를 생성할 수 없습니다.")
+        sys.exit(1)
+    
+    log(f"원본 요약 수집 완료: {len(summaries)}개")
+    
+    # Save the uncompressed summaries as a backup
+    raw_corpus = "\n".join(summaries)
+    raw_path = CONTEXT_DIR / "corpus_raw.txt"
+    raw_path.write_text(raw_corpus, encoding="utf-8")
+    log(f"원본 corpus 백업: {raw_path} ({len(raw_corpus)}자)")
+    
+    # Compress batch by batch
+    total_batches = (len(summaries) + BATCH_SIZE - 1) // BATCH_SIZE  # ceiling division
+    log(f"\n배치 압축 시작 ({BATCH_SIZE}개씩, 총 {total_batches}배치)")
+    
+    compressed_parts = []
+    for i in range(0, len(summaries), BATCH_SIZE):
+        batch = summaries[i:i+BATCH_SIZE]
+        batch_num = (i // BATCH_SIZE) + 1  # 1-based batch number used in the prompt and the log
+        
+        compressed = compress_batch(domain_prompt, batch, batch_num, total_batches)
+        compressed_parts.append(compressed)
+    
+    # Final merge
+    log(f"\n최종 통합 시작 ({len(compressed_parts)}개 파트)")
+    final_corpus = merge_compressed_parts(domain_prompt, compressed_parts)
+    
+    # Save
+    out_path = CONTEXT_DIR / "corpus.txt"
+    out_path.write_text(final_corpus, encoding="utf-8")
+    
+    # Statistics
+    log("\n" + "=" * 60)
+    log("corpus 생성 완료!")
+    log("=" * 60)
+    log(f"원본 요약: {len(summaries)}개 ({len(raw_corpus)}자)")
+    log(f"압축 corpus: {len(final_corpus)}자")
+    log(f"압축률: {100 - (len(final_corpus) / len(raw_corpus) * 100):.1f}%")
+    log(f"\n저장 위치:")
+    log(f"  - 원본: {raw_path}")
+    log(f"  - 압축: {out_path}")
+
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/converters/pipeline/step7_index.py b/converters/pipeline/step7_index.py
new file mode 100644
index 0000000..3180719
--- /dev/null
+++ b/converters/pipeline/step7_index.py
@@ -0,0 +1,504 @@
+# -*- coding: utf-8 -*-
+"""
+make_outline.py
+
+기능:
+- output_context/context/domain_prompt.txt
+- output_context/context/corpus.txt
+을 기반으로 목차를 생성하고,
+
+1) outline_issue_report.txt 저장
+2) outline_issue_report.html 저장 (테스트.html 레이아웃 기반 표 형태)
+"""
+
+import os
+import sys
+import re
+from pathlib import Path
+from datetime import datetime
+from typing import List, Dict, Any, Tuple
+
+from openai import OpenAI
+from api_config import API_KEYS
+
+# ===== Path settings =====
+DATA_ROOT   = Path(r"D:\for python\survey_test\process")  # NOTE(review): hard-coded, machine-specific Windows path
+OUTPUT_ROOT = Path(r"D:\for python\survey_test\output")
+CONTEXT_DIR = OUTPUT_ROOT / "context"
+LOG_DIR     = OUTPUT_ROOT / "logs"
+
+for d in [CONTEXT_DIR, LOG_DIR]:
+    d.mkdir(parents=True, exist_ok=True)  # executed at import time
+
+# ===== OpenAI settings (structure kept) =====
+OPENAI_API_KEY = API_KEYS.get('GPT_API_KEY', '')
+GPT_MODEL      = "gpt-5-2025-08-07"
+
+client = OpenAI(api_key=OPENAI_API_KEY)
+
+# ===== Regexes for parsing the outline (supplemented for the five-part split) =====
+RE_KEYWORDS = re.compile(r"(#\S+)")  # a "#keyword" token
+RE_L1 = re.compile(r"^\s*(\d+)\.\s+(.+?)\s*$")  # "1. title"
+RE_L2 = re.compile(r"^\s*(\d+\.\d+)\s+(.+?)\s*$")  # "1.1 title"
+RE_L3 = re.compile(r"^\s*(\d+\.\d+\.\d+)\s+(.+?)\s*$")  # "1.1.1 title"
+
+def log(msg: str):  # Print msg and append it (without a timestamp) to make_outline_log.txt.
+    print(msg, flush=True)
+    with (LOG_DIR / "make_outline_log.txt").open("a", encoding="utf-8") as f:  # reopened in append mode on every call
+        f.write(msg + "\n")
+
+def load_domain_prompt() -> str:  # Return the stripped text of CONTEXT_DIR/domain_prompt.txt; exit with status 1 if it is missing.
+    p = CONTEXT_DIR / "domain_prompt.txt"
+    if not p.exists():
+        log("domain_prompt.txt가 없습니다. 먼저 domain_prompt.py를 실행해야 합니다.")
+        sys.exit(1)  # terminates the whole process instead of raising to the caller
+    return p.read_text(encoding="utf-8", errors="ignore").strip()  # undecodable bytes are dropped silently
+
+def load_corpus() -> str:  # Return the stripped text of CONTEXT_DIR/corpus.txt; exit with status 1 if it is missing.
+    p = CONTEXT_DIR / "corpus.txt"
+    if not p.exists():
+        log("corpus.txt가 없습니다. 먼저 make_corpus.py를 실행해야 합니다.")
+        sys.exit(1)  # terminates the whole process instead of raising to the caller
+    return p.read_text(encoding="utf-8", errors="ignore").strip()  # undecodable bytes are dropped silently
+
+
+# Patterns for a level-3 heading line and for the "- topic | #keywords | [type] | guide" lines that follow it.
+RE_L3_HEAD = re.compile(r"^\s*(\d+\.\d+\.\d+)\s+(.+)$") 
+RE_L3_TOPIC = re.compile(r"^\s*[\-\*]\s+(.+?)\s*\|\s*(.+?)\s*\|\s*(\[.+?\])\s*\|\s*(.+)$") 
+
+def generate_outline(domain_prompt: str, corpus: str) -> str:
+    """Ask GPT_MODEL for a report title plus a numbered three-level outline built from corpus.
+
+    Returns the model's reply text, stripped ("" when the reply has no content)."""
+    sys_msg = {
+        "role": "system",
+        "content": (
+            domain_prompt + "\n\n"
+            "너는 건설/측량 DX 기술 보고서의 구조를 설계하는 시니어 기술사이다. "
+            "주어진 corpus를 분석하여, 실무자가 즉시 활용 가능한 고밀도 지침서 목차를 설계하라."
+        ),
+    }
+
+    user_msg = {
+        "role": "user",
+        "content": f"""
+아래 [corpus]를 바탕으로 보고서 제목과 전략적 목차를 설계하라.
+
+[corpus]
+{corpus}
+
+요구 사항:
+1) 첫 줄에 보고서 제목 1개를 작성하라.
+2) 그 아래 목차를 번호 기반 계층 구조로 작성하라.
+   - 대목차: 1. / 2. / 3. ...
+   - 중목차: 1.1 / 1.2 / ...
+   - 소목차: 1.1.1 / 1.1.2 / ...
+3) **수량 제약 (중요)**:
+   - 대목차(1.)는 5~8개로 구성하라.
+   - **중목차(1.1) 하나당 소목차(1.1.1, 1.1.2...)는 반드시 2개에서 4개 사이로 구성하라.** (절대 1개만 만들지 말 것)
+   - 소목차(1.1.1) 하나당 '핵심주제(꼭지)'는 반드시 2개에서 3개 사이로 구성하라.
+
+[소목차 작성 형식]
+1.1.1 소목차 제목 
+ - 핵심주제 1 | #키워드 | [유형] | 집필가이드(데이터/표 구성 지침) 
+ - 핵심주제 2 | #키워드 | [유형] | 집필가이드(데이터/표 구성 지침)
+
+5) [유형] 분류 가이드:
+   - [비교형]: 기존 vs DX 방식의 비교표(Table)가 필수적인 경우
+   - [기술형]: RMSE, GSD, 중복도 등 정밀 수치와 사양 설명이 핵심인 경우
+   - [절차형]: 단계별 워크플로 및 체크리스트가 중심인 경우
+   - [인사이트형]: 한계점 분석 및 전문가 제언(☞)이 중심인 경우
+6) 집필가이드는 50자 내외로, "어떤 데이터를 검색해서 어떤 표를 그려라"와 같이 구체적으로 지시하라.
+7) 대목차는 최대 8개 이내로 구성하라.
+"""
+    }
+    resp = client.chat.completions.create(model=GPT_MODEL, messages=[sys_msg, user_msg])  # API errors propagate to the caller
+    return (resp.choices[0].message.content or "").strip()
+
+
+
+def parse_outline(outline_text: str) -> Tuple[str, List[Dict[str, Any]]]:  # Parse outline text into (report title, flat list of heading rows).
+    lines = [ln.rstrip() for ln in outline_text.splitlines() if ln.strip()]
+    if not lines: return "", []
+
+    title = lines[0].strip() # the first non-blank line is the report title
+    rows = []
+    current_section = None # level-3 section (1.1.1) currently being filled
+
+    for ln in lines[1:]:
+        raw = ln.strip()
+        
+        # 1. Level-3 heading ("1.1.1 title"); checked before levels 1 and 2
+        m3_head = RE_L3_HEAD.match(raw)
+        if m3_head:
+            num, s_title = m3_head.groups()
+            current_section = {
+                "depth": 3, 
+                "num": num, 
+                "title": s_title,
+                "sub_topics": [] # filled by the topic lines that follow this heading
+            }
+            rows.append(current_section)
+            continue
+            
+        # 2. Topic line ("- topic | #keyword | [type] | guide"); ignored unless a level-3 section is open
+        m_topic = RE_L3_TOPIC.match(raw)
+        if m_topic and current_section:
+            t_title, kws_raw, t_type, guide = m_topic.groups()
+            # Extract the keywords (tokens of the form #keyword, stored without the "#")
+            kws = [k.lstrip("#").strip() for k in RE_KEYWORDS.findall(kws_raw)]
+            
+            # Append to the current level-3 section; keywords live here, not on the row itself
+            current_section["sub_topics"].append({
+                "topic_title": t_title,
+                "keywords": kws,
+                "type": t_type,
+                "guide": guide
+            })
+            continue
+
+        # 3. Level-1 heading ("1. title")
+        m1 = RE_L1.match(raw)
+        if m1:
+            rows.append({"depth": 1, "num": m1.group(1).strip(), "title": m1.group(2).strip()})
+            current_section = None # closes the open level-3 section
+            continue
+
+        # 4. Level-2 heading ("1.1 title")
+        m2 = RE_L2.match(raw)
+        if m2:
+            rows.append({"depth": 2, "num": m2.group(1).strip(), "title": m2.group(2).strip()})
+            current_section = None # closes the open level-3 section
+            continue
+
+    return title, rows
+
+def html_escape(s: str) -> str:
+    """Return s with &, <, >, double and single quotes replaced by HTML entities ("" for a falsy s)."""
+    escaped = s or ""
+    # "&" must go first so the ampersands of the entities added afterwards are not escaped again.
+    for raw, entity in (("&", "&amp;"), ("<", "&lt;"), (">", "&gt;"), ('"', "&quot;"), ("'", "&#39;")):
+        escaped = escaped.replace(raw, entity)
+    return escaped
+
+def chunk_rows(rows: List[Dict[str, Any]], max_rows_per_page: int = 26) -> List[List[Dict[str, Any]]]:
+    """
+    Split rows into consecutive pages of at most max_rows_per_page rows each.
+
+    A long table would overflow a single A4 sheet, so pagination is done purely
+    by row count; the last page may be shorter and no empty page is produced.
+    """
+    # A limit below 1 degenerates to one row per page, exactly as a counting loop
+    # that flushes whenever len(page) >= limit would behave.
+    step = max(max_rows_per_page, 1)
+    pages = []
+    for start in range(0, len(rows), step):
+        pages.append(rows[start:start + step])
+    return pages
+
+def build_outline_table_html(rows: List[Dict[str, Any]]) -> str:
+    """
+    Render rows (dicts with "depth", "num", "title") as a 4-column HTML table; level-3 rows also show their keywords.
+    """
+    head = """
+    <table>
+        <thead>
+            <tr>
+                <th>구분</th>
+                <th>번호</th>
+                <th>제목</th>
+                <th>키워드</th>
+            </tr>
+        </thead>
+        <tbody>
+    """
+
+    body_parts = []
+    for r in rows:
+        depth = r["depth"]
+        num = html_escape(r["num"])
+        title = html_escape(r["title"])
+        kws = r.get("keywords") or [k for t in r.get("sub_topics", []) for k in t.get("keywords", [])]  # parse_outline keeps keywords per sub-topic
+        kw = html_escape(" ".join(f"#{k}" for k in kws if k))
+
+        if depth == 1:
+            body_parts.append(
+                f"""
+                <tr>
+                    <td class="group-cell">대목차</td>
+                    <td>{num}</td>
+                    <td>{title}</td>
+                    <td></td>
+                </tr>
+                """
+            )
+        elif depth == 2:
+            body_parts.append(
+                f"""
+                <tr>
+                    <td class="group-cell">중목차</td>
+                    <td>{num}</td>
+                    <td>{title}</td>
+                    <td></td>
+                </tr>
+                """
+            )
+        else:
+            body_parts.append(
+                f"""
+                <tr>
+                    <td class="group-cell">소목차</td>
+                    <td>{num}</td>
+                    <td>{title}</td>
+                    <td>{kw}</td>
+                </tr>
+                """
+            )
+
+    tail = """
+        </tbody>
+    </table>
+    """
+    return head + "\n".join(body_parts) + tail
+
+def build_outline_html(report_title: str, rows: List[Dict[str, Any]]) -> str:
+    """
+    Return a complete HTML document that renders rows as outline tables on A4-sized sheets, up to 26 rows per sheet.
+    """
+    css = r"""
+        @import url('https://fonts.googleapis.com/css2?family=Noto+Sans+KR:wght@300;400;500;700;900&display=swap');
+
+        :root {
+            --primary-blue: #3057B9;
+            --gray-light: #F2F2F2;
+            --gray-medium: #E6E6E6;
+            --gray-dark: #666666;
+            --border-light: #DDDDDD;
+            --text-black: #000000;
+        }
+
+        * {
+            margin: 0;
+            padding: 0;
+            box-sizing: border-box;
+            -webkit-print-color-adjust: exact;
+        }
+
+        body {
+            font-family: 'Noto Sans KR', sans-serif;
+            background-color: #f0f0f0;
+            color: var(--text-black);
+            line-height: 1.35;
+            display: flex;
+            justify-content: center;
+            padding: 10px 0;
+        }
+
+        .sheet {
+            background-color: white;
+            width: 210mm;
+            height: 297mm;
+            padding: 20mm 20mm;
+            box-shadow: 0 0 10px rgba(0,0,0,0.1);
+            position: relative;
+            display: flex;
+            flex-direction: column;
+            overflow: hidden;
+            margin-bottom: 12px;
+        }
+
+        @media print {
+            body { background: none; padding: 0; }
+            .sheet { box-shadow: none; margin: 0; border: none; page-break-after: always; }
+        }
+
+        .page-header {
+            display: flex;
+            justify-content: space-between;
+            align-items: flex-start;
+            margin-bottom: 15px;
+            font-size: 8.5pt;
+            color: var(--gray-dark);
+        }
+
+        .header-title {
+            font-size: 24pt;
+            font-weight: 900;
+            margin-bottom: 8px;
+            letter-spacing: -1.5px;
+            color: #111;
+        }
+
+        .title-divider {
+            height: 4px;
+            background-color: var(--primary-blue);
+            width: 100%;
+            margin-bottom: 20px;
+        }
+
+        .lead-box {
+            background-color: var(--gray-light);
+            padding: 18px 20px;
+            margin-bottom: 5px;
+            border-radius: 2px;
+            text-align: center;
+        }
+
+        .lead-box div {
+            font-size: 13pt;
+            font-weight: 700;
+            color: var(--primary-blue);
+            letter-spacing: -0.5px;
+        }
+
+        .lead-notes {
+            font-size: 8.5pt;
+            color: #777;
+            margin-bottom: 20px;
+            padding-left: 5px;
+            text-align: right;
+        }
+
+        .body-content { flex: 1; }
+
+        .section { margin-bottom: 22px; }
+
+        .section-title {
+            font-size: 13pt;
+            font-weight: 700;
+            display: flex;
+            align-items: center;
+            margin-bottom: 10px;
+            color: #111;
+        }
+
+        .section-title::before {
+            content: "";
+            display: inline-block;
+            width: 10px;
+            height: 10px;
+            background-color: #999;
+            margin-right: 10px;
+        }
+
+        table {
+            width: 100%;
+            border-collapse: collapse;
+            margin: 8px 0;
+            font-size: 9.5pt;
+            border-top: 1.5px solid #333;
+        }
+
+        th {
+            background-color: var(--gray-medium);
+            font-weight: 700;
+            padding: 10px;
+            border: 1px solid var(--border-light);
+        }
+
+        td {
+            padding: 10px;
+            border: 1px solid var(--border-light);
+            vertical-align: middle;
+        }
+
+        .group-cell {
+            background-color: #F9F9F9;
+            font-weight: 700;
+            width: 16%;
+            text-align: center;
+            color: var(--primary-blue);
+            white-space: nowrap;
+        }
+
+        .page-footer {
+            margin-top: 15px;
+            padding-top: 10px;
+            display: flex;
+            justify-content: space-between;
+            font-size: 8.5pt;
+            color: var(--gray-dark);
+            border-top: 1px solid #EEE;
+        }
+
+        .footer-page { flex: 1; text-align: center; }
+    """
+
+    pages = chunk_rows(rows, max_rows_per_page=26)  # one list of rows per sheet
+
+    html_pages = []
+    total_pages = len(pages) if pages else 1  # the fallback 1 is never rendered: with no pages the loop below does not run
+    for i, page_rows in enumerate(pages, start=1):
+        table_html = build_outline_table_html(page_rows)  # already HTML, inserted into the sheet without further escaping
+
+        html_pages.append(f"""
+        <div class="sheet">
+            <header class="page-header">
+                <div class="header-left">
+                    보고서: 목차 자동 생성 결과
+                </div>
+                <div class="header-right">
+                    작성일자: {datetime.now().strftime("%Y. %m. %d.")}
+                </div>
+            </header>
+
+            <div class="title-block">
+                <h1 class="header-title">{html_escape(report_title)}</h1>
+                <div class="title-divider"></div>
+            </div>
+
+            <div class="body-content">
+                <div class="lead-box">
+                    <div>확정 목차 표 형태 정리본</div>
+                </div>
+                <div class="lead-notes">목차는 outline_issue_report.txt를 기반으로 표로 재구성됨</div>
+
+                <div class="section">
+                    <div class="section-title">목차</div>
+                    {table_html}
+                </div>
+            </div>
+
+            <footer class="page-footer">
+                <div class="footer-slogan">Word Style v2 Outline</div>
+                <div class="footer-page">- {i} / {total_pages} -</div>
+                <div class="footer-info">outline_issue_report.html</div>
+            </footer>
+        </div>
+        """)
+
+    return f"""<!DOCTYPE html>
+<html lang="ko">
+<head>
+  <meta charset="UTF-8">
+  <title>{html_escape(report_title)} - Outline</title>
+  <style>{css}</style>
+</head>
+<body>
+  {''.join(html_pages)}
+</body>
+</html>
+"""
+
+def main():  # Entry point: generate the outline with the model, then save it as TXT and as a paginated HTML table.
+    log("=== 목차 생성 시작 ===")
+    domain_prompt = load_domain_prompt()
+    corpus        = load_corpus()
+
+    outline = generate_outline(domain_prompt, corpus)
+
+    # Save the raw outline text
+    out_txt = CONTEXT_DIR / "outline_issue_report.txt"
+    out_txt.write_text(outline, encoding="utf-8")
+    log(f"목차 TXT 저장 완료: {out_txt}")
+
+    # Additionally save an HTML rendering of the parsed outline
+    title, rows = parse_outline(outline)
+    out_html = CONTEXT_DIR / "outline_issue_report.html"
+    out_html.write_text(build_outline_html(title, rows), encoding="utf-8")
+    log(f"목차 HTML 저장 완료: {out_html}")
+
+    log("=== 목차 생성 종료 ===")
+
+if __name__ == "__main__":
+    main()
diff --git a/converters/pipeline/step8_content.py b/converters/pipeline/step8_content.py
new file mode 100644
index 0000000..5f66190
--- /dev/null
+++ b/converters/pipeline/step8_content.py
@@ -0,0 +1,1021 @@
+# -*- coding: utf-8 -*-
+"""
+step8_generate_report_gemini.py
+
+기능
+- 확정 목차(outline_issue_report.txt)를 읽어 섹션(소목차) 목록을 만든다.
+- 섹션별로 RAG에서 근거 청크를 검색한다(FAISS 있으면 FAISS, 없으면 키워드 기반).
+- 섹션별 본문 초안을 생성한다(내부 근거 우선, 원문 보존 원칙).
+- 섹션별 이미지 후보를 매핑하고, md에는 이미지 자리표시자를 삽입한다.
+- 산출물 2개를 만든다.
+  1) report_draft.md
+  2) report_sections.json
+
+변경사항 (OpenAI → Gemini)
+- google.genai 라이브러리 사용
+- 자율성 통제: temperature=0.3 (thinking_budget=0은 현재 코드에 적용되어 있지 않음)
+- 원문 보존 원칙 강화
+- 소목차별 중복 방지 로직 추가
+- ★ 이미지 assets 복사 로직 추가
+"""
+
+import os
+import re
+import json
+import shutil  # ★ 추가: 이미지 복사용
+from dataclasses import dataclass, field
+from pathlib import Path
+from datetime import datetime
+from typing import List, Dict, Any, Optional, Tuple
+
+import numpy as np
+
+try:
+    import faiss  # type: ignore
+except Exception:
+    faiss = None
+
+# ===== 하이브리드 API 설정 =====
+# 검색/임베딩: OpenAI (기존 FAISS 인덱스 호환)
+# 본문 작성: Gemini (글쓰기 품질)
+
+from google import genai
+from google.genai import types
+from openai import OpenAI
+from api_config import API_KEYS
+
+# OpenAI client (used for query embeddings / retrieval)
+OPENAI_API_KEY = API_KEYS.get('GPT_API_KEY', '')
+EMBED_MODEL = "text-embedding-3-small"
+openai_client = OpenAI(api_key=OPENAI_API_KEY)
+
+# Gemini client (used for writing the section bodies)
+GEMINI_API_KEY = API_KEYS.get('GEMINI_API_KEY', '')
+GEMINI_MODEL = "gemini-3-pro-preview"
+gemini_client = genai.Client(api_key=GEMINI_API_KEY)
+
+# ===== Path settings =====
+# NOTE(review): absolute Windows paths are fixed here at import time —
+# confirm they match the machine this pipeline runs on.
+DATA_ROOT   = Path(r"D:\for python\survey_test\process")
+OUTPUT_ROOT = Path(r"D:\for python\survey_test\output")
+CONTEXT_DIR = OUTPUT_ROOT / "context"
+LOG_DIR     = OUTPUT_ROOT / "logs"
+RAG_DIR     = OUTPUT_ROOT / "rag"
+GEN_DIR     = OUTPUT_ROOT / "generated"
+
+# Image asset locations
+ASSETS_DIR  = GEN_DIR / "assets"
+IMAGES_ROOT = DATA_ROOT / "images"  # where the extracted source images live
+
+# Importing this module creates the output directories if they are missing.
+for d in [CONTEXT_DIR, LOG_DIR, RAG_DIR, GEN_DIR, ASSETS_DIR]:
+    d.mkdir(parents=True, exist_ok=True)
+
+# Input files
+OUTLINE_PATH = CONTEXT_DIR / "outline_issue_report.txt"
+DOMAIN_PROMPT_PATH = CONTEXT_DIR / "domain_prompt.txt"
+
+# Optional files (used only when present)
+FAISS_INDEX_PATH = RAG_DIR / "faiss.index"
+FAISS_META_PATH = RAG_DIR / "meta.json"
+FAISS_VECTORS_PATH = RAG_DIR / "vectors.npy"
+
+# Image metadata (enriches captions when present)
+IMAGE_META_PATH = DATA_ROOT / "image_metadata.json"
+
+# Output files
+REPORT_MD_PATH = GEN_DIR / "report_draft.md"
+REPORT_JSON_PATH = GEN_DIR / "report_sections.json"
+
+# Tunables; each one is read from the environment variable of the same name,
+# falling back to the default given here.
+# NOTE(review): within the visible part of this module only
+# MAX_IMAGES_PER_SECTION is referenced — confirm the other two are still needed.
+TOP_K_EVIDENCE = int(os.getenv("TOP_K_EVIDENCE", "10"))
+MAX_IMAGES_PER_SECTION = int(os.getenv("MAX_IMAGES_PER_SECTION", "3"))
+MAX_EVIDENCE_SNIPPET_CHARS = int(os.getenv("MAX_EVIDENCE_SNIPPET_CHARS", "900"))
+
+# Regex patterns
+RE_TITLE_LINE = re.compile(r"^\s*(.+?)\s*$")              # any non-empty line, trimmed
+RE_L1 = re.compile(r"^\s*(\d+)\.\s+(.+?)\s*$")            # "1. Title"
+RE_L2 = re.compile(r"^\s*(\d+\.\d+)\s+(.+?)\s*$")         # "1.1 Title"
+RE_L3 = re.compile(r"^\s*(\d+\.\d+\.\d+)\s+(.+?)\s*$")    # "1.1.1 Title"
+RE_KEYWORDS = re.compile(r"(#\S+)")                       # "#keyword" tokens
+
+# Markdown image syntax: ![alt](path)
+RE_IMAGE_PATH_IN_MD = re.compile(r"!\[([^\]]*)\]\(([^)]+)\)")
+
+
+def log(msg: str):
+    """Print *msg* with an ``[HH:MM:SS]`` prefix and append it to the step8 log file.
+
+    The same line is written to stdout (flushed immediately) and appended to
+    ``step8_generate_report_log.txt`` inside ``LOG_DIR``.
+    """
+    stamp = datetime.now().strftime('%H:%M:%S')
+    line = f"[{stamp}] {msg}"
+    print(line, flush=True)
+    log_file = LOG_DIR / "step8_generate_report_log.txt"
+    with log_file.open("a", encoding="utf-8") as handle:
+        handle.write(line + "\n")
+
+
+@dataclass
+class SubTopic:
+    """One sub-topic bullet attached to a depth-3 outline item.
+
+    ``load_outline`` builds these from bullet lines of the form
+    ``- title | #kw1 #kw2 | [type] | guide``.
+    """
+    # Text of the first "|"-separated field.
+    title: str
+    # Keywords taken from the second field, with the leading "#" removed.
+    keywords: List[str]
+    # The bracketed third field, stored including its brackets.
+    type: str
+    # The fourth field: free-text writing guidance.
+    guide: str
+
+
+@dataclass
+class OutlineItem:
+    """A single numbered heading of the outline."""
+    # Heading number as written in the outline, e.g. "1", "1.2" or "1.2.3".
+    number: str
+    # Heading text that follows the number.
+    title: str
+    # Nesting level: 1, 2 or 3 (set by load_outline from the number's shape).
+    depth: int
+    # Sub-topic bullets; load_outline only fills this for depth-3 items.
+    sub_topics: List[SubTopic] = field(default_factory=list)
+
+
+def read_text(p: Path) -> str:
+    """Return the UTF-8 contents of *p* with surrounding whitespace removed.
+
+    Bytes that cannot be decoded are dropped (``errors="ignore"``).
+    """
+    contents = p.read_text(encoding="utf-8", errors="ignore")
+    return contents.strip()
+
+
+def load_domain_prompt() -> str:
+    """Return the stripped text of ``DOMAIN_PROMPT_PATH``.
+
+    Raises:
+        RuntimeError: If the file does not exist.
+    """
+    if DOMAIN_PROMPT_PATH.exists():
+        return read_text(DOMAIN_PROMPT_PATH)
+    raise RuntimeError(f"domain_prompt.txt 없음: {DOMAIN_PROMPT_PATH}")
+
+
+def load_outline() -> Tuple[str, List[OutlineItem]]:
+    """Parse ``OUTLINE_PATH`` into a report title and a flat list of outline items.
+
+    The first line of the file is taken as the report title. Every following
+    non-blank line is tried, in this order, as:
+
+    1. a depth-3 heading (``N.N.N title``) — starts a new item that later
+       bullets attach to;
+    2. a sub-topic bullet (``- title | #kw ... | [type] | guide``) — appended
+       to the most recent depth-3 item, if there is one;
+    3. a depth-2 heading (``N.N title``);
+    4. a depth-1 heading (``N. title``).
+
+    Lines matching none of these are ignored.
+
+    Returns:
+        ``(report_title, items)``; ``("", [])`` when the file has no lines.
+
+    Raises:
+        RuntimeError: If the outline file does not exist.
+    """
+    if not OUTLINE_PATH.exists():
+        raise RuntimeError("목차 파일이 없습니다.")
+    raw = OUTLINE_PATH.read_text(encoding="utf-8", errors="ignore").splitlines()
+    if not raw:
+        return "", []
+
+    report_title = raw[0].strip()
+    items: List[OutlineItem] = []
+    # The depth-3 item that subsequent bullets belong to (None outside one).
+    current_l3 = None
+
+    # Patterns for depth-3 headings and their sub-topic bullets
+    re_l3_head = re.compile(r"^\s*(\d+\.\d+\.\d+)\s+(.+)$")
+    re_l3_topic = re.compile(r"^\s*[\-\*]\s+(.+?)\s*\|\s*(.+?)\s*\|\s*(\[.+?\])\s*\|\s*(.+)$")
+
+    for ln in raw[1:]:
+        line = ln.strip()
+        if not line:
+            continue
+
+        m3h = re_l3_head.match(line)
+        if m3h:
+            current_l3 = OutlineItem(number=m3h.group(1), title=m3h.group(2), depth=3)
+            items.append(current_l3)
+            continue
+
+        m3t = re_l3_topic.match(line)
+        if m3t and current_l3:
+            # Second field holds "#keyword" tokens; store them without the "#".
+            kws = [k.lstrip("#").strip() for k in RE_KEYWORDS.findall(m3t.group(2))]
+            current_l3.sub_topics.append(SubTopic(
+                title=m3t.group(1), keywords=kws, type=m3t.group(3), guide=m3t.group(4)
+            ))
+            continue
+
+        m2 = RE_L2.match(line)
+        if m2:
+            items.append(OutlineItem(number=m2.group(1), title=m2.group(2), depth=2))
+            # A new higher-level heading closes the current depth-3 item.
+            current_l3 = None
+            continue
+        m1 = RE_L1.match(line)
+        if m1:
+            items.append(OutlineItem(number=m1.group(1), title=m1.group(2), depth=1))
+            current_l3 = None
+            continue
+
+    return report_title, items
+
+
+def load_image_metadata() -> Dict[str, Dict[str, Any]]:
+    """Build a lookup of image metadata keyed by ``image_file``.
+
+    Returns:
+        A dict mapping each entry's stripped ``image_file`` value to the entry
+        itself. Entries without an ``image_file`` are skipped. An empty dict is
+        returned when ``IMAGE_META_PATH`` is missing or cannot be processed
+        (the failure is logged as a warning).
+    """
+    if not IMAGE_META_PATH.exists():
+        return {}
+
+    by_file: Dict[str, Dict[str, Any]] = {}
+    try:
+        raw_json = IMAGE_META_PATH.read_text(encoding="utf-8", errors="ignore")
+        for entry in json.loads(raw_json):
+            file_name = (entry.get("image_file") or "").strip()
+            if not file_name:
+                continue
+            by_file[file_name] = entry
+    except Exception as e:
+        log(f"[WARN] image_metadata.json 로드 실패: {e}")
+        return {}
+    return by_file
+
+
+def iter_rag_items() -> List[Dict[str, Any]]:
+    """Load every dict entry from all ``*_chunks.json`` files in ``RAG_DIR``.
+
+    Files are read in sorted name order. A file that fails to read or parse is
+    logged and skipped; a file whose top level is not a list contributes
+    nothing, and non-dict list elements are dropped.
+
+    Raises:
+        RuntimeError: If no ``*_chunks.json`` file exists in ``RAG_DIR``.
+    """
+    chunk_files = sorted(RAG_DIR.glob("*_chunks.json"))
+    if not chunk_files:
+        raise RuntimeError(f"rag 폴더에 *_chunks.json 없음: {RAG_DIR}")
+
+    collected: List[Dict[str, Any]] = []
+    for path in chunk_files:
+        try:
+            payload = json.loads(path.read_text(encoding="utf-8", errors="ignore"))
+        except Exception as e:
+            log(f"[WARN] RAG 파일 로드 실패: {path.name} {e}")
+            continue
+        if isinstance(payload, list):
+            collected.extend(entry for entry in payload if isinstance(entry, dict))
+
+    return collected
+
+
+def normalize_ws(s: str) -> str:
+    """Collapse each whitespace run in *s* to one space and trim both ends.
+
+    A falsy *s* (``None`` or ``""``) yields ``""``.
+    """
+    if not s:
+        return ""
+    return " ".join(s.split())
+
+
+def make_evidence_snippet(text: str, max_chars: int) -> str:
+    """Return whitespace-normalised *text*, cut to *max_chars* plus ``"..."`` if longer.
+
+    Text that already fits within *max_chars* is returned without the ellipsis.
+    """
+    flat = normalize_ws(text)
+    return flat if len(flat) <= max_chars else flat[:max_chars] + "..."
+
+
+def get_item_key(it: Dict[str, Any]) -> Tuple[str, int]:
+    """Return the ``(source, chunk)`` pair that identifies a chunk record.
+
+    ``source`` is stripped and defaults to ``""``; ``chunk`` is converted with
+    ``int`` and defaults to ``0`` when missing or falsy.
+    """
+    source_name = (it.get("source") or "").strip()
+    chunk_no = int(it.get("chunk") or 0)
+    return source_name, chunk_no
+
+
+def build_item_index(items: List[Dict[str, Any]]) -> Dict[Tuple[str, int], Dict[str, Any]]:
+    """Index chunk records by their ``get_item_key`` value.
+
+    When two records share a key, the later one in *items* wins.
+    """
+    return {get_item_key(entry): entry for entry in items}
+
+
+def try_load_faiss():
+    """Load the optional FAISS artefacts, or return ``None`` to fall back to keyword search.
+
+    Returns:
+        ``(index, metas, vecs)`` when the ``faiss`` module is available and
+        ``faiss.index``, ``meta.json`` and ``vectors.npy`` all exist and load;
+        otherwise ``None``. The reason for a ``None`` result is logged.
+    """
+    if faiss is None:
+        log("[INFO] faiss 모듈 없음 - 키워드 검색 사용")
+        return None
+
+    required_files = (FAISS_INDEX_PATH, FAISS_META_PATH, FAISS_VECTORS_PATH)
+    if not all(path.exists() for path in required_files):
+        log("[INFO] FAISS 파일 없음 - 키워드 검색 사용")
+        return None
+
+    try:
+        faiss_index = faiss.read_index(str(FAISS_INDEX_PATH))
+        meta_text = FAISS_META_PATH.read_text(encoding="utf-8", errors="ignore")
+        meta_list = json.loads(meta_text)
+        vectors = np.load(str(FAISS_VECTORS_PATH))
+        log(f"[INFO] FAISS 로드 성공 - 인덱스 차원: {faiss_index.d}, 메타 수: {len(meta_list)}")
+    except Exception as e:
+        log(f"[WARN] FAISS 로드 실패: {e}")
+        return None
+    return faiss_index, meta_list, vectors
+
+
+def embed_query_openai(q: str) -> np.ndarray:
+    """Embed *q* with the OpenAI embedding model and L2-normalise the result.
+
+    Returns:
+        A ``float32`` vector divided by its norm (plus ``1e-12`` to avoid a
+        division by zero). If the API call fails, the error is logged and a
+        1536-element zero vector is returned instead.
+    """
+    try:
+        response = openai_client.embeddings.create(model=EMBED_MODEL, input=[q])
+        vector = np.array(response.data[0].embedding, dtype="float32")
+        return vector / (np.linalg.norm(vector) + 1e-12)
+    except Exception as e:
+        log(f"[WARN] OpenAI 임베딩 실패: {e}")
+        # Fallback sized for the OpenAI embedding dimension used here.
+        return np.zeros(1536, dtype="float32")
+
+
+def retrieve_with_faiss(
+    index,
+    metas: List[Dict[str, Any]],
+    item_map: Dict[Tuple[str, int], Dict[str, Any]],
+    query: str,
+    top_k: int
+) -> List[Dict[str, Any]]:
+    """Return the chunk records nearest to *query* according to the FAISS index.
+
+    Args:
+        index: Object exposing ``search(vectors, k)``.
+        metas: Metadata list indexed by the ids that ``index.search`` returns.
+        item_map: Chunk records keyed by ``(source, chunk)``.
+        query: Text to embed and search for.
+        top_k: Number of neighbours requested from the index.
+
+    Returns:
+        Matching records in the order the index returned them. Ids outside
+        ``metas`` and metadata with no record in *item_map* are skipped, so the
+        list may be shorter than *top_k*.
+    """
+    query_vec = embed_query_openai(query).reshape(1, -1).astype("float32")
+    _, neighbour_ids = index.search(query_vec, top_k)
+
+    hits: List[Dict[str, Any]] = []
+    for idx in neighbour_ids[0]:
+        # Skip ids that do not address an entry of ``metas``.
+        if idx < 0 or idx >= len(metas):
+            continue
+        record = item_map.get(get_item_key(metas[idx]))
+        if record:
+            hits.append(record)
+    return hits
+
+
+def tokenize_simple(s: str) -> List[str]:
+    """Split *s* into lower-cased, whitespace-separated tokens.
+
+    The input is whitespace-normalised first, so no empty tokens are produced.
+    """
+    return normalize_ws(s).lower().split()
+
+
+def retrieve_with_keywords(
+    all_items: List[Dict[str, Any]],
+    query: str,
+    keywords: List[str],
+    top_k: int
+) -> List[Dict[str, Any]]:
+    """Rank chunk records by simple substring matching and return the best *top_k*.
+
+    Scoring per record, over the lower-cased concatenation of its ``title``,
+    ``keywords``, ``summary``, ``text``, ``folder_context`` and
+    ``source_path`` fields:
+
+    - +1.0 for each distinct query token found as a substring;
+    - +2.0 for each distinct keyword found as a substring;
+    - +0.5 if the record has a truthy ``has_images``.
+
+    Records scoring 0 are left out. Ties keep their original relative order.
+    """
+    query_tokens = set(tokenize_simple(query))
+    keyword_tokens = {kw.lower() for kw in keywords if kw}
+    searchable_fields = ("title", "keywords", "summary", "text", "folder_context", "source_path")
+
+    ranked: List[Tuple[float, Dict[str, Any]]] = []
+    for record in all_items:
+        joined = " ".join(str(record.get(name) or "") for name in searchable_fields)
+        haystack = normalize_ws(joined).lower()
+
+        score = 1.0 * sum(1 for tok in query_tokens if tok and tok in haystack)
+        score += 2.0 * sum(1 for tok in keyword_tokens if tok and tok in haystack)
+        if record.get("has_images"):
+            score += 0.5
+
+        if score > 0:
+            ranked.append((score, record))
+
+    ranked.sort(key=lambda pair: pair[0], reverse=True)
+    return [record for _, record in ranked[:top_k]]
+
+
+def select_images_for_section(
+    evidences: List[Dict[str, Any]],
+    image_meta_by_file: Dict[str, Dict[str, Any]],
+    max_images: int
+) -> List[Dict[str, Any]]:
+    """Collect up to *max_images* distinct image candidates from the evidence chunks.
+
+    Args:
+        evidences: Chunk records; each may carry an ``images`` list of dicts
+            with a ``path`` and optionally an ``alt`` text.
+        image_meta_by_file: Optional metadata keyed by image file name; its
+            ``caption``, ``page`` and ``type`` values are used when present.
+        max_images: Upper bound on the number of images returned. A value of
+            zero or less yields an empty list.
+
+    Returns:
+        One dict per selected image, in first-seen order, with the keys
+        ``image_id`` (empty here), ``rel_path`` (forward slashes),
+        ``image_file``, ``caption``, ``source_path``, ``page`` and ``type``.
+        The caption falls back from metadata to the ``alt`` text to the file
+        name.
+    """
+    # The cap used to be checked only after appending, so a cap of 0 still
+    # let one image through.
+    if max_images <= 0:
+        return []
+
+    seen = set()
+    out: List[Dict[str, Any]] = []
+
+    for ev in evidences:
+        imgs = ev.get("images") or []
+        if not isinstance(imgs, list):
+            continue
+        for img in imgs:
+            if not isinstance(img, dict):
+                continue
+            rel_path = (img.get("path") or "").strip()
+            if not rel_path:
+                continue
+            # Normalise separators so the same file is not selected twice.
+            key = rel_path.replace("\\", "/")
+            if key in seen:
+                continue
+            seen.add(key)
+
+            img_file = key.rsplit("/", 1)[-1]
+            meta = image_meta_by_file.get(img_file, {})
+
+            caption = (
+                (meta.get("caption") or "").strip()
+                or (img.get("alt") or "").strip()
+                or img_file
+            )
+
+            out.append({
+                "image_id": "",
+                "rel_path": key,
+                "image_file": img_file,
+                "caption": caption,
+                "source_path": ev.get("source_path") or ev.get("source") or "",
+                "page": meta.get("page"),
+                "type": meta.get("type"),
+            })
+            if len(out) >= max_images:
+                return out
+
+    return out
+
+
+def make_image_placeholders(section_number: str, images: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
+    """Return copies of *images* tagged with a section-based id and placeholder.
+
+    The id is ``<section number with "." replaced by "_">_imgNN`` (NN counts
+    from 01) and the placeholder is ``{{IMG:<id>}}``. The input dicts are not
+    modified.
+    """
+    prefix = section_number.replace(".", "_")
+    return [
+        {
+            **image,
+            "image_id": f"{prefix}_img{seq:02d}",
+            "placeholder": f"{{{{IMG:{prefix}_img{seq:02d}}}}}",
+        }
+        for seq, image in enumerate(images, start=1)
+    ]
+
+
+# ★ 추가: 이미지 파일을 assets 폴더로 복사하는 함수
+def copy_images_to_assets(image_info_list: List[Dict[str, Any]]) -> None:
+    """Copy each selected image into ``ASSETS_DIR`` and record the result on its dict.
+
+    The source file is looked up in three places, in order:
+
+    1. ``DATA_ROOT / rel_path``;
+    2. ``IMAGES_ROOT / image_file``;
+    3. the first file named ``image_file`` found anywhere under ``DATA_ROOT``.
+
+    Only regular files count as a hit. Checking mere existence let an empty
+    ``rel_path``/``image_file`` resolve to the root directory itself, which
+    made the copy fail and skipped the remaining lookups.
+
+    Args:
+        image_info_list: Image dicts carrying ``image_id`` plus ``rel_path``
+            and/or ``image_file``. Each dict is updated in place:
+            ``asset_path`` becomes ``"assets/<image_id><ext>"`` on success and
+            ``None`` when the file is not found or cannot be copied.
+    """
+    for img in image_info_list:
+        rel_path = img.get('rel_path', '')
+        img_file = img.get('image_file', '')
+
+        # 1st/2nd attempt: direct candidates, skipping empty path parts.
+        candidates = []
+        if rel_path:
+            candidates.append(DATA_ROOT / rel_path)
+        if img_file:
+            candidates.append(IMAGES_ROOT / img_file)
+        src_path = next((c for c in candidates if c.is_file()), None)
+
+        # 3rd attempt: recursive search by file name under DATA_ROOT.
+        if src_path is None and img_file:
+            src_path = next(
+                (found for found in DATA_ROOT.rglob(img_file) if found.is_file()),
+                None,
+            )
+
+        if src_path is None:
+            log(f"    [WARN] 이미지 없음: {rel_path} ({img_file})")
+            img['asset_path'] = None
+            continue
+
+        # New file name is derived from image_id; the original extension is kept.
+        ext = src_path.suffix or '.png'
+        dst_filename = f"{img['image_id']}{ext}"
+
+        try:
+            shutil.copy2(src_path, ASSETS_DIR / dst_filename)
+        except OSError as e:
+            # Best effort: one failed copy must not abort the whole report.
+            log(f"    [WARN] 이미지 복사 실패: {img['image_id']} - {e}")
+            img['asset_path'] = None
+        else:
+            img['asset_path'] = f"assets/{dst_filename}"
+            log(f"    [IMG] {img['image_id']} → {dst_filename}")
+
+
+# ===== Gemini 프롬프트 구성 (자율성 통제 강화) =====
+
+def build_system_instruction(domain_prompt: str) -> str:
+    """
+    Build the Gemini system instruction (v4, final).
+
+    The returned text is *domain_prompt* followed by a fixed Korean rule block
+    covering: prohibited and required behaviours, the numbering scheme
+    (``1)`` → ``(1)`` → ``①`` or ``-``/``*``), bold titles with blank lines
+    around them, a correct and an incorrect example, the writing format per
+    sub-topic type, and output restrictions.
+
+    Args:
+        domain_prompt: Domain-specific preamble placed at the very top.
+
+    Returns:
+        The complete system instruction string.
+    """
+    return f"""{domain_prompt}
+
+═══════════════════════════════════════════════════════════════
+                    ★★★ 절대 준수 규칙 ★★★
+═══════════════════════════════════════════════════════════════
+
+[금지 사항]
+1. 원문의 수치, 용어, 표현을 임의로 변경 금지
+2. 제공되지 않은 정보 추론/창작 금지
+3. 추측성 표현 금지 ("~로 보인다", "~일 것이다")
+4. 중복 내용 작성 금지
+5. 마크다운 헤딩(#, ##, ###, ####) 사용 금지
+6. ★ "꼭지", "항목 1", "Topic" 등 내부 분류 용어 출력 금지
+7. ★ "1. 2. 3." 형태 번호 사용 금지 (반드시 "1) 2) 3)" 사용)
+
+[필수 사항]
+1. 원문 최대 보존
+2. 수치는 원본 그대로
+3. 전문 용어 변경 없이 사용
+4. 보고서 형식으로 전문적 작성
+
+═══════════════════════════════════════════════════════════════
+           ★★★ 번호 체계 및 서식 규칙 (필수) ★★★
+═══════════════════════════════════════════════════════════════
+
+【레벨별 번호와 서식】
+
+■ 1단계: 1), 2), 3)
+■ 2단계: (1), (2), (3)  
+■ 3단계: ①, ②, ③ 또는 -, *
+
+【핵심 서식 규칙】
+
+★ 모든 번호의 제목은 반드시 **볼드** 처리
+★ 제목과 본문 사이에 반드시 빈 줄(엔터) 삽입
+★ 본문과 다음 번호 사이에 반드시 빈 줄(엔터) 삽입
+
+【올바른 예시】
+```
+1) **VRS GNSS 측량의 개요**
+
+인공위성과 위성기준점을 이용한 위치 측량 방식이다. 실시간 보정을 통해 높은 정확도를 확보할 수 있다.
+
+2) **UAV 사진측량의 특징**
+
+무인항공기를 활용한 광역 측량 방식이다. 목적에 따라 다음과 같이 구분된다.
+
+   (1) **맵핑측량**
+   
+   정사영상 제작에 특화된 촬영 방식이다.
+   
+   (2) **모델측량**
+   
+   3D 모델 생성에 특화된 촬영 방식이다.
+```
+
+【잘못된 예시 - 절대 금지】
+```
+꼭지 1 VRS GNSS 측량        ← "꼭지" 용어 금지!
+1. VRS GNSS 측량             ← "1." 형태 금지!
+1) VRS GNSS 측량 인공위성을... ← 제목+본문 한줄 금지!
+1) VRS GNSS 측량             ← 볼드 없음 금지!
+```
+
+═══════════════════════════════════════════════════════════════
+
+[작성 형식]
+- 섹션 제목 없이 바로 본문 시작
+- 주제별 구분: 1), 2), 3) + **볼드 제목** + 줄바꿈 + 본문
+- 하위 구분: (1), (2), (3) + **볼드 제목** + 줄바꿈 + 본문
+- [비교형]: 마크다운 표 포함
+- [기술형]: 기술 사양/수치 정확히 기재
+- [절차형]: 단계별 1), 2), 3) 사용
+
+[출력 제한]
+- 마크다운 헤딩 금지
+- "꼭지", "Topic", "항목" 등 분류 용어 출력 금지
+- 내부 메모용 표현 금지
+- 출처 표시 금지
+═══════════════════════════════════════════════════════════════
+"""
+
+
+def build_user_prompt(
+    report_title: str,
+    item,  # OutlineItem
+    evidences,
+    image_info_list,
+    previous_sections_summary: str = ""
+) -> str:
+    """
+    Assemble the per-section user prompt sent to Gemini (v4).
+
+    Args:
+        report_title: Title of the whole report, shown in the prompt header.
+        item: The outline item being written; ``number``, ``title`` and
+            ``sub_topics`` are read from it.
+        evidences: Retrieved chunk dicts. ``source_path``/``source``,
+            ``title``, ``keywords`` and ``text`` are read. A missing or
+            ``None`` value is rendered as an empty string (``text`` used to
+            raise ``TypeError`` when it was ``None``); the source falls back to
+            "내부자료". ``text`` is cut to its first 1500 characters.
+        image_info_list: Dicts carrying ``placeholder`` and ``caption``; when
+            empty, the image block is left out.
+        previous_sections_summary: Text describing what earlier sections
+            already covered; when empty, the de-duplication block is left out.
+
+    Returns:
+        The complete prompt string.
+    """
+
+    # Evidence blocks, collected in a list and joined once.
+    evidence_blocks = []
+    for i, ev in enumerate(evidences, 1):
+        src = ev.get('source_path') or ev.get('source') or '내부자료'
+        text = (ev.get('text') or '')[:1500]
+        title = ev.get('title') or ''
+        keywords = ev.get('keywords') or ''
+
+        evidence_blocks.append(f"""
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+[데이터 {i}] 출처: {src}
+제목: {title}
+키워드: {keywords}
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+{text}
+""")
+    ev_text = "".join(evidence_blocks)
+
+    # One numbered guide per sub-topic; the internal term "꼭지" is never shown.
+    guide_blocks = []
+    for idx, st in enumerate(item.sub_topics, 1):
+        guide_blocks.append(f"""
+【작성할 내용 {idx}】 {st.title}
+  - 유형: {st.type}
+  - 핵심 키워드: {', '.join(['#'+k for k in st.keywords]) if st.keywords else '없음'}
+  - 참고 지침: {st.guide}
+  - ★ 출력 시 "{idx}) **{st.title}**" 형태로 시작할 것
+""")
+    topic_guides = "".join(guide_blocks)
+
+    # Image placeholders the model may insert.
+    img_guide = ""
+    if image_info_list:
+        img_lines = ["\n【삽입 가능 이미지】\n"]
+        img_lines.extend(
+            f"  - {img['placeholder']}: {img['caption']}\n" for img in image_info_list
+        )
+        img_lines.append("  → 문맥에 맞는 위치에 삽입\n")
+        img_guide = "".join(img_lines)
+
+    # De-duplication hint built from earlier sections.
+    dup_guide = ""
+    if previous_sections_summary:
+        dup_guide = f"""
+【중복 방지 - 이미 다룬 내용이므로 제외】
+{previous_sections_summary}
+"""
+
+    # Formatting reminder repeated close to the end of the prompt.
+    format_reminder = """
+═══════════════════════════════════════════════════════════════
+              ★★★ 출력 서식 필수 준수 ★★★
+═══════════════════════════════════════════════════════════════
+1) **제목은 반드시 볼드**
+
+본문은 제목 다음 줄에 작성
+
+2) **다음 제목도 볼드**
+
+본문...
+
+   (1) **하위 제목도 볼드**
+   
+   하위 본문...
+
+★ "꼭지", "항목", "Topic" 등 내부 용어 절대 출력 금지!
+★ 제목과 본문 사이 반드시 빈 줄!
+═══════════════════════════════════════════════════════════════
+"""
+
+    return f"""
+╔═══════════════════════════════════════════════════════════════╗
+║  보고서: {report_title}
+║  작성 섹션: {item.number} {item.title}
+╚═══════════════════════════════════════════════════════════════╝
+
+{dup_guide}
+
+【이 섹션에서 다룰 내용】
+{topic_guides}
+
+{img_guide}
+
+{format_reminder}
+
+【참고 데이터】
+{ev_text}
+
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+지시: '{item.number} {item.title}' 섹션 본문을 작성하라.
+
+★ 번호: 1), 2) → (1), (2) → -, *
+★ 제목: 반드시 **볼드**
+★ 줄바꿈: 제목↔본문 사이 빈 줄 필수
+★ 금지어: "꼭지", "항목", "Topic" 출력 금지
+━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
+"""
+
+
+def generate_section_text_gemini(
+    system_instruction: str,
+    user_prompt: str
+) -> str:
+    """
+    Generate one section body with the Gemini API.
+
+    The request uses ``GEMINI_MODEL`` with ``temperature=0.3`` to keep the
+    output conservative.
+
+    Returns:
+        The stripped response text (``""`` when the response has no text).
+        If the call raises, the error is logged and the string
+        ``"[생성 실패: <error>]"`` is returned instead of raising.
+    """
+    try:
+        generation_config = types.GenerateContentConfig(
+            system_instruction=system_instruction,
+            temperature=0.3,  # low temperature to suppress creativity
+        )
+        response = gemini_client.models.generate_content(
+            model=GEMINI_MODEL,
+            contents=user_prompt,
+            config=generation_config,
+        )
+        generated = response.text or ""
+    except Exception as e:
+        log(f"[ERROR] Gemini API 호출 실패: {e}")
+        return f"[생성 실패: {e}]"
+    return generated.strip()
+
+import re
+
+def extract_section_summary(text: str, max_chars: int = 200) -> str:
+    """Return a short excerpt from the start of a section body (used for de-duplication).
+
+    Non-empty lines that do not start with ``#`` are stripped and collected
+    until their combined length reaches *max_chars*; they are then joined with
+    single spaces and the result is cut to *max_chars* characters.
+    """
+    collected = []
+    total_len = 0
+
+    for raw_line in text.split('\n'):
+        candidate = raw_line.strip()
+        if candidate and not candidate.startswith('#'):
+            collected.append(candidate)
+            total_len += len(candidate)
+            if total_len >= max_chars:
+                break
+
+    return ' '.join(collected)[:max_chars]
+
+
+def fix_numbering_format(text: str) -> str:
+    """
+    Convert "1. 2. 3." list markers emitted by Gemini into the "1) 2) 3)" scheme.
+
+    Conversion rules:
+    - "1. " → "1) " (line start, indentation 0)
+    - "   1. " → "   (1) " (indented by 1-4 characters: treated as a sub-level)
+    - deeper indentation → "- " bullet
+
+    NOTE: a second ``fix_numbering_format`` with the same body is defined
+    further down in this module; that later definition rebinds the name, so it
+    is the one callers actually get.
+    """
+    lines = text.split('\n')
+    result = []
+    
+    for line in lines:
+        # Measure the original indentation (count of leading whitespace chars)
+        stripped = line.lstrip()
+        indent = len(line) - len(stripped)
+        
+        # Detect the "<digits>. " pattern (markdown ordered list)
+        match = re.match(r'^(\d+)\.\s+(.+)$', stripped)
+        
+        if match:
+            num = match.group(1)
+            content = match.group(2)
+            
+            if indent == 0:
+                # Top level: 1. → 1)
+                result.append(f"{num}) {content}")
+            elif indent <= 4:
+                # First indentation level: 1. → (1)
+                result.append(" " * indent + f"({num}) {content}")
+            else:
+                # Deeper indentation: the number is replaced by a "-" bullet
+                result.append(" " * indent + f"- {content}")
+        else:
+            result.append(line)
+    
+    return '\n'.join(result)
+
+
+def clean_generated_text_final(section_number: str, text: str) -> str:
+    """
+    Post-process raw Gemini output into the report's house format (final version).
+
+    Steps, in order:
+      1. Drop heading lines that repeat a numbered section title
+         (``#``–``####`` followed by a dotted number).
+      2. Remove the internal "꼭지 N" label: a line holding only the label is
+         dropped; a label that prefixes a title is stripped and the title is
+         kept.
+      3. Turn a ``####`` heading that does not start with a digit into a bold
+         line.
+      4. Collapse runs of three or more blank lines to two.
+      5. Pass the result through ``fix_numbering_format`` and
+         ``fix_title_format``.
+
+    Args:
+        section_number: Accepted for call-site compatibility; not used here.
+        text: Raw model output.
+
+    Returns:
+        The cleaned text with leading and trailing whitespace removed.
+    """
+
+    # Stage 1: line-by-line clean-up
+    cleaned = []
+
+    for line in text.split('\n'):
+        stripped = line.strip()
+
+        # Duplicate section heading such as "### 1.2.3 Title"
+        if re.match(r'^#{1,4}\s*\d+(\.\d+)*\s+', stripped):
+            continue
+
+        # Stand-alone "꼭지 N" label, optionally wrapped in asterisks. The
+        # trailing "$" matters: without it, every line that merely *started*
+        # with the label was discarded together with its title.
+        if re.match(r'^\**꼭지\s*\d+\**\s*$', stripped):
+            continue
+
+        # "**꼭지 N** Title" → "Title"
+        cleaned_line = re.sub(r'\*\*꼭지\s*\d+\s*\*\*\s*', '', stripped)
+        # "**꼭지 N Title**" → "**Title**"
+        cleaned_line = re.sub(r'\*\*꼭지\s*\d+\s*', '**', cleaned_line)
+        # "꼭지 N Title" / "꼭지 N: Title" (no bold) → "Title"
+        cleaned_line = re.sub(r'^꼭지\s*\d+\s*[:.)]?\s*', '', cleaned_line)
+
+        # "#### Title" → bold line (numbered headings are left untouched)
+        h4_match = re.match(r'^####\s+(.+)$', cleaned_line)
+        if h4_match:
+            title = h4_match.group(1).strip()
+            if not re.match(r'^\d+', title):
+                cleaned.append(f"\n**{title}**\n")
+                continue
+
+        # Avoid long blank runs (3+ blank lines → 2)
+        if not stripped:
+            if len(cleaned) >= 2 and not cleaned[-1].strip() and not cleaned[-2].strip():
+                continue
+
+        # Keep the original line (with its indentation) unless it was rewritten.
+        cleaned.append(cleaned_line if cleaned_line != stripped else line)
+
+    result = '\n'.join(cleaned)
+
+    # Stage 2: numbering scheme ("1." → "1)")
+    result = fix_numbering_format(result)
+
+    # Stage 3: split title+body on one line and apply bold titles
+    result = fix_title_format(result)
+
+    return result.strip()
+
+
+def fix_numbering_format(text: str) -> str:
+    """
+    Rewrite markdown-style "N. " list markers into the report numbering scheme.
+
+    Per line, based on the number of leading whitespace characters:
+    - none        → "N) content"
+    - 1 to 4      → "(N) content", indented by that many spaces
+    - more than 4 → "- content", indented by that many spaces
+
+    Lines that do not start with "<digits>. " are returned unchanged.
+    """
+    def convert(line: str) -> str:
+        body = line.lstrip()
+        marker = re.match(r'^(\d+)\.\s+(.+)$', body)
+        if marker is None:
+            return line
+
+        num, content = marker.groups()
+        indent = len(line) - len(body)
+        if indent == 0:
+            return f"{num}) {content}"
+
+        pad = " " * indent
+        if indent <= 4:
+            return f"{pad}({num}) {content}"
+        return f"{pad}- {content}"
+
+    return '\n'.join(convert(line) for line in text.split('\n'))
+
+
+def fix_title_format(text: str) -> str:
+    """
+    Separate numbered titles from their body text and make the titles bold.
+
+    Each line is stripped and tested against four patterns in order; the first
+    one that applies decides the output:
+
+    1. ``N) **Title** body`` (body of 20+ chars) → title line, blank line,
+       body line, blank line.
+    2. ``(N) **Title** body`` → same split for the sub-level marker.
+    3. ``N) Title`` with 3-40 non-asterisk characters → ``N) **Title**`` plus
+       a blank line, unless the text ends like a sentence.
+    4. ``(N) Title`` → same bolding for the sub-level marker.
+
+    Any other line is passed through unchanged. Finally, runs of three or more
+    blank lines are reduced to two.
+
+    Returns:
+        The reformatted text.
+    """
+    lines = text.split('\n')
+    result = []
+    
+    for line in lines:
+        stripped = line.strip()
+        # Leading and trailing whitespace both count here, since ``stripped``
+        # is trimmed on both sides.
+        indent = len(line) - len(stripped)
+        indent_str = " " * indent
+        
+        # Pattern 1: "1) **Title** body..." → "1) **Title**\n\nbody..."
+        m1 = re.match(r'^(\d+)\)\s+(\*\*[^*]+\*\*)\s+(.{20,})$', stripped)
+        if m1:
+            num = m1.group(1)
+            title = m1.group(2)
+            body = m1.group(3).strip()
+            result.append(f"{indent_str}{num}) {title}")
+            result.append("")
+            result.append(f"{indent_str}{body}")
+            result.append("")
+            continue
+        
+        # Pattern 2: "(1) **Title** body..." → "(1) **Title**\n\nbody..."
+        m2 = re.match(r'^\((\d+)\)\s+(\*\*[^*]+\*\*)\s+(.{20,})$', stripped)
+        if m2:
+            num = m2.group(1)
+            title = m2.group(2)
+            body = m2.group(3).strip()
+            result.append(f"{indent_str}({num}) {title}")
+            result.append("")
+            result.append(f"{indent_str}{body}")
+            result.append("")
+            continue
+        
+        # Pattern 3: "1) Title:" or "1) Title" (no bold, short) → apply bold
+        m3 = re.match(r'^(\d+)\)\s+([^*\n]{3,40})$', stripped)
+        if m3:
+            num = m3.group(1)
+            title = m3.group(2).strip().rstrip(':')
+            # Treat it as a title only when it does not end like a sentence
+            # (a period or one of the listed Korean sentence endings).
+            if not title.endswith(('.', '다', '요', '음', '함')):
+                result.append(f"{indent_str}{num}) **{title}**")
+                result.append("")
+                continue
+        
+        # Pattern 4: "(1) Title" (no bold) → apply bold
+        m4 = re.match(r'^\((\d+)\)\s+([^*\n]{3,40})$', stripped)
+        if m4:
+            num = m4.group(1)
+            title = m4.group(2).strip().rstrip(':')
+            if not title.endswith(('.', '다', '요', '음', '함')):
+                result.append(f"{indent_str}({num}) **{title}**")
+                result.append("")
+                continue
+        
+        result.append(line)
+    
+    # Collapse consecutive blank lines (at most two in a row are kept)
+    final = []
+    for line in result:
+        if not line.strip():
+            if len(final) >= 2 and not final[-1].strip() and not final[-2].strip():
+                continue
+        final.append(line)
+    
+    return '\n'.join(final)
+
+
+def main():
+    """Write the report draft section by section and save the MD/JSON outputs.
+
+    For every depth-3 outline item this retrieves evidence chunks (FAISS when
+    available, keyword matching otherwise), selects and copies images, asks
+    Gemini for the section body, cleans the output, and appends it to the
+    markdown draft and the JSON section list. Depth-1 and depth-2 items only
+    contribute a heading line. At the end ``REPORT_MD_PATH`` and
+    ``REPORT_JSON_PATH`` are written.
+    """
+    log("=== step8 Gemini 기반 보고서 생성 시작 ===")
+    
+    domain_prompt = load_domain_prompt()
+    report_title, outline_items = load_outline()
+    
+    log(f"보고서 제목: {report_title}")
+    log(f"목차 항목 수: {len(outline_items)}")
+
+    # Load the RAG data and the image metadata
+    image_meta_by_file = load_image_metadata()
+    all_rag_items = iter_rag_items()
+    item_map = build_item_index(all_rag_items)
+    faiss_pack = try_load_faiss()
+    use_faiss = faiss_pack is not None
+
+    log(f"RAG 청크 수: {len(all_rag_items)}")
+    log(f"FAISS 사용: {use_faiss}")
+
+    # System instruction (built once and reused for every section)
+    system_instruction = build_system_instruction(domain_prompt)
+
+    md_lines = [f"# {report_title}", ""]
+    report_json_sections = []
+    
+    # Running summary of earlier sections, passed along to avoid duplication.
+    # NOTE(review): this string grows with every section and is sent in full
+    # with each prompt — confirm that is acceptable for long outlines.
+    previous_sections_summary = ""
+    
+    # Number of images successfully copied to the assets folder
+    total_images_copied = 0
+
+    for it in outline_items:
+        # Depth-1 and depth-2 headings only emit their title and move on
+        if it.depth < 3:
+            prefix = "## " if it.depth == 1 else "### "
+            md_lines.append(f"\n{prefix}{it.number} {it.title}\n")
+            continue
+
+        log(f"집필 중: {it.number} {it.title} (꼭지 {len(it.sub_topics)}개)")
+
+        # Combine the keywords of all sub-topics into one search query
+        all_kws = []
+        for st in it.sub_topics:
+            all_kws.extend(st.keywords)
+        query = f"{it.title} " + " ".join(all_kws)
+
+        # RAG retrieval
+        # NOTE(review): top_k is hard-coded to 12 here; the TOP_K_EVIDENCE
+        # setting defined at module level is not used — confirm which is intended.
+        if use_faiss:
+            evidences = retrieve_with_faiss(faiss_pack[0], faiss_pack[1], item_map, query, 12)
+        else:
+            evidences = retrieve_with_keywords(all_rag_items, query, all_kws, 12)
+
+        log(f"  → 검색된 근거 청크: {len(evidences)}개")
+
+        # Select images and create their placeholders
+        section_images = select_images_for_section(evidences, image_meta_by_file, MAX_IMAGES_PER_SECTION)
+        image_info_list = make_image_placeholders(it.number, section_images)
+
+        # Copy the image files into the assets folder
+        copy_images_to_assets(image_info_list)
+        copied_count = sum(1 for img in image_info_list if img.get('asset_path'))
+        total_images_copied += copied_count
+
+        # Build the user prompt
+        user_prompt = build_user_prompt(
+            report_title=report_title,
+            item=it,
+            evidences=evidences,
+            image_info_list=image_info_list,
+            previous_sections_summary=previous_sections_summary
+        )
+
+        # Generate the body with Gemini, then normalise its formatting
+        section_text = generate_section_text_gemini(system_instruction, user_prompt)
+        section_text = clean_generated_text_final(it.number, section_text)  # post-processing step
+
+        # Append the section to the markdown draft
+        md_lines.append(f"\n#### {it.number} {it.title}\n")
+        md_lines.append(section_text + "\n")
+
+        # Accumulate a short summary of this section for de-duplication
+        section_summary = extract_section_summary(section_text)
+        if section_summary:
+            previous_sections_summary += f"\n- {it.number}: {section_summary[:100]}..."
+
+        # Collect the data for the JSON output (including asset_path)
+        report_json_sections.append({
+            "section_id": it.number,
+            "section_title": it.title,
+            "generated_text": section_text,
+            "sub_topics": [vars(st) for st in it.sub_topics],
+            "evidence_count": len(evidences),
+            "assets": [
+                {
+                    "type": "image",
+                    "image_id": img["image_id"],
+                    "filename": img["image_file"],
+                    "caption": img["caption"],
+                    "placeholder": img["placeholder"],
+                    "source_path": img.get("source_path", ""),
+                    "page": img.get("page"),
+                    "asset_path": img.get("asset_path"),  # None when the copy failed
+                }
+                for img in image_info_list
+            ]
+        })
+
+        log(f"  → 생성 완료 ({len(section_text)} 자)")
+
+    # 1. Save the markdown (.md) file
+    REPORT_MD_PATH.write_text("\n".join(md_lines), encoding="utf-8")
+
+    # 2. Save the JSON (.json) file
+    REPORT_JSON_PATH.write_text(
+        json.dumps({
+            "generated_at": datetime.now().isoformat(),
+            "report_title": report_title,
+            "model": GEMINI_MODEL,
+            "sections": report_json_sections
+        }, ensure_ascii=False, indent=2),
+        encoding="utf-8"
+    )
+
+    log(f"")
+    log(f"═══════════════════════════════════════════════════")
+    log(f"파일 저장 완료:")
+    log(f"  1. {REPORT_MD_PATH}")
+    log(f"  2. {REPORT_JSON_PATH}")
+    log(f"  3. {ASSETS_DIR} (이미지 {total_images_copied}개 복사)")  # assets folder and copied-image count
+    log(f"═══════════════════════════════════════════════════")
+    log("=== step8 보고서 생성 종료 ===")
+
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/converters/pipeline/step9_html.py b/converters/pipeline/step9_html.py
new file mode 100644
index 0000000..3ee7365
--- /dev/null
+++ b/converters/pipeline/step9_html.py
@@ -0,0 +1,1249 @@
+# -*- coding: utf-8 -*-
+"""
+9_md_to_html_publisher.py
+
+기능:
+- report_draft.md + report_sections.json → report.html 변환
+- A4 규격 페이지네이션 템플릿 적용
+- 마크다운 테이블 → HTML 테이블 변환
+- 이미지 플레이스홀더 {{IMG:xxx}} → <figure> 변환
+- 목차(TOC) 자동 생성
+
+사용법:
+    python 9_md_to_html_publisher.py
+    python 9_md_to_html_publisher.py --md report_draft.md --json report_sections.json --output report.html
+    python 9_md_to_html_publisher.py --no-toc --no-summary
+"""
+
+import os
+import re
+import json
+import argparse
+from pathlib import Path
+from datetime import datetime
+from typing import List, Dict, Any, Tuple, Optional
+from dataclasses import dataclass, field
+
+# ===== Path configuration =====
+# Root of the output tree; hard-coded absolute Windows path.
+OUTPUT_ROOT = Path(r"D:\for python\survey_test\output")
+GEN_DIR = OUTPUT_ROOT / "generated"
+ASSETS_DIR = GEN_DIR / "assets"
+LOG_DIR = OUTPUT_ROOT / "logs"
+
+# Default input/output files
+DEFAULT_MD_PATH = GEN_DIR / "report_draft.md"
+DEFAULT_JSON_PATH = GEN_DIR / "report_sections.json"
+DEFAULT_OUTPUT_PATH = GEN_DIR / "report.html"
+
+# NOTE: this loop runs at import time, so merely importing the module
+# creates the directories below (including missing parents).
+for d in [GEN_DIR, ASSETS_DIR, LOG_DIR]:
+    d.mkdir(parents=True, exist_ok=True)
+
+
+def log(msg: str):
+    """Print a timestamped message and append the same line to the log file.
+
+    The line has the form ``[HH:MM:SS] msg``. It is flushed to stdout
+    immediately and appended (UTF-8) to ``step9_html_publish_log.txt``
+    inside ``LOG_DIR``; the file is reopened on every call.
+    """
+    stamp = datetime.now().strftime('%H:%M:%S')
+    entry = f"[{stamp}] {msg}"
+    print(entry, flush=True)
+    log_file = LOG_DIR / "step9_html_publish_log.txt"
+    with log_file.open("a", encoding="utf-8") as handle:
+        handle.write(entry + "\n")
+
+
+# ===== 데이터 클래스 =====
+@dataclass
+class ImageAsset:
+    """Metadata describing one image asset attached to a report section."""
+    image_id: str  # identifier of the image (the xxx part of {{IMG:xxx}})
+    filename: str  # image file name as recorded in the sections JSON
+    caption: str  # caption text shown under the figure
+    placeholder: str  # placeholder token for the image, e.g. {{IMG:xxx}}
+    source_path: str = ""  # presumably where the image originally came from; verify against producer
+    page: Optional[int] = None  # presumably the source page number, if known; verify against producer
+    asset_path: Optional[str] = None  # path used as the <img> src; None when no file is available
+
+
+@dataclass
+class Section:
+    """One report section as loaded from the sections JSON file."""
+    section_id: str  # value of the JSON "section_id" field
+    section_title: str  # value of the JSON "section_title" field
+    generated_text: str  # value of the JSON "generated_text" field
+    assets: List[ImageAsset] = field(default_factory=list)  # images attached to this section
+
+
+@dataclass
+class TocItem:
+    """A single table-of-contents entry."""
+    number: str  # dotted heading number such as "1", "1.1" or "1.1.1"
+    title: str  # heading text without the number
+    level: int  # nesting depth: 1, 2 or 3
+
+
+# ===== 파일 로더 =====
+def load_json_meta(json_path: Path) -> Tuple[str, List[Section]]:
+    """Read the report title and the section list from a sections JSON file.
+
+    Args:
+        json_path: Path of the JSON file to read (decoded as UTF-8).
+
+    Returns:
+        ``(report_title, sections)``. The title falls back to "보고서" when
+        the key is absent; every missing string field falls back to "" and
+        missing ``page`` / ``asset_path`` values stay ``None``.
+
+    Raises:
+        FileNotFoundError: If ``json_path`` does not exist.
+    """
+    if not json_path.exists():
+        raise FileNotFoundError(f"JSON 파일 없음: {json_path}")
+
+    payload = json.loads(json_path.read_text(encoding="utf-8"))
+    report_title = payload.get("report_title", "보고서")
+
+    def to_asset(raw):
+        # Build one ImageAsset from its JSON dictionary.
+        return ImageAsset(
+            image_id=raw.get("image_id", ""),
+            filename=raw.get("filename", ""),
+            caption=raw.get("caption", ""),
+            placeholder=raw.get("placeholder", ""),
+            source_path=raw.get("source_path", ""),
+            page=raw.get("page"),
+            asset_path=raw.get("asset_path"),
+        )
+
+    sections = [
+        Section(
+            section_id=entry.get("section_id", ""),
+            section_title=entry.get("section_title", ""),
+            generated_text=entry.get("generated_text", ""),
+            assets=[to_asset(raw) for raw in entry.get("assets", [])],
+        )
+        for entry in payload.get("sections", [])
+    ]
+
+    return report_title, sections
+
+
+def load_markdown(md_path: Path) -> str:
+    """Return the full text of a Markdown file, decoded as UTF-8.
+
+    Raises:
+        FileNotFoundError: If ``md_path`` does not exist.
+    """
+    if md_path.exists():
+        return md_path.read_text(encoding="utf-8")
+    raise FileNotFoundError(f"MD 파일 없음: {md_path}")
+
+
+# ===== 이미지 맵 생성 =====
+def build_image_map(sections: List[Section]) -> Dict[str, ImageAsset]:
+    """Index every asset that has a placeholder by its ``image_id``.
+
+    Assets whose ``placeholder`` is empty are left out. When the same
+    ``image_id`` occurs more than once, the last occurrence wins.
+    """
+    return {
+        asset.image_id: asset
+        for section in sections
+        for asset in section.assets
+        if asset.placeholder
+    }
+
+
+# ===== 목차 생성 =====
+def extract_toc_from_md(md_content: str) -> List[TocItem]:
+    """Collect numbered headings from Markdown as table-of-contents entries.
+
+    Recognised headings:
+        ``## N title``        -> level 1
+        ``### N.N title``     -> level 2
+        ``#### N.N.N title``  -> level 3
+
+    Returns:
+        The entries sorted by their numeric heading number, so that
+        ``1`` < ``1.1`` < ``1.1.1`` < ``1.2`` < ``2``.
+    """
+    heading_specs = (
+        (1, r'^##\s+(\d+)\s+(.+)$'),
+        (2, r'^###\s+(\d+\.\d+)\s+(.+)$'),
+        (3, r'^####\s+(\d+\.\d+\.\d+)\s+(.+)$'),
+    )
+
+    entries = [
+        TocItem(number=found.group(1), title=found.group(2).strip(), level=depth)
+        for depth, regex in heading_specs
+        for found in re.finditer(regex, md_content, re.MULTILINE)
+    ]
+
+    # Compare numbers component-wise as integers so "10" sorts after "9".
+    return sorted(
+        entries,
+        key=lambda entry: tuple(int(part) for part in entry.number.split('.')),
+    )
+
+
+def generate_toc_html(toc_items: List[TocItem]) -> str:
+    """Render table-of-contents entries as an HTML ``<ul>`` fragment.
+
+    Each level-1 entry opens a ``<div class="toc-group atomic-block">`` that
+    stays open until the next level-1 entry (or the end of the list).
+    Level-2 and level-3 entries are emitted as plain ``<li>`` rows; entries
+    of any other level are ignored.
+
+    Returns:
+        The HTML fragment, or "" when ``toc_items`` is empty.
+    """
+    if not toc_items:
+        return ""
+
+    sub_level_class = {2: 'toc-lvl-2', 3: 'toc-lvl-3'}
+    out = ['<ul style="list-style:none; padding:0; margin:0;">']
+    group_open = False
+
+    for entry in toc_items:
+        if entry.level == 1:
+            if group_open:
+                out.append('</div>')  # close the previous group
+            out.append('<div class="toc-group atomic-block">')
+            out.append(f'<li class="toc-item toc-lvl-1">{entry.number}. {entry.title}</li>')
+            group_open = True
+            continue
+
+        css_class = sub_level_class.get(entry.level)
+        if css_class is not None:
+            out.append(f'<li class="toc-item {css_class}">{entry.number} {entry.title}</li>')
+
+    if group_open:
+        out.append('</div>')  # close the last group
+
+    out.append('</ul>')
+    return '\n'.join(out)
+
+
+# ===== 마크다운 → HTML 변환 =====
+class MarkdownToHtmlConverter:
+    """Convert the report's Markdown subset into HTML fragments.
+
+    Handles headings, pipe tables (optionally preceded by a ``**[표 ...]**``
+    caption line), ``{{IMG:id}}`` image placeholders, bullet/numbered lists
+    and plain paragraphs. Figure numbers are counted per chapter.
+    """
+
+    # Inline/line patterns, compiled once and shared by all methods.
+    _BOLD_RE = re.compile(r'\*\*(.+?)\*\*')
+    _TABLE_SEP_RE = re.compile(r'^[\|\s\-:]+$')
+    _TABLE_CAPTION_RE = re.compile(r'^\*\*(\[표.+?\].*?)\*\*$')
+    _UL_ITEM_RE = re.compile(r'^[\*\-]\s+(.+)$')
+    _OL_ITEM_RE = re.compile(r'^(\d+)\.\s+(.+)$')
+    _LIST_START_RE = re.compile(r'^(?:[\*\-]|\d+\.)\s+')
+    _IMG_LINE_RE = re.compile(r'^\{\{IMG:(.+?)\}\}$')
+
+    def __init__(self, image_map: Dict[str, ImageAsset]):
+        """Store the image lookup table and reset the per-chapter counters.
+
+        Args:
+            image_map: Maps an image id (the ``xxx`` in ``{{IMG:xxx}}``) to
+                its asset description.
+        """
+        self.image_map = image_map
+        self.table_counter = {}  # chapter -> count
+        self.figure_counter = {}  # chapter -> count
+
+    def get_chapter(self, context: str = "1") -> str:
+        """Return the leading chapter number of a dotted section number.
+
+        An empty ``context`` yields "1".
+        """
+        return context.split('.')[0] if context else "1"
+
+    def next_table_num(self, chapter: str) -> str:
+        """Advance the table counter of ``chapter`` and return "chapter-n"."""
+        count = self.table_counter.get(chapter, 0) + 1
+        self.table_counter[chapter] = count
+        return f"{chapter}-{count}"
+
+    def next_figure_num(self, chapter: str) -> str:
+        """Advance the figure counter of ``chapter`` and return "chapter-n"."""
+        count = self.figure_counter.get(chapter, 0) + 1
+        self.figure_counter[chapter] = count
+        return f"{chapter}-{count}"
+
+    @classmethod
+    def _bold(cls, text: str) -> str:
+        """Replace ``**text**`` with ``<strong>text</strong>``."""
+        return cls._BOLD_RE.sub(r'<strong>\1</strong>', text)
+
+    @staticmethod
+    def _split_row(line: str) -> List[str]:
+        """Split one pipe-table row into stripped cells, keeping empty cells.
+
+        Only the outer border pipes are removed, so a blank cell in the
+        middle of a row keeps its column position.
+        """
+        row = line.strip()
+        if row.startswith('|'):
+            row = row[1:]
+        if row.endswith('|'):
+            row = row[:-1]
+        return [cell.strip() for cell in row.split('|')]
+
+    @classmethod
+    def _is_table_line(cls, stripped: str) -> bool:
+        """Tell whether a stripped line belongs to a pipe table body."""
+        return stripped.startswith('|') or bool(cls._TABLE_SEP_RE.match(stripped))
+
+    def convert_table(self, md_table: str, caption: str = "", chapter: str = "1") -> str:
+        """Convert a Markdown pipe table into an HTML ``<table>``.
+
+        Args:
+            md_table: The table source, one row per line.
+            caption: Optional caption; when given it is appended after the
+                table as a ``<figcaption>`` element.
+            chapter: Accepted for interface compatibility; not used here.
+
+        Returns:
+            The HTML fragment, or "" when fewer than two non-blank lines
+            are supplied.
+        """
+        lines = [l.strip() for l in md_table.strip().split('\n') if l.strip()]
+        if len(lines) < 2:
+            return ""
+
+        # Header row. Empty cells are kept so columns stay aligned with the body.
+        header_cells = self._split_row(lines[0])
+
+        # Skip the separator row (|---|---|) when present.
+        data_start = 1
+        if self._TABLE_SEP_RE.match(lines[1]):
+            data_start = 2
+
+        # Body rows. A row consisting only of empty cells is dropped.
+        data_rows = []
+        for line in lines[data_start:]:
+            cells = self._split_row(line)
+            if any(cells):
+                data_rows.append(cells)
+
+        html_lines = ['<table class="atomic-block">']
+
+        html_lines.append('<thead><tr>')
+        for cell in header_cells:
+            html_lines.append(f'<th>{self._bold(cell)}</th>')
+        html_lines.append('</tr></thead>')
+
+        html_lines.append('<tbody>')
+        for row in data_rows:
+            html_lines.append('<tr>')
+            for cell in row:
+                # Normalise <br> to the self-closing form after bold handling.
+                cell = self._bold(cell).replace('<br>', '<br/>')
+                html_lines.append(f'<td>{cell}</td>')
+            html_lines.append('</tr>')
+        html_lines.append('</tbody>')
+        html_lines.append('</table>')
+
+        if caption:
+            html_lines.append(f'<figcaption>{caption}</figcaption>')
+
+        return '\n'.join(html_lines)
+
+    def convert_image_placeholder(self, placeholder: str, chapter: str = "1") -> str:
+        """Convert a ``{{IMG:xxx}}`` placeholder into a ``<figure>`` element.
+
+        Args:
+            placeholder: Text beginning with ``{{IMG:id}}``.
+            chapter: Chapter number used for the figure numbering.
+
+        Returns:
+            The ``<figure>`` HTML when the id is known and has an asset
+            path; an HTML comment naming the id when it is not; or the
+            unchanged ``placeholder`` when it is not a placeholder at all.
+        """
+        # Local import keeps this block self-contained (html is stdlib).
+        import html
+
+        match = re.match(r'\{\{IMG:(.+?)\}\}', placeholder)
+        if not match:
+            return placeholder
+
+        image_id = match.group(1)
+        asset = self.image_map.get(image_id)
+
+        if not (asset and asset.asset_path):
+            # No usable image file: leave a comment instead of a figure.
+            return f'<!-- 이미지 없음: {image_id} -->'
+
+        fig_num = self.next_figure_num(chapter)
+        # The generic caption "Photo" is treated as "no caption".
+        caption = asset.caption if asset.caption and asset.caption != "Photo" else ""
+        caption_text = f"[그림 {fig_num}] {caption}" if caption else f"[그림 {fig_num}]"
+
+        # Attribute values must be escaped, otherwise a quote inside the
+        # caption or path would terminate the attribute early.
+        src = html.escape(str(asset.asset_path), quote=True)
+        alt = html.escape(caption, quote=True)
+
+        return (
+            '<figure class="atomic-block">\n'
+            f'    <img src="{src}" alt="{alt}">\n'
+            f'    <figcaption>{caption_text}</figcaption>\n'
+            '</figure>'
+        )
+
+    def convert_list(self, md_list: str) -> str:
+        """Convert consecutive Markdown list lines into ``<ul>``/``<ol>`` HTML.
+
+        Lines starting with ``*`` or ``-`` become unordered items, lines
+        starting with ``N.`` become ordered items; other lines are ignored.
+        When the list kind changes inside the run, the open list is closed
+        and a new one of the right kind is started.
+        """
+        html_lines = []
+        open_tag = None  # 'ul', 'ol' or None while no list is open
+
+        for raw in md_list.strip().split('\n'):
+            line = raw.strip()
+            if not line:
+                continue
+
+            ul_match = self._UL_ITEM_RE.match(line)
+            ol_match = self._OL_ITEM_RE.match(line)
+            if ul_match:
+                tag, content = 'ul', ul_match.group(1)
+            elif ol_match:
+                tag, content = 'ol', ol_match.group(2)
+            else:
+                continue
+
+            if tag != open_tag:
+                if open_tag is not None:
+                    html_lines.append(f'</{open_tag}>')
+                html_lines.append(f'<{tag}>')
+                open_tag = tag
+
+            html_lines.append(f'<li>{self._bold(content)}</li>')
+
+        if open_tag is not None:
+            html_lines.append(f'</{open_tag}>')
+
+        return '\n'.join(html_lines)
+
+    def convert_paragraph(self, text: str) -> str:
+        """Wrap plain text in ``<p>`` after applying inline formatting.
+
+        Handles ``**bold**``, ``*emphasis*`` and `` `code` ``. Blank input
+        yields "".
+        """
+        if not text.strip():
+            return ""
+
+        # Bold must be handled before emphasis, which uses a single asterisk.
+        text = self._bold(text)
+        text = re.sub(r'\*(.+?)\*', r'<em>\1</em>', text)
+        text = re.sub(r'`(.+?)`', r'<code>\1</code>', text)
+
+        return f'<p>{text}</p>'
+
+    def convert_full_content(self, md_content: str) -> str:
+        """Convert a whole Markdown document into HTML, line by line.
+
+        Heading levels are shifted: ``#`` and ``##`` both become ``<h1>``,
+        ``###`` becomes ``<h2>`` and ``####`` becomes ``<h3>``. A leading
+        number in a ``#``/``##`` heading sets the current chapter used for
+        figure numbering. Lines of the form ``*( ... )*`` are skipped.
+
+        A ``**[표 ...]**`` line is used as the caption of the table that
+        follows it (blank lines in between are allowed). When no table
+        follows, the line is emitted as an ordinary paragraph instead of
+        being dropped.
+
+        Returns:
+            The converted fragments joined with newlines.
+        """
+        lines = md_content.split('\n')
+        total = len(lines)
+        html_parts = []
+
+        current_chapter = "1"
+        i = 0
+
+        while i < total:
+            line = lines[i].strip()
+
+            # Blank line
+            if not line:
+                i += 1
+                continue
+
+            # H1 (# title): report title, section start, ...
+            h1_match = re.match(r'^#\s+(.+)$', line)
+            if h1_match:
+                title = h1_match.group(1)
+                # Pick up the chapter from a leading section number, if any.
+                num_match = re.match(r'^(\d+(?:\.\d+)*)\s+', title)
+                if num_match:
+                    current_chapter = num_match.group(1).split('.')[0]
+                html_parts.append(f'<h1>{title}</h1>')
+                i += 1
+                continue
+
+            # H2 (## chapter heading)
+            h2_match = re.match(r'^##\s+(.+)$', line)
+            if h2_match:
+                title = h2_match.group(1)
+                num_match = re.match(r'^(\d+)\s+', title)
+                if num_match:
+                    current_chapter = num_match.group(1)
+                # Emitted as <h1>, which the page template uses as a page break trigger.
+                html_parts.append(f'<h1>{title}</h1>')
+                i += 1
+                continue
+
+            # H3 (### section heading)
+            h3_match = re.match(r'^###\s+(.+)$', line)
+            if h3_match:
+                html_parts.append(f'<h2>{h3_match.group(1)}</h2>')
+                i += 1
+                continue
+
+            # H4 (#### sub-section heading)
+            h4_match = re.match(r'^####\s+(.+)$', line)
+            if h4_match:
+                html_parts.append(f'<h3>{h4_match.group(1)}</h3>')
+                i += 1
+                continue
+
+            # Image placeholder {{IMG:xxx}} on a line of its own
+            if self._IMG_LINE_RE.match(line):
+                html_parts.append(self.convert_image_placeholder(line, current_chapter))
+                i += 1
+                continue
+
+            # Image note line *( ... )*: skipped, the figcaption already covers it.
+            if line.startswith('*(') and line.endswith(')*'):
+                i += 1
+                continue
+
+            # Table caption line **[표 ...] ...**
+            caption = ""
+            if line.startswith('**[표'):
+                caption_match = self._TABLE_CAPTION_RE.match(line)
+                # Look past blank lines for the table this caption belongs to.
+                j = i + 1
+                while j < total and not lines[j].strip():
+                    j += 1
+                if caption_match and j < total and lines[j].strip().startswith('|'):
+                    caption = caption_match.group(1)
+                    i = j
+                    line = lines[i].strip()
+                else:
+                    # No table follows: keep the text rather than losing it.
+                    html_parts.append(self.convert_paragraph(line))
+                    i += 1
+                    continue
+
+            # Table (starts with |)
+            if line.startswith('|'):
+                table_lines = []
+                while i < total and self._is_table_line(lines[i].strip()):
+                    table_lines.append(lines[i])
+                    i += 1
+                html_parts.append(
+                    self.convert_table('\n'.join(table_lines), caption, current_chapter)
+                )
+                continue
+
+            # List (starts with *, - or "1.")
+            if self._LIST_START_RE.match(line):
+                list_lines = [line]
+                i += 1
+                while i < total:
+                    next_line = lines[i].strip()
+                    if self._LIST_START_RE.match(next_line):
+                        list_lines.append(next_line)
+                        i += 1
+                    elif not next_line:
+                        # A blank line ends the list and is consumed.
+                        i += 1
+                        break
+                    else:
+                        break
+
+                html_parts.append(self.convert_list('\n'.join(list_lines)))
+                continue
+
+            # Plain paragraph: gather lines until a blank or special line.
+            para_lines = [line]
+            i += 1
+            while i < total:
+                next_line = lines[i].strip()
+                if (not next_line or
+                        next_line.startswith(('#', '|', '**[표', '{{IMG:', '*(')) or
+                        self._LIST_START_RE.match(next_line)):
+                    break
+                para_lines.append(next_line)
+                i += 1
+
+            para_text = ' '.join(para_lines)
+            if para_text:
+                html_parts.append(self.convert_paragraph(para_text))
+
+        return '\n'.join(html_parts)
+
+
+# ===== HTML 템플릿 =====
+def get_html_template() -> str:
+    """A4 보고서 HTML 템플릿 반환"""
+    return '''<!DOCTYPE html>
+<html lang="ko">
+<head>
+<meta charset="UTF-8">
+<title>{report_title}</title>
+<style>
+    @import url('https://fonts.googleapis.com/css2?family=Noto+Sans+KR:wght@300;400;500;700;900&display=swap');
+    
+    :root {{ 
+        --primary: #006400; 
+        --accent: #228B22;  
+        --light-green: #E8F5E9; 
+        --bg: #525659;
+    }}
+    body {{ margin: 0; background: var(--bg); font-family: 'Noto Sans KR', sans-serif; }}
+    
+    /* [A4 용지 규격] */
+    .sheet {{
+        width: 210mm; height: 297mm; 
+        background: white; margin: 20px auto; 
+        position: relative; overflow: hidden; box-sizing: border-box;
+        box-shadow: 0 0 15px rgba(0,0,0,0.1);
+    }}
+    @media print {{ 
+        .sheet {{ margin: 0; break-after: page; box-shadow: none; }} 
+        body {{ background: white; }} 
+    }}
+
+    /* [헤더/푸터] */
+    .page-header {{ 
+        position: absolute; top: 10mm; left: 20mm; right: 20mm;
+        font-size: 9pt; color: #000000; font-weight: bold;
+        text-align: right; border-bottom: none !important; padding-bottom: 5px;
+    }}
+    .page-footer {{ 
+        position: absolute; bottom: 10mm; left: 20mm; right: 20mm;
+        display: flex; justify-content: space-between; align-items: flex-end;
+        font-size: 9pt; color: #555; border-top: 1px solid #eee; padding-top: 5px;
+    }}
+    
+    /* [본문 영역] */
+    .body-content {{ 
+        position: absolute;
+        top: 20mm; left: 20mm; right: 20mm; 
+        bottom: auto;
+    }}
+
+    /* [타이포그래피] */
+    h1, h2, h3 {{ 
+        white-space: nowrap; overflow: hidden; word-break: keep-all; color: var(--primary); 
+        margin: 0; padding: 0;
+    }}
+    h1 {{ 
+        font-size: 20pt;
+        font-weight: 900;
+        color: var(--primary);
+        border-bottom: 2px solid var(--primary); 
+        margin-bottom: 20px; 
+        margin-top: 0; 
+    }}
+    h2 {{ 
+        font-size: 18pt; 
+        border-left: 5px solid var(--accent); 
+        padding-left: 10px; 
+        margin-top: 30px; 
+        margin-bottom: 10px; 
+        color: #03581dff; 
+    }}
+    h3 {{ font-size: 14pt; margin-top: 20px; margin-bottom: 5px; color: var(--accent); font-weight: 700; }}
+    p, li {{ font-size: 12pt !important; line-height: 1.6 !important; text-align: justify; word-break: keep-all; margin-bottom: 5px; }}
+
+    /* [목차 스타일] */
+    .toc-item {{ line-height: 1.8; list-style: none; border-bottom: 1px dotted #eee; }}
+    .toc-lvl-1 {{ 
+        color: #006400;
+        font-weight: 900;
+        font-size: 13.5pt;
+        margin-top: 15px;
+        margin-bottom: 5px;
+        border-bottom: 2px solid #ccc; 
+        list-style: none !important;
+    }}
+    .toc-lvl-2 {{ font-size: 10.5pt; color: #333; margin-left: 20px; font-weight: normal; list-style: none !important; }}
+    .toc-lvl-3 {{ font-size: 10.5pt; color: #666; margin-left: 40px; list-style: none !important; }}
+    .toc-group {{
+        margin-bottom: 12px;
+        break-inside: avoid;
+        page-break-inside: avoid;
+    }}
+    
+    /* [표/이미지 스타일] */
+    table {{ 
+        width: 100%; 
+        border-collapse: collapse; 
+        margin: 15px 0; 
+        font-size: 9.5pt; 
+        table-layout: auto; 
+        border-top: 2px solid var(--primary); 
+    }}
+    th, td {{ 
+        border: 1px solid #ddd; 
+        padding: 6px 5px; 
+        text-align: center; 
+        vertical-align: middle;
+        word-break: keep-all;
+        word-wrap: break-word;
+    }}
+    th {{ 
+        background: var(--light-green); 
+        color: var(--primary); 
+        font-weight: 900; 
+        white-space: nowrap;
+        letter-spacing: -0.05em; 
+        font-size: 9pt;
+    }}
+    
+    /* [캡션 및 그림 스타일] */
+    figure {{ display: block; margin: 20px auto; text-align: center; width: 100%; }}
+    img, svg {{ max-width: 95% !important; height: auto !important; display: block; margin: 0 auto; border: 1px solid #eee; }}
+    figcaption {{ 
+        display: block; text-align: center; margin-top: 10px; 
+        font-size: 9.5pt; color: #666; font-weight: 600; 
+    }}
+    
+    .atomic-block {{ break-inside: avoid; page-break-inside: avoid; }}
+    #raw-container {{ display: none; }}
+
+    /* [하이라이트 박스] */
+    .highlight-box {{
+        background-color: rgb(226, 236, 226);
+        border: 1px solid #2a2c2aff; 
+        padding: 5px; margin: 1.5px 1.5px 2px 0px; border-radius: 3px;
+        color: #333; 
+    }}
+    .highlight-box li, .highlight-box p {{
+        font-size: 11pt !important;
+        line-height: 1.2;
+        letter-spacing: -0.6px;
+        margin-bottom: 3px;
+        color: #1a1919ff;
+    }}
+    .highlight-box h3, .highlight-box strong, .highlight-box b {{
+        font-size: 12pt !important; color: rgba(2, 37, 2, 1) !important;
+        font-weight: bold; margin: 0; display: block; margin-bottom: 5px;
+    }}
+
+    /* [요약 페이지 스타일] */
+    .squeeze {{
+        line-height: 1.35 !important;
+        letter-spacing: -0.5px !important;
+        margin-bottom: 2px !important;
+    }}
+    .squeeze-title {{
+        margin-bottom: 5px !important;
+        padding-bottom: 2px !important;
+    }}
+    #box-summary p, #box-summary li {{
+        font-size: 10pt !important;
+        line-height: 1.45 !important;
+        letter-spacing: -0.04em !important;
+        margin-bottom: 3px !important;
+        text-align: justify;
+    }}
+    #box-summary h1 {{
+        margin-bottom: 10px !important;
+        padding-bottom: 5px !important;
+    }}
+</style>
+</head>
+<body>
+
+    <div id="raw-container">
+        <div id="box-cover">{box_cover}</div>
+        <div id="box-toc">{box_toc}</div>
+        <div id="box-summary">{box_summary}</div>
+        <div id="box-content">{box_content}</div>
+    </div>
+
+    <template id="page-template">
+        <div class="sheet">
+            <div class="page-header"></div>
+            <div class="body-content"></div>
+            <div class="page-footer">
+                <span class="rpt-title"></span>
+                <span class="pg-num"></span>
+            </div>
+        </div>
+    </template>
+
+    <script>
+        window.addEventListener("load", async () => {{
+            await document.fonts.ready;
+            const CONFIG = {{ maxHeight: 970 }}; 
+            
+            const rawContainer = document.getElementById('raw-container');
+            if (rawContainer) {{
+                rawContainer.innerHTML = rawContainer.innerHTML.replace(
+                    /(<rect[^>]*?)\\s+y="[^"]*"\\s+([^>]*?y="[^"]*")/gi, 
+                    "$1 $2"
+                );
+            }}
+            const raw = {{
+                cover: document.getElementById('box-cover'),
+                toc: document.getElementById('box-toc'),
+                summary: document.getElementById('box-summary'),
+                content: document.getElementById('box-content')
+            }};
+
+            let globalPage = 1;
+            let reportTitle = raw.cover.querySelector('h1')?.innerText || "Report";
+
+            function cleanH1Text(text) {{
+                if (!text) return "";
+                const parts = text.split("-");
+                return parts[0].trim();
+            }}
+
+            function detox(node) {{
+                if (node.nodeType !== 1) return;
+                if (node.closest('svg')) return;
+
+                let cls = "";
+                if (node.hasAttribute('class')) {{
+                    cls = node.getAttribute('class');
+                }}
+
+                if ( (cls.includes('bg-') || cls.includes('border-') || cls.includes('box')) && 
+                     !cls.includes('title-box') && 
+                     !cls.includes('toc-') && 
+                     !cls.includes('cover-') &&
+                     !cls.includes('highlight-box') ) {{ 
+                    node.setAttribute('class', 'highlight-box atomic-block');
+                    const internalHeads = node.querySelectorAll('h3, h4, strong, b');
+                    internalHeads.forEach(head => {{
+                        head.removeAttribute('style');
+                        head.removeAttribute('class');
+                    }});
+                    node.removeAttribute('style');
+                    cls = 'highlight-box atomic-block'; 
+                }}
+
+                if (node.hasAttribute('class')) {{
+                    if (!cls.includes('toc-') && 
+                        !cls.includes('cover-') && 
+                        !cls.includes('highlight-') && 
+                        !cls.includes('title-box') &&
+                        !cls.includes('atomic-block')) {{
+                        node.removeAttribute('class');
+                    }}
+                }}
+
+                node.removeAttribute('style');
+                if (node.tagName === 'TABLE') node.border = "1";
+                if (node.tagName === 'FIGURE') {{
+                    const internalTitles = node.querySelectorAll('h3, h4, .chart-title');
+                    internalTitles.forEach(t => t.style.display = 'none');
+                }}
+            }}
+
+            function formatTOC(element) {{
+                const items = element.querySelectorAll('li');
+                items.forEach(li => {{
+                    const text = li.innerText.trim();
+                    const m1 = text.match(/^(\\d+)\\.\\s+(.+)$/);
+                    const m2 = text.match(/^(\\d+\\.\\d+)\\s+(.+)$/);
+                    const m3 = text.match(/^(\\d+\\.\\d+\\.\\d+)\\s+(.+)$/);
+                    
+                    if (m3) li.classList.add('toc-lvl-3');
+                    else if (m2) li.classList.add('toc-lvl-2');
+                    else if (m1) li.classList.add('toc-lvl-1');
+                }});
+            }}
+
+            function getFlatNodes(element) {{
+                if(element.id === 'box-toc') {{
+                    element.querySelectorAll('*').forEach(el => detox(el));
+                    formatTOC(element);
+
+                    const tocNodes = [];
+                    let title = element.querySelector('h1');
+                    if (!title) {{
+                        title = document.createElement('h1');
+                        title.innerText = "목차";
+                    }}
+                    tocNodes.push(title.cloneNode(true));
+
+                    const allLis = element.querySelectorAll('li');
+                    let currentGroup = null;
+
+                    allLis.forEach(li => {{
+                        const isLevel1 = li.classList.contains('toc-lvl-1');
+                        if (isLevel1) {{
+                            if (currentGroup) tocNodes.push(currentGroup);
+                            currentGroup = document.createElement('div');
+                            currentGroup.className = 'toc-group atomic-block';
+                            const ulWrapper = document.createElement('ul');
+                            ulWrapper.style.margin = "0";
+                            ulWrapper.style.padding = "0";
+                            currentGroup.appendChild(ulWrapper);
+                        }}
+                        if (!currentGroup) {{
+                            currentGroup = document.createElement('div');
+                            currentGroup.className = 'toc-group atomic-block';
+                            const ulWrapper = document.createElement('ul');
+                            ulWrapper.style.margin = "0";
+                            ulWrapper.style.padding = "0";
+                            currentGroup.appendChild(ulWrapper);
+                        }}
+                        currentGroup.querySelector('ul').appendChild(li.cloneNode(true));
+                    }});
+                    if (currentGroup) tocNodes.push(currentGroup);
+                    return tocNodes;
+                }}
+
+                let nodes = [];
+                Array.from(element.children).forEach(child => {{
+                    detox(child);
+                    if (child.classList.contains('highlight-box')) {{
+                        child.querySelectorAll('h3, h4, strong, b').forEach(head => {{
+                            head.removeAttribute('style');
+                            head.removeAttribute('class');
+                        }});
+                        nodes.push(child.cloneNode(true));
+                    }}
+                    else if(['DIV','SECTION','ARTICLE','MAIN'].includes(child.tagName)) {{
+                        nodes = nodes.concat(getFlatNodes(child));
+                    }} 
+                    else if (['UL','OL'].includes(child.tagName)) {{
+                        Array.from(child.children).forEach((li, idx) => {{
+                            detox(li);
+                            const w = document.createElement(child.tagName);
+                            w.style.margin="0"; w.style.paddingLeft="20px";
+                            if(child.tagName==='OL') w.start=idx+1;
+                            const cloneLi = li.cloneNode(true);
+                            cloneLi.querySelectorAll('*').forEach(el => detox(el));
+                            w.appendChild(cloneLi);
+                            nodes.push(w);
+                        }});
+                    }} else {{
+                        const clone = child.cloneNode(true);
+                        detox(clone);
+                        clone.querySelectorAll('*').forEach(el => detox(el));
+                        nodes.push(clone);
+                    }}
+                }});
+                return nodes;
+            }}
+
+            function renderFlow(sectionType, sourceNodes) {{
+                if (!sourceNodes.length) return;
+                let currentHeaderTitle = sectionType === 'toc' ? "목차" : (sectionType === 'summary' ? "요약" : reportTitle);
+                let page = createPage(sectionType, currentHeaderTitle);
+                let body = page.querySelector('.body-content');
+                let queue = [...sourceNodes];
+
+                while (queue.length > 0) {{
+                    let node = queue.shift();
+                    let clone = node.cloneNode(true);
+                    let isH1 = clone.tagName === 'H1';
+                    let isHeading = ['H2', 'H3'].includes(clone.tagName);
+                    let isText = ['P', 'LI'].includes(clone.tagName) && !clone.classList.contains('atomic-block');
+                    let isAtomic = ['TABLE', 'FIGURE', 'IMG', 'SVG'].includes(clone.tagName) || 
+                                   clone.querySelector('table, img, svg') || 
+                                   clone.classList.contains('atomic-block');
+
+                    if (isH1 && clone.innerText.includes('-')) {{
+                        clone.innerText = clone.innerText.split('-')[0].trim();
+                    }}
+
+                    if (isH1 && (sectionType === 'body' || sectionType === 'summary')) {{
+                        currentHeaderTitle = clone.innerText;
+                        if (body.children.length > 0) {{
+                            page = createPage(sectionType, currentHeaderTitle);
+                            body = page.querySelector('.body-content');
+                        }} else {{
+                            page.querySelector('.page-header').innerText = currentHeaderTitle;
+                        }}
+                    }}
+
+                    if (isHeading) {{
+                        const spaceLeft = CONFIG.maxHeight - body.scrollHeight;
+                        if (spaceLeft < 90) {{ 
+                            page = createPage(sectionType, currentHeaderTitle);
+                            body = page.querySelector('.body-content');
+                        }}
+                    }}
+
+                    body.appendChild(clone);
+
+                    if (isText && clone.innerText.length > 10) {{
+                        const originalHeight = clone.offsetHeight;
+                        clone.style.letterSpacing = "-1.0px";
+                        if (clone.offsetHeight < originalHeight) {{
+                            clone.style.letterSpacing = "-0.8px";
+                        }} else {{
+                            clone.style.letterSpacing = "";
+                        }}
+                    }}
+
+                    if (body.scrollHeight > CONFIG.maxHeight) {{
+                        if (isText) {{
+                            body.removeChild(clone);
+                            let textContent = node.innerText;
+                            let tempP = node.cloneNode(false);
+                            tempP.innerText = "";
+                            if (clone.style.letterSpacing) tempP.style.letterSpacing = clone.style.letterSpacing;
+                            body.appendChild(tempP);
+
+                            const words = textContent.split(' ');
+                            let currentText = "";
+                            
+                            for (let i = 0; i < words.length; i++) {{
+                                let word = words[i];
+                                let prevText = currentText;
+                                currentText += (currentText ? " " : "") + word;
+                                tempP.innerText = currentText;
+
+                                if (body.scrollHeight > CONFIG.maxHeight) {{
+                                    tempP.innerText = prevText;
+                                    tempP.style.textAlign = "justify";
+                                    tempP.style.textAlignLast = "justify";
+                                    
+                                    let remainingText = words.slice(i).join(' ');
+                                    let remainingNode = node.cloneNode(false);
+                                    remainingNode.innerText = remainingText;
+                                    queue.unshift(remainingNode);
+                                    
+                                    page = createPage(sectionType, currentHeaderTitle);
+                                    body = page.querySelector('.body-content');
+                                    body.style.lineHeight = "";
+                                    body.style.letterSpacing = "";
+                                    break;
+                                }}
+                            }}
+                        }}
+                        else {{
+                            body.removeChild(clone);
+                            let spaceLeft = CONFIG.maxHeight - body.scrollHeight;
+                            
+                            if (body.children.length > 0 && spaceLeft > 50 && queue.length > 0) {{
+                                while(queue.length > 0) {{
+                                    let candidate = queue[0]; 
+                                    if (['H1','H2','H3'].includes(candidate.tagName) || 
+                                        candidate.classList.contains('atomic-block') ||
+                                        candidate.querySelector('img, table')) break; 
+
+                                    let filler = candidate.cloneNode(true);
+                                    if(['P','LI'].includes(filler.tagName) && filler.innerText.length > 10) {{
+                                        filler.style.letterSpacing = "-1.0px";
+                                    }}
+                                    body.appendChild(filler);
+
+                                    if (body.scrollHeight <= CONFIG.maxHeight) {{
+                                        if(filler.style.letterSpacing === "-1.0px") filler.style.letterSpacing = "-0.8px";
+                                        queue.shift(); 
+                                    }} else {{
+                                        body.removeChild(filler);
+                                        break; 
+                                    }}
+                                }}
+                            }}
+
+                            if (body.children.length > 0) {{
+                                page = createPage(sectionType, currentHeaderTitle);
+                                body = page.querySelector('.body-content');
+                            }}
+                            body.appendChild(clone);
+                            
+                            if (isAtomic && body.scrollHeight > CONFIG.maxHeight) {{
+                                const currentH = clone.offsetHeight;
+                                const overflow = body.scrollHeight - CONFIG.maxHeight;
+                                body.removeChild(clone);
+
+                                if (overflow > 0 && overflow < (currentH * 0.15)) {{
+                                    clone.style.transform = "scale(0.85)";
+                                    clone.style.transformOrigin = "top center";
+                                    clone.style.marginBottom = `-${{currentH * 0.15}}px`;
+                                    body.appendChild(clone);
+                                }} else {{
+                                    body.appendChild(clone);
+                                }}
+                            }}
+                        }}
+                    }}
+                }}
+            }}
+
+            function createPage(type, headerTitle) {{
+                const tpl = document.getElementById('page-template');
+                const clone = tpl.content.cloneNode(true);
+                const sheet = clone.querySelector('.sheet');
+                
+                if (type === 'cover') {{
+                    sheet.innerHTML = "";
+                    const title = raw.cover.querySelector('h1')?.innerText || "Report";
+                    const sub = raw.cover.querySelector('h2')?.innerText || "";
+                    const pTags = raw.cover.querySelectorAll('p');
+                    const infos = pTags.length > 0 ? Array.from(pTags).map(p => p.innerText).join(" / ") : "";
+                    
+                    sheet.innerHTML = `
+                        <div style="position:absolute; top:20mm; right:20mm; text-align:right; font-size:11pt; color:#666;">${{infos}}</div>
+                        <div style="display:flex; flex-direction:column; justify-content:center; align-items:center; height:100%; text-align:center; width:100%;">
+                            <div style="width:85%;">
+                                <div style="font-size:32pt; font-weight:900; color:var(--primary); line-height:1.2; margin-bottom:30px; word-break:keep-all;">${{title}}</div>
+                                <div style="font-size:20pt; font-weight:300; color:#444; word-break:keep-all;">${{sub}}</div>
+                            </div>
+                        </div>`;
+                }} else {{
+                    clone.querySelector('.page-header').innerText = headerTitle;
+                    clone.querySelector('.rpt-title').innerText = reportTitle;
+                    if (type !== 'toc') clone.querySelector('.pg-num').innerText = `- ${{globalPage++}} -`;
+                    else clone.querySelector('.pg-num').innerText = "";
+                }}
+                document.body.appendChild(sheet);
+                return sheet;
+            }}
+
+            createPage('cover');
+            if(raw.toc && raw.toc.innerHTML.trim()) renderFlow('toc', getFlatNodes(raw.toc));
+
+            const summaryNodes = getFlatNodes(raw.summary);
+            const tempBox = document.createElement('div');
+            tempBox.style.width = "210mm"; 
+            tempBox.style.position = "absolute"; 
+            tempBox.style.visibility = "hidden";
+            tempBox.id = 'box-summary';
+            document.body.appendChild(tempBox);
+            summaryNodes.forEach(node => tempBox.appendChild(node.cloneNode(true)));
+            
+            const totalHeight = tempBox.scrollHeight;
+            const pageHeight = CONFIG.maxHeight;
+            const lastPart = totalHeight % pageHeight; 
+
+            if (totalHeight > pageHeight && lastPart > 0 && lastPart < 180) {{ 
+                summaryNodes.forEach(node => {{
+                    if(node.nodeType === 1) {{ 
+                        node.classList.add('squeeze');
+                        if(node.tagName === 'H1') node.classList.add('squeeze-title');
+                        if(node.tagName === 'P' || node.tagName === 'LI') {{
+                             node.style.fontSize = "9.5pt"; 
+                             node.style.lineHeight = "1.4"; 
+                             node.style.letterSpacing = "-0.8px";
+                        }}
+                    }}
+                }});
+            }}
+            document.body.removeChild(tempBox);
+            if(summaryNodes.length > 0) renderFlow('summary', summaryNodes);
+
+            renderFlow('body', getFlatNodes(raw.content));
+            
+            document.querySelectorAll('.sheet h1, .sheet h2').forEach(el => {{
+                let fs = 100;
+                while(el.scrollWidth > el.clientWidth && fs > 50) {{ el.style.fontSize = (--fs)+"%"; }}
+            }});
+
+            const allTextNodes = document.querySelectorAll('.sheet .body-content p, .sheet .body-content li');
+            allTextNodes.forEach(el => {{
+                if (el.closest('table') || el.closest('figure') || el.closest('.chart')) return;
+                if (el.innerText.trim().length < 10) return;
+                const originH = el.offsetHeight;
+                const originSpacing = el.style.letterSpacing;
+                el.style.fontSize = "12pt";
+                el.style.letterSpacing = "-1.4px"; 
+                const newH = el.offsetHeight;
+                if (newH < originH) {{
+                    el.style.letterSpacing = "-1.0px";
+                }} else {{
+                    el.style.letterSpacing = originSpacing;
+                }}
+            }});
+
+            document.querySelectorAll('.sheet h1, .sheet h2').forEach(el => {{
+                let fs = 100;
+                while(el.scrollWidth > el.clientWidth && fs > 50) {{ el.style.fontSize = (--fs)+"%"; }}
+            }});
+
+            const pages = document.querySelectorAll('.sheet');
+            if (pages.length >= 2) {{
+                const lastSheet = pages[pages.length - 1];
+                const prevSheet = pages[pages.length - 2];
+                if(lastSheet.querySelector('.rpt-title')) {{
+                    const lastBody = lastSheet.querySelector('.body-content');
+                    const prevBody = prevSheet.querySelector('.body-content');
+
+                    if (lastBody.scrollHeight < 150 && lastBody.innerText.trim().length > 0) {{
+                        prevBody.style.lineHeight = "1.3";
+                        prevBody.style.paddingBottom = "0px";
+                        const contentToMove = Array.from(lastBody.children);
+                        contentToMove.forEach(child => prevBody.appendChild(child.cloneNode(true)));
+
+                        if (prevBody.scrollHeight <= CONFIG.maxHeight + 5) {{
+                            lastSheet.remove();
+                        }} else {{
+                            for(let i=0; i<contentToMove.length; i++) prevBody.lastElementChild.remove();
+                            prevBody.style.lineHeight = "";
+                        }}
+                    }}
+                }}
+            }}
+
+            const rawContainerFinal = document.getElementById('raw-container');
+            if(rawContainerFinal) rawContainerFinal.remove();
+        }});
+    </script>
+</body>
+</html>'''
+
+
+# ===== 메인 함수 =====
+def generate_report_html(
+    md_path: Path,
+    json_path: Path,
+    output_path: Path,
+    include_toc: bool = True,
+    include_summary: bool = True,
+    cover_info: Optional[Dict[str, str]] = None
+):
+    """Convert the Markdown draft and its JSON metadata into an A4 HTML report.
+
+    Args:
+        md_path: Path to the Markdown draft (report_draft.md).
+        json_path: Path to the section metadata (report_sections.json).
+        output_path: Path of the report.html file to write.
+        include_toc: Whether to include the table of contents.
+        include_summary: Whether to include the summary.
+        cover_info: Optional cover fields (date, author, department).
+
+    Returns:
+        The ``output_path`` that was written.
+    """
+    from html import escape  # function-scope import; only the cover markup needs it
+
+    log("=== Step 9: MD → HTML 변환 시작 ===")
+
+    # 1. Load the inputs.
+    log(f"JSON 로드: {json_path}")
+    report_title, sections = load_json_meta(json_path)
+
+    log(f"MD 로드: {md_path}")
+    md_content = load_markdown(md_path)
+
+    log(f"보고서 제목: {report_title}")
+    log(f"섹션 수: {len(sections)}")
+
+    # 2. Build the image map.
+    image_map = build_image_map(sections)
+    log(f"이미지 자산 수: {len(image_map)}")
+
+    # 3. Extract the table of contents.
+    toc_items = extract_toc_from_md(md_content)
+    log(f"목차 항목 수: {len(toc_items)}")
+
+    # 4. Convert Markdown to HTML.
+    converter = MarkdownToHtmlConverter(image_map)
+    content_html = converter.convert_full_content(md_content)
+
+    # 5. Build the content of each box.
+    # box-cover: absent fields fall back to today's date / empty text.
+    cover = cover_info or {}
+    cover_date = cover.get('date', datetime.now().strftime('%Y.%m.%d'))
+    cover_author = cover.get('author', '')
+    cover_dept = cover.get('department', '')
+
+    # Split on the first ':' only, so a subtitle that itself contains a colon
+    # is kept whole rather than cut off at the second colon.
+    main_title, _, sub_title = report_title.partition(':')
+    main_title, sub_title = main_title.strip(), sub_title.strip()
+
+    # Cover fields are plain text; escape them so '<' or '&' cannot break markup.
+    box_cover = f'''
+        <h1>{escape(main_title)}</h1>
+        <h2>{escape(sub_title)}</h2>
+        <p>{escape(str(cover_date))}</p>
+        {f'<p>{escape(str(cover_author))}</p>' if cover_author else ''}
+        {f'<p>{escape(str(cover_dept))}</p>' if cover_dept else ''}
+    '''
+
+    # box-toc
+    box_toc = ""
+    if include_toc and toc_items:
+        box_toc = generate_toc_html(toc_items)
+        log("목차 HTML 생성 완료")
+
+    # box-summary: use the first section whose title mentions a summary; else empty.
+    box_summary = ""
+    if include_summary:
+        for sec in sections:
+            if '요약' in sec.section_title or 'summary' in sec.section_title.lower():
+                summary_converter = MarkdownToHtmlConverter(image_map)
+                box_summary = f"<h1>요약</h1>\n{summary_converter.convert_full_content(sec.generated_text)}"
+                break
+
+    # 6. Inject everything into the template (box-content is the converted body).
+    html_output = get_html_template().format(
+        report_title=report_title,
+        box_cover=box_cover,
+        box_toc=box_toc,
+        box_summary=box_summary,
+        box_content=content_html
+    )
+
+    # 7. Write the file.
+    output_path.write_text(html_output, encoding='utf-8')
+
+    log("")
+    log("═══════════════════════════════════════════════════")
+    log("HTML 보고서 생성 완료!")
+    log(f"  출력 파일: {output_path}")
+    log(f"  파일 크기: {output_path.stat().st_size / 1024:.1f} KB")
+    log("═══════════════════════════════════════════════════")
+    log("=== Step 9 종료 ===")
+
+    return output_path
+
+
+def main():
+    """Command-line entry point.
+
+    Parses the command-line options, assembles the optional cover fields and
+    calls ``generate_report_html`` with them.
+    """
+    cli = argparse.ArgumentParser(
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        description='MD + JSON → A4 HTML 보고서 변환',
+        epilog='''
+예시:
+    python 9_md_to_html_publisher.py
+    python 9_md_to_html_publisher.py --md report_draft.md --json report_sections.json
+    python 9_md_to_html_publisher.py --no-toc --no-summary
+    python 9_md_to_html_publisher.py --cover-date "2026.01.15" --cover-author "홍길동"
+        '''
+    )
+
+    # Input and output locations.
+    cli.add_argument('--md', type=Path, default=DEFAULT_MD_PATH, help='입력 마크다운 파일 경로')
+    cli.add_argument('--json', type=Path, default=DEFAULT_JSON_PATH, help='입력 JSON 파일 경로')
+    cli.add_argument('--output', '-o', type=Path, default=DEFAULT_OUTPUT_PATH, help='출력 HTML 파일 경로')
+
+    # Switches that leave optional pages out.
+    cli.add_argument('--no-toc', action='store_true', help='목차 페이지 제외')
+    cli.add_argument('--no-summary', action='store_true', help='요약 페이지 제외')
+
+    # Optional cover fields; each defaults to None when not given.
+    cli.add_argument('--cover-date', type=str, default=None, help='표지 날짜 (예: 2026.01.15)')
+    cli.add_argument('--cover-author', type=str, default=None, help='표지 작성자')
+    cli.add_argument('--cover-dept', type=str, default=None, help='표지 부서명')
+
+    args = cli.parse_args()
+
+    # Keep only the cover fields that were supplied (empty values are dropped).
+    supplied = {
+        'date': args.cover_date,
+        'author': args.cover_author,
+        'department': args.cover_dept,
+    }
+    cover_info = {field: text for field, text in supplied.items() if text}
+
+    # Run the conversion; an empty cover dict is passed as None.
+    generate_report_html(
+        md_path=args.md,
+        json_path=args.json,
+        output_path=args.output,
+        include_toc=not args.no_toc,
+        include_summary=not args.no_summary,
+        cover_info=cover_info or None,
+    )
+
+
+if __name__ == "__main__":
+    main()
\ No newline at end of file
diff --git a/output/assets/1_1_1_img01.png b/output/assets/1_1_1_img01.png
new file mode 100644
index 0000000..d04d8a1
Binary files /dev/null and b/output/assets/1_1_1_img01.png differ
diff --git a/output/assets/1_1_1_img02.png b/output/assets/1_1_1_img02.png
new file mode 100644
index 0000000..6533ac1
Binary files /dev/null and b/output/assets/1_1_1_img02.png differ
diff --git a/output/assets/1_1_1_img03.png b/output/assets/1_1_1_img03.png
new file mode 100644
index 0000000..4b2f849
Binary files /dev/null and b/output/assets/1_1_1_img03.png differ
diff --git a/output/assets/1_1_2_img01.png b/output/assets/1_1_2_img01.png
new file mode 100644
index 0000000..d04d8a1
Binary files /dev/null and b/output/assets/1_1_2_img01.png differ
diff --git a/output/assets/1_1_2_img02.png b/output/assets/1_1_2_img02.png
new file mode 100644
index 0000000..6533ac1
Binary files /dev/null and b/output/assets/1_1_2_img02.png differ
diff --git a/output/assets/1_1_2_img03.png b/output/assets/1_1_2_img03.png
new file mode 100644
index 0000000..347f9c7
Binary files /dev/null and b/output/assets/1_1_2_img03.png differ
diff --git a/output/assets/1_1_3_img01.png b/output/assets/1_1_3_img01.png
new file mode 100644
index 0000000..f5a7ace
Binary files /dev/null and b/output/assets/1_1_3_img01.png differ
diff --git a/output/assets/1_1_3_img02.png b/output/assets/1_1_3_img02.png
new file mode 100644
index 0000000..eb39b34
Binary files /dev/null and b/output/assets/1_1_3_img02.png differ
diff --git a/output/assets/1_2_1_img03.png b/output/assets/1_2_1_img03.png
new file mode 100644
index 0000000..566898d
Binary files /dev/null and b/output/assets/1_2_1_img03.png differ
diff --git a/output/assets/1_2_2_img01.png b/output/assets/1_2_2_img01.png
new file mode 100644
index 0000000..67f3c1f
Binary files /dev/null and b/output/assets/1_2_2_img01.png differ
diff --git a/output/assets/1_2_2_img02.png b/output/assets/1_2_2_img02.png
new file mode 100644
index 0000000..a1caf43
Binary files /dev/null and b/output/assets/1_2_2_img02.png differ
diff --git a/output/assets/1_2_2_img03.png b/output/assets/1_2_2_img03.png
new file mode 100644
index 0000000..031ea68
Binary files /dev/null and b/output/assets/1_2_2_img03.png differ
diff --git a/templates/index.html b/templates/index.html
index 3268249..0be4db3 100644
--- a/templates/index.html
+++ b/templates/index.html
@@ -1073,6 +1073,7 @@
             font-size: 16px;
         }
     </style>
+    <link rel="stylesheet" href="/static/css/editor.css">
 </head>
 <body>
     <!-- 상단 툴바 -->
@@ -1081,10 +1082,10 @@
         
         <div class="toolbar-spacer"></div>
         
-        <button class="toolbar-btn" id="editBtn" onclick="toggleEditMode()">✏️ 편집하기</button>
-        
+        <button class="toolbar-btn" id="editModeBtn" onclick="toggleEditMode()">✏️ 편집하기</button>
+
         <div class="toolbar-divider"></div>
-        
+
         <select class="zoom-select" id="zoomSelect" onchange="setZoom(this.value)">
             <option value="50">50%</option>
             <option value="75">75%</option>
@@ -1092,10 +1093,12 @@
             <option value="125">125%</option>
             <option value="150">150%</option>
         </select>
-        
+
         <div class="toolbar-divider"></div>
-        
+
+        <button class="toolbar-btn" onclick="exportHwp()">📄 HWP 추출</button>
         <button class="toolbar-btn" onclick="saveHtml()">💾 HTML 저장</button>
+        <button class="toolbar-btn" disabled title="준비중">📊 PPT 저장</button>
         <button class="toolbar-btn" onclick="printDoc()">🖨️ PDF/인쇄</button>
     </div>
     
@@ -1299,10 +1302,9 @@
                         </div>
                         
                         <!-- 보고서 -->
-                        <div class="doc-type-item disabled" data-type="report">
-                            <input type="radio" name="docType" disabled>
-                            <span class="label">📄 보고서</span>
-                            <span class="badge">준비중</span>
+                            <div class="doc-type-item" data-type="report" onclick="selectDocType('report')">
+                                <input type="radio" name="docType">
+                                <span class="label">📄 보고서</span>
                             
                             <div class="doc-type-preview">
                                 <div class="preview-thumbnail report">
@@ -1373,15 +1375,15 @@
                         <div class="option-group">
                             <div class="option-item" onclick="selectPageOption('1')">
                                 <input type="radio" name="pages" value="1" id="page1">
-                                <label for="page1">1p (본문만)</label>
+                                <label for="page1"> (본문) 1p</label>
                             </div>
                             <div class="option-item selected" onclick="selectPageOption('2')">
                                 <input type="radio" name="pages" value="2" id="page2" checked>
-                                <label for="page2">1p + 1p 첨부</label>
+                                <label for="page2"> (본문) 1p + (첨부) 1p</label>
                             </div>
                             <div class="option-item" onclick="selectPageOption('n')">
                                 <input type="radio" name="pages" value="n" id="pageN">
-                                <label for="pageN">1p + np 첨부 (자동)</label>
+                                <label for="pageN"> (본문) 1p + (첨부) np</label>
                             </div>
                         </div>
                     </div>
@@ -1393,6 +1395,43 @@
                     </div>
                 </div>
                 
+                <!-- 보고서 옵션 -->
+                <div id="reportOptions" style="display:none;">
+                    <!-- 보고서 구성 -->
+                    <div class="option-section">
+                        <div class="option-title">보고서 구성</div>
+                        <div class="option-group">
+                            <div class="option-item" style="cursor:default;">
+                                <input type="checkbox" id="reportCover" checked>
+                                <label for="reportCover">📘 표지</label>
+                            </div>
+                            <div class="option-item" style="cursor:default;">
+                                <input type="checkbox" id="reportToc" checked>
+                                <label for="reportToc">📑 목차</label>
+                            </div>
+                            <div class="option-item" style="cursor:default;">
+                                <input type="checkbox" id="reportDivider">
+                                <label for="reportDivider">📄 간지</label>
+                            </div>
+                            <div class="option-item" style="cursor:default; opacity:0.6;">
+                                <input type="checkbox" id="reportContent" checked disabled>
+                                <label for="reportContent">📝 내지 (필수)</label>
+                            </div>
+                        </div>
+                    </div>
+                    
+                    <!-- 요청사항 -->
+                    <div class="option-section">
+                        <div class="option-title">요청사항</div>
+                        <textarea class="request-textarea" id="reportInstructionInput" placeholder="예: 요약을 상세하게 작성해줘&#10;예: 표지에 로고 추가"></textarea>
+                    </div>
+                </div>
+
+
+
+
+
+
                 <!-- 생성 버튼 -->
                 <button class="generate-btn" id="generateBtn" onclick="generate()" disabled>
                     <span id="generateBtnText">🚀 생성하기</span>
@@ -1463,7 +1502,6 @@
         let generatedHTML = '';
         let currentDocType = 'briefing';
         let currentPageOption = '2';
-        let isEditing = false;
         let currentZoom = 100;
         let folderPath = '';
         let referenceLinks = [];
@@ -1472,6 +1510,53 @@
         let selectedText = '';
         let selectedRange = null;
 
+        // ===== HWP 추출 =====
+        async function exportHwp() {
+            // Convert the previewed document to HWP on the server and download it.
+            if (!generatedHTML) {
+                alert('먼저 문서를 생성해주세요.');
+                return;
+            }
+
+            // Send the preview's current DOM (it may have been edited since
+            // generation); fall back to the generated HTML if it is unavailable.
+            const frame = document.getElementById('previewFrame');
+            const html = frame.contentDocument ?
+                '<!DOCTYPE html>' + frame.contentDocument.documentElement.outerHTML :
+                generatedHTML;
+
+            setStatus('HWP 변환 중...', true);
+
+            try {
+                const response = await fetch('/export-hwp', {
+                    method: 'POST',
+                    headers: { 'Content-Type': 'application/json' },
+                    body: JSON.stringify({ html: html, doc_type: currentDocType })
+                });
+
+                if (!response.ok) {
+                    // Error bodies are not always JSON (e.g. an HTML error page),
+                    // so a failed parse falls back to the generic message.
+                    const error = await response.json().catch(() => ({}));
+                    throw new Error((error && error.error) || 'HWP 변환 실패');
+                }
+
+                // Download the returned file through a temporary object URL.
+                const blob = await response.blob();
+                const url = URL.createObjectURL(blob);
+                const a = document.createElement('a');
+                a.href = url;
+                a.download = `report_${new Date().toISOString().slice(0,10)}.hwp`;
+                a.click();
+                URL.revokeObjectURL(url);
+
+                setStatus('HWP 변환 완료', true);
+            } catch (error) {
+                alert('HWP 변환 오류: ' + error.message);
+                setStatus('오류 발생', false);
+            }
+        }
+
         // iframe 로드 후 선택 이벤트 연결
         function setupIframeSelection() {
             const frame = document.getElementById('previewFrame');
@@ -1815,8 +1900,8 @@
 
         // ===== 문서 유형 선택 =====
         function selectDocType(type) {
-            if (type !== 'briefing') {
-                return; // disabled 항목 클릭 무시
+            if (type === 'presentation') {
+                return; // PPT만 disabled
             }
             
             currentDocType = type;
@@ -1827,6 +1912,10 @@
                     item.querySelector('input[type="radio"]').checked = true;
                 }
             });
+            
+            // 옵션 패널 표시/숨김
+            document.getElementById('briefingOptions').style.display = (type === 'briefing') ? 'block' : 'none';
+            document.getElementById('reportOptions').style.display = (type === 'report') ? 'block' : 'none';
         }
 
         // ===== 템플릿 추가 =====
@@ -1846,6 +1935,15 @@
 
         // ===== 생성 =====
         async function generate() {
+            // Route to the generator for the currently selected document type.
+            switch (currentDocType) {
+                case 'briefing': await generateBriefing(); break;
+                case 'report': await generateReport(); break;
+            }
+        }
+
+        // ===== 기획서 생성 (기존 로직) =====
+        async function generateBriefing() {
             if (!inputContent && !folderPath && referenceLinks.length === 0) {
                 alert('먼저 폴더 위치, 참고 링크, 또는 HTML을 입력해주세요.');
                 return;
@@ -1900,20 +1998,15 @@
                 
                 if (data.success && data.html) {
                     generatedHTML = data.html;
-                    
-                    // 미리보기 표시
                     document.getElementById('placeholder').style.display = 'none';
                     const frame = document.getElementById('previewFrame');
                     frame.classList.add('active');
                     frame.srcdoc = generatedHTML;
-                    setTimeout(setupIframeSelection, 500);
-
-                    // 피드백 바 표시
+                    setTimeout(setupIframeSelection, 500);  // ← 이 줄 추가
                     document.getElementById('feedbackBar').classList.add('show');
-                    
                     setStatus('생성 완료', true);
                 }
-                
+                                
             } catch (error) {
                 alert('생성 오류: ' + error.message);
                 setStatus('오류 발생', false);
@@ -1931,8 +2024,85 @@
             }
         }
 
+        // ===== 보고서 생성 (새로 추가) =====
+        async function generateReport() {
+            // Generate a report via POST /generate-report and show it in the preview.
+            if (!folderPath && !inputContent) {
+                alert('폴더 위치 또는 HTML을 입력해주세요.');
+                return;
+            }
+
+            const btn = document.getElementById('generateBtn');
+            const btnText = document.getElementById('generateBtnText');
+            const spinner = document.getElementById('generateSpinner');
+
+            // Lock the button and show the spinner while generation is running.
+            btn.disabled = true;
+            btnText.textContent = '생성 중...';
+            spinner.style.display = 'block';
+            resetSteps();
+
+            // Request payload: source content plus the report-structure checkboxes.
+            // Built once and sent as-is so the request cannot drift from this object.
+            const options = {
+                content: inputContent,
+                folder_path: folderPath,
+                cover: document.getElementById('reportCover').checked,
+                toc: document.getElementById('reportToc').checked,
+                divider: document.getElementById('reportDivider').checked,
+                instruction: document.getElementById('reportInstructionInput').value
+            };
+
+            setStatus('보고서 생성 중...', true);
+
+            try {
+                // Placeholder progress display: steps 1-9 are ticked off on a timer
+                // before the request is sent; they do not reflect server progress.
+                for (let i = 1; i <= 9; i++) {
+                    updateStep(i, 'running');
+                    await new Promise(r => setTimeout(r, 500));
+                    // TODO: drive the steps from the real API call
+                    updateStep(i, 'done');
+                }
+
+                const response = await fetch('/generate-report', {
+                    method: 'POST',
+                    headers: { 'Content-Type': 'application/json' },
+                    body: JSON.stringify(options)
+                });
+
+                const data = await response.json();
+
+                // A response carrying an `error` field is treated as a failure.
+                if (data.error) {
+                    throw new Error(data.error);
+                }
+
+                if (data.success && data.html) {
+                    generatedHTML = data.html;
+                    document.getElementById('placeholder').style.display = 'none';
+                    const frame = document.getElementById('previewFrame');
+                    frame.classList.add('active');
+                    frame.srcdoc = generatedHTML;
+                    // Hook up text selection once the iframe has had 500 ms to load.
+                    setTimeout(setupIframeSelection, 500);
+                    document.getElementById('feedbackBar').classList.add('show');
+                    setStatus('생성 완료', true);
+                }
+
+            } catch (error) {
+                alert('생성 오류: ' + error.message);
+                setStatus('오류 발생', false);
+            } finally {
+                // Always restore the button, whether generation succeeded or failed.
+                btn.disabled = false;
+                btnText.textContent = '🚀 생성하기';
+                spinner.style.display = 'none';
+            }
+        }
+
         // ===== 피드백 수정 =====
-async function submitFeedback() {
+        async function submitFeedback() {
             const feedback = document.getElementById('feedbackInput').value.trim();
             if (!feedback) {
                 alert('수정 내용을 입력해주세요.');
@@ -1998,28 +2168,6 @@ async function submitFeedback() {
             }
         }
 
-        // ===== 편집 모드 =====
-        function toggleEditMode() {
-            isEditing = !isEditing;
-            const btn = document.getElementById('editBtn');
-            const formatBar = document.getElementById('formatBar');
-            const frame = document.getElementById('previewFrame');
-            
-            btn.classList.toggle('active', isEditing);
-            formatBar.classList.toggle('active', isEditing);
-            
-            if (frame.contentDocument) {
-                frame.contentDocument.designMode = isEditing ? 'on' : 'off';
-            }
-        }
-
-        function formatText(command) {
-            const frame = document.getElementById('previewFrame');
-            if (frame.contentDocument) {
-                frame.contentDocument.execCommand(command, false, null);
-            }
-        }
-
         // ===== 줌 =====
         function setZoom(value) {
             currentZoom = parseInt(value);
@@ -2094,6 +2242,6 @@ async function submitFeedback() {
         <textarea class="ai-edit-input" id="aiEditInput" rows="3" placeholder="예: 한 줄로 요약해줘&#10;예: 표 형태로 만들어줘"></textarea>
         <button class="ai-edit-btn" onclick="submitAiEdit()">✨ 수정하기</button>
     </div>
-
+    <script src="/static/js/editor.js"></script>
 </body>
 </html>
\ No newline at end of file