v4:코드모듈화_20260123

2026-02-20 11:34:02 +09:00
parent a990081287
commit 17e639ed40
24 changed files with 5412 additions and 1054 deletions
--- a/converters/dkdl.py
+++ b/converters/dkdl.py
@@ -0,0 +1,37 @@
+from pyhwpx import Hwp
+
+hwp = Hwp()
+hwp.FileNew()
+
+# HTML heading tag → number N appended to the "StyleShortcut" action name below
+heading_style_map = {
+    'h1': 1,   # Outline 1
+    'h2': 2,   # Outline 2  
+    'h3': 3,   # Outline 3
+    'h4': 4,   # Outline 4
+    'h5': 5,   # Outline 5
+    'h6': 6,   # Outline 6
+}
+
+def apply_heading_style(text, tag):
+    """Insert ``text``, apply the style mapped to ``tag``, then break the paragraph."""
+    hwp.insert_text(text)
+    hwp.HAction.Run("MoveLineBegin")
+    hwp.HAction.Run("MoveSelLineEnd")
+    
+    # Look up the style number; unmapped tags (e.g. 'p') give 0 and are left unstyled
+    style_num = heading_style_map.get(tag, 0)
+    if style_num:
+        hwp.HAction.Run(f"StyleShortcut{style_num}")
+    
+    hwp.HAction.Run("MoveLineEnd")
+    hwp.BreakPara()
+
+# Smoke test: runs at import time and writes the result to a fixed path
+apply_heading_style("1장 서론", 'h1')
+apply_heading_style("1.1 연구의 배경", 'h2')
+apply_heading_style("1.1.1 세부 내용", 'h3')
+apply_heading_style("본문 텍스트", 'p')  # plain text: 'p' has no entry in heading_style_map
+
+hwp.SaveAs(r"D:\test_output.hwp")
+print("완료!")
--- a/converters/html_to_hwp.py
+++ b/converters/html_to_hwp.py
@@ -13,6 +13,11 @@ from pyhwpx import Hwp
 from bs4 import BeautifulSoup, NavigableString
 import os, re

+# 스타일 그루핑 시스템 추가
+from converters.style_analyzer import StyleAnalyzer, StyledElement
+from converters.hwp_style_mapping import HwpStyleMapper, DEFAULT_STYLES, ROLE_TO_STYLE_NAME
+
+
 # PIL 선택적 import (이미지 크기 확인용)
 try:
    from PIL import Image
@@ -25,9 +30,12 @@ class Config:
    MARGIN_LEFT, MARGIN_RIGHT, MARGIN_TOP, MARGIN_BOTTOM = 20, 20, 20, 15
    HEADER_LEN, FOOTER_LEN = 10, 10
    MAX_IMAGE_WIDTH = 150  # mm (최대 이미지 너비)
+    ASSETS_PATH = r"D:\for python\geulbeot-light\geulbeot-light\output\assets"  # 🆕 추가

 class StyleParser:
    def __init__(self):
+        self.style_map = {}  # 스타일 매핑 (역할 → HwpStyle)
+        self.sty_gen = None  # 스타일 생성기
        self.class_styles = {
            'h1': {'font-size': '20pt', 'color': '#008000'},
            'h2': {'font-size': '16pt', 'color': '#03581d'},
@@ -62,6 +70,34 @@ class StyleParser:
    
    def is_bold(self, style): return style.get('font-weight', '') in ['bold', '700', '800', '900']

+# ═══════════════════════════════════════════════════════════════
+# 번호 제거 유틸리티
+# ═══════════════════════════════════════════════════════════════
+
+NUMBERING_PATTERNS = {
+    # Look-aheads keep a shallow pattern from eating part of a deeper number ("1.1" as H1).
+    'H1': re.compile(r'^\d+\.(?!\d)\s*'),                    # "1. " → ""
+    'H2': re.compile(r'^\d+\.\d+(?![.\d])\s*'),              # "1.1 " → ""
+    'H3': re.compile(r'^\d+\.\d+\.\d+(?![.\d])\s*'),         # "1.1.1 " → ""
+    'H4': re.compile(r'^[가나다라마바사아자차카타파하]\.\s*'),   # "가. " → "" (14 enumerators only)
+    'H5': re.compile(r'^(\d+)\)\s*'),           # "1) " → ""
+    'H6': re.compile(r'^\((\d+)\)\s*'),         # "(1) " → ""
+    'H7': re.compile(r'^[①②③④⑤⑥⑦⑧⑨⑩]\s*'),  # "① " → ""
+    'LIST_ITEM': re.compile(r'^[•\-○]\s*'),    # "• " → ""
+}
+
+def strip_numbering(text: str, role: str) -> str:
+    """Strip the leading outline number/bullet that matches ``role`` from ``text``.
+
+    HWP outline styles generate numbering themselves, so keeping the literal prefix
+    would duplicate it. Falsy ``text`` is returned unchanged; otherwise the result
+    is whitespace-stripped, whether or not ``role`` has a pattern.
+    """
+    if not text:
+        return text
+    pattern = NUMBERING_PATTERNS.get(role)
+    stripped = pattern.sub('', text, count=1) if pattern else text
+    return stripped.strip()

 class HtmlToHwpConverter:
    def __init__(self, visible=True):
@@ -71,6 +107,8 @@ class HtmlToHwpConverter:
        self.base_path = ""
        self.is_first_h1 = True
        self.image_count = 0
+        self.style_map = {}  # 역할 → 스타일 이름 매핑
+        self.sty_path = None  # .sty 파일 경로
    
    def _mm(self, mm): return self.hwp.MiliToHwpUnit(mm)
    def _pt(self, pt): return self.hwp.PointToHwpUnit(pt)
@@ -155,6 +193,80 @@ class HtmlToHwpConverter:
        except Exception as e:
            print(f"    [경고] 구역 머리말: {e}")

+    # 스타일 적용 관련 (🆕 NEW)
+
+    def _load_style_template(self, sty_path: str):
+        """
+        Load a .sty style template into the document through the "StyleTemplate" action.
+        Returns False if the file is missing or the call raises, True otherwise.
+        """
+        if not os.path.exists(sty_path):
+            print(f"  [경고] 스타일 파일 없음: {sty_path}")
+            return False
+        
+        try:
+            # Run HWP's style-import action with the template path
+            self.hwp.HAction.GetDefault("StyleTemplate", self.hwp.HParameterSet.HStyleTemplate.HSet)
+            self.hwp.HParameterSet.HStyleTemplate.filename = sty_path
+            self.hwp.HAction.Execute("StyleTemplate", self.hwp.HParameterSet.HStyleTemplate.HSet)
+            print(f"  ✅ 스타일 템플릿 로드: {sty_path}")
+            return True
+        except Exception as e:
+            print(f"  [경고] 스타일 로드 실패: {e}")
+            return False
+
+
+    def _apply_style_by_name(self, style_name: str):
+        """
+        Apply the style called ``style_name`` to the text on the current line.
+        Meant to be called right after the text has been inserted; failures are printed.
+        """
+        try:
+            # Select from the start to the end of the current line
+            self.hwp.HAction.Run("MoveLineBegin")
+            self.hwp.HAction.Run("MoveSelLineEnd")
+            
+            # Apply the style
+            self.hwp.HAction.GetDefault("Style", self.hwp.HParameterSet.HStyle.HSet)
+            self.hwp.HParameterSet.HStyle.StyleName = style_name
+            self.hwp.HAction.Execute("Style", self.hwp.HParameterSet.HStyle.HSet)
+            
+            # Put the cursor back at the end of the line
+            self.hwp.HAction.Run("MoveLineEnd")
+            
+        except Exception as e:
+            print(f"  [경고] 스타일 적용 실패 '{style_name}': {e}")
+
+
+    def _build_dynamic_style_map(self, elements: list):
+        """Assign style slots 1-10 to the roles found in ``elements``.
+
+        Heading roles ("H" + digits) come first in numeric order, then every other
+        role in alphabetical order; roles beyond the tenth are left unmapped. The
+        mapping is stored on ``self.style_map`` and also returned.
+
+        Args:
+            elements: objects exposing a ``role`` string attribute.
+
+        Returns:
+            dict mapping role name -> style number (1..10).
+        """
+        roles = {elem.role for elem in elements}
+
+        # Heading roles ordered by level (numeric, so H10 sorts after H9).
+        title_roles = sorted(
+            (r for r in roles if r.startswith('H') and r[1:].isdigit()),
+            key=lambda r: (int(r[1:]), r),
+        )
+        # Sorted so numbering is reproducible; bare set iteration order varies per run.
+        other_roles = sorted(roles.difference(title_roles))
+
+        # zip() truncates to the ten available slots.
+        self.style_map = dict(zip(title_roles + other_roles, range(1, 11)))
+        print(f"  📝 동적 스타일 매핑: {self.style_map}")
+        return self.style_map
+
+

    def _set_font(self, size=11, bold=False, color='#000000'):
        self.hwp.set_font(FaceName='맑은 고딕', Height=size, Bold=bold, TextColor=self._rgb(color))
@@ -372,16 +484,22 @@ class HtmlToHwpConverter:
    # ═══════════════════════════════════════════════════════════════
    def _insert_image(self, src, caption=""):
        self.image_count += 1
-        print(f"    📷 이미지 #{self.image_count}: {os.path.basename(src)}")
        
        if not src:
            return
        
-        # 상대경로 → 절대경로
-        if not os.path.isabs(src):
-            full_path = os.path.normpath(os.path.join(self.base_path, src))
-        else:
-            full_path = src
+        # 🆕 assets 폴더에서 먼저 찾기
+        filename = os.path.basename(src)
+        full_path = os.path.join(self.cfg.ASSETS_PATH, filename)
+        
+        # assets에 없으면 기존 방식으로 fallback
+        if not os.path.exists(full_path):
+            if not os.path.isabs(src):
+                full_path = os.path.normpath(os.path.join(self.base_path, src))
+            else:
+                full_path = src
+        
+        print(f"    📷 이미지 #{self.image_count}: {filename}")
        
        if not os.path.exists(full_path):
            print(f"       ❌ 파일 없음: {full_path}")
@@ -450,7 +568,123 @@ class HtmlToHwpConverter:
                
        except Exception as e:
            print(f"       ❌ 오류: {e}")
-    
+
+    def _insert_table_from_element(self, elem: 'StyledElement'):
+        """Insert the table described by ``elem.attributes['table_data']``.
+
+        ``table_data['rows']`` is a list of rows; each row is a list of cell dicts
+        with optional ``text`` and ``is_header`` keys. Rows shorter than the widest
+        row are treated as padded with empty cells, so the cursor still reaches the
+        start of the next row and later rows stay column-aligned.
+        Returns None; does nothing when there is no table data or no rows.
+        Errors are printed, then a best-effort attempt is made to leave the table.
+        """
+        table_data = elem.attributes.get('table_data', {})
+        rows = table_data.get('rows', []) if table_data else []
+        if not rows:
+            return
+
+        num_rows = len(rows)
+        num_cols = max(len(row) for row in rows)
+        last_cell = (num_rows - 1, num_cols - 1)
+
+        print(f"  → 표 삽입: {num_rows}행 × {num_cols}열")
+
+        try:
+            # 1. Paragraph settings for the line that will hold the table
+            self._set_para('left', 130, before=5, after=0)
+
+            # 2. Create the table (pyhwpx built-in method)
+            self.hwp.create_table(num_rows, num_cols, treat_as_char=True)
+
+            # 3. Fill cells. Every grid column is visited, not just the cells a row
+            #    actually has, so a short row still advances the cursor to the next row.
+            for row_idx, row in enumerate(rows):
+                for col_idx in range(num_cols):
+                    cell = row[col_idx] if col_idx < len(row) else None
+                    if cell is not None:
+                        # Header cells are styled differently from body cells
+                        if cell.get('is_header', False):
+                            self._set_cell_bg('#E8F5E9')
+                            self.hwp.HAction.Run("ParagraphShapeAlignCenter")
+                            self._set_font(9, True, '#006400')
+                        else:
+                            self._set_font(9.5, False, '#333333')
+                        self.hwp.insert_text(cell.get('text', ''))
+
+                    # Move to the next cell (skipped after the final cell)
+                    if (row_idx, col_idx) != last_cell:
+                        self.hwp.HAction.Run("TableRightCell")
+
+            # 4. Leave the table
+            self.hwp.HAction.Run("Cancel")        # clear the selection
+            self.hwp.HAction.Run("CloseEx")       # end table editing
+            self.hwp.HAction.Run("MoveDocEnd")    # jump to the end of the document
+
+            # 5. Paragraph after the table
+            self._set_para('left', 130, before=5, after=5)
+            self.hwp.BreakPara()
+
+            print(f"    ✅ 표 삽입 완료")
+
+        except Exception as e:
+            print(f"    [오류] 표 삽입 실패: {e}")
+            # Try to escape in case the cursor is still inside the table
+            try:
+                self.hwp.HAction.Run("Cancel")
+                self.hwp.HAction.Run("CloseEx")
+                self.hwp.HAction.Run("MoveDocEnd")
+            except Exception:
+                # Escape is best-effort; the original failure was already reported
+                pass
+
+    def _move_to_cell(self, row: int, col: int):
+        """Move the cursor to cell (row, col) of the current table, counting from 0."""
+        # Go to the first cell
+        self.hwp.HAction.Run("TableColBegin")
+        self.hwp.HAction.Run("TableRowBegin")
+        
+        # Step down ``row`` times
+        for _ in range(row):
+            self.hwp.HAction.Run("TableLowerCell")
+        
+        # Step right ``col`` times
+        for _ in range(col):
+            self.hwp.HAction.Run("TableRightCell")
+
+    def _apply_cell_style(self, bold=False, bg_color=None, align='left'):
+        """Apply bold, alignment and background formatting at the current cell."""
+        # Character weight
+        if bold:
+            self.hwp.HAction.Run("CharShapeBold")
+        # Alignment: values other than left/center/right are ignored
+        alignment_action = {
+            'left': "ParagraphShapeAlignLeft",
+            'center': "ParagraphShapeAlignCenter",
+            'right': "ParagraphShapeAlignRight",
+        }.get(align)
+        if alignment_action is not None:
+            self.hwp.HAction.Run(alignment_action)
+
+        # Background colour is delegated to _apply_cell_bg
+        if not bg_color:
+            return
+        self._apply_cell_bg(bg_color)
+
+    def _apply_cell_bg(self, color: str):
+        """Fill the current cell with ``color`` ("#RRGGBB" or "RRGGBB"); errors are printed."""
+        try:
+            color = color.lstrip('#')
+            r, g, b = int(color[0:2], 16), int(color[2:4], 16), int(color[4:6], 16)
+            # NOTE(review): action/parameter names below are not verified against the HWP automation reference
+            self.hwp.HAction.GetDefault("CellBorder", self.hwp.HParameterSet.HCellBorderFill.HSet)
+            self.hwp.HParameterSet.HCellBorderFill.FillAttr.FillType = 1  # solid colour
+            self.hwp.HParameterSet.HCellBorderFill.FillAttr.WinBrush.FaceColor = self.hwp.RGBColor(r, g, b)
+            self.hwp.HAction.Execute("CellBorder", self.hwp.HParameterSet.HCellBorderFill.HSet)
+        except Exception as e:
+            print(f"    [경고] 셀 배경색: {e}")
+
+
    def _insert_highlight_box(self, elem):
        txt = elem.get_text(strip=True)
        if not txt: return
@@ -551,19 +785,225 @@ class HtmlToHwpConverter:
        print(f"\n✅ 저장: {output_path}")
        print(f"   이미지: {self.image_count}개 처리")
    
+    def convert_with_styles(self, html_path, output_path, sty_path=None):
+        """
+        Convert an HTML report to HWP: analyse roles/styles, then build and save the document.
+        NOTE(review): ``sty_path`` is accepted but never read in this method, and
+        ``elements`` is only used for the summary print — confirm the intended use.
+        """
+        print("="*60)
+        print("HTML → HWP 변환기 v11 (스타일 그루핑)")
+        print("="*60)
+        
+        self.base_path = os.path.dirname(os.path.abspath(html_path))
+        self.is_first_h1 = True
+        self.image_count = 0
+        
+        # 1. Read the HTML file
+        with open(html_path, 'r', encoding='utf-8') as f:
+            html_content = f.read()
+        
+        # 2. Analyse styles
+        from converters.style_analyzer import StyleAnalyzer
+        from converters.hwp_style_mapping import HwpStyGenerator
+        
+        analyzer = StyleAnalyzer()
+        elements = analyzer.analyze(html_content)
+        html_styles = analyzer.extract_css_styles(html_content)
+        
+        print(f"\n📊 분석 결과: {len(elements)}개 요소")
+        for role, count in analyzer.get_role_summary().items():
+            print(f"   {role}: {count}")
+        
+        # 3. Build the style mapping
+        sty_gen = HwpStyGenerator()
+        sty_gen.update_from_html(html_styles)
+        self.style_map = sty_gen.apply_to_hwp(self.hwp)  # Dict[str, HwpStyle]
+        self.sty_gen = sty_gen  # kept for later use
+        
+        # 4. Page-building flow (per the original note, the same logic as convert())
+        soup = BeautifulSoup(html_content, 'html.parser')
+        
+        title_tag = soup.find('title')
+        if title_tag:
+            full_title = title_tag.get_text(strip=True)
+            footer_title = full_title.split(':')[0].strip()
+        else:
+            footer_title = ""
+        
+        self.hwp.FileNew()
+        self._setup_page()
+        self._create_footer(footer_title)
+        
+        raw = soup.find(id='raw-container')
+        if raw:
+            cover = raw.find(id='box-cover')
+            if cover:
+                print("  → 표지")
+                for ch in cover.children:
+                    self._process(ch)
+                self.hwp.HAction.Run("BreakPage")
+            
+            toc = raw.find(id='box-toc')
+            if toc:
+                print("  → 목차")
+                self.is_first_h1 = True
+                self._underline_box("목 차", 20, '#008000')
+                self.hwp.BreakPara()
+                self.hwp.BreakPara()
+                self._insert_list(toc.find('ul') or toc)
+                self.hwp.HAction.Run("BreakPage")
+            
+            summary = raw.find(id='box-summary')
+            if summary:
+                print("  → 요약")
+                self.is_first_h1 = True
+                self._process(summary)
+                self.hwp.HAction.Run("BreakPage")
+            
+            content = raw.find(id='box-content')
+            if content:
+                print("  → 본문")
+                self.is_first_h1 = True
+                self._process(content)
+        else:
+            self._process(soup.find('body') or soup)
+        
+        # 5. Save
+        self.hwp.SaveAs(output_path)
+        print(f"\n✅ 저장: {output_path}")
+        print(f"   이미지: {self.image_count}개 처리")
+
+
+    def _insert_styled_element(self, elem: 'StyledElement'):
+        """Insert one analysed element: image, table, or a formatted paragraph.
+
+        FIGURE inserts ``attributes['src']`` as an image; TABLE is delegated to
+        ``_insert_table_from_element``; TH/TD and empty-text elements are skipped.
+        Other roles are written with the formatting from ``_get_style_config`` and
+        then tagged with the role's named style from ``self.style_map``, if any.
+        """
+        role = elem.role
+        text = elem.text
+
+        # ═══ Special elements ═══
+
+        # Figure
+        if role == 'FIGURE':
+            src = elem.attributes.get('src', '')
+            if src:
+                self._insert_image(src)
+            return
+
+        # Table
+        if role == 'TABLE':
+            self._insert_table_from_element(elem)
+            return
+
+        # Table cells are written as part of their TABLE element
+        if role in ['TH', 'TD']:
+            return
+
+        # Skip empty text
+        if not text:
+            return
+
+        # ═══ Text elements ═══
+
+        # Text is inserted as-is; strip_numbering(text, role) is the opt-in alternative
+        # for when HWP outline numbering generates the prefixes.
+        clean_text = text
+
+        # 1. Look up the direct-formatting settings for this role
+        style_config = self._get_style_config(role)
+
+        # 2. Paragraph shape first
+        self._set_para(
+            align=style_config.get('align', 'justify'),
+            lh=style_config.get('line_height', 160),
+            left=style_config.get('indent_left', 0),
+            indent=style_config.get('indent_first', 0),
+            before=style_config.get('space_before', 0),
+            after=style_config.get('space_after', 0)
+        )
+
+        # 3. Character shape
+        self._set_font(
+            size=style_config.get('font_size', 11),
+            bold=style_config.get('bold', False),
+            color=style_config.get('color', '#000000')
+        )
+
+        # 4. Insert the text
+        self.hwp.insert_text(clean_text)
+
+        # 5. Tag with the named style (so it shows in the style list). style_map values
+        #    may be HwpStyle objects, names, or outline numbers; only names can be applied.
+        style = self.style_map.get(role)
+        style_name = getattr(style, 'name', style)
+        if isinstance(style_name, str) and style_name:
+            self._apply_style_by_name(style_name)
+
+        # 6. Paragraph break
+        self.hwp.BreakPara()
+
+
+    def _get_style_config(self, role: str) -> dict:
+        """Return the direct-formatting settings for ``role`` (UNKNOWN's if unlisted)."""
+
+        configs = {
+            # Cover page
+            'COVER_TITLE': dict(font_size=32, bold=True, align='center', color='#1a365d', space_before=20, space_after=10),
+            'COVER_SUBTITLE': dict(font_size=18, bold=False, align='center', color='#555555'),
+            'COVER_INFO': dict(font_size=12, align='center', color='#666666'),
+
+            # Table of contents
+            'TOC_H1': dict(font_size=12, bold=True, indent_left=0),
+            'TOC_H2': dict(font_size=11, indent_left=5),
+            'TOC_H3': dict(font_size=10, indent_left=10, color='#666666'),
+
+            # Heading hierarchy
+            'H1': dict(font_size=20, bold=True, align='left', color='#008000', space_before=15, space_after=8),
+            'H2': dict(font_size=16, bold=True, align='left', color='#03581d', space_before=12, space_after=6),
+            'H3': dict(font_size=13, bold=True, align='left', color='#228B22', space_before=10, space_after=5),
+            'H4': dict(font_size=12, bold=True, align='left', indent_left=3, space_before=8, space_after=4),
+            'H5': dict(font_size=11, bold=True, align='left', indent_left=6, space_before=6, space_after=3),
+            'H6': dict(font_size=11, bold=False, align='left', indent_left=9),
+            'H7': dict(font_size=10.5, bold=False, align='left', indent_left=12),
+
+            # Body text
+            'BODY': dict(font_size=11, align='justify', line_height=180, indent_first=3),
+            'LIST_ITEM': dict(font_size=11, align='left', indent_left=5),
+            'HIGHLIGHT_BOX': dict(font_size=10.5, align='left', indent_left=3),
+
+            # Tables
+            'TH': dict(font_size=9, bold=True, align='center', color='#006400'),
+            'TD': dict(font_size=9.5, align='left'),
+            'TABLE_CAPTION': dict(font_size=10, bold=True, align='center'),
+
+            # Figures
+            'FIGURE': dict(align='center'),
+            'FIGURE_CAPTION': dict(font_size=9.5, align='center', color='#666666'),
+
+            # Other
+            'UNKNOWN': dict(font_size=11, align='left'),
+        }
+        # Roles without an entry fall back to the UNKNOWN settings
+        return configs[role] if role in configs else configs['UNKNOWN']
+
    def close(self):
        try: self.hwp.Quit()
        except: pass

-
 def main():
    html_path = r"D:\for python\survey_test\output\generated\report.html"
-    output_path = r"D:\for python\survey_test\output\generated\report_v12.hwp"
+    output_path = r"D:\for python\survey_test\output\generated\report_styled.hwp"
+    sty_path = r"D:\for python\survey_test\교통영향평가스타일.sty"  # 🆕 추가
    
    try:
        conv = HtmlToHwpConverter(visible=True)
-        conv.convert(html_path, output_path)
-        input("\nEnter를 누르면 HWP가 닫힙니다...")  # ← 선택사항
+        conv.convert_with_styles(html_path, output_path, sty_path)  # 🆕 sty_path 추가
+        input("\nEnter를 누르면 HWP가 닫힙니다...")
        conv.close()
    except Exception as e:
        print(f"\n[에러] {e}")
--- a/converters/html_to_hwp_briefing.py
+++ b/converters/html_to_hwp_briefing.py
--- a/converters/hwp_style_mapping.py
+++ b/converters/hwp_style_mapping.py
@@ -0,0 +1,434 @@
+# -*- coding: utf-8 -*-
+"""
+HWP 스타일 매핑 모듈 v2.0
+HTML 역할(Role) → HWP 스타일 매핑
+
+✅ v2.0 변경사항:
+- pyhwpx API에 맞게 apply_to_hwp() 재작성
+- CharShape/ParaShape 직접 설정 방식
+- 역할 → 개요 스타일 매핑
+"""
+
+from dataclasses import dataclass
+from typing import Dict, Optional
+from enum import Enum
+
+
+class HwpStyleType(Enum):
+    """Kind of HWP style: paragraph-level or character-level."""
+    PARAGRAPH = "paragraph"
+    CHARACTER = "character"
+
+
+@dataclass
+class HwpStyle:
+    """Formatting attributes for one named HWP style."""
+    id: int
+    name: str  # style name, e.g. "개요 1"
+    type: HwpStyleType
+    font_size: float  # points (HwpStyGenerator converts it with PointToHwpUnit)
+    font_bold: bool = False
+    font_color: str = "000000"  # hex RRGGBB; consumers strip a leading '#'
+    align: str = "justify"  # 'left' | 'center' | 'right' | 'justify'
+    line_spacing: float = 160  # percent (applied with LineSpaceType 0)
+    space_before: float = 0  # converted with PointToHwpUnit by HwpStyGenerator
+    space_after: float = 0  # converted with PointToHwpUnit by HwpStyGenerator
+    indent_left: float = 0  # converted with MiliToHwpUnit in apply_para_shape
+    indent_first: float = 0  # converted with MiliToHwpUnit in apply_para_shape
+    bg_color: Optional[str] = None  # hex colour such as "f7fafc", or None
+
+
+# =============================================================================
+# 기본 스타일 템플릿
+# =============================================================================
+DEFAULT_STYLES: Dict[str, HwpStyle] = {
+    # Cover page
+    "COVER_TITLE": HwpStyle(
+        id=100, name="표지제목", type=HwpStyleType.PARAGRAPH,
+        font_size=32, font_bold=True, align="center",
+        space_before=20, space_after=10, font_color="1a365d"
+    ),
+    "COVER_SUBTITLE": HwpStyle(
+        id=101, name="표지부제", type=HwpStyleType.PARAGRAPH,
+        font_size=18, font_bold=False, align="center",
+        font_color="555555"
+    ),
+    "COVER_INFO": HwpStyle(
+        id=102, name="표지정보", type=HwpStyleType.PARAGRAPH,
+        font_size=12, align="center", font_color="666666"
+    ),
+    
+    # Table of contents
+    "TOC_H1": HwpStyle(
+        id=110, name="목차1수준", type=HwpStyleType.PARAGRAPH,
+        font_size=12, font_bold=True, indent_left=0
+    ),
+    "TOC_H2": HwpStyle(
+        id=111, name="목차2수준", type=HwpStyleType.PARAGRAPH,
+        font_size=11, indent_left=20
+    ),
+    "TOC_H3": HwpStyle(
+        id=112, name="목차3수준", type=HwpStyleType.PARAGRAPH,
+        font_size=10, indent_left=40, font_color="666666"
+    ),
+    
+    # Heading hierarchy (mapped to the "개요 1"–"개요 7" outline styles)
+    "H1": HwpStyle(
+        id=1, name="개요 1", type=HwpStyleType.PARAGRAPH,
+        font_size=20, font_bold=True, align="left",
+        space_before=30, space_after=15, font_color="1a365d"
+    ),
+    "H2": HwpStyle(
+        id=2, name="개요 2", type=HwpStyleType.PARAGRAPH,
+        font_size=16, font_bold=True, align="left",
+        space_before=20, space_after=10, font_color="2c5282"
+    ),
+    "H3": HwpStyle(
+        id=3, name="개요 3", type=HwpStyleType.PARAGRAPH,
+        font_size=14, font_bold=True, align="left",
+        space_before=15, space_after=8, font_color="2b6cb0"
+    ),
+    "H4": HwpStyle(
+        id=4, name="개요 4", type=HwpStyleType.PARAGRAPH,
+        font_size=12, font_bold=True, align="left",
+        space_before=10, space_after=5, indent_left=10
+    ),
+    "H5": HwpStyle(
+        id=5, name="개요 5", type=HwpStyleType.PARAGRAPH,
+        font_size=11, font_bold=True, align="left",
+        space_before=8, space_after=4, indent_left=20
+    ),
+    "H6": HwpStyle(
+        id=6, name="개요 6", type=HwpStyleType.PARAGRAPH,
+        font_size=11, font_bold=False, align="left",
+        indent_left=30
+    ),
+    "H7": HwpStyle(
+        id=7, name="개요 7", type=HwpStyleType.PARAGRAPH,
+        font_size=10.5, font_bold=False, align="left",
+        indent_left=40
+    ),
+    
+    # Body text
+    "BODY": HwpStyle(
+        id=20, name="바탕글", type=HwpStyleType.PARAGRAPH,
+        font_size=11, align="justify",
+        line_spacing=180, indent_first=10
+    ),
+    "LIST_ITEM": HwpStyle(
+        id=8, name="개요 8", type=HwpStyleType.PARAGRAPH,
+        font_size=11, align="left",
+        indent_left=15, line_spacing=160
+    ),
+    "HIGHLIGHT_BOX": HwpStyle(
+        id=21, name="강조박스", type=HwpStyleType.PARAGRAPH,
+        font_size=10.5, align="left",
+        bg_color="f7fafc", indent_left=10, indent_first=0
+    ),
+    
+    # Tables
+    "TABLE": HwpStyle(
+        id=30, name="표", type=HwpStyleType.PARAGRAPH,
+        font_size=10, align="center"
+    ),
+    "TH": HwpStyle(
+        id=11, name="표제목", type=HwpStyleType.PARAGRAPH,
+        font_size=10, font_bold=True, align="center",
+        bg_color="e2e8f0"
+    ),
+    "TD": HwpStyle(
+        id=31, name="표내용", type=HwpStyleType.PARAGRAPH,
+        font_size=10, align="left"
+    ),
+    "TABLE_CAPTION": HwpStyle(
+        id=19, name="표캡션", type=HwpStyleType.PARAGRAPH,
+        font_size=10, font_bold=True, align="center",
+        space_before=5, space_after=3
+    ),
+    
+    # Figures
+    "FIGURE": HwpStyle(
+        id=32, name="그림", type=HwpStyleType.PARAGRAPH,
+        font_size=10, align="center"
+    ),
+    "FIGURE_CAPTION": HwpStyle(
+        id=18, name="그림캡션", type=HwpStyleType.PARAGRAPH,
+        font_size=9.5, align="center",
+        font_color="666666", space_before=5
+    ),
+    
+    # Other (fallback entry used for roles without their own style)
+    "UNKNOWN": HwpStyle(
+        id=0, name="바탕글", type=HwpStyleType.PARAGRAPH,
+        font_size=10, align="left"
+    ),
+}
+
+# Role → outline number mapping (for the StyleShortcut actions)
+ROLE_TO_OUTLINE_NUM = {
+    "H1": 1,
+    "H2": 2,
+    "H3": 3,
+    "H4": 4,
+    "H5": 5,
+    "H6": 6,
+    "H7": 7,
+    "LIST_ITEM": 8,
+    "BODY": 0,  # the base "바탕글" style
+    "COVER_TITLE": 0,
+    "COVER_SUBTITLE": 0,
+    "COVER_INFO": 0,
+}
+
+# Role → HWP style name mapping (not every DEFAULT_STYLES role has an entry here)
+ROLE_TO_STYLE_NAME = {
+    "H1": "개요 1",
+    "H2": "개요 2",
+    "H3": "개요 3",
+    "H4": "개요 4",
+    "H5": "개요 5",
+    "H6": "개요 6",
+    "H7": "개요 7",
+    "LIST_ITEM": "개요 8",
+    "BODY": "바탕글",
+    "COVER_TITLE": "표지제목",
+    "COVER_SUBTITLE": "표지부제",
+    "TH": "표제목",
+    "TD": "표내용",
+    "TABLE_CAPTION": "표캡션",
+    "FIGURE_CAPTION": "그림캡션",
+    "UNKNOWN": "바탕글",
+}
+
+
+class HwpStyleMapper:
+    """Resolve HTML roles to HWP styles: DEFAULT_STYLES plus optional overrides."""
+    def __init__(self, custom_styles: Optional[Dict[str, HwpStyle]] = None):
+        # Shallow copy: the dict is private to this mapper, the entries are shared
+        self.styles = dict(DEFAULT_STYLES)
+        self.styles.update(custom_styles or {})
+
+    def get_style(self, role: str) -> HwpStyle:
+        fallback = self.styles["UNKNOWN"]
+        return self.styles.get(role, fallback)
+
+    def get_style_id(self, role: str) -> int:
+        return self.get_style(role).id
+
+    def get_all_styles(self) -> Dict[str, HwpStyle]:
+        return self.styles
+
+
+class HwpStyGenerator:
+    """
+    Applies HTML-derived styles to an HWP document.
+    
+    Using the pyhwpx API, this class:
+    1. stores the style information for each role,
+    2. applies CharShape/ParaShape settings directly to the current text,
+    3. hands back the role → HwpStyle mapping (apply_to_hwp).
+    """
+    
+    def __init__(self):
+        self.styles: Dict[str, HwpStyle] = {}
+        self.hwp = None
+    
+    def update_from_html(self, html_styles: Dict[str, Dict]):
+        """Merge styles extracted from HTML over DEFAULT_STYLES, role by role."""
+        for role, style_dict in html_styles.items():
+            if role in DEFAULT_STYLES:
+                base = DEFAULT_STYLES[role]
+                
+                # Colour handling: drop a leading '#'
+                color = style_dict.get('color', base.font_color)
+                if isinstance(color, str):
+                    color = color.lstrip('#')
+                
+                self.styles[role] = HwpStyle(
+                    id=base.id,
+                    name=base.name,
+                    type=base.type,
+                    font_size=style_dict.get('font_size', base.font_size),
+                    font_bold=style_dict.get('bold', base.font_bold),
+                    font_color=color,
+                    align=style_dict.get('align', base.align),
+                    line_spacing=style_dict.get('line_spacing', base.line_spacing),
+                    space_before=style_dict.get('space_before', base.space_before),
+                    space_after=style_dict.get('space_after', base.space_after),
+                    indent_left=style_dict.get('indent_left', base.indent_left),
+                    indent_first=style_dict.get('indent_first', base.indent_first),
+                    bg_color=style_dict.get('bg_color', base.bg_color),
+                )
+            else:
+                # Roles without a default entry share the UNKNOWN style object
+                self.styles[role] = DEFAULT_STYLES.get('UNKNOWN')
+        
+        # Roles missing from the HTML are filled in with their defaults
+        for role in DEFAULT_STYLES:
+            if role not in self.styles:
+                self.styles[role] = DEFAULT_STYLES[role]
+    
+    def apply_to_hwp(self, hwp) -> Dict[str, HwpStyle]:
+        """Remember ``hwp`` and return the role → HwpStyle mapping."""
+        self.hwp = hwp
+        
+        # 🚫 Style creation disabled (API problem)
+        # for role, style in self.styles.items():
+        #     self._create_or_update_style(hwp, role, style)
+        
+        if not self.styles:
+            self.styles = DEFAULT_STYLES.copy()
+        
+        print(f"  ✅ 스타일 매핑 완료: {len(self.styles)}개")
+        return self.styles
+
+    def _create_or_update_style(self, hwp, role: str, style: HwpStyle):
+        """Define or modify ``style`` in HWP via "ModifyStyle"; its only call site is commented out."""
+        try:
+            # 1. Start a style edit
+            hwp.HAction.GetDefault("ModifyStyle", hwp.HParameterSet.HStyle.HSet)
+            hwp.HParameterSet.HStyle.StyleName = style.name
+            
+            # 2. Character shape (colours that are not 6 hex digits fall back to black)
+            color_hex = style.font_color.lstrip('#')
+            if len(color_hex) == 6:
+                r, g, b = int(color_hex[0:2], 16), int(color_hex[2:4], 16), int(color_hex[4:6], 16)
+                text_color = hwp.RGBColor(r, g, b)
+            else:
+                text_color = hwp.RGBColor(0, 0, 0)
+            
+            hwp.HParameterSet.HStyle.CharShape.Height = hwp.PointToHwpUnit(style.font_size)
+            hwp.HParameterSet.HStyle.CharShape.Bold = style.font_bold
+            hwp.HParameterSet.HStyle.CharShape.TextColor = text_color
+            
+            # 3. Paragraph shape (unknown align values fall back to 3, i.e. 'justify')
+            align_map = {'left': 0, 'center': 1, 'right': 2, 'justify': 3}
+            hwp.HParameterSet.HStyle.ParaShape.Align = align_map.get(style.align, 3)
+            hwp.HParameterSet.HStyle.ParaShape.LineSpacing = int(style.line_spacing)
+            hwp.HParameterSet.HStyle.ParaShape.SpaceBeforePara = hwp.PointToHwpUnit(style.space_before)
+            hwp.HParameterSet.HStyle.ParaShape.SpaceAfterPara = hwp.PointToHwpUnit(style.space_after)
+            
+            # 4. Execute
+            hwp.HAction.Execute("ModifyStyle", hwp.HParameterSet.HStyle.HSet)
+            print(f"    ✓ 스타일 '{style.name}' 정의됨")
+            
+        except Exception as e:
+            print(f"    [경고] 스타일 '{style.name}' 생성 실패: {e}")
+        
+    def get_style(self, role: str) -> HwpStyle:
+        """Return the style for ``role``, or the default UNKNOWN style if none is stored."""
+        return self.styles.get(role, DEFAULT_STYLES.get('UNKNOWN'))
+    
+    def apply_char_shape(self, hwp, role: str):
+        """Apply the role's size, bold flag and colour via the "CharShape" action."""
+        style = self.get_style(role)
+        
+        try:
+            # Convert the hex colour to RGB (black when it is not 6 hex digits)
+            color_hex = style.font_color.lstrip('#') if style.font_color else '000000'
+            if len(color_hex) == 6:
+                r = int(color_hex[0:2], 16)
+                g = int(color_hex[2:4], 16)
+                b = int(color_hex[4:6], 16)
+                text_color = hwp.RGBColor(r, g, b)
+            else:
+                text_color = hwp.RGBColor(0, 0, 0)
+            
+            # Set the character shape
+            hwp.HAction.GetDefault("CharShape", hwp.HParameterSet.HCharShape.HSet)
+            hwp.HParameterSet.HCharShape.Height = hwp.PointToHwpUnit(style.font_size)
+            hwp.HParameterSet.HCharShape.Bold = style.font_bold
+            hwp.HParameterSet.HCharShape.TextColor = text_color
+            hwp.HAction.Execute("CharShape", hwp.HParameterSet.HCharShape.HSet)
+            
+        except Exception as e:
+            print(f"    [경고] 글자 모양 적용 실패 ({role}): {e}")
+    
+    def apply_para_shape(self, hwp, role: str):
+        """Apply the role's alignment, line spacing, indents and spacing to the paragraph."""
+        style = self.get_style(role)
+        
+        try:
+            # Alignment
+            align_actions = {
+                'left': "ParagraphShapeAlignLeft",
+                'center': "ParagraphShapeAlignCenter",
+                'right': "ParagraphShapeAlignRight",
+                'justify': "ParagraphShapeAlignJustify"
+            }
+            if style.align in align_actions:
+                hwp.HAction.Run(align_actions[style.align])
+            
+            # Detailed paragraph shape settings
+            hwp.HAction.GetDefault("ParagraphShape", hwp.HParameterSet.HParaShape.HSet)
+            p = hwp.HParameterSet.HParaShape
+            p.LineSpaceType = 0  # percent
+            p.LineSpacing = int(style.line_spacing)
+            p.LeftMargin = hwp.MiliToHwpUnit(style.indent_left)
+            p.IndentMargin = hwp.MiliToHwpUnit(style.indent_first)
+            p.SpaceBeforePara = hwp.PointToHwpUnit(style.space_before)
+            p.SpaceAfterPara = hwp.PointToHwpUnit(style.space_after)
+            hwp.HAction.Execute("ParagraphShape", p.HSet)
+            
+        except Exception as e:
+            print(f"    [경고] 문단 모양 적용 실패 ({role}): {e}")
+    
+    def apply_style(self, hwp, role: str):
+        """Apply the role's full style: character shape, then paragraph shape."""
+        self.apply_char_shape(hwp, role)
+        self.apply_para_shape(hwp, role)
+    
+    def export_sty(self, hwp, output_path: str) -> bool:
+        """Export a style file (not supported yet: prints a notice and returns False)."""
+        print(f"  [알림] .sty 내보내기는 현재 미지원")
+        return False
+
+
+# =============================================================================
+# 번호 제거 유틸리티
+# =============================================================================
+import re
+
+NUMBERING_PATTERNS = {
+    # Look-aheads keep a shallow pattern from eating part of a deeper number ("1.1" as H1).
+    'H1': re.compile(r'^\d+\.(?!\d)\s*'),                    # "1. " → ""
+    'H2': re.compile(r'^\d+\.\d+(?![.\d])\s*'),              # "1.1 " → ""
+    'H3': re.compile(r'^\d+\.\d+\.\d+(?![.\d])\s*'),         # "1.1.1 " → ""
+    'H4': re.compile(r'^[가나다라마바사아자차카타파하]\.\s*'),   # "가. " → "" (14 enumerators only)
+    'H5': re.compile(r'^(\d+)\)\s*'),           # "1) " → ""
+    'H6': re.compile(r'^\((\d+)\)\s*'),         # "(1) " → ""
+    'H7': re.compile(r'^[①②③④⑤⑥⑦⑧⑨⑩]\s*'),  # "① " → ""
+    'LIST_ITEM': re.compile(r'^[•\-○]\s*'),    # "• " → ""
+}
+
+def strip_numbering(text: str, role: str) -> str:
+    """Strip the leading outline number/bullet that matches ``role`` from ``text``.
+
+    HWP outline styles generate numbering themselves, so keeping the literal prefix
+    would duplicate it. Falsy ``text`` is returned unchanged; otherwise the result
+    is whitespace-stripped, whether or not ``role`` has a pattern.
+    """
+    if not text:
+        return text
+    pattern = NUMBERING_PATTERNS.get(role)
+    stripped = pattern.sub('', text, count=1) if pattern else text
+    return stripped.strip()
+
+
+if __name__ == "__main__":
+    # Smoke test
+    print("=== 스타일 매핑 테스트 ===")
+    
+    gen = HwpStyGenerator()
+    
+    # Simulated styles as they would be extracted from HTML
+    html_styles = {
+        'H1': {'font_size': 20, 'color': '#1a365d', 'bold': True},
+        'H2': {'font_size': 16, 'color': '#2c5282', 'bold': True},
+        'BODY': {'font_size': 11, 'align': 'justify'},
+    }
+    
+    gen.update_from_html(html_styles)
+    
+    for role, style in gen.styles.items():
+        print(f"{role:15} → size={style.font_size}pt, bold={style.font_bold}, color=#{style.font_color}")
--- a/converters/hwpx_generator.py
+++ b/converters/hwpx_generator.py
@@ -0,0 +1,431 @@
+"""
+HWPX 파일 생성기
+StyleAnalyzer 결과를 받아 스타일이 적용된 HWPX 파일 생성
+"""
+
+import os
+import zipfile
+import xml.etree.ElementTree as ET
+from typing import List, Dict, Optional
+from dataclasses import dataclass
+from pathlib import Path
+
+from style_analyzer import StyleAnalyzer, StyledElement
+from hwp_style_mapping import HwpStyleMapper, HwpStyle, ROLE_TO_STYLE_NAME
+
+
+@dataclass
+class HwpxConfig:
+    """Settings for HWPX generation (paper size, margins, default font)."""
+    paper_width: int = 59528      # A4 width (hwpunit, 1/7200 inch)
+    paper_height: int = 84188     # A4 height
+    # Margins: presumably hwpunit like the paper size above — TODO confirm
+    margin_left: int = 8504
+    margin_right: int = 8504
+    margin_top: int = 5668
+    margin_bottom: int = 4252
+    default_font: str = "함초롬바탕"
+    default_font_size: int = 1000  # 10pt (hwpunit)
+
+
+class HwpxGenerator:
+    """HWPX 파일 생성기"""
+    
+    def __init__(self, config: Optional[HwpxConfig] = None) -> None:
+        """Store the configuration and create the style mapper.
+
+        Args:
+            config: Generation settings; a default HwpxConfig is used when omitted.
+        """
+        self.config = config or HwpxConfig()
+        self.mapper = HwpStyleMapper()
+        # Roles seen in the most recent generate() call; drives the emitted style tables
+        self.used_styles: set = set()
+    
+    def generate(self, elements: List[StyledElement], output_path: str) -> str:
+        """
+        Build an HWPX file from a list of StyledElement objects.
+
+        The package parts are written into a freshly created, uniquely named
+        staging directory next to ``output_path``, zipped into the output
+        file, and the staging directory is removed afterwards (also when a
+        step raises).
+
+        Args:
+            elements: Elements classified by StyleAnalyzer; their ``role``
+                values select which style definitions are emitted.
+            output_path: Destination file path (.hwpx).
+
+        Returns:
+            ``output_path`` unchanged.
+        """
+        import shutil
+        import tempfile
+
+        # Collect the roles in use
+        self.used_styles = {e.role for e in elements}
+
+        target = Path(output_path)
+        # Creating the former staging path "<output>.temp" with parents=True also
+        # created missing parent directories of the output; keep that behaviour.
+        target.parent.mkdir(parents=True, exist_ok=True)
+
+        # mkdtemp returns an empty directory owned by this call. Reusing a fixed
+        # "<output>.temp" path would zip stale files left there and then delete a
+        # pre-existing directory of that name.
+        temp_dir = Path(tempfile.mkdtemp(
+            prefix=f"{target.stem}_", suffix=".temp", dir=str(target.parent)))
+
+        try:
+            # Write the package parts
+            self._create_mimetype(temp_dir)
+            self._create_meta_inf(temp_dir)
+            self._create_version(temp_dir)
+            self._create_header(temp_dir)
+            self._create_content(temp_dir, elements)
+            self._create_settings(temp_dir)
+
+            # Compress into the final ZIP container
+            self._create_hwpx(temp_dir, output_path)
+
+            return output_path
+
+        finally:
+            # Always remove the staging directory
+            if temp_dir.exists():
+                shutil.rmtree(temp_dir)
+    
+    def _create_mimetype(self, temp_dir: Path):
+        """Write the ``mimetype`` part into the staging directory."""
+        (temp_dir / "mimetype").write_text("application/hwp+zip")
+    
+    def _create_meta_inf(self, temp_dir: Path):
+        """Create META-INF/manifest.xml listing the package parts.
+
+        The manifest is a fixed literal naming version.xml,
+        Contents/header.xml, Contents/section0.xml and settings.xml.
+        """
+        meta_dir = temp_dir / "META-INF"
+        meta_dir.mkdir(exist_ok=True)
+        
+        manifest = """<?xml version="1.0" encoding="UTF-8"?>
+<manifest:manifest xmlns:manifest="urn:oasis:names:tc:opendocument:xmlns:manifest:1.0">
+    <manifest:file-entry manifest:full-path="/" manifest:media-type="application/hwp+zip"/>
+    <manifest:file-entry manifest:full-path="version.xml" manifest:media-type="application/xml"/>
+    <manifest:file-entry manifest:full-path="Contents/header.xml" manifest:media-type="application/xml"/>
+    <manifest:file-entry manifest:full-path="Contents/section0.xml" manifest:media-type="application/xml"/>
+    <manifest:file-entry manifest:full-path="settings.xml" manifest:media-type="application/xml"/>
+</manifest:manifest>"""
+        
+        (meta_dir / "manifest.xml").write_text(manifest, encoding='utf-8')
+    
+    def _create_version(self, temp_dir: Path):
+        """Create version.xml (a fixed literal) in the staging directory."""
+        version = """<?xml version="1.0" encoding="UTF-8"?>
+<hh:HWPMLVersion xmlns:hh="http://www.hancom.co.kr/hwpml/2011/head" version="1.1"/>"""
+        
+        (temp_dir / "version.xml").write_text(version, encoding='utf-8')
+    
+    def _create_header(self, temp_dir: Path):
+        """Create Contents/header.xml (includes the style definitions).
+
+        Creates the Contents directory if needed, then writes a fixed header
+        template (font faces, border fills) with the generated
+        charProperties, paraProperties and styles fragments spliced in.
+        """
+        contents_dir = temp_dir / "Contents"
+        contents_dir.mkdir(exist_ok=True)
+        
+        # Generate the per-style property fragments
+        char_props_xml = self._generate_char_properties()
+        para_props_xml = self._generate_para_properties()
+        styles_xml = self._generate_styles_xml()
+        
+        header = f"""<?xml version="1.0" encoding="UTF-8"?>
+<hh:head xmlns:hh="http://www.hancom.co.kr/hwpml/2011/head"
+         xmlns:hc="http://www.hancom.co.kr/hwpml/2011/core"
+         xmlns:hp="http://www.hancom.co.kr/hwpml/2011/paragraph"
+         version="1.5" secCnt="1">
+    <hh:beginNum page="1" footnote="1" endnote="1" pic="1" tbl="1" equation="1"/>
+    <hh:refList>
+        <hh:fontfaces itemCnt="7">
+            <hh:fontface lang="HANGUL" fontCnt="2">
+                <hh:font id="0" face="맑은 고딕" type="TTF" isEmbedded="0"/>
+                <hh:font id="1" face="함초롬돋움" type="TTF" isEmbedded="0"/>
+            </hh:fontface>
+            <hh:fontface lang="LATIN" fontCnt="2">
+                <hh:font id="0" face="맑은 고딕" type="TTF" isEmbedded="0"/>
+                <hh:font id="1" face="함초롬돋움" type="TTF" isEmbedded="0"/>
+            </hh:fontface>
+            <hh:fontface lang="HANJA" fontCnt="2">
+                <hh:font id="0" face="맑은 고딕" type="TTF" isEmbedded="0"/>
+                <hh:font id="1" face="함초롬돋움" type="TTF" isEmbedded="0"/>
+            </hh:fontface>
+            <hh:fontface lang="JAPANESE" fontCnt="1">
+                <hh:font id="0" face="맑은 고딕" type="TTF" isEmbedded="0"/>
+            </hh:fontface>
+            <hh:fontface lang="OTHER" fontCnt="1">
+                <hh:font id="0" face="맑은 고딕" type="TTF" isEmbedded="0"/>
+            </hh:fontface>
+            <hh:fontface lang="SYMBOL" fontCnt="1">
+                <hh:font id="0" face="맑은 고딕" type="TTF" isEmbedded="0"/>
+            </hh:fontface>
+            <hh:fontface lang="USER" fontCnt="1">
+                <hh:font id="0" face="맑은 고딕" type="TTF" isEmbedded="0"/>
+            </hh:fontface>
+        </hh:fontfaces>
+        <hh:borderFills itemCnt="2">
+            <hh:borderFill id="1" threeD="0" shadow="0" centerLine="NONE">
+                <hh:slash type="NONE" Crooked="0" isCounter="0"/>
+                <hh:backSlash type="NONE" Crooked="0" isCounter="0"/>
+                <hh:leftBorder type="NONE" width="0.1 mm" color="#000000"/>
+                <hh:rightBorder type="NONE" width="0.1 mm" color="#000000"/>
+                <hh:topBorder type="NONE" width="0.1 mm" color="#000000"/>
+                <hh:bottomBorder type="NONE" width="0.1 mm" color="#000000"/>
+            </hh:borderFill>
+            <hh:borderFill id="2" threeD="0" shadow="0" centerLine="NONE">
+                <hh:slash type="NONE" Crooked="0" isCounter="0"/>
+                <hh:backSlash type="NONE" Crooked="0" isCounter="0"/>
+                <hh:leftBorder type="NONE" width="0.1 mm" color="#000000"/>
+                <hh:rightBorder type="NONE" width="0.1 mm" color="#000000"/>
+                <hh:topBorder type="NONE" width="0.1 mm" color="#000000"/>
+                <hh:bottomBorder type="NONE" width="0.1 mm" color="#000000"/>
+                <hc:fillBrush><hc:winBrush faceColor="none" hatchColor="#000000" alpha="0"/></hc:fillBrush>
+            </hh:borderFill>
+        </hh:borderFills>
+{char_props_xml}
+{para_props_xml}
+{styles_xml}
+    </hh:refList>
+    <hh:compatibleDocument targetProgram="HWP201X"/>
+    <hh:docOption>
+        <hh:linkinfo path="" pageInherit="1" footnoteInherit="0"/>
+    </hh:docOption>
+</hh:head>"""
+        
+        (contents_dir / "header.xml").write_text(header, encoding='utf-8')
+    
+    def _generate_char_properties(self) -> str:
+        """Generate the charProperties XML fragment.
+
+        Emits a default entry (id=0) followed by one entry per role in
+        ``self.used_styles``, numbered from 1 in sorted role order.
+
+        Returns:
+            The fragment as a newline-joined string.
+        """
+        lines = [f'        <hh:charProperties itemCnt="{len(self.used_styles) + 1}">']
+        
+        # Default character properties (id=0)
+        lines.append('''            <hh:charPr id="0" height="1000" textColor="#000000" shadeColor="none" useFontSpace="0" useKerning="0" symMark="NONE" borderFillIDRef="1">
+                <hh:fontRef hangul="0" latin="0" hanja="0" japanese="0" other="0" symbol="0" user="0"/>
+                <hh:ratio hangul="100" latin="100" hanja="100" japanese="100" other="100" symbol="100" user="100"/>
+                <hh:spacing hangul="0" latin="0" hanja="0" japanese="0" other="0" symbol="0" user="0"/>
+                <hh:relSz hangul="100" latin="100" hanja="100" japanese="100" other="100" symbol="100" user="100"/>
+                <hh:offset hangul="0" latin="0" hanja="0" japanese="0" other="0" symbol="0" user="0"/>
+                <hh:underline type="NONE" shape="SOLID" color="#000000"/>
+                <hh:strikeout shape="NONE" color="#000000"/>
+                <hh:outline type="NONE"/>
+                <hh:shadow type="NONE" color="#B2B2B2" offsetX="10" offsetY="10"/>
+            </hh:charPr>''')
+        
+        # Per-role character properties
+        for idx, role in enumerate(sorted(self.used_styles), start=1):
+            style = self.mapper.get_style(role)
+            height = int(style.font_size * 100)  # pt → hwpunit
+            color = style.font_color.lstrip('#')
+            # Bold only switches the font reference (id 1, 함초롬돋움); nothing else
+            # in this template depends on font_bold.
+            font_id = "1" if style.font_bold else "0"
+            
+            lines.append(f'''            <hh:charPr id="{idx}" height="{height}" textColor="#{color}" shadeColor="none" useFontSpace="0" useKerning="0" symMark="NONE" borderFillIDRef="1">
+                <hh:fontRef hangul="{font_id}" latin="{font_id}" hanja="{font_id}" japanese="{font_id}" other="{font_id}" symbol="{font_id}" user="{font_id}"/>
+                <hh:ratio hangul="100" latin="100" hanja="100" japanese="100" other="100" symbol="100" user="100"/>
+                <hh:spacing hangul="0" latin="0" hanja="0" japanese="0" other="0" symbol="0" user="0"/>
+                <hh:relSz hangul="100" latin="100" hanja="100" japanese="100" other="100" symbol="100" user="100"/>
+                <hh:offset hangul="0" latin="0" hanja="0" japanese="0" other="0" symbol="0" user="0"/>
+                <hh:underline type="NONE" shape="SOLID" color="#000000"/>
+                <hh:strikeout shape="NONE" color="#000000"/>
+                <hh:outline type="NONE"/>
+                <hh:shadow type="NONE" color="#B2B2B2" offsetX="10" offsetY="10"/>
+            </hh:charPr>''')
+        
+        lines.append('        </hh:charProperties>')
+        return '\n'.join(lines)
+    
+    def _generate_para_properties(self) -> str:
+        """Generate the paraProperties XML fragment.
+
+        Emits a default entry (id=0) followed by one entry per role in
+        ``self.used_styles``, numbered from 1 in sorted role order, so the
+        ids line up with those produced by ``_generate_char_properties``.
+
+        Returns:
+            The fragment as a newline-joined string.
+        """
+        lines = [f'        <hh:paraProperties itemCnt="{len(self.used_styles) + 1}">']
+        
+        # Default paragraph properties (id=0)
+        lines.append('''            <hh:paraPr id="0" tabPrIDRef="0" condense="0" fontLineHeight="0" snapToGrid="0" suppressLineNumbers="0" checked="0">
+                <hh:align horizontal="JUSTIFY" vertical="BASELINE"/>
+                <hh:heading type="NONE" idRef="0" level="0"/>
+                <hh:breakSetting breakLatinWord="KEEP_WORD" breakNonLatinWord="KEEP_WORD" widowOrphan="0" keepWithNext="0" keepLines="0" pageBreakBefore="0" lineWrap="BREAK"/>
+                <hh:autoSpacing eAsianEng="0" eAsianNum="0"/>
+                <hp:switch xmlns:hp="http://www.hancom.co.kr/hwpml/2011/paragraph">
+                    <hp:case hp:required-namespace="http://www.hancom.co.kr/hwpml/2016/HwpUnitChar">
+                        <hh:margin><hc:intent value="0" unit="HWPUNIT"/><hc:left value="0" unit="HWPUNIT"/><hc:right value="0" unit="HWPUNIT"/><hc:prev value="0" unit="HWPUNIT"/><hc:next value="0" unit="HWPUNIT"/></hh:margin>
+                        <hh:lineSpacing type="PERCENT" value="160" unit="HWPUNIT"/>
+                    </hp:case>
+                    <hp:default>
+                        <hh:margin><hc:intent value="0" unit="HWPUNIT"/><hc:left value="0" unit="HWPUNIT"/><hc:right value="0" unit="HWPUNIT"/><hc:prev value="0" unit="HWPUNIT"/><hc:next value="0" unit="HWPUNIT"/></hh:margin>
+                        <hh:lineSpacing type="PERCENT" value="160" unit="HWPUNIT"/>
+                    </hp:default>
+                </hp:switch>
+                <hh:border borderFillIDRef="1" offsetLeft="0" offsetRight="0" offsetTop="0" offsetBottom="0" connect="0" ignoreMargin="0"/>
+            </hh:paraPr>''')
+        
+        # Per-role paragraph properties; unknown align values fall back to JUSTIFY
+        align_map = {"left": "LEFT", "center": "CENTER", "right": "RIGHT", "justify": "JUSTIFY"}
+        
+        for idx, role in enumerate(sorted(self.used_styles), start=1):
+            style = self.mapper.get_style(role)
+            align_val = align_map.get(style.align, "JUSTIFY")
+            line_spacing = int(style.line_spacing)
+            # The style measurements are scaled by 100 before being written as
+            # HWPUNIT values — presumably they are stored in pt; TODO confirm.
+            left_margin = int(style.indent_left * 100)
+            indent = int(style.indent_first * 100)
+            space_before = int(style.space_before * 100)
+            space_after = int(style.space_after * 100)
+            
+            lines.append(f'''            <hh:paraPr id="{idx}" tabPrIDRef="0" condense="0" fontLineHeight="0" snapToGrid="0" suppressLineNumbers="0" checked="0">
+                <hh:align horizontal="{align_val}" vertical="BASELINE"/>
+                <hh:heading type="NONE" idRef="0" level="0"/>
+                <hh:breakSetting breakLatinWord="KEEP_WORD" breakNonLatinWord="KEEP_WORD" widowOrphan="0" keepWithNext="0" keepLines="0" pageBreakBefore="0" lineWrap="BREAK"/>
+                <hh:autoSpacing eAsianEng="0" eAsianNum="0"/>
+                <hp:switch xmlns:hp="http://www.hancom.co.kr/hwpml/2011/paragraph">
+                    <hp:case hp:required-namespace="http://www.hancom.co.kr/hwpml/2016/HwpUnitChar">
+                        <hh:margin><hc:intent value="{indent}" unit="HWPUNIT"/><hc:left value="{left_margin}" unit="HWPUNIT"/><hc:right value="0" unit="HWPUNIT"/><hc:prev value="{space_before}" unit="HWPUNIT"/><hc:next value="{space_after}" unit="HWPUNIT"/></hh:margin>
+                        <hh:lineSpacing type="PERCENT" value="{line_spacing}" unit="HWPUNIT"/>
+                    </hp:case>
+                    <hp:default>
+                        <hh:margin><hc:intent value="{indent}" unit="HWPUNIT"/><hc:left value="{left_margin}" unit="HWPUNIT"/><hc:right value="0" unit="HWPUNIT"/><hc:prev value="{space_before}" unit="HWPUNIT"/><hc:next value="{space_after}" unit="HWPUNIT"/></hh:margin>
+                        <hh:lineSpacing type="PERCENT" value="{line_spacing}" unit="HWPUNIT"/>
+                    </hp:default>
+                </hp:switch>
+                <hh:border borderFillIDRef="1" offsetLeft="0" offsetRight="0" offsetTop="0" offsetBottom="0" connect="0" ignoreMargin="0"/>
+            </hh:paraPr>''')
+        
+        lines.append('        </hh:paraProperties>')
+        return '\n'.join(lines)
+    
+    def _generate_styles_xml(self) -> str:
+        """Generate the styles XML fragment (references charPr/paraPr ids).
+
+        Emits the default style (id=0) followed by one style per role in
+        ``self.used_styles``, numbered from 1 in sorted role order. Each
+        style points at the charPr/paraPr entry with the same id.
+
+        Returns:
+            The fragment as a newline-joined string.
+        """
+        lines = [f'        <hh:styles itemCnt="{len(self.used_styles) + 1}">']
+
+        # Default style (id=0)
+        lines.append('            <hh:style id="0" type="PARA" name="바탕글" engName="Normal" paraPrIDRef="0" charPrIDRef="0" nextStyleIDRef="0" langID="1042" lockForm="0"/>')
+
+        # Per-role styles
+        for idx, role in enumerate(sorted(self.used_styles), start=1):
+            style = self.mapper.get_style(role)
+            # The name lands inside a double-quoted attribute, so '&' and '"'
+            # must be escaped as well as '<' and '>'.
+            style_name = self._escape_xml(style.name)
+
+            lines.append(f'            <hh:style id="{idx}" type="PARA" name="{style_name}" engName="" paraPrIDRef="{idx}" charPrIDRef="{idx}" nextStyleIDRef="{idx}" langID="1042" lockForm="0"/>')
+
+        lines.append('        </hh:styles>')
+        return '\n'.join(lines)
+    
+    def _create_content(self, temp_dir: Path, elements: List[StyledElement]):
+        """Create Contents/section0.xml (body paragraphs with style references).
+
+        Each element becomes one paragraph that references the style index of
+        its role (1-based, sorted role order; 0 when the role is not in
+        ``self.used_styles``). Table and figure roles are skipped because
+        they need separate handling.
+
+        Args:
+            temp_dir: Staging directory of the package.
+            elements: Elements to serialise, in document order.
+        """
+        contents_dir = temp_dir / "Contents"
+        # Do not rely on _create_header having created the directory first.
+        contents_dir.mkdir(exist_ok=True)
+
+        # Roles that are not emitted as plain paragraphs
+        skipped_roles = {"TH", "TD", "TABLE_CAPTION", "TABLE", "FIGURE"}
+
+        # Role → style index; must match the numbering used in header.xml
+        role_to_idx = {role: idx for idx, role in enumerate(sorted(self.used_styles), start=1)}
+
+        paragraphs = []
+        for elem in elements:
+            if elem.role in skipped_roles:
+                continue
+
+            style = self.mapper.get_style(elem.role)
+            style_idx = role_to_idx.get(elem.role, 0)
+            paragraphs.append(self._create_paragraph(elem.text, style, style_idx))
+
+        body = "".join(paragraphs)
+        section = f"""<?xml version="1.0" encoding="UTF-8"?>
+<hs:sec xmlns:hs="http://www.hancom.co.kr/hwpml/2011/section"
+        xmlns:hc="http://www.hancom.co.kr/hwpml/2011/core">
+{body}
+</hs:sec>"""
+
+        (contents_dir / "section0.xml").write_text(section, encoding='utf-8')
+    
+    def _create_paragraph(self, text: str, style: HwpStyle, style_idx: int) -> str:
+        """Render one paragraph element whose references all use ``style_idx``.
+
+        The text is XML-escaped first; ``style`` is accepted but not read here.
+        """
+        escaped = self._escape_xml(text)
+        ref = style_idx
+
+        return f'''
+    <hp:p xmlns:hp="http://www.hancom.co.kr/hwpml/2011/paragraph" 
+          paraPrIDRef="{ref}" styleIDRef="{ref}" pageBreak="0" columnBreak="0" merged="0">
+        <hp:run charPrIDRef="{ref}">
+            <hp:t>{escaped}</hp:t>
+        </hp:run>
+    </hp:p>'''
+    
+    def _escape_xml(self, text: str) -> str:
+        """Escape the five XML special characters in ``text``."""
+        # '&' must be handled first so the entities added afterwards stay intact.
+        replacements = (
+            ("&", "&amp;"),
+            ("<", "&lt;"),
+            (">", "&gt;"),
+            ('"', "&quot;"),
+            ("'", "&apos;"),
+        )
+        for raw, entity in replacements:
+            text = text.replace(raw, entity)
+        return text
+    
+    def _create_settings(self, temp_dir: Path):
+        """Create settings.xml (a fixed literal: print view, 100% zoom)."""
+        settings = """<?xml version="1.0" encoding="UTF-8"?>
+<hs:settings xmlns:hs="http://www.hancom.co.kr/hwpml/2011/settings">
+    <hs:viewSetting>
+        <hs:viewType val="printView"/>
+        <hs:zoom val="100"/>
+    </hs:viewSetting>
+</hs:settings>"""
+        
+        (temp_dir / "settings.xml").write_text(settings, encoding='utf-8')
+    
+    def _create_hwpx(self, temp_dir: Path, output_path: str):
+        """Zip the staging directory into the HWPX file at ``output_path``.
+
+        ``mimetype`` is written first and stored uncompressed; every other
+        file is deflated under its path relative to ``temp_dir``.
+        """
+        with zipfile.ZipFile(output_path, 'w', zipfile.ZIP_DEFLATED) as archive:
+            archive.write(temp_dir / "mimetype", "mimetype",
+                          compress_type=zipfile.ZIP_STORED)
+
+            for folder, _subdirs, names in os.walk(temp_dir):
+                for name in names:
+                    # Already added above
+                    if name != "mimetype":
+                        member = Path(folder) / name
+                        archive.write(member, member.relative_to(temp_dir))
+
+
+def convert_html_to_hwpx(html: str, output_path: str) -> str:
+    """
+    Convert an HTML string into an HWPX file.
+
+    Classifies the HTML with StyleAnalyzer, prints a per-role summary,
+    then writes the package with HwpxGenerator.
+
+    Args:
+        html: HTML source text
+        output_path: destination file path
+
+    Returns:
+        Path of the generated file
+    """
+    # Step 1: classify the HTML into roles
+    analyzer = StyleAnalyzer()
+    styled = analyzer.analyze(html)
+    print(f"📊 분석 완료: {len(styled)}개 요소")
+
+    summary = analyzer.get_role_summary()
+    for role, count in summary.items():
+        print(f"   {role}: {count}")
+
+    # Step 2: write the HWPX package
+    written = HwpxGenerator().generate(styled, output_path)
+    print(f"✅ 생성 완료: {written}")
+    return written
+
+
+if __name__ == "__main__":
+    # Manual smoke test: convert a small sample document
+    import tempfile
+
+    test_html = """
+    <html>
+    <body>
+        <div class="box-cover">
+            <h1>건설·토목 측량 DX 실무지침</h1>
+            <h2>드론/UAV·GIS·지형/지반 모델 기반</h2>
+            <p>2024년 1월</p>
+        </div>
+        
+        <h1>1. 개요</h1>
+        <p>본 보고서는 건설 및 토목 분야의 측량 디지털 전환에 대한 실무 지침을 제공합니다.</p>
+        
+        <h2>1.1 배경</h2>
+        <p>최근 드론과 GIS 기술의 발전으로 측량 업무가 크게 변화하고 있습니다.</p>
+        
+        <h3>1.1.1 기술 동향</h3>
+        <p>1) <strong>드론 측량의 발전</strong></p>
+        <p>드론을 활용한 측량은 기존 방식 대비 효율성이 크게 향상되었습니다.</p>
+        
+        <p>(1) <strong>RTK 드론</strong></p>
+        <p>실시간 보정 기능을 갖춘 RTK 드론이 보급되고 있습니다.</p>
+        
+        <ul>
+            <li>고정밀 GPS 수신기 내장</li>
+            <li>센티미터 단위 정확도</li>
+        </ul>
+    </body>
+    </html>
+    """
+
+    # Write into the platform temp directory rather than a hard-coded,
+    # machine-specific absolute path.
+    output = os.path.join(tempfile.gettempdir(), "test_output.hwpx")
+    convert_html_to_hwpx(test_html, output)
--- a/converters/style_analyzer.py
+++ b/converters/style_analyzer.py
@@ -0,0 +1,935 @@
+"""
+HTML 스타일 분석기 v3.0
+HTML 요소를 분석하여 역할(Role)을 자동 분류
+
+✅ v3.0 변경사항:
+- 글벗 HTML 구조 완벽 지원 (.sheet, .body-content)
+- 머리말/꼬리말/페이지번호 제거
+- 강력한 중복 콘텐츠 필터링
+- 제목 계층 구조 정확한 인식
+"""
+
+import re
+from bs4 import BeautifulSoup, Tag, NavigableString
+from dataclasses import dataclass
+from typing import List, Dict, Optional, Tuple, Set
+from enum import Enum
+
+
+class DocumentSection(Enum):
+    """Kind of document section an element belongs to."""
+    COVER = "cover"      # cover page
+    TOC = "toc"          # table of contents
+    CONTENT = "content"  # body
+
+
+@dataclass
+class StyledElement:
+    """One classified piece of document content."""
+    role: str           # role name (H1, BODY, TH, ...)
+    text: str           # text content
+    tag: str            # source HTML tag name
+    html: str           # source HTML
+    section: str        # section (cover, toc, content)
+    attributes: Dict    # extra attributes (image src, ...)
+
+    def __repr__(self):
+        # Show at most 30 characters of the text, marking truncation with "..."
+        limit = 30
+        if len(self.text) > limit:
+            preview = self.text[:limit] + "..."
+        else:
+            preview = self.text
+        return "<{}> {}".format(self.role, preview)
+
+
+class StyleAnalyzer:
+    """HTML 문서를 분석하여 역할 분류"""
+    
+    # Numbering patterns, all anchored at the start of the text
+    PATTERNS = {
+        # Chapter number: "제1장", "제2장"
+        "chapter": re.compile(r'^제\s*\d+\s*장'),
+        # Level-1 title: "1 ", "2 " (digits + whitespace, no dot)
+        "h1_num": re.compile(r'^(\d+)\s+[가-힣]'),
+        # Major item: "1.", "2."
+        "h2_num": re.compile(r'^(\d+)\.\s'),
+        # Middle item: "1.1 ", "1.2 "
+        "h3_num": re.compile(r'^(\d+)\.(\d+)\s'),
+        # Minor item: "1.1.1"
+        "h4_num": re.compile(r'^(\d+)\.(\d+)\.(\d+)'),
+        # Detail: "1)", "2)"
+        "h5_paren": re.compile(r'^(\d+)\)\s*'),
+        # Sub-detail: "(1)", "(2)"
+        "h6_paren": re.compile(r'^\((\d+)\)\s*'),
+        # Korean outline markers: "가.", "나."
+        # The 14 syllables are listed explicitly. The range [가-하] is a
+        # code-point range and would match almost every Hangul syllable.
+        "h4_korean": re.compile(r'^[가나다라마바사아자차카타파하]\.\s'),
+        # Korean outline markers with parenthesis: "가)", "나)"
+        "h5_korean": re.compile(r'^[가나다라마바사아자차카타파하]\)\s'),
+        # Circled digits: "①", "②"
+        "h6_circle": re.compile(r'^[①②③④⑤⑥⑦⑧⑨⑩]'),
+        # List bullets: "•", "-", "○"
+        "list_bullet": re.compile(r'^[•\-○]\s'),
+        # Page number: "- 1 -", "- 12 -"
+        "page_number": re.compile(r'^-\s*\d+\s*-$'),
+        # Footer: "<document title>- 1 -"
+        "footer_pattern": re.compile(r'.+[-–]\s*\d+\s*[-–]$'),
+    }
+    
+    # Text patterns for content to be dropped
+    REMOVE_PATTERNS = [
+        re.compile(r'^-\s*\d+\s*-$'),                    # "- 1 -"
+        re.compile(r'[-–]\s*\d+\s*[-–]\s*$'),           # "<document title>- 1 -" (not anchored at the start)
+        re.compile(r'^\d+\s*×\s*\d+$'),                  # "643 × 236" (image dimensions)
+        re.compile(r'^\[이미지 없음:.*\]$'),              # "[이미지 없음: xxx]" (missing-image placeholder)
+        re.compile(r'^\[그림\s*\d+-\d+\]$'),              # "[그림 1-1]" (figure label)
+    ]
+    
+    def __init__(self) -> None:
+        """Initialise empty analysis state."""
+        self.elements: List[StyledElement] = []
+        self.current_section = DocumentSection.CONTENT
+        self.seen_texts: Set[str] = set()  # for duplicate detection
+        self.document_title = ""  # document title (used when removing footers)
+    
+    def analyze(self, html: str) -> List[StyledElement]:
+        """Analyse an HTML document and return its role-classified elements.
+
+        All per-document state is reset first, so one analyzer instance can
+        be reused without results from an earlier document leaking in.
+
+        Args:
+            html: HTML source text.
+
+        Returns:
+            The list stored in ``self.elements`` after post-processing.
+        """
+        soup = BeautifulSoup(html, 'html.parser')
+        self.elements = []
+        self.seen_texts = set()
+        # Without these resets a second call would still use the previous
+        # document's title and last section.
+        self.document_title = ""
+        self.current_section = DocumentSection.CONTENT
+
+        # 1. Pre-process: remove unneeded elements
+        self._preprocess(soup)
+
+        # 2. Extract the document title (used to detect footer text)
+        self._extract_document_title(soup)
+
+        # 3. Detect the structure and walk the sections
+        self._detect_and_process_sections(soup)
+
+        # 4. Post-process: remove duplicates and unneeded elements
+        self._postprocess()
+
+        return self.elements
+    
+    def _preprocess(self, soup: BeautifulSoup):
+        """Pre-process the parsed HTML in place by removing unneeded elements.
+
+        Steps: (1) script/style-like tags, (2) header/footer containers whose
+        text looks like a running header or footer, (3) elements hidden via
+        inline style, (4) ``.sheet`` elements outside ``#raw-container``
+        when that container exists.
+        """
+        print("  🔧 HTML 전처리 중...")
+
+        # 1. Remove script/style and similar tags
+        removed_count = 0
+        for tag in soup(['script', 'style', 'noscript', 'meta', 'link', 'head']):
+            tag.decompose()
+            removed_count += 1
+
+        if removed_count > 0:
+            print(f"     - script/style 등 {removed_count}개 제거")
+
+        # 2. Remove header/footer areas
+        header_footer_count = 0
+        for selector in ['.page-header', '.page-footer', '.header', '.footer', 
+                        '[class*="header"]', '[class*="footer"]',
+                        '.running-header', '.running-footer']:
+            for elem in soup.select(selector):
+                # Only drop page headers/footers, not real content headers
+                text = elem.get_text(strip=True)
+                if self._is_header_footer_text(text):
+                    elem.decompose()
+                    header_footer_count += 1
+
+        if header_footer_count > 0:
+            print(f"     - 머리말/꼬리말 {header_footer_count}개 제거")
+
+        # 3. Remove elements hidden via inline style
+        hidden_count = 0
+        for selector in ('[style*="display:none"], [style*="display: none"]',
+                         '[style*="visibility:hidden"], [style*="visibility: hidden"]'):
+            for elem in soup.select(selector):
+                elem.decompose()
+                hidden_count += 1
+
+        # Report the count like steps 1 and 2 (it used to be computed but never shown)
+        if hidden_count > 0:
+            print(f"     - 숨겨진 요소 {hidden_count}개 제거")
+
+        # 4. Remove .sheet elements outside #raw-container
+        raw_container = soup.find(id='raw-container')
+        if raw_container:
+            print("     - 글벗 구조 감지: #raw-container 우선 사용")
+            for sheet in soup.select('.sheet'):
+                if not self._is_descendant_of(sheet, raw_container):
+                    sheet.decompose()
+    
+    def _extract_document_title(self, soup: BeautifulSoup):
+        """Store the cover's first <h1> text as the document title, if present.
+
+        The cover is looked up by id ``box-cover`` first, then by class
+        ``box-cover``. The title is later used to recognise footer text.
+        """
+        cover = soup.find(id='box-cover') or soup.find(class_='box-cover')
+        heading = cover.find('h1') if cover else None
+        if not heading:
+            return
+
+        self.document_title = heading.get_text(strip=True)
+        print(f"     - 문서 제목 감지: {self.document_title[:30]}...")
+    
+    def _is_header_footer_text(self, text: str) -> bool:
+        """Tell whether ``text`` looks like a running header or footer.
+
+        True for a bare page number ("- 1 -"), for text ending in a page
+        number ("title- 1 -"), and for text that contains the document title
+        together with a "- N -" page marker anywhere.
+        """
+        if not text:
+            return False
+
+        patterns = self.PATTERNS
+        if patterns['page_number'].match(text) or patterns['footer_pattern'].match(text):
+            return True
+
+        # Document title combined with a page number
+        title = self.document_title
+        mentions_title = bool(title) and title in text
+        return mentions_title and re.search(r'[-–]\s*\d+\s*[-–]', text) is not None
+    
+    def _should_skip_text(self, text: str) -> bool:
+        """Tell whether ``text`` should be left out of the result.
+
+        Skipped: empty text, anything matching REMOVE_PATTERNS, header/footer
+        text, and a line equal to the document title once a COVER_TITLE
+        element containing that title has been recorded.
+        """
+        if not text:
+            return True
+
+        # search() instead of match(): the "<title>- 1 -" pattern is not
+        # anchored at the start, so match() could never find it after a
+        # title. The other patterns carry their own '^' anchor.
+        if any(pattern.search(text) for pattern in self.REMOVE_PATTERNS):
+            return True
+
+        # Header/footer check
+        if self._is_header_footer_text(text):
+            return True
+
+        # A line holding only the document title (comes from a footer)
+        if self.document_title and text.strip() == self.document_title:
+            # Skip it only if the cover already produced the title
+            if any(e.role == 'COVER_TITLE' and self.document_title in e.text
+                   for e in self.elements):
+                return True
+
+        return False
+    
+    def _is_descendant_of(self, element: Tag, ancestor: Tag) -> bool:
+        """Tell whether ``element`` lies below ``ancestor`` in the tree.
+
+        Walks the ``parent`` chain and compares by identity. Equality (``==``)
+        on parsed tags compares structure, so a different element with the
+        same markup would wrongly count as the ancestor.
+
+        Returns:
+            True if ``ancestor`` is one of ``element``'s parents; False
+            otherwise, including when both are the same node.
+        """
+        node = element.parent
+        while node is not None:
+            if node is ancestor:
+                return True
+            node = node.parent
+        return False
+    
+    def _detect_and_process_sections(self, soup: BeautifulSoup):
+        """Pick the document layout and hand over to the matching processor.
+
+        Priority: ``#raw-container`` structure, then rendered ``.sheet``
+        pages, then generic HTML.
+        """
+        container = soup.find(id='raw-container')
+        if container:
+            self._process_geulbeot_structure(container)
+        else:
+            pages = soup.select('.sheet')
+            if pages:
+                self._process_sheet_structure(pages)
+            else:
+                self._process_generic_html(soup)
+    
+    def _process_geulbeot_structure(self, raw: Tag):
+        """Process the ``#raw-container`` layout box by box.
+
+        The boxes are handled in fixed order (cover, TOC, summary, content).
+        Each box that exists sets ``current_section`` and is passed to its
+        processor.
+        """
+        print("  📄 글벗 #raw-container 구조 처리 중...")
+
+        # (element id, progress message, section, processor)
+        steps = (
+            ('box-cover', "     - 표지 섹션", DocumentSection.COVER, self._process_cover),
+            ('box-toc', "     - 목차 섹션", DocumentSection.TOC, self._process_toc),
+            ('box-summary', "     - 요약 섹션", DocumentSection.CONTENT, self._process_content_element),
+            ('box-content', "     - 본문 섹션", DocumentSection.CONTENT, self._process_content_element),
+        )
+
+        for box_id, notice, section, handler in steps:
+            # Looked up one at a time, right before it is processed
+            box = raw.find(id=box_id)
+            if box:
+                print(notice)
+                self.current_section = section
+                handler(box)
+    
+    def _process_sheet_structure(self, sheets: List[Tag]):
+        """Process rendered ``.sheet`` pages one by one.
+
+        A page's ``.body-content`` element is processed when present.
+        Otherwise every direct child tag is processed except those whose
+        class contains "header" or "footer".
+        """
+        print(f"  📄 .sheet 페이지 구조 처리 중... ({len(sheets)}페이지)")
+
+        for sheet in sheets:
+            body = sheet.select_one('.body-content')
+            if body:
+                self._process_content_element(body)
+                continue
+
+            # No body-content: walk the children, leaving out headers/footers
+            for child in sheet.children:
+                if not isinstance(child, Tag):
+                    continue
+
+                class_list = child.get('class', [])
+                lowered = (' '.join(class_list) if class_list else '').lower()
+                if 'header' in lowered or 'footer' in lowered:
+                    continue
+
+                self._process_content_element(child)
+    
+    def _process_generic_html(self, soup: BeautifulSoup):
+        """Process plain HTML without the page or container layout.
+
+        Handles the first cover-like and TOC-like elements (matched by
+        class), then every direct child tag of <main>, <article>, <body> or
+        the document itself, in that order of preference.
+        """
+        print("  📄 일반 HTML 구조 처리 중...")
+
+        # Cover
+        cover_node = soup.find(class_=re.compile(r'cover|title-page|box-cover'))
+        if cover_node:
+            self.current_section = DocumentSection.COVER
+            self._process_cover(cover_node)
+
+        # Table of contents
+        toc_node = soup.find(class_=re.compile(r'toc|table-of-contents'))
+        if toc_node:
+            self.current_section = DocumentSection.TOC
+            self._process_toc(toc_node)
+
+        # Body: first available container, falling back to the whole document
+        self.current_section = DocumentSection.CONTENT
+        container = soup
+        for tag_name in ('main', 'article', 'body'):
+            candidate = soup.find(tag_name)
+            if candidate:
+                container = candidate
+                break
+
+        for node in container.children:
+            if isinstance(node, Tag):
+                self._process_content_element(node)
+    
+    def _process_cover(self, cover: Tag):
+        """Record cover elements: first <h1> as title, first <h2> as subtitle, every <p> as info.
+
+        Empty or already-seen texts are not recorded.
+        """
+        def record(node, role, tag_name):
+            # Append one cover element unless its text is empty or a duplicate
+            text = node.get_text(strip=True)
+            if text and not self._is_duplicate(text):
+                self.elements.append(StyledElement(
+                    role=role,
+                    text=text,
+                    tag=tag_name,
+                    html=str(node)[:200],
+                    section="cover",
+                    attributes={}
+                ))
+
+        # H1 = title, H2 = subtitle (first occurrence only)
+        for tag_name, role in (('h1', "COVER_TITLE"), ('h2', "COVER_SUBTITLE")):
+            heading = cover.find(tag_name)
+            if heading:
+                record(heading, role, tag_name)
+
+        # P = additional info lines
+        for paragraph in cover.find_all('p'):
+            record(paragraph, "COVER_INFO", "p")
+    
+    def _process_toc(self, toc: Tag):
+        """Turn every <li> of the table of contents into a TOC_H1/H2/H3 element.
+
+        The level comes from an explicit ``lvl-N`` class when present,
+        otherwise from the numbering patterns tried in a fixed order.
+        Anything unrecognised is treated as TOC_H1. Empty or already-seen
+        entries are skipped.
+        """
+        # Class markers. 'toc-lvl-N' contains 'lvl-N', so a single substring
+        # test covers both spellings.
+        class_levels = (('lvl-1', "TOC_H1"), ('lvl-2', "TOC_H2"), ('lvl-3', "TOC_H3"))
+        # Numbering patterns are tried in this order: h4_num, h3_num, h2_num.
+        numbered_levels = (('h4_num', "TOC_H3"), ('h3_num', "TOC_H2"), ('h2_num', "TOC_H1"))
+
+        for li in toc.find_all('li'):
+            text = li.get_text(strip=True)
+            if not text or self._is_duplicate(text):
+                continue
+
+            class_str = ' '.join(li.get('class', []) or [])
+
+            role = next((level for marker, level in class_levels if marker in class_str), None)
+            if role is None:
+                role = next(
+                    (level for key, level in numbered_levels
+                     if self.PATTERNS[key].match(text)),
+                    "TOC_H1",
+                )
+
+            self.elements.append(StyledElement(
+                role=role,
+                text=text,
+                tag="li",
+                html=str(li)[:200],
+                section="toc",
+                attributes={}
+            ))
+    
+    def _process_content_element(self, element: Tag):
+        """Classify one body element and recurse into container tags.
+
+        Elements whose class mentions header/footer/page-num are ignored
+        together with their subtree. Tables and figures are delegated to
+        their own handlers. Any other element contributes at most one
+        StyledElement built from its direct text, after which the children
+        of container tags are visited recursively.
+        """
+        if not isinstance(element, Tag):
+            return
+
+        tag_name = element.name.lower() if element.name else ""
+        classes = element.get('class', [])
+        class_str = ' '.join(classes) if classes else ''
+
+        # Running header / footer / page-number blocks are dropped entirely.
+        lowered_classes = class_str.lower()
+        for marker in ('header', 'footer', 'page-num'):
+            if marker in lowered_classes:
+                return
+
+        if tag_name == 'table':
+            self._process_table(element)
+            return
+
+        if tag_name in ('figure', 'img'):
+            self._process_figure(element)
+            return
+
+        text = self._get_direct_text(element)
+
+        # The skip check runs before the duplicate check, so skipped text is
+        # never recorded as seen; children are still visited below either way.
+        if text and not self._should_skip_text(text) and not self._is_duplicate(text):
+            role = self._classify_role(element, tag_name, classes, text)
+            if role:
+                self.elements.append(StyledElement(
+                    role=role,
+                    text=text,
+                    tag=tag_name,
+                    html=str(element)[:200],
+                    section=self.current_section.value,
+                    attributes=dict(element.attrs) if element.attrs else {}
+                ))
+
+        # Only container tags are descended into.
+        container_tags = ('div', 'section', 'article', 'aside', 'main', 'body',
+                          'ul', 'ol', 'dl', 'blockquote')
+        if tag_name not in container_tags:
+            return
+        for child in element.children:
+            if isinstance(child, Tag):
+                self._process_content_element(child)
+    
+    def _get_direct_text(self, element: Tag) -> str:
+        """Return the text that belongs to ``element`` itself.
+
+        Heading, paragraph, list-item, table-cell and caption tags yield
+        their full stripped text. Every other tag yields only its immediate
+        string children, stripped and joined with single spaces, so text
+        owned by nested tags is left out.
+        """
+        text_bearing_tags = ('h1', 'h2', 'h3', 'h4', 'h5', 'h6',
+                             'p', 'li', 'td', 'th', 'caption')
+        if element.name in text_bearing_tags:
+            return element.get_text(strip=True)
+
+        fragments = (
+            str(node).strip()
+            for node in element.children
+            if isinstance(node, NavigableString)
+        )
+        return ' '.join(piece for piece in fragments if piece)
+    
+    def _is_duplicate(self, text: str) -> bool:
+        """Report whether ``text`` was seen before, recording it when new.
+
+        Empty text counts as a duplicate. After collapsing whitespace, text
+        shorter than 10 characters is never a duplicate and is not recorded.
+        Longer text is identified by its first 50 normalised characters.
+        """
+        if not text:
+            return True
+
+        collapsed = re.sub(r'\s+', ' ', text.strip())
+        if len(collapsed) < 10:
+            return False
+
+        fingerprint = collapsed[:50]
+        already_seen = fingerprint in self.seen_texts
+        if not already_seen:
+            self.seen_texts.add(fingerprint)
+        return already_seen
+    
+    def _classify_role(self, element: Tag, tag: str, classes: List[str], text: str) -> Optional[str]:
+        """Map an element to a role name (H1-H6, BODY, HIGHLIGHT_BOX, LIST_ITEM).
+
+        Heading tags map straight to their level. For p/div/span the numbering
+        prefix of ``text`` decides the role.
+
+        Important: pattern checks must run from the most specific pattern to
+        the most general one, otherwise a general pattern claims the prefix
+        of a more specific one:
+           1.1.1 -> 1.1 -> 1. -> 1
+           (1) -> 1)
+           '가)' -> '가.'
+
+        Note that the PATTERNS key names sit one level above the role they
+        produce here (h4_num -> H3, h3_num -> H2, h2_num -> H1,
+        h6_paren -> H5, h5_paren -> H4). The regexes themselves are defined
+        outside this method.
+
+        Every path in this method returns a string; None is never returned
+        even though the annotation allows it.
+        """
+
+        # Space-joined class list, used only for the highlight-box check below.
+        class_str = ' '.join(classes) if classes else ''
+
+        # ============ Heading tags (the HTML tag wins over any text pattern) ============
+        if tag == 'h1':
+            return "H1"
+        if tag == 'h2':
+            return "H2"
+        if tag == 'h3':
+            return "H3"
+        if tag == 'h4':
+            return "H4"
+        if tag == 'h5':
+            return "H5"
+        if tag == 'h6':
+            return "H6"
+
+        # ============ Body tags (p, div, span) - classified by numbering pattern ============
+        if tag in ['p', 'div', 'span']:
+
+            # ------ Dotted-number patterns (specific -> general order!) ------
+
+            # "1.1.1" pattern (most specific - checked first).
+            # Text of 100+ characters is treated as a numbered paragraph, not a heading.
+            if self.PATTERNS['h4_num'].match(text):
+                if len(text) < 100:
+                    return "H3"
+                return "BODY"
+
+            # "1.1 " pattern
+            if self.PATTERNS['h3_num'].match(text):
+                if len(text) < 100:
+                    return "H2"
+                return "BODY"
+
+            # "1." pattern
+            if self.PATTERNS['h2_num'].match(text):
+                if len(text) < 100:
+                    return "H1"
+                return "BODY"
+
+            # "1 <Korean text>" pattern (digit + space + Hangul); no length limit applied
+            if self.PATTERNS['h1_num'].match(text):
+                return "H1"
+
+            # ------ Parenthesis patterns (specific -> general order!) ------
+
+            # "(1)" pattern (fully parenthesised is more specific - checked first).
+            # Counts as a heading when it contains <strong> or is shorter than 80 characters.
+            if self.PATTERNS['h6_paren'].match(text):
+                if element.find('strong') or len(text) < 80:
+                    return "H5"
+                return "BODY"
+
+            # "1)" pattern
+            if self.PATTERNS['h5_paren'].match(text):
+                if element.find('strong') or len(text) < 80:
+                    return "H4"
+                return "BODY"
+
+            # ------ Korean-letter patterns (specific -> general order!) ------
+
+            # "가)" pattern (the parenthesis form is more specific - checked first)
+            if self.PATTERNS['h5_korean'].match(text):
+                return "H5"
+
+            # "가." pattern
+            if self.PATTERNS['h4_korean'].match(text):
+                return "H4"
+
+            # ------ Special-symbol patterns ------
+
+            # Circled-number pattern ("①②③")
+            if self.PATTERNS['h6_circle'].match(text):
+                return "H6"
+
+            # ------ Everything else ------
+
+            # Highlight box: substring match on the class string, reached only
+            # when no numbering pattern matched above.
+            if any(x in class_str for x in ['highlight', 'box', 'note', 'tip']):
+                return "HIGHLIGHT_BOX"
+
+            # Plain body text
+            return "BODY"
+
+        # ============ List items ============
+        if tag == 'li':
+            return "LIST_ITEM"
+
+        # ============ Definition lists ============
+        if tag == 'dt':
+            return "H5"
+        if tag == 'dd':
+            return "BODY"
+
+        # Any other tag falls back to body text.
+        return "BODY"
+    
+    def _process_table(self, table: Tag):
+        """Record a table as one TABLE element carrying its cell grid.
+
+        An optional <caption> is emitted first as TABLE_CAPTION (skipped when
+        empty or already seen). Each <tr> becomes a list of cell dicts with
+        the keys ``text``, ``is_header``, ``colspan``, ``rowspan`` and
+        ``bg_color``. Rows without cells are dropped, and a table without
+        any rows emits no TABLE element.
+
+        Malformed ``colspan``/``rowspan`` values (for example ``""`` or
+        ``"2px"``) fall back to 1 instead of raising.
+        """
+        def span_of(cell, attr_name):
+            # Hand-written or exported HTML often carries non-numeric span
+            # values; one bad cell must not abort the whole analysis.
+            try:
+                return int(cell.get(attr_name, 1))
+            except (TypeError, ValueError):
+                return 1
+
+        # Caption
+        caption = table.find('caption')
+        caption_text = ""
+        if caption:
+            caption_text = caption.get_text(strip=True)
+            if caption_text and not self._is_duplicate(caption_text):
+                self.elements.append(StyledElement(
+                    role="TABLE_CAPTION",
+                    text=caption_text,
+                    tag="caption",
+                    html=str(caption)[:100],
+                    section=self.current_section.value,
+                    attributes={}
+                ))
+
+        # Collect the table structure row by row.
+        table_data = {'rows': [], 'caption': caption_text}
+
+        for tr in table.find_all('tr'):
+            row = [
+                {
+                    'text': cell.get_text(strip=True),
+                    'is_header': cell.name == 'th',
+                    'colspan': span_of(cell, 'colspan'),
+                    'rowspan': span_of(cell, 'rowspan'),
+                    'bg_color': self._extract_bg_color(cell),
+                }
+                for cell in tr.find_all(['th', 'td'])
+            ]
+            if row:
+                table_data['rows'].append(row)
+
+        # Emit a single TABLE element (instead of one element per TH/TD).
+        if table_data['rows']:
+            self.elements.append(StyledElement(
+                role="TABLE",
+                text=f"[표: {len(table_data['rows'])}행]",
+                tag="table",
+                html=str(table)[:200],
+                section=self.current_section.value,
+                attributes={'table_data': table_data}
+            ))
+
+    def _extract_bg_color(self, element: Tag) -> str:
+        """Return the element's background colour, or '' when none is declared.
+
+        The inline ``style`` attribute's ``background-color`` is tried first,
+        then the legacy ``bgcolor`` attribute. A value that is found is passed
+        through ``_normalize_color``.
+
+        NOTE(review): a second method with this same name is defined further
+        down in this class body. Because a later ``def`` rebinds the name,
+        that later definition is the one used at runtime and this one is
+        effectively dead code - consider removing one of the two.
+        """
+        style = element.get('style', '')
+
+        # Inline CSS: capture everything after "background-color:" up to the next ';'
+        match = re.search(r'background-color:\s*([^;]+)', style)
+        if match:
+            return self._normalize_color(match.group(1))
+
+        # Legacy bgcolor attribute
+        bgcolor = element.get('bgcolor', '')
+        if bgcolor:
+            return self._normalize_color(bgcolor)
+
+        return ''
+    
+    def _process_figure(self, element: Tag):
+        """Emit FIGURE (and FIGURE_CAPTION) elements for an <img> or <figure>.
+
+        For a <figure> the first nested <img> is used; otherwise the element
+        itself is treated as the image. An image without ``src`` is ignored.
+        A <figcaption> is emitted only for <figure> elements, and only when
+        its text is non-empty and not rejected by ``_should_skip_text``.
+        """
+        is_figure = element.name == 'figure'
+        img = element.find('img') if is_figure else element
+
+        if img and img.name == 'img':
+            src = img.get('src', '')
+            alt = img.get('alt', '')
+            # Only images that actually point at a source are recorded.
+            if src:
+                self.elements.append(StyledElement(
+                    role="FIGURE",
+                    text=alt or "이미지",
+                    tag="img",
+                    html=str(img)[:100],
+                    section=self.current_section.value,
+                    attributes={"src": src, "alt": alt}
+                ))
+
+        # Captions only exist on <figure>.
+        if not is_figure:
+            return
+        figcaption = element.find('figcaption')
+        if not figcaption:
+            return
+
+        caption_text = figcaption.get_text(strip=True)
+        if caption_text and not self._should_skip_text(caption_text):
+            self.elements.append(StyledElement(
+                role="FIGURE_CAPTION",
+                text=caption_text,
+                tag="figcaption",
+                html=str(figcaption)[:100],
+                section=self.current_section.value,
+                attributes={}
+            ))
+    
+    def _postprocess(self):
+        """Drop unwanted elements from ``self.elements`` in place.
+
+        An element is removed when its text is empty or whitespace-only,
+        when ``_is_header_footer_text`` flags it, or when its stripped text
+        matches any pattern in ``REMOVE_PATTERNS``. The element count is
+        printed before and after.
+        """
+        print(f"  🧹 후처리 중... (처리 전: {len(self.elements)}개)")
+
+        def should_drop(elem) -> bool:
+            # Checks run in a fixed order: blank text, header/footer, patterns.
+            if not elem.text or not elem.text.strip():
+                return True
+            if self._is_header_footer_text(elem.text):
+                return True
+            stripped = elem.text.strip()
+            return any(pattern.match(stripped) for pattern in self.REMOVE_PATTERNS)
+
+        self.elements = [elem for elem in self.elements if not should_drop(elem)]
+        print(f"     - 처리 후: {len(self.elements)}개")
+    
+    def get_role_summary(self) -> Dict[str, int]:
+        """Count the collected elements per role, ordered by role name."""
+        tally = {}
+        for elem in self.elements:
+            tally.setdefault(elem.role, 0)
+            tally[elem.role] += 1
+        # Rebuild the dict in sorted-key order so iteration is deterministic.
+        return {role: tally[role] for role in sorted(tally)}
+
+
+    def extract_css_styles(self, html: str) -> Dict[str, Dict]:
+        """Collect per-role style hints from an HTML document.
+
+        Rules from every <style> block are parsed with ``_parse_css_rules``.
+        When several blocks define the same role, the later block replaces
+        the earlier entry. Any role present in ``self.elements`` that the
+        stylesheets did not cover is then filled from the inline ``style``
+        attribute of the first element with that role, using the element's
+        stored ``html`` snippet.
+
+        Args:
+            html: Full HTML source to scan for <style> blocks.
+
+        Returns:
+            ``{role: {font_size, color, bold, ...}}``; a role's dict may be
+            empty when no style information was found for it.
+        """
+        soup = BeautifulSoup(html, 'html.parser')
+        role_styles = {}
+
+        # Parse all <style> tags: documents commonly ship more than one
+        # stylesheet block, and reading only the first drops the rest.
+        for style_tag in soup.find_all('style'):
+            css_text = style_tag.string or ''
+            if css_text:
+                role_styles.update(self._parse_css_rules(css_text))
+
+        # Inline styles fill in only the roles the stylesheets left out.
+        for elem in self.elements:
+            if elem.role not in role_styles:
+                role_styles[elem.role] = self._extract_inline_style(elem.html)
+
+        return role_styles
+
+    def _parse_css_rules(self, css_text: str) -> Dict[str, Dict]:
+        """Parse flat CSS text into ``{role: style}``.
+
+        Only ``font-size``, ``color``, ``font-weight`` and ``text-align`` are
+        read; they are stored under ``font_size``, ``color``, ``bold`` and
+        ``align``. Selectors are mapped to roles by ``_selector_to_role``,
+        and rules whose selector maps to no role are ignored.
+
+        Handling that matters for real stylesheets:
+          * ``/* ... */`` comments are removed before parsing;
+          * a grouped selector (``h1, h2 { ... }``) applies to every member;
+          * several rules for the same role are merged property by property,
+            with later declarations overriding earlier ones.
+
+        Nested at-rules (e.g. ``@media { ... }``) are not understood.
+
+        Args:
+            css_text: Raw stylesheet text.
+
+        Returns:
+            Mapping from role name to the style dict collected for it.
+        """
+        rules = {}
+
+        # Strip comments so they cannot leak into selectors or values.
+        css_text = re.sub(r'/\*.*?\*/', '', css_text, flags=re.DOTALL)
+
+        # "selector { declarations }" blocks
+        pattern = r'([^{]+)\{([^}]+)\}'
+        for match in re.finditer(pattern, css_text):
+            selector_group = match.group(1).strip()
+            properties = match.group(2)
+
+            style = {}
+            for prop in properties.split(';'):
+                if ':' not in prop:
+                    continue
+                key, value = prop.split(':', 1)
+                key = key.strip().lower()
+                value = value.strip()
+
+                if key == 'font-size':
+                    style['font_size'] = self._parse_font_size(value)
+                elif key == 'color':
+                    style['color'] = self._normalize_color(value)
+                elif key == 'font-weight':
+                    style['bold'] = value in ['bold', '700', '800', '900']
+                elif key == 'text-align':
+                    style['align'] = value
+
+            # Map each member of a grouped selector separately and merge into
+            # what earlier rules already contributed for that role.
+            for selector in selector_group.split(','):
+                role = self._selector_to_role(selector.strip())
+                if role:
+                    rules.setdefault(role, {}).update(style)
+
+        return rules
+
+    def _selector_to_role(self, selector: str) -> Optional[str]:
+        """Map a CSS selector to a role name, or None when nothing matches.
+
+        The selector is split into simple selectors (type names and
+        ``.class`` / ``#id`` / ``:pseudo`` tokens) and each mapping key must
+        equal one whole token. Plain substring matching would, for example,
+        turn ``.page-header`` or ``span`` into BODY because they contain the
+        letter ``p``.
+
+        Keys are tried in mapping order, so ``h1`` wins over ``.cover-title``
+        when a selector contains both.
+
+        Args:
+            selector: A single CSS selector such as ``"table th"`` or
+                ``".cover-title"``.
+
+        Returns:
+            The role name, or None if the selector maps to no known role.
+        """
+        selector = selector.lower().strip()
+        mapping = {
+            'h1': 'H1', 'h2': 'H2', 'h3': 'H3', 'h4': 'H4',
+            '.cover-title': 'COVER_TITLE',
+            '.section-title': 'H1',
+            'th': 'TH', 'td': 'TD',
+            'p': 'BODY',
+        }
+
+        # Attribute selectors ([type=p]) carry arbitrary text; ignore them.
+        bare = re.sub(r'\[[^\]]*\]', ' ', selector)
+        # Keep the . # : prefix with its name so '.p' or '#p' never equals 'p'.
+        tokens = set(re.findall(r'[.#:]?[a-z_][a-z0-9_-]*', bare))
+
+        for key, role in mapping.items():
+            if key in tokens:
+                return role
+        return None
+
+    def _parse_font_size(self, value: str) -> float:
+        """Parse a CSS font-size value and return it in points.
+
+        Supported units: ``pt`` (default when no unit is given), ``px``
+        (x0.75), ``em``/``rem`` and ``%`` (both relative to an 11pt base).
+        Units are matched case-insensitively. The first number found in
+        ``value`` is used.
+
+        Args:
+            value: Raw CSS value such as ``"12pt"``, ``"16px"`` or ``"120%"``.
+
+        Returns:
+            The size in points, or 11.0 when ``value`` contains no number
+            (for example keyword sizes like ``"large"``).
+        """
+        base_pt = 11.0
+
+        # The number is matched strictly (digits with at most one decimal
+        # point) so float() cannot fail on input like "1.2.3" or ".".
+        match = re.search(r'(\d+(?:\.\d+)?|\.\d+)\s*(pt|px|rem|em|%)?', value.lower())
+        if not match:
+            return base_pt
+
+        size = float(match.group(1))
+        unit = match.group(2) or 'pt'
+        if unit == 'px':
+            size = size * 0.75  # px -> pt
+        elif unit in ('em', 'rem'):
+            size = size * base_pt  # relative to the 11pt base
+        elif unit == '%':
+            size = size * base_pt / 100  # percentage of the 11pt base
+        return size
+
+    def _normalize_color(self, value: str) -> str:
+        """Normalise a CSS colour value to upper-case ``#RRGGBB``.
+
+        Accepted forms: ``#rgb``, ``#rrggbb``, the alpha variants ``#rgba``
+        and ``#rrggbbaa`` (alpha is dropped), ``rgb(r, g, b)`` and
+        ``rgba(r, g, b, a)`` with integer channels (clamped to 255), and the
+        small set of colour names listed below. A trailing ``!important``
+        is ignored.
+
+        Args:
+            value: Raw colour value taken from CSS or an HTML attribute.
+
+        Returns:
+            ``#RRGGBB``. Anything unrecognised yields ``'#000000'``.
+        """
+        value = value.strip().lower()
+        # "red !important" should resolve the same way as "red".
+        value = re.sub(r'\s*!\s*important\s*$', '', value)
+
+        hex_match = re.match(r'^#([0-9a-f]+)$', value)
+        if hex_match:
+            digits = hex_match.group(1)
+            if len(digits) in (3, 4):
+                # Short form: double each of the three colour digits.
+                return '#' + ''.join(ch * 2 for ch in digits[:3]).upper()
+            if len(digits) in (6, 8):
+                return '#' + digits[:6].upper()
+
+        # rgb(r, g, b) / rgba(r, g, b, a): the alpha channel is ignored.
+        match = re.search(r'rgba?\s*\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)', value)
+        if match:
+            # Clamp so out-of-range channels cannot produce a 3-digit hex pair.
+            r, g, b = (min(int(channel), 255) for channel in match.groups())
+            return f'#{r:02X}{g:02X}{b:02X}'
+
+        # Colour names.
+        # NOTE(review): 'navy' maps to #1A365D rather than CSS navy (#000080);
+        # presumably a project palette colour - confirm before changing.
+        color_names = {
+            'black': '#000000', 'white': '#FFFFFF',
+            'red': '#FF0000', 'green': '#008000', 'blue': '#0000FF',
+            'navy': '#1A365D',
+        }
+        return color_names.get(value, '#000000')
+
+    def _extract_inline_style(self, html: str) -> Dict:
+        """Extract supported properties from the first ``style`` attribute in ``html``.
+
+        The attribute value is read up to the quote character that opened
+        it, so a nested quote of the other kind (for example
+        ``style="font-family: 'Malgun Gothic'; color: red"``) no longer cuts
+        the declaration list short.
+
+        Recognised properties and the keys they are stored under:
+        ``font-size`` -> ``font_size``, ``color`` -> ``color``,
+        ``font-weight`` -> ``bold``, ``text-align`` -> ``align``,
+        ``background-color`` -> ``bg_color``.
+
+        Args:
+            html: An HTML snippet. Callers in this class pass a truncated
+                snippet, so an attribute without its closing quote is
+                treated as absent.
+
+        Returns:
+            Dict of the recognised properties; empty when there is no
+            complete ``style`` attribute.
+        """
+        style = {}
+
+        # The lookbehind keeps attributes such as data-style="..." from
+        # matching; the backreference requires the same closing quote.
+        match = re.search(
+            r'(?<![\w-])style\s*=\s*(["\'])(.*?)\1',
+            html,
+            flags=re.IGNORECASE | re.DOTALL,
+        )
+        if not match:
+            return style
+
+        for prop in match.group(2).split(';'):
+            if ':' not in prop:
+                continue
+            key, value = prop.split(':', 1)
+            key = key.strip().lower()
+            value = value.strip()
+
+            if key == 'font-size':
+                style['font_size'] = self._parse_font_size(value)
+            elif key == 'color':
+                style['color'] = self._normalize_color(value)
+            elif key == 'font-weight':
+                style['bold'] = value in ['bold', '700', '800', '900']
+            elif key == 'text-align':
+                style['align'] = value
+            elif key == 'background-color':
+                style['bg_color'] = self._normalize_color(value)
+
+        return style
+
+    def _extract_bg_color(self, element) -> str:
+        """Return the element's normalised background colour, or ''.
+
+        Objects without a ``get`` method yield ''. Otherwise the inline
+        ``background-color`` declaration takes precedence over the legacy
+        ``bgcolor`` attribute, and the chosen value goes through
+        ``_normalize_color``.
+        """
+        if not hasattr(element, 'get'):
+            return ''
+
+        declared = re.search(r'background-color:\s*([^;]+)', element.get('style', ''))
+        # bgcolor is consulted only when the inline style declares nothing.
+        raw_color = declared.group(1) if declared else element.get('bgcolor', '')
+
+        return self._normalize_color(raw_color) if raw_color else ''
+
+
+    def export_for_hwp(self) -> List[Dict]:
+        """Export the collected elements as plain dicts for the HWP converter.
+
+        Each dict carries the keys ``role``, ``text``, ``tag``, ``section``
+        and ``attributes``; the element's ``html`` snippet is not exported.
+        """
+        exported_fields = ("role", "text", "tag", "section", "attributes")
+        records = []
+        for element in self.elements:
+            records.append({name: getattr(element, name) for name in exported_fields})
+        return records
+
+
+if __name__ == "__main__":
+    # Manual smoke test: run StyleAnalyzer over a small two-page sample and
+    # print every classified element followed by a per-role count.
+    # The sample contains <script>/<style> in <head>, page-header and
+    # page-footer divs, h1-h3 headings, paragraphs starting with "1)", "(1)"
+    # and a circled number, and a captioned table.
+    test_html = """
+    <html>
+    <head>
+        <script>var x = 1;</script>
+        <style>.test { color: red; }</style>
+    </head>
+    <body>
+        <div class="sheet">
+            <div class="page-header">건설·토목 측량 DX 실무지침</div>
+            <div class="body-content">
+                <h1>1 DX 개요와 기본 개념</h1>
+                <h2>1.1 측량 DX 프레임</h2>
+                <h3>1.1.1 측량 DX 발전 단계</h3>
+                <p>1) <strong>Digitization 정의</strong></p>
+                <p>본문 내용입니다. 이것은 충분히 긴 텍스트로 본문으로 인식되어야 합니다.</p>
+                <p>(1) 단계별 정의 및 진화</p>
+                <p>측량 기술의 발전은 장비의 변화와 성과물의 차원에 따라 구분된다.</p>
+            </div>
+            <div class="page-footer">건설·토목 측량 DX 실무지침- 1 -</div>
+        </div>
+        
+        <div class="sheet">
+            <div class="page-header">건설·토목 측량 DX 실무지침</div>
+            <div class="body-content">
+                <p>① 첫 번째 항목</p>
+                <table>
+                    <caption>표 1. 데이터 비교</caption>
+                    <tr><th>구분</th><th>내용</th></tr>
+                    <tr><td>항목1</td><td>설명1</td></tr>
+                </table>
+            </div>
+            <div class="page-footer">건설·토목 측량 DX 실무지침- 2 -</div>
+        </div>
+    </body>
+    </html>
+    """
+    
+    analyzer = StyleAnalyzer()
+    elements = analyzer.analyze(test_html)
+    
+    # One line per element: role | section | first 50 characters of the text.
+    print("\n" + "="*60)
+    print("분석 결과")
+    print("="*60)
+    for elem in elements:
+        print(f"  {elem.role:18} | {elem.section:7} | {elem.text[:50]}")
+    
+    # Element count per role, in the order returned by get_role_summary().
+    print("\n" + "="*60)
+    print("역할 요약")
+    print("="*60)
+    for role, count in analyzer.get_role_summary().items():
+        print(f"  {role}: {count}")