v8:문서유형 분석등록 및 추출_20260206

2026-02-20 11:46:52 +09:00
parent db6532b33c
commit c3e9e29205
57 changed files with 22138 additions and 1421 deletions
--- a/handlers/__init__.py
+++ b/handlers/__init__.py
@@ -2,4 +2,6 @@
 """
 handlers 패키지
 문서 유형별 처리 로직을 분리하여 관리
-"""
+"""
+
+from .doc_template_analyzer import DocTemplateAnalyzer
--- a/handlers/content_analyzer.py
+++ b/handlers/content_analyzer.py
@@ -0,0 +1,640 @@
+# -*- coding: utf-8 -*-
+"""
+Content Analyzer (Phase 3 — Layer A)
+- template_info + semantic_map → content_prompt.json
+- 각 placeholder의 의미/유형/예시값/작성 패턴 추출
+- Phase 5에서 AI가 새 문서 생성 시 "레시피"로 참조
+
+★ 원칙: 모든 분류는 코드 100% (AI 없음)
+   purpose_hint / audience_hint / tone_hint는 빈 문자열로 남김
+   → 추후 AI enrichment 단계에서 채울 수 있도록 설계
+"""
+
+import re
+
+
+def generate(template_info: dict, semantic_map: dict,
+             parsed: dict = None) -> dict:
+    """Build the content_prompt.json structure for a template.
+
+    Runs every analysis pass in a fixed order against one shared
+    placeholder dict, then bundles the results.
+
+    Args:
+        template_info: extraction result from doc_template_analyzer
+        semantic_map: classification result from semantic_mapper
+        parsed: raw HWPX parse result (optional; not read here)
+
+    Returns:
+        dict with the keys "version", "document", "placeholders",
+        "table_guide" and "writing_guide"
+    """
+    ph_map = {}
+    tbl_guide = {}
+
+    # (1) document-level info
+    doc_meta = _analyze_document(template_info)
+
+    # (2)-(6) the passes mutate ph_map in place; their order fixes the key
+    # order and lets the content_order pass see existing SECTION_n_TITLE keys
+    _analyze_header(template_info, ph_map)
+    _analyze_footer(template_info, ph_map)
+    _analyze_title(template_info, semantic_map, ph_map)
+    _analyze_sections(semantic_map, ph_map, template_info)
+    _analyze_content_order(template_info, semantic_map, ph_map)
+    _analyze_tables(template_info, semantic_map, ph_map, tbl_guide)
+
+    # (7) writing patterns
+    style_guide = _analyze_writing_patterns(template_info, semantic_map)
+
+    bundle = {"version": "1.0"}
+    bundle["document"] = doc_meta
+    bundle["placeholders"] = ph_map
+    bundle["table_guide"] = tbl_guide
+    bundle["writing_guide"] = style_guide
+    return bundle
+
+
+# ================================================================
+#  문서 기본 정보
+# ================================================================
+
+def _analyze_document(template_info: dict) -> dict:
+    """Extract document-level information (paper, layout, margins).
+
+    The three *_hint fields are always returned as empty strings; they
+    are reserved for a later AI enrichment step.
+    """
+    page_info = template_info.get("page", {})
+    paper_info = page_info.get("paper", {})
+
+    orientation = "portrait"
+    if paper_info.get("landscape"):
+        orientation = "landscape"
+
+    doc = {
+        "paper": paper_info.get("name", "A4"),
+        "layout": orientation,
+        "margins": page_info.get("margins", {}),
+    }
+    # reserved for AI enrichment
+    for hint_key in ("purpose_hint", "audience_hint", "tone_hint"):
+        doc[hint_key] = ""
+    return doc
+
+
+# ================================================================
+#  텍스트 유형 분류 (코드 100%, AI 없음)
+# ================================================================
+
+def _classify_text(text: str) -> dict:
+    """Classify a text snippet into a content type by pattern matching.
+
+    Checks run in a fixed priority order and the first hit wins:
+    empty, date, author (rank keyword), department, team, page
+    reference, document title, slogan, then plain text.
+
+    Returns:
+        {"type": ..., "pattern": ...}
+    """
+    def _tag(kind, label):
+        return {"type": kind, "pattern": label}
+
+    snippet = text.strip()
+    if snippet == "":
+        return _tag("empty", "빈 셀")
+
+    # date: "2025. 1. 30(금)", "2025-01-30", "2025.01.30"
+    if re.match(r'\d{4}[\.\-/]\s*\d{1,2}[\.\-/]\s*\d{1,2}', snippet) is not None:
+        return _tag("date", "날짜 (YYYY. M. D)")
+
+    # rank + name; checked before department on purpose.
+    # The first keyword in this order that occurs anywhere in the text wins.
+    rank_keywords = (
+        '사원', '대리', '과장', '차장', '부장', '이사', '상무', '전무',
+        '연구원', '선임연구원', '책임연구원', '수석연구원',
+        '주임', '계장', '팀장', '실장', '부서장', '센터장',
+    )
+    matched_rank = next((kw for kw in rank_keywords if kw in snippet), None)
+    if matched_rank is not None:
+        return _tag("author", f"이름 + 직급({matched_rank})")
+
+    # department (only texts that did not match a rank get here)
+    if len(snippet) <= 12 and re.search(r'(실|부|국|과|원|처|센터|본부)$', snippet):
+        return _tag("department", "조직명")
+
+    # team
+    if len(snippet) <= 10 and re.search(r'팀$', snippet):
+        return _tag("team", "팀명")
+
+    # page reference: "1p", "2p"
+    if re.match(r'\d+p$', snippet):
+        return _tag("page_ref", "페이지 참조")
+
+    # document title: ends with plan / report / proposal ... optionally "(안)"
+    title_hit = re.search(r'(계획|보고서|제안서|기획서|결과|방안|현황|분석)'
+                          r'\s*(\(안\))?\s*$', snippet)
+    if title_hit:
+        return _tag("doc_title", "문서 제목")
+
+    # slogan / vision: longer than 10 chars and contains an abstract keyword
+    if len(snippet) > 10:
+        for keyword in ['함께', '세상', '미래', '가치', '만들어']:
+            if keyword in snippet:
+                return _tag("slogan", "회사 슬로건/비전")
+
+    # fallback
+    return _tag("text", "자유 텍스트")
+
+
+# ================================================================
+#  헤더 분석
+# ================================================================
+
+def _analyze_header(template_info: dict, placeholders: dict):
+    """Register placeholders for the header area (in-place).
+
+    A table-type header is delegated to _analyze_table_area; otherwise
+    one HEADER_TEXT_n entry is created per header text, and at least one
+    entry (with an empty example) when there are no texts.
+    """
+    header = template_info.get("header", {})
+    if not (header and header.get("exists")):
+        return
+
+    if header.get("type") == "table" and header.get("table"):
+        _analyze_table_area(header["table"], "HEADER", "header",
+                            placeholders)
+        return
+
+    texts = header.get("texts", [])
+    # always emit at least HEADER_TEXT_1, even when no text was extracted
+    samples = texts if len(texts) else [""]
+    for seq, sample in enumerate(samples, start=1):
+        entry = _classify_text(sample)
+        entry["example"] = sample.strip()
+        entry["location"] = "header"
+        placeholders[f"HEADER_TEXT_{seq}"] = entry
+
+
+# ================================================================
+#  푸터 분석
+# ================================================================
+
+def _analyze_footer(template_info: dict, placeholders: dict):
+    """Register placeholders for the footer area (in-place).
+
+    A table-type footer is delegated to _analyze_table_area; any other
+    existing footer yields a single PAGE_NUMBER placeholder.
+    """
+    footer = template_info.get("footer", {})
+    if not (footer and footer.get("exists")):
+        return
+
+    if footer.get("type") == "table" and footer.get("table"):
+        _analyze_table_area(footer["table"], "FOOTER", "footer",
+                            placeholders)
+        return
+
+    page_number = {"type": "page_number", "pattern": "페이지 번호"}
+    page_number["example"] = "1"
+    page_number["location"] = "footer"
+    placeholders["PAGE_NUMBER"] = page_number
+
+
+# ================================================================
+#  헤더/푸터 공통: 표 형태 영역 분석
+# ================================================================
+
+def _analyze_table_area(tbl: dict, prefix: str, location: str,
+                        placeholders: dict):
+    """Map a table-shaped header/footer onto placeholders.
+
+    Naming: {prefix}_R{row}_C{col} for cells with zero or one line,
+    {prefix}_R{row}_C{col}_LINE_{n} for each line of a multi-line cell.
+
+    Args:
+        tbl: header["table"] or footer["table"]
+        prefix: "HEADER" or "FOOTER"
+        location: "header" or "footer"
+        placeholders: result dict (modified in place)
+    """
+    def _store(key, sample):
+        entry = _classify_text(sample)
+        entry["example"] = sample.strip()
+        entry["location"] = location
+        placeholders[key] = entry
+
+    for row_no, row in enumerate(tbl.get("rows", []), start=1):
+        for col_no, cell in enumerate(row, start=1):
+            cell_key = f"{prefix}_R{row_no}_C{col_no}"
+            lines = cell.get("lines", [])
+            line_count = len(lines)
+
+            if line_count == 0:
+                # no text at all: logo or spacer cell
+                placeholders[cell_key] = {
+                    "type": "empty",
+                    "pattern": "빈 셀 (로고/여백)",
+                    "example": "",
+                    "location": location,
+                }
+            elif line_count == 1:
+                _store(cell_key, lines[0])
+            else:
+                for line_no, line_text in enumerate(lines, start=1):
+                    _store(f"{cell_key}_LINE_{line_no}", line_text)
+
+
+# ================================================================
+#  제목 분석
+# ================================================================
+
+def _analyze_title(template_info: dict, semantic_map: dict,
+                   placeholders: dict):
+    """Register placeholders for the title block (in-place).
+
+    Every non-empty cell of the title table becomes TITLE_R{r}_C{c}
+    (per the original note, the same naming rule as
+    template_manager._build_title_block_html()). Afterwards the TITLE_R*
+    placeholder with the longest example is forced to type "doc_title".
+    """
+    title_idx = semantic_map.get("title_table")
+    if title_idx is None:
+        return
+
+    title_tbl = next(
+        (t for t in template_info.get("tables", []) if t["index"] == title_idx),
+        None,
+    )
+    if not title_tbl:
+        return
+
+    # one placeholder per non-empty cell; blank cells get none
+    for row_no, row in enumerate(title_tbl.get("rows", []), start=1):
+        for col_no, cell in enumerate(row, start=1):
+            sample = cell.get("text", "").strip()
+            if sample:
+                entry = _classify_text(sample)
+                entry["example"] = sample
+                entry["location"] = "title_block"
+                placeholders[f"TITLE_R{row_no}_C{col_no}"] = entry
+
+    # the cell holding the longest text is treated as the document title;
+    # on ties the earliest key wins, and empty examples never qualify
+    def _example_len(key):
+        return len(placeholders[key].get("example", ""))
+
+    title_keys = [k for k in placeholders if k.startswith("TITLE_R")]
+    winner = max(title_keys, key=_example_len, default=None)
+    if winner and _example_len(winner) > 0:
+        placeholders[winner]["type"] = "doc_title"
+        placeholders[winner]["pattern"] = "문서 제목"
+
+
+# ================================================================
+#  섹션 분석
+# ================================================================
+
+def _analyze_sections(semantic_map: dict, placeholders: dict,
+                      template_info: dict = None):
+    """Register SECTION_n_TITLE / SECTION_n_CONTENT placeholders.
+
+    When content_order contains at least one paragraph, SECTION_n_CONTENT
+    is omitted (individual PARA_n placeholders carry the body instead).
+    With no sections at all, a single empty SECTION_1 is registered.
+    """
+    order = template_info.get("content_order", []) if template_info else []
+    body_in_paragraphs = bool(order) and any(
+        entry['type'] == 'paragraph' for entry in order)
+
+    def _register(number, title):
+        placeholders[f"SECTION_{number}_TITLE"] = {
+            "type": "section_title", "pattern": "섹션 제목",
+            "example": title, "location": "body"
+        }
+        if body_in_paragraphs:
+            return
+        placeholders[f"SECTION_{number}_CONTENT"] = {
+            "type": "section_content", "pattern": "섹션 본문",
+            "example": "", "location": "body"
+        }
+
+    sections = semantic_map.get("sections", [])
+    if not sections:
+        _register(1, "")
+        return
+
+    for number, sec in enumerate(sections, start=1):
+        # a section is either a bare title string or a dict with "title"
+        _register(number, sec if isinstance(sec, str) else sec.get("title", ""))
+
+# ================================================================
+#  content_order 기반 문단/이미지 분석 (v5.2+)
+# ================================================================
+
+def _analyze_content_order(template_info: dict, semantic_map: dict,
+                           placeholders: dict):
+    """Create PARA_n / IMAGE_n placeholders from content_order (in-place).
+
+    Does nothing when content_order is missing or holds no paragraph
+    (legacy compatibility). Tables and empty items are skipped here.
+    """
+    content_order = template_info.get("content_order")
+    if not content_order:
+        return
+    if all(entry['type'] != 'paragraph' for entry in content_order):
+        return
+
+    # section-title patterns (original note: same as template_manager)
+    heading_patterns = tuple(re.compile(src) for src in (
+        r'^\d+\.\s+\S',
+        r'^[ⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩ]\.\s*\S',
+        r'^제\s*\d+\s*[장절항]\s*\S',
+    ))
+
+    paragraph_no = 0
+    image_no = 0
+    heading_no = 0
+
+    for entry in content_order:
+        kind = entry['type']
+
+        # tables are handled by _analyze_tables; empty items carry nothing
+        if kind in ('empty', 'table'):
+            continue
+
+        if kind == 'image':
+            image_no += 1
+            image_key = f"IMAGE_{image_no}"
+            placeholders[image_key] = {
+                "type": "image",
+                "pattern": "이미지",
+                "example_ref": entry.get("binaryItemIDRef", ""),
+                "location": "body"
+            }
+            caption_text = entry.get("text", "")
+            if caption_text:
+                placeholders[f"{image_key}_CAPTION"] = {
+                    "type": "image_caption",
+                    "pattern": "이미지 캡션",
+                    "example": caption_text,
+                    "location": "body"
+                }
+            continue
+
+        if kind != 'paragraph':
+            continue
+
+        body_text = entry.get('text', '')
+
+        # heading paragraph -> SECTION_n_TITLE, unless already registered
+        # (e.g. by _analyze_sections)
+        if any(rx.match(body_text) for rx in heading_patterns):
+            heading_no += 1
+            placeholders.setdefault(f"SECTION_{heading_no}_TITLE", {
+                "type": "section_title",
+                "pattern": "섹션 제목",
+                "example": body_text,
+                "location": "body"
+            })
+            continue
+
+        # ordinary paragraph
+        paragraph_no += 1
+        runs = entry.get('runs', [])
+
+        if len(runs) <= 1:
+            single = _classify_text(body_text)
+            single["example"] = body_text[:100]   # examples are capped at 100 chars
+            single["location"] = "body"
+            placeholders[f"PARA_{paragraph_no}"] = single
+            continue
+
+        # several runs -> one placeholder per run
+        for run_no, run in enumerate(runs, start=1):
+            run_text = run.get("text", "")
+            piece = _classify_text(run_text)
+            piece["example"] = run_text[:100]
+            piece["location"] = "body"
+            piece["run_index"] = run_no
+            placeholders[f"PARA_{paragraph_no}_RUN_{run_no}"] = piece
+
+
+# ================================================================
+#  표 분석 → placeholder + 표 가이드
+# ================================================================
+
+def _analyze_tables(template_info: dict, semantic_map: dict,
+                    placeholders: dict, table_guide: dict):
+    """Turn body data tables into placeholders plus a table_guide entry.
+
+    Tables are numbered from 1 by their position in
+    semantic_map["body_tables"]; an index with no matching table is
+    skipped but still consumes its number.
+    """
+    all_tables = template_info.get("tables", [])
+    roles_by_index = semantic_map.get("table_roles", {})
+
+    for seq, tbl_idx in enumerate(semantic_map.get("body_tables", []), start=1):
+        tbl = next((t for t in all_tables if t["index"] == tbl_idx), None)
+        if not tbl:
+            continue
+
+        # role keys may be the index itself or its string form
+        role_info = roles_by_index.get(tbl_idx, roles_by_index.get(str(tbl_idx), {}))
+        col_headers = role_info.get("col_headers", [])
+        if col_headers:
+            col_cnt = len(col_headers)
+        else:
+            col_cnt = tbl.get("colCnt", 0)
+
+        where = f"table_{seq}"
+
+        # column-header placeholders
+        for col_no, header_text in enumerate(col_headers, start=1):
+            placeholders[f"TABLE_{seq}_H_C{col_no}"] = {
+                "type": "table_header", "pattern": "표 열 제목",
+                "example": header_text, "location": where
+            }
+
+        # body placeholder
+        placeholders[f"TABLE_{seq}_BODY"] = {
+            "type": "table_body",
+            "pattern": "표 데이터 행들 (HTML <tr> 반복)",
+            "example": "",
+            "location": where
+        }
+
+        # table guide
+        table_guide[str(seq)] = {
+            "col_headers": col_headers,
+            "col_count": col_cnt,
+            "row_count": tbl.get("rowCnt", 0),
+            "merge_pattern": _detect_merge_pattern(tbl),
+            "bullet_chars": _detect_bullet_chars(tbl),
+            "example_rows": _extract_example_rows(tbl, role_info),
+            "col_types": _classify_columns(col_headers),
+            "row_bf_pattern": _extract_row_bf_pattern(tbl, role_info),
+        }
+
+
+def _detect_merge_pattern(tbl: dict) -> dict:
+    """Detect cell-merge patterns per column.
+
+    Returns a dict "col_{colAddr}" -> "row_group" | "col_span". The first
+    label recorded for a column is kept; rowSpan is checked before colSpan.
+    """
+    span_labels = (("rowSpan", "row_group"), ("colSpan", "col_span"))
+    merged = {}
+    for row in tbl.get("rows", []):
+        for cell in row:
+            key = f"col_{cell.get('colAddr', 0)}"
+            for span_attr, label in span_labels:
+                if cell.get(span_attr, 1) > 1 and key not in merged:
+                    merged[key] = label
+    return merged
+
+
+def _detect_bullet_chars(tbl: dict) -> list:
+    """Detect bullet markers at the start of table cell lines.
+
+    Returns the sorted list of distinct markers found, each with a
+    trailing space.
+    """
+    marker_patterns = [
+        (r'^-\s',  '- '),  (r'^·\s',  '· '),  (r'^•\s',  '• '),
+        (r'^▸\s',  '▸ '),  (r'^▶\s',  '▶ '),  (r'^※\s',  '※ '),
+        (r'^◈\s',  '◈ '),  (r'^○\s',  '○ '),  (r'^●\s',  '● '),
+    ]
+    stripped_lines = (
+        line.strip()
+        for row in tbl.get("rows", [])
+        for cell in row
+        for line in cell.get("lines", [])
+    )
+    found = {
+        marker
+        for candidate in stripped_lines
+        for regex, marker in marker_patterns
+        if re.match(regex, candidate)
+    }
+    return sorted(found)
+
+
+def _extract_example_rows(tbl: dict, role_info: dict) -> list:
+    """Extract up to three example rows from the data rows.
+
+    Rows up to and including role_info["header_row"] are skipped. Cell
+    text longer than 80 characters is cut to 77 characters plus "...".
+    """
+    last_header = role_info.get("header_row")
+    if last_header is None:
+        last_header = -1
+
+    def _shorten(cell):
+        value = cell.get("text", "").strip()
+        return value[:77] + "..." if len(value) > 80 else value
+
+    data_rows = [
+        row for position, row in enumerate(tbl.get("rows", []))
+        if position > last_header
+    ]
+    return [[_shorten(cell) for cell in row] for row in data_rows[:3]]
+
+
+def _classify_columns(col_headers: list) -> list:
+    """Infer each column's purpose from its header keyword.
+
+    A header must equal a keyword exactly (after stripping) to be typed;
+    anything else is "text". The returned "col" is the 0-based index.
+    """
+    type_map = {
+        "category": ['구분', '분류', '항목', '카테고리'],
+        "content":  ['내용', '설명', '상세', '세부내용'],
+        "note":     ['비고', '참고', '기타', '메모'],
+        "date":     ['날짜', '일자', '일시', '기간'],
+        "person":   ['담당', '담당자', '작성자', '책임'],
+        "number":   ['수량', '금액', '단가', '합계'],
+    }
+    # keywords are unique across types, so a flat reverse lookup is equivalent
+    keyword_to_type = {
+        keyword: kind
+        for kind, keywords in type_map.items()
+        for keyword in keywords
+    }
+    return [
+        {"col": position,
+         "type": keyword_to_type.get(header.strip(), "text"),
+         "header": header.strip()}
+        for position, header in enumerate(col_headers)
+    ]
+
+def _extract_row_bf_pattern(tbl: dict, role_info: dict) -> list:
+    """Per-cell borderFillIDRef pattern of the first data row.
+
+    Per the original note, this tells the AI which class="bf-{id}" to put
+    on each generated TABLE_BODY <td>.
+    Example: [{"col": 0, "bf_class": "bf-12", "colSpan": 1, "rowSpan": 1}, ...]
+
+    Returns [] when there is no row after role_info["header_row"].
+    """
+    last_header = role_info.get("header_row")
+    if last_header is None:
+        last_header = -1
+
+    # sentinel, so that "no data row" is distinct from any real row value
+    no_row = object()
+    first_data_row = next(
+        (row for position, row in enumerate(tbl.get("rows", []))
+         if position > last_header),
+        no_row,
+    )
+    if first_data_row is no_row:
+        return []
+
+    def _describe(position, cell):
+        bf_id = cell.get("borderFillIDRef")
+        return {
+            # fall back to the cell's position in the row when colAddr is absent
+            "col": cell.get("colAddr", position),
+            "bf_class": f"bf-{bf_id}" if bf_id else "",
+            "colSpan": cell.get("colSpan", 1),
+            "rowSpan": cell.get("rowSpan", 1),
+        }
+
+    return [_describe(position, cell)
+            for position, cell in enumerate(first_data_row)]
+# ================================================================
+#  작성 패턴 분석
+# ================================================================
+
+def _analyze_writing_patterns(template_info: dict,
+                              semantic_map: dict) -> dict:
+    """Analyze document-wide writing patterns.
+
+    Collects bullet markers and line lengths from every table cell line
+    and every content_order paragraph, plus numbering/bullet definitions,
+    the primary Hangul font and the body font size.
+
+    Args:
+        template_info: template extraction result; reads "tables",
+            "numbering", "content_order", "fonts" and "char_styles"
+        semantic_map: accepted for signature parity; not read here
+
+    Returns:
+        dict with "bullet_styles" (sorted list), "numbering_patterns",
+        "avg_line_length" (rounded int, 0 when there is no text),
+        "font_primary" and "font_size_body"
+    """
+    result = {
+        "bullet_styles": [],
+        "numbering_patterns": [],
+        "avg_line_length": 0,
+        "font_primary": "",
+        "font_size_body": ""
+    }
+
+    bullet_re = re.compile(r'^[-·•▸▶※◈○●]\s')
+    all_bullets = set()
+    lengths = []
+
+    # bullets + line lengths from every table cell line
+    for tbl in template_info.get("tables", []):
+        for row in tbl.get("rows", []):
+            for cell in row:
+                for line in cell.get("lines", []):
+                    stripped = line.strip()
+                    if not stripped:
+                        continue
+                    lengths.append(len(stripped))
+                    if bullet_re.match(stripped):
+                        all_bullets.add(stripped[0] + " ")
+
+    # numbering definitions: keep the patterns of the first three levels
+    numbering = template_info.get("numbering", {})
+    for num in numbering.get("numberings", []):
+        patterns = [lv.get("pattern", "") for lv in num.get("levels", [])[:3]]
+        if patterns:
+            result["numbering_patterns"].append(patterns)
+
+    for bullet_def in numbering.get("bullets", []):
+        char = bullet_def.get("char", "")
+        if char:
+            all_bullets.add(char + " ")
+
+    # bullets + lengths from content_order paragraphs
+    # (`or []` tolerates an explicit None value)
+    for item in template_info.get("content_order") or []:
+        if item['type'] != 'paragraph':
+            continue
+        text = item.get('text', '').strip()
+        if not text:
+            continue
+        lengths.append(len(text))
+        if bullet_re.match(text):
+            all_bullets.add(text[0] + " ")
+
+    # Finalize only after ALL sources were scanned. Previously this was
+    # assigned before the paragraph scan, so paragraph bullets were lost.
+    result["bullet_styles"] = sorted(all_bullets)
+
+    if lengths:
+        result["avg_line_length"] = round(sum(lengths) / len(lengths))
+
+    # primary font: first entry of the HANGUL font list
+    hangul = template_info.get("fonts", {}).get("HANGUL", [])
+    if isinstance(hangul, list) and hangul:
+        result["font_primary"] = hangul[0].get("face", "")
+
+    # body font size: first char style, per the original note the id=0 default
+    char_styles = template_info.get("char_styles", [])
+    if char_styles:
+        result["font_size_body"] = f"{char_styles[0].get('height_pt', 10)}pt"
+
+    return result
--- a/handlers/custom_doc_type.py
+++ b/handlers/custom_doc_type.py
@@ -0,0 +1,555 @@
+# -*- coding: utf-8 -*-
+"""
+사용자 정의 문서 유형 프로세서 (v2.1 - 템플릿 기반)
+- template.html 로드
+- config.json의 구조/가이드 활용
+- 사용자 입력 내용을 템플릿에 정리하여 채움
+- 창작 X, 정리/재구성 O
+
+★ v2.1 변경사항:
+- 한글 포함 placeholder 지원 (TABLE_1_H_구분 등)
+- TABLE_*_BODY / TABLE_*_H_* placeholder 구분 처리
+- 개조식 항목 <ul class="bullet-list"> 래핑
+- 페이지 분량 제한 프롬프트 강화
+- 헤더/푸터 다중행 placeholder 설명 추가
+"""
+
+import json
+import re
+import sys, os
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+from .template_manager import TemplateManager
+from pathlib import Path
+from .common import call_claude, extract_html
+
+
+# Placeholder regex that also accepts Hangul: matches {{NAME}} where NAME is
+# made of ASCII letters, digits, underscore and characters in U+AC00-U+D7AF.
+PH_PATTERN = re.compile(r'\{\{([A-Za-z0-9_\uAC00-\uD7AF]+)\}\}')
+
+
+class CustomDocTypeProcessor:
+    """사용자 정의 문서 유형 처리기 (템플릿 기반)"""
+    
+    def __init__(self):
+        """Set the user doc-type directory and create a TemplateManager."""
+        # relative path: resolved against the process working directory
+        self.doc_types_user = Path('templates/user/doc_types')
+        self.template_manager = TemplateManager()
+    
+    def load_config(self, doc_type_id: str) -> dict:
+        """Load config.json for a document type.
+
+        Raises:
+            FileNotFoundError: when <doc_types_user>/<doc_type_id>/config.json
+                does not exist
+        """
+        cfg_file = self.doc_types_user / doc_type_id / 'config.json'
+        if cfg_file.exists():
+            with open(cfg_file, 'r', encoding='utf-8') as handle:
+                parsed_config = json.load(handle)
+            return parsed_config
+
+        raise FileNotFoundError(f"문서 유형을 찾을 수 없습니다: {doc_type_id}")
+
+    def load_content_prompt(self, doc_type_id: str, template_id: str = None) -> dict:
+        """Load content_prompt.json (doc_type folder first, template folder fallback).
+
+        Returns an empty dict when neither location has the file.
+        """
+        # (1) doc_type folder
+        target = self.doc_types_user / doc_type_id / 'content_prompt.json'
+        if not target.exists():
+            # (2) template folder fallback, only when a template_id was given
+            if not template_id:
+                return {}
+            target = Path('templates/user/templates') / template_id / 'content_prompt.json'
+            if not target.exists():
+                return {}
+
+        with open(target, 'r', encoding='utf-8') as handle:
+            return json.load(handle)
+
+    def load_template(self, doc_type_id: str) -> str:
+        """Load the template HTML for a document type.
+
+        Looks first at the template referenced by config["template_id"]
+        (via template_manager), then at a legacy template.html in the
+        doc-type folder. Returns None when neither yields HTML.
+        """
+        # (1) template referenced from the config (separated structure)
+        tpl_id = self.load_config(doc_type_id).get("template_id")
+        if tpl_id:
+            tpl_data = self.template_manager.load_template(tpl_id)
+            if "html" in tpl_data:
+                return tpl_data["html"]
+
+        # (2) backward compatibility: template.html next to config.json
+        legacy_file = self.doc_types_user / doc_type_id / 'template.html'
+        if not legacy_file.exists():
+            return None
+
+        with open(legacy_file, 'r', encoding='utf-8') as handle:
+            return handle.read()
+    
+    def generate(self, content: str, doc_type_id: str, options: dict = None,
+                 image_data: dict = None) -> dict:
+        """Generate a document from the template plus user input.
+
+        Any exception is caught and reported as
+        {'error': <message>, 'trace': <traceback text>}.
+
+        Args:
+            content: user input text
+            doc_type_id: document type ID
+            options: extra options (instruction etc.)
+            image_data: image dict {binaryItemIDRef: {"base64": ..., "mime": ...}};
+                        when None, loaded through self._load_image_data(config)
+        """
+        try:
+            config = self.load_config(doc_type_id)
+            template = self.load_template(doc_type_id)
+
+            # no template available -> guide-based generation
+            if not template:
+                return self._generate_with_guide(content, config, options)
+
+            # prepare image data only when the caller supplied none
+            if image_data is None:
+                image_data = self._load_image_data(config)
+
+            return self._generate_with_template(
+                content, config, template, options, image_data
+            )
+
+        except Exception as e:
+            import traceback
+            failure = {'error': str(e)}
+            failure['trace'] = traceback.format_exc()
+            return failure
+    
+    def _generate_with_template(self, content: str, config: dict, 
+                                template: str, options: dict,
+                                image_data: dict = None) -> dict:
+        """Template-based generation driven by content_prompt.json.
+
+        Builds one prompt from the config context, the content_prompt
+        guides and the placeholder keys found in the template, sends it
+        through call_claude, parses the reply as JSON and fills the template.
+
+        Args:
+            content: user input text (only the first 6000 chars go into the prompt)
+            config: loaded config.json; reads 'context', 'structure', 'id', 'template_id'
+            template: template HTML containing {{PLACEHOLDER}} markers
+            options: optional dict; only 'instruction' is read
+            image_data: passed through unchanged to self._fill_template
+
+        Returns:
+            {'success': True, 'html': ...} on success,
+            {'error': 'JSON extraction failed', 'raw': ...} when no JSON was extracted,
+            {'error': ..., 'trace': ...} when the call/parse/fill step raises
+        """
+        
+        context = config.get('context', {})
+        structure = config.get('structure', {})
+        instruction = options.get('instruction', '') if options else ''
+        
+        # load content_prompt (doc_type folder first, template folder fallback)
+        doc_type_id = config.get('id', '')
+        template_id = config.get('template_id', '')
+        cp = self.load_content_prompt(doc_type_id, template_id)
+        
+        placeholders_info = cp.get('placeholders', {})
+        table_guide = cp.get('table_guide', {})
+        writing_guide = cp.get('writing_guide', {})
+        doc_info = cp.get('document', {})
+        
+        # build the placeholder guide (type / pattern / example / location per key)
+        ph_guide_lines = []
+        for ph_key, ph_info in placeholders_info.items():
+            ph_type = ph_info.get('type', 'text')
+            pattern = ph_info.get('pattern', '')
+            example = ph_info.get('example', '')
+            location = ph_info.get('location', '')
+            
+            line = f"  {ph_key}:"
+            line += f"\n    type: {ph_type}"
+            line += f"\n    pattern: {pattern}"
+            # the example line is omitted when the example is empty
+            if example:
+                line += f"\n    example: \"{example}\""
+            line += f"\n    location: {location}"
+            ph_guide_lines.append(line)
+        
+        ph_guide = "\n".join(ph_guide_lines) if ph_guide_lines else "(no guide available)"
+        
+        # build the table guide
+        tbl_guide_lines = []
+        for tbl_num, tbl_info in table_guide.items():
+            headers = tbl_info.get('col_headers', [])
+            col_types = tbl_info.get('col_types', [])
+            merge = tbl_info.get('merge_pattern', {})
+            bullets = tbl_info.get('bullet_chars', [])
+            examples = tbl_info.get('example_rows', [])
+            
+            tbl_guide_lines.append(f"\n### Table {tbl_num}:")
+            tbl_guide_lines.append(f"  Columns: {json.dumps(headers, ensure_ascii=False)}")
+            if col_types:
+                for ct in col_types:
+                    tbl_guide_lines.append(
+                        f"  Col {ct['col']} '{ct['header']}': {ct['type']}")
+            if merge:
+                tbl_guide_lines.append(f"  Merge: {json.dumps(merge, ensure_ascii=False)}")
+                tbl_guide_lines.append(
+                    f"  → row_group means: use rowspan to group rows by that column")
+            if bullets:
+                tbl_guide_lines.append(f"  Bullet chars: {bullets}")
+
+            # add row_bf_pattern (per-cell class / span hints)
+            bf_pattern = tbl_info.get('row_bf_pattern', [])
+            if bf_pattern:
+                tbl_guide_lines.append(f"  Row cell classes (apply to each <td>):")
+                for bp in bf_pattern:
+                    col = bp.get('col', '?')
+                    bf_cls = bp.get('bf_class', '')
+                    cs = bp.get('colSpan', 1)
+                    rs = bp.get('rowSpan', 1)
+                    # spans are mentioned only when greater than 1
+                    span_info = ""
+                    if cs > 1: span_info += f" colSpan={cs}"
+                    if rs > 1: span_info += f" rowSpan={rs}"
+                    tbl_guide_lines.append(
+                        f'    col_{col}: class="{bf_cls}"{span_info}')
+
+            if examples:
+                tbl_guide_lines.append(f"  Example rows:")
+                # at most two example rows go into the prompt
+                for ex in examples[:2]:
+                    tbl_guide_lines.append(
+                        f"    {json.dumps(ex, ensure_ascii=False)}")
+        
+        tbl_guide = "\n".join(tbl_guide_lines) if tbl_guide_lines else "No table guide"
+        
+        # page estimate (defaults to 1)
+        page_estimate = structure.get('pageEstimate', 1)
+        
+        # placeholder keys found in the template, de-duplicated in first-seen order
+        placeholders = PH_PATTERN.findall(template)
+        placeholders = list(dict.fromkeys(placeholders))
+        
+        # NOTE(review): the two "5b" lines below end with a literal \n escape
+        # inside this non-raw f-string, so each adds an extra blank line to the
+        # prompt, and \" renders as a plain quote. Confirm this is intended.
+        prompt = f"""Fill the template placeholders with reorganized content.
+
+## Document Definition
+{context.get('documentDefinition', 'structured document')}
+
+## Context
+- Type: {context.get('documentType', '')}
+- Purpose: {context.get('purpose', '')}
+- Audience: {context.get('audience', '')}
+- Tone: {context.get('tone', '')}
+- Layout: {doc_info.get('layout', 'portrait')}
+- Page limit: {page_estimate} page(s). Be CONCISE.
+
+## Writing Style
+- Bullet chars: {writing_guide.get('bullet_styles', ['- ', '· '])}
+- Primary font: {writing_guide.get('font_primary', '')}
+- Keep lines ~{writing_guide.get('avg_line_length', 25)} chars average
+
+## Placeholder Guide (type, pattern, example for each)
+{ph_guide}
+
+## Table Structure Guide
+{tbl_guide}
+
+## Input Content
+{content[:6000] if content else '(empty)'}
+
+## Additional Instructions
+{instruction if instruction else 'None'}
+
+## ALL Placeholders to fill (JSON keys):
+{json.dumps(placeholders, ensure_ascii=False)}
+
+## ★ Critical Rules
+1. Output ONLY valid JSON — every placeholder above as a key
+2. HEADER/FOOTER: use the PATTERN and modify the EXAMPLE for new content
+   - department → user's department or keep example
+   - author → user's name or keep example  
+   - date → today's date in same format
+   - slogan → keep exactly as example
+3. TITLE: create title matching doc_title pattern from input content
+4. TABLE_*_H_*: plain text column headers (use col_headers from guide)
+5. TABLE_*_BODY: HTML <tr> rows only (no <table> wrapper)
+   - Follow merge_pattern: row_group → use rowspan
+   - Use bullet_chars from guide inside cells
+   - Match example_rows structure
+5b. TABLE_*_BODY <td>: apply class from 'Row cell classes' guide\n
+   - e.g. <td class=\"bf-12\">content</td>\n
+6. SECTION_*_CONTENT: use bullet style from writing guide
+7. Empty string "" for inapplicable placeholders
+8. Do NOT invent content — reorganize input only
+9. PARA_*: reorganize input text for each paragraph placeholder
+   - Keep the meaning, improve clarity and structure
+   - PARA_n_RUN_m: if a paragraph has multiple runs, fill each run separately
+10. IMAGE_*: output exactly "KEEP_ORIGINAL" (image is auto-inserted from source)
+11. IMAGE_*_CAPTION: write a concise caption describing the image context
+12. Total volume: {page_estimate} page(s)
+
+Output ONLY valid JSON:"""
+
+        try:
+            # presumably (system prompt, user prompt, max_tokens) -> reply text;
+            # TODO confirm against .common.call_claude
+            response = call_claude(
+                "You fill document template placeholders with reorganized content. "
+                "Output valid JSON only. Respect the template structure exactly.",
+                prompt,
+                max_tokens=6000
+            )
+            
+            fill_data = self._extract_json(response)
+            
+            # a falsy result is reported as an extraction failure,
+            # with the first 500 chars of the reply attached
+            if not fill_data:
+                return {'error': 'JSON extraction failed', 'raw': response[:500]}
+            
+            html = self._fill_template(template, fill_data, image_data)
+            
+            return {'success': True, 'html': html}
+            
+        except Exception as e:
+            import traceback
+            return {'error': str(e), 'trace': traceback.format_exc()}
+    
+    def _fill_template(self, template: str, data: dict,
+                       image_data: dict = None) -> str:
+        """Substitute ``{{KEY}}`` placeholders in the HTML template.
+
+        Args:
+            template: HTML template text containing ``{{KEY}}`` placeholders.
+            data: Mapping of placeholder key -> value (the fill data).
+            image_data: Optional image mapping; it is only forwarded to
+                ``_build_image_ref_map`` to obtain the ``<img>`` tags.
+
+        Returns:
+            The template with every key of ``data`` substituted and any
+            remaining text matching ``PH_PATTERN`` removed.
+        """
+        html = template
+        image_tags = self._build_image_ref_map(data, image_data)
+        bullet_marks = ('·', '-')
+
+        for key, value in data.items():
+            token = '{{' + key + '}}'
+
+            # IMAGE_n keys ignore the supplied value; the tag comes from
+            # the image map (empty string when the map has no such key).
+            if re.match(r'^IMAGE_\d+$', key):
+                html = html.replace(token, image_tags.get(key, ''))
+                continue
+
+            if isinstance(value, str) and value.strip():
+                rows = [row.strip() for row in value.strip().split('\n')]
+                bullet_rows = [row for row in rows if row.startswith(bullet_marks)]
+
+                # Treated as a bullet list only when there are several lines
+                # and more than half of them start with a bullet mark.
+                if len(rows) > 1 and len(bullet_rows) > len(rows) * 0.5:
+                    items = []
+                    for row in rows:
+                        if row.startswith(bullet_marks):
+                            row = row[1:].strip()
+                        if row:
+                            items.append(row)
+
+                    # Keys matching this pattern get <br>-separated lines
+                    # instead of <ul>; the original author's note says these
+                    # placeholders sit inside <p>/<td>, where a block element
+                    # would break the HTML.
+                    inline_key = re.match(
+                        r'^(PARA_|SECTION_\d+_TITLE|HEADER_|FOOTER_|TITLE_|.*_RUN_)',
+                        key
+                    )
+                    if inline_key:
+                        value = '<br>\n'.join(f'· {item}' for item in items)
+                    else:
+                        list_body = '\n'.join(f'<li>{item}</li>' for item in items)
+                        value = '<ul class="bullet-list">\n' + list_body + '\n</ul>'
+
+            # Falsy values (None, "", 0, ...) are written as an empty string.
+            html = html.replace(token, str(value) if value else '')
+
+        # Strip whatever placeholders were not covered by ``data``.
+        html = PH_PATTERN.sub('', html)
+
+        return html
+
+    def _build_image_ref_map(self, data: dict, image_data: dict = None) -> dict:
+        """Build the ``IMAGE_n`` -> HTML tag mapping.
+
+        The keys of ``image_data`` are sorted and numbered from 1; the n-th
+        key supplies the tag for ``IMAGE_n``. ``data`` is accepted but not
+        read by this method.
+
+        Args:
+            data: Placeholder fill data (unused here).
+            image_data: Mapping ref -> info dict. The info keys read are
+                ``base64``, ``mime`` (default ``image/png``) and ``path``.
+
+        Returns:
+            Dict of ``IMAGE_n`` -> ``<img ...>`` tag, or an HTML comment when
+            the entry has neither base64 data nor a path. Empty dict when
+            ``image_data`` is falsy.
+        """
+        if not image_data:
+            return {}
+
+        tags = {}
+        # NOTE(review): sorted() is lexicographic, so a ref such as "IMG10"
+        # is numbered before "IMG2" — confirm this matches template order.
+        for position, ref in enumerate(sorted(image_data.keys()), start=1):
+            info = image_data[ref]
+            encoded = info.get("base64", "")
+            mime = info.get("mime", "image/png")
+
+            # Prefer an inline data URI; otherwise fall back to a file path.
+            if encoded:
+                src = f"data:{mime};base64,{encoded}"
+            else:
+                src = info.get("path", "")
+
+            if src:
+                tag = (
+                    f'<img src="{src}" '
+                    f'alt="{ref}" style="max-width:100%; height:auto;">'
+                )
+            else:
+                tag = f'<!-- image not found: {ref} -->'
+
+            tags[f"IMAGE_{position}"] = tag
+
+        return tags
+
+    def _load_image_data(self, config: dict) -> dict:
+        """Load the image table of a template from the template folder.
+
+        Two storage layouts are tried, in this order:
+
+        1. ``images.json`` — a JSON object such as
+           ``{"IMG001": {"base64": "iVBOR...", "mime": "image/png"}}``.
+        2. ``images/`` directory — one entry per image file, keyed by the
+           file stem, holding the file path and a MIME type derived from
+           the file extension.
+
+        Args:
+            config: Template configuration; only ``template_id`` is read.
+
+        Returns:
+            Mapping ref -> image info dict, or ``{}`` when there is no
+            template id or no usable image source.
+        """
+        tpl_id = config.get("template_id", "")
+        if not tpl_id:
+            return {}
+
+        tpl_path = Path('templates/user/templates') / tpl_id
+
+        # (1) images.json (base64 storage). An unreadable or malformed file
+        # is skipped on purpose so the images/ folder can still be used,
+        # but only I/O and decoding errors are swallowed (json.JSONDecodeError
+        # and UnicodeDecodeError are both ValueError subclasses).
+        images_json = tpl_path / 'images.json'
+        if images_json.is_file():
+            try:
+                with open(images_json, 'r', encoding='utf-8') as f:
+                    loaded = json.load(f)
+            except (OSError, ValueError):
+                loaded = None
+            # Callers iterate the result as a mapping; anything else
+            # (list, string, number) is treated as unusable.
+            if isinstance(loaded, dict):
+                return loaded
+
+        # (2) images/ folder (file storage)
+        images_dir = tpl_path / 'images'
+        if images_dir.is_dir():
+            mime_map = {
+                '.png': 'image/png', '.jpg': 'image/jpeg',
+                '.jpeg': 'image/jpeg', '.gif': 'image/gif',
+                '.bmp': 'image/bmp', '.svg': 'image/svg+xml',
+                '.wmf': 'image/x-wmf', '.emf': 'image/x-emf',
+            }
+            result = {}
+            for img_file in sorted(images_dir.iterdir()):
+                mime = mime_map.get(img_file.suffix.lower())
+                if mime is None:
+                    continue  # not a recognised image extension
+                # The file stem is used as the image reference key.
+                result[img_file.stem] = {
+                    "path": str(img_file),
+                    "mime": mime,
+                }
+            return result
+
+        return {}
+    
+    def _extract_json(self, response: str) -> dict:
+        """Extract the first JSON object found in a model response.
+
+        Lookup order:
+          1. the contents of a ```json ... ``` fenced block;
+          2. the first position of ``{`` from which a complete JSON object
+             can be decoded.
+
+        Decoding is delegated to ``json.JSONDecoder.raw_decode`` so that
+        braces inside string values and stray ``}`` characters in the
+        surrounding prose do not derail the search.
+
+        Args:
+            response: Raw text returned by the model.
+
+        Returns:
+            The decoded dict, or ``None`` when no JSON object is found.
+        """
+        if not response:
+            return None
+
+        # (1) fenced ```json block
+        match = re.search(r'```json\s*(.*?)\s*```', response, re.DOTALL)
+        if match:
+            try:
+                fenced = json.loads(match.group(1))
+            except ValueError:
+                fenced = None
+            # Only an object is usable as placeholder data.
+            if isinstance(fenced, dict):
+                return fenced
+
+        # (2) try to decode an object starting at each '{' in turn
+        decoder = json.JSONDecoder()
+        pos = response.find('{')
+        while pos != -1:
+            try:
+                candidate, _end = decoder.raw_decode(response, pos)
+            except ValueError:
+                candidate = None
+            if isinstance(candidate, dict):
+                return candidate
+            pos = response.find('{', pos + 1)
+
+        return None
+    
+    def _generate_with_guide(self, content: str, config: dict, options: dict) -> dict:
+        """Generate an HTML document from the analysis guide (no template).
+
+        Builds a system prompt and a user prompt from ``config`` and sends
+        them through ``call_claude``; the HTML is pulled out of the reply
+        with ``extract_html``.
+
+        Args:
+            content: User-supplied text; only the first 6000 characters are
+                placed in the prompt.
+            config: Analysis result; the ``context``, ``structure``,
+                ``layout`` and ``style`` sub-dicts are read.
+            options: Optional dict; only ``instruction`` is read.
+
+        Returns:
+            ``{'success': True, 'html': ...}`` on success, otherwise a dict
+            with ``error`` (plus ``trace`` when an exception was caught).
+        """
+        context = config.get('context', {})
+        structure = config.get('structure', {})
+        layout = config.get('layout', {})
+        style = config.get('style', {})
+
+        instruction = (options or {}).get('instruction', '')
+        doc_type = context.get('documentType', '문서')
+        page_estimate = structure.get('pageEstimate', 1)
+
+        # One descriptive entry per section of the analysed layout.
+        section_blocks = []
+        for num, section in enumerate(layout.get('sections', []), 1):
+            section_blocks.append(f"""
+{num}. {section.get('name', f'섹션{num}')}
+   - 작성 스타일: {section.get('writingStyle', '혼합')}
+   - 불릿: {'있음' if section.get('hasBulletIcon') else '없음'}
+   - 표: {'있음' if section.get('hasTable') else '없음'}
+   - 내용: {section.get('contentDescription', '')}
+""")
+        sections_desc = "".join(section_blocks)
+
+        principles = "\n".join(
+            '- ' + p for p in structure.get('writingPrinciples', []))
+        mistakes = "\n".join(
+            '- ' + m for m in structure.get('commonMistakes', []))
+
+        system_prompt = f"""당신은 "{doc_type}" 작성 전문가입니다.
+
+## 문서 특성
+- 목적: {context.get('purpose', '')}
+- 대상: {context.get('audience', '')}
+- 톤: {context.get('tone', '')}
+- 전체 스타일: {structure.get('writingStyle', '혼합')}
+- 분량: 약 {page_estimate}페이지
+
+## 문서 구조
+{sections_desc}
+
+## 작성 원칙
+{principles}
+
+## 주의사항
+{mistakes}
+
+## 핵심!
+- 사용자 입력을 **정리/재구성**하세요
+- **새로 창작하지 마세요**
+- 분석된 문서 구조를 그대로 따르세요
+- 개조식 섹션은 "· " 불릿 사용
+- 분량을 {page_estimate}페이지 내로 제한하세요"""
+
+        user_prompt = f"""다음 내용을 "{doc_type}" 양식으로 정리해주세요.
+
+## 입력 내용
+{content[:6000] if content else '(내용 없음)'}
+
+## 추가 요청
+{instruction if instruction else '없음'}
+
+## 출력 형식
+완전한 A4 규격 HTML 문서로 출력하세요.
+- <!DOCTYPE html>로 시작
+- UTF-8 인코딩
+- @page {{ size: A4 }} CSS 포함
+- 폰트: {style.get('font', {}).get('name', '맑은 고딕')}
+- 머릿말/꼬리말 포함
+- 약 {page_estimate}페이지 분량
+
+HTML만 출력하세요."""
+
+        try:
+            response = call_claude(system_prompt, user_prompt, max_tokens=6000)
+            html = extract_html(response)
+
+            if html:
+                return {'success': True, 'html': html}
+            return {'error': 'HTML 생성 실패'}
+
+        except Exception as e:
+            # Failures are reported to the caller as data, not raised.
+            import traceback
+            return {'error': str(e), 'trace': traceback.format_exc()}
--- a/handlers/doc_template_analyzer.py
+++ b/handlers/doc_template_analyzer.py
@@ -0,0 +1,150 @@
+# -*- coding: utf-8 -*-
+"""
+문서 템플릿 분석기 v5.1 (오케스트레이터)
+
+역할: tools/ 모듈을 조합하여 HWPX → 템플릿 정보 추출
+- 직접 파싱 로직 없음 (모두 tools에 위임)
+- 디폴트값 생성 없음 (tools가 None 반환하면 결과에서 제외)
+- 사용자 추가 사항(config.json) → 템플릿에도 반영
+
+구조:
+  tools/
+    page_setup.py   §7  용지/여백
+    font.py         §3  글꼴
+    char_style.py   §4  글자 모양
+    para_style.py   §5  문단 모양
+    border_fill.py  §2  테두리/배경
+    table.py        §6  표
+    header_footer.py §8 머리말/꼬리말
+    section.py      §9  구역 정의
+    style_def.py        스타일 정의
+    numbering.py        번호매기기/글머리표
+    image.py            이미지
+"""
+
+import json
+from pathlib import Path
+from typing import Optional
+
+from .tools import (
+    page_setup,
+    font,
+    char_style,
+    para_style,
+    border_fill,
+    table,
+    header_footer,
+    section,
+    style_def,
+    numbering,
+    image,
+    content_order,
+)
+
+
+class DocTemplateAnalyzer:
+    """Orchestrator that turns parsed HWPX data into template information."""
+
+    # ================================================================
+    #  Phase 1: extraction (runs every tool)
+    # ================================================================
+
+    def analyze(self, parsed: dict) -> dict:
+        """Run every extractor tool against the parsed HWPX result.
+
+        Args:
+            parsed: Parsed HWPX dict. Its ``raw_xml`` entry (default ``{}``)
+                and the dict itself are passed to each tool.
+
+        Returns:
+            Dict holding one entry per tool that returned a non-``None``
+            value. When a tool raises, a ``"key: ExcType: message"`` string
+            is appended to the ``_errors`` list instead.
+        """
+        raw_xml = parsed.get("raw_xml", {})
+
+        # (result key, deferred call) pairs, executed in this order.
+        steps = [
+            ("page",          lambda: page_setup.extract(raw_xml, parsed)),
+            ("fonts",         lambda: font.extract(raw_xml, parsed)),
+            ("char_styles",   lambda: char_style.extract(raw_xml, parsed)),
+            ("para_styles",   lambda: para_style.extract(raw_xml, parsed)),
+            ("border_fills",  lambda: border_fill.extract(raw_xml, parsed)),
+            ("tables",        lambda: table.extract(raw_xml, parsed)),
+            ("header",        lambda: header_footer.extract_header(raw_xml, parsed)),
+            ("footer",        lambda: header_footer.extract_footer(raw_xml, parsed)),
+            ("section",       lambda: section.extract(raw_xml, parsed)),
+            ("styles",        lambda: style_def.extract(raw_xml, parsed)),
+            ("numbering",     lambda: numbering.extract(raw_xml, parsed)),
+            ("images",        lambda: image.extract(raw_xml, parsed)),
+            ("content_order", lambda: content_order.extract(raw_xml, parsed)),
+        ]
+
+        result = {}
+        for key, run in steps:
+            try:
+                value = run()
+            except Exception as exc:
+                # A failing tool is recorded; the remaining tools still run.
+                result.setdefault("_errors", []).append(
+                    f"{key}: {type(exc).__name__}: {exc}"
+                )
+            else:
+                if value is not None:
+                    result[key] = value
+
+        return result
+
+
+    # ================================================================
+    #  Phase 2: merge user additions
+    # ================================================================
+
+    def merge_user_config(self, template_info: dict,
+                          config: dict) -> dict:
+        """Attach the user's overrides from config.json to ``template_info``.
+
+        The ``user_overrides`` value of ``config`` is stored unchanged under
+        ``template_info["user_overrides"]``; nothing else is modified.
+
+        Args:
+            template_info: Result of ``analyze()``.
+            config: Contents of config.json.
+
+        Returns:
+            The same ``template_info`` object (mutated in place when
+            overrides are present).
+        """
+        overrides = config.get("user_overrides", {})
+        if overrides:
+            # Record all user overrides as-is.
+            template_info["user_overrides"] = overrides
+        return template_info
+
+    # ================================================================
+    #  Phase 3: template_info -> style.json
+    # ================================================================
+
+    def save_style(self, template_info: dict,
+                   save_path: Path) -> Path:
+        """Write ``template_info`` to disk as UTF-8 JSON.
+
+        Missing parent directories are created first.
+
+        Args:
+            template_info: Data to serialise.
+            save_path: Destination file path.
+
+        Returns:
+            The destination as a ``Path``.
+        """
+        target = Path(save_path)
+        target.parent.mkdir(parents=True, exist_ok=True)
+
+        with open(target, 'w', encoding='utf-8') as out:
+            json.dump(template_info, out, ensure_ascii=False, indent=2)
+
+        return target
--- a/handlers/doc_type_analyzer.py
+++ b/handlers/doc_type_analyzer.py
--- a/handlers/semantic_mapper.py
+++ b/handlers/semantic_mapper.py
@@ -0,0 +1,382 @@
+# -*- coding: utf-8 -*-
+"""
+Semantic Mapper v1.0
+
+HWPX tools 추출 결과(template_info)에서 각 요소의 "의미"를 판별.
+
+역할:
+  - 표 분류: 헤더표 / 푸터표 / 제목블록 / 데이터표
+  - 섹션 감지: 본문 텍스트에서 섹션 패턴 탐색
+  - 스타일 매핑 준비: charPr→HTML태그, borderFill→CSS클래스 (Phase 2에서 구현)
+
+입력: template_info (DocTemplateAnalyzer.analyze()), parsed (HWPX 파싱 결과)
+출력: semantic_map dict → semantic_map.json으로 저장
+
+★ 위치: template_manager.py, doc_template_analyzer.py 와 같은 디렉토리
+★ 호출: template_manager.extract_and_save() 내에서 analyze() 직후
+"""
+
+import re
+
+
+# ================================================================
+#  메인 엔트리포인트
+# ================================================================
+
+def generate(template_info: dict, parsed: dict) -> dict:
+    """Build the semantic map by combining all classification steps.
+
+    Args:
+        template_info: Extracted template data; ``tables``, ``header`` and
+            ``footer`` are read here, and the whole dict is passed to
+            ``_prepare_style_mappings``.
+        parsed: Parser result, passed to ``_detect_sections``.
+
+    Returns:
+        Dict with the keys ``version`` ("1.0"), ``table_roles``,
+        ``body_tables`` (sorted indices of tables whose role is
+        ``data_table``), ``title_table`` (index of the first table whose
+        role is ``title_block``, else ``None``), ``sections`` and
+        ``style_mappings``.
+    """
+    # (1) classify every table
+    table_roles = _classify_tables(
+        template_info.get("tables", []),
+        template_info.get("header"),
+        template_info.get("footer"),
+    )
+
+    # (2) pick out body tables and the title block
+    body_tables = [
+        idx for idx, info in table_roles.items()
+        if info["role"] == "data_table"
+    ]
+    body_tables.sort()
+
+    title_table = None
+    for idx, info in table_roles.items():
+        if info["role"] == "title_block":
+            title_table = idx
+            break
+
+    # (3) section detection
+    sections = _detect_sections(parsed)
+
+    # (4) style mappings
+    style_mappings = _prepare_style_mappings(template_info)
+
+    return {
+        "version": "1.0",
+        "table_roles": table_roles,
+        "body_tables": body_tables,
+        "title_table": title_table,
+        "sections": sections,
+        "style_mappings": style_mappings,
+    }
+
+
+# ================================================================
+#  표 분류
+# ================================================================
+
+def _classify_tables(tables: list, header: dict | None,
+                     footer: dict | None) -> dict:
+    """Assign a role to each table.
+
+    Roles: ``header_table`` / ``footer_table`` / ``title_block`` /
+    ``data_table``.
+
+    Passes:
+      1. header/footer text match — at least half of the table's distinct
+         cell texts also occur in the header (checked first) or footer;
+      2. title-block shape, decided by ``_is_title_block``;
+      3. every table not classified by passes 1-2 becomes a data table.
+
+    Returns:
+        Dict keyed by each table's ``index`` value.
+    """
+    hf_sources = (
+        ("header", _collect_hf_texts(header)),
+        ("footer", _collect_hf_texts(footer)),
+    )
+
+    roles = {}
+
+    # -- Pass 1: header/footer matching --
+    for tbl in tables:
+        idx = tbl["index"]
+        cell_texts = _collect_table_texts(tbl)
+        if not cell_texts:
+            continue
+
+        for source, hf_texts in hf_sources:
+            if not hf_texts:
+                continue
+            shared = cell_texts & hf_texts
+            if shared and len(shared) / max(len(cell_texts), 1) >= 0.5:
+                roles[idx] = {
+                    "role": f"{source}_table",
+                    "match_source": source,
+                    "matched_texts": list(shared),
+                }
+                break
+
+    # -- Pass 2: title block detection --
+    for tbl in tables:
+        idx = tbl["index"]
+        if idx in roles:
+            continue
+        if _is_title_block(tbl):
+            roles[idx] = {
+                "role": "title_block",
+                "title_text": _extract_longest_text(tbl),
+            }
+
+    # -- Pass 3: the rest are data tables --
+    # Snapshot first so the membership test only reflects passes 1-2.
+    remaining = [tbl for tbl in tables if tbl["index"] not in roles]
+    for tbl in remaining:
+        col_headers = _detect_table_headers(tbl)
+        roles[tbl["index"]] = {
+            "role": "data_table",
+            "header_row": 0 if col_headers else None,
+            "col_headers": col_headers,
+            "row_count": tbl.get("rowCnt", 0),
+            "col_count": tbl.get("colCnt", 0),
+        }
+
+    return roles
+
+
+# ── 표 분류 보조 함수 ──
+
+def _collect_hf_texts(hf_info: dict | None) -> set:
+    """Return the distinct non-empty cell texts of a header/footer table.
+
+    Yields an empty set when ``hf_info`` is falsy or has no ``table``.
+    Cell texts are stripped before being collected.
+    """
+    if not hf_info or not hf_info.get("table"):
+        return set()
+
+    stripped = (
+        cell.get("text", "").strip()
+        for row in hf_info["table"].get("rows", [])
+        for cell in row
+    )
+    return {text for text in stripped if text}
+
+
+def _collect_table_texts(tbl: dict) -> set:
+    """Return the distinct non-empty (stripped) cell texts of a table."""
+    stripped = (
+        cell.get("text", "").strip()
+        for row in tbl.get("rows", [])
+        for cell in row
+    )
+    return {text for text in stripped if text}
+
+
+def _extract_longest_text(tbl: dict) -> str:
+    """Return the longest stripped cell text in the table.
+
+    Ties go to the cell encountered first; an empty string is returned
+    when the table has no cells.
+    """
+    candidates = (
+        cell.get("text", "").strip()
+        for row in tbl.get("rows", [])
+        for cell in row
+    )
+    # max() keeps the first maximal element, matching a strict ">" scan.
+    return max(candidates, key=len, default="")
+
+
+def _is_title_block(tbl: dict) -> bool:
+    """Decide whether a table has the shape of a title block.
+
+    Only single-row tables qualify, and one of these must hold:
+      A) 2 columns and the first ``colWidths_pct`` entry is <= 10;
+      B) 1 column and the first cell's (unstripped) text length is
+         strictly between 5 and 100.
+    """
+    if tbl.get("rowCnt", 0) != 1:
+        return False
+
+    columns = tbl.get("colCnt", 0)
+    widths = tbl.get("colWidths_pct", [])
+
+    # Pattern A: narrow left column + wide right column
+    if columns == 2 and len(widths) >= 2 and widths[0] <= 10:
+        return True
+
+    # Pattern B: a lone title cell
+    if columns != 1:
+        return False
+    rows = tbl.get("rows", [])
+    if not (rows and rows[0]):
+        return False
+    return 5 < len(rows[0][0].get("text", "")) < 100
+
+
+def _detect_table_headers(tbl: dict) -> list:
+    """Return the first row's cell texts when it looks like a header row.
+
+    The first row counts as a header when the table has at least two
+    rows, at least one first-row cell has text, and every non-empty
+    first-row text is 20 characters or shorter. The returned list keeps
+    empty strings for empty cells. Otherwise ``[]`` is returned.
+    """
+    rows = tbl.get("rows", [])
+    if not rows or len(rows) < 2:
+        return []
+
+    labels = [cell.get("text", "").strip() for cell in rows[0]]
+    filled = [label for label in labels if label]
+
+    if filled and all(len(label) <= 20 for label in filled):
+        return labels
+    return []
+
+
+# ================================================================
+#  섹션 감지
+# ================================================================
+
+# (regex, pattern_type) pairs; each regex is anchored at the start of the text.
+_SECTION_PATTERNS = [
+    (r'^(\d+)\.\s+(.+)',                  "numbered"),        # e.g. "1. 개요"
+    (r'^[ⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩ][\.\s]+(.+)',      "roman"),           # e.g. "Ⅰ. 개요"
+    (r'^제\s*(\d+)\s*([장절항])\s*(.+)',    "korean_formal"),   # e.g. "제1장 개요"
+    (r'^[▶►▸●◆■□◎★☆]\s*(.+)',             "bullet_heading"),  # e.g. "▶ 개요"
+]
+
+
+def _detect_sections(parsed: dict) -> list:
+    """Find section headings among the parsed paragraphs.
+
+    Each paragraph is stripped; empty texts and texts longer than 100
+    characters are skipped. The first entry of ``_SECTION_PATTERNS`` that
+    matches decides the pattern type.
+
+    Returns:
+        List of dicts such as
+        ``{"index": 1, "title": "▶ 개요", "pattern_type": "bullet_heading"}``
+        with ``index`` counting from 1 in order of appearance.
+    """
+    found = []
+
+    for raw in _extract_paragraphs(parsed):
+        line = raw.strip()
+        # Overly long text is not considered a heading.
+        if not line or len(line) > 100:
+            continue
+
+        for regex, kind in _SECTION_PATTERNS:
+            hit = re.match(regex, line)
+            if hit is None:
+                continue
+            # Numbers of 100 and above (years etc.) are not treated as
+            # section numbers; the remaining patterns are still tried.
+            if kind == "numbered" and int(hit.group(1)) > 99:
+                continue
+            found.append({
+                "index": len(found) + 1,
+                "title": line,
+                "pattern_type": kind,
+            })
+            break
+
+    return found
+
+
+def _extract_paragraphs(parsed: dict) -> list:
+    """Return the paragraph texts found in the parser result.
+
+    Sources, in priority order:
+      1. ``parsed["paragraphs"]`` — dict items contribute their ``text``
+         value, anything else is converted with ``str()``;
+      2. the first string value in ``parsed["raw_xml"]`` whose key
+         contains "section" (case-insensitive), else
+         ``parsed["section_xml"]``; the contents of its ``<hp:t>``
+         elements are returned stripped, blanks dropped.
+    """
+    provided = parsed.get("paragraphs", [])
+    if provided:
+        texts = []
+        for item in provided:
+            if isinstance(item, dict):
+                texts.append(item.get("text", ""))
+            else:
+                texts.append(str(item))
+        return texts
+
+    # Fall back to scanning section XML for <hp:t> text runs.
+    section_xml = next(
+        (
+            val for key, val in parsed.get("raw_xml", {}).items()
+            if "section" in key.lower() and isinstance(val, str)
+        ),
+        "",
+    )
+    if not section_xml:
+        section_xml = parsed.get("section_xml", "")
+    if not section_xml:
+        return []
+
+    runs = (t.strip() for t in re.findall(r'<hp:t>([^<]+)</hp:t>', section_xml))
+    return [run for run in runs if run]
+
+
+# ================================================================
+#  스타일 매핑 (Phase 2에서 확장)
+# ================================================================
+
+def _prepare_style_mappings(template_info: dict) -> dict:
+    """Build the style-mapping skeleton.
+
+    ``char_pr`` and ``para_pr`` are returned empty. ``border_fill`` gets
+    one entry per item of ``template_info["border_fills"]`` containing a
+    CSS class name (``bf-{id}``), the background value and the visible
+    border declarations.
+    """
+    border_fill_map = {}
+
+    for bf_id, bf_data in template_info.get("border_fills", {}).items():
+        # Background: "background" wins when present, otherwise "bg".
+        if "background" in bf_data:
+            background = bf_data["background"]
+        else:
+            background = bf_data.get("bg", "")
+
+        css_rules = bf_data.get("css", {})
+        if css_rules:
+            # Ready-made CSS: keep border-* properties that are set and
+            # not "none".
+            borders = {
+                prop: val for prop, val in css_rules.items()
+                if prop.startswith("border-") and val and val != "none"
+            }
+        else:
+            # Fallback: assemble declarations from per-side dicts.
+            borders = {}
+            for side in ("top", "bottom", "left", "right"):
+                spec = bf_data.get(side, {})
+                if not isinstance(spec, dict):
+                    continue
+                if spec.get("type", "NONE").upper() == "NONE":
+                    continue
+                borders[f"border-{side}"] = (
+                    f"{spec.get('width','0.1mm')} "
+                    f"{spec.get('type','solid').lower()} "
+                    f"{spec.get('color','#000')}"
+                )
+
+        border_fill_map[str(bf_id)] = {
+            "css_class": f"bf-{bf_id}",
+            "bg": background,
+            "borders": borders,
+        }
+
+    return {
+        "char_pr": {},
+        "border_fill": border_fill_map,
+        "para_pr": {},
+    }
--- a/handlers/style_generator.py
+++ b/handlers/style_generator.py
@@ -0,0 +1,824 @@
+# -*- coding: utf-8 -*-
+"""
+Style Generator v2.1  (Phase 4 — 하드코딩 제거)
+
+template_info의 tools 추출값 → CSS 문자열 생성.
+
+★ v2.1 변경사항:
+  - 하드코딩 간격 → 추출값 대체:
+    · .doc-header margin-bottom → page.margins.header에서 계산
+    · .doc-footer margin-top   → page.margins.footer에서 계산
+    · .title-block margin/padding → title paraPr spacing에서 유도
+  - .img-wrap, .img-caption CSS 추가 (content_order 이미지 지원)
+
+★ v2.0 변경사항 (v1.0 대비):
+  - charPr 28개 전체 → .cpr-{id} CSS 클래스 생성
+  - paraPr 23개 전체 → .ppr-{id} CSS 클래스 생성
+  - styles 12개 → .sty-{id} CSS 클래스 (charPr + paraPr 조합)
+  - fontRef → 실제 폰트명 해석 (font_map 빌드)
+  - 제목 블록: 하드코딩 제거 → 실제 추출 데이터 사용
+  - 줄간격: paraPr별 line-height 개별 적용
+  - 여백: @page는 인쇄용, .page는 화면용 (이중 적용 제거)
+  - bf CSS: NONE-only borderFill도 클래스 생성 (border: none 명시)
+  - 텍스트 색상: charPr별 color 반영
+  - 폰트: charPr별 fontRef → 실제 font-family 해석
+
+★ 원칙: hwpx_domain_guide.md §1~§8 매핑 규칙 100% 준수
+★ 원칙: 하드코딩 값 0개. 모든 CSS 값은 template_info에서 유래.
+"""
+
+HU_TO_MM = 25.4 / 7200  # 1 HWPUNIT = 1/7200 inch → mm
+
+
+# ================================================================
+#  메인 엔트리포인트
+# ================================================================
+
+def generate_css(template_info: dict, semantic_map: dict = None) -> str:
+    """template_info + semantic_map → CSS 문자열 전체 생성."""
+    # font_map 빌드 (charPr CSS에서 재사용)
+    fm = _build_font_map(template_info)
+
+    parts = [
+        _page_css(template_info),
+        _body_css(template_info, fm),
+        _layout_css(template_info),
+        _header_footer_css(template_info),
+        _title_block_css(template_info, fm, semantic_map),
+        _section_css(template_info),
+        _table_base_css(template_info),
+        _border_fill_css(template_info),
+        _char_pr_css(template_info, fm),
+        _para_pr_css(template_info),
+        _named_style_css(template_info),
+        _table_detail_css(template_info, semantic_map),
+    ]
+    return "\n\n".join(p for p in parts if p)
+
+
+# ================================================================
+#  @page (인쇄 전용)
+# ================================================================
+
+def _page_css(ti: dict) -> str:
+    page = ti.get("page", {})
+    paper = page.get("paper", {})
+    margins = page.get("margins", {})
+
+    w = paper.get("width_mm", 210)
+    h = paper.get("height_mm", 297)
+    mt = margins.get("top", "20mm")
+    mb = margins.get("bottom", "20mm")
+    ml = margins.get("left", "20mm")
+    mr = margins.get("right", "20mm")
+
+    return (
+        "@page {\n"
+        f"  size: {w}mm {h}mm;\n"
+        f"  margin: {mt} {mr} {mb} {ml};\n"
+        "}\n"
+        "@media screen {\n"
+        "  @page { margin: 0; }\n"  # 화면에서는 .page padding만 사용
+        "}"
+    )
+
+
+# ================================================================
+#  body
+# ================================================================
+
+def _body_css(ti: dict, fm: dict) -> str:
+    """바탕글 스타일 기준 body CSS"""
+    # '바탕글' 스타일 → charPr → fontRef → 실제 폰트
+    base_charpr = _resolve_style_charpr(ti, "바탕글")
+    base_parapr = _resolve_style_parapr(ti, "바탕글")
+
+    # 폰트
+    font_family = _charpr_font_family(base_charpr, fm)
+    # 크기
+    size_pt = base_charpr.get("height_pt", 10.0)
+    # 색상
+    color = base_charpr.get("textColor", "#000000")
+    # 줄간격
+    line_height = _parapr_line_height(base_parapr)
+    # 정렬
+    # body에는 정렬 넣지 않음 (paraPr별로)
+
+    return (
+        "body {\n"
+        f"  font-family: {font_family};\n"
+        f"  font-size: {size_pt}pt;\n"
+        f"  line-height: {line_height};\n"
+        f"  color: {color};\n"
+        "  margin: 0; padding: 0;\n"
+        "}"
+    )
+
+
+# ================================================================
+#  .page 레이아웃 (화면 전용 — 여백은 여기서만)
+# ================================================================
+
+def _layout_css(ti: dict) -> str:
+    page = ti.get("page", {})
+    paper = page.get("paper", {})
+    margins = page.get("margins", {})
+
+    w = paper.get("width_mm", 210)
+    ml = _mm(margins.get("left", "20mm"))
+    mr = _mm(margins.get("right", "20mm"))
+    body_w = w - ml - mr
+
+    mt = margins.get("top", "20mm")
+    mb = margins.get("bottom", "20mm")
+    m_left = margins.get("left", "20mm")
+    m_right = margins.get("right", "20mm")
+
+    return (
+        ".page {\n"
+        f"  width: {body_w:.0f}mm;\n"
+        "  margin: 0 auto;\n"
+        f"  padding: {mt} {m_right} {mb} {m_left};\n"
+        "}"
+    )
+
+
+# ================================================================
+#  헤더 / 푸터
+# ================================================================
+
+def _header_footer_css(ti: dict) -> str:
+    page = ti.get("page", {})
+    margins = page.get("margins", {})
+
+    # 헤더 margin-bottom: page.margins.header에서 유도
+    # 푸터 margin-top: page.margins.footer에서 유도
+    hdr_margin = margins.get("header", "")
+    ftr_margin = margins.get("footer", "")
+
+    hdr_mb = f"{_mm(hdr_margin) * 0.3:.1f}mm" if hdr_margin else "4mm"
+    ftr_mt = f"{_mm(ftr_margin) * 0.4:.1f}mm" if ftr_margin else "6mm"
+
+    lines = [
+        "/* 헤더/푸터 */",
+        f".doc-header {{ margin-bottom: {hdr_mb}; }}",
+        f".doc-footer {{ margin-top: {ftr_mt}; }}",
+        ".doc-header table, .doc-footer table {",
+        "  width: 100%; border-collapse: collapse;",
+        "}",
+    ]
+
+    hdr_padding = _hf_cell_padding(ti.get("header"))
+    ftr_padding = _hf_cell_padding(ti.get("footer"))
+
+    lines.append(
+        f".doc-header td {{ {hdr_padding} vertical-align: middle; }}"
+    )
+    lines.append(
+        f".doc-footer td {{ {ftr_padding} vertical-align: middle; }}"
+    )
+    return "\n".join(lines)
+
+
+# ================================================================
+#  제목 블록 — ★ 하드코딩 제거, 실제 데이터 사용
+# ================================================================
+
+def _title_block_css(ti: dict, fm: dict, sm: dict = None) -> str:
+    """제목 블록 CSS — title_table의 실제 셀 데이터에서 추출"""
+    tables = ti.get("tables", [])
+
+    # semantic_map에서 title_table 인덱스 가져오기
+    title_idx = None
+    if sm:
+        title_idx = sm.get("title_table")
+
+    title_tbl = None
+    if title_idx is not None:
+        title_tbl = next((t for t in tables if t["index"] == title_idx), None)
+
+    # 못 찾으면 1행 표 중 텍스트 있는 것 검색
+    if not title_tbl:
+        for t in tables:
+            rows = t.get("rows", [])
+            if rows and len(rows) == 1:
+                for cell in rows[0]:
+                    if cell.get("text", "").strip():
+                        title_tbl = t
+                        break
+            if title_tbl:
+                break
+
+    lines = ["/* 제목 블록 */"]
+
+    if title_tbl:
+        # 텍스트 있는 셀에서 charPr, paraPr, bf 추출
+        title_charpr = None
+        title_parapr = None
+        title_bf_id = None
+
+        for row in title_tbl.get("rows", []):
+            for cell in row:
+                if cell.get("text", "").strip():
+                    # ★ primaryCharPrIDRef 사용 (table_v2 추출)
+                    cpr_id = cell.get("primaryCharPrIDRef")
+                    if cpr_id is not None:
+                        title_charpr = next(
+                            (c for c in ti.get("char_styles", [])
+                             if c.get("id") == cpr_id), None
+                        )
+                    ppr_id = cell.get("primaryParaPrIDRef")
+                    if ppr_id is not None:
+                        title_parapr = next(
+                            (p for p in ti.get("para_styles", [])
+                             if p.get("id") == ppr_id), None
+                        )
+                    title_bf_id = cell.get("borderFillIDRef")
+                    break
+            if title_charpr:
+                break
+
+        # charPr 못 찾으면 폴백 (charPrIDRef가 없는 구버전 table.py)
+        if not title_charpr:
+            title_charpr = _find_title_charpr(ti)
+
+        # CSS 생성
+        font_family = _charpr_font_family(title_charpr, fm) if title_charpr else "'맑은 고딕', sans-serif"
+        size_pt = title_charpr.get("height_pt", 15.0) if title_charpr else 15.0
+        bold = title_charpr.get("bold", False) if title_charpr else False
+        color = title_charpr.get("textColor", "#000000") if title_charpr else "#000000"
+
+        # 줄간격
+        line_height = _parapr_line_height(title_parapr) if title_parapr else "180%"
+        align = _parapr_align(title_parapr) if title_parapr else "center"
+
+        # ★ margin/padding — paraPr 또는 page.margins에서 유도
+        title_after_mm = "4mm"  # 기본값
+        title_padding = "4mm 0"  # 기본값
+        if title_parapr:
+            margin_info = title_parapr.get("margin", {})
+            after_hu = margin_info.get("after_hu", 0)
+            if after_hu:
+                title_after_mm = f"{after_hu * HU_TO_MM:.1f}mm"
+            before_hu = margin_info.get("before_hu", 0)
+            if before_hu or after_hu:
+                b_mm = before_hu * HU_TO_MM if before_hu else 4
+                a_mm = after_hu * HU_TO_MM if after_hu else 0
+                title_padding = f"{b_mm:.1f}mm 0 {a_mm:.1f}mm 0"
+
+        lines.append(f".title-block {{ margin-bottom: {title_after_mm}; }}")
+        lines.append(".title-table { width: 100%; border-collapse: collapse; }")
+        lines.append(
+            f".title-block h1 {{\n"
+            f"  font-family: {font_family};\n"
+            f"  font-size: {size_pt}pt;\n"
+            f"  font-weight: {'bold' if bold else 'normal'};\n"
+            f"  color: {color};\n"
+            f"  text-align: {align};\n"
+            f"  line-height: {line_height};\n"
+            f"  margin: 0; padding: {title_padding};\n"
+            f"}}"
+        )
+
+        # bf 적용 (파란 하단선 등)
+        if title_bf_id:
+            bf_data = ti.get("border_fills", {}).get(str(title_bf_id), {})
+            css_dict = bf_data.get("css", {})
+            bf_rules = []
+            for prop, val in css_dict.items():
+                if val and val.lower() != "none":
+                    bf_rules.append(f"  {prop}: {val};")
+            if bf_rules:
+                lines.append(
+                    f".title-block {{\n"
+                    + "\n".join(bf_rules)
+                    + "\n}"
+                )
+    else:
+        lines.append(".title-block { margin-bottom: 4mm; }")
+        lines.append(".title-table { width: 100%; border-collapse: collapse; }")
+        lines.append(
+            ".title-block h1 {\n"
+            "  font-size: 15pt; font-weight: normal;\n"
+            "  text-align: center; margin: 0; padding: 4mm 0;\n"
+            "}"
+        )
+
+    return "\n".join(lines)
+
+
+# ================================================================
+#  섹션 — 하드코딩 제거
+# ================================================================
+
+def _section_css(ti: dict) -> str:
+    """섹션 CSS — '#큰아이콘' 또는 '개요1' 스타일에서 추출"""
+    lines = ["/* 섹션 */"]
+
+    # 섹션 제목: '#큰아이콘' 또는 가장 큰 bold charPr
+    title_charpr = _resolve_style_charpr(ti, "#큰아이콘")
+    if not title_charpr or title_charpr.get("id") == 0:
+        title_charpr = _resolve_style_charpr(ti, "개요1")
+    if not title_charpr or title_charpr.get("id") == 0:
+        # 폴백: bold인 charPr 중 가장 큰 것
+        for cs in sorted(ti.get("char_styles", []),
+                         key=lambda x: x.get("height_pt", 0), reverse=True):
+            if cs.get("bold"):
+                title_charpr = cs
+                break
+
+    if title_charpr:
+        size = title_charpr.get("height_pt", 11)
+        bold = title_charpr.get("bold", True)
+        color = title_charpr.get("textColor", "#000000")
+        lines.append(
+            f".section-title {{\n"
+            f"  font-size: {size}pt;\n"
+            f"  font-weight: {'bold' if bold else 'normal'};\n"
+            f"  color: {color};\n"
+            f"  margin-bottom: 3mm;\n"
+            f"}}"
+        )
+    else:
+        lines.append(
+            ".section-title { font-weight: bold; margin-bottom: 3mm; }"
+        )
+
+    lines.append(".section { margin-bottom: 6mm; }")
+    lines.append(".section-content { text-align: justify; }")
+
+    # content_order 기반 본문용 스타일
+    lines.append("/* 이미지/문단 (content_order) */")
+    lines.append(
+        ".img-wrap { text-align: center; margin: 3mm 0; }"
+    )
+    lines.append(
+        ".img-wrap img { max-width: 100%; height: auto; }"
+    )
+    lines.append(
+        ".img-caption { font-size: 9pt; color: #666; margin-top: 1mm; }"
+    )
+
+    return "\n".join(lines)
+
+
+# ================================================================
+#  데이터 표 기본 CSS
+# ================================================================
+
+def _table_base_css(ti: dict) -> str:
+    """표 기본 — '표내용' 스타일 charPr에서 추출"""
+    tbl_charpr = _resolve_style_charpr(ti, "표내용")
+    tbl_parapr = _resolve_style_parapr(ti, "표내용")
+
+    size_pt = tbl_charpr.get("height_pt", 9.0) if tbl_charpr else 9.0
+    line_height = _parapr_line_height(tbl_parapr) if tbl_parapr else "160%"
+    align = _parapr_align(tbl_parapr) if tbl_parapr else "justify"
+
+    border_fills = ti.get("border_fills", {})
+    if border_fills:
+        # bf-{id} 클래스가 셀별 테두리를 담당 → 기본값은 none
+        # (하드코딩 border를 넣으면 bf 클래스보다 specificity가 높아 덮어씀)
+        border_rule = "border: none;"
+    else:
+        # border_fills 추출 실패 시에만 폴백
+        border_rule = "border: 1px solid #000;"
+
+    return (
+        "/* 데이터 표 */\n"
+        ".data-table {\n"
+        "  width: 100%; border-collapse: collapse; margin: 4mm 0;\n"
+        "}\n"
+        ".data-table th, .data-table td {\n"
+        f"  {border_rule}\n"
+        f"  font-size: {size_pt}pt;\n"
+        f"  line-height: {line_height};\n"
+        f"  text-align: {align};\n"
+        "  vertical-align: middle;\n"
+        "}\n"
+        ".data-table th {\n"
+        "  font-weight: bold; text-align: center;\n"
+        "}"
+    )
+
+
+# ================================================================
+#  borderFill → .bf-{id} CSS 클래스
+# ================================================================
+
+def _border_fill_css(ti: dict) -> str:
+    """★ v2.0: NONE-only bf도 클래스 생성 (border: none 명시)"""
+    border_fills = ti.get("border_fills", {})
+    if not border_fills:
+        return ""
+
+    parts = ["/* borderFill → CSS 클래스 */"]
+
+    for bf_id, bf in border_fills.items():
+        rules = []
+
+        css_dict = bf.get("css", {})
+        for prop, val in css_dict.items():
+            if val:
+                # NONE도 포함 (border: none 명시)
+                rules.append(f"  {prop}: {val};")
+
+        # background
+        if "background-color" not in css_dict:
+            bg = bf.get("background", "")
+            if bg and bg.lower() not in ("", "none", "transparent",
+                                          "#ffffff", "#fff"):
+                rules.append(f"  background-color: {bg};")
+
+        if rules:
+            parts.append(f".bf-{bf_id} {{\n" + "\n".join(rules) + "\n}")
+
+    return "\n".join(parts) if len(parts) > 1 else ""
+
+
+# ================================================================
+#  ★ NEW: charPr → .cpr-{id} CSS 클래스
+# ================================================================
+
+def _char_pr_css(ti: dict, fm: dict) -> str:
+    """charPr 전체 → 개별 CSS 클래스 생성.
+    
+    각 .cpr-{id}에 font-family, font-size, font-weight, color 등 포함.
+    HTML에서 <span class="cpr-5"> 등으로 참조.
+    """
+    char_styles = ti.get("char_styles", [])
+    if not char_styles:
+        return ""
+
+    parts = ["/* charPr → CSS 클래스 (글자 모양) */"]
+
+    for cs in char_styles:
+        cid = cs.get("id")
+        rules = []
+
+        # font-family
+        ff = _charpr_font_family(cs, fm)
+        if ff:
+            rules.append(f"  font-family: {ff};")
+
+        # font-size
+        pt = cs.get("height_pt")
+        if pt:
+            rules.append(f"  font-size: {pt}pt;")
+
+        # bold
+        if cs.get("bold"):
+            rules.append("  font-weight: bold;")
+
+        # italic
+        if cs.get("italic"):
+            rules.append("  font-style: italic;")
+
+        # color
+        color = cs.get("textColor", "#000000")
+        if color and color.lower() != "#000000":
+            rules.append(f"  color: {color};")
+
+        # underline — type이 NONE이 아닌 실제 밑줄만
+        underline = cs.get("underline", "NONE")
+        ACTIVE_UNDERLINE = {"BOTTOM", "CENTER", "TOP", "SIDE"}
+        if underline in ACTIVE_UNDERLINE:
+            rules.append("  text-decoration: underline;")
+
+        # strikeout — shape="NONE" 또는 "3D"는 취소선 아님
+        # 실제 취소선: CONTINUOUS, DASH, DOT 등 선 스타일만
+        strikeout = cs.get("strikeout", "NONE")
+        ACTIVE_STRIKEOUT = {"CONTINUOUS", "DASH", "DOT", "DASH_DOT",
+                            "DASH_DOT_DOT", "LONG_DASH", "DOUBLE"}
+        if strikeout in ACTIVE_STRIKEOUT:
+            rules.append("  text-decoration: line-through;")
+
+        # ── 자간 (letter-spacing) ──
+        # HWPX spacing은 % 단위: letter-spacing = height_pt × spacing / 100
+        spacing_pct = cs.get("spacing", {}).get("hangul", 0)
+        if spacing_pct != 0 and pt:
+            ls_val = round(pt * spacing_pct / 100, 2)
+            rules.append(f"  letter-spacing: {ls_val}pt;")
+
+        # ── 장평 (scaleX) ──
+        # HWPX ratio는 글자 폭 비율 (100=기본). CSS transform으로 변환
+        ratio_pct = cs.get("ratio", {}).get("hangul", 100)
+        if ratio_pct != 100:
+            rules.append(f"  transform: scaleX({ratio_pct / 100});")
+            rules.append("  display: inline-block;")  # scaleX 적용 필수
+
+        if rules:
+            parts.append(f".cpr-{cid} {{\n" + "\n".join(rules) + "\n}")
+
+    return "\n".join(parts) if len(parts) > 1 else ""
+
+
+# ================================================================
+#  ★ NEW: paraPr → .ppr-{id} CSS 클래스
+# ================================================================
+
+def _para_pr_css(ti: dict) -> str:
+    """paraPr 전체 → 개별 CSS 클래스 생성.
+    
+    각 .ppr-{id}에 text-align, line-height, text-indent, margin 등 포함.
+    HTML에서 <p class="ppr-3"> 등으로 참조.
+    """
+    para_styles = ti.get("para_styles", [])
+    if not para_styles:
+        return ""
+
+    parts = ["/* paraPr → CSS 클래스 (문단 모양) */"]
+
+    for ps in para_styles:
+        pid = ps.get("id")
+        rules = []
+
+        # text-align
+        align = _parapr_align(ps)
+        if align:
+            rules.append(f"  text-align: {align};")
+
+        # line-height
+        lh = _parapr_line_height(ps)
+        if lh:
+            rules.append(f"  line-height: {lh};")
+
+        # text-indent
+        margin = ps.get("margin", {})
+        indent_hu = margin.get("indent_hu", 0)
+        if indent_hu:
+            indent_mm = indent_hu * HU_TO_MM
+            rules.append(f"  text-indent: {indent_mm:.1f}mm;")
+
+        # margin-left
+        left_hu = margin.get("left_hu", 0)
+        if left_hu:
+            left_mm = left_hu * HU_TO_MM
+            rules.append(f"  margin-left: {left_mm:.1f}mm;")
+
+        # margin-right
+        right_hu = margin.get("right_hu", 0)
+        if right_hu:
+            right_mm = right_hu * HU_TO_MM
+            rules.append(f"  margin-right: {right_mm:.1f}mm;")
+
+        # spacing before/after
+        before = margin.get("before_hu", 0)
+        if before:
+            rules.append(f"  margin-top: {before * HU_TO_MM:.1f}mm;")
+        after = margin.get("after_hu", 0)
+        if after:
+            rules.append(f"  margin-bottom: {after * HU_TO_MM:.1f}mm;")
+
+        if rules:
+            parts.append(f".ppr-{pid} {{\n" + "\n".join(rules) + "\n}")
+
+    return "\n".join(parts) if len(parts) > 1 else ""
+
+
+# ================================================================
+#  ★ NEW: named style → .sty-{id} CSS 클래스
+# ================================================================
+
+def _named_style_css(ti: dict) -> str:
+    """styles 목록 → .sty-{id} CSS 클래스.
+    
+    각 style은 charPrIDRef + paraPrIDRef 조합.
+    → .sty-{id} = .cpr-{charPrIDRef} + .ppr-{paraPrIDRef} 의미.
+    HTML에서 class="sty-0" 또는 class="cpr-5 ppr-11" 로 참조.
+    """
+    styles = ti.get("styles", [])
+    if not styles:
+        return ""
+
+    parts = ["/* named styles */"]
+
+    for s in styles:
+        sid = s.get("id")
+        name = s.get("name", "")
+        cpr_id = s.get("charPrIDRef")
+        ppr_id = s.get("paraPrIDRef")
+
+        # 주석으로 매핑 기록
+        parts.append(
+            f"/* .sty-{sid} '{name}' = cpr-{cpr_id} + ppr-{ppr_id} */"
+        )
+
+    return "\n".join(parts)
+
+
+# ================================================================
+#  표 상세 CSS (열 너비, 셀 패딩)
+# ================================================================
+
+def _table_detail_css(ti: dict, sm: dict = None) -> str:
+    if not sm:
+        return ""
+
+    body_indices = sm.get("body_tables", [])
+    tables = ti.get("tables", [])
+    if not body_indices or not tables:
+        return ""
+
+    parts = ["/* 표 상세 (tools 추출값) */"]
+
+    for tbl_num, tbl_idx in enumerate(body_indices, 1):
+        tbl = next((t for t in tables if t["index"] == tbl_idx), None)
+        if not tbl:
+            continue
+
+        cls = f"tbl-{tbl_num}"
+
+        # 열 너비
+        col_pcts = tbl.get("colWidths_pct", [])
+        if col_pcts:
+            for c_idx, pct in enumerate(col_pcts):
+                parts.append(
+                    f".{cls} col:nth-child({c_idx + 1}) {{ width: {pct}%; }}"
+                )
+
+        # 셀 패딩
+        cm = _first_cell_margin(tbl)
+        if cm:
+            ct = cm.get("top", 0) * HU_TO_MM
+            cb = cm.get("bottom", 0) * HU_TO_MM
+            cl = cm.get("left", 0) * HU_TO_MM
+            cr = cm.get("right", 0) * HU_TO_MM
+            parts.append(
+                f".{cls} td, .{cls} th {{\n"
+                f"  padding: {ct:.1f}mm {cr:.1f}mm {cb:.1f}mm {cl:.1f}mm;\n"
+                f"}}"
+            )
+
+        # 헤더행 높이
+        first_row = tbl.get("rows", [[]])[0]
+        if first_row:
+            h_hu = first_row[0].get("height_hu", 0)
+            if h_hu > 0:
+                h_mm = h_hu * HU_TO_MM
+                parts.append(
+                    f".{cls} thead th {{ height: {h_mm:.1f}mm; }}"
+                )
+
+    return "\n".join(parts) if len(parts) > 1 else ""
+
+
+# ================================================================
+#  보조 함수
+# ================================================================
+
+def _build_font_map(ti: dict) -> dict:
+    """fonts → {(lang, id): face_name} 딕셔너리"""
+    fm = {}
+    for lang, flist in ti.get("fonts", {}).items():
+        if isinstance(flist, list):
+            for f in flist:
+                fm[(lang, f.get("id", 0))] = f.get("face", "")
+    return fm
+
+
+def _charpr_font_family(charpr: dict, fm: dict) -> str:
+    """charPr의 fontRef → 실제 font-family CSS 값"""
+    if not charpr:
+        return "'맑은 고딕', sans-serif"
+
+    fr = charpr.get("fontRef", {})
+    hangul_id = fr.get("hangul", 0)
+    latin_id = fr.get("latin", 0)
+
+    hangul_face = fm.get(("HANGUL", hangul_id), "")
+    latin_face = fm.get(("LATIN", latin_id), "")
+
+    faces = []
+    if hangul_face:
+        faces.append(f"'{hangul_face}'")
+    if latin_face and latin_face != hangul_face:
+        faces.append(f"'{latin_face}'")
+    faces.append("sans-serif")
+
+    return ", ".join(faces)
+
+
+def _resolve_style_charpr(ti: dict, style_name: str) -> dict:
+    """스타일 이름 → charPr dict 해석"""
+    styles = ti.get("styles", [])
+    char_styles = ti.get("char_styles", [])
+
+    for s in styles:
+        if s.get("name") == style_name:
+            cpr_id = s.get("charPrIDRef")
+            for cs in char_styles:
+                if cs.get("id") == cpr_id:
+                    return cs
+
+    # 못 찾으면 charPr[0] (바탕글 기본)
+    return char_styles[0] if char_styles else {}
+
+
+def _resolve_style_parapr(ti: dict, style_name: str) -> dict:
+    """스타일 이름 → paraPr dict 해석"""
+    styles = ti.get("styles", [])
+    para_styles = ti.get("para_styles", [])
+
+    for s in styles:
+        if s.get("name") == style_name:
+            ppr_id = s.get("paraPrIDRef")
+            for ps in para_styles:
+                if ps.get("id") == ppr_id:
+                    return ps
+
+    return para_styles[0] if para_styles else {}
+
+
+def _find_title_charpr(ti: dict) -> dict:
+    """제목용 charPr 추론 (primaryCharPrIDRef 없을 때 폴백).
+    
+    헤드라인 폰트 or 가장 큰 크기 기준.
+    """
+    headline_keywords = ["헤드라인", "headline", "제목", "title"]
+    fm = _build_font_map(ti)
+    
+    best = {}
+    best_pt = 0
+    for cs in ti.get("char_styles", []):
+        pt = cs.get("height_pt", 0)
+        fr = cs.get("fontRef", {})
+        hangul_id = fr.get("hangul", 0)
+        face = fm.get(("HANGUL", hangul_id), "").lower()
+        
+        # 헤드라인 폰트면 우선
+        if any(kw in face for kw in headline_keywords):
+            if pt > best_pt:
+                best_pt = pt
+                best = cs
+    
+    # 헤드라인 폰트 못 찾으면 가장 큰 것
+    if not best:
+        for cs in ti.get("char_styles", []):
+            pt = cs.get("height_pt", 0)
+            if pt > best_pt:
+                best_pt = pt
+                best = cs
+    
+    return best
+
+
+def _parapr_line_height(parapr: dict) -> str:
+    """paraPr → CSS line-height"""
+    if not parapr:
+        return "160%"
+    ls = parapr.get("lineSpacing", {})
+    ls_type = ls.get("type", "PERCENT")
+    ls_val = ls.get("value", 160)
+    if ls_type == "PERCENT":
+        return f"{ls_val}%"
+    elif ls_type == "FIXED":
+        return f"{ls_val / 100:.1f}pt"
+    else:
+        return f"{ls_val}%"
+
+
+def _parapr_align(parapr: dict) -> str:
+    """paraPr → CSS text-align"""
+    if not parapr:
+        return "justify"
+    align = parapr.get("align", "JUSTIFY")
+    return {
+        "JUSTIFY": "justify", "LEFT": "left", "RIGHT": "right",
+        "CENTER": "center", "DISTRIBUTE": "justify",
+        "DISTRIBUTE_SPACE": "justify"
+    }.get(align, "justify")
+
+
+def _hf_cell_padding(hf_info: dict | None) -> str:
+    if not hf_info or not hf_info.get("table"):
+        return "padding: 2px 4px;"
+    rows = hf_info["table"].get("rows", [])
+    if not rows or not rows[0]:
+        return "padding: 2px 4px;"
+    cm = rows[0][0].get("cellMargin", {})
+    if not cm:
+        return "padding: 2px 4px;"
+    ct = cm.get("top", 0) * HU_TO_MM
+    cb = cm.get("bottom", 0) * HU_TO_MM
+    cl = cm.get("left", 0) * HU_TO_MM
+    cr = cm.get("right", 0) * HU_TO_MM
+    return f"padding: {ct:.1f}mm {cr:.1f}mm {cb:.1f}mm {cl:.1f}mm;"
+
+
+def _first_cell_margin(tbl: dict) -> dict | None:
+    for row in tbl.get("rows", []):
+        for cell in row:
+            cm = cell.get("cellMargin")
+            if cm:
+                return cm
+    return None
+
+
+def _mm(val) -> float:
+    if isinstance(val, (int, float)):
+        return float(val)
+    try:
+        return float(str(val).replace("mm", "").strip())
+    except (ValueError, TypeError):
+        return 20.0
--- a/handlers/template/html_table_template_css.txt
+++ b/handlers/template/html_table_template_css.txt
--- a/handlers/template_manager.py
+++ b/handlers/template_manager.py
--- a/handlers/tools/init.py
+++ b/handlers/tools/init.py
@@ -0,0 +1,51 @@
+# -*- coding: utf-8 -*-
+"""
+HWPX 템플릿 추출 도구 모음
+
+각 모듈은 HWPX XML에서 특정 항목을 코드 기반으로 추출한다.
+- 추출 실패 시 None 반환 (디폴트값 절대 생성 안 함)
+- 모든 단위 변환은 hwpx_utils 사용
+- hwpx_domain_guide.md 기준 준수
+
+모듈 목록:
+    page_setup   : §7  용지/여백 (pagePr + margin)
+    font         : §3  글꼴 (fontface → font)
+    char_style   : §4  글자 모양 (charPr)
+    para_style   : §5  문단 모양 (paraPr)
+    border_fill  : §2  테두리/배경 (borderFill)
+    table        : §6  표 (tbl, tc)
+    header_footer: §8  머리말/꼬리말 (headerFooter)
+    section      : §9  구역 정의 (secPr)
+    style_def    :     스타일 정의 (styles)
+    numbering    :     번호매기기/글머리표
+    image        :     이미지/그리기 객체
+    content_order: 본문 콘텐츠 순서 (section*.xml)
+"""
+
+from . import page_setup
+from . import font
+from . import char_style
+from . import para_style
+from . import border_fill
+from . import table
+from . import header_footer
+from . import section
+from . import style_def
+from . import numbering
+from . import image
+from . import content_order
+
+__all__ = [
+    "page_setup",
+    "font",
+    "char_style",
+    "para_style",
+    "border_fill",
+    "table",
+    "header_footer",
+    "section",
+    "style_def",
+    "numbering",
+    "image",
+    "content_order"
+]
--- a/handlers/tools/border_fill.py
+++ b/handlers/tools/border_fill.py
@@ -0,0 +1,127 @@
+# -*- coding: utf-8 -*-
+"""
+§2 테두리/배경(BorderFill) 추출
+
+HWPX 실제 태그 (header.xml):
+  <hh:borderFill id="3" threeD="0" shadow="0" centerLine="NONE" ...>
+    <hh:leftBorder type="SOLID" width="0.12 mm" color="#000000"/>
+    <hh:rightBorder type="SOLID" width="0.12 mm" color="#000000"/>
+    <hh:topBorder type="SOLID" width="0.12 mm" color="#000000"/>
+    <hh:bottomBorder type="SOLID" width="0.12 mm" color="#000000"/>
+    <hh:diagonal type="SOLID" width="0.1 mm" color="#000000"/>
+    <hc:fillBrush>
+      <hc:winBrush faceColor="#EDEDED" hatchColor="#FFE7E7E7" alpha="0"/>
+    </hc:fillBrush>
+  </hh:borderFill>
+
+디폴트값 생성 안 함.
+"""
+
+import re
+
+from domain.hwpx.hwpx_utils import BORDER_TYPE_TO_CSS, hwpx_border_to_css
+
+
+def extract(raw_xml: dict, parsed: dict = None) -> dict | None:
+    """Extract every <hh:borderFill> of header.xml into a dict keyed by id.
+
+    Args:
+        raw_xml: mapping of file name → XML text (a plain XML string is
+            also accepted by _get_header_xml).
+        parsed: optional parse result; its "header_xml" entry is preferred.
+
+    Returns:
+        {
+            3: {
+                "id": 3,
+                "left":   {"type": "SOLID", "width": "0.12mm", "color": "#000000"},
+                "right":  {"type": "SOLID", "width": "0.12mm", "color": "#000000"},
+                "top":    {"type": "SOLID", "width": "0.12mm", "color": "#000000"},
+                "bottom": {"type": "SOLID", "width": "0.12mm", "color": "#000000"},
+                "diagonal": {"type": "SOLID", "width": "0.1mm", "color": "#000000"},
+                "background": "#EDEDED",      # fillBrush faceColor
+                "css": {                       # convenience: pre-converted CSS
+                    "border-left": "0.12mm solid #000000",
+                    ...
+                    "background-color": "#EDEDED",
+                }
+            },
+            ...
+        }
+        Keys are int ids.  Spaces are stripped from width values
+        ("0.12 mm" in the XML becomes "0.12mm").  Returns None when the
+        header XML is unavailable or contains no borderFill block.
+    """
+    header_xml = _get_header_xml(raw_xml, parsed)
+    if not header_xml:
+        return None
+
+    blocks = re.findall(
+        r'<hh:borderFill\b([^>]*)>(.*?)</hh:borderFill>',
+        header_xml, re.DOTALL
+    )
+
+    if not blocks:
+        return None
+
+    result = {}
+    for attrs_str, inner in blocks:
+        id_m = re.search(r'\bid="(\d+)"', attrs_str)
+        if not id_m:
+            # A borderFill without a numeric id cannot be referenced; skip
+            continue
+        bf_id = int(id_m.group(1))
+
+        item = {"id": bf_id}
+
+        # Four sides + diagonal
+        for side, tag in [
+            ("left",     "leftBorder"),
+            ("right",    "rightBorder"),
+            ("top",      "topBorder"),
+            ("bottom",   "bottomBorder"),
+            ("diagonal", "diagonal"),
+        ]:
+            # Match the whole tag first, then read each attribute on its
+            # own so attribute order does not matter
+            tag_m = re.search(rf'<hh:{tag}\b([^/]*?)/?>', inner)
+            if tag_m:
+                tag_attrs = tag_m.group(1)
+                t = re.search(r'\btype="([^"]+)"', tag_attrs)
+                w = re.search(r'\bwidth="([^"]+)"', tag_attrs)
+                c = re.search(r'\bcolor="([^"]+)"', tag_attrs)
+                # NOTE(review): a missing attribute gets a substitute value
+                # here (NONE / 0.12mm / #000000), although the module
+                # docstring says no defaults are generated — confirm intent
+                item[side] = {
+                    "type": t.group(1) if t else "NONE",
+                    "width": w.group(1).replace(" ", "") if w else "0.12mm",
+                    "color": c.group(1) if c else "#000000",
+                }
+
+        # Background (fillBrush > winBrush faceColor)
+        bg_m = re.search(
+            r'<hc:winBrush\b[^>]*\bfaceColor="([^"]+)"', inner
+        )
+        if bg_m:
+            face = bg_m.group(1)
+            if face and face.lower() != "none":
+                item["background"] = face
+
+        # Convenience conversion to CSS
+        css = {}
+        for side in ["left", "right", "top", "bottom"]:
+            border_data = item.get(side)
+            if border_data:
+                css[f"border-{side}"] = hwpx_border_to_css(border_data)
+            else:
+                css[f"border-{side}"] = "none"
+            # A side with no border tag is therefore emitted as "none",
+            # so css always holds all four border-* keys
+
+        if "background" in item:
+            css["background-color"] = item["background"]
+
+        if css:
+            item["css"] = css
+
+        result[bf_id] = item
+
+    return result if result else None
+
+
+def _get_header_xml(raw_xml: dict, parsed: dict = None) -> str | None:
+    if parsed and parsed.get("header_xml"):
+        return parsed["header_xml"]
+    if isinstance(raw_xml, dict):
+        for name, content in raw_xml.items():
+            if "header" in name.lower() and isinstance(content, str):
+                return content
+    return raw_xml if isinstance(raw_xml, str) else None
--- a/handlers/tools/char_style.py
+++ b/handlers/tools/char_style.py
@@ -0,0 +1,133 @@
+# -*- coding: utf-8 -*-
+"""
+§4 글자 모양(CharShape) 추출
+
+HWPX 실제 태그 (header.xml):
+  <hh:charPr id="0" height="1000" textColor="#000000" shadeColor="none"
+             useFontSpace="0" useKerning="0" symMark="NONE" borderFillIDRef="2">
+    <hh:fontRef hangul="7" latin="6" hanja="6" .../>
+    <hh:ratio hangul="100" latin="100" .../>
+    <hh:spacing hangul="0" latin="0" .../>
+    <hh:relSz hangul="100" latin="100" .../>
+    <hh:offset hangul="0" latin="0" .../>
+    <hh:bold/>                              <!-- 존재하면 bold -->
+    <hh:italic/>                            <!-- 존재하면 italic -->
+    <hh:underline type="NONE" shape="SOLID" color="#000000"/>
+    <hh:strikeout shape="NONE" color="#000000"/>
+  </hh:charPr>
+
+디폴트값 생성 안 함.
+"""
+
+import re
+
+from domain.hwpx.hwpx_utils import charsize_to_pt
+
+
+def extract(raw_xml: dict, parsed: dict = None) -> list | None:
+    """Extract every §4 charPr (character shape) definition.
+
+    Only paired ``<hh:charPr ...>...</hh:charPr>`` blocks of the header XML
+    are read. Each block becomes one dict; apart from ``bold`` / ``italic``
+    (always present), a key is emitted only when the matching attribute or
+    child tag was found.
+
+    Returns:
+        A list shaped like
+        [
+            {
+                "id": 0,
+                "height_pt": 10.0,
+                "textColor": "#000000",
+                "bold": False,
+                "italic": False,
+                "underline": "NONE",
+                "strikeout": "NONE",
+                "fontRef": {"hangul": 7, "latin": 6, ...},
+                "ratio": {"hangul": 100, "latin": 100, ...},
+                "spacing": {"hangul": 0, "latin": 0, ...},
+                "borderFillIDRef": 2,
+            },
+            ...
+        ]
+        or None when there is no header XML or no charPr block.
+    """
+    header_xml = _get_header_xml(raw_xml, parsed)
+    if not header_xml:
+        return None
+
+    # Child tags whose attributes are per-language integers.
+    lang_tag_patterns = (
+        ("fontRef", r'<hh:fontRef\b([^/]*)/>'),
+        ("ratio", r'<hh:ratio\b([^/]*)/>'),
+        ("spacing", r'<hh:spacing\b([^/]*)/>'),
+    )
+
+    char_shapes = []
+    for block in re.finditer(
+        r'<hh:charPr\b([^>]*)>(.*?)</hh:charPr>',
+        header_xml, re.DOTALL
+    ):
+        attrs_str, inner = block.groups()
+        shape = {}
+
+        # ── attributes of the opening tag ──
+        if (m := re.search(r'\bid="(\d+)"', attrs_str)):
+            shape["id"] = int(m.group(1))
+
+        if (m := re.search(r'\bheight="(\d+)"', attrs_str)):
+            shape["height_pt"] = charsize_to_pt(int(m.group(1)))
+
+        if (m := re.search(r'\btextColor="([^"]+)"', attrs_str)):
+            shape["textColor"] = m.group(1)
+
+        # "none" means no shading, so it is not recorded
+        m = re.search(r'\bshadeColor="([^"]+)"', attrs_str)
+        if m and m.group(1) != "none":
+            shape["shadeColor"] = m.group(1)
+
+        if (m := re.search(r'\bborderFillIDRef="(\d+)"', attrs_str)):
+            shape["borderFillIDRef"] = int(m.group(1))
+
+        # ── bold / italic: decided by the mere presence of the tag ──
+        shape["bold"] = re.search(r'<hh:bold\s*/?>',  inner) is not None
+        shape["italic"] = re.search(r'<hh:italic\s*/?>',  inner) is not None
+
+        # ── fontRef / ratio / spacing ──
+        for key, pattern in lang_tag_patterns:
+            tag = re.search(pattern, inner)
+            if tag:
+                shape[key] = _parse_lang_attrs(tag.group(1))
+
+        # ── underline (type attribute) / strikeout (shape attribute) ──
+        if (m := re.search(r'<hh:underline\b[^>]*\btype="([^"]+)"', inner)):
+            shape["underline"] = m.group(1)
+
+        if (m := re.search(r'<hh:strikeout\b[^>]*\bshape="([^"]+)"', inner)):
+            shape["strikeout"] = m.group(1)
+
+        char_shapes.append(shape)
+
+    return char_shapes or None
+
+
+def _parse_lang_attrs(attrs_str: str) -> dict:
+    """Turn ``hangul="7" latin="6" ...`` into ``{"hangul": 7, "latin": 6, ...}``.
+
+    Only attributes whose value is an (optionally negative) integer are kept.
+    """
+    values = {}
+    for attr in re.finditer(r'(\w+)="(-?\d+)"', attrs_str):
+        values[attr.group(1)] = int(attr.group(2))
+    return values
+
+
+def _get_header_xml(raw_xml: dict, parsed: dict = None) -> str | None:
+    """Find the header.xml string to analyse.
+
+    A truthy ``parsed["header_xml"]`` takes priority. Otherwise a str
+    ``raw_xml`` is returned as-is, and a dict ``raw_xml`` yields its first
+    str value stored under a key containing "header" (case-insensitive).
+    Returns None when nothing qualifies.
+    """
+    if parsed:
+        from_parsed = parsed.get("header_xml")
+        if from_parsed:
+            return from_parsed
+
+    if isinstance(raw_xml, str):
+        return raw_xml
+
+    found = None
+    if isinstance(raw_xml, dict):
+        for key in raw_xml:
+            text = raw_xml[key]
+            if "header" in key.lower() and isinstance(text, str):
+                found = text
+                break
+    return found
--- a/handlers/tools/content_order.py
+++ b/handlers/tools/content_order.py
@@ -0,0 +1,529 @@
+# -*- coding: utf-8 -*-
+"""
+content_order.py — HWPX section*.xml 본문 콘텐츠 순서 추출
+
+기존 12개 tool이 header.xml의 "정의(definition)"를 추출하는 반면,
+이 tool은 section0.xml의 "본문(content)" 순서를 추출한다.
+
+추출 결과는 template_manager._build_body_html()이
+원본 순서 그대로 HTML을 조립하는 데 사용된다.
+
+콘텐츠 유형:
+  - paragraph : 일반 텍스트 문단
+  - table     : 표 (<hp:tbl>)
+  - image     : 이미지 (<hp:pic>)
+  - empty     : 빈 문단 (줄바꿈 역할)
+
+참조: hwpx_domain_guide.md §6(표), §7(본문 구조)
+"""
+
+import re
+import logging
+
+logger = logging.getLogger(__name__)
+
+# ================================================================
+#  Namespaces
+# ================================================================
+# HWPX uses several namespaces.
+# section*.xml: hp: (body), ha: (attributes)
+# header.xml:   hh: (header definitions)
+# The namespace URI may differ in real files, so lookups by local name are performed as well.
+
+DEFAULT_NS = {
+    'hp': 'http://www.hancom.co.kr/hwpml/2011/paragraph',
+    'ha': 'http://www.hancom.co.kr/hwpml/2011/attributes',
+    'hh': 'http://www.hancom.co.kr/hwpml/2011/head',
+    'hc': 'http://www.hancom.co.kr/hwpml/2011/core',
+}
+
+
+# ================================================================
+#  공개 API
+# ================================================================
+
+def extract(raw_xml, parsed, ns=None):
+    """Extract the ordered list of body content items from a section*.xml.
+
+    Args:
+        raw_xml (dict | str): Raw XML strings keyed by part name
+            ("section0", "section_xml", "section0.xml", or any key starting
+            with "section"), or the section XML string itself.
+        parsed (dict | Element | str | None): Parse result. A dict is probed
+            for "section_xml" / "section0" / "section_parsed" /
+            "section0_parsed"; a str value is used as raw XML, any other
+            non-dict value is taken to be an already parsed Element.
+            A non-dict, non-str ``parsed`` is used directly as the Element.
+        ns (dict, optional): Namespace mapping. Auto-detected when None.
+
+    Returns:
+        list[dict] | None: One entry per top-level body paragraph with
+            - type: "paragraph" | "table" | "image" | "empty"
+            - index: position in the returned list (0-based)
+            - paraPrIDRef / styleIDRef: reference IDs (str or None)
+            plus type-specific keys (table_idx + table info, image_idx +
+            image info, or text / charPrIDRef / runs).
+        None when no section XML is available or it cannot be parsed.
+    """
+    import xml.etree.ElementTree as ET
+
+    # ── locate the raw section XML ──
+    section_raw = None
+    if isinstance(raw_xml, dict):
+        # Key names vary between projects: section0, section_xml, ...
+        for key in ['section0', 'section_xml', 'section0.xml']:
+            if key in raw_xml:
+                section_raw = raw_xml[key]
+                break
+        # Otherwise take the first str value under a key starting with "section"
+        if section_raw is None:
+            for key, val in raw_xml.items():
+                if key.startswith('section') and isinstance(val, str):
+                    section_raw = val
+                    break
+    elif isinstance(raw_xml, str):
+        section_raw = raw_xml
+
+    # ── locate an already parsed Element (or more raw text) in parsed ──
+    section_parsed = None
+    if isinstance(parsed, dict):
+        for key in ['section_xml', 'section0', 'section_parsed', 'section0_parsed']:
+            val = parsed.get(key)
+            if val is None:
+                continue
+            if isinstance(val, str):
+                # A string is treated as raw section XML
+                if section_raw is None:
+                    section_raw = val
+            elif not isinstance(val, dict):
+                # Presumably an Element object
+                section_parsed = val
+                break
+    elif isinstance(parsed, str):
+        # A bare string cannot be walked as a tree; treat it as raw XML
+        if section_raw is None:
+            section_raw = parsed
+    elif parsed is not None:
+        # parsed itself may be an Element (direct call)
+        section_parsed = parsed
+
+    # Fallback: parse the raw string ourselves. This runs whatever the type
+    # of `parsed` was, so extract(raw_xml, None) works too.
+    if section_parsed is None and section_raw:
+        try:
+            section_parsed = ET.fromstring(section_raw)
+        except ET.ParseError:
+            logger.warning("section XML 파싱 실패")
+            return None
+
+    if section_parsed is None:
+        logger.warning("section XML을 찾을 수 없음 — content_order 추출 생략")
+        return None
+
+    if ns is None:
+        # Namespace sniffing uses str regexes, so only hand it str input
+        ns_source = section_raw if isinstance(section_raw, str) else ''
+        ns = _detect_namespaces(ns_source, section_parsed)
+
+    # Collect body <hp:p> elements (secPr contents excluded)
+    paragraphs = _collect_body_paragraphs(section_parsed, ns)
+
+    content_order = []
+    table_idx = 0
+    image_idx = 0
+
+    for p_elem in paragraphs:
+        base = {
+            'index': len(content_order),
+            'paraPrIDRef': _get_attr(p_elem, 'paraPrIDRef'),
+            'styleIDRef': _get_attr(p_elem, 'styleIDRef'),
+        }
+
+        # ── (1) table: the whole table counts as one item ──
+        tbl = _find_element(p_elem, 'tbl', ns)
+        if tbl is not None:
+            content_order.append({
+                **base,
+                'type': 'table',
+                'table_idx': table_idx,
+                **_extract_table_info(tbl, ns),
+            })
+            table_idx += 1
+            continue
+
+        # ── (2) image ──
+        pic = _find_element(p_elem, 'pic', ns)
+        if pic is not None:
+            content_order.append({
+                **base,
+                'type': 'image',
+                'image_idx': image_idx,
+                **_extract_image_info(pic, p_elem, ns),
+            })
+            image_idx += 1
+            continue
+
+        # ── (3) text paragraph / empty paragraph ──
+        text = _collect_text(p_elem, ns)
+        runs_info = _extract_runs_info(p_elem, ns)
+
+        if not text.strip():
+            content_order.append({
+                **base,
+                'type': 'empty',
+            })
+        else:
+            content_order.append({
+                **base,
+                'type': 'paragraph',
+                'text': text,
+                'charPrIDRef': runs_info.get('first_charPrIDRef'),
+                'runs': runs_info.get('runs', []),
+            })
+
+    logger.info(
+        "content_order 추출 완료: %d items "
+        "(paragraphs=%d, tables=%d, images=%d, empty=%d)",
+        len(content_order),
+        sum(1 for c in content_order if c['type'] == 'paragraph'),
+        table_idx,
+        image_idx,
+        sum(1 for c in content_order if c['type'] == 'empty'),
+    )
+
+    return content_order
+
+
+# ================================================================
+#  본문 <hp:p> 수집 — secPr 내부 제외
+# ================================================================
+
+def _collect_body_paragraphs(root, ns):
+    """Collect the body paragraphs: <hp:p> elements directly under <hp:sec>.
+
+    Only direct children are taken, so paragraphs nested in secPr,
+    headerFooter or table cells (subList) are left out; a table is handled
+    as one item by the caller. When no direct <hp:p> child exists, the
+    tree-wide fallback search is used instead.
+    """
+    container = _find_element(root, 'sec', ns)
+    if container is None:
+        # The root itself may be the sec element
+        container = root
+
+    direct = [node for node in container if _local_tag(node) == 'p']
+    if direct:
+        return direct
+    return _collect_paragraphs_fallback(root, ns)
+
+
+def _collect_paragraphs_fallback(root, ns):
+    """Fallback search: every <hp:p> in document order, pruning excluded subtrees.
+
+    Subtrees rooted at secPr / headerFooter / subList / tc are not entered,
+    and a collected <hp:p> is not descended into, so nested paragraphs are
+    never reported.
+    """
+    excluded = {'secPr', 'headerFooter', 'subList', 'tc'}
+    found = []
+
+    # Explicit stack; children are pushed reversed to keep pre-order.
+    pending = [root]
+    while pending:
+        node = pending.pop()
+        name = _local_tag(node)
+        if name in excluded:
+            continue
+        if name == 'p':
+            found.append(node)
+            continue
+        pending.extend(reversed(list(node)))
+
+    return found
+
+
+# ================================================================
+#  표 정보 추출
+# ================================================================
+
+def _extract_table_info(tbl, ns):
+    """Read basic metadata from an <hp:tbl> element.
+
+    Returns a dict with rowCnt / colCnt / borderFillIDRef (attribute values,
+    None when absent) and, when a colSz/widthList with text is present,
+    colWidths as the whitespace-split list of width strings.
+    """
+    info = {
+        name: _get_attr(tbl, name)
+        for name in ('rowCnt', 'colCnt', 'borderFillIDRef')
+    }
+
+    # Column widths
+    col_sz = _find_element(tbl, 'colSz', ns)
+    widths = None
+    if col_sz is not None:
+        widths = _find_element(col_sz, 'widthList', ns)
+    if widths is not None and widths.text:
+        info['colWidths'] = widths.text.strip().split()
+
+    return info
+
+
+# ================================================================
+#  이미지 정보 추출
+# ================================================================
+
+def _extract_image_info(pic, p_elem, ns):
+    """Read image reference data from an <hp:pic> element.
+
+    Returns a dict with
+        - binaryItemIDRef: attribute of the img element (None when missing)
+        - text: text of the same paragraph found outside the pic (e.g. a caption)
+        - imgRect: x / y / w / h attributes, only when an imgRect element exists
+    """
+    img = _find_element(pic, 'img', ns)
+    binary_ref = None
+    if img is not None:
+        binary_ref = _get_attr(img, 'binaryItemIDRef')
+
+    info = {
+        'binaryItemIDRef': binary_ref,
+        # Text in the same paragraph, from runs outside the pic
+        'text': _collect_text_outside(p_elem, pic, ns),
+    }
+
+    # Size information from imgRect
+    rect = _find_element(pic, 'imgRect', ns)
+    if rect is not None:
+        info['imgRect'] = {
+            axis: _get_attr(rect, axis) for axis in ('x', 'y', 'w', 'h')
+        }
+
+    return info
+
+
+# ================================================================
+#  텍스트 수집
+# ================================================================
+
+def _collect_text(p_elem, ns):
+    """Concatenate the text of every <hp:t> under an <hp:p>, in document order.
+
+    Note: ``tail`` text is deliberately ignored; per the original author it
+    is only XML indentation, real text always sits inside <hp:t>...</hp:t>.
+    """
+    return ''.join(
+        node.text
+        for node in _find_all_elements(p_elem, 't', ns)
+        if node.text
+    )
+
+
+def _collect_text_outside(p_elem, exclude_elem, ns):
+    """Collect <hp:t> text inside p_elem, skipping the exclude_elem subtree (e.g. a pic)."""
+    pieces = []
+
+    # Explicit stack; children are pushed reversed to keep document order.
+    stack = [p_elem]
+    while stack:
+        node = stack.pop()
+        if node is exclude_elem:
+            continue
+        if _local_tag(node) == 't' and node.text:
+            pieces.append(node.text)
+        stack.extend(reversed(list(node)))
+
+    return ''.join(pieces)
+
+
+# ================================================================
+#  Run 정보 추출
+# ================================================================
+
+def _extract_runs_info(p_elem, ns):
+    """Collect charPrIDRef and text for the direct <hp:run> children of an <hp:p>.
+
+    Runs without any text are left out of ``runs``, but their charPrIDRef
+    still counts when determining ``first_charPrIDRef``.
+
+    Returns:
+        {
+            'first_charPrIDRef': str or None,
+            'runs': [
+                {'charPrIDRef': '8', 'text': '...'},
+                {'charPrIDRef': '24', 'text': '...'},
+            ]
+        }
+    """
+    collected = []
+    first_ref = None
+
+    for run in _find_direct_runs(p_elem, ns):
+        ref = _get_attr(run, 'charPrIDRef')
+        if first_ref is None and ref is not None:
+            first_ref = ref
+
+        run_text = ''.join(
+            t.text for t in _find_all_elements(run, 't', ns) if t.text
+        )
+        if run_text:
+            collected.append({'charPrIDRef': ref, 'text': run_text})
+
+    return {'first_charPrIDRef': first_ref, 'runs': collected}
+
+
+def _find_direct_runs(p_elem, ns):
+    """Return only the direct <hp:run> children of an <hp:p> (nothing nested, e.g. in subList)."""
+    return [node for node in p_elem if _local_tag(node) == 'run']
+
+
+# ================================================================
+#  네임스페이스 감지
+# ================================================================
+
+def _detect_namespaces(raw_xml, parsed):
+    """Detect the namespace URIs actually declared in the XML text.
+
+    Starts from a copy of DEFAULT_NS and overrides hp / ha / hh / hc with
+    the URI of the first matching ``xmlns:<prefix>="..."`` declaration in
+    ``raw_xml``. ``parsed`` is accepted but not consulted.
+
+    Per the original author the URI can vary with the HWPX version, e.g.
+    .../hwpml/2011/paragraph vs. .../hwpml/2016/paragraph.
+    """
+    detected = dict(DEFAULT_NS)
+    if not raw_xml:
+        return detected
+
+    for prefix in ('hp', 'ha', 'hh', 'hc'):
+        declared = re.search(rf'xmlns:{prefix}="([^"]+)"', raw_xml)
+        if declared:
+            detected[prefix] = declared.group(1)
+
+    return detected
+
+
+# ================================================================
+#  XML 유틸리티 — 네임스페이스 불가지론적 탐색
+# ================================================================
+
+def _local_tag(elem):
+    """Return the local name of an element tag: '{namespace}localname' → 'localname'.
+
+    Comment and processing-instruction nodes carry a callable instead of a
+    string in ``.tag``; for those an empty string is returned so that tree
+    walks comparing local names simply skip them instead of raising.
+    """
+    tag = elem.tag
+    if not isinstance(tag, str):
+        return ''
+    if '}' in tag:
+        return tag.split('}', 1)[1]
+    return tag
+
+
+def _get_attr(elem, attr_name):
+    """Return an attribute value, trying the plain name and then a qualified one.
+
+    The qualified fallback accepts only ``{uri}attr_name`` (ElementTree
+    notation) or ``prefix:attr_name``. A bare suffix match is not enough:
+    it would let 'h' pick up 'width' or 'x' pick up 'index'.
+
+    Returns:
+        The attribute value (str), or None when the attribute is absent.
+    """
+    # Plain attribute name
+    val = elem.get(attr_name)
+    if val is not None:
+        return val
+
+    # Namespace-qualified attribute name
+    qualified_suffixes = ('}' + attr_name, ':' + attr_name)
+    for full_attr, value in elem.attrib.items():
+        if full_attr.endswith(qualified_suffixes):
+            return value
+
+    return None
+
+
+def _find_element(parent, local_name, ns):
+    """Find the first element named ``local_name`` near ``parent``.
+
+    Three passes, first hit wins:
+      1. direct child qualified with the URI of hp / hh / hc / ha (in that order)
+      2. direct child compared by local name only
+      3. grandchild compared by local name only (one extra level, no deeper)
+
+    Returns None when nothing matches.
+    """
+    # Pass 1: namespace-qualified lookup
+    for prefix in ('hp', 'hh', 'hc', 'ha'):
+        uri = ns.get(prefix, '')
+        hit = parent.find(f'{{{uri}}}{local_name}')
+        if hit is not None:
+            return hit
+
+    # Pass 2: direct children by local name
+    direct = next(
+        (child for child in parent if _local_tag(child) == local_name),
+        None,
+    )
+    if direct is not None:
+        return direct
+
+    # Pass 3: grandchildren by local name
+    for child in parent:
+        nested = next(
+            (node for node in child if _local_tag(node) == local_name),
+            None,
+        )
+        if nested is not None:
+            return nested
+
+    return None
+
+
+def _find_all_elements(parent, local_name, ns):
+    """Return every element with the given local name, in document order.
+
+    The search covers ``parent`` itself and all of its descendants.
+    """
+    return [
+        node for node in parent.iter()
+        if _local_tag(node) == local_name
+    ]
+
+
+# ================================================================
+#  편의 함수
+# ================================================================
+
+def summarize(content_order):
+    """Render a content_order list as a human-readable summary, one line per item.
+
+    Reference IDs may legitimately be None (extract() reports them as
+    "str or None"); None is shown as a placeholder ('-' for paraPr/charPr,
+    '?' for table size and image ref) instead of being fed to a string
+    format spec, which would raise TypeError.
+
+    Args:
+        content_order: list of item dicts as produced by extract().
+
+    Returns:
+        str: the summary lines joined with newlines ('' for an empty list).
+    """
+    def _show(value, missing='-'):
+        # ``:<4s`` only accepts str, so stringify and substitute None first
+        return missing if value is None else str(value)
+
+    lines = []
+    for item in content_order:
+        idx = item['index']
+        t = item['type']
+
+        if t == 'paragraph':
+            text = item['text']
+            text_preview = text[:50]
+            if len(text) > 50:
+                text_preview += '...'
+            para_pr = _show(item.get('paraPrIDRef'))
+            char_pr = _show(item.get('charPrIDRef'))
+            lines.append(
+                f"[{idx:3d}] P  paraPr={para_pr:<4s} "
+                f"charPr={char_pr:<4s} "
+                f"\"{text_preview}\""
+            )
+        elif t == 'table':
+            rows = _show(item.get('rowCnt'), '?')
+            cols = _show(item.get('colCnt'), '?')
+            lines.append(
+                f"[{idx:3d}] T  table_idx={item['table_idx']} "
+                f"({rows}×{cols})"
+            )
+        elif t == 'image':
+            ref = _show(item.get('binaryItemIDRef'), '?')
+            caption = (item.get('text') or '')[:30]
+            lines.append(
+                f"[{idx:3d}] I  image_idx={item['image_idx']} "
+                f"ref={ref} \"{caption}\""
+            )
+        elif t == 'empty':
+            lines.append(f"[{idx:3d}] _  (empty)")
+
+    return '\n'.join(lines)
+
+
+def get_stats(content_order):
+    """Count the items of a content_order list per content type.
+
+    Returns a dict with the keys total / paragraphs / tables / images /
+    empty. Items of an unknown type only contribute to ``total``.
+    """
+    bucket_for = {
+        'paragraph': 'paragraphs',
+        'table': 'tables',
+        'image': 'images',
+        'empty': 'empty',
+    }
+
+    stats = dict.fromkeys(
+        ('total', 'paragraphs', 'tables', 'images', 'empty'), 0
+    )
+    stats['total'] = len(content_order)
+
+    for entry in content_order:
+        bucket = bucket_for.get(entry['type'])
+        if bucket:
+            stats[bucket] += 1
+
+    return stats
--- a/handlers/tools/font.py
+++ b/handlers/tools/font.py
@@ -0,0 +1,82 @@
+# -*- coding: utf-8 -*-
+"""
+§3 글꼴(FaceName) 추출
+
+HWPX 실제 태그 (header.xml):
+  <hh:fontface lang="HANGUL" fontCnt="9">
+    <hh:font id="0" face="돋움" type="TTF" isEmbedded="0">
+    <hh:font id="1" face="맑은 고딕" type="TTF" isEmbedded="0">
+  </hh:fontface>
+  <hh:fontface lang="LATIN" fontCnt="9">
+    <hh:font id="0" face="돋움" type="TTF" isEmbedded="0">
+  </hh:fontface>
+
+디폴트값 생성 안 함. 추출 실패 시 None 반환.
+"""
+
+import re
+
+
+def extract(raw_xml: dict, parsed: dict = None) -> dict | None:
+    """Extract the per-language font definitions from §3 fontface blocks.
+
+    Each ``<hh:font ...>`` tag is located first and its id / face / type
+    attributes are then read individually, so the result does not depend on
+    the order in which the attributes are written (XML attribute order is
+    not significant). A font tag lacking any of the three is skipped.
+
+    Returns:
+        {
+            "HANGUL": [{"id": 0, "face": "돋움", "type": "TTF"}, ...],
+            "LATIN":  [{"id": 0, "face": "돋움", "type": "TTF"}, ...],
+            "HANJA":  [...],
+            ...
+        }
+        or None when the header XML is missing or holds no usable font.
+    """
+    header_xml = _get_header_xml(raw_xml, parsed)
+    if not header_xml:
+        return None
+
+    result = {}
+
+    # One fontface block per language
+    fontface_blocks = re.findall(
+        r'<hh:fontface\b[^>]*\blang="([^"]+)"[^>]*>(.*?)</hh:fontface>',
+        header_xml, re.DOTALL
+    )
+
+    for lang, block_content in fontface_blocks:
+        fonts = []
+        for tag_m in re.finditer(r'<hh:font\b([^>]*)>', block_content):
+            tag_attrs = tag_m.group(1)
+            id_m = re.search(r'\bid="(\d+)"', tag_attrs)
+            face_m = re.search(r'\bface="([^"]+)"', tag_attrs)
+            type_m = re.search(r'\btype="([^"]+)"', tag_attrs)
+            if not (id_m and face_m and type_m):
+                continue
+            fonts.append({
+                "id": int(id_m.group(1)),
+                "face": face_m.group(1),
+                "type": type_m.group(1),
+            })
+
+        if fonts:
+            result[lang] = fonts
+
+    return result if result else None
+
+
+def _get_header_xml(raw_xml: dict, parsed: dict = None) -> str | None:
+    """Fetch the header.xml string.
+
+    Priority: a truthy ``parsed["header_xml"]``; the first str value of a
+    dict ``raw_xml`` whose key contains "header" (case-insensitive); a str
+    ``raw_xml`` itself. None when none of these apply.
+    """
+    cached = parsed.get("header_xml") if parsed else None
+    if cached:
+        return cached
+
+    if isinstance(raw_xml, str):
+        return raw_xml
+
+    if isinstance(raw_xml, dict):
+        return next(
+            (text for part, text in raw_xml.items()
+             if "header" in part.lower() and isinstance(text, str)),
+            None,
+        )
+
+    return None
--- a/handlers/tools/header_footer.py
+++ b/handlers/tools/header_footer.py
@@ -0,0 +1,200 @@
+# -*- coding: utf-8 -*-
+"""
+§8 머리말/꼬리말(HeaderFooter) 추출
+
+HWPX 실제 태그 (section0.xml):
+  <hp:headerFooter ...>
+    <!-- 내용은 section XML 내 또는 별도 header/footer 영역 -->
+  </hp:headerFooter>
+
+  머리말/꼬리말 안에 표가 있는 경우:
+  - 표의 셀에 다중행 텍스트가 포함될 수 있음
+  - 각 셀의 colSpan, rowSpan, width, borderFillIDRef 등 추출 필요
+
+secPr 내 속성:
+  <hp:visibility hideFirstHeader="0" hideFirstFooter="0" .../>
+
+디폴트값 생성 안 함.
+"""
+
+import re
+
+from domain.hwpx.hwpx_utils import hwpunit_to_mm
+
+
+def extract_header(raw_xml: dict, parsed: dict = None) -> dict | None:
+    """Extract the page header structure (delegates to ``_extract_hf``).
+
+    Returns:
+        None when no header content is found, otherwise
+        {
+            "exists": True,
+            "type": "table" | "text",
+            "hidden": False,            # only when hideFirstHeader is found
+            "table": { ... },           # only when the header holds a table
+            "texts": ["...", ...],      # only when non-blank text exists
+        }
+    """
+    hf_kind = "header"
+    return _extract_hf(raw_xml, parsed, hf_kind)
+
+
+def extract_footer(raw_xml: dict, parsed: dict = None) -> dict | None:
+    """Extract the page footer structure (delegates to ``_extract_hf``).
+
+    Returns the same dict layout as the header extraction, or None when no
+    footer content is found.
+    """
+    hf_kind = "footer"
+    return _extract_hf(raw_xml, parsed, hf_kind)
+
+
+def _extract_hf(raw_xml: dict, parsed: dict, hf_type: str) -> dict | None:
+    """Shared extraction logic for the page header ("header") or footer ("footer").
+
+    The header/footer XML comes from ``parsed["page_<hf_type>_xml"]`` when
+    that is non-empty; otherwise from the first ``<hp:headerFooter>`` block
+    of the section XML whose ``type`` attribute equals ``hf_type``
+    (case-insensitive).
+
+    Returns:
+        None when no non-blank header/footer XML is found, otherwise a dict
+        with "exists", optionally "hidden" and "texts", "type"
+        ("table" | "text") and, for tables, "table".
+    """
+    # 1) header/footer XML supplied directly through parsed
+    hf_xml = parsed.get(f"page_{hf_type}_xml", "") if parsed else None
+
+    # 2) otherwise search the section XML for a matching headerFooter block
+    section_xml = _get_section_xml(raw_xml, parsed)
+    if section_xml and not hf_xml:
+        for attrs, inner in re.findall(
+            r'<hp:headerFooter\b([^>]*)>(.*?)</hp:headerFooter>',
+            section_xml, re.DOTALL
+        ):
+            # The type attribute tells HEADER from FOOTER
+            kind = re.search(r'\btype="([^"]+)"', attrs)
+            if kind and kind.group(1).upper() == hf_type.upper():
+                hf_xml = inner
+                break
+
+    if not (hf_xml and hf_xml.strip()):
+        return None  # no such header/footer
+
+    result = {"exists": True}
+
+    # Hidden-on-first-page flag
+    if section_xml:
+        suffix = 'Header' if hf_type == 'header' else 'Footer'
+        hide_key = f"hideFirst{suffix}"
+        flag = re.search(rf'\b{hide_key}="(\d+)"', section_xml)
+        if flag:
+            result["hidden"] = int(flag.group(1)) != 0
+
+    # Non-blank text pieces
+    stripped = (raw.strip() for raw in re.findall(r'<hp:t>([^<]*)</hp:t>', hf_xml))
+    clean_texts = [piece for piece in stripped if piece]
+    if clean_texts:
+        result["texts"] = clean_texts
+
+    # Table present?
+    tbl = re.search(
+        r'<hp:tbl\b([^>]*)>(.*?)</hp:tbl>',
+        hf_xml, re.DOTALL
+    )
+    if tbl is None:
+        result["type"] = "text"
+    else:
+        result["type"] = "table"
+        result["table"] = _parse_hf_table(tbl.group(1), tbl.group(2))
+
+    return result
+
+
+def _parse_hf_table(tbl_attrs: str, tbl_inner: str) -> dict:
+    """Parse a table found inside a header/footer.
+
+    Args:
+        tbl_attrs: attribute text of the ``<hp:tbl>`` opening tag.
+        tbl_inner: everything between ``<hp:tbl ...>`` and ``</hp:tbl>``.
+
+    Returns:
+        dict with, each only when available: rowCnt, colCnt (int),
+        colWidths_hu (list[int]), colWidths_pct (rounded percentages of the
+        total width) and rows (list of rows, each a list of cell dicts).
+    """
+    table = {}
+
+    # rowCnt, colCnt
+    for attr in ("rowCnt", "colCnt"):
+        count = re.search(rf'\b{attr}="(\d+)"', tbl_attrs)
+        if count:
+            table[attr] = int(count.group(1))
+
+    # Column widths; a non-integer entry drops the widths altogether
+    width_list = re.search(r'<hp:widthList>([^<]+)</hp:widthList>', tbl_inner)
+    if width_list:
+        try:
+            widths = [int(token) for token in width_list.group(1).strip().split()]
+        except ValueError:
+            pass
+        else:
+            table["colWidths_hu"] = widths
+            total = sum(widths) or 1  # avoid dividing by zero
+            table["colWidths_pct"] = [round(w / total * 100) for w in widths]
+
+    # Rows / cells
+    rows = [
+        [
+            _parse_hf_cell(tc.group(1), tc.group(2))
+            for tc in re.finditer(
+                r'<hp:tc\b([^>]*)>(.*?)</hp:tc>', tr, re.DOTALL
+            )
+        ]
+        for tr in re.findall(r'<hp:tr\b[^>]*>(.*?)</hp:tr>', tbl_inner, re.DOTALL)
+    ]
+    if rows:
+        table["rows"] = rows
+
+    return table
+
+
+def _parse_hf_cell(tc_attrs: str, tc_inner: str) -> dict:
+    """Parse one cell of a header/footer table.
+
+    Child tags are located first and their attributes read individually, so
+    the result does not depend on attribute order (e.g. rowAddr written
+    before colAddr).
+
+    Args:
+        tc_attrs: attribute text of the ``<hp:tc>`` opening tag.
+        tc_inner: everything between ``<hp:tc ...>`` and ``</hp:tc>``.
+
+    Returns:
+        dict with, each only when found: borderFillIDRef, colAddr + rowAddr
+        (set together), colSpan, rowSpan, width_hu, and for cells with
+        text: lines (one entry per non-empty paragraph) and text (the lines
+        joined with a space).
+    """
+    cell = {}
+
+    # borderFillIDRef
+    bf = re.search(r'\bborderFillIDRef="(\d+)"', tc_attrs)
+    if bf:
+        cell["borderFillIDRef"] = int(bf.group(1))
+
+    # cellAddr: both coordinates are required, in any order
+    addr = re.search(r'<hp:cellAddr\b([^/]*)/?>', tc_inner)
+    if addr:
+        col = re.search(r'\bcolAddr="(\d+)"', addr.group(1))
+        row = re.search(r'\browAddr="(\d+)"', addr.group(1))
+        if col and row:
+            cell["colAddr"] = int(col.group(1))
+            cell["rowAddr"] = int(row.group(1))
+
+    # cellSpan
+    span = re.search(r'<hp:cellSpan\b([^/]*)/?>', tc_inner)
+    if span:
+        cs = re.search(r'\bcolSpan="(\d+)"', span.group(1))
+        rs = re.search(r'\browSpan="(\d+)"', span.group(1))
+        if cs:
+            cell["colSpan"] = int(cs.group(1))
+        if rs:
+            cell["rowSpan"] = int(rs.group(1))
+
+    # cellSz
+    sz = re.search(r'<hp:cellSz\b([^/]*)/?>', tc_inner)
+    if sz:
+        w = re.search(r'\bwidth="(\d+)"', sz.group(1))
+        if w:
+            cell["width_hu"] = int(w.group(1))
+
+    # Cell text (possibly several paragraphs)
+    lines = []
+    for para in re.findall(r'<hp:p\b[^>]*>(.*?)</hp:p>', tc_inner, re.DOTALL):
+        pieces = re.findall(r'<hp:t>([^<]*)</hp:t>', para)
+        line = " ".join(t.strip() for t in pieces if t.strip())
+        if line:
+            lines.append(line)
+
+    if lines:
+        cell["text"] = " ".join(lines)
+        cell["lines"] = lines
+
+    return cell
+
+
+def _get_section_xml(raw_xml: dict, parsed: dict = None) -> str | None:
+    """Return the section XML text, or None when none can be found.
+
+    Lookup order: a truthy ``parsed["section_xml"]``; then, when ``raw_xml``
+    is a dict, the first str value whose key contains "section"
+    (case-insensitive); finally ``raw_xml`` itself when it is a str.
+    """
+    preparsed = parsed.get("section_xml") if parsed else None
+    if preparsed:
+        return preparsed
+
+    if isinstance(raw_xml, str):
+        return raw_xml
+    if not isinstance(raw_xml, dict):
+        return None
+
+    # Lazy scan so the first matching entry wins, in dict order.
+    return next(
+        (body for key, body in raw_xml.items()
+         if "section" in key.lower() and isinstance(body, str)),
+        None,
+    )
--- a/handlers/tools/image.py
+++ b/handlers/tools/image.py
@@ -0,0 +1,98 @@
+# -*- coding: utf-8 -*-
+"""
+이미지/그리기 객체(ShapeObject) 추출
+
+HWPX 실제 태그 (section0.xml):
+  <hp:pic id="..." zOrder="..." ...>
+    <hp:offset x="0" y="0"/>
+    <hp:orgSz width="..." height="..."/>
+    <hp:curSz width="..." height="..."/>
+    <hp:imgRect>
+      <hp:pt x="..." y="..."/>  <!-- 4개 꼭짓점 -->
+    </hp:imgRect>
+    <hp:imgClip .../>
+    <hp:img binaryItemIDRef="image1.JPG" .../>
+  </hp:pic>
+
+  또는 그리기 객체:
+  <hp:container id="..." ...>
+    <hp:offset x="..." y="..."/>
+    ...
+  </hp:container>
+
+디폴트값 생성 안 함.
+"""
+
+import re
+
+from domain.hwpx.hwpx_utils import hwpunit_to_mm
+
+
+def extract(raw_xml: dict, parsed: dict = None) -> list | None:
+    """Extract image objects (``<hp:pic>`` blocks) from the section XML.
+
+    The curSz and offset tags are located first and their attributes read
+    individually, so the result does not depend on attribute order
+    (XML attribute order is not significant).
+
+    Returns:
+        [
+            {
+                "type": "image",
+                "binaryItemRef": "image1.JPG",
+                "width_hu": 28346, "height_hu": 14173,
+                "width_mm": 100.0, "height_mm": 50.0,
+                "offset": {"x": 0, "y": 0},
+            },
+            ...
+        ]
+        Every key except "type" is present only when it was found.
+        None when the section XML is missing or holds no pic block.
+    """
+    section_xml = _get_section_xml(raw_xml, parsed)
+    if not section_xml:
+        return None
+
+    result = []
+
+    # <hp:pic> blocks
+    pic_blocks = re.finditer(
+        r'<hp:pic\b([^>]*)>(.*?)</hp:pic>',
+        section_xml, re.DOTALL
+    )
+    for pm in pic_blocks:
+        pic_inner = pm.group(2)
+        item = {"type": "image"}
+
+        # binaryItemRef
+        img = re.search(r'<hp:img\b[^>]*\bbinaryItemIDRef="([^"]+)"', pic_inner)
+        if img:
+            item["binaryItemRef"] = img.group(1)
+
+        # curSz (current size): both dimensions are required, in any order
+        csz = re.search(r'<hp:curSz\b([^>]*)>', pic_inner)
+        if csz:
+            w_m = re.search(r'\bwidth="(\d+)"', csz.group(1))
+            h_m = re.search(r'\bheight="(\d+)"', csz.group(1))
+            if w_m and h_m:
+                w, h = int(w_m.group(1)), int(h_m.group(1))
+                item["width_hu"] = w
+                item["height_hu"] = h
+                item["width_mm"] = round(hwpunit_to_mm(w), 1)
+                item["height_mm"] = round(hwpunit_to_mm(h), 1)
+
+        # offset: both coordinates are required, in any order
+        off = re.search(r'<hp:offset\b([^>]*)>', pic_inner)
+        if off:
+            x_m = re.search(r'\bx="(-?\d+)"', off.group(1))
+            y_m = re.search(r'\by="(-?\d+)"', off.group(1))
+            if x_m and y_m:
+                item["offset"] = {"x": int(x_m.group(1)), "y": int(y_m.group(1))}
+
+        result.append(item)
+
+    return result if result else None
+
+
+def _get_section_xml(raw_xml: dict, parsed: dict = None) -> str | None:
+    """Find the section XML string to analyse.
+
+    A truthy ``parsed["section_xml"]`` takes priority. Otherwise a str
+    ``raw_xml`` is returned as-is, and a dict ``raw_xml`` yields its first
+    str value stored under a key containing "section" (case-insensitive).
+    Returns None when nothing qualifies.
+    """
+    if parsed:
+        from_parsed = parsed.get("section_xml")
+        if from_parsed:
+            return from_parsed
+
+    if isinstance(raw_xml, str):
+        return raw_xml
+
+    found = None
+    if isinstance(raw_xml, dict):
+        for key in raw_xml:
+            text = raw_xml[key]
+            if "section" in key.lower() and isinstance(text, str):
+                found = text
+                break
+    return found
--- a/handlers/tools/numbering.py
+++ b/handlers/tools/numbering.py
@@ -0,0 +1,136 @@
+# -*- coding: utf-8 -*-
+"""
+번호매기기(Numbering) / 글머리표(Bullet) 추출
+
+HWPX 실제 태그 (header.xml):
+  <hh:numbering id="1" start="0">
+    <hh:paraHead start="1" level="1" align="LEFT" useInstWidth="1"
+                 autoIndent="1" widthAdjust="0" textOffsetType="PERCENT"
+                 textOffset="50" numFormat="DIGIT" charPrIDRef="4294967295"
+                 checkable="0">^1.</hh:paraHead>
+    <hh:paraHead start="1" level="2" ... numFormat="HANGUL_SYLLABLE">^2.</hh:paraHead>
+  </hh:numbering>
+
+  <hh:bullet id="1" char="-" useImage="0">
+    <hh:paraHead level="0" align="LEFT" .../>
+  </hh:bullet>
+
+디폴트값 생성 안 함.
+"""
+
+import re
+
+
+def extract(raw_xml: dict, parsed: dict = None) -> dict | None:
+    """Collect numbering and bullet definitions from the header XML.
+
+    Only attributes that are actually present are emitted; nothing is
+    defaulted.
+
+    Returns:
+        {
+            "numberings": [
+                {
+                    "id": 1, "start": 0,
+                    "levels": [
+                        {"level": 1, "numFormat": "DIGIT", "align": "LEFT",
+                         "pattern": "^1."},
+                        ...
+                    ]
+                }
+            ],
+            "bullets": [
+                {"id": 1, "char": "-", "useImage": False}
+            ]
+        }
+        or None when no header XML is available or nothing was found.
+    """
+    header_xml = _get_header_xml(raw_xml, parsed)
+    if not header_xml:
+        return None
+
+    result = {}
+
+    # -- numbering definitions --
+    numberings = []
+    for num_attrs, num_body in re.findall(
+            r'<hh:numbering\b([^>]*)>(.*?)</hh:numbering>',
+            header_xml, re.DOTALL):
+        entry = {}
+        if (hit := re.search(r'\bid="(\d+)"', num_attrs)):
+            entry["id"] = int(hit.group(1))
+        if (hit := re.search(r'\bstart="(\d+)"', num_attrs)):
+            entry["start"] = int(hit.group(1))
+
+        # One paraHead per level; the element text is the number pattern.
+        # Only paraHead elements with an explicit closing tag are matched.
+        levels = []
+        for head in re.finditer(
+                r'<hh:paraHead\b([^>]*)>([^<]*)</hh:paraHead>', num_body):
+            head_attrs = head.group(1)
+            pattern = head.group(2).strip()
+            info = {}
+
+            if (hit := re.search(r'\blevel="(\d+)"', head_attrs)):
+                info["level"] = int(hit.group(1))
+            for key in ("numFormat", "align"):
+                if (hit := re.search(rf'\b{key}="([^"]+)"', head_attrs)):
+                    info[key] = hit.group(1)
+            if pattern:
+                info["pattern"] = pattern
+
+            if info:
+                levels.append(info)
+
+        if levels:
+            entry["levels"] = levels
+        numberings.append(entry)
+
+    if numberings:
+        result["numberings"] = numberings
+
+    # -- bullet definitions --
+    bullets = []
+    for bullet_attrs, _body in re.findall(
+            r'<hh:bullet\b([^>]*)>(.*?)</hh:bullet>',
+            header_xml, re.DOTALL):
+        entry = {}
+        if (hit := re.search(r'\bid="(\d+)"', bullet_attrs)):
+            entry["id"] = int(hit.group(1))
+        if (hit := re.search(r'\bchar="([^"]*)"', bullet_attrs)):
+            entry["char"] = hit.group(1)
+        if (hit := re.search(r'\buseImage="(\d+)"', bullet_attrs)):
+            entry["useImage"] = int(hit.group(1)) != 0
+        bullets.append(entry)
+
+    if bullets:
+        result["bullets"] = bullets
+
+    return result or None
+
+
+def _get_header_xml(raw_xml: dict, parsed: dict = None) -> str | None:
+    """Return the header XML text, or None when none is available.
+
+    Preference order: a truthy ``parsed["header_xml"]``; the first string
+    value in ``raw_xml`` whose key contains "header" (case-insensitive);
+    ``raw_xml`` itself when it is already a string.
+    """
+    preparsed = parsed.get("header_xml") if parsed else None
+    if preparsed:
+        return preparsed
+
+    if isinstance(raw_xml, dict):
+        for key, body in raw_xml.items():
+            if "header" not in key.lower():
+                continue
+            if isinstance(body, str):
+                return body
+        return None
+
+    if isinstance(raw_xml, str):
+        return raw_xml
+    return None
--- a/handlers/tools/page_setup.py
+++ b/handlers/tools/page_setup.py
@@ -0,0 +1,110 @@
+# -*- coding: utf-8 -*-
+"""
+§7 용지 설정 추출 (pagePr + margin)
+
+HWPX 실제 태그:
+  <hp:pagePr landscape="WIDELY" width="59528" height="84188" gutterType="LEFT_ONLY">
+  <hp:margin header="4251" footer="4251" gutter="0"
+             left="5669" right="5669" top="2834" bottom="2834"/>
+
+디폴트값 생성 안 함. 추출 실패 시 None 반환.
+"""
+
+import re
+
+from domain.hwpx.hwpx_utils import hwpunit_to_mm, mm_format, detect_paper_size
+
+
+def extract(raw_xml: dict, parsed: dict = None) -> dict | None:
+    """Extract paper size and page margins from pagePr / margin tags.
+
+    Returns:
+        {
+            "paper": {"name": "A4", "width_mm": 210.0, "height_mm": 297.0,
+                       "landscape": True/False},
+            "margins": {"top": "10.0mm", "bottom": "10.0mm",
+                        "left": "20.0mm", "right": "20.0mm",
+                        "header": "15.0mm", "footer": "15.0mm",
+                        "gutter": "0.0mm"}
+        }
+        "margins" is omitted when no <hp:margin .../> tag is found.
+        Returns None when no section XML is available or no pagePr tag
+        carries both width and height.
+    """
+    section_xml = _get_section_xml(raw_xml, parsed)
+    if not section_xml:
+        return None
+
+    # -- paper size: try width-before-height, then the reverse order --
+    dims = re.search(
+        r'<hp:pagePr\b[^>]*\bwidth="(\d+)"[^>]*\bheight="(\d+)"',
+        section_xml
+    )
+    if dims is not None:
+        w_hu, h_hu = int(dims.group(1)), int(dims.group(2))
+    else:
+        dims = re.search(
+            r'<hp:pagePr\b[^>]*\bheight="(\d+)"[^>]*\bwidth="(\d+)"',
+            section_xml
+        )
+        if dims is None:
+            return None
+        h_hu, w_hu = int(dims.group(1)), int(dims.group(2))
+
+    # NOTE(review): the sample tag in this module's docstring pairs
+    # landscape="WIDELY" with width 59528 < height 84188 (portrait
+    # dimensions) -- confirm that "WIDELY" really denotes landscape.
+    orientation = re.search(
+        r'<hp:pagePr\b[^>]*\blandscape="([^"]+)"', section_xml
+    )
+    is_landscape = (
+        orientation is not None and orientation.group(1) == "WIDELY"
+    )
+
+    result = {
+        "paper": {
+            "name": detect_paper_size(w_hu, h_hu),
+            "width_mm": round(hwpunit_to_mm(w_hu), 1),
+            "height_mm": round(hwpunit_to_mm(h_hu), 1),
+            "landscape": is_landscape,
+        }
+    }
+
+    # -- margins: optional; the paper entry is returned either way --
+    margin_tag = re.search(r'<hp:margin\b([^/]*)/>', section_xml)
+    if margin_tag is not None:
+        margin_attrs = margin_tag.group(1)
+        margins = {}
+        for side in ("top", "bottom", "left", "right",
+                     "header", "footer", "gutter"):
+            hit = re.search(rf'\b{side}="(\d+)"', margin_attrs)
+            if hit:
+                margins[side] = mm_format(int(hit.group(1)))
+        if margins:
+            result["margins"] = margins
+
+    return result
+
+
+def _get_section_xml(raw_xml: dict, parsed: dict = None) -> str | None:
+    """Fetch the section XML string.
+
+    Checks, in order: a truthy ``parsed["section_xml"]``; the first string
+    value in ``raw_xml`` whose key contains "section" (case-insensitive);
+    ``raw_xml`` itself when it is a string. Returns None otherwise.
+    """
+    supplied = parsed.get("section_xml") if parsed else None
+    if supplied:
+        return supplied
+
+    if isinstance(raw_xml, dict):
+        matches = (
+            text for fname, text in raw_xml.items()
+            if "section" in fname.lower() and isinstance(text, str)
+        )
+        return next(matches, None)
+
+    return raw_xml if isinstance(raw_xml, str) else None
--- a/handlers/tools/para_style.py
+++ b/handlers/tools/para_style.py
@@ -0,0 +1,185 @@
+# -*- coding: utf-8 -*-
+"""
+§5 문단 모양(ParaShape) 추출
+
+HWPX 실제 태그 (header.xml):
+  <hh:paraPr id="0" tabPrIDRef="1" condense="0" ...>
+    <hh:align horizontal="JUSTIFY" vertical="BASELINE"/>
+    <hh:heading type="NONE" idRef="0" level="0"/>
+    <hh:breakSetting breakLatinWord="KEEP_WORD" breakNonLatinWord="KEEP_WORD"
+                     widowOrphan="0" keepWithNext="0" keepLines="0"
+                     pageBreakBefore="0" lineWrap="BREAK"/>
+    <hp:case ...>
+      <hh:margin>
+        <hc:intent value="-1310" unit="HWPUNIT"/>
+        <hc:left value="0" unit="HWPUNIT"/>
+        <hc:right value="0" unit="HWPUNIT"/>
+        <hc:prev value="0" unit="HWPUNIT"/>
+        <hc:next value="0" unit="HWPUNIT"/>
+      </hh:margin>
+      <hh:lineSpacing type="PERCENT" value="130" unit="HWPUNIT"/>
+    </hp:case>
+    <hh:border borderFillIDRef="2" .../>
+  </hh:paraPr>
+
+디폴트값 생성 안 함.
+"""
+
+import re
+
+from domain.hwpx.hwpx_utils import hwpunit_to_mm
+
+
+def extract(raw_xml: dict, parsed: dict = None) -> list | None:
+    """Extract every paragraph-shape (paraPr) definition from the header XML.
+
+    Each key is emitted only when the matching tag/attribute is found,
+    except inside "breakSetting", whose seven keys are always present and
+    hold None for attributes that are missing.
+
+    Returns:
+        [
+            {
+                "id": 0,
+                "tabPrIDRef": 1,
+                "align": "JUSTIFY",
+                "verticalAlign": "BASELINE",
+                "heading": {"type": "NONE", "idRef": 0, "level": 0},
+                "breakSetting": {
+                    "widowOrphan": False, "keepWithNext": False,
+                    "keepLines": False, "pageBreakBefore": False,
+                    "lineWrap": "BREAK",
+                    "breakLatinWord": "KEEP_WORD",
+                    "breakNonLatinWord": "KEEP_WORD"
+                },
+                "margin": {
+                    "indent_hu": -1310, "left_hu": 0, "right_hu": 0,
+                    "before_hu": 0, "after_hu": 0,
+                },
+                "lineSpacing": {"type": "PERCENT", "value": 130},
+                "borderFillIDRef": 2,
+            },
+            ...
+        ]
+        or None when no header XML is available or no paraPr block exists.
+    """
+    header_xml = _get_header_xml(raw_xml, parsed)
+    if not header_xml:
+        return None
+
+    # (child tag under hh:margin, output key)
+    margin_fields = (
+        ("intent", "indent_hu"),
+        ("left", "left_hu"),
+        ("right", "right_hu"),
+        ("prev", "before_hu"),
+        ("next", "after_hu"),
+    )
+
+    result = []
+    for para_attrs, body in re.findall(
+            r'<hh:paraPr\b([^>]*)>(.*?)</hh:paraPr>',
+            header_xml, re.DOTALL):
+        item = {}
+
+        # attributes on the paraPr tag itself
+        if (hit := re.search(r'\bid="(\d+)"', para_attrs)):
+            item["id"] = int(hit.group(1))
+        if (hit := re.search(r'\btabPrIDRef="(\d+)"', para_attrs)):
+            item["tabPrIDRef"] = int(hit.group(1))
+
+        # horizontal / vertical alignment
+        if (hit := re.search(r'<hh:align\b[^>]*\bhorizontal="([^"]+)"', body)):
+            item["align"] = hit.group(1)
+        if (hit := re.search(r'<hh:align\b[^>]*\bvertical="([^"]+)"', body)):
+            item["verticalAlign"] = hit.group(1)
+
+        # heading: attributes must appear in type, idRef, level order
+        heading = re.search(
+            r'<hh:heading\b[^>]*\btype="([^"]+)"[^>]*'
+            r'\bidRef="(\d+)"[^>]*\blevel="(\d+)"', body
+        )
+        if heading:
+            kind, id_ref, level = heading.groups()
+            item["heading"] = {
+                "type": kind,
+                "idRef": int(id_ref),
+                "level": int(level),
+            }
+
+        # break settings
+        brk = re.search(r'<hh:breakSetting\b([^/]*)/?>', body)
+        if brk:
+            brk_attrs = brk.group(1)
+            flags = {
+                name: _bool_attr(brk_attrs, name)
+                for name in ("widowOrphan", "keepWithNext",
+                             "keepLines", "pageBreakBefore")
+            }
+            modes = {
+                name: _str_attr(brk_attrs, name)
+                for name in ("lineWrap", "breakLatinWord",
+                             "breakNonLatinWord")
+            }
+            item["breakSetting"] = {**flags, **modes}
+
+        # Margins and line spacing are read from the hp:case block whose
+        # required-namespace mentions HwpUnitChar when there is one,
+        # otherwise from the whole paraPr body.
+        unit_case = re.search(
+            r'<hp:case\b[^>]*required-namespace="[^"]*HwpUnitChar[^"]*"[^>]*>'
+            r'(.*?)</hp:case>',
+            body, re.DOTALL
+        )
+        margin_src = body if unit_case is None else unit_case.group(1)
+
+        margin = {
+            key: int(hit.group(1))
+            for tag, key in margin_fields
+            if (hit := re.search(
+                rf'<hc:{tag}\b[^>]*\bvalue="(-?\d+)"', margin_src))
+        }
+        if margin:
+            item["margin"] = margin
+
+        spacing = re.search(
+            r'<hh:lineSpacing\b[^>]*\btype="([^"]+)"[^>]*\bvalue="(\d+)"',
+            margin_src
+        )
+        if spacing:
+            item["lineSpacing"] = {
+                "type": spacing.group(1),
+                "value": int(spacing.group(2)),
+            }
+
+        # border/fill reference
+        if (hit := re.search(
+                r'<hh:border\b[^>]*\bborderFillIDRef="(\d+)"', body)):
+            item["borderFillIDRef"] = int(hit.group(1))
+
+        result.append(item)
+
+    return result or None
+
+
+def _bool_attr(s: str, name: str) -> bool | None:
+    """Read the numeric attribute ``name`` from ``s`` as a bool.
+
+    Returns None when the attribute is absent or its value is not all digits.
+    """
+    found = re.search(rf'\b{name}="(\d+)"', s)
+    if found is None:
+        return None
+    return int(found.group(1)) != 0
+
+
+def _str_attr(s: str, name: str) -> str | None:
+    """Read the non-empty value of attribute ``name`` from ``s``.
+
+    Returns None when the attribute is absent or its value is empty.
+    """
+    found = re.search(rf'\b{name}="([^"]+)"', s)
+    if found is None:
+        return None
+    return found.group(1)
+
+
+def _get_header_xml(raw_xml: dict, parsed: dict = None) -> str | None:
+    """Locate the header XML string.
+
+    A truthy ``parsed["header_xml"]`` wins. Otherwise the first string
+    value in ``raw_xml`` whose key contains "header" (case-insensitive) is
+    used, or ``raw_xml`` itself when it is a string. Returns None if none
+    of these apply.
+    """
+    if parsed:
+        preparsed = parsed.get("header_xml")
+        if preparsed:
+            return preparsed
+    if isinstance(raw_xml, str):
+        return raw_xml
+    if not isinstance(raw_xml, dict):
+        return None
+    return next(
+        (body for key, body in raw_xml.items()
+         if "header" in key.lower() and isinstance(body, str)),
+        None,
+    )
--- a/handlers/tools/section.py
+++ b/handlers/tools/section.py
@@ -0,0 +1,120 @@
+# -*- coding: utf-8 -*-
+"""
+§9 구역 정의(Section) 추출
+
+HWPX 실제 태그 (section0.xml):
+  <hp:secPr id="" textDirection="HORIZONTAL" spaceColumns="1134"
+            tabStop="8000" tabStopVal="4000" tabStopUnit="HWPUNIT"
+            outlineShapeIDRef="1" ...>
+    <hp:grid lineGrid="0" charGrid="0" .../>
+    <hp:startNum pageStartsOn="BOTH" page="0" .../>
+    <hp:visibility hideFirstHeader="0" hideFirstFooter="0" .../>
+    <hp:pagePr landscape="WIDELY" width="59528" height="84188" ...>
+    <hp:margin header="4251" footer="4251" left="5669" right="5669"
+               top="2834" bottom="2834"/>
+    <hp:pageNum pos="BOTTOM_CENTER" formatType="DIGIT" sideChar="-"/>
+  </hp:secPr>
+
+디폴트값 생성 안 함.
+"""
+
+import re
+
+
+def extract(raw_xml: dict, parsed: dict = None) -> dict | None:
+    """Extract section (secPr) properties from the section XML.
+
+    Only the first <hp:secPr>...</hp:secPr> block is read, and every key
+    is emitted only when its tag/attribute is present.
+
+    Returns:
+        {
+            "textDirection": "HORIZONTAL",
+            "hideFirstHeader": False,
+            "hideFirstFooter": False,
+            "startNum": {"pageStartsOn": "BOTH", "page": 0},
+            "pageNum": {"pos": "BOTTOM_CENTER", "formatType": "DIGIT",
+                        "sideChar": "-"},
+            "colDef": {"count": 2, "layout": "LEFT"},
+        }
+        or None when no section XML / secPr block exists or nothing was
+        extracted.
+    """
+    section_xml = _get_section_xml(raw_xml, parsed)
+    if not section_xml:
+        return None
+
+    sec = re.search(
+        r'<hp:secPr\b([^>]*)>(.*?)</hp:secPr>',
+        section_xml, re.DOTALL
+    )
+    if sec is None:
+        return None
+    sec_attrs, body = sec.groups()
+
+    result = {}
+
+    # text direction (attribute on secPr itself)
+    if (hit := re.search(r'\btextDirection="([^"]+)"', sec_attrs)):
+        result["textDirection"] = hit.group(1)
+
+    # first-page visibility flags, stored flat on the result
+    visibility = re.search(r'<hp:visibility\b([^/]*)/?>', body)
+    if visibility:
+        vis_attrs = visibility.group(1)
+        for flag in ("hideFirstHeader", "hideFirstFooter",
+                     "hideFirstMasterPage", "hideFirstPageNum",
+                     "hideFirstEmptyLine"):
+            if (hit := re.search(rf'\b{flag}="(\d+)"', vis_attrs)):
+                result[flag] = int(hit.group(1)) != 0
+
+    # starting page number
+    start_tag = re.search(r'<hp:startNum\b([^/]*)/?>', body)
+    if start_tag:
+        start_attrs = start_tag.group(1)
+        start = {}
+        if (hit := re.search(r'\bpageStartsOn="([^"]+)"', start_attrs)):
+            start["pageStartsOn"] = hit.group(1)
+        if (hit := re.search(r'\bpage="(\d+)"', start_attrs)):
+            start["page"] = int(hit.group(1))
+        if start:
+            result["startNum"] = start
+
+    # page-number placement/format (values may be empty strings)
+    num_tag = re.search(r'<hp:pageNum\b([^/]*)/?>', body)
+    if num_tag:
+        num_attrs = num_tag.group(1)
+        pagenum = {
+            key: hit.group(1)
+            for key in ("pos", "formatType", "sideChar")
+            if (hit := re.search(rf'\b{key}="([^"]*)"', num_attrs))
+        }
+        if pagenum:
+            result["pageNum"] = pagenum
+
+    # column definition
+    col_tag = re.search(
+        r'<hp:colDef\b([^>]*)>(.*?)</hp:colDef>', body, re.DOTALL
+    )
+    if col_tag:
+        col_attrs = col_tag.group(1)
+        coldef = {}
+        if (hit := re.search(r'\bcount="(\d+)"', col_attrs)):
+            coldef["count"] = int(hit.group(1))
+        if (hit := re.search(r'\blayout="([^"]+)"', col_attrs)):
+            coldef["layout"] = hit.group(1)
+        if coldef:
+            result["colDef"] = coldef
+
+    return result or None
+
+
+def _get_section_xml(raw_xml: dict, parsed: dict = None) -> str | None:
+    """Pick the section XML string to parse, or None if there is none.
+
+    Order of preference: a truthy ``parsed["section_xml"]``; the first
+    string value in ``raw_xml`` whose key contains "section"
+    (case-insensitive); ``raw_xml`` itself when it is a string.
+    """
+    preparsed = parsed.get("section_xml") if parsed else None
+    if preparsed:
+        return preparsed
+
+    if isinstance(raw_xml, dict):
+        for key, body in raw_xml.items():
+            if "section" not in key.lower():
+                continue
+            if isinstance(body, str):
+                return body
+        return None
+
+    if isinstance(raw_xml, str):
+        return raw_xml
+    return None
--- a/handlers/tools/style_def.py
+++ b/handlers/tools/style_def.py
@@ -0,0 +1,68 @@
+# -*- coding: utf-8 -*-
+"""
+스타일 정의(Style) 추출
+
+HWPX 실제 태그 (header.xml):
+  <hh:styles itemCnt="12">
+    <hh:style id="0" type="PARA" name="바탕글" engName="Normal"
+              paraPrIDRef="3" charPrIDRef="0" nextStyleIDRef="0"
+              langID="1042" lockForm="0"/>
+    <hh:style id="1" type="PARA" name="머리말" engName="Header"
+              paraPrIDRef="2" charPrIDRef="3" nextStyleIDRef="1" .../>
+  </hh:styles>
+
+charPrIDRef → charPr(글자모양), paraPrIDRef → paraPr(문단모양) 연결.
+디폴트값 생성 안 함.
+"""
+
+import re
+
+
+def extract(raw_xml: dict, parsed: dict = None) -> list | None:
+    """Extract style definitions (<hh:style ...>) from the header XML.
+
+    Each key is emitted only when the attribute is present; nothing is
+    defaulted. The ID attributes are converted to int, the rest are kept
+    as raw attribute strings (XML entities are not unescaped).
+
+    Returns:
+        [
+            {
+                "id": 0,
+                "paraPrIDRef": 3, "charPrIDRef": 0,
+                "nextStyleIDRef": 0,
+                "type": "PARA",
+                "name": "바탕글", "engName": "Normal",
+            },
+            ...
+        ]
+        or None when no header XML is available or no style tag exists.
+    """
+    header_xml = _get_header_xml(raw_xml, parsed)
+    if not header_xml:
+        return None
+
+    # Match each tag up to its closing '>' instead of up to the first '/'.
+    # Stopping at '/' silently dropped any style whose attribute value
+    # contained a slash (e.g. name="A/B"). The \b keeps the <hh:styles>
+    # container from matching.
+    style_tags = re.findall(r'<hh:style\b([^>]*)>', header_xml)
+    if not style_tags:
+        return None
+
+    result = []
+    for tag_attrs in style_tags:
+        item = {}
+        for attr in ("id", "paraPrIDRef", "charPrIDRef", "nextStyleIDRef"):
+            hit = re.search(rf'\b{attr}="(\d+)"', tag_attrs)
+            if hit:
+                item[attr] = int(hit.group(1))
+
+        for attr in ("type", "name", "engName"):
+            hit = re.search(rf'\b{attr}="([^"]*)"', tag_attrs)
+            if hit:
+                item[attr] = hit.group(1)
+
+        result.append(item)
+
+    return result
+
+
+def _get_header_xml(raw_xml: dict, parsed: dict = None) -> str | None:
+    """Fetch the header XML string, or None when it cannot be found.
+
+    Checks, in order: a truthy ``parsed["header_xml"]``; the first string
+    value in ``raw_xml`` whose key contains "header" (case-insensitive);
+    ``raw_xml`` itself when it is a string.
+    """
+    supplied = parsed.get("header_xml") if parsed else None
+    if supplied:
+        return supplied
+
+    if isinstance(raw_xml, dict):
+        matches = (
+            text for fname, text in raw_xml.items()
+            if "header" in fname.lower() and isinstance(text, str)
+        )
+        return next(matches, None)
+
+    return raw_xml if isinstance(raw_xml, str) else None
--- a/handlers/tools/table.py
+++ b/handlers/tools/table.py
@@ -0,0 +1,328 @@
+# -*- coding: utf-8 -*-
+"""
+§6 표(Table) 구조 추출
+
+HWPX 실제 태그 (section0.xml):
+  <hp:tbl id="..." rowCnt="5" colCnt="3" cellSpacing="0"
+          repeatHeader="1" pageBreak="CELL" ...>
+    <hp:colSz><hp:widthList>8504 8504 8504</hp:widthList></hp:colSz>
+    또는 열 수에 맞는 hp:colSz 형태
+    <hp:tr>
+      <hp:tc name="" header="0" borderFillIDRef="5" ...>
+        <hp:cellAddr colAddr="0" rowAddr="0"/>
+        <hp:cellSpan colSpan="2" rowSpan="1"/>
+        <hp:cellSz width="17008" height="2400"/>
+        <hp:cellMargin left="510" right="510" top="142" bottom="142"/>
+        <hp:subList>
+          <hp:p ...><hp:run ...><hp:t>셀 텍스트</hp:t></hp:run></hp:p>
+        </hp:subList>
+      </hp:tc>
+    </hp:tr>
+  </hp:tbl>
+
+디폴트값 생성 안 함.
+"""
+
+import re
+
+from domain.hwpx.hwpx_utils import hwpunit_to_mm
+
+
+def extract(raw_xml: dict, parsed: dict = None) -> list | None:
+    """Extract every top-level table from the section XML.
+
+    Keys are emitted only when the underlying attribute/tag is found.
+    Column widths come from the table's colSz data when present, otherwise
+    they are derived from the parsed rows.
+
+    Returns:
+        [
+            {
+                "index": 0,
+                "rowCnt": 5, "colCnt": 3,
+                "repeatHeader": True,
+                "pageBreak": "CELL",
+                "rows": [
+                    [  # row 0
+                        {
+                            "colAddr": 0, "rowAddr": 0,
+                            "colSpan": 2, "rowSpan": 1,
+                            "width_hu": 17008, "height_hu": 2400,
+                            "borderFillIDRef": 5,
+                            "cellMargin": {"left": 510, "right": 510,
+                                           "top": 142, "bottom": 142},
+                            "text": "셀 텍스트",
+                            "lines": ["셀 텍스트"],
+                        },
+                        ...
+                    ],
+                    ...
+                ],
+                "colWidths_hu": [8504, 8504, 8504],
+                "colWidths_pct": [33, 33, 33],
+            },
+            ...
+        ]
+        Percentages are rounded per column, so they need not sum to 100.
+        Returns None when no section XML is available or no table exists.
+    """
+    section_xml = _get_section_xml(raw_xml, parsed)
+    if not section_xml:
+        return None
+
+    tbl_blocks = _find_tbl_blocks(section_xml)
+    if not tbl_blocks:
+        return None
+
+    tables = []
+    for index, (tbl_attrs, tbl_inner) in enumerate(tbl_blocks):
+        table = {"index": index}
+
+        # table-level attributes
+        for attr in ("rowCnt", "colCnt"):
+            if (hit := re.search(rf'\b{attr}="(\d+)"', tbl_attrs)):
+                table[attr] = int(hit.group(1))
+        if (hit := re.search(r'\brepeatHeader="(\d+)"', tbl_attrs)):
+            table["repeatHeader"] = int(hit.group(1)) != 0
+        if (hit := re.search(r'\bpageBreak="([^"]+)"', tbl_attrs)):
+            table["pageBreak"] = hit.group(1)
+
+        # Rows are parsed before the widths because they are the fallback
+        # source for column widths.
+        rows = _extract_rows(tbl_inner)
+        if rows:
+            table["rows"] = rows
+
+        widths = _extract_col_widths(tbl_inner)
+        if rows and not widths:
+            # No colSz data: prefer a row without column spans, then fall
+            # back to whatever the first row offers.
+            widths = _col_widths_from_rows(rows, table.get("colCnt", 0))
+            if not widths:
+                widths = _col_widths_from_first_row(rows[0])
+        if widths:
+            table["colWidths_hu"] = widths
+            total = sum(widths) or 1  # guard against all-zero widths
+            table["colWidths_pct"] = [round(w / total * 100) for w in widths]
+
+        tables.append(table)
+
+    return tables or None
+
+
+def _find_tbl_blocks(xml: str) -> list:
+    """Return the top-level <hp:tbl> blocks of ``xml``, honouring nesting.
+
+    Nested tables are not reported separately; they stay inside the inner
+    text of their enclosing table. A table with no matching closing tag is
+    skipped.
+
+    Args:
+        xml: XML text to scan.
+
+    Returns:
+        A list of ``(attrs, inner)`` tuples in document order, where
+        ``attrs`` is the raw attribute text of the opening tag and
+        ``inner`` is everything between the opening tag and its matching
+        ``</hp:tbl>``.
+    """
+    # Compiled patterns are searched from an offset so the remaining text
+    # is not copied on every lookup.
+    open_tag = re.compile(r'<hp:tbl\b([^>]*)>')
+    nested_open = re.compile(r'<hp:tbl\b')
+    close_tag = re.compile(r'</hp:tbl>')
+
+    blocks = []
+    start = 0
+    while True:
+        opening = open_tag.search(xml, start)
+        if not opening:
+            break
+
+        attrs = opening.group(1)
+        content_start = opening.end()
+
+        # Walk forward counting nested opens/closes until depth hits zero.
+        depth = 1
+        pos = content_start
+        while depth > 0 and pos < len(xml):
+            close_m = close_tag.search(xml, pos)
+            if close_m is None:
+                break  # unbalanced: give up on this table
+
+            open_m = nested_open.search(xml, pos)
+            if open_m and open_m.start() < close_m.start():
+                depth += 1
+                pos = open_m.end()
+            else:
+                depth -= 1
+                if depth == 0:
+                    blocks.append((attrs, xml[content_start:close_m.start()]))
+                pos = close_m.end()
+
+        start = pos
+
+    return blocks
+
+
+def _extract_col_widths(tbl_inner: str) -> list | None:
+    """Read column widths (HWPUNIT) declared inside a table.
+
+    Two layouts are recognised:
+      1. <hp:widthList>8504 8504 8504</hp:widthList> (space-separated)
+      2. one <hp:colSz ... width="N"> tag per column
+    Layout 2 is also used when the widthList holds a non-integer token.
+    Returns None when neither layout yields a width.
+    """
+    width_list = re.search(r'<hp:widthList>([^<]+)</hp:widthList>', tbl_inner)
+    if width_list is not None:
+        tokens = width_list.group(1).strip().split()
+        try:
+            return list(map(int, tokens))
+        except ValueError:
+            pass  # malformed list: fall through to per-column tags
+
+    per_column = re.findall(r'<hp:colSz\b[^>]*\bwidth="(\d+)"', tbl_inner)
+    if not per_column:
+        return None
+    return [int(value) for value in per_column]
+
+
+def _extract_rows(tbl_inner: str) -> list:
+    """Parse the rows and cells of one table into a 2D list of cell dicts.
+
+    Only the table's own <hp:tr>/<hp:tc> elements are split out. A table
+    nested inside a cell stays within that cell's inner XML instead of
+    cutting the row short at the nested table's first closing tag.
+
+    Args:
+        tbl_inner: XML between a table's opening and closing tags.
+
+    Returns:
+        One list per row, each holding the dicts produced by
+        ``_parse_cell``. A row without cells yields an empty list.
+    """
+    return [
+        [_parse_cell(tc_attrs, tc_inner)
+         for tc_attrs, tc_inner in _outer_elements(tr_inner, "tc")]
+        for _tr_attrs, tr_inner in _outer_elements(tbl_inner, "tr")
+    ]
+
+
+def _outer_elements(xml: str, tag: str) -> list:
+    """Return (attrs, inner) for each outermost <hp:{tag}> element in xml.
+
+    Same-named elements nested inside an outer one are kept in its inner
+    text. Self-closing tags, stray closing tags and elements that are never
+    closed are ignored.
+    """
+    token = re.compile(rf'<hp:{tag}\b([^>]*)>|</hp:{tag}>')
+    found = []
+    depth = 0
+    attrs = ""
+    inner_start = 0
+    for tok in token.finditer(xml):
+        if tok.group(0).startswith("</"):
+            if depth == 0:
+                continue  # closing tag without a matching open
+            depth -= 1
+            if depth == 0:
+                found.append((attrs, xml[inner_start:tok.start()]))
+        elif not tok.group(1).endswith("/"):  # skip self-closing tags
+            if depth == 0:
+                attrs, inner_start = tok.group(1), tok.end()
+            depth += 1
+    return found
+
+
+def _parse_cell(tc_attrs: str, tc_inner: str) -> dict:
+    """Parse one table cell into a dict.
+
+    Args:
+        tc_attrs: raw attribute text of the <hp:tc> opening tag.
+        tc_inner: XML between <hp:tc ...> and </hp:tc>.
+
+    Returns:
+        A dict holding whichever of these were found: borderFillIDRef,
+        isHeader, colAddr/rowAddr, colSpan/rowSpan, width_hu/height_hu,
+        cellMargin, text (all text runs joined by spaces),
+        charPrIDRefs/primaryCharPrIDRef, paraPrIDRefs/primaryParaPrIDRef,
+        styleIDRefs and lines (one joined string per non-empty paragraph).
+    """
+    cell = {}
+
+    # attributes on the tc tag itself
+    if (hit := re.search(r'\bborderFillIDRef="(\d+)"', tc_attrs)):
+        cell["borderFillIDRef"] = int(hit.group(1))
+    if (hit := re.search(r'\bheader="(\d+)"', tc_attrs)):
+        cell["isHeader"] = int(hit.group(1)) != 0
+
+    # cell address (colAddr must precede rowAddr in the tag)
+    address = re.search(
+        r'<hp:cellAddr\b[^>]*\bcolAddr="(\d+)"[^>]*\browAddr="(\d+)"',
+        tc_inner
+    )
+    if address:
+        cell["colAddr"] = int(address.group(1))
+        cell["rowAddr"] = int(address.group(2))
+
+    # spans
+    span_tag = re.search(r'<hp:cellSpan\b([^/]*)/?>', tc_inner)
+    if span_tag:
+        for key in ("colSpan", "rowSpan"):
+            if (hit := re.search(rf'\b{key}="(\d+)"', span_tag.group(1))):
+                cell[key] = int(hit.group(1))
+
+    # size
+    size_tag = re.search(r'<hp:cellSz\b([^/]*)/?>', tc_inner)
+    if size_tag:
+        for attr, key in (("width", "width_hu"), ("height", "height_hu")):
+            if (hit := re.search(rf'\b{attr}="(\d+)"', size_tag.group(1))):
+                cell[key] = int(hit.group(1))
+
+    # inner margins
+    margin_tag = re.search(r'<hp:cellMargin\b([^/]*)/?>', tc_inner)
+    if margin_tag:
+        margin = {
+            side: int(hit.group(1))
+            for side in ("left", "right", "top", "bottom")
+            if (hit := re.search(rf'\b{side}="(\d+)"', margin_tag.group(1)))
+        }
+        if margin:
+            cell["cellMargin"] = margin
+
+    # whole-cell text: every non-blank <hp:t> run, space-joined
+    runs = re.findall(r'<hp:t>([^<]*)</hp:t>', tc_inner)
+    joined = " ".join(piece.strip() for piece in runs if piece.strip())
+    if joined:
+        cell["text"] = joined
+
+    # character-shape references of the runs (used for style linking)
+    char_refs = re.findall(r'<hp:run\b[^>]*\bcharPrIDRef="(\d+)"', tc_inner)
+    if char_refs:
+        cell["charPrIDRefs"] = [int(ref) for ref in char_refs]
+        cell["primaryCharPrIDRef"] = int(char_refs[0])
+
+    # paragraph-shape and style references of the paragraphs
+    para_refs = re.findall(r'<hp:p\b[^>]*\bparaPrIDRef="(\d+)"', tc_inner)
+    if para_refs:
+        cell["paraPrIDRefs"] = [int(ref) for ref in para_refs]
+        cell["primaryParaPrIDRef"] = int(para_refs[0])
+
+    style_refs = re.findall(r'<hp:p\b[^>]*\bstyleIDRef="(\d+)"', tc_inner)
+    if style_refs:
+        cell["styleIDRefs"] = [int(ref) for ref in style_refs]
+
+    # one line per paragraph that contains text
+    lines = []
+    for para in re.findall(r'<hp:p\b[^>]*>(.*?)</hp:p>', tc_inner, re.DOTALL):
+        pieces = re.findall(r'<hp:t>([^<]*)</hp:t>', para)
+        line = " ".join(piece.strip() for piece in pieces if piece.strip())
+        if line:
+            lines.append(line)
+    if lines:
+        cell["lines"] = lines
+
+    return cell
+
+
+def _col_widths_from_first_row(first_row: list) -> list | None:
+    """Use the cells' own width_hu values as column widths.
+
+    Fallback for tables without colSz data. Cells with a missing or zero
+    width are skipped; returns None when no width is left.
+    """
+    widths = [
+        cell["width_hu"] for cell in first_row if cell.get("width_hu")
+    ]
+    return widths or None
+
+
+def _col_widths_from_rows(rows: list, col_cnt: int) -> list | None:
+    """Derive column widths from a row that has no column spans.
+
+    A first row containing merged cells gives misleading widths, so the
+    rows are scanned for one with exactly ``col_cnt`` cells, all with
+    colSpan 1 and a non-zero width_hu. Its widths are returned ordered by
+    colAddr. If no such row exists, the first row's widths are used.
+
+    Returns None when ``rows`` is empty or ``col_cnt`` is falsy.
+    """
+    if not rows or not col_cnt:
+        return None
+
+    for row in rows:
+        has_span = any(cell.get("colSpan", 1) != 1 for cell in row)
+        if has_span or len(row) != col_cnt:
+            continue
+        ordered = sorted(row, key=lambda c: c.get("colAddr", 0))
+        widths = [c.get("width_hu") for c in ordered if c.get("width_hu")]
+        if len(widths) == col_cnt:
+            return widths
+
+    # No span-free row found: fall back to the first row.
+    return _col_widths_from_first_row(rows[0])
+
+
+def _get_section_xml(raw_xml: dict, parsed: dict = None) -> str | None:
+    """Resolve the section XML string used for table extraction.
+
+    Tried in order: a truthy ``parsed["section_xml"]``; the first string
+    value in ``raw_xml`` whose key contains "section" (case-insensitive);
+    ``raw_xml`` itself when it is a string. Returns None otherwise.
+    """
+    if parsed:
+        preparsed = parsed.get("section_xml")
+        if preparsed:
+            return preparsed
+
+    if isinstance(raw_xml, dict):
+        candidates = [
+            body for key, body in raw_xml.items()
+            if "section" in key.lower() and isinstance(body, str)
+        ]
+        return candidates[0] if candidates else None
+
+    if isinstance(raw_xml, str):
+        return raw_xml
+    return None