📦 Initialize Geulbeot structure and merge Prompts & test projects

2026-03-05 11:32:29 +09:00
commit 555a954458
687 changed files with 205247 additions and 0 deletions
--- a/Code/geulbeot_10th/handlers/doc/init.py
+++ b/Code/geulbeot_10th/handlers/doc/init.py
@@ -0,0 +1,2 @@
+# -*- coding: utf-8 -*-
+"""doc 패키지 - 문서 유형 분석/정의/생성"""
--- a/Code/geulbeot_10th/handlers/doc/content_analyzer.py
+++ b/Code/geulbeot_10th/handlers/doc/content_analyzer.py
@@ -0,0 +1,640 @@
+# -*- coding: utf-8 -*-
+"""
+Content Analyzer (Phase 3 — Layer A)
+- template_info + semantic_map → content_prompt.json
+- 각 placeholder의 의미/유형/예시값/작성 패턴 추출
+- Phase 5에서 AI가 새 문서 생성 시 "레시피"로 참조
+
+★ 원칙: 모든 분류는 코드 100% (AI 없음)
+   purpose_hint / audience_hint / tone_hint는 빈 문자열로 남김
+   → 추후 AI enrichment 단계에서 채울 수 있도록 설계
+"""
+
+import re
+
+
+def generate(template_info: dict, semantic_map: dict,
+             parsed: dict = None) -> dict:
+    """
+    content_prompt.json 생성
+
+    Args:
+        template_info: doc_template_analyzer 추출 결과
+        semantic_map: semantic_mapper 분류 결과
+        parsed: HWPX 파싱 원본 (선택)
+
+    Returns:
+        content_prompt.json 구조
+    """
+    placeholders = {}
+    table_guide = {}
+
+    # ① 문서 기본 정보
+    document = _analyze_document(template_info)
+
+    # ② 헤더 placeholders
+    _analyze_header(template_info, placeholders)
+
+    # ③ 푸터 placeholders
+    _analyze_footer(template_info, placeholders)
+
+    # ④ 제목 placeholder
+    _analyze_title(template_info, semantic_map, placeholders)
+
+    # ⑤ 섹션 placeholders
+    _analyze_sections(semantic_map, placeholders, template_info)
+
+    # ⑤-b content_order 기반 문단/이미지 placeholders
+    _analyze_content_order(template_info, semantic_map, placeholders)
+
+    # ⑥ 표 가이드 + placeholders
+    _analyze_tables(template_info, semantic_map,
+                    placeholders, table_guide)
+
+    # ⑦ 작성 패턴
+    writing_guide = _analyze_writing_patterns(template_info, semantic_map)
+
+    return {
+        "version": "1.0",
+        "document": document,
+        "placeholders": placeholders,
+        "table_guide": table_guide,
+        "writing_guide": writing_guide
+    }
+
+
+# ================================================================
+#  문서 기본 정보
+# ================================================================
+
+def _analyze_document(template_info: dict) -> dict:
+    """문서 레벨 정보 추출"""
+    page = template_info.get("page", {})
+    paper = page.get("paper", {})
+
+    return {
+        "paper": paper.get("name", "A4"),
+        "layout": "landscape" if paper.get("landscape") else "portrait",
+        "margins": page.get("margins", {}),
+        "purpose_hint": "",    # AI enrichment 예약
+        "audience_hint": "",   # AI enrichment 예약
+        "tone_hint": ""        # AI enrichment 예약
+    }
+
+
+# ================================================================
+#  텍스트 유형 분류 (코드 100%, AI 없음)
+# ================================================================
+
+def _classify_text(text: str) -> dict:
+    """텍스트 패턴으로 콘텐츠 유형 분류"""
+    text = text.strip()
+    if not text:
+        return {"type": "empty", "pattern": "빈 셀"}
+
+    # 날짜: "2025. 1. 30(금)", "2025-01-30", "2025.01.30"
+    if re.match(r'\d{4}[\.\-/]\s*\d{1,2}[\.\-/]\s*\d{1,2}', text):
+        return {"type": "date", "pattern": "날짜 (YYYY. M. D)"}
+
+    # ★ 직급+이름 (부서보다 먼저!)
+    positions = [
+        '사원', '대리', '과장', '차장', '부장', '이사', '상무', '전무',
+        '연구원', '선임연구원', '책임연구원', '수석연구원',
+        '주임', '계장', '팀장', '실장', '부서장', '센터장'
+    ]
+    for pos in positions:
+        if pos in text:
+            return {"type": "author", "pattern": f"이름 + 직급({pos})"}
+
+    # 부서 (직급 아닌 것만 여기로)
+    if re.search(r'(실|부|국|과|원|처|센터|본부)$', text) and len(text) <= 12:
+        return {"type": "department", "pattern": "조직명"}
+
+    # 팀
+    if re.search(r'팀$', text) and len(text) <= 10:
+        return {"type": "team", "pattern": "팀명"}
+
+    # 페이지 참조: "1p", "2p"
+    if re.match(r'\d+p$', text):
+        return {"type": "page_ref", "pattern": "페이지 참조"}
+
+    # 문서 제목: ~계획(안), ~보고서, ~제안서 등
+    if re.search(r'(계획|보고서|제안서|기획서|결과|방안|현황|분석)'
+                 r'\s*(\(안\))?\s*$', text):
+        return {"type": "doc_title", "pattern": "문서 제목"}
+
+    # 슬로건/비전 (길고 추상적 키워드 포함)
+    if len(text) > 10 and any(k in text for k in
+                              ['함께', '세상', '미래', '가치', '만들어']):
+        return {"type": "slogan", "pattern": "회사 슬로건/비전"}
+
+    # 기본
+    return {"type": "text", "pattern": "자유 텍스트"}
+
+
+# ================================================================
+#  헤더 분석
+# ================================================================
+
+def _analyze_header(template_info: dict, placeholders: dict):
+    """헤더 영역 placeholder 분석"""
+    header = template_info.get("header", {})
+    if not header or not header.get("exists"):
+        return
+
+    if header.get("type") == "table" and header.get("table"):
+        _analyze_table_area(header["table"], "HEADER", "header",
+                            placeholders)
+    else:
+        texts = header.get("texts", [])
+        for i in range(max(len(texts), 1)):
+            ph = f"HEADER_TEXT_{i+1}"
+            example = texts[i] if i < len(texts) else ""
+            info = _classify_text(example)
+            info["example"] = example.strip()
+            info["location"] = "header"
+            placeholders[ph] = info
+
+
+# ================================================================
+#  푸터 분석
+# ================================================================
+
+def _analyze_footer(template_info: dict, placeholders: dict):
+    """푸터 영역 placeholder 분석"""
+    footer = template_info.get("footer", {})
+    if not footer or not footer.get("exists"):
+        return
+
+    if footer.get("type") == "table" and footer.get("table"):
+        _analyze_table_area(footer["table"], "FOOTER", "footer",
+                            placeholders)
+    else:
+        placeholders["PAGE_NUMBER"] = {
+            "type": "page_number",
+            "pattern": "페이지 번호",
+            "example": "1",
+            "location": "footer"
+        }
+
+
+# ================================================================
+#  헤더/푸터 공통: 표 형태 영역 분석
+# ================================================================
+
+def _analyze_table_area(tbl: dict, prefix: str, location: str,
+                        placeholders: dict):
+    """표 형태의 헤더/푸터 → placeholder 매핑
+
+    Args:
+        tbl: header["table"] 또는 footer["table"]
+        prefix: "HEADER" 또는 "FOOTER"
+        location: "header" 또는 "footer"
+        placeholders: 결과 dict (in-place 수정)
+    """
+    rows = tbl.get("rows", [])
+
+    for r_idx, row in enumerate(rows):
+        for c_idx, cell in enumerate(row):
+            lines = cell.get("lines", [])
+
+            if len(lines) > 1:
+                for l_idx, line_text in enumerate(lines):
+                    ph = f"{prefix}_R{r_idx+1}_C{c_idx+1}_LINE_{l_idx+1}"
+                    info = _classify_text(line_text)
+                    info["example"] = line_text.strip()
+                    info["location"] = location
+                    placeholders[ph] = info
+            elif lines:
+                ph = f"{prefix}_R{r_idx+1}_C{c_idx+1}"
+                info = _classify_text(lines[0])
+                info["example"] = lines[0].strip()
+                info["location"] = location
+                placeholders[ph] = info
+            else:
+                ph = f"{prefix}_R{r_idx+1}_C{c_idx+1}"
+                placeholders[ph] = {
+                    "type": "empty",
+                    "pattern": "빈 셀 (로고/여백)",
+                    "example": "",
+                    "location": location
+                }
+
+
+# ================================================================
+#  제목 분석
+# ================================================================
+
+def _analyze_title(template_info: dict, semantic_map: dict,
+                   placeholders: dict):
+    """제목 블록 placeholder 분석
+
+    ★ v1.1: template_manager._build_title_block_html()과 동일한
+       TITLE_R{r}_C{c} 명명 규칙 사용 (범용 매핑)
+    """
+    title_idx = semantic_map.get("title_table")
+    if title_idx is None:
+        return
+
+    tables = template_info.get("tables", [])
+    title_tbl = next((t for t in tables if t["index"] == title_idx), None)
+    if not title_tbl:
+        return
+
+    # 각 셀별로 placeholder 생성 (template과 동일한 이름)
+    for r_idx, row in enumerate(title_tbl.get("rows", [])):
+        for c_idx, cell in enumerate(row):
+            cell_text = cell.get("text", "").strip()
+            if not cell_text:
+                continue  # 빈 셀은 template에서도 placeholder 없음
+
+            ph_name = f"TITLE_R{r_idx+1}_C{c_idx+1}"
+            info = _classify_text(cell_text)
+            if "title" not in info["type"] and "doc_title" not in info["type"]:
+                # 제목표 안의 텍스트가 doc_title이 아닐 수도 있음 (부제 등)
+                # 가장 긴 텍스트만 doc_title로 분류
+                pass
+            info["example"] = cell_text
+            info["location"] = "title_block"
+            placeholders[ph_name] = info
+
+    # 가장 긴 텍스트를 가진 셀을 doc_title로 마킹
+    longest_ph = None
+    longest_len = 0
+    for ph_key in list(placeholders.keys()):
+        if ph_key.startswith("TITLE_R"):
+            ex = placeholders[ph_key].get("example", "")
+            if len(ex) > longest_len:
+                longest_len = len(ex)
+                longest_ph = ph_key
+    if longest_ph:
+        placeholders[longest_ph]["type"] = "doc_title"
+        placeholders[longest_ph]["pattern"] = "문서 제목"
+
+
+# ================================================================
+#  섹션 분석
+# ================================================================
+
+def _analyze_sections(semantic_map: dict, placeholders: dict,
+                      template_info: dict = None):
+    """섹션 placeholder 분석.
+
+    content_order에 문단이 있으면 SECTION_n_CONTENT는 생략
+    (개별 PARA_n이 본문 역할을 대신함).
+    """
+    sections = semantic_map.get("sections", [])
+
+    # content_order에 문단이 있으면 개별 PARA_n이 본문 담당 → CONTENT 불필요
+    has_co_paragraphs = False
+    if template_info:
+        co = template_info.get("content_order", [])
+        has_co_paragraphs = any(c['type'] == 'paragraph' for c in co) if co else False
+
+    if not sections:
+        placeholders["SECTION_1_TITLE"] = {
+            "type": "section_title", "pattern": "섹션 제목",
+            "example": "", "location": "body"
+        }
+        if not has_co_paragraphs:
+            placeholders["SECTION_1_CONTENT"] = {
+                "type": "section_content", "pattern": "섹션 본문",
+                "example": "", "location": "body"
+            }
+        return
+
+    for i, sec in enumerate(sections):
+        s_num = i + 1
+        title_text = sec if isinstance(sec, str) else sec.get("title", "")
+
+        placeholders[f"SECTION_{s_num}_TITLE"] = {
+            "type": "section_title", "pattern": "섹션 제목",
+            "example": title_text, "location": "body"
+        }
+        if not has_co_paragraphs:
+            placeholders[f"SECTION_{s_num}_CONTENT"] = {
+                "type": "section_content", "pattern": "섹션 본문",
+                "example": "", "location": "body"
+            }
+
+# ================================================================
+#  content_order 기반 문단/이미지 분석 (v5.2+)
+# ================================================================
+
+def _analyze_content_order(template_info: dict, semantic_map: dict,
+                           placeholders: dict):
+    """content_order의 paragraph/image → PARA_n, IMAGE_n placeholder 생성.
+
+    content_order가 없거나 문단이 없으면 아무것도 안 함 (legacy 호환).
+    """
+    content_order = template_info.get("content_order")
+    if not content_order:
+        return
+    if not any(c['type'] == 'paragraph' for c in content_order):
+        return
+
+    # 섹션 제목 패턴 (template_manager와 동일)
+    sec_patterns = [
+        re.compile(r'^\d+\.\s+\S'),
+        re.compile(r'^[ⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩ]\.\s*\S'),
+        re.compile(r'^제\s*\d+\s*[장절항]\s*\S'),
+    ]
+
+    para_num = 0
+    img_num = 0
+    section_num = 0
+
+    for item in content_order:
+        itype = item['type']
+
+        if itype == 'empty':
+            continue
+
+        # ── 표: _analyze_tables에서 처리 → 건너뛰기 ──
+        if itype == 'table':
+            continue
+
+        # ── 이미지 ──
+        if itype == 'image':
+            img_num += 1
+            placeholders[f"IMAGE_{img_num}"] = {
+                "type": "image",
+                "pattern": "이미지",
+                "example_ref": item.get("binaryItemIDRef", ""),
+                "location": "body"
+            }
+            caption = item.get("text", "")
+            if caption:
+                placeholders[f"IMAGE_{img_num}_CAPTION"] = {
+                    "type": "image_caption",
+                    "pattern": "이미지 캡션",
+                    "example": caption,
+                    "location": "body"
+                }
+            continue
+
+        # ── 문단 ──
+        if itype == 'paragraph':
+            text = item.get('text', '')
+
+            # 섹션 제목 → SECTION_n_TITLE (이미 _analyze_sections에서 등록됐을 수 있음)
+            if any(p.match(text) for p in sec_patterns):
+                section_num += 1
+                ph = f"SECTION_{section_num}_TITLE"
+                if ph not in placeholders:
+                    placeholders[ph] = {
+                        "type": "section_title",
+                        "pattern": "섹션 제목",
+                        "example": text,
+                        "location": "body"
+                    }
+                continue
+
+            # 일반 문단
+            para_num += 1
+            runs = item.get('runs', [])
+
+            if len(runs) > 1:
+                # 다중 run → 각 run별 placeholder
+                for r_idx, run in enumerate(runs):
+                    ph = f"PARA_{para_num}_RUN_{r_idx+1}"
+                    run_text = run.get("text", "")
+                    info = _classify_text(run_text)
+                    info["example"] = run_text[:100] if len(run_text) > 100 else run_text
+                    info["location"] = "body"
+                    info["run_index"] = r_idx + 1
+                    placeholders[ph] = info
+            else:
+                ph = f"PARA_{para_num}"
+                info = _classify_text(text)
+                info["example"] = text[:100] if len(text) > 100 else text
+                info["location"] = "body"
+                placeholders[ph] = info
+
+
+# ================================================================
+#  표 분석 → placeholder + 표 가이드
+# ================================================================
+
+def _analyze_tables(template_info: dict, semantic_map: dict,
+                    placeholders: dict, table_guide: dict):
+    """본문 데이터 표 → placeholder + table_guide"""
+    tables = template_info.get("tables", [])
+    body_indices = semantic_map.get("body_tables", [])
+    table_roles = semantic_map.get("table_roles", {})
+
+    for tbl_num_0, tbl_idx in enumerate(body_indices):
+        tbl_num = tbl_num_0 + 1
+        tbl = next((t for t in tables if t["index"] == tbl_idx), None)
+        if not tbl:
+            continue
+
+        role_info = table_roles.get(tbl_idx, table_roles.get(str(tbl_idx), {}))
+        col_headers = role_info.get("col_headers", [])
+        col_cnt = len(col_headers) if col_headers else tbl.get("colCnt", 0)
+
+        # ── 헤더 placeholder ──
+        for c_idx, h_text in enumerate(col_headers):
+            ph = f"TABLE_{tbl_num}_H_C{c_idx+1}"
+            placeholders[ph] = {
+                "type": "table_header", "pattern": "표 열 제목",
+                "example": h_text, "location": f"table_{tbl_num}"
+            }
+
+        # ── BODY placeholder ──
+        placeholders[f"TABLE_{tbl_num}_BODY"] = {
+            "type": "table_body",
+            "pattern": "표 데이터 행들 (HTML <tr> 반복)",
+            "example": "",
+            "location": f"table_{tbl_num}"
+        }
+
+        # ── 표 가이드 ──
+        table_guide[str(tbl_num)] = {
+            "col_headers": col_headers,
+            "col_count": col_cnt,
+            "row_count": tbl.get("rowCnt", 0),
+            "merge_pattern": _detect_merge_pattern(tbl),
+            "bullet_chars": _detect_bullet_chars(tbl),
+            "example_rows": _extract_example_rows(tbl, role_info),
+            "col_types": _classify_columns(col_headers),
+            "row_bf_pattern": _extract_row_bf_pattern(tbl, role_info),           
+        }
+
+
+def _detect_merge_pattern(tbl: dict) -> dict:
+    """셀 병합 패턴 감지"""
+    pattern = {}
+    for row in tbl.get("rows", []):
+        for cell in row:
+            col = cell.get("colAddr", 0)
+            if cell.get("rowSpan", 1) > 1:
+                pattern.setdefault(f"col_{col}", "row_group")
+            if cell.get("colSpan", 1) > 1:
+                pattern.setdefault(f"col_{col}", "col_span")
+    return pattern
+
+
+def _detect_bullet_chars(tbl: dict) -> list:
+    """표 셀 텍스트에서 불릿 문자 감지"""
+    bullets = set()
+    pats = [
+        (r'^-\s',  '- '),  (r'^·\s',  '· '),  (r'^•\s',  '• '),
+        (r'^▸\s',  '▸ '),  (r'^▶\s',  '▶ '),  (r'^※\s',  '※ '),
+        (r'^◈\s',  '◈ '),  (r'^○\s',  '○ '),  (r'^●\s',  '● '),
+    ]
+    for row in tbl.get("rows", []):
+        for cell in row:
+            for line in cell.get("lines", []):
+                for pat, char in pats:
+                    if re.match(pat, line.strip()):
+                        bullets.add(char)
+    return sorted(bullets)
+
+
+def _extract_example_rows(tbl: dict, role_info: dict) -> list:
+    """데이터 행에서 예시 최대 3행 추출"""
+    rows = tbl.get("rows", [])
+    header_row = role_info.get("header_row")
+    if header_row is None:
+        header_row = -1
+
+    examples = []
+    for r_idx, row in enumerate(rows):
+        if r_idx <= header_row:
+            continue
+        row_data = []
+        for cell in row:
+            text = cell.get("text", "").strip()
+            if len(text) > 80:
+                text = text[:77] + "..."
+            row_data.append(text)
+        examples.append(row_data)
+        if len(examples) >= 3:
+            break
+    return examples
+
+
+def _classify_columns(col_headers: list) -> list:
+    """열 헤더 키워드로 용도 추론"""
+    type_map = {
+        "category": ['구분', '분류', '항목', '카테고리'],
+        "content":  ['내용', '설명', '상세', '세부내용'],
+        "note":     ['비고', '참고', '기타', '메모'],
+        "date":     ['날짜', '일자', '일시', '기간'],
+        "person":   ['담당', '담당자', '작성자', '책임'],
+        "number":   ['수량', '금액', '단가', '합계'],
+    }
+    result = []
+    for c_idx, header in enumerate(col_headers):
+        h = header.strip()
+        col_type = "text"
+        for t, keywords in type_map.items():
+            if h in keywords:
+                col_type = t
+                break
+        result.append({"col": c_idx, "type": col_type, "header": h})
+    return result
+
+def _extract_row_bf_pattern(tbl: dict, role_info: dict) -> list:
+    """첫 데이터행의 셀별 borderFillIDRef → 열별 bf class 패턴.
+
+    AI가 TABLE_BODY <td> 생성 시 class="bf-{id}" 적용하도록 안내.
+    예: [{"col": 0, "bf_class": "bf-12"}, {"col": 1, "bf_class": "bf-8"}, ...]
+    """
+    rows = tbl.get("rows", [])
+    header_row = role_info.get("header_row")
+    if header_row is None:
+        header_row = -1
+
+    # 첫 데이터행 찾기
+    for r_idx, row in enumerate(rows):
+        if r_idx <= header_row:
+            continue
+        pattern = []
+        for cell in row:
+            bf_id = cell.get("borderFillIDRef")
+            pattern.append({
+                "col": cell.get("colAddr", len(pattern)),
+                "bf_class": f"bf-{bf_id}" if bf_id else "",
+                "colSpan": cell.get("colSpan", 1),
+                "rowSpan": cell.get("rowSpan", 1),
+            })
+        return pattern
+
+    return []
+# ================================================================
+#  작성 패턴 분석
+# ================================================================
+
+def _analyze_writing_patterns(template_info: dict,
+                              semantic_map: dict) -> dict:
+    """문서 전체의 작성 패턴 분석"""
+    result = {
+        "bullet_styles": [],
+        "numbering_patterns": [],
+        "avg_line_length": 0,
+        "font_primary": "",
+        "font_size_body": ""
+    }
+
+    # ── 불릿 수집 (모든 표 텍스트) ──
+    all_bullets = set()
+    tables = template_info.get("tables", [])
+    for tbl in tables:
+        for row in tbl.get("rows", []):
+            for cell in row:
+                for line in cell.get("lines", []):
+                    if re.match(r'^[-·•▸▶※◈○●]\s', line.strip()):
+                        all_bullets.add(line.strip()[0] + " ")
+
+    # ── numbering tools 데이터 ──
+    numbering = template_info.get("numbering", {})
+    for num in numbering.get("numberings", []):
+        levels = num.get("levels", [])
+        patterns = [lv.get("pattern", "") for lv in levels[:3]]
+        if patterns:
+            result["numbering_patterns"].append(patterns)
+
+    for b in numbering.get("bullets", []):
+        char = b.get("char", "")
+        if char:
+            all_bullets.add(char + " ")
+
+    result["bullet_styles"] = sorted(all_bullets)
+
+# ── 평균 라인 길이 ──
+    lengths = []
+    for tbl in tables:
+        for row in tbl.get("rows", []):
+            for cell in row:
+                for line in cell.get("lines", []):
+                    if line.strip():
+                        lengths.append(len(line.strip()))
+
+    # content_order 문단 텍스트도 포함
+    content_order = template_info.get("content_order", [])
+    for item in content_order:
+        if item['type'] == 'paragraph':
+            text = item.get('text', '').strip()
+            if text:
+                lengths.append(len(text))
+                # 불릿 감지도 추가
+                if re.match(r'^[-·•▸▶※◈○●]\s', text):
+                    all_bullets.add(text[0] + " ")
+
+    if lengths:
+        result["avg_line_length"] = round(sum(lengths) / len(lengths))
+
+    # ── 주요 폰트 ──
+    fonts = template_info.get("fonts", {})
+    hangul = fonts.get("HANGUL", [])
+    if hangul and isinstance(hangul, list) and len(hangul) > 0:
+        result["font_primary"] = hangul[0].get("face", "")
+
+    # ── 본문 글자 크기 (char_styles id=0 기본) ──
+    char_styles = template_info.get("char_styles", [])
+    if char_styles:
+        result["font_size_body"] = f"{char_styles[0].get('height_pt', 10)}pt"
+
+    return result
--- a/Code/geulbeot_10th/handlers/doc/custom_doc_type.py
+++ b/Code/geulbeot_10th/handlers/doc/custom_doc_type.py
@@ -0,0 +1,555 @@
+# -*- coding: utf-8 -*-
+"""
+사용자 정의 문서 유형 프로세서 (v2.1 - 템플릿 기반)
+- template.html 로드
+- config.json의 구조/가이드 활용
+- 사용자 입력 내용을 템플릿에 정리하여 채움
+- 창작 X, 정리/재구성 O
+
+★ v2.1 변경사항:
+- 한글 포함 placeholder 지원 (TABLE_1_H_구분 등)
+- TABLE_*_BODY / TABLE_*_H_* placeholder 구분 처리
+- 개조식 항목 <ul class="bullet-list"> 래핑
+- 페이지 분량 제한 프롬프트 강화
+- 헤더/푸터 다중행 placeholder 설명 추가
+"""
+
+import json
+import re
+import sys, os
+sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+from handlers.template.template_manager import TemplateManager
+from pathlib import Path
+from handlers.common import call_claude, extract_html       
+
+
+# ★ 한글 포함 placeholder 정규식 (영문 + 숫자 + 언더스코어 + 한글)
+PH_PATTERN = re.compile(r'\{\{([A-Za-z0-9_\uAC00-\uD7AF]+)\}\}')
+
+
+class CustomDocTypeProcessor:
+    """사용자 정의 문서 유형 처리기 (템플릿 기반)"""
+    
+    def __init__(self):
+        self.doc_types_user = Path('templates/user/doc_types')
+        self.template_manager = TemplateManager()
+    
+    def load_config(self, doc_type_id: str) -> dict:
+        """config.json 로드"""
+        config_path = self.doc_types_user / doc_type_id / 'config.json'
+        if not config_path.exists():
+            raise FileNotFoundError(f"문서 유형을 찾을 수 없습니다: {doc_type_id}")
+        
+        with open(config_path, 'r', encoding='utf-8') as f:
+            return json.load(f)
+
+    def load_content_prompt(self, doc_type_id: str, template_id: str = None) -> dict:
+        """content_prompt.json 로드 (doc_type 우선 → template fallback)"""
+        # ① doc_type 폴더
+        path = self.doc_types_user / doc_type_id / 'content_prompt.json'
+        if path.exists():
+            with open(path, 'r', encoding='utf-8') as f:
+                return json.load(f)
+        
+        # ② template 폴더 fallback
+        if template_id:
+            tpl_path = Path('templates/user/templates') / template_id / 'content_prompt.json'
+            if tpl_path.exists():
+                with open(tpl_path, 'r', encoding='utf-8') as f:
+                    return json.load(f)
+        
+        return {}
+
+    def load_template(self, doc_type_id: str) -> str:
+        """template.html 로드 — template_manager 경유 (분리 구조)"""
+        # ① config에서 template_id 확인
+        config = self.load_config(doc_type_id)
+        tpl_id = config.get("template_id")
+        
+        if tpl_id:
+            # ★ 새 구조: template_manager에서 로드
+            tpl_data = self.template_manager.load_template(tpl_id)
+            if "html" in tpl_data:
+                return tpl_data["html"]
+        
+        # ★ 하위 호환: 레거시 방식 (같은 폴더의 template.html)
+        template_path = self.doc_types_user / doc_type_id / 'template.html'
+        if template_path.exists():
+            with open(template_path, 'r', encoding='utf-8') as f:
+                return f.read()
+        
+        return None
+    
+    def generate(self, content: str, doc_type_id: str, options: dict = None,
+                 image_data: dict = None) -> dict:
+        """문서 생성 - 템플릿 + 사용자 입력
+
+        Args:
+            content: 사용자 입력 텍스트
+            doc_type_id: 문서 유형 ID
+            options: 추가 옵션 (instruction 등)
+            image_data: 이미지 dict {binaryItemIDRef: {"base64": ..., "mime": ...}}
+                        None이면 템플릿 폴더에서 자동 로드 시도
+        """
+        try:
+            config = self.load_config(doc_type_id)
+            template = self.load_template(doc_type_id)
+            
+            if template:
+                # 이미지 데이터 준비
+                if image_data is None:
+                    image_data = self._load_image_data(config)
+                result = self._generate_with_template(
+                    content, config, template, options, image_data
+                )
+            else:
+                result = self._generate_with_guide(content, config, options)
+            
+            return result
+            
+        except Exception as e:
+            import traceback
+            return {'error': str(e), 'trace': traceback.format_exc()}
+    
+    def _generate_with_template(self, content: str, config: dict, 
+                                template: str, options: dict,
+                                image_data: dict = None) -> dict:
+        """Generate a document by filling the template through one model call.
+
+        Builds a prompt from config ('context', 'structure'), the loaded
+        content_prompt.json (placeholder, table and writing guides) and the
+        {{...}} placeholder names found in ``template``, sends it through
+        call_claude, parses the reply as JSON and fills the template with it.
+
+        Args:
+            content: User input text; only the first 6000 characters are
+                placed in the prompt.
+            config: Doc type config; 'context', 'structure', 'id' and
+                'template_id' are read.
+            template: Template HTML containing {{PLACEHOLDER}} markers.
+            options: Optional dict; only 'instruction' is read.
+            image_data: Passed through unchanged to self._fill_template.
+
+        Returns:
+            {'success': True, 'html': ...} on success;
+            {'error': 'JSON extraction failed', 'raw': ...} when no JSON could
+            be extracted from the reply (first 500 characters of it included);
+            {'error': ..., 'trace': ...} when the model call, parsing or
+            filling raises.
+        """
+        
+        context = config.get('context', {})
+        structure = config.get('structure', {})
+        instruction = options.get('instruction', '') if options else ''
+        
+        # Load content_prompt (doc-type folder first, template folder fallback)
+        doc_type_id = config.get('id', '')
+        template_id = config.get('template_id', '')
+        cp = self.load_content_prompt(doc_type_id, template_id)
+        
+        placeholders_info = cp.get('placeholders', {})
+        table_guide = cp.get('table_guide', {})
+        writing_guide = cp.get('writing_guide', {})
+        doc_info = cp.get('document', {})
+        
+        # Build the placeholder guide (type / pattern / example / location)
+        ph_guide_lines = []
+        for ph_key, ph_info in placeholders_info.items():
+            ph_type = ph_info.get('type', 'text')
+            pattern = ph_info.get('pattern', '')
+            example = ph_info.get('example', '')
+            location = ph_info.get('location', '')
+            
+            line = f"  {ph_key}:"
+            line += f"\n    type: {ph_type}"
+            line += f"\n    pattern: {pattern}"
+            if example:
+                # The example line is left out entirely when there is none
+                line += f"\n    example: \"{example}\""
+            line += f"\n    location: {location}"
+            ph_guide_lines.append(line)
+        
+        ph_guide = "\n".join(ph_guide_lines) if ph_guide_lines else "(no guide available)"
+        
+        # Build the table guide, one "### Table n" section per table
+        tbl_guide_lines = []
+        for tbl_num, tbl_info in table_guide.items():
+            headers = tbl_info.get('col_headers', [])
+            col_types = tbl_info.get('col_types', [])
+            merge = tbl_info.get('merge_pattern', {})
+            bullets = tbl_info.get('bullet_chars', [])
+            examples = tbl_info.get('example_rows', [])
+            
+            tbl_guide_lines.append(f"\n### Table {tbl_num}:")
+            tbl_guide_lines.append(f"  Columns: {json.dumps(headers, ensure_ascii=False)}")
+            if col_types:
+                for ct in col_types:
+                    tbl_guide_lines.append(
+                        f"  Col {ct['col']} '{ct['header']}': {ct['type']}")
+            if merge:
+                tbl_guide_lines.append(f"  Merge: {json.dumps(merge, ensure_ascii=False)}")
+                tbl_guide_lines.append(
+                    f"  → row_group means: use rowspan to group rows by that column")
+            if bullets:
+                tbl_guide_lines.append(f"  Bullet chars: {bullets}")
+
+            # Per-column <td> class and span hints taken from row_bf_pattern
+            bf_pattern = tbl_info.get('row_bf_pattern', [])
+            if bf_pattern:
+                tbl_guide_lines.append(f"  Row cell classes (apply to each <td>):")
+                for bp in bf_pattern:
+                    col = bp.get('col', '?')
+                    bf_cls = bp.get('bf_class', '')
+                    cs = bp.get('colSpan', 1)
+                    rs = bp.get('rowSpan', 1)
+                    span_info = ""
+                    if cs > 1: span_info += f" colSpan={cs}"
+                    if rs > 1: span_info += f" rowSpan={rs}"
+                    tbl_guide_lines.append(
+                        f'    col_{col}: class="{bf_cls}"{span_info}')
+
+            if examples:
+                tbl_guide_lines.append(f"  Example rows:")
+                # Only the first two example rows go into the prompt
+                for ex in examples[:2]:
+                    tbl_guide_lines.append(
+                        f"    {json.dumps(ex, ensure_ascii=False)}")
+        
+        tbl_guide = "\n".join(tbl_guide_lines) if tbl_guide_lines else "No table guide"
+        
+        # Page estimate from the config structure (defaults to 1)
+        page_estimate = structure.get('pageEstimate', 1)
+        
+        # Placeholder keys found in the template, de-duplicated in first-seen order
+        placeholders = PH_PATTERN.findall(template)
+        placeholders = list(dict.fromkeys(placeholders))
+        
+        # NOTE(review): the two "5b" rule lines below end with a literal "\n"
+        # escape inside this non-raw f-string, so each renders with an extra
+        # blank line after it, and \" renders as a plain quote. Looks like a
+        # leftover from a concatenated string; confirm before changing the
+        # prompt text.
+        prompt = f"""Fill the template placeholders with reorganized content.
+
+## Document Definition
+{context.get('documentDefinition', 'structured document')}
+
+## Context
+- Type: {context.get('documentType', '')}
+- Purpose: {context.get('purpose', '')}
+- Audience: {context.get('audience', '')}
+- Tone: {context.get('tone', '')}
+- Layout: {doc_info.get('layout', 'portrait')}
+- Page limit: {page_estimate} page(s). Be CONCISE.
+
+## Writing Style
+- Bullet chars: {writing_guide.get('bullet_styles', ['- ', '· '])}
+- Primary font: {writing_guide.get('font_primary', '')}
+- Keep lines ~{writing_guide.get('avg_line_length', 25)} chars average
+
+## Placeholder Guide (type, pattern, example for each)
+{ph_guide}
+
+## Table Structure Guide
+{tbl_guide}
+
+## Input Content
+{content[:6000] if content else '(empty)'}
+
+## Additional Instructions
+{instruction if instruction else 'None'}
+
+## ALL Placeholders to fill (JSON keys):
+{json.dumps(placeholders, ensure_ascii=False)}
+
+## ★ Critical Rules
+1. Output ONLY valid JSON — every placeholder above as a key
+2. HEADER/FOOTER: use the PATTERN and modify the EXAMPLE for new content
+   - department → user's department or keep example
+   - author → user's name or keep example  
+   - date → today's date in same format
+   - slogan → keep exactly as example
+3. TITLE: create title matching doc_title pattern from input content
+4. TABLE_*_H_*: plain text column headers (use col_headers from guide)
+5. TABLE_*_BODY: HTML <tr> rows only (no <table> wrapper)
+   - Follow merge_pattern: row_group → use rowspan
+   - Use bullet_chars from guide inside cells
+   - Match example_rows structure
+5b. TABLE_*_BODY <td>: apply class from 'Row cell classes' guide\n
+   - e.g. <td class=\"bf-12\">content</td>\n
+6. SECTION_*_CONTENT: use bullet style from writing guide
+7. Empty string "" for inapplicable placeholders
+8. Do NOT invent content — reorganize input only
+9. PARA_*: reorganize input text for each paragraph placeholder
+   - Keep the meaning, improve clarity and structure
+   - PARA_n_RUN_m: if a paragraph has multiple runs, fill each run separately
+10. IMAGE_*: output exactly "KEEP_ORIGINAL" (image is auto-inserted from source)
+11. IMAGE_*_CAPTION: write a concise caption describing the image context
+12. Total volume: {page_estimate} page(s)
+
+Output ONLY valid JSON:"""
+
+        try:
+            # First argument is the system-style instruction, second the prompt
+            # (presumably; call_claude is imported from handlers.common — verify)
+            response = call_claude(
+                "You fill document template placeholders with reorganized content. "
+                "Output valid JSON only. Respect the template structure exactly.",
+                prompt,
+                max_tokens=6000
+            )
+            
+            fill_data = self._extract_json(response)
+            
+            # A falsy result (no JSON found, or an empty object) is an error
+            if not fill_data:
+                return {'error': 'JSON extraction failed', 'raw': response[:500]}
+            
+            html = self._fill_template(template, fill_data, image_data)
+            
+            return {'success': True, 'html': html}
+            
+        except Exception as e:
+            # Failures are reported as data so the caller always gets a dict
+            import traceback
+            return {'error': str(e), 'trace': traceback.format_exc()}
+    
+    def _fill_template(self, template: str, data: dict,
+                       image_data: dict = None) -> str:
+        """템플릿에 데이터 채우기
+
+        Args:
+            template: HTML 템플릿
+            data: AI가 채운 placeholder → value dict
+            image_data: 이미지 dict {binaryItemIDRef: {"base64": ..., "mime": ...}}
+        """
+        html = template
+
+        # ★ content_prompt에서 IMAGE_n → binaryItemIDRef 매핑 빌드
+        image_ref_map = self._build_image_ref_map(data, image_data)
+
+        for key, value in data.items():
+            placeholder = '{{' + key + '}}'
+
+            # ── IMAGE_n: 원본 이미지 삽입 ──
+            if re.match(r'^IMAGE_\d+$', key):
+                img_tag = image_ref_map.get(key, '')
+                html = html.replace(placeholder, img_tag)
+                continue
+
+            if isinstance(value, str) and value.strip():
+                # ★ 개조식 내용 처리: · 또는 - 로 시작하는 항목
+                lines = value.strip().split('\n')
+                is_bullet_list = sum(
+                    1 for l in lines
+                    if l.strip().startswith('·') or l.strip().startswith('-')
+                ) > len(lines) * 0.5
+
+                if is_bullet_list and len(lines) > 1:
+                    # ★ v2.2: inline context (<p><span> 안)에서는 <ul> 금지
+                    # PARA_*, SECTION_*_TITLE, HEADER_*, FOOTER_*, TITLE_*, *_RUN_*
+                    # 이들은 <p> 또는 <td> 안에 있어 block 요소 삽입 시 HTML 깨짐
+                    _is_inline = re.match(
+                        r'^(PARA_|SECTION_\d+_TITLE|HEADER_|FOOTER_|TITLE_|.*_RUN_)',
+                        key
+                    )
+                    if _is_inline:
+                        # <br> 줄바꿈으로 구조 보존
+                        clean_lines = []
+                        for item in lines:
+                            item = item.strip()
+                            if item.startswith('·'):
+                                item = item[1:].strip()
+                            elif item.startswith('-'):
+                                item = item[1:].strip()
+                            if item:
+                                clean_lines.append(f'· {item}')
+                        value = '<br>\n'.join(clean_lines)
+                    else:
+                        # <div> 안 (SECTION_*_CONTENT 등) → <ul><li> 허용
+                        li_items = []
+                        for item in lines:
+                            item = item.strip()
+                            if item.startswith('·'):
+                                item = item[1:].strip()
+                            elif item.startswith('-'):
+                                item = item[1:].strip()
+                            if item:
+                                li_items.append(f'<li>{item}</li>')
+                        value = '<ul class="bullet-list">\n' + '\n'.join(li_items) + '\n</ul>'
+
+            html = html.replace(placeholder, str(value) if value else '')
+
+        # ★ 남은 placeholder 정리 (한글 포함)
+        html = PH_PATTERN.sub('', html)
+
+        return html
+
+    def _build_image_ref_map(self, data: dict, image_data: dict = None) -> dict:
+        """IMAGE_n placeholder → <img> 태그 매핑 생성.
+
+        content_prompt.json의 placeholders에서 IMAGE_n의 example_ref
+        (= binaryItemIDRef)를 찾고, image_data에서 base64를 가져옴.
+        """
+        ref_map = {}
+        if not image_data:
+            return ref_map
+
+        # content_prompt placeholders에서 IMAGE_n → ref 매핑
+        # (generate 호출 시 content_prompt를 아직 안 가지고 있으므로
+        #  template HTML의 data-ref 속성 또는 순서 매칭으로 해결)
+        # 방법: template에서 IMAGE_1, IMAGE_2... 순서와
+        #       image_data의 키 순서를 매칭
+
+        # image_data 키 목록 (BinData 등장 순서)
+        img_refs = sorted(image_data.keys())
+
+        img_num = 0
+        for ref in img_refs:
+            img_num += 1
+            key = f"IMAGE_{img_num}"
+            img_info = image_data[ref]
+
+            b64 = img_info.get("base64", "")
+            mime = img_info.get("mime", "image/png")
+
+            if b64:
+                ref_map[key] = (
+                    f'<img src="data:{mime};base64,{b64}" '
+                    f'alt="{ref}" style="max-width:100%; height:auto;">'
+                )
+            else:
+                # base64 없으면 파일 경로 참조
+                file_path = img_info.get("path", "")
+                if file_path:
+                    ref_map[key] = (
+                        f'<img src="{file_path}" '
+                        f'alt="{ref}" style="max-width:100%; height:auto;">'
+                    )
+                else:
+                    ref_map[key] = f'<!-- image not found: {ref} -->'
+
+        return ref_map
+
+    def _load_image_data(self, config: dict) -> dict:
+        """템플릿 폴더에서 images.json 로드 (BinData 추출 결과).
+
+        images.json 구조:
+        {
+            "IMG001": {"base64": "iVBOR...", "mime": "image/png"},
+            "IMG002": {"base64": "...", "mime": "image/jpeg"}
+        }
+
+        또는 이미지 파일이 직접 저장된 경우 경로를 반환.
+        """
+        tpl_id = config.get("template_id", "")
+        if not tpl_id:
+            return {}
+
+        tpl_path = Path('templates/user/templates') / tpl_id
+
+        # ① images.json (base64 저장 방식)
+        images_json = tpl_path / 'images.json'
+        if images_json.exists():
+            try:
+                with open(images_json, 'r', encoding='utf-8') as f:
+                    return json.load(f)
+            except:
+                pass
+
+        # ② images/ 폴더 (파일 저장 방식)
+        images_dir = tpl_path / 'images'
+        if images_dir.exists():
+            result = {}
+            mime_map = {
+                '.png': 'image/png', '.jpg': 'image/jpeg',
+                '.jpeg': 'image/jpeg', '.gif': 'image/gif',
+                '.bmp': 'image/bmp', '.svg': 'image/svg+xml',
+                '.wmf': 'image/x-wmf', '.emf': 'image/x-emf',
+            }
+            for img_file in sorted(images_dir.iterdir()):
+                if img_file.suffix.lower() in mime_map:
+                    ref = img_file.stem  # 파일명 = binaryItemIDRef
+                    result[ref] = {
+                        "path": str(img_file),
+                        "mime": mime_map.get(img_file.suffix.lower(), "image/png")
+                    }
+            return result
+
+        return {}
+    
+    def _extract_json(self, response: str) -> dict:
+        """응답에서 JSON 추출"""
+        # ```json ... ``` 블록 찾기
+        match = re.search(r'```json\s*(.*?)\s*```', response, re.DOTALL)
+        if match:
+            try:
+                return json.loads(match.group(1))
+            except:
+                pass
+        
+        # 가장 큰 { } 블록 찾기
+        brace_depth = 0
+        start = -1
+        for i, ch in enumerate(response):
+            if ch == '{':
+                if brace_depth == 0:
+                    start = i
+                brace_depth += 1
+            elif ch == '}':
+                brace_depth -= 1
+                if brace_depth == 0 and start >= 0:
+                    try:
+                        return json.loads(response[start:i+1])
+                    except:
+                        start = -1
+        
+        return None
+    
+    def _generate_with_guide(self, content: str, config: dict, options: dict) -> dict:
+        """가이드 기반 생성 (템플릿 없을 때)"""
+        
+        context = config.get('context', {})
+        structure = config.get('structure', {})
+        layout = config.get('layout', {})
+        style = config.get('style', {})
+        
+        instruction = options.get('instruction', '') if options else ''
+        
+        # 섹션 구조 설명
+        sections = layout.get('sections', [])
+        sections_desc = ""
+        for i, sec in enumerate(sections, 1):
+            sections_desc += f"""
+{i}. {sec.get('name', f'섹션{i}')}
+   - 작성 스타일: {sec.get('writingStyle', '혼합')}
+   - 불릿: {'있음' if sec.get('hasBulletIcon') else '없음'}
+   - 표: {'있음' if sec.get('hasTable') else '없음'}
+   - 내용: {sec.get('contentDescription', '')}
+"""
+        
+        page_estimate = structure.get('pageEstimate', 1)
+        
+        system_prompt = f"""당신은 "{context.get('documentType', '문서')}" 작성 전문가입니다.
+
+## 문서 특성
+- 목적: {context.get('purpose', '')}
+- 대상: {context.get('audience', '')}
+- 톤: {context.get('tone', '')}
+- 전체 스타일: {structure.get('writingStyle', '혼합')}
+- 분량: 약 {page_estimate}페이지
+
+## 문서 구조
+{sections_desc}
+
+## 작성 원칙
+{chr(10).join('- ' + p for p in structure.get('writingPrinciples', []))}
+
+## 주의사항
+{chr(10).join('- ' + m for m in structure.get('commonMistakes', []))}
+
+## 핵심!
+- 사용자 입력을 **정리/재구성**하세요
+- **새로 창작하지 마세요**
+- 분석된 문서 구조를 그대로 따르세요
+- 개조식 섹션은 "· " 불릿 사용
+- 분량을 {page_estimate}페이지 내로 제한하세요"""
+
+        user_prompt = f"""다음 내용을 "{context.get('documentType', '문서')}" 양식으로 정리해주세요.
+
+## 입력 내용
+{content[:6000] if content else '(내용 없음)'}
+
+## 추가 요청
+{instruction if instruction else '없음'}
+
+## 출력 형식
+완전한 A4 규격 HTML 문서로 출력하세요.
+- <!DOCTYPE html>로 시작
+- UTF-8 인코딩
+- @page {{ size: A4 }} CSS 포함
+- 폰트: {style.get('font', {}).get('name', '맑은 고딕')}
+- 머릿말/꼬리말 포함
+- 약 {page_estimate}페이지 분량
+
+HTML만 출력하세요."""
+
+        try:
+            response = call_claude(system_prompt, user_prompt, max_tokens=6000)
+            html = extract_html(response)
+            
+            if not html:
+                return {'error': 'HTML 생성 실패'}
+            
+            return {'success': True, 'html': html}
+            
+        except Exception as e:
+            import traceback
+            return {'error': str(e), 'trace': traceback.format_exc()}
--- a/Code/geulbeot_10th/handlers/doc/doc_type_analyzer.py
+++ b/Code/geulbeot_10th/handlers/doc/doc_type_analyzer.py