Files
test/handlers/template_manager.py

1008 lines
39 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# -*- coding: utf-8 -*-
"""
Template Manager v5.2

- Independent CRUD for templates (create / load / delete / swap)
- Storage is decoupled from document types (DocType)
- Templates extracted from HWPX are saved under
  templates/user/templates/{tpl_id}/

★ v5.2 changes:
- _build_body_html() redesigned: the body is generated from content_order
  → paragraphs, images and tables are assembled into HTML in their
    original order
  → automatically falls back to the legacy path when content_order is absent
- _build_title_block_html() split out (for reuse)

★ Storage layout:
templates/user/
├── doc_types/{type_id}/
│   └── config.json        ← type info (context / structure / guide),
│                            including template_id: "tpl_xxx"
│                            (which template the type references)
└── templates/{tpl_id}/
    ├── template.html      ← HTML skeleton + placeholders
    ├── style.json         ← borders / fonts / colors / margins / borderFill
    └── meta.json          ← name, source, creation date

★ Usage flow:
1) "Add template"      → extract_and_save(parsed, name) → tpl_id
                         (parsed is the HWPX parser result)
2) "Add document type" → doc_type_analyzer calls extract_and_save internally
3) "Swap template"     → change_template(type_id, new_tpl_id)
4) "Generate document" → load_template(tpl_id) → template.html + style.json
"""
import json
import re
import shutil
import time
from pathlib import Path
from typing import Optional
class TemplateManager:
    """Manage templates independently of document types.

    A template is a directory ``{TEMPLATES_USER}/{tpl_id}/`` holding
    ``template.html``, ``style.json``, ``meta.json`` and
    ``semantic_map.json``.  Document types reference a template through the
    ``template_id`` field of their ``config.json``.

    Public methods report failures by returning ``{"error": ...}`` dicts
    rather than raising.
    """

    # Default storage roots; __init__ may override them per instance.
    TEMPLATES_USER = Path('templates/user/templates')
    TEMPLATES_DEFAULT = Path('templates/default/templates')
    DOC_TYPES_USER = Path('templates/user/doc_types')

    # Section-heading patterns shared by the content_order body builder and
    # the legacy section detector: "1. Title", "Ⅰ. Title", "제1장 Title".
    _SECTION_TITLE_PATTERNS = (
        re.compile(r'^\d+\.\s+\S'),
        re.compile(r'^[ⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩ]\.\s*\S'),
        re.compile(r'^제\s*\d+\s*[장절항]\s*\S'),
    )

    def __init__(self, base_path: Optional[str] = None):
        """Optionally re-root all storage directories under *base_path*."""
        if base_path:
            root = Path(base_path)
            self.TEMPLATES_USER = root / 'user' / 'templates'
            self.TEMPLATES_DEFAULT = root / 'default' / 'templates'
            self.DOC_TYPES_USER = root / 'user' / 'doc_types'

    # ================================================================
    # Core API
    # ================================================================
    def extract_and_save(self, parsed: dict, name: str,
                         source_file: str = "", description: str = "") -> dict:
        """Extract a template from an HWPX parse result and persist it.

        Args:
            parsed: HWPX parser result (raw_xml, tables, section_xml,
                header_xml, footer_xml).
            name: Template display name.
            source_file: Original file name (used for ``file_type``).
            description: Free-form description.

        Returns:
            On success ``{"success": True, "template_id", "path",
            "template_info", "semantic_map", "meta"}``; on failure
            ``{"error": str, "trace": str}``.
        """
        from .doc_template_analyzer import DocTemplateAnalyzer
        try:
            analyzer = DocTemplateAnalyzer()
            # (1) structural extraction
            template_info = analyzer.analyze(parsed)
            # (1-b) semantic map: table roles, section detection
            from . import semantic_mapper
            semantic_map = semantic_mapper.generate(template_info, parsed)
            # (2) HTML generation (tables filtered through semantic_map)
            template_html = self._generate_basic_html(
                template_info, parsed, semantic_map
            )

            # Allocate the directory only after generation succeeded so a
            # failed analysis does not leave an empty template behind.
            tpl_id, tpl_path = self._allocate_template_dir()

            (tpl_path / 'template.html').write_text(
                template_html, encoding='utf-8'
            )

            style_data = {
                "version": "v4",
                "source": "doc_template_analyzer",
                "template_info": template_info,
                "css": "",  # reserved for a custom CSS override
                "fonts": {},
                "colors": self._extract_colors(template_info),
                "border_fills": template_info.get("border_fills", {}),
                "tables": [],
                "style_summary": {}
            }
            (tpl_path / 'style.json').write_text(
                json.dumps(style_data, ensure_ascii=False, indent=2),
                encoding='utf-8'
            )

            meta = {
                "id": tpl_id,
                "name": name,
                "original_file": source_file,
                "file_type": Path(source_file).suffix if source_file else ".hwpx",
                "description": description,
                "features": self._summarize_features(template_info, semantic_map),
                "created_at": self._utc_timestamp(),
                "source": "doc_template_analyzer"
            }
            (tpl_path / 'meta.json').write_text(
                json.dumps(meta, ensure_ascii=False, indent=2),
                encoding='utf-8'
            )

            (tpl_path / 'semantic_map.json').write_text(
                json.dumps(semantic_map, ensure_ascii=False, indent=2),
                encoding='utf-8'
            )
            return {
                "success": True,
                "template_id": tpl_id,
                "path": str(tpl_path),
                "template_info": template_info,
                "semantic_map": semantic_map,
                "meta": meta
            }
        except Exception as e:
            # Top-level boundary: callers expect an error dict, not a raise.
            import traceback
            return {"error": str(e), "trace": traceback.format_exc()}

    def load_template(self, tpl_id: str) -> dict:
        """Load a template's HTML, style and meta.

        User templates take precedence over default templates.  Files that
        are missing from the template directory are simply omitted from the
        result.

        Returns:
            ``{"html", "style", "meta", "template_id", "path"}`` or
            ``{"error": ...}`` when the template does not exist or the id is
            not a plain directory name.
        """
        if self._is_valid_id(tpl_id):
            for base in (self.TEMPLATES_USER, self.TEMPLATES_DEFAULT):
                tpl_path = base / tpl_id
                if not tpl_path.exists():
                    continue
                result = {}
                html_file = tpl_path / 'template.html'
                if html_file.exists():
                    result["html"] = html_file.read_text(encoding='utf-8')
                style_file = tpl_path / 'style.json'
                if style_file.exists():
                    result["style"] = json.loads(
                        style_file.read_text(encoding='utf-8')
                    )
                meta_file = tpl_path / 'meta.json'
                if meta_file.exists():
                    result["meta"] = json.loads(
                        meta_file.read_text(encoding='utf-8')
                    )
                result["template_id"] = tpl_id
                result["path"] = str(tpl_path)
                return result
        return {"error": f"템플릿을 찾을 수 없습니다: {tpl_id}"}

    def list_templates(self) -> list:
        """Return the meta dicts of all templates (defaults first).

        Each entry gets an ``is_default`` flag.  A template whose
        ``meta.json`` cannot be read or parsed is listed with its folder name
        as both id and name.
        """
        templates = []
        sources = ((self.TEMPLATES_DEFAULT, True), (self.TEMPLATES_USER, False))
        for base, is_default in sources:
            if not base.exists():
                continue
            for folder in sorted(base.iterdir()):
                if not folder.is_dir():
                    continue
                meta_file = folder / 'meta.json'
                if not meta_file.exists():
                    continue
                try:
                    meta = json.loads(meta_file.read_text(encoding='utf-8'))
                    meta["is_default"] = is_default
                except (OSError, ValueError, TypeError):
                    # Unreadable / malformed / non-object meta: fall back to
                    # a minimal entry instead of hiding the template.
                    meta = {
                        "id": folder.name,
                        "name": folder.name,
                        "is_default": is_default
                    }
                templates.append(meta)
        return templates

    def delete_template(self, tpl_id: str) -> dict:
        """Delete a user template unless a document type still references it.

        Ids that are empty, ``.``/``..`` or contain path separators are
        rejected: they would otherwise resolve to the template root (or a
        parent of it) and be removed recursively.
        """
        not_found = {"error": f"템플릿을 찾을 수 없습니다: {tpl_id}"}
        if not self._is_valid_id(tpl_id):
            return not_found
        tpl_path = self.TEMPLATES_USER / tpl_id
        if not tpl_path.exists():
            return not_found

        referencing = self._find_referencing_doc_types(tpl_id)
        if referencing:
            names = ', '.join(r['name'] for r in referencing[:3])
            return {
                "error": f"이 템플릿을 사용 중인 문서 유형이 있습니다: {names}",
                "referencing_types": referencing
            }
        shutil.rmtree(tpl_path)
        return {"success": True, "deleted": tpl_id}

    def change_template(self, type_id: str, new_tpl_id: str) -> dict:
        """Point a user document type at a different template.

        Args:
            type_id: Document type id.
            new_tpl_id: Id of the template to link; must be loadable.

        Returns:
            ``{"success": True, "type_id", "old_template_id",
            "new_template_id"}`` or an ``{"error": ...}`` dict.
        """
        missing_type = {"error": f"문서 유형을 찾을 수 없습니다: {type_id}"}
        if not self._is_valid_id(type_id):
            return missing_type
        config_path = self.DOC_TYPES_USER / type_id / 'config.json'
        if not config_path.exists():
            return missing_type

        # The new template must exist before the config is touched.
        new_tpl = self.load_template(new_tpl_id)
        if "error" in new_tpl:
            return new_tpl

        config = json.loads(config_path.read_text(encoding='utf-8'))
        old_tpl_id = config.get("template_id", "")
        config["template_id"] = new_tpl_id
        config["updatedAt"] = self._utc_timestamp()
        config_path.write_text(
            json.dumps(config, ensure_ascii=False, indent=2),
            encoding='utf-8'
        )
        return {
            "success": True,
            "type_id": type_id,
            "old_template_id": old_tpl_id,
            "new_template_id": new_tpl_id
        }

    def get_template_for_doctype(self, type_id: str) -> dict:
        """Load the template linked to a document type.

        Looks for the type's ``config.json`` under the user doc types first,
        then under ``<default root>/doc_types``.  A config without
        ``template_id`` falls back to a ``template.html`` stored next to it
        (legacy layout).
        """
        missing_type = {"error": f"문서 유형을 찾을 수 없습니다: {type_id}"}
        if not self._is_valid_id(type_id):
            return missing_type
        config_path = self.DOC_TYPES_USER / type_id / 'config.json'
        if not config_path.exists():
            config_path = (
                self.TEMPLATES_DEFAULT.parent / 'doc_types' / type_id / 'config.json'
            )
            if not config_path.exists():
                return missing_type

        config = json.loads(config_path.read_text(encoding='utf-8'))
        tpl_id = config.get("template_id")
        if tpl_id:
            return self.load_template(tpl_id)

        # Backward compatibility: template.html stored with the doc type.
        legacy_path = config_path.parent / 'template.html'
        if legacy_path.exists():
            return {
                "html": legacy_path.read_text(encoding='utf-8'),
                "style": {},
                "meta": {"id": type_id, "name": "레거시 템플릿"},
                "template_id": None,
                "legacy": True
            }
        return {"error": "연결된 템플릿이 없습니다"}

    # ================================================================
    # Internal utilities
    # ================================================================
    @staticmethod
    def _is_valid_id(identifier) -> bool:
        """Return True when *identifier* is a single plain path component."""
        if not isinstance(identifier, str) or identifier in ('', '.', '..'):
            return False
        # Path(...).name differs from the input as soon as the input holds a
        # separator (or a drive prefix on Windows).
        return Path(identifier).name == identifier

    @staticmethod
    def _utc_timestamp() -> str:
        """Return the current time as an ISO-8601 UTC string ("...Z")."""
        # gmtime() is required: strftime() alone formats *local* time, which
        # would make the trailing "Z" wrong.
        return time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime())

    @staticmethod
    def _coerce_col_count(value, default: int = 3) -> int:
        """Convert a column count (int or numeric string) to int."""
        try:
            return int(value)
        except (ValueError, TypeError):
            return default

    def _allocate_template_dir(self) -> tuple:
        """Create and return ``(tpl_id, path)`` for a new, unused template id.

        The id is ``tpl_<epoch seconds>``; when that directory already exists
        (two templates created within the same second) a ``_<n>`` suffix is
        appended instead of overwriting the existing template.
        """
        self.TEMPLATES_USER.mkdir(parents=True, exist_ok=True)
        base_id = f"tpl_{int(time.time())}"
        tpl_id = base_id
        attempt = 1
        while True:
            tpl_path = self.TEMPLATES_USER / tpl_id
            try:
                tpl_path.mkdir()
            except FileExistsError:
                attempt += 1
                tpl_id = f"{base_id}_{attempt}"
            else:
                return tpl_id, tpl_path

    def _find_referencing_doc_types(self, tpl_id: str) -> list:
        """List ``{"id", "name"}`` of user doc types whose config links *tpl_id*."""
        result = []
        if not self.DOC_TYPES_USER.exists():
            return result
        for folder in self.DOC_TYPES_USER.iterdir():
            config_file = folder / 'config.json'
            if not config_file.exists():
                continue
            try:
                config = json.loads(config_file.read_text(encoding='utf-8'))
            except (OSError, ValueError):
                # Best effort: an unreadable config cannot be counted as a
                # reference.
                continue
            if isinstance(config, dict) and config.get("template_id") == tpl_id:
                result.append({
                    "id": config.get("id", folder.name),
                    "name": config.get("name", folder.name)
                })
        return result

    def _generate_basic_html(self, template_info: dict, parsed: dict,
                             semantic_map: dict = None) -> str:
        """Assemble the full template.html from CSS, header, body and footer."""
        from . import style_generator
        css = style_generator.generate_css(template_info, semantic_map)
        header_html = self._build_header_html(template_info.get("header"))
        footer_html = self._build_footer_html(template_info.get("footer"))
        body_html = self._build_body_html(template_info, parsed, semantic_map)
        html = f"""<!DOCTYPE html>
<html lang="ko">
<head>
<meta charset="UTF-8">
<title>Template</title>
<style>
{css}
</style>
</head>
<body>
<div class="page">
{header_html}
{body_html}
{footer_html}
</div>
</body>
</html>"""
        return html

    # ── shared HTML helpers ──
    @staticmethod
    def _cell_attrs(cell: dict, include_rowspan: bool = True) -> str:
        """Build the class/colspan/rowspan attribute string for a table cell."""
        attrs = ""
        bf_ref = cell.get("borderFillIDRef")
        if bf_ref:
            attrs += f' class="bf-{bf_ref}"'
        col_span = cell.get("colSpan", 1)
        if col_span > 1:
            attrs += f' colspan="{col_span}"'
        if include_rowspan:
            row_span = cell.get("rowSpan", 1)
            if row_span > 1:
                attrs += f' rowspan="{row_span}"'
        return attrs

    @staticmethod
    def _colgroup_html(col_pcts, fallback_count: int = 0) -> str:
        """Render a ``<colgroup>``.

        Uses percentage widths when *col_pcts* is non-empty, otherwise
        *fallback_count* unstyled ``<col>`` elements; returns ``""`` when
        neither applies.
        """
        if col_pcts:
            cols = [f' <col style="width:{pct}%">\n' for pct in col_pcts]
        elif fallback_count > 0:
            cols = [" <col>\n"] * fallback_count
        else:
            return ""
        return "<colgroup>\n" + "".join(cols) + "</colgroup>\n"

    def _build_hf_table_html(self, tbl: dict, prefix: str) -> str:
        """Render a header/footer table with ``{{<prefix>_R#_C#}}`` placeholders."""
        rows = tbl.get("rows", [])
        col_pcts = tbl.get("colWidths_pct", [])
        # Without explicit percentages, derive them from the first row's
        # cell widths.
        if not col_pcts and rows:
            widths = [c.get("width_hu", 0) for c in rows[0]]
            total = sum(widths)
            if total > 0:
                col_pcts = [round(w / total * 100) for w in widths]

        parts = ['<table>\n', self._colgroup_html(col_pcts)]
        for r_idx, row in enumerate(rows, start=1):
            parts.append('<tr>\n')
            for c_idx, cell in enumerate(row, start=1):
                lines = cell.get("lines") or []
                cell_text = (cell.get("text") or "").strip()
                ph_name = f"{prefix}_R{r_idx}_C{c_idx}"
                if not cell_text and not lines:
                    # Cells that were empty in the source stay empty.
                    content = ""
                elif len(lines) > 1:
                    # Multi-line cell: one placeholder per line.
                    content = "<br>".join(
                        f"{{{{{ph_name}_LINE_{n}}}}}"
                        for n in range(1, len(lines) + 1)
                    )
                else:
                    content = f"{{{{{ph_name}}}}}"
                parts.append(f' <td{self._cell_attrs(cell)}>{content}</td>\n')
            parts.append('</tr>\n')
        parts.append('</table>\n')
        return "".join(parts)

    def _build_header_html(self, header_info: Optional[dict]) -> str:
        """Render the document header as HTML with placeholders.

        Returns an HTML comment when there is no header; a table-type header
        becomes a placeholder table, anything else one ``HEADER_TEXT_n``
        div per text entry (at least one).
        """
        if not header_info or not header_info.get("exists"):
            return "<!-- no header -->"
        html = '<div class="doc-header">\n'
        if header_info.get("type") == "table" and header_info.get("table"):
            html += self._build_hf_table_html(header_info["table"], "HEADER")
        else:
            texts = header_info.get("texts") or []
            for i in range(1, max(len(texts), 1) + 1):
                html += f' <div>{{{{HEADER_TEXT_{i}}}}}</div>\n'
        html += '</div>'
        return html

    def _build_footer_html(self, footer_info: Optional[dict]) -> str:
        """Render the document footer as HTML with placeholders.

        Returns an HTML comment when there is no footer; a table-type footer
        becomes a placeholder table, anything else a centered
        ``{{PAGE_NUMBER}}`` div.
        """
        if not footer_info or not footer_info.get("exists"):
            return "<!-- no footer -->"
        html = '<div class="doc-footer">\n'
        if footer_info.get("type") == "table" and footer_info.get("table"):
            html += self._build_hf_table_html(footer_info["table"], "FOOTER")
        else:
            html += ' <div style="text-align:center">{{PAGE_NUMBER}}</div>\n'
        html += '</div>'
        return html

    def _build_body_html(self, template_info: dict, parsed: dict,
                         semantic_map: dict = None) -> str:
        """Render the body.

        Uses ``content_order`` (original document order) when it contains at
        least one paragraph; otherwise falls back to the legacy
        section + table layout.
        """
        content_order = template_info.get("content_order")
        if content_order and self._has_paragraph_content(content_order):
            return self._build_body_from_content_order(
                template_info, content_order, semantic_map
            )
        return self._build_body_legacy(template_info, parsed, semantic_map)

    # ── content_order based body (v5.2+) ──
    def _has_paragraph_content(self, content_order: list) -> bool:
        """Return True when *content_order* holds at least one paragraph."""
        return any(c.get('type') == 'paragraph' for c in content_order)

    def _is_section_title(self, text: str) -> bool:
        """Return True when *text* starts like a numbered section heading."""
        return any(p.match(text) for p in self._SECTION_TITLE_PATTERNS)

    def _data_table_html(self, tbl_num: int, tbl_data: dict,
                         fallback_col_cnt: int,
                         semantic_map: dict = None) -> str:
        """Render the placeholder for one body table.

        The column count comes from the table's ``col_headers`` in
        ``semantic_map["table_roles"]`` when present (keyed by int or str
        index), otherwise from *fallback_col_cnt*.
        """
        roles = semantic_map.get("table_roles", {}) if semantic_map else {}
        index = tbl_data["index"]
        role = roles.get(index, roles.get(str(index), {}))
        col_headers = role.get("col_headers", [])
        col_cnt = len(col_headers) if col_headers else fallback_col_cnt
        rows = tbl_data.get("rows", [])
        return self._build_table_placeholder(
            tbl_num, col_cnt,
            tbl_data.get("colWidths_pct", []),
            header_row=rows[0] if rows else None
        )

    def _build_body_from_content_order(self, template_info: dict,
                                       content_order: list,
                                       semantic_map: dict = None) -> str:
        """Assemble the body in original document order.

        Per item type:
            paragraph → section heading or ``<p>`` with ``PARA_n`` placeholders
            table     → data-table placeholder (title/header/footer tables
                        are skipped)
            image     → ``<div class="img-wrap">`` with ``IMAGE_n``
            empty / unknown → skipped
        """
        tables = template_info.get("tables", [])
        if semantic_map:
            title_table_idx = semantic_map.get("title_table")
            body_table_indices = semantic_map.get("body_tables", [])
        else:
            title_table_idx = None
            body_table_indices = [t["index"] for t in tables]

        # content_order's table_idx counts tables met in the section body,
        # while `tables` also holds header/footer tables, so map through the
        # list of tables that are not header/footer.
        header_footer_indices = set()
        if semantic_map:
            for idx_key, role_info in semantic_map.get("table_roles", {}).items():
                if role_info.get("role", "") in ("header_table", "footer_table"):
                    try:
                        header_footer_indices.add(int(idx_key))
                    except (ValueError, TypeError):
                        pass
        body_visible_tables = [
            t for t in tables if t["index"] not in header_footer_indices
        ]

        body_parts = []
        if title_table_idx is not None:
            title_tbl = next(
                (t for t in tables if t["index"] == title_table_idx), None
            )
            if title_tbl:
                body_parts.append(self._build_title_block_html(title_tbl))

        para_num = 0      # paragraph placeholder counter
        tbl_num = 0       # data table counter (1-based once incremented)
        img_num = 0       # image placeholder counter
        section_num = 0
        in_section = False

        for item in content_order:
            itype = item.get('type')

            if itype == 'table':
                t_idx = item.get('table_idx', 0)
                # Reject negative / non-int indexes instead of silently
                # picking a table from the end of the list.
                in_range = (
                    isinstance(t_idx, int)
                    and 0 <= t_idx < len(body_visible_tables)
                )
                tbl_data = body_visible_tables[t_idx] if in_range else None
                if tbl_data is not None and (
                    tbl_data["index"] == title_table_idx
                    or tbl_data["index"] not in body_table_indices
                ):
                    continue  # title table already rendered / not a data table
                tbl_num += 1
                col_cnt = self._coerce_col_count(item.get('colCnt', '3'))
                if tbl_data is not None:
                    body_parts.append(
                        self._data_table_html(
                            tbl_num, tbl_data, col_cnt, semantic_map
                        )
                    )
                else:
                    body_parts.append(
                        self._build_table_placeholder(
                            tbl_num, col_cnt, [], header_row=None
                        )
                    )
                continue

            if itype == 'image':
                img_num += 1
                ppr = item.get('paraPrIDRef', '0')
                img_html = f'<div class="img-wrap ppr-{ppr}">\n'
                img_html += f' {{{{IMAGE_{img_num}}}}}\n'
                if item.get('text', ''):
                    img_html += (
                        f' <p class="img-caption">'
                        f'{{{{IMAGE_{img_num}_CAPTION}}}}</p>\n'
                    )
                img_html += '</div>'
                body_parts.append(img_html)
                continue

            if itype == 'paragraph':
                text = item.get('text') or ''
                ppr = item.get('paraPrIDRef', '0')
                cpr = item.get('charPrIDRef', '0')

                if self._is_section_title(text):
                    if in_section:
                        body_parts.append('</div>\n')  # close previous section
                    section_num += 1
                    in_section = True
                    body_parts.append(
                        f'<div class="section" data-section="{section_num}">\n'
                        f' <p class="section-title ppr-{ppr} cpr-{cpr}">'
                        f'{{{{SECTION_{section_num}_TITLE}}}}</p>'
                    )
                    continue

                para_num += 1
                runs = item.get('runs') or []
                if len(runs) > 1:
                    # One span per run so each keeps its character style.
                    inner = ''.join(
                        f'<span class="cpr-{run.get("charPrIDRef", cpr)}">'
                        f'{{{{PARA_{para_num}_RUN_{r_idx}}}}}</span>'
                        for r_idx, run in enumerate(runs, start=1)
                    )
                else:
                    inner = (
                        f'<span class="cpr-{cpr}">'
                        f'{{{{PARA_{para_num}}}}}</span>'
                    )
                body_parts.append(f'<p class="ppr-{ppr}">{inner}</p>')

        if in_section:
            body_parts.append('</div>\n')
        return "\n\n".join(body_parts)

    def _build_title_block_html(self, title_tbl: dict) -> str:
        """Render the title table; only cells that had text get a placeholder."""
        rows = title_tbl.get("rows", [])
        parts = [
            '<div class="title-block">\n<table class="title-table">\n',
            self._colgroup_html(title_tbl.get("colWidths_pct", [])),
        ]
        for r_idx, row in enumerate(rows, start=1):
            parts.append('<tr>\n')
            for c_idx, cell in enumerate(row, start=1):
                attrs = self._cell_attrs(cell)
                if (cell.get("text") or "").strip():
                    parts.append(
                        f' <td{attrs}>{{{{TITLE_R{r_idx}_C{c_idx}}}}}</td>\n'
                    )
                else:
                    parts.append(f' <td{attrs}></td>\n')
            parts.append('</tr>\n')
        parts.append('</table>\n</div>\n')
        return "".join(parts)

    # ── legacy section + table body (backward compatibility) ──
    def _build_body_legacy(self, template_info: dict, parsed: dict,
                           semantic_map: dict = None) -> str:
        """Render the body without content_order.

        Emits one section per detected section title (at least one) and
        places one body table in each section in order; leftover tables are
        appended after the last section.
        """
        body_parts = []
        tables = template_info.get("tables", [])
        if semantic_map:
            body_table_indices = semantic_map.get("body_tables", [])
            title_idx = semantic_map.get("title_table")
        else:
            # No semantic map: treat every table as a body table.
            body_table_indices = [t["index"] for t in tables]
            title_idx = None

        if title_idx is not None:
            title_tbl = next((t for t in tables if t["index"] == title_idx), None)
            if title_tbl:
                body_parts.append(self._build_title_block_html(title_tbl))

        body_tables = [t for t in tables if t["index"] in body_table_indices]
        section_titles = self._detect_section_titles(parsed)

        if not section_titles and not body_tables:
            # Not enough structure: emit a single default section.
            body_parts.append(
                '<div class="section">\n'
                ' <div class="section-title">{{SECTION_1_TITLE}}</div>\n'
                ' <div class="section-content">{{SECTION_1_CONTENT}}</div>\n'
                '</div>'
            )
            return "\n".join(body_parts)

        def render(position: int) -> str:
            table = body_tables[position]
            return self._data_table_html(
                position + 1, table,
                self._coerce_col_count(table.get("colCnt", 3)),
                semantic_map
            )

        tbl_idx = 0
        for s_num in range(1, max(len(section_titles), 1) + 1):
            body_parts.append(
                f'<div class="section">\n'
                f' <div class="section-title">{{{{SECTION_{s_num}_TITLE}}}}</div>\n'
                f' <div class="section-content">{{{{SECTION_{s_num}_CONTENT}}}}</div>\n'
            )
            if tbl_idx < len(body_tables):
                body_parts.append(render(tbl_idx))
                tbl_idx += 1
            body_parts.append('</div>\n')

        # Tables left over once every section has received one.
        while tbl_idx < len(body_tables):
            body_parts.append(render(tbl_idx))
            tbl_idx += 1
        return "\n".join(body_parts)

    def _build_table_placeholder(self, tbl_num: int, col_cnt: int,
                                 col_pcts: list = None,
                                 header_row: list = None) -> str:
        """Render one data-table placeholder.

        Args:
            tbl_num: Table number (1-based), used in placeholder names.
            col_cnt: Column count, used when no header row / widths exist.
            col_pcts: Column widths in percent.
            header_row: Header-row cells; their borderFillIDRef and colSpan
                are carried over to the ``<th>`` elements.
        """
        colgroup = self._colgroup_html(col_pcts, fallback_count=col_cnt)

        if header_row:
            header_cells = [
                f' <th{self._cell_attrs(cell, include_rowspan=False)}>'
                f'{{{{TABLE_{tbl_num}_H_C{c}}}}}</th>'
                for c, cell in enumerate(header_row, start=1)
            ]
        else:
            # No source header row: plain cells, one per column.
            header_cells = [
                f' <th>{{{{TABLE_{tbl_num}_H_C{c}}}}}</th>'
                for c in range(1, col_cnt + 1)
            ]
        header_row_html = "\n".join(header_cells)
        return (
            f'<table class="data-table tbl-{tbl_num}">\n'
            f'{colgroup}'
            f'<thead>\n'
            f' <tr>\n{header_row_html}\n </tr>\n'
            f'</thead>\n'
            f'<tbody>\n'
            f' {{{{TABLE_{tbl_num}_BODY}}}}\n'
            f'</tbody>\n'
            f'</table>'
        )

    def _detect_section_titles(self, parsed: dict) -> list:
        """Return the texts in *parsed* that look like section headings.

        Uses ``parsed["paragraphs"]`` when present; otherwise extracts
        ``<hp:t>`` text from the first ``raw_xml`` entry whose key contains
        "section", or from ``parsed["section_xml"]``.
        """
        paragraphs = parsed.get("paragraphs", [])
        if not paragraphs:
            section_xml = ""
            for key, val in (parsed.get("raw_xml") or {}).items():
                if "section" in key.lower():
                    section_xml = val if isinstance(val, str) else ""
                    break
            if not section_xml:
                section_xml = parsed.get("section_xml", "")
            if section_xml:
                t_matches = re.findall(r'<hp:t>([^<]+)</hp:t>', section_xml)
                paragraphs = [t.strip() for t in t_matches if t.strip()]

        titles = []
        for text in paragraphs:
            if isinstance(text, dict):
                text = text.get("text", "")
            text = str(text).strip()
            if text and self._is_section_title(text):
                titles.append(text)
        return titles

    def _extract_colors(self, template_info: dict) -> dict:
        """Collect the distinct background and border colors of all border fills.

        Returns:
            ``{"background": [...], "border": [...], "text": []}`` with
            colors in first-seen order.
        """
        colors = {"background": [], "border": [], "text": []}
        border_fills = template_info.get("border_fills") or {}
        for fill_data in border_fills.values():
            # "background" is the current key; "bg" is accepted as fallback.
            bg = fill_data.get("background", fill_data.get("bg", ""))
            if bg and bg.lower() not in ("", "none", "transparent") \
                    and bg not in colors["background"]:
                colors["background"].append(bg)

            # Border colors from CSS shorthands such as
            # "0.12mm solid #999999" → "#999999".
            for prop, val in fill_data.get("css", {}).items():
                if "border" in prop and val and val != "none":
                    parts = val.split()
                    if len(parts) >= 3:
                        color = parts[-1]
                        if color.startswith("#") and color not in colors["border"]:
                            colors["border"].append(color)

            # Border colors given directly per side.
            for side_key in ("top", "bottom", "left", "right"):
                side = fill_data.get(side_key, {})
                if isinstance(side, dict):
                    color = side.get("color", "")
                    if color and color not in colors["border"]:
                        colors["border"].append(color)
        return colors

    def _summarize_features(self, template_info: dict,
                            semantic_map: dict = None) -> list:
        """Build the short feature list stored in meta.json.

        Covers the first HANGUL font face, header/footer column counts and
        table dimensions (every body table when *semantic_map* lists them,
        otherwise the first table only).
        """
        features = []
        # header/footer may be present but None, so normalise with `or {}`.
        header = template_info.get("header") or {}
        footer = template_info.get("footer") or {}
        tables = template_info.get("tables") or []

        # fonts layout: {"HANGUL": [{"face": "..."}], ...}
        hangul = (template_info.get("fonts") or {}).get("HANGUL", [])
        if hangul and isinstance(hangul, list):
            features.append(f"폰트: {hangul[0].get('face', '?')}")

        if header.get("exists"):
            col_cnt = (header.get("table") or {}).get("colCnt", "?")
            features.append(f"머릿말: {col_cnt}")
        if footer.get("exists"):
            col_cnt = (footer.get("table") or {}).get("colCnt", "?")
            features.append(f"꼬릿말: {col_cnt}")

        if semantic_map and semantic_map.get("body_tables"):
            for idx in semantic_map["body_tables"]:
                t = next((tb for tb in tables if tb["index"] == idx), None)
                if t:
                    features.append(
                        f"표: {t.get('rowCnt', '?')}x{t.get('colCnt', '?')}"
                    )
        elif tables:
            t = tables[0]
            features.append(f"표: {t.get('rowCnt', '?')}x{t.get('colCnt', '?')}")
        return features