# -*- coding: utf-8 -*-
"""
Template processing logic (v3 - analysis of the real document structure).

- Extracts the actual table structure, image backgrounds and borders of
  HWPX files.
- Normalises 8-digit ARGB colours to 6-digit RGB.
- Ignores the colour of borders whose type is NONE.
"""

import os
import json
import logging
import re
import uuid
import shutil
import zipfile
import xml.etree.ElementTree as ET
from pathlib import Path
from datetime import datetime
from typing import Dict, Any, List, Optional
from collections import Counter, defaultdict

logger = logging.getLogger(__name__)

# Directory in which analysed templates are stored (one sub-directory each).
TEMPLATES_DIR = Path(__file__).parent.parent.parent / 'templates' / 'user' / 'templates'
try:
    # parents=True: the intermediate 'templates/user' directories may not
    # exist yet on a fresh checkout.
    TEMPLATES_DIR.mkdir(parents=True, exist_ok=True)
except OSError as exc:
    # Importing the module must not fail on a read-only location;
    # TemplateProcessor.__init__ retries and surfaces the error.
    logger.warning("could not create %s: %s", TEMPLATES_DIR, exc)

# Upload extensions accepted by TemplateProcessor.analyze().
SUPPORTED_EXTENSIONS = ('.hwpx', '.hwp', '.pdf')

# Border line types (constants taken from the HWP specification) mapped to
# the display names used in summaries.
LINE_TYPES = {
    'NONE': '없음',
    'SOLID': '실선',
    'DASH': '긴 점선',
    'DOT': '점선',
    'DASH_DOT': '-.-.-.-.',
    'DASH_DOT_DOT': '-..-..-..',
    'DOUBLE_SLIM': '2중선',
    'SLIM_THICK': '가는선+굵은선',
    'THICK_SLIM': '굵은선+가는선',
    'SLIM_THICK_SLIM': '가는선+굵은선+가는선',
    'WAVE': '물결',
    'DOUBLE_WAVE': '물결 2중선',
}

# Width returned by _parse_width() when nothing can be parsed.
_DEFAULT_WIDTH_MM = 0.1
_LEADING_NUMBER_RE = re.compile(r'[-+]?(?:\d+\.?\d*|\.\d+)')

# Values interpolated into generated CSS come from the uploaded file, so
# they are restricted to these shapes before being written out.
_CSS_COLOR_RE = re.compile(r'#[0-9A-Fa-f]{3,8}')
_CSS_UNSAFE_FONT_CHARS_RE = re.compile(r"[^\w \-.,()]")


class TemplateProcessor:
    """Analyse uploaded template files and manage the stored results (v3)."""

    # XML namespaces used by HWPX header/section documents.
    NS = {
        'hh': 'http://www.hancom.co.kr/hwpml/2011/head',
        'hc': 'http://www.hancom.co.kr/hwpml/2011/core',
        'hp': 'http://www.hancom.co.kr/hwpml/2011/paragraph',
        'hs': 'http://www.hancom.co.kr/hwpml/2011/section',
    }

    # Archive members of an HWPX file that the analysis reads.
    _HEADER_MEMBER = 'Contents/header.xml'
    _SECTION_MEMBER = 'Contents/section0.xml'

    def __init__(self):
        """Bind the processor to TEMPLATES_DIR, creating it if necessary."""
        self.templates_dir = TEMPLATES_DIR
        self.templates_dir.mkdir(parents=True, exist_ok=True)

    # =========================================================================
    # Public API
    # =========================================================================

    def get_list(self) -> Dict[str, Any]:
        """Return the stored templates, newest first.

        Returns:
            ``{'templates': [...]}`` where each entry has the keys ``id``,
            ``name``, ``features`` and ``created_at``. Directories without a
            readable ``meta.json`` are skipped.
        """
        templates = []
        for item in self.templates_dir.iterdir():
            if not item.is_dir():
                continue
            meta_path = item / 'meta.json'
            if not meta_path.exists():
                continue
            try:
                meta = json.loads(meta_path.read_text(encoding='utf-8'))
            except (OSError, ValueError) as exc:
                # ValueError covers JSONDecodeError and UnicodeDecodeError.
                # One corrupt entry must not hide the rest of the list.
                logger.warning("skipping unreadable %s: %s", meta_path, exc)
                continue
            if not isinstance(meta, dict):
                logger.warning("skipping %s: not a JSON object", meta_path)
                continue
            templates.append({
                'id': meta.get('id', item.name),
                'name': meta.get('name', item.name),
                'features': meta.get('features', []),
                # `or ''` keeps the sort below safe if the value is null.
                'created_at': meta.get('created_at') or '',
            })

        templates.sort(key=lambda entry: str(entry['created_at']), reverse=True)
        return {'templates': templates}

    def analyze(self, file, name: str) -> Dict[str, Any]:
        """Analyse an uploaded template file and store the result.

        Args:
            file: Upload object exposing ``filename`` and ``save(path)``
                (presumably a werkzeug ``FileStorage`` - confirm with caller).
            name: Display name stored with the template.

        Returns:
            ``{'success': True, 'template': {...}}`` on success, or
            ``{'error': ...}`` when the extension is unsupported or the
            analysis reports an error.

        Raises:
            Exception: Any unexpected failure is re-raised after the
                partially written template directory has been removed.
        """
        filename = file.filename or ''
        ext = Path(filename).suffix.lower()
        if ext not in SUPPORTED_EXTENSIONS:
            return {'error': f'지원하지 않는 파일 형식: {ext}'}

        template_id, template_dir = self._create_template_dir()

        try:
            original_path = template_dir / f'original{ext}'
            file.save(str(original_path))

            if ext == '.hwpx':
                style_data = self._analyze_hwpx(original_path, template_dir)
            else:
                style_data = self._analyze_fallback(ext)

            if 'error' in style_data:
                shutil.rmtree(template_dir, ignore_errors=True)
                return style_data

            features = self._extract_features(style_data)

            meta = {
                'id': template_id,
                'name': name,
                'original_file': filename,
                'file_type': ext,
                'features': features,
                'created_at': datetime.now().isoformat(),
            }
            (template_dir / 'meta.json').write_text(
                json.dumps(meta, ensure_ascii=False, indent=2), encoding='utf-8'
            )
            (template_dir / 'style.json').write_text(
                json.dumps(style_data, ensure_ascii=False, indent=2), encoding='utf-8'
            )

            css_dir = template_dir / 'css'
            css_dir.mkdir(exist_ok=True)
            (css_dir / 'template.css').write_text(
                style_data.get('css', ''), encoding='utf-8'
            )

            return {
                'success': True,
                'template': {
                    'id': template_id,
                    'name': name,
                    'features': features,
                    'created_at': meta['created_at'],
                },
            }
        except Exception:
            # ignore_errors: a failing cleanup must not mask the real error.
            shutil.rmtree(template_dir, ignore_errors=True)
            raise

    def delete(self, template_id: str) -> Dict[str, Any]:
        """Delete a stored template.

        Returns:
            ``{'success': True, 'deleted': template_id}``, or an ``error``
            dict when the id is invalid or no such template exists.
        """
        template_dir = self._template_dir_for(template_id)
        if template_dir is None or not template_dir.is_dir():
            return {'error': '템플릿을 찾을 수 없습니다'}

        shutil.rmtree(template_dir)
        return {'success': True, 'deleted': template_id}

    def get_style(self, template_id: str) -> Optional[Dict[str, Any]]:
        """Return the stored ``style.json`` of a template, or ``None``."""
        template_dir = self._template_dir_for(template_id)
        if template_dir is None:
            return None
        style_path = template_dir / 'style.json'
        if not style_path.exists():
            return None
        return json.loads(style_path.read_text(encoding='utf-8'))

    def _template_dir_for(self, template_id: str) -> Optional[Path]:
        """Map a template id to its directory, rejecting unsafe ids.

        An id must be a single path component. The empty string would
        resolve to the storage root itself, and '..', separators or absolute
        paths would escape it - fatal in combination with ``rmtree``.
        """
        if not isinstance(template_id, str) or template_id in ('', '.', '..'):
            return None
        if '\0' in template_id or Path(template_id).name != template_id:
            return None
        return self.templates_dir / template_id

    def _create_template_dir(self):
        """Create a fresh template directory and return ``(id, path)``.

        ``mkdir()`` without ``exist_ok`` makes an id collision visible so an
        existing template is never silently overwritten.
        """
        for _ in range(16):
            template_id = uuid.uuid4().hex[:8]
            template_dir = self.templates_dir / template_id
            try:
                template_dir.mkdir()
            except FileExistsError:
                continue
            return template_id, template_dir
        raise RuntimeError('could not allocate a unique template id')

    # =========================================================================
    # HWPX analysis (core)
    # =========================================================================

    def _analyze_hwpx(self, file_path: Path, template_dir: Path) -> Dict[str, Any]:
        """Analyse an HWPX archive.

        Only ``Contents/header.xml`` and ``Contents/section0.xml`` are
        extracted (into ``template_dir / 'extracted'``, removed afterwards).

        Returns:
            The style dictionary (fonts, colours, border fills, tables,
            special borders, summary, css), or ``{'error': ...}`` when the
            archive or its XML cannot be read.
        """
        extract_dir = template_dir / 'extracted'

        try:
            with zipfile.ZipFile(file_path, 'r') as zf:
                available = set(zf.namelist())
                # Extract only what is parsed below instead of the whole
                # (possibly large) archive.
                for member in (self._HEADER_MEMBER, self._SECTION_MEMBER):
                    if member in available:
                        zf.extract(member, extract_dir)

            result = {
                'version': 'v3',
                'fonts': {},
                'colors': {
                    'background': [],
                    'border': [],
                    'text': [],
                },
                'border_fills': {},
                'tables': [],
                'special_borders': [],
                'style_summary': {},
                'css': '',
            }

            # 1. header.xml: fonts and border fills
            header_path = extract_dir / 'Contents' / 'header.xml'
            if header_path.exists():
                self._parse_header(header_path, result)

            # 2. section0.xml: tables (needs the border fills from step 1)
            section_path = extract_dir / 'Contents' / 'section0.xml'
            if section_path.exists():
                self._parse_section(section_path, result)

            # 3. summary, 4. CSS
            result['style_summary'] = self._create_style_summary(result)
            result['css'] = self._generate_css(result)

            return result
        except (zipfile.BadZipFile, ET.ParseError) as exc:
            # analyze() turns an 'error' result into a clean error response.
            return {'error': f'HWPX 파일을 분석할 수 없습니다: {exc}'}
        finally:
            shutil.rmtree(extract_dir, ignore_errors=True)

    def _parse_header(self, header_path: Path, result: Dict):
        """Parse header.xml, filling ``result['fonts']`` and ``['border_fills']``."""
        root = ET.parse(header_path).getroot()

        # Fonts: only the HANGUL font face list is collected.
        for fontface in root.findall('.//hh:fontface', self.NS):
            if fontface.get('lang') != 'HANGUL':
                continue
            for font in fontface.findall('hh:font', self.NS):
                result['fonts'][font.get('id')] = font.get('face')

        for bf in root.findall('.//hh:borderFill', self.NS):
            result['border_fills'][bf.get('id')] = self._parse_border_fill(bf, result)

    def _parse_border_fill(self, bf, result: Dict) -> Dict:
        """Parse one ``borderFill`` element.

        Side effects: appends newly seen colours to ``result['colors']`` and
        non-SOLID visible borders to ``result['special_borders']``.

        Returns:
            Dict with ``id``, ``type`` ('empty', 'image', 'solid' or
            'border_only'), ``background``, ``image`` and ``borders``.
        """
        bf_id = bf.get('id')
        data = {
            'id': bf_id,
            'type': 'empty',
            'background': None,
            'image': None,
            'borders': {},
        }

        # Image background
        img_brush = bf.find('.//hc:imgBrush', self.NS)
        if img_brush is not None:
            img = img_brush.find('hc:img', self.NS)
            if img is not None:
                data['type'] = 'image'
                data['image'] = {
                    'ref': img.get('binaryItemIDRef'),
                    'effect': img.get('effect'),
                }

        # Solid background (an image type, if already set, takes precedence)
        win_brush = bf.find('.//hc:winBrush', self.NS)
        if win_brush is not None:
            face_color = self._normalize_color(win_brush.get('faceColor'))
            if face_color and face_color != 'none':
                if data['type'] == 'empty':
                    data['type'] = 'solid'
                data['background'] = face_color
                if face_color not in result['colors']['background']:
                    result['colors']['background'].append(face_color)

        # Borders on the four sides
        for side in ('top', 'bottom', 'left', 'right'):
            border = bf.find(f'hh:{side}Border', self.NS)
            if border is None:
                continue

            border_type = border.get('type', 'NONE')
            width = border.get('width', '0.1 mm')
            color = self._normalize_color(border.get('color', '#000000'))
            type_name = LINE_TYPES.get(border_type, border_type)

            data['borders'][side] = {
                'type': border_type,
                'type_name': type_name,
                'width': width,
                'width_mm': self._parse_width(width),
                'color': color,
            }

            # Colours are collected for visible borders only.
            if border_type == 'NONE':
                continue
            if data['type'] == 'empty':
                data['type'] = 'border_only'
            if color and color not in result['colors']['border']:
                result['colors']['border'].append(color)

            if border_type != 'SOLID':
                result['special_borders'].append({
                    'bf_id': bf_id,
                    'side': side,
                    'type': border_type,
                    'type_name': type_name,
                    'width': width,
                    'color': color,
                })

        return data

    def _parse_section(self, section_path: Path, result: Dict):
        """Parse section0.xml, appending one entry per table to ``result['tables']``."""
        root = ET.parse(section_path).getroot()
        border_fills = result['border_fills']

        # NOTE(review): './/' also matches tables/cells nested inside a cell,
        # so nested cells are reported under the outer table as well -
        # confirm whether that is intended.
        for tbl in root.findall('.//hp:tbl', self.NS):
            table_data = {
                'rows': int(tbl.get('rowCnt', 0)),
                'cols': int(tbl.get('colCnt', 0)),
                'cells': [],
                'structure': {
                    'header_row_style': None,
                    'first_col_style': None,
                    'body_style': None,
                    'has_image_cells': False,
                },
            }

            cell_by_position = {}
            for tc in tbl.findall('.//hp:tc', self.NS):
                cell_addr = tc.find('hp:cellAddr', self.NS)
                if cell_addr is None:
                    continue

                row = int(cell_addr.get('rowAddr', 0))
                col = int(cell_addr.get('colAddr', 0))
                bf_id = tc.get('borderFillIDRef')
                bf_info = border_fills.get(bf_id, {})

                text = ''.join(
                    t.text for t in tc.findall('.//hp:t', self.NS) if t.text
                )

                cell_data = {
                    'row': row,
                    'col': col,
                    'bf_id': bf_id,
                    'bf_type': bf_info.get('type'),
                    'background': bf_info.get('background'),
                    'image': bf_info.get('image'),
                    'text_preview': text[:30],
                }
                table_data['cells'].append(cell_data)
                cell_by_position[(row, col)] = cell_data

                if bf_info.get('type') == 'image':
                    table_data['structure']['has_image_cells'] = True

            self._analyze_table_structure(table_data, cell_by_position, border_fills)
            result['tables'].append(table_data)

    def _analyze_table_structure(self, table_data: Dict, cells: Dict, border_fills: Dict):
        """Determine header-row, first-column and body styles of a table.

        For each region the most frequently used border-fill id is taken.
        The header row and first column are recorded only when that border
        fill has a background; the body style is recorded whenever body
        cells exist. Results are written to ``table_data['structure']``.
        """
        rows = table_data['rows']
        cols = table_data['cols']
        if rows == 0 or cols == 0:
            return

        def bf_ids(positions):
            """Border-fill ids of the cells present at *positions*."""
            return [cells[pos].get('bf_id') for pos in positions if cells.get(pos)]

        def dominant(ids):
            """Most frequent id of a non-empty list."""
            return Counter(ids).most_common(1)[0][0]

        structure = table_data['structure']

        # Header: first row
        header_ids = bf_ids((0, c) for c in range(cols))
        if header_ids:
            bf_id = dominant(header_ids)
            bf = border_fills.get(bf_id)
            if bf and bf.get('background'):
                structure['header_row_style'] = {
                    'bf_id': bf_id,
                    'background': bf.get('background'),
                    'borders': bf.get('borders', {}),
                }

        # First column, from row 1 on
        first_col_ids = bf_ids((r, 0) for r in range(1, rows))
        if first_col_ids:
            bf_id = dominant(first_col_ids)
            bf = border_fills.get(bf_id)
            if bf and bf.get('background'):
                structure['first_col_style'] = {
                    'bf_id': bf_id,
                    'background': bf.get('background'),
                }

        # Body: everything except the first row and first column
        body_ids = bf_ids((r, c) for r in range(1, rows) for c in range(1, cols))
        if body_ids:
            bf_id = dominant(body_ids)
            bf = border_fills.get(bf_id)
            structure['body_style'] = {
                'bf_id': bf_id,
                'background': bf.get('background') if bf else None,
            }

    def _create_style_summary(self, result: Dict) -> Dict:
        """Build a compact style summary (intended for AI prompts)."""
        summary = {
            '폰트': list(result['fonts'].values())[:3],
            '색상': {
                '배경색': result['colors']['background'],
                '테두리색': result['colors']['border'],
            },
            '표_스타일': [],
            '특수_테두리': [],
        }

        # Per-table summary
        for number, tbl in enumerate(result['tables'], start=1):
            structure = tbl['structure']
            tbl_summary = {
                '표번호': number,
                '크기': f"{tbl['rows']}행 × {tbl['cols']}열",
                '이미지셀': structure['has_image_cells'],
            }

            header = structure.get('header_row_style')
            if header:
                tbl_summary['헤더행'] = f"배경={header.get('background')}"

            first_col = structure.get('first_col_style')
            if first_col:
                tbl_summary['첫열'] = f"배경={first_col.get('background')}"

            body = structure.get('body_style')
            if body:
                tbl_summary['본문'] = f"배경={body.get('background') or '없음'}"

            summary['표_스타일'].append(tbl_summary)

        # Special borders, de-duplicated in first-seen order
        labels = (
            f"{sb['type_name']} {sb['width']} {sb['color']}"
            for sb in result['special_borders']
        )
        summary['특수_테두리'] = list(dict.fromkeys(labels))

        return summary

    def _generate_css(self, result: Dict) -> str:
        """Generate the template stylesheet.

        The primary font is the first collected font and the header
        background is the first collected background colour. Both originate
        from the uploaded file, so they are sanitised before interpolation;
        a value that does not survive falls back to the default.
        """
        fonts = list(result['fonts'].values())
        font_name = ''
        if fonts:
            font_name = _CSS_UNSAFE_FONT_CHARS_RE.sub('', str(fonts[0] or '')).strip()
        font_family = f"'{font_name or '맑은 고딕'}'"

        header_bg = next(
            (
                color for color in result['colors']['background']
                if isinstance(color, str) and _CSS_COLOR_RE.fullmatch(color)
            ),
            '#D6D6D6',
        )

        return f"""/* 템플릿 스타일 v3 - HWPX 구조 기반 */
@import url('https://fonts.googleapis.com/css2?family=Noto+Sans+KR:wght@300;400;500;700&display=swap');

:root {{
    --font-primary: 'Noto Sans KR', {font_family}, sans-serif;
    --color-header-bg: {header_bg};
    --color-border: #000000;
}}

body {{
    font-family: var(--font-primary);
    font-size: 10pt;
    line-height: 1.6;
    color: #000000;
}}

.sheet {{
    width: 210mm;
    min-height: 297mm;
    padding: 20mm;
    margin: 10px auto;
    background: white;
    box-shadow: 0 0 10px rgba(0,0,0,0.1);
}}

@media print {{
    .sheet {{
        margin: 0;
        box-shadow: none;
        page-break-after: always;
    }}
}}

/* 표 기본 */
table {{
    width: 100%;
    border-collapse: collapse;
    margin: 1em 0;
    font-size: 9pt;
}}

th, td {{
    border: 0.12mm solid var(--color-border);
    padding: 6px 8px;
    vertical-align: middle;
}}

/* 헤더 행 */
thead th,
tr:first-child th,
tr:first-child td {{
    background-color: var(--color-header-bg);
    font-weight: bold;
    text-align: center;
}}

/* 첫 열 (구분 열) - 배경색 */
td:first-child {{
    background-color: var(--color-header-bg);
    text-align: center;
    font-weight: 500;
}}

/* 본문 셀 - 배경 없음 */
td:not(:first-child) {{
    background-color: transparent;
}}

/* 2중선 테두리 (헤더 하단) */
thead tr:last-child th,
thead tr:last-child td,
tr:first-child th,
tr:first-child td {{
    border-bottom: 0.5mm double var(--color-border);
}}
"""

    # =========================================================================
    # Utilities
    # =========================================================================

    def _normalize_color(self, color: Optional[str]) -> Optional[str]:
        """Convert ``#AARRGGBB`` to ``#RRGGBB``.

        Empty values are returned unchanged, 'none' (any case, surrounding
        whitespace ignored) is returned as ``'none'``, and any other value
        is returned stripped.
        """
        if not color:
            return color
        color = color.strip()
        if color.lower() == 'none':
            return 'none'
        if color.startswith('#') and len(color) == 9:
            # Drop the two alpha digits that follow '#'.
            return '#' + color[3:]
        return color

    def _parse_width(self, width_str: str) -> float:
        """Parse a width such as ``'0.12 mm'`` into millimetres.

        Also accepts a unit written without a space (``'0.12mm'``). Returns
        0.1 when no number can be read.
        """
        if not width_str:
            return _DEFAULT_WIDTH_MM
        try:
            return float(width_str.split()[0])
        except (ValueError, IndexError, AttributeError):
            pass
        match = _LEADING_NUMBER_RE.match(str(width_str).strip())
        return float(match.group()) if match else _DEFAULT_WIDTH_MM

    def _extract_features(self, data: Dict) -> List[str]:
        """Build the short feature list shown for a template.

        Returns ``['기본 템플릿']`` when nothing noteworthy is found.
        """
        features = []

        fonts = list(data.get('fonts', {}).values())
        if fonts:
            features.append(f"폰트: {', '.join(fonts[:2])}")

        bg_colors = data.get('colors', {}).get('background', [])
        if bg_colors:
            features.append(f"배경색: {', '.join(bg_colors[:2])}")

        tables = data.get('tables', [])
        if any(t['structure']['has_image_cells'] for t in tables):
            features.append("이미지 배경 셀")

        special = data.get('special_borders', [])
        if special:
            # dict.fromkeys de-duplicates in first-seen order, so the two
            # names shown are the same on every run (a set is not ordered).
            type_names = list(dict.fromkeys(s['type_name'] for s in special))
            features.append(f"특수 테두리: {', '.join(type_names[:2])}")

        return features if features else ['기본 템플릿']

    def _analyze_fallback(self, ext: str) -> Dict:
        """Return a default style for HWP/PDF files, which are not parsed."""
        return {
            'version': 'v3',
            'fonts': {'0': '맑은 고딕'},
            'colors': {'background': [], 'border': ['#000000'], 'text': ['#000000']},
            'border_fills': {},
            'tables': [],
            'special_borders': [],
            'style_summary': {
                '폰트': ['맑은 고딕'],
                '색상': {'배경색': [], '테두리색': ['#000000']},
                '표_스타일': [],
                '특수_테두리': [],
            },
            'css': self._get_default_css(),
            'note': f'{ext} 파일은 기본 분석만 지원. HWPX 권장.',
        }

    def _get_default_css(self) -> str:
        """Return the stylesheet used when no structure could be analysed."""
        return """/* 기본 스타일 */
@import url('https://fonts.googleapis.com/css2?family=Noto+Sans+KR:wght@300;400;500;700&display=swap');

body {
    font-family: 'Noto Sans KR', sans-serif;
    font-size: 10pt;
}

.sheet {
    width: 210mm;
    min-height: 297mm;
    padding: 20mm;
    margin: 10px auto;
    background: white;
}

table {
    width: 100%;
    border-collapse: collapse;
}

th, td {
    border: 0.5pt solid #000;
    padding: 8px;
}

th {
    background: #D6D6D6;
}
"""