# -*- coding: utf-8 -*- """ §3 글꼴(FaceName) 추출 HWPX 실제 태그 (header.xml): 디폴트값 생성 안 함. 추출 실패 시 None 반환. """ import re def extract(raw_xml: dict, parsed: dict = None) -> dict | None: """§3 fontface에서 언어별 글꼴 정의 추출. Returns: { "HANGUL": [{"id": 0, "face": "돋움", "type": "TTF"}, ...], "LATIN": [{"id": 0, "face": "돋움", "type": "TTF"}, ...], "HANJA": [...], ... } 또는 추출 실패 시 None """ header_xml = _get_header_xml(raw_xml, parsed) if not header_xml: return None result = {} # fontface 블록을 lang별로 추출 fontface_blocks = re.findall( r']*\blang="([^"]+)"[^>]*>(.*?)', header_xml, re.DOTALL ) if not fontface_blocks: return None for lang, block_content in fontface_blocks: fonts = [] font_matches = re.finditer( r']*' r'\bid="(\d+)"[^>]*' r'\bface="([^"]+)"[^>]*' r'\btype="([^"]+)"', block_content ) for fm in font_matches: fonts.append({ "id": int(fm.group(1)), "face": fm.group(2), "type": fm.group(3), }) if fonts: result[lang] = fonts return result if result else None def _get_header_xml(raw_xml: dict, parsed: dict = None) -> str | None: """header.xml 문자열을 가져온다.""" if parsed and parsed.get("header_xml"): return parsed["header_xml"] if isinstance(raw_xml, dict): for name, content in raw_xml.items(): if "header" in name.lower() and isinstance(content, str): return content if isinstance(raw_xml, str): return raw_xml return None