# File: test/handlers/tools/font.py
# Original listing: 82 lines, 2.2 KiB, Python

# -*- coding: utf-8 -*-
"""
§3 글꼴(FaceName) 추출
HWPX 실제 태그 (header.xml):
<hh:fontface lang="HANGUL" fontCnt="9">
<hh:font id="0" face="돋움" type="TTF" isEmbedded="0">
<hh:font id="1" face="맑은 고딕" type="TTF" isEmbedded="0">
</hh:fontface>
<hh:fontface lang="LATIN" fontCnt="9">
<hh:font id="0" face="돋움" type="TTF" isEmbedded="0">
</hh:fontface>
디폴트값 생성 안 함. 추출 실패 시 None 반환.
"""
import re
# One non-self-closing <hh:fontface ...> ... </hh:fontface> block.
# group(1) = attribute text of the start tag, group(2) = block content.
# The (?<!/) guard rejects a self-closing <hh:fontface .../> so that it cannot
# swallow the content of the following block.
_FONTFACE_RE = re.compile(
    r'<hh:fontface(?=[\s>])([^>]*)(?<!/)>(.*?)</hh:fontface>',
    re.DOTALL,
)

# One <hh:font ...> start tag (self-closing or not); group(1) = attribute text.
# The lookahead keeps longer element names that merely start with "hh:font"
# from matching.
_FONT_TAG_RE = re.compile(r'<hh:font(?=[\s/>])([^>]*)>')

# A single name="value" or name='value' attribute.
_ATTR_RE = re.compile(r"""([\w:.-]+)\s*=\s*(?:"([^"]*)"|'([^']*)')""")

# Font ids must consist of digits only.
_DIGITS_RE = re.compile(r'\d+')


def _parse_attrs(attr_text: str) -> dict:
    """Parse the attribute text of one start tag into a name -> raw value dict."""
    return {
        m.group(1): m.group(2) if m.group(2) is not None else m.group(3)
        for m in _ATTR_RE.finditer(attr_text)
    }


def extract(raw_xml: dict, parsed: dict | None = None) -> dict | None:
    """Extract the per-language font definitions from the §3 fontface section.

    The header text is obtained from ``_get_header_xml(raw_xml, parsed)`` and
    scanned for ``<hh:fontface lang="...">`` blocks. Inside each block every
    ``<hh:font>`` start tag that carries a digits-only ``id`` and non-empty
    ``face`` and ``type`` attributes becomes one entry. Attributes are read by
    name, so their order inside the tag does not matter.

    Args:
        raw_xml: Forwarded unchanged to ``_get_header_xml``.
        parsed: Forwarded unchanged to ``_get_header_xml``.

    Returns:
        A dict keyed by the ``lang`` attribute value, e.g.::

            {
                "HANGUL": [{"id": 0, "face": "돋움", "type": "TTF"}, ...],
                "LATIN": [{"id": 0, "face": "돋움", "type": "TTF"}, ...],
            }

        Attribute values are returned as written in the XML (entities are not
        unescaped). Languages without any usable font are omitted. If two
        blocks share a ``lang``, the later one replaces the earlier one.
        Returns None when no header text is available or nothing could be
        extracted; no default values are generated.
    """
    header_xml = _get_header_xml(raw_xml, parsed)
    # The patterns are str patterns; anything else (e.g. bytes) counts as an
    # extraction failure rather than raising TypeError.
    if not header_xml or not isinstance(header_xml, str):
        return None

    result = {}
    for fontface in _FONTFACE_RE.finditer(header_xml):
        lang = _parse_attrs(fontface.group(1)).get("lang")
        if not lang:
            continue

        fonts = []
        for font_tag in _FONT_TAG_RE.finditer(fontface.group(2)):
            attrs = _parse_attrs(font_tag.group(1))
            font_id = attrs.get("id", "")
            face = attrs.get("face")
            font_type = attrs.get("type")
            # Skip incomplete definitions instead of inventing defaults.
            if not (_DIGITS_RE.fullmatch(font_id) and face and font_type):
                continue
            fonts.append({
                "id": int(font_id),
                "face": face,
                "type": font_type,
            })

        if fonts:
            result[lang] = fonts

    return result or None
def _get_header_xml(raw_xml: dict, parsed: dict = None) -> str | None:
"""header.xml 문자열을 가져온다."""
if parsed and parsed.get("header_xml"):
return parsed["header_xml"]
if isinstance(raw_xml, dict):
for name, content in raw_xml.items():
if "header" in name.lower() and isinstance(content, str):
return content
if isinstance(raw_xml, str):
return raw_xml
return None