# -*- coding: utf-8 -*- """ 스타일 정의(Style) 추출 HWPX 실제 태그 (header.xml): charPrIDRef → charPr(글자모양), paraPrIDRef → paraPr(문단모양) 연결. 디폴트값 생성 안 함. """ import re def extract(raw_xml: dict, parsed: dict = None) -> list | None: """스타일 정의 추출. Returns: [ { "id": 0, "type": "PARA", "name": "바탕글", "engName": "Normal", "paraPrIDRef": 3, "charPrIDRef": 0, "nextStyleIDRef": 0, }, ... ] """ header_xml = _get_header_xml(raw_xml, parsed) if not header_xml: return None styles = re.findall(r'', header_xml) if not styles: return None result = [] for s in styles: item = {} for attr in ["id", "paraPrIDRef", "charPrIDRef", "nextStyleIDRef"]: m = re.search(rf'\b{attr}="(\d+)"', s) if m: item[attr] = int(m.group(1)) for attr in ["type", "name", "engName"]: m = re.search(rf'\b{attr}="([^"]*)"', s) if m: item[attr] = m.group(1) result.append(item) return result if result else None def _get_header_xml(raw_xml: dict, parsed: dict = None) -> str | None: if parsed and parsed.get("header_xml"): return parsed["header_xml"] if isinstance(raw_xml, dict): for name, content in raw_xml.items(): if "header" in name.lower() and isinstance(content, str): return content return raw_xml if isinstance(raw_xml, str) else None