# -*- coding: utf-8 -*- """ §9 구역 정의(Section) 추출 HWPX 실제 태그 (section0.xml): 디폴트값 생성 안 함. """ import re def extract(raw_xml: dict, parsed: dict = None) -> dict | None: """§9 구역 속성 추출. Returns: { "textDirection": "HORIZONTAL", "hideFirstHeader": False, "hideFirstFooter": False, "pageNum": {"pos": "BOTTOM_CENTER", "formatType": "DIGIT", "sideChar": "-"}, "startNum": {"page": 0}, "colDef": None, } """ section_xml = _get_section_xml(raw_xml, parsed) if not section_xml: return None sec_match = re.search( r']*)>(.*?)', section_xml, re.DOTALL ) if not sec_match: return None attrs_str = sec_match.group(1) inner = sec_match.group(2) result = {} # textDirection td = re.search(r'\btextDirection="([^"]+)"', attrs_str) if td: result["textDirection"] = td.group(1) # visibility vis = re.search(r'', inner) if vis: v = vis.group(1) for attr in ["hideFirstHeader", "hideFirstFooter", "hideFirstMasterPage", "hideFirstPageNum", "hideFirstEmptyLine"]: m = re.search(rf'\b{attr}="(\d+)"', v) if m: result[attr] = bool(int(m.group(1))) # startNum sn = re.search(r'', inner) if sn: sns = sn.group(1) start = {} pso = re.search(r'\bpageStartsOn="([^"]+)"', sns) if pso: start["pageStartsOn"] = pso.group(1) pg = re.search(r'\bpage="(\d+)"', sns) if pg: start["page"] = int(pg.group(1)) if start: result["startNum"] = start # pageNum pn = re.search(r'', inner) if pn: pns = pn.group(1) pagenum = {} for attr in ["pos", "formatType", "sideChar"]: m = re.search(rf'\b{attr}="([^"]*)"', pns) if m: pagenum[attr] = m.group(1) if pagenum: result["pageNum"] = pagenum # colDef (단 설정) cd = re.search(r']*)>(.*?)', inner, re.DOTALL) if cd: cds = cd.group(1) coldef = {} cnt = re.search(r'\bcount="(\d+)"', cds) if cnt: coldef["count"] = int(cnt.group(1)) layout = re.search(r'\blayout="([^"]+)"', cds) if layout: coldef["layout"] = layout.group(1) if coldef: result["colDef"] = coldef return result if result else None def _get_section_xml(raw_xml: dict, parsed: dict = None) -> str | None: if parsed and parsed.get("section_xml"): return parsed["section_xml"] if isinstance(raw_xml, dict): for name, content in raw_xml.items(): if "section" in name.lower() and isinstance(content, str): return content return raw_xml if isinstance(raw_xml, str) else None