v8:문서유형 분석등록 및 추출_20260206
This commit is contained in:
127
handlers/tools/border_fill.py
Normal file
127
handlers/tools/border_fill.py
Normal file
@@ -0,0 +1,127 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
§2 테두리/배경(BorderFill) 추출
|
||||
|
||||
HWPX 실제 태그 (header.xml):
|
||||
<hh:borderFill id="3" threeD="0" shadow="0" centerLine="NONE" ...>
|
||||
<hh:leftBorder type="SOLID" width="0.12 mm" color="#000000"/>
|
||||
<hh:rightBorder type="SOLID" width="0.12 mm" color="#000000"/>
|
||||
<hh:topBorder type="SOLID" width="0.12 mm" color="#000000"/>
|
||||
<hh:bottomBorder type="SOLID" width="0.12 mm" color="#000000"/>
|
||||
<hh:diagonal type="SOLID" width="0.1 mm" color="#000000"/>
|
||||
<hc:fillBrush>
|
||||
<hc:winBrush faceColor="#EDEDED" hatchColor="#FFE7E7E7" alpha="0"/>
|
||||
</hc:fillBrush>
|
||||
</hh:borderFill>
|
||||
|
||||
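No default result is fabricated: extract() returns None when nothing can be extracted. (Attributes missing on a border tag that is present are still filled with type=NONE, width=0.12mm, color=#000000.)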
|
||||
"""
|
||||
|
||||
import re
|
||||
|
||||
from domain.hwpx.hwpx_utils import BORDER_TYPE_TO_CSS, hwpx_border_to_css
|
||||
|
||||
|
||||
def extract(raw_xml: dict, parsed: dict = None) -> dict | None:
    """Extract every ``<hh:borderFill>`` definition from the header XML.

    The header text is obtained via ``_get_header_xml(raw_xml, parsed)`` and
    scanned with regular expressions (no XML parser is involved).

    Args:
        raw_xml: Passed through to ``_get_header_xml``.
        parsed: Passed through to ``_get_header_xml``.

    Returns:
        A dict keyed by the integer borderFill id, or ``None`` when no header
        text is available or no ``<hh:borderFill>`` with a numeric ``id``
        attribute is found. Each value looks like::

            {
                "id": 3,
                "left":     {"type": "SOLID", "width": "0.12mm", "color": "#000000"},
                "right":    {...},
                "top":      {...},
                "bottom":   {...},
                "diagonal": {...},
                "background": "#EDEDED",
                "css": {
                    "border-left": <hwpx_border_to_css(left)>,
                    "border-right": ...,
                    "border-top": ...,
                    "border-bottom": ...,
                    "background-color": "#EDEDED",
                },
            }

        Notes on the shape:

        * A side key ("left", "right", "top", "bottom", "diagonal") is present
          only when the matching tag exists inside the borderFill element.
        * For a tag that exists, missing attributes fall back to
          ``type="NONE"``, ``width="0.12mm"``, ``color="#000000"``; spaces are
          removed from the width value ("0.12 mm" -> "0.12mm").
        * "background" is set only when a ``<hc:winBrush>`` carries a
          ``faceColor`` other than "none" (compared case-insensitively).
        * "css" always holds the four ``border-<side>`` keys; a side without
          data is written as the string "none". The diagonal is not converted
          to CSS.
    """
    xml_text = _get_header_xml(raw_xml, parsed)
    if not xml_text:
        return None

    # (result key, HWPX tag name) in the order the keys are inserted.
    border_tags = (
        ("left", "leftBorder"),
        ("right", "rightBorder"),
        ("top", "topBorder"),
        ("bottom", "bottomBorder"),
        ("diagonal", "diagonal"),
    )
    css_edges = ("left", "right", "top", "bottom")

    extracted = {}
    for fill in re.finditer(
        r'<hh:borderFill\b([^>]*)>(.*?)</hh:borderFill>',
        xml_text, re.DOTALL
    ):
        head_attrs, body = fill.groups()

        id_match = re.search(r'\bid="(\d+)"', head_attrs)
        if id_match is None:
            # Without a numeric id the definition cannot be referenced; skip.
            continue
        fill_id = int(id_match.group(1))
        entry = {"id": fill_id}

        # Four edges plus the diagonal. The whole tag is located first and
        # each attribute is then looked up on its own, so attribute order in
        # the XML does not matter.
        for key, tag in border_tags:
            tag_match = re.search(rf'<hh:{tag}\b([^/]*?)/?>', body)
            if tag_match is None:
                continue
            attr_text = tag_match.group(1)
            kind = re.search(r'\btype="([^"]+)"', attr_text)
            thickness = re.search(r'\bwidth="([^"]+)"', attr_text)
            colour = re.search(r'\bcolor="([^"]+)"', attr_text)

            border = {}
            border["type"] = kind.group(1) if kind else "NONE"
            if thickness:
                border["width"] = thickness.group(1).replace(" ", "")
            else:
                border["width"] = "0.12mm"
            border["color"] = colour.group(1) if colour else "#000000"
            entry[key] = border

        # Background colour: fillBrush > winBrush faceColor.
        brush = re.search(
            r'<hc:winBrush\b[^>]*\bfaceColor="([^"]+)"', body
        )
        if brush:
            face_colour = brush.group(1)
            if face_colour and face_colour.lower() != "none":
                entry["background"] = face_colour

        # Convenience CSS view. Every edge gets a key; edges with no
        # extracted data are emitted as "none".
        style = {}
        for edge in css_edges:
            edge_data = entry.get(edge)
            style[f"border-{edge}"] = (
                hwpx_border_to_css(edge_data) if edge_data else "none"
            )
        if "background" in entry:
            style["background-color"] = entry["background"]

        # `style` always contains the four border keys, so it is never empty.
        entry["css"] = style
        extracted[fill_id] = entry

    return extracted or None
|
||||
|
||||
|
||||
def _get_header_xml(raw_xml: dict, parsed: dict = None) -> str | None:
|
||||
if parsed and parsed.get("header_xml"):
|
||||
return parsed["header_xml"]
|
||||
if isinstance(raw_xml, dict):
|
||||
for name, content in raw_xml.items():
|
||||
if "header" in name.lower() and isinstance(content, str):
|
||||
return content
|
||||
return raw_xml if isinstance(raw_xml, str) else None
|
||||
Reference in New Issue
Block a user