From 62e9cd54d015e4cf98255a591d766dfdf1bb18ab Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EC=9D=B4=EA=B2=BD=EB=AF=BC?= Date: Thu, 19 Mar 2026 09:02:25 +0900 Subject: [PATCH] Upload hwpx_generator.py --- 03.Code/업로드용/converters/hwpx_generator.py | 468 ++++++++++++++++++ 1 file changed, 468 insertions(+) create mode 100644 03.Code/업로드용/converters/hwpx_generator.py diff --git a/03.Code/업로드용/converters/hwpx_generator.py b/03.Code/업로드용/converters/hwpx_generator.py new file mode 100644 index 0000000..053888e --- /dev/null +++ b/03.Code/업로드용/converters/hwpx_generator.py @@ -0,0 +1,468 @@ +""" +HWPX + 깃린 +StyleAnalyzer 寃곌낵瑜 諛 + ㅽ⑸ HWPX +import os +import zipfile +import xml.etree.ElementTree as ET +from typing import List, Dict, Optional +from dataclasses import dataclass +from pathlib import Path + +from style_analyzer import StyleAnalyzer, StyledElement +from hwp_style_mapping import HwpStyleMapper, HwpStyle, ROLE_TO_STYLE_NAME + + +@dataclass +class HwpxConfig: + """HWPX + ㅼ """ + paper_width: int = 59528 # A4 + 鍮 + (hwpunit, 1/7200 inch) + paper_height: int = 84188 # A4 蹂몄шщ" + default_font_size: int = 1000 # 10pt (hwpunit) + + + +class HwpxGenerator: + """HWPX + 깃린""" + + def __init__(self, config: Optional[HwpxConfig] = None): + self.config = config or HwpxConfig() + self.mapper = HwpStyleMapper() + self.used_styles: set = set() + + def generate(self, elements: List[StyledElement], output_path: str) -> str: + """ + StyledElement 由ъㅽ몃遺 + HWPX + + + Args: + elements: StyleAnalyzer濡 遺 + 瑜 由ъㅽ + output_path: 異 (.hwpx) + + Returns: + + 깅 + """ + # ъ⑸ ㅽ + self.used_styles = {e.role for e in elements} + + # + 대 + + temp_dir = Path(output_path).with_suffix('.temp') + temp_dir.mkdir(parents=True, exist_ok=True) + + try: + # HWPX 援ъ“ + + self._create_mimetype(temp_dir) + self._create_meta_inf(temp_dir) + self._create_version(temp_dir) + self._create_header(temp_dir) + self._create_content(temp_dir, elements) + self._create_settings(temp_dir) + + # ZIP쇰 異 + self._create_hwpx(temp_dir, output_path) + + return output_path + + finally: + # + 由 + import shutil + if temp_dir.exists(): + shutil.rmtree(temp_dir) + + def _create_mimetype(self, temp_dir: Path): + """mimetype + """ + mimetype_path = temp_dir / "mimetype" + mimetype_path.write_text("application/hwp+zip") + + def _create_meta_inf(self, temp_dir: Path): + """META-INF/manifest.xml + """ + meta_dir = temp_dir / "META-INF" + meta_dir.mkdir(exist_ok=True) + + manifest = """ + + + + + + +""" + + (meta_dir / "manifest.xml").write_text(manifest, encoding='utf-8') + + def _create_version(self, temp_dir: Path): + """version.xml + """ + version = """ +""" + + (temp_dir / "version.xml").write_text(version, encoding='utf-8') + + def _create_header(self, temp_dir: Path): + """Contents/header.xml + (ㅽы )""" + contents_dir = temp_dir / "Contents" + contents_dir.mkdir(exist_ok=True) + + # ㅽ + + + + char_props_xml = self._generate_char_properties() + para_props_xml = self._generate_para_properties() + styles_xml = self._generate_styles_xml() + + header = f""" + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +{char_props_xml} +{para_props_xml} +{styles_xml} + + + + + +""" + + (contents_dir / "header.xml").write_text(header, encoding='utf-8') + + def _generate_char_properties(self) -> str: + """湲 + XML + """ + lines = [f' '] + + # 湲곕낯 湲 + (id=0) + lines.append(''' + + + + + + + + + + ''') + + # 湲 + + for idx, role in enumerate(sorted(self.used_styles), start=1): + style = self.mapper.get_style(role) + height = int(style.font_size * 100) # pt hwpunit + color = style.font_color.lstrip('#') + font_id = "1" if style.font_bold else "0" # 援듦 硫 蹂몄шщ + + lines.append(f''' + + + + + + + + + + ''') + + lines.append(' ') + return '\n'.join(lines) + + def _generate_para_properties(self) -> str: + """臾몃 + XML + """ + lines = [f' '] + + # 湲곕낯 臾몃 + (id=0) + lines.append(''' + + + + + + + + + + + + + + + + ''') + + # 臾몃 + + align_map = {"left": "LEFT", "center": "CENTER", "right": "RIGHT", "justify": "JUSTIFY"} + + for idx, role in enumerate(sorted(self.used_styles), start=1): + style = self.mapper.get_style(role) + align_val = align_map.get(style.align, "JUSTIFY") + line_spacing = int(style.line_spacing) + left_margin = int(style.indent_left * 100) + indent = int(style.indent_first * 100) + space_before = int(style.space_before * 100) + space_after = int(style.space_after * 100) + + lines.append(f''' + + + + + + + + + + + + + + + + ''') + + lines.append(' ') + return '\n'.join(lines) + + def _generate_styles_xml(self) -> str: + """ㅽ + (charPrIDRef, paraPrIDRef 李몄“)""" + lines = [f' '] + + # 湲곕낯 ㅽ湲) + lines.append(' ') + + # ㅽ + (蹂몃Ц + ㅽ XML + + paragraphs = [] + current_table = None + + # ㅽ깆 留ㅽ + + role_to_idx = {role: idx for idx, role in enumerate(sorted(self.used_styles), start=1)} + + for elem in elements: + style = self.mapper.get_style(elem.role) + style_idx = role_to_idx.get(elem.role, 0) + + # 뱀 + if elem.role in ["TH", "TD", "TABLE_CAPTION", "TABLE", "FIGURE"]: + continue # /洹몃┝ 蹂 + + 泥 + + # 몃 + para_xml = self._create_paragraph(elem.text, style, style_idx) + paragraphs.append(para_xml) + + section = f""" + +{"".join(paragraphs)} +""" + + (contents_dir / "section0.xml").write_text(section, encoding='utf-8') + + def _create_paragraph(self, text: str, style: HwpStyle, style_idx: int) -> str: + """ XML + """ + text = self._escape_xml(text) + + return f''' + + + {text} + + ''' + + def _escape_xml(self, text: str) -> str: + """XML 뱀몄ㅼ + """ + return (text + .replace("&", "&") + .replace("<", "<") + .replace(">", ">") + .replace('"', """) + .replace("'", "'")) + + def _create_settings(self, temp_dir: Path): + """settings.xml + """ + settings = """ + + + + + +""" + + (temp_dir / "settings.xml").write_text(settings, encoding='utf-8') + + def _create_hwpx(self, temp_dir: Path, output_path: str): + """HWPX + (ZIP 異)""" + with zipfile.ZipFile(output_path, 'w', zipfile.ZIP_DEFLATED) as zf: + # mimetype 異 泥 踰吏몃 + mimetype_path = temp_dir / "mimetype" + zf.write(mimetype_path, "mimetype", compress_type=zipfile.ZIP_STORED) + + # 몄 + for root, dirs, files in os.walk(temp_dir): + for file in files: + if file == "mimetype": + continue + file_path = Path(root) / file + arcname = file_path.relative_to(temp_dir) + zf.write(file_path, arcname) + + +def convert_html_to_hwpx(html: str, output_path: str) -> str: + """ + HTML HWPX 蹂몄 + output_path: 異 + + Returns: + + 깅 + """ + # 1. HTML 遺 + + 遺 + 猷: {len(elements)}媛 ") + for role, count in analyzer.get_role_summary().items(): + print(f" {role}: {count}") + + # 2. HWPX + + generator = HwpxGenerator() + result_path = generator.generate(elements, output_path) + + print(f" + 猷: {result_path}") + return result_path + + +if __name__ == "__main__": + # +ㅽ + test_html = """ + + +
+

嫄댁 + ㅒ룻紐 痢〓 DX ㅻТ吏移

+

濡 /UAV쨌GIS쨌吏 /吏諛⑤ 湲곕 + + 1

+
+ +

1. 媛

+

蹂 蹂닿 + 嫄댁 + 諛 紐 遺 + 쇱〓 吏 + ㅻТ 吏移 + 怨듯⑸.

+ +

1.1 諛곌꼍

+

理洹 濡怨 GIS 湲곗 + 쇰 痢〓 +臾닿 ш 蹂 듬.

+ +

1.1.1 湲곗

+

1) 濡 痢〓 +

+

濡 + ⑺ 痢〓 湲곗〈 諛⑹鍮 + 깆ш 듬.

+ +

(1) RTK 濡

+

ㅼ媛 + 蹂댁 湲곕μ + 媛異濡 듬.

+ +
    +
  • 怨諛 GPS 湲 댁
  • +
  • + 쇳곕 + ㅼ + +
  • +
+ + + """ + + output = "/home/claude/test_output.hwpx" + convert_html_to_hwpx(test_html, output)