']
# 湲곕낯 ㅽ湲)
lines.append(' ')
# ㅽ
(蹂몃Ц + ㅽ XML
paragraphs = []
current_table = None
# ㅽ깆 留ㅽ
role_to_idx = {role: idx for idx, role in enumerate(sorted(self.used_styles), start=1)}
for elem in elements:
style = self.mapper.get_style(elem.role)
style_idx = role_to_idx.get(elem.role, 0)
# 뱀
if elem.role in ["TH", "TD", "TABLE_CAPTION", "TABLE", "FIGURE"]:
continue # /洹몃┝ 蹂
泥
# 몃
para_xml = self._create_paragraph(elem.text, style, style_idx)
paragraphs.append(para_xml)
section = f"""
{"".join(paragraphs)}
"""
(contents_dir / "section0.xml").write_text(section, encoding='utf-8')
def _create_paragraph(self, text: str, style: HwpStyle, style_idx: int) -> str:
    """Return the paragraph string for one text element.

    The text is passed through ``self._escape_xml`` and then interpolated
    into the paragraph template.

    NOTE(review): as the template stands it contains only the escaped text
    surrounded by newlines, and neither ``style`` nor ``style_idx`` is read.
    The paragraph/run markup that would carry the style index looks like it
    is missing from the literal -- confirm against the intended HWPX output.

    Args:
        text: Raw paragraph text; escaped before interpolation.
        style: Style object for the element (not read by the visible body).
        style_idx: Style index for the element (not read by the visible body).

    Returns:
        The template string with the escaped text substituted.
    """
    text = self._escape_xml(text)
    return f'''
{text}
'''
def _escape_xml(self, text: str) -> str:
"""XML 뱀몄ㅼ
"""
return (text
.replace("&", "&")
.replace("<", "<")
.replace(">", ">")
.replace('"', """)
.replace("'", "'"))
def _create_settings(self, temp_dir: Path):
    """Write ``settings.xml`` into ``temp_dir`` using UTF-8 encoding.

    NOTE(review): the literal written below consists of a single newline,
    so the resulting file has no XML content. The settings markup looks
    like it is missing from the literal -- confirm the intended contents.

    Args:
        temp_dir: Directory in which ``settings.xml`` is created.
    """
    settings = """
"""
    (temp_dir / "settings.xml").write_text(settings, encoding='utf-8')
def _create_hwpx(self, temp_dir: Path, output_path: str):
"""HWPX
(ZIP 異)"""
with zipfile.ZipFile(output_path, 'w', zipfile.ZIP_DEFLATED) as zf:
# mimetype 異 泥 踰吏몃
mimetype_path = temp_dir / "mimetype"
zf.write(mimetype_path, "mimetype", compress_type=zipfile.ZIP_STORED)
# 몄
for root, dirs, files in os.walk(temp_dir):
for file in files:
if file == "mimetype":
continue
file_path = Path(root) / file
arcname = file_path.relative_to(temp_dir)
zf.write(file_path, arcname)
def convert_html_to_hwpx(html: str, output_path: str) -> str:
"""
HTML HWPX 蹂몄
output_path: 異
Returns:
깅
"""
# 1. HTML 遺
遺
猷: {len(elements)}媛 ")
for role, count in analyzer.get_role_summary().items():
print(f" {role}: {count}")
# 2. HWPX
generator = HwpxGenerator()
result_path = generator.generate(elements, output_path)
print(f"
猷: {result_path}")
return result_path
if __name__ == "__main__":
    # Manual smoke run: convert the sample text below and write the result
    # to a fixed output path.
    # NOTE(review): the sample contains no HTML tags and its text looks
    # mis-encoded -- confirm it still matches the intended test document.
    # NOTE(review): the output path is a hard-coded absolute location.
    test_html = """
嫄댁
ㅒ룻紐 痢〓 DX ㅻТ吏移
濡 /UAV쨌GIS쨌吏 /吏諛⑤ 湲곕
1
1. 媛
蹂 蹂닿
嫄댁
諛 紐 遺
쇱〓 吏
ㅻТ 吏移
怨듯⑸.
1.1 諛곌꼍
理洹 濡怨 GIS 湲곗
쇰 痢〓
臾닿 ш 蹂 듬.
1.1.1 湲곗
1) 濡 痢〓
濡
⑺ 痢〓 湲곗〈 諛⑹鍮
깆ш 듬.
(1) RTK 濡
ㅼ媛
蹂댁 湲곕μ
媛異濡 듬.
"""
    output = "/home/claude/test_output.hwpx"
    convert_html_to_hwpx(test_html, output)