Files
_Geulbeot/03.Code/업로드용/converters/hwpx_style_injector.py

804 lines
29 KiB
Python

"""
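HWPX style injector.

Post-processes an HWPX file (for example one produced with pyhwpx) by
injecting custom paragraph styles into it.

Workflow for an existing HWPX file:
1. Unpack the HWPX archive.
2. Add the custom style definitions to header.xml.
3. Map styleIDRef in section*.xml to the injected styles.
"""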
import os
import re
import zipfile
import shutil
import tempfile
from pathlib import Path
from typing import Dict, List, Optional
from dataclasses import dataclass
@dataclass
class StyleDefinition:
""" (-1=, 0=1, 1=2, ...)
# ㅽ ㅽ
ROLE_STYLES: Dict[str, StyleDefinition] = {
# 媛 臾몃 ( 踰 留ㅺ린湲!)
'H1': StyleDefinition(
id=101, name='1 紐', font_size=2200, font_bold=True,
font_color='#006400', align='CENTER', line_spacing=200,
indent_left=0, indent_first=0, space_before=400, space_after=200,
outline_level=0 # ^1
),
'H2': StyleDefinition(
id=102, name='1.1 紐', font_size=1500, font_bold=True,
font_color='#03581d', align='LEFT', line_spacing=200,
indent_left=0, indent_first=0, space_before=300, space_after=100,
outline_level=1 # ^1.^2
),
'H3': StyleDefinition(
id=103, name='1.1.1 紐', font_size=1400, font_bold=True,
font_color='#228B22', align='LEFT', line_spacing=200,
indent_left=500, indent_first=0, space_before=200, space_after=100,
outline_level=2 # ^1.^2.^3
),
'H4': StyleDefinition(
id=104, name='媛. 紐', font_size=1300, font_bold=True,
font_color='#000000', align='LEFT', line_spacing=200,
indent_left=1000, indent_first=0, space_before=150, space_after=50,
outline_level=3 # ^4.
),
'H5': StyleDefinition(
id=105, name='1) 紐', font_size=1200, font_bold=True,
font_color='#000000', align='LEFT', line_spacing=200,
indent_left=1500, indent_first=0, space_before=100, space_after=50,
outline_level=4 # ^5)
),
'H6': StyleDefinition(
id=106, name='媛) 紐', font_size=1150, font_bold=True,
font_color='#000000', align='LEFT', line_spacing=200,
indent_left=2000, indent_first=0, space_before=100, space_after=50,
outline_level=5 # ^6)
),
'H7': StyleDefinition(
id=115, name='', font_size=1100, font_bold=True,
font_color='#000000', align='LEFT', line_spacing=200,
indent_left=2300, indent_first=0, space_before=100, space_after=50,
outline_level=6 # ^7 (몄몃Ц ㅽ
蹂몃Ц', font_size=1100, font_bold=False,
font_color='#000000', align='JUSTIFY', line_spacing=200,
indent_left=1500, indent_first=0, space_before=0, space_after=0
),
'LIST_ITEM': StyleDefinition(
id=108, name='蹂몃Ц', font_size=1050, font_bold=False,
font_color='#000000', align='JUSTIFY', line_spacing=200,
indent_left=2500, indent_first=0, space_before=0, space_after=0
),
'TABLE_CAPTION': StyleDefinition(
id=109, name='< 紐>', font_size=1100, font_bold=True,
font_color='#000000', align='LEFT', line_spacing=130,
indent_left=0, indent_first=0, space_before=200, space_after=100
),
'FIGURE_CAPTION': StyleDefinition(
id=110, name='<洹몃┝ 紐>', font_size=1100, font_bold=True,
font_color='#000000', align='CENTER', line_spacing=130,
indent_left=0, indent_first=0, space_before=100, space_after=200
),
'COVER_TITLE': StyleDefinition(
id=111, name='吏紐', font_size=2800, font_bold=True,
font_color='#1a365d', align='CENTER', line_spacing=150,
indent_left=0, indent_first=0, space_before=0, space_after=200
),
'COVER_SUBTITLE': StyleDefinition(
id=112, name='吏遺', font_size=1800, font_bold=False,
font_color='#2d3748', align='CENTER', line_spacing=150,
indent_left=0, indent_first=0, space_before=0, space_after=100
),
'TOC_1': StyleDefinition(
id=113, name='紐⑹감1', font_size=1200, font_bold=True,
font_color='#000000', align='LEFT', line_spacing=180,
indent_left=0, indent_first=0, space_before=100, space_after=50
),
'TOC_2': StyleDefinition(
id=114, name='紐⑹감2', font_size=1100, font_bold=False,
font_color='#000000', align='LEFT', line_spacing=180,
indent_left=500, indent_first=0, space_before=0, space_after=0
),
}
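# NOTE: outline numbering matters here.
# The heading paraPr entries are emitted with idRef="0", which refers to the
# default numbering (numbering id=1), so that numbering definition is
# rewritten in place.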
class HwpxStyleInjector:
"""HWPX ㅽ
"""
def __init__(self):
self.temp_dir: Optional[Path] = None
self.role_to_style_id: Dict[str, int] = {}
self.role_to_para_id: Dict[str, int] = {} #
self.role_to_char_id: Dict[str, int] = {} #
self.next_char_id = 0
self.next_para_id = 0
self.next_style_id = 0
def _find_max_ids(self):
""" ㅽ : 諛湲(id=0)留 吏, 몄 곕━ ㅽ 援"""
header_path = self.temp_dir / "Contents" / "header.xml"
if not header_path.exists():
self.next_char_id = 1
self.next_para_id = 1
self.next_style_id = 1
return
content = header_path.read_text(encoding='utf-8')
# 湲곗〈 "蹂몃Ц", "媛 1~10" ㅽ嫄 (id=1~22)
# 諛湲(id=0)留 吏!
# style id=1~30 嫄 (諛湲 )
content = re.sub(r'<hh:style id="([1-9]|[12]\d|30)"[^/]*/>\s*', '', content)
# itemCnt
#
header_path.write_text(content, encoding='utf-8')
print(f" [INFO] 湲곗〈 ㅽ 1~10 ) 嫄
")
# charPr, paraPr 湲곗〈 寃 ㅼ遺
(李몄 源⑥吏
濡ㅽ 1
! (Ctrl+2 = id=1, Ctrl+3 = id=2, ...)
self.next_style_id = 1
def inject(self, hwpx_path: str, role_positions: Dict[str, List[tuple]]) -> str:
"""
HWPX ㅼㅽ
Args:
hwpx_path: HWPX
role_positions: 蹂
移蹂 {role: [(section_idx, para_idx), ...]}
Returns:
HWPX
"""
print(f"\n HWPX ㅽ
...")
print(f"
: {hwpx_path}")
# 1.
self.temp_dir = Path(tempfile.mkdtemp(prefix='hwpx_inject_'))
print(f" 대 : {self.temp_dir}")
try:
with zipfile.ZipFile(hwpx_path, 'r') as zf:
zf.extractall(self.temp_dir)
# 異 댁 吏
section ш린 湲곗 ID 李얘린 ( ID
)
self._find_max_ids()
print(f" [DEBUG] Starting IDs: char={self.next_char_id}, para={self.next_para_id}, style={self.next_style_id}")
# 2. header.xmlㅽ媛
used_roles = set(role_positions.keys())
self._inject_header_styles(used_roles)
# 3. section*.xml
self._inject_section_styles(role_positions)
# 4. ㅼ 異
output_path = hwpx_path # 댁곌린
self._repack_hwpx(output_path)
print(f"
: {output_path}")
return output_path
finally:
#
if self.temp_dir and self.temp_dir.exists():
shutil.rmtree(self.temp_dir)
def _inject_header_styles(self, used_roles: set):
"""header.xmlㅽ媛 (紐⑤ ROLE_STYLES 二쇱
)"""
header_path = self.temp_dir / "Contents" / "header.xml"
if not header_path.exists():
print(" [寃쎄 ] header.xml ")
return
content = header_path.read_text(encoding='utf-8')
# 紐⑤ ROLE_STYLES 二쇱
(used_roles 臾댁)
char_props = []
para_props = []
styles = []
for role, style_def in ROLE_STYLES.items():
char_id = self.next_char_id
para_id = self.next_para_id
style_id = self.next_style_id
self.role_to_style_id[role] = style_id
self.role_to_para_id[role] = para_id #
self.role_to_char_id[role] = char_id #
# charPr
char_props.append(self._make_char_pr(char_id, style_def))
# paraPr
para_props.append(self._make_para_pr(para_id, style_def))
# style
styles.append(self._make_style(style_id, style_def.name, para_id, char_id))
self.next_char_id += 1
self.next_para_id += 1
self.next_style_id += 1
if not styles:
print(" [ 蹂 ] 二쇱
")
return
# charProperties媛
content = self._insert_before_tag(
content, '</hh:charProperties>', '\n'.join(char_props) + '\n'
)
# paraProperties媛
content = self._insert_before_tag(
content, '</hh:paraProperties>', '\n'.join(para_props) + '\n'
)
# styles媛
content = self._insert_before_tag(
content, '</hh:styles>', '\n'.join(styles) + '\n'
)
# numbering id=1 ⑦
(idRef="0" 湲곕낯 紐⑥)
# 대 媛 踰 1, 1.1, 1.1.1... !
content = self._replace_default_numbering(content)
# itemCnt
content = self._update_item_counts(content)
header_path.write_text(content, encoding='utf-8')
print(f" header.xml
({len(styles)} ㅽ媛)")
def _make_char_pr(self, id: int, style: StyleDefinition) -> str:
"""charPr XML
( 以
!)"""
color = style.font_color.lstrip('#')
font_id = "1" if style.font_bold else "0"
return f'<hh:charPr id="{id}" height="{style.font_size}" textColor="#{color}" shadeColor="none" useFontSpace="0" useKerning="0" symMark="NONE" borderFillIDRef="1"><hh:fontRef hangul="{font_id}" latin="{font_id}" hanja="{font_id}" japanese="{font_id}" other="{font_id}" symbol="{font_id}" user="{font_id}"/><hh:ratio hangul="100" latin="100" hanja="100" japanese="100" other="100" symbol="100" user="100"/><hh:spacing hangul="0" latin="0" hanja="0" japanese="0" other="0" symbol="0" user="0"/><hh:relSz hangul="100" latin="100" hanja="100" japanese="100" other="100" symbol="100" user="100"/><hh:offset hangul="0" latin="0" hanja="0" japanese="0" other="0" symbol="0" user="0"/><hh:underline type="NONE" shape="SOLID" color="#000000"/><hh:strikeout shape="NONE" color="#000000"/><hh:outline type="NONE"/><hh:shadow type="NONE" color="#B2B2B2" offsetX="10" offsetY="10"/></hh:charPr>'
def _make_para_pr(self, id: int, style: StyleDefinition) -> str:
"""paraPr XML
( 以
!)"""
# 媛 臾몃
type="NONE"
# idRef="0" numbering id=1 (湲곕낯 踰 紐⑥)
李몄
if style.outline_level >= 0:
heading = f'<hh:heading type="OUTLINE" idRef="0" level="{style.outline_level}"/>'
else:
heading = '<hh:heading type="NONE" idRef="0" level="0"/>'
return f'<hh:paraPr id="{id}" tabPrIDRef="0" condense="0" fontLineHeight="0" snapToGrid="0" suppressLineNumbers="0" checked="0"><hh:align horizontal="{style.align}" vertical="BASELINE"/>{heading}<hh:breakSetting breakLatinWord="KEEP_WORD" breakNonLatinWord="KEEP_WORD" widowOrphan="0" keepWithNext="0" keepLines="0" pageBreakBefore="0" lineWrap="BREAK"/><hh:autoSpacing eAsianEng="0" eAsianNum="0"/><hh:margin><hc:intent value="{style.indent_first}" unit="HWPUNIT"/><hc:left value="{style.indent_left}" unit="HWPUNIT"/><hc:right value="0" unit="HWPUNIT"/><hc:prev value="{style.space_before}" unit="HWPUNIT"/><hc:next value="{style.space_after}" unit="HWPUNIT"/></hh:margin><hh:lineSpacing type="PERCENT" value="{style.line_spacing}" unit="HWPUNIT"/><hh:border borderFillIDRef="1" offsetLeft="0" offsetRight="0" offsetTop="0" offsetBottom="0" connect="0" ignoreMargin="0"/></hh:paraPr>'
def _make_style(self, id: int, name: str, para_id: int, char_id: int) -> str:
"""style XML
"""
safe_name = name.replace('<', '&lt;').replace('>', '&gt;')
return f'<hh:style id="{id}" type="PARA" name="{safe_name}" engName="" paraPrIDRef="{para_id}" charPrIDRef="{char_id}" nextStyleIDRef="{id}" langID="1042" lockForm="0"/>'
def _insert_before_tag(self, content: str, tag: str, insert_text: str) -> str:
""" ㅽ 쎌
"""
return content.replace(tag, insert_text + tag)
def _update_item_counts(self, content: str) -> str:
"""itemCnt
"""
# charProperties itemCnt
char_count = content.count('<hh:charPr ')
content = re.sub(
r'<hh:charProperties itemCnt="(\d+)"',
f'<hh:charProperties itemCnt="{char_count}"',
content
)
# paraProperties itemCnt
para_count = content.count('<hh:paraPr ')
content = re.sub(
r'<hh:paraProperties itemCnt="(\d+)"',
f'<hh:paraProperties itemCnt="{para_count}"',
content
)
# styles itemCnt
style_count = content.count('<hh:style ')
content = re.sub(
r'<hh:styles itemCnt="(\d+)"',
f'<hh:styles itemCnt="{style_count}"',
content
)
# numberings itemCnt
numbering_count = content.count('<hh:numbering ')
content = re.sub(
r'<hh:numberings itemCnt="(\d+)"',
f'<hh:numberings itemCnt="{numbering_count}"',
content
)
return content
def _replace_default_numbering(self, content: str) -> str:
"""numbering id=1 ⑦
곕━ ⑦
댁쇰 援"""
# 곕━媛 媛 踰 ⑦
new_patterns = [
{'level': '1', 'format': 'DIGIT', 'pattern': '1'},
{'level': '2', 'format': 'DIGIT', 'pattern': '^1.^2'},
{'level': '3', 'format': 'DIGIT', 'pattern': '^1.^2.^3'},
{'level': '4', 'format': 'HANGUL_SYLLABLE', 'pattern': '^4.'},
{'level': '5', 'format': 'DIGIT', 'pattern': '^5)'},
{'level': '6', 'format': 'HANGUL_SYLLABLE', 'pattern': '^6)'},
{'level': '7', 'format': 'CIRCLED_DIGIT', 'pattern': '^7'},
]
# numbering id="1" 李얘린
match = re.search(r'(<hh:numbering id="1"[^>]*>)(.*?)(</hh:numbering>)', content, re.DOTALL)
if not match:
print(" [寃쎄 ] numbering id=1 , 援 嫄대
")
return content
numbering_content = match.group(2)
for np in new_patterns:
level = np['level']
fmt = np['format']
pattern = np['pattern']
# 대 level쇰
def replace_parahead(m):
tag = m.group(0)
# numFormat 蹂寃
tag = re.sub(r'numFormat="[^"]*"', f'numFormat="{fmt}"', tag)
# ⑦
( ) 蹂寃
tag = re.sub(r'>([^<]*)</hh:paraHead>', f'>{pattern}</hh:paraHead>', tag)
return tag
numbering_content = re.sub(
rf'<hh:paraHead[^>]*level="{level}"[^>]*>.*?</hh:paraHead>',
replace_parahead,
numbering_content
)
new_content = match.group(1) + numbering_content + match.group(3)
print(" [INFO] numbering id=1 ⑦
(1, ^1.^2, ^1.^2.^3...)")
return content.replace(match.group(0), new_content)
def _adjust_tables(self, content: str) -> str:
"""
ш린 議곗
1. 800 hwpunit ( 댁 諛⑹ )
2.
:
瑜 媛 洹 遺
諛 ( 泥 醫寃)
"""
def adjust_table(match):
tbl = match.group(0)
#
異異
sz_match = re.search(r'<hp:sz width="(\d+)"', tbl)
table_width = int(sz_match.group(1)) if sz_match else 47624
# 媛異
col_match = re.search(r'colCnt="(\d+)"', tbl)
col_cnt = int(col_match.group(1)) if col_match else 4
# 鍮
( 30%, )
first_col_width = int(table_width * 0.25)
other_col_width = (table_width - first_col_width) // (col_cnt - 1) if col_cnt > 1 else table_width
# 媛
min_height = 800 # 8mm
#
ш린 議곗
col_idx = [0] # closure
def adjust_cell_sz(cell_match):
width = int(cell_match.group(1))
height = int(cell_match.group(2))
#
new_height = max(height, min_height)
return f'<hp:cellSz width="{width}" height="{new_height}"/>'
tbl = re.sub(
r'<hp:cellSz width="(\d+)" height="(\d+)"/>',
adjust_cell_sz,
tbl
)
return tbl
return re.sub(r'<hp:tbl[^>]*>.*?</hp:tbl>', adjust_table, content, flags=re.DOTALL)
def _inject_section_styles(self, role_positions: Dict[str, List[tuple]]):
"""section*.xml ( ㅽ 留㼼 諛⑹ 踰
洹 : role_to_style_id 李얘린
section_files = sorted(contents_dir.glob("section*.xml"))
print(f" [DEBUG] section files: {[f.name for f in section_files]}")
total_modified = 0
for section_file in section_files:
print(f" [DEBUG] Processing: {section_file.name}")
original_content = section_file.read_text(encoding='utf-8')
print(f" [DEBUG] File size: {len(original_content)} bytes")
content = original_content #
蹂듭щ낯
# 癒몃━留щ━留 蹂댁〈 (placeholder濡 援 )
header_footer_map = {}
placeholder_idx = 0
def save_header_footer(match):
nonlocal placeholder_idx
key = f"__HF_PLACEHOLDER_{placeholder_idx}__"
header_footer_map[key] = match.group(0)
placeholder_idx += 1
return key
# 癒몃━留щ━留
content = re.sub(r'<hp:header[^>]*>.*?</hp:header>', save_header_footer, content, flags=re.DOTALL)
content = re.sub(r'<hp:footer[^>]*>.*?</hp:footer>', save_header_footer, content, flags=re.DOTALL)
# 紐⑤ <hp:p> ㅼ
ㅽ 異異
para_pattern = r'(<hp:p [^>]*>)(.*?)(</hp:p>)'
section_modified = 0
def replace_style(match):
nonlocal total_modified, section_modified
open_tag = match.group(1)
inner = match.group(2)
close_tag = match.group(3)
# ㅽ 異異 ( 嫄 )
text = re.sub(r'<[^>]+>', '', inner).strip()
if not text:
return match.group(0)
# ㅽ 遺
text_start = text[:50] # 泥 50
matched_role = None
matched_style_id = None
matched_para_id = None
matched_char_id = None
# 紐 ⑦
留㼼 (뱀몄 )
# Unicode: \u25a0 \u25b8 \u25c6 \u25b6 \u25cf \u25cb \u25aa \u25ba
\u2605 \u203b\u00b7
prefix = r'^[\u25a0\u25b8\u25c6\u25b6\u25cf\u25cb\u25aa\u25ba\u261e\u2605\u203b\u00b7\s]*'
# FIGURE_CAPTION: "[洹몃┝ 1-1]", "[洹몃┝ 1-2]" (媛 癒쇱 泥댄 !)
# 洹몃┝ = \uadf8\ub9bc
if re.match(r'^\[\uadf8\ub9bc\s*[\d-]+\]', text_start):
matched_role = 'FIGURE_CAPTION'
# TABLE_CAPTION: "< 1-1>", "[ 1-1]"
# = \ud45c
elif re.match(r'^[<\[]\ud45c\s*[\d-]+[>\]]', text_start):
matched_role = 'TABLE_CAPTION'
# H1: "1", "1 媛 "
elif re.match(prefix + r'\uc81c?\s*\d+\uc7a5?\s', text_start) or re.match(prefix + r'[1-9]\s+[\uac00-\ud7a3]', text_start):
matched_role = 'H1'
# H3: "1.1.1 " (H2蹂대 癒쇱 泥댄 !)
elif re.match(prefix + r'\d+\.\d+\.\d+\s', text_start):
matched_role = 'H3'
# H2: "1.1 "
elif re.match(prefix + r'\d+\.\d+\s', text_start):
matched_role = 'H2'
# H4: "媛. "
elif re.match(prefix + r'[\uac00-\ud7a3]\.\s', text_start):
matched_role = 'H4'
# H5: "1) "
elif re.match(prefix + r'\d+\)\s', text_start):
matched_role = 'H5'
# H6: "(1) " "媛) "
elif re.match(prefix + r'\(\d+\)\s', text_start):
matched_role = 'H6'
elif re.match(prefix + r'[\uac00-\ud7a3]\)\s', text_start):
matched_role = 'H6'
# LIST_ITEM: " ", " ", " "
elif re.match(r'^[\u25cb\u25cf\u25e6\u2022\u2023\u25b8]\s', text_start):
matched_role = 'LIST_ITEM'
elif re.match(r'^[-\u2013\u2014]\s', text_start):
matched_role = 'LIST_ITEM'
# 留㼼 怨 ㅽ 쇰㈃
if matched_role and matched_role in self.role_to_style_id:
matched_style_id = self.role_to_style_id[matched_role]
matched_para_id = self.role_to_para_id[matched_role]
matched_char_id = self.role_to_char_id[matched_role]
elif 'BODY' in self.role_to_style_id and len(text) > 20:
# 湲
ㅽ몃 蹂몃Ц쇰 媛
matched_role = 'BODY'
matched_style_id = self.role_to_style_id['BODY']
matched_para_id = self.role_to_para_id['BODY']
matched_char_id = self.role_to_char_id['BODY']
if matched_style_id:
# 1. hp:p 媛
if 'styleIDRef="' in open_tag:
new_open = re.sub(r'styleIDRef="[^"]*"', f'styleIDRef="{matched_style_id}"', open_tag)
else:
new_open = open_tag.replace('<hp:p ', f'<hp:p styleIDRef="{matched_style_id}" ')
# 2. hp:p
蹂寃! ( ㅽ
hp:run
蹂寃! ( ㅽ 4. 媛 臾몃 踰 嫄 ( 踰 遺쇰源!)
if matched_role in ROLE_STYLES and ROLE_STYLES[matched_role].outline_level >= 0:
new_inner = self._remove_manual_numbering(new_inner, matched_role)
total_modified += 1
section_modified += 1
return new_open + new_inner + close_tag
return match.group(0)
new_content = re.sub(para_pattern, replace_style, content, flags=re.DOTALL)
#
ш린 議곗
new_content = self._adjust_tables(new_content)
# outlineShapeIDRef瑜 1濡 蹂寃 (곕━媛 援댄 numbering id=1 ъ )
new_content = re.sub(
r'outlineShapeIDRef="[^"]*"',
'outlineShapeIDRef="1"',
new_content
)
# 癒몃━留щ━留듭뱀 깆ㅼ몃 styleIDRef 蹂寃"""
# <hp:p ...> 李얘린
pattern = r'<hp:p\s[^>]*>'
matches = list(re.finditer(pattern, content))
if para_idx >= len(matches):
return content
match = matches[para_idx]
old_tag = match.group(0)
# styleIDRef
蹂寃 異媛
if 'styleIDRef=' in old_tag:
new_tag = re.sub(r'styleIDRef="[^"]*"', f'styleIDRef="{style_id}"', old_tag)
else:
#
異媛
new_tag = old_tag.replace('<hp:p ', f'<hp:p styleIDRef="{style_id}" ')
return content[:match.start()] + new_tag + content[match.end():]
def _remove_manual_numbering(self, inner: str, role: str) -> str:
""" 媛 臾몃
踰 嫄 ( 踰 遺쇰源!)
HTML
"1 DX 媛 " "DX 媛 " (쇰 "1" 遺 )
HTML
"1.1 痢〓 DX" "痢〓 DX" (쇰 "1.1" 遺 )
"""
#
patterns = {
'H1': r'^( \s*\d+\s* \s*)', # "1 " 嫄
'H2': r'^(\d+\.\d+\s+)', # "1.1 " 嫄
'H3': r'^(\d+\.\d+\.\d+\s+)', # "1.1.1 " 嫄
'H4': r'^([媛- ]\.\s+)', # "媛. " 嫄
'H5': r'^(\d+\)\s+)', # "1) " 嫄
'H6': r'^([媛- ]\)\s+|\(\d+\)\s+)', # "媛) " "(1) " 嫄
'H7': r'^([△™bㅲβ╈㎮ⓥ]+\s*)', # " " 嫄
}
if role not in patterns:
return inner
pattern = patterns[role]
# <hp:t> ㅼ
def remove_number(match):
text = match.group(1)
# 泥 踰吏 <hp:t>
new_text = re.sub(pattern, '', text, count=1)
return f'<hp:t>{new_text}</hp:t>'
# 泥 踰吏 hp:t 泥
new_inner = re.sub(r'<hp:t>([^<]*)</hp:t>', remove_number, inner, count=1)
return new_inner
def _repack_hwpx(self, output_path: str):
"""HWPX 異"""
print(f" [DEBUG] Repacking to: {output_path}")
print(f" [DEBUG] Source dir: {self.temp_dir}")
# 異
section ш린
temp_output = output_path + ".tmp"
with zipfile.ZipFile(temp_output, 'w', zipfile.ZIP_DEFLATED) as zf:
# mimetype 異 吏몃
mimetype_path = self.temp_dir / "mimetype"
if mimetype_path.exists():
zf.write(mimetype_path, "mimetype", compress_type=zipfile.ZIP_STORED)
# 몄
file_count = 0
for root, dirs, files in os.walk(self.temp_dir):
for file in files:
if file == "mimetype":
continue
file_path = Path(root) / file
arcname = file_path.relative_to(self.temp_dir)
zf.write(file_path, arcname)
file_count += 1
print(f" [DEBUG] Total files zipped: {file_count}")
#
蹂寃
import time
for attempt in range(3):
try:
if os.path.exists(output_path):
os.remove(output_path)
os.rename(temp_output, output_path)
break
except PermissionError:
print(f" [DEBUG] 湲 湲 以... ({attempt + 1}/3)")
time.sleep(0.5)
else:
# 3踰
print(f" [寃쎄 ] 댁곌린 ㅽ,
ъ : {temp_output}")
output_path = temp_output
# 異
寃곌낵
移異
Args:
hwpx_path: HWPX
elements: StyleAnalyzerъ
Returns:
HWPX
"""
#
# 李멸 : section 0, para
濡 媛
role_positions: Dict[str, List[tuple]] = {}
for idx, elem in enumerate(elements):
role = elem.role
if role not in role_positions:
role_positions[role] = []
# (section_idx, para_idx) -
section 0 媛
role_positions[role].append((0, idx))
injector = HwpxStyleInjector()
return injector.inject(hwpx_path, role_positions)
#
if __name__ == "__main__":
#
ㅽ몄
test_positions = {
'H1': [(0, 0), (0, 5)],
'H2': [(0, 1), (0, 6)],
'BODY': [(0, 2), (0, 3), (0, 4)],
}
# injector = HwpxStyleInjector()
# injector.inject("test.hwpx", test_positions)
print("HwpxStyleInjector 紐⑤ 濡
")