From 93dd43a73de7c6d710d412b78850be0b2ec63adb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=EC=9D=B4=EA=B2=BD=EB=AF=BC?= Date: Thu, 19 Mar 2026 14:02:59 +0900 Subject: [PATCH] =?UTF-8?q?Cleanup:=20Deleting=2003.Code/=EC=97=85?= =?UTF-8?q?=EB=A1=9C=EB=93=9C=EC=9A=A9/converters/hwpx=5Fstyle=5Finjector.?= =?UTF-8?q?py?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../converters/hwpx_style_injector.py | 803 ------------------ 1 file changed, 803 deletions(-) delete mode 100644 03.Code/업로드용/converters/hwpx_style_injector.py diff --git a/03.Code/업로드용/converters/hwpx_style_injector.py b/03.Code/업로드용/converters/hwpx_style_injector.py deleted file mode 100644 index 34d9431..0000000 --- a/03.Code/업로드용/converters/hwpx_style_injector.py +++ /dev/null @@ -1,803 +0,0 @@ -""" -HWPX ㅽ -湲 -pyhwpx濡 -ы濡: 깅 HWPX ㅼㅽ -1. HWPX 異 댁 -2. header.xmlㅼㅽ - ㅽ媛 щ 二쇱 -3. section*.xml - 蹂 - styleIDRef 留ㅽ -import os -import re -import zipfile -import shutil -import tempfile -from pathlib import Path -from typing import Dict, List, Optional -from dataclasses import dataclass - - -@dataclass -class StyleDefinition: - """ㅽ 媛 (-1=, 0=1, 1=2, ...) - - - - - - - - - - - - - - -# ㅽ ㅽ -ROLE_STYLES: Dict[str, StyleDefinition] = { - # 媛 臾몃 ( 踰 留ㅺ린湲!) - 'H1': StyleDefinition( - id=101, name='1 紐', font_size=2200, font_bold=True, - font_color='#006400', align='CENTER', line_spacing=200, - indent_left=0, indent_first=0, space_before=400, space_after=200, - outline_level=0 # ^1 - ), - 'H2': StyleDefinition( - id=102, name='1.1 紐', font_size=1500, font_bold=True, - font_color='#03581d', align='LEFT', line_spacing=200, - indent_left=0, indent_first=0, space_before=300, space_after=100, - outline_level=1 # ^1.^2 - ), - 'H3': StyleDefinition( - id=103, name='1.1.1 紐', font_size=1400, font_bold=True, - font_color='#228B22', align='LEFT', line_spacing=200, - indent_left=500, indent_first=0, space_before=200, space_after=100, - outline_level=2 # ^1.^2.^3 - ), - 'H4': StyleDefinition( - id=104, name='媛. 紐', font_size=1300, font_bold=True, - font_color='#000000', align='LEFT', line_spacing=200, - indent_left=1000, indent_first=0, space_before=150, space_after=50, - outline_level=3 # ^4. - ), - 'H5': StyleDefinition( - id=105, name='1) 紐', font_size=1200, font_bold=True, - font_color='#000000', align='LEFT', line_spacing=200, - indent_left=1500, indent_first=0, space_before=100, space_after=50, - outline_level=4 # ^5) - ), - 'H6': StyleDefinition( - id=106, name='媛) 紐', font_size=1150, font_bold=True, - font_color='#000000', align='LEFT', line_spacing=200, - indent_left=2000, indent_first=0, space_before=100, space_after=50, - outline_level=5 # ^6) - ), - 'H7': StyleDefinition( - id=115, name=' 紐', font_size=1100, font_bold=True, - font_color='#000000', align='LEFT', line_spacing=200, - indent_left=2300, indent_first=0, space_before=100, space_after=50, - outline_level=6 # ^7 (몄몃Ц ㅽ - 蹂몃Ц', font_size=1100, font_bold=False, - font_color='#000000', align='JUSTIFY', line_spacing=200, - indent_left=1500, indent_first=0, space_before=0, space_after=0 - ), - 'LIST_ITEM': StyleDefinition( - id=108, name='蹂몃Ц', font_size=1050, font_bold=False, - font_color='#000000', align='JUSTIFY', line_spacing=200, - indent_left=2500, indent_first=0, space_before=0, space_after=0 - ), - 'TABLE_CAPTION': StyleDefinition( - id=109, name='< 紐>', font_size=1100, font_bold=True, - font_color='#000000', align='LEFT', line_spacing=130, - indent_left=0, indent_first=0, space_before=200, space_after=100 - ), - 'FIGURE_CAPTION': StyleDefinition( - id=110, name='<洹몃┝ 紐>', font_size=1100, font_bold=True, - font_color='#000000', align='CENTER', line_spacing=130, - indent_left=0, indent_first=0, space_before=100, space_after=200 - ), - 'COVER_TITLE': StyleDefinition( - id=111, name='吏紐', font_size=2800, font_bold=True, - font_color='#1a365d', align='CENTER', line_spacing=150, - indent_left=0, indent_first=0, space_before=0, space_after=200 - ), - 'COVER_SUBTITLE': StyleDefinition( - id=112, name='吏遺', font_size=1800, font_bold=False, - font_color='#2d3748', align='CENTER', line_spacing=150, - indent_left=0, indent_first=0, space_before=0, space_after=100 - ), - 'TOC_1': StyleDefinition( - id=113, name='紐⑹감1', font_size=1200, font_bold=True, - font_color='#000000', align='LEFT', line_spacing=180, - indent_left=0, indent_first=0, space_before=100, space_after=50 - ), - 'TOC_2': StyleDefinition( - id=114, name='紐⑹감2', font_size=1100, font_bold=False, - font_color='#000000', align='LEFT', line_spacing=180, - indent_left=500, indent_first=0, space_before=0, space_after=0 - ), -} - -# 截 媛 踰 湲곕 - 깊! -# idRef="0" numbering id=1 - 李몄“濡, 대 ⑦ - 댁 - 援댄 - - -class HwpxStyleInjector: - """HWPX ㅽ -湲""" - - def __init__(self): - self.temp_dir: Optional[Path] = None - self.role_to_style_id: Dict[str, int] = {} - self.role_to_para_id: Dict[str, int] = {} # - self.role_to_char_id: Dict[str, int] = {} # - self.next_char_id = 0 - self.next_para_id = 0 - self.next_style_id = 0 - - def _find_max_ids(self): - """ ㅽ : 諛湲(id=0)留 吏, 몄 곕━ ㅽ 援""" - header_path = self.temp_dir / "Contents" / "header.xml" - if not header_path.exists(): - self.next_char_id = 1 - self.next_para_id = 1 - self.next_style_id = 1 - return - - content = header_path.read_text(encoding='utf-8') - - # 湲곗〈 "蹂몃Ц", "媛 1~10" ㅽ嫄 (id=1~22) - # 諛湲(id=0)留 吏! - - # style id=1~30 嫄 (諛湲 ) - content = re.sub(r'\s*', '', content) - - # itemCnt - - - # - header_path.write_text(content, encoding='utf-8') - print(f" [INFO] 湲곗〈 ㅽ 1~10 ) 嫄 - 猷") - - # charPr, paraPr 湲곗〈 寃 ㅼ遺 - (李몄“ 源⑥吏 - 濡ㅽ 1遺 - ! (Ctrl+2 = id=1, Ctrl+3 = id=2, ...) - self.next_style_id = 1 - - def inject(self, hwpx_path: str, role_positions: Dict[str, List[tuple]]) -> str: - """ - HWPX ㅼㅽ - ㅽ - - - Args: - hwpx_path: HWPX - role_positions: 蹂 - - 移蹂 {role: [(section_idx, para_idx), ...]} - - Returns: - HWPX - """ - print(f"\n HWPX ㅽ - ...") - print(f" -: {hwpx_path}") - - # 1. - 대 異 댁 - self.temp_dir = Path(tempfile.mkdtemp(prefix='hwpx_inject_')) - print(f" 대 : {self.temp_dir}") - - try: - with zipfile.ZipFile(hwpx_path, 'r') as zf: - zf.extractall(self.temp_dir) - - # 異 댁 吏 - section ш린 湲곗〈 理 ID 李얘린 ( ID 뱀 - - ) - self._find_max_ids() - print(f" [DEBUG] Starting IDs: char={self.next_char_id}, para={self.next_para_id}, style={self.next_style_id}") - - # 2. header.xmlㅽ媛 - used_roles = set(role_positions.keys()) - self._inject_header_styles(used_roles) - - # 3. section*.xml - self._inject_section_styles(role_positions) - - # 4. ㅼ 異 - output_path = hwpx_path # 댁곌린 - self._repack_hwpx(output_path) - - print(f" ㅽ - 猷: {output_path}") - return output_path - - finally: - # - 대 由 - if self.temp_dir and self.temp_dir.exists(): - shutil.rmtree(self.temp_dir) - - def _inject_header_styles(self, used_roles: set): - """header.xmlㅽ媛 (紐⑤ ROLE_STYLES 二쇱 -)""" - header_path = self.temp_dir / "Contents" / "header.xml" - if not header_path.exists(): - print(" [寃쎄 ] header.xml ") - return - - content = header_path.read_text(encoding='utf-8') - - # 紐⑤ ROLE_STYLES 二쇱 - (used_roles 臾댁) - char_props = [] - para_props = [] - styles = [] - - for role, style_def in ROLE_STYLES.items(): - char_id = self.next_char_id - para_id = self.next_para_id - style_id = self.next_style_id - - self.role_to_style_id[role] = style_id - self.role_to_para_id[role] = para_id # - self.role_to_char_id[role] = char_id # - - # charPr - - char_props.append(self._make_char_pr(char_id, style_def)) - - # paraPr - - para_props.append(self._make_para_pr(para_id, style_def)) - - # style - - styles.append(self._make_style(style_id, style_def.name, para_id, char_id)) - - self.next_char_id += 1 - self.next_para_id += 1 - self.next_style_id += 1 - - if not styles: - print(" [ 蹂 ] 二쇱 - ㅽ ") - return - - # charProperties媛 - content = self._insert_before_tag( - content, '', '\n'.join(char_props) + '\n' - ) - - # paraProperties媛 - content = self._insert_before_tag( - content, '', '\n'.join(para_props) + '\n' - ) - - # styles媛 - content = self._insert_before_tag( - content, '', '\n'.join(styles) + '\n' - ) - - # numbering id=1 ⑦ - 援 (idRef="0" 湲곕낯 踰 紐⑥) - # 대 媛 踰 1, 1.1, 1.1.1... ! - content = self._replace_default_numbering(content) - - # itemCnt - - content = self._update_item_counts(content) - - header_path.write_text(content, encoding='utf-8') - print(f" header.xml - 猷 ({len(styles)}媛 ㅽ媛)") - - def _make_char_pr(self, id: int, style: StyleDefinition) -> str: - """charPr XML - ( 以 - !)""" - color = style.font_color.lstrip('#') - font_id = "1" if style.font_bold else "0" - - return f'' - - def _make_para_pr(self, id: int, style: StyleDefinition) -> str: - """paraPr XML - ( 以 - !)""" - # 媛 臾몃 - 硫 type="NONE" - # idRef="0" numbering id=1 (湲곕낯 踰 紐⑥) - 李몄“ - if style.outline_level >= 0: - heading = f'' - else: - heading = '' - - return f'{heading}' - - def _make_style(self, id: int, name: str, para_id: int, char_id: int) -> str: - """style XML - """ - safe_name = name.replace('<', '<').replace('>', '>') - return f'' - - def _insert_before_tag(self, content: str, tag: str, insert_text: str) -> str: - """ ㅽ 쎌 -""" - return content.replace(tag, insert_text + tag) - - def _update_item_counts(self, content: str) -> str: - """itemCnt - - """ - # charProperties itemCnt - char_count = content.count(' str: - """numbering id=1 ⑦ - 댁 - 곕━ ⑦ - 댁쇰 援""" - # 곕━媛 媛 踰 ⑦ - - new_patterns = [ - {'level': '1', 'format': 'DIGIT', 'pattern': '1'}, - {'level': '2', 'format': 'DIGIT', 'pattern': '^1.^2'}, - {'level': '3', 'format': 'DIGIT', 'pattern': '^1.^2.^3'}, - {'level': '4', 'format': 'HANGUL_SYLLABLE', 'pattern': '^4.'}, - {'level': '5', 'format': 'DIGIT', 'pattern': '^5)'}, - {'level': '6', 'format': 'HANGUL_SYLLABLE', 'pattern': '^6)'}, - {'level': '7', 'format': 'CIRCLED_DIGIT', 'pattern': '^7'}, - ] - - # numbering id="1" 李얘린 - match = re.search(r'(]*>)(.*?)()', content, re.DOTALL) - if not match: - print(" [寃쎄 ] numbering id=1 , 援 嫄대 - ") - return content - - numbering_content = match.group(2) - - for np in new_patterns: - level = np['level'] - fmt = np['format'] - pattern = np['pattern'] - - # 대 level쇰 - - 援 - def replace_parahead(m): - tag = m.group(0) - # numFormat 蹂寃 - tag = re.sub(r'numFormat="[^"]*"', f'numFormat="{fmt}"', tag) - # ⑦ - ( ㅽ 댁 ) 蹂寃 - tag = re.sub(r'>([^<]*)', f'>{pattern}', tag) - return tag - - numbering_content = re.sub( - rf']*level="{level}"[^>]*>.*?', - replace_parahead, - numbering_content - ) - - new_content = match.group(1) + numbering_content + match.group(3) - print(" [INFO] numbering id=1 ⑦ - 援 猷 (1, ^1.^2, ^1.^2.^3...)") - return content.replace(match.group(0), new_content) - - def _adjust_tables(self, content: str) -> str: - """ - ш린 議곗 - - 1. 800 hwpunit ( 댁 諛⑹ ) - 2. - 鍮 - : - 泥 - 鍮 - 瑜 媛 洹 遺 - 諛 ( 泥 醫寃) - """ - - def adjust_table(match): - tbl = match.group(0) - - # - 泥 - 鍮 - 異異 - sz_match = re.search(r' 1 else table_width - - # 媛 - ㅼ - min_height = 800 # 8mm - - # - ш린 議곗 - col_idx = [0] # closure - - def adjust_cell_sz(cell_match): - width = int(cell_match.group(1)) - height = int(cell_match.group(2)) - - # - new_height = max(height, min_height) - - return f'' - - tbl = re.sub( - r'', - adjust_cell_sz, - tbl - ) - - return tbl - - return re.sub(r']*>.*?', adjust_table, content, flags=re.DOTALL) - - def _inject_section_styles(self, role_positions: Dict[str, List[tuple]]): - """section*.xml ( ㅽ 留㼼 諛⑹ 踰 - 洹 : role_to_style_id 李얘린 - section_files = sorted(contents_dir.glob("section*.xml")) - print(f" [DEBUG] section files: {[f.name for f in section_files]}") - - total_modified = 0 - - for section_file in section_files: - print(f" [DEBUG] Processing: {section_file.name}") - original_content = section_file.read_text(encoding='utf-8') - print(f" [DEBUG] File size: {len(original_content)} bytes") - - content = original_content # - 蹂듭щ낯 - - # 癒몃━留щ━留 蹂댁〈 (placeholder濡 援 ) - header_footer_map = {} - placeholder_idx = 0 - - def save_header_footer(match): - nonlocal placeholder_idx - key = f"__HF_PLACEHOLDER_{placeholder_idx}__" - header_footer_map[key] = match.group(0) - placeholder_idx += 1 - return key - - # 癒몃━留щ━留 - 援 - content = re.sub(r']*>.*?', save_header_footer, content, flags=re.DOTALL) - content = re.sub(r']*>.*?', save_header_footer, content, flags=re.DOTALL) - - # 紐⑤ ㅼ - ㅽ 異異 - para_pattern = r'(]*>)(.*?)()' - - section_modified = 0 - - def replace_style(match): - nonlocal total_modified, section_modified - open_tag = match.group(1) - inner = match.group(2) - close_tag = match.group(3) - - # ㅽ 異異 ( 嫄 ) - text = re.sub(r'<[^>]+>', '', inner).strip() - if not text: - return match.group(0) - - # ㅽ 遺 - 쇰 - text_start = text[:50] # 泥 50 - - matched_role = None - matched_style_id = None - matched_para_id = None - matched_char_id = None - - # 紐 ⑦ - 留㼼 (뱀몄 ) - # Unicode: \u25a0 \u25b8 \u25c6 \u25b6 \u25cf \u25cb \u25aa \u25ba -\u2605 \u203b 쨌\u00b7 - prefix = r'^[\u25a0\u25b8\u25c6\u25b6\u25cf\u25cb\u25aa\u25ba\u261e\u2605\u203b\u00b7\s]*' - - # FIGURE_CAPTION: "[洹몃┝ 1-1]", "[洹몃┝ 1-2]" (媛 癒쇱 泥댄 !) - # 洹몃┝ = \uadf8\ub9bc - if re.match(r'^\[\uadf8\ub9bc\s*[\d-]+\]', text_start): - matched_role = 'FIGURE_CAPTION' - # TABLE_CAPTION: "< 1-1>", "[ 1-1]" - # = \ud45c - elif re.match(r'^[<\[]\ud45c\s*[\d-]+[>\]]', text_start): - matched_role = 'TABLE_CAPTION' - # H1: "1", "1 媛 " - elif re.match(prefix + r'\uc81c?\s*\d+\uc7a5?\s', text_start) or re.match(prefix + r'[1-9]\s+[\uac00-\ud7a3]', text_start): - matched_role = 'H1' - # H3: "1.1.1 " (H2蹂대 癒쇱 泥댄 !) - elif re.match(prefix + r'\d+\.\d+\.\d+\s', text_start): - matched_role = 'H3' - # H2: "1.1 " - elif re.match(prefix + r'\d+\.\d+\s', text_start): - matched_role = 'H2' - # H4: "媛. " - elif re.match(prefix + r'[\uac00-\ud7a3]\.\s', text_start): - matched_role = 'H4' - # H5: "1) " - elif re.match(prefix + r'\d+\)\s', text_start): - matched_role = 'H5' - # H6: "(1) " "媛) " - elif re.match(prefix + r'\(\d+\)\s', text_start): - matched_role = 'H6' - elif re.match(prefix + r'[\uac00-\ud7a3]\)\s', text_start): - matched_role = 'H6' - # LIST_ITEM: " ", " ", " " - elif re.match(r'^[\u25cb\u25cf\u25e6\u2022\u2023\u25b8]\s', text_start): - matched_role = 'LIST_ITEM' - elif re.match(r'^[-\u2013\u2014]\s', text_start): - matched_role = 'LIST_ITEM' - - # 留㼼 怨 ㅽ 쇰㈃ - if matched_role and matched_role in self.role_to_style_id: - matched_style_id = self.role_to_style_id[matched_role] - matched_para_id = self.role_to_para_id[matched_role] - matched_char_id = self.role_to_char_id[matched_role] - elif 'BODY' in self.role_to_style_id and len(text) > 20: - # 湲 - ㅽ몃 蹂몃Ц쇰 媛 - 二 - matched_role = 'BODY' - matched_style_id = self.role_to_style_id['BODY'] - matched_para_id = self.role_to_para_id['BODY'] - matched_char_id = self.role_to_char_id['BODY'] - - if matched_style_id: - # 1. hp:p 媛 - if 'styleIDRef="' in open_tag: - new_open = re.sub(r'styleIDRef="[^"]*"', f'styleIDRef="{matched_style_id}"', open_tag) - else: - new_open = open_tag.replace('= 0: - new_inner = self._remove_manual_numbering(new_inner, matched_role) - - total_modified += 1 - section_modified += 1 - return new_open + new_inner + close_tag - - return match.group(0) - - new_content = re.sub(para_pattern, replace_style, content, flags=re.DOTALL) - - # - ш린 議곗 - new_content = self._adjust_tables(new_content) - - # outlineShapeIDRef瑜 1濡 蹂寃 (곕━媛 援댄 numbering id=1 ъ ) - new_content = re.sub( - r'outlineShapeIDRef="[^"]*"', - 'outlineShapeIDRef="1"', - new_content - ) - - - # 癒몃━留щ━留듭뱀 깆ㅼ몃 styleIDRef 蹂寃""" - # 李얘린 - pattern = r']*>' - matches = list(re.finditer(pattern, content)) - - if para_idx >= len(matches): - return content - - match = matches[para_idx] - old_tag = match.group(0) - - # styleIDRef - 蹂寃 異媛 - if 'styleIDRef=' in old_tag: - new_tag = re.sub(r'styleIDRef="[^"]*"', f'styleIDRef="{style_id}"', old_tag) - else: - # - 異媛 - new_tag = old_tag.replace(' str: - """ 媛 臾몃 - 踰 嫄 ( 踰 遺쇰源!) - - HTML - "1 DX 媛 " "DX 媛 " (쇰 "1" 遺 ) - HTML - "1.1 痢〓 DX" "痢〓 DX" (쇰 "1.1" 遺 ) - """ - # - 踰 ⑦ - - patterns = { - 'H1': r'^( \s*\d+\s* \s*)', # "1 " 嫄 - 'H2': r'^(\d+\.\d+\s+)', # "1.1 " 嫄 - 'H3': r'^(\d+\.\d+\.\d+\s+)', # "1.1.1 " 嫄 - 'H4': r'^([媛- ]\.\s+)', # "媛. " 嫄 - 'H5': r'^(\d+\)\s+)', # "1) " 嫄 - 'H6': r'^([媛- ]\)\s+|\(\d+\)\s+)', # "媛) " "(1) " 嫄 - 'H7': r'^([△™bㅲβ╈㎮ⓥ]+\s*)', # " " 嫄 - } - - if role not in patterns: - return inner - - pattern = patterns[role] - - # ㅼ - 踰 嫄 - def remove_number(match): - text = match.group(1) - # 泥 踰吏 - 留 踰 嫄 - new_text = re.sub(pattern, '', text, count=1) - return f'{new_text}' - - # 泥 踰吏 hp:t 泥 - new_inner = re.sub(r'([^<]*)', remove_number, inner, count=1) - - return new_inner - - def _repack_hwpx(self, output_path: str): - """HWPX 異""" - print(f" [DEBUG] Repacking to: {output_path}") - print(f" [DEBUG] Source dir: {self.temp_dir}") - - # 異 - section ш린 - - - temp_output = output_path + ".tmp" - - with zipfile.ZipFile(temp_output, 'w', zipfile.ZIP_DEFLATED) as zf: - # mimetype 異 吏몃 - mimetype_path = self.temp_dir / "mimetype" - if mimetype_path.exists(): - zf.write(mimetype_path, "mimetype", compress_type=zipfile.ZIP_STORED) - - # 몄 - file_count = 0 - for root, dirs, files in os.walk(self.temp_dir): - for file in files: - if file == "mimetype": - continue - file_path = Path(root) / file - arcname = file_path.relative_to(self.temp_dir) - zf.write(file_path, arcname) - file_count += 1 - - print(f" [DEBUG] Total files zipped: {file_count}") - - # - - - 쇰 蹂寃 - import time - for attempt in range(3): - try: - if os.path.exists(output_path): - os.remove(output_path) - os.rename(temp_output, output_path) - break - except PermissionError: - print(f" [DEBUG] 湲 湲 以... ({attempt + 1}/3)") - time.sleep(0.5) - else: - # 3踰 - ㅽ - - 쇰 吏 - print(f" [寃쎄 ] 댁곌린 ㅽ, - ъ : {temp_output}") - output_path = temp_output - - # 異 - 寃곌낵 - 移異 - ㅽ - - - Args: - hwpx_path: HWPX - elements: StyleAnalyzerъ ㅽ - - Returns: - HWPX - """ - # - - 移 - # 李멸 : section 0, para - 濡 媛 - role_positions: Dict[str, List[tuple]] = {} - - for idx, elem in enumerate(elements): - role = elem.role - if role not in role_positions: - role_positions[role] = [] - # (section_idx, para_idx) - - section 0 媛 - role_positions[role].append((0, idx)) - - injector = HwpxStyleInjector() - return injector.inject(hwpx_path, role_positions) - - -# -ㅽ -if __name__ == "__main__": - # -ㅽ몄 - test_positions = { - 'H1': [(0, 0), (0, 5)], - 'H2': [(0, 1), (0, 6)], - 'BODY': [(0, 2), (0, 3), (0, 4)], - } - - # injector = HwpxStyleInjector() - # injector.inject("test.hwpx", test_positions) - print("HwpxStyleInjector 紐⑤ 濡 - 猷")