diff --git a/03.Code/업로드용/converters/html_to_hwp.py b/03.Code/업로드용/converters/html_to_hwp.py deleted file mode 100644 index 508fb8b..0000000 --- a/03.Code/업로드용/converters/html_to_hwp.py +++ /dev/null @@ -1,236 +0,0 @@ -# -*- coding: utf-8 -*- -""" -HTML HWP 蹂 v11 - - - : sizeoption=0 ( ш린) width/height 吏 - - 踰: ctrl 肄 諛⑹쇰 - - 몄 v10 吏 - -pip install pyhwpx beautifulsoup4 pillow -""" - -from pyhwpx import Hwp -from bs4 import BeautifulSoup, NavigableString -import os, re - -# ㅽ ㅽ - 異媛 -from converters.style_analyzer import StyleAnalyzer, StyledElement -from converters.hwp_style_mapping import HwpStyleMapper, DEFAULT_STYLES, ROLE_TO_STYLE_NAME -from converters.hwpx_style_injector import HwpxStyleInjector, inject_styles_to_hwpx - - -# PIL - import ( ш린 ) -try: - from PIL import Image - HAS_PIL = True -except ImportError: - HAS_PIL = False - print("[由] PIL - ш린濡쎌 -") - -class Config: - MARGIN_LEFT, MARGIN_RIGHT, MARGIN_TOP, MARGIN_BOTTOM = 20, 20, 20, 15 - HEADER_LEN, FOOTER_LEN = 10, 10 - MAX_IMAGE_WIDTH = 150 # mm (理 - 鍮 - ) - ASSETS_PATH = r"D:\for python\geulbeot-light\geulbeot-light\output\assets" # 異媛 - -class StyleParser: - def __init__(self): - self.style_map = {} # ㅽ ( HwpStyle) - self.sty_gen = None # ㅽ - 깃린 - self.class_styles = { - 'h1': {'font-size': '20pt', 'color': '#008000'}, - 'h2': {'font-size': '16pt', 'color': '#03581d'}, - 'h3': {'font-size': '13pt', 'color': '#228B22'}, - 'p': {'font-size': '11pt', 'color': '#333333'}, - 'li': {'font-size': '11pt', 'color': '#333333'}, - 'th': {'font-size': '9pt', 'color': '#006400'}, - 'td': {'font-size': '9.5pt', 'color': '#333333'}, - 'toc-lvl-1': {'font-size': '13pt', 'font-weight': '900', 'color': '#006400'}, - 'toc-lvl-2': {'font-size': '11pt', 'color': '#333333'}, - 'toc-lvl-3': {'font-size': '10pt', 'color': '#666666'}, - } - - def get_element_style(self, elem): - style = {} - tag = elem.name if hasattr(elem, 'name') else None - if tag and tag in self.class_styles: style.update(self.class_styles[tag]) - for cls in elem.get('class', []) if hasattr(elem, 'get') else []: - if cls in self.class_styles: style.update(self.class_styles[cls]) - return style - - def parse_size(self, s): - m = re.search(r'([\d.]+)', str(s)) if s else None - return float(m.group(1)) if m else 11 - - def parse_color(self, c): - if not c: return '#000000' - c = str(c).strip().lower() - if re.match(r'^#[0-9a-fA-F]{6}$', c): return c.upper() - m = re.search(r'rgb[a]?\s*\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)', c) - return f'#{int(m.group(1)):02X}{int(m.group(2)):02X}{int(m.group(3)):02X}' if m else '#000000' - - def is_bold(self, style): return style.get('font-weight', '') in ['bold', '700', '800', '900'] - -# 嫄 대━ -# "" - 'H2': re.compile(r'^(\d+)\.(\d+)\s*'), # "1.1 " "" - 'H3': re.compile(r'^(\d+)\.(\d+)\.(\d+)\s*'), # "1.1.1 " "" - 'H4': re.compile(r'^[媛- . " "" - 'H5': re.compile(r'^(\d+)\)\s*'), # "1) " "" - 'H6': re.compile(r'^\((\d+)\)\s*'), # "(1) " "" - 'H7': re.compile(r'^[△™bㅲβ╈㎮ⓥ]\s*'), # " " "" - 'LIST_ITEM': re.compile(r'^[\-]\s*'), # " " "" -} - -def strip_numbering(text: str, role: str) -> str: - """ - 곕 -ㅽ /湲고 嫄 - HWP 媛 湲곕μ - 깊濡 以蹂 諛⑹ - """ - if not text: - return text - - pattern = NUMBERING_PATTERNS.get(role) - if pattern: - return pattern.sub('', text).strip() - - return text.strip() - -# - 鍮 - 대━ ( 異媛) -# - 鍮 - 臾몄 mm 媛 諛 - 깆 - width 異異 - style_match = re.search(r'width\s*:\s*([^;]+)', width_str) - if style_match: - width_str = style_match.group(1).strip() - - # px mm (96 DPI 湲곗) - px_match = re.search(r'([\d.]+)\s*px', width_str) - if px_match: - return float(px_match.group(1)) * 25.4 / 96 - - # mm 洹몃濡 - mm_match = re.search(r'([\d.]+)\s*mm', width_str) - if mm_match: - return float(mm_match.group(1)) - - # % 蹂몃Ц(170mm) 湲곗 怨 - - pct_match = re.search(r'([\d.]+)\s*%', width_str) - if pct_match: - return float(pct_match.group(1)) * 170 / 100 - - # 踰 쇰㈃ px濡 媛 - 二 - num_match = re.search(r'^([\d.]+)$', width_str) - if num_match: - return float(num_match.group(1)) * 25.4 / 96 - - return None - - -def _parse_align(cell): - """ - - """ - align = cell.get('align', '').lower() - if align in ['left', 'center', 'right']: - return align - - style = cell.get('style', '') - align_match = re.search(r'text-align\s*:\s*(\w+)', style) - if align_match: - return align_match.group(1).lower() - - return None - - -def _parse_bg_color(cell): - """ -곌꼍 - bgcolor = cell.get('bgcolor', '') - if bgcolor: - return bgcolor if bgcolor.startswith('#') else f'#{bgcolor}' - - style = cell.get('style', '') - bg_match = re.search(r'background(?:-color)?\s*:\s*([^;]+)', style) - if bg_match: - color = bg_match.group(1).strip() - if color.startswith('#'): - return color - rgb_match = re.search(r'rgb\s*\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)', color) - if rgb_match: - r, g, b = int(rgb_match.group(1)), int(rgb_match.group(2)), int(rgb_match.group(3)) - return f'#{r:02X}{g:02X}{b:02X}' - - return None - - -class HtmlToHwpConverter: - def __init__(self, visible=True): - self.hwp = Hwp(visible=visible) - self.cfg = Config() - self.sp = StyleParser() - self.base_path = "" - self.is_first_h1 = True - self.image_count = 0 - self.table_widths = [] # - 鍮 - 蹂 - self.style_map = {} # ㅽ - 留ㅽ - self.sty_path = None # .sty - - def _mm(self, mm): return self.hwp.MiliToHwpUnit(mm) - def _pt(self, pt): return self.hwp.PointToHwpUnit(pt) - def _rgb(self, c): - c = c.lstrip('#') - return self.hwp.RGBColor(int(c[0:2],16), int(c[2:4],16), int(c[4:6],16)) if len(c)>=6 else self.hwp.RGBColor(0,0,0) - - def _setup_page(self): - try: - self.hwp.HAction.GetDefault("PageSetup", self.hwp.HParameterSet.HSecDef.HSet) - s = self.hwp.HParameterSet.HSecDef - s.PageDef.LeftMargin = self._mm(self.cfg.MARGIN_LEFT) - s.PageDef.RightMargin = self._mm(self.cfg.MARGIN_RIGHT) - s.PageDef.TopMargin = self._mm(self.cfg.MARGIN_TOP) - s.PageDef.BottomMargin = self._mm(self.cfg.MARGIN_BOTTOM) - s.PageDef.HeaderLen = self._mm(self.cfg.HEADER_LEN) - s.PageDef.FooterLen = self._mm(self.cfg.FOOTER_LEN) - self.hwp.HAction.Execute("PageSetup", s.HSet) - except: pass - - def _create_header(self, right_text=""): - print(f" 癒몃━留 - : {right_text if right_text else '(珥湲고)'}") - try: - self.hwp.HAction.GetDefault("HeaderFooter", self.hwp.HParameterSet.HHeaderFooter.HSet) - self.hwp.HParameterSet.HHeaderFooter.HSet.SetItem("HeaderFooterStyle", 0) - self.hwp.HParameterSet.HHeaderFooter.HSet.SetItem("HeaderFooterCtrlType", 0) - self.hwp.HAction.Execute("HeaderFooter", self.hwp.HParameterSet.HHeaderFooter.HSet) - - self.hwp.HAction.Run("ParagraphShapeAlignRight") - self._set_font(9, False, '#333333') - if right_text: - self.hwp.insert_text(right_text) - - self.hwp.HAction.Run("CloseEx") - except Exception as e: - print(f" [寃쎄 ] 癒몃━留щ━留 踰 ( ) - # 瑗щ━留닿린 - self.hwp.HAction.GetDefault("HeaderFooter", self.hwp.HParameterSet.HHead