# -*- coding: utf-8 -*- """ HTML HWP 蹂 v11 : sizeoption=0 ( ш린) width/height 吏 踰: ctrl 肄 諛⑹쇰 몄 v10 吏 pip install pyhwpx beautifulsoup4 pillow """ from pyhwpx import Hwp from bs4 import BeautifulSoup, NavigableString import os, re # ㅽ ㅽ 異媛 from converters.style_analyzer import StyleAnalyzer, StyledElement from converters.hwp_style_mapping import HwpStyleMapper, DEFAULT_STYLES, ROLE_TO_STYLE_NAME from converters.hwpx_style_injector import HwpxStyleInjector, inject_styles_to_hwpx # PIL import ( ш린 ) try: from PIL import Image HAS_PIL = True except ImportError: HAS_PIL = False print("[由] PIL - ш린濡쎌 ") class Config: MARGIN_LEFT, MARGIN_RIGHT, MARGIN_TOP, MARGIN_BOTTOM = 20, 20, 20, 15 HEADER_LEN, FOOTER_LEN = 10, 10 MAX_IMAGE_WIDTH = 150 # mm (理 鍮 ) ASSETS_PATH = r"D:\for python\geulbeot-light\geulbeot-light\output\assets" # 異媛 class StyleParser: def __init__(self): self.style_map = {} # ㅽ ( HwpStyle) self.sty_gen = None # ㅽ 깃린 self.class_styles = { 'h1': {'font-size': '20pt', 'color': '#008000'}, 'h2': {'font-size': '16pt', 'color': '#03581d'}, 'h3': {'font-size': '13pt', 'color': '#228B22'}, 'p': {'font-size': '11pt', 'color': '#333333'}, 'li': {'font-size': '11pt', 'color': '#333333'}, 'th': {'font-size': '9pt', 'color': '#006400'}, 'td': {'font-size': '9.5pt', 'color': '#333333'}, 'toc-lvl-1': {'font-size': '13pt', 'font-weight': '900', 'color': '#006400'}, 'toc-lvl-2': {'font-size': '11pt', 'color': '#333333'}, 'toc-lvl-3': {'font-size': '10pt', 'color': '#666666'}, } def get_element_style(self, elem): style = {} tag = elem.name if hasattr(elem, 'name') else None if tag and tag in self.class_styles: style.update(self.class_styles[tag]) for cls in elem.get('class', []) if hasattr(elem, 'get') else []: if cls in self.class_styles: style.update(self.class_styles[cls]) return style def parse_size(self, s): m = re.search(r'([\d.]+)', str(s)) if s else None return float(m.group(1)) if m else 11 def parse_color(self, c): if not c: return '#000000' c = str(c).strip().lower() if re.match(r'^#[0-9a-fA-F]{6}$', c): return c.upper() m = re.search(r'rgb[a]?\s*\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)', c) return f'#{int(m.group(1)):02X}{int(m.group(2)):02X}{int(m.group(3)):02X}' if m else '#000000' def is_bold(self, style): return style.get('font-weight', '') in ['bold', '700', '800', '900'] # 嫄 대━ # "" 'H2': re.compile(r'^(\d+)\.(\d+)\s*'), # "1.1 " "" 'H3': re.compile(r'^(\d+)\.(\d+)\.(\d+)\s*'), # "1.1.1 " "" 'H4': re.compile(r'^[媛- . " "" 'H5': re.compile(r'^(\d+)\)\s*'), # "1) " "" 'H6': re.compile(r'^\((\d+)\)\s*'), # "(1) " "" 'H7': re.compile(r'^[△™bㅲβ╈㎮ⓥ]\s*'), # " " "" 'LIST_ITEM': re.compile(r'^[\-]\s*'), # " " "" } def strip_numbering(text: str, role: str) -> str: """ 곕 ㅽ /湲고 嫄 HWP 媛 湲곕μ 깊濡 以蹂 諛⑹ """ if not text: return text pattern = NUMBERING_PATTERNS.get(role) if pattern: return pattern.sub('', text).strip() return text.strip() # 鍮 대━ ( 異媛) # 鍮 臾몄 mm 媛 諛 깆 width 異異 style_match = re.search(r'width\s*:\s*([^;]+)', width_str) if style_match: width_str = style_match.group(1).strip() # px mm (96 DPI 湲곗) px_match = re.search(r'([\d.]+)\s*px', width_str) if px_match: return float(px_match.group(1)) * 25.4 / 96 # mm 洹몃濡 mm_match = re.search(r'([\d.]+)\s*mm', width_str) if mm_match: return float(mm_match.group(1)) # % 蹂몃Ц(170mm) 湲곗 怨 pct_match = re.search(r'([\d.]+)\s*%', width_str) if pct_match: return float(pct_match.group(1)) * 170 / 100 # 踰 쇰㈃ px濡 媛 二 num_match = re.search(r'^([\d.]+)$', width_str) if num_match: return float(num_match.group(1)) * 25.4 / 96 return None def _parse_align(cell): """ """ align = cell.get('align', '').lower() if align in ['left', 'center', 'right']: return align style = cell.get('style', '') align_match = re.search(r'text-align\s*:\s*(\w+)', style) if align_match: return align_match.group(1).lower() return None def _parse_bg_color(cell): """ 곌꼍 bgcolor = cell.get('bgcolor', '') if bgcolor: return bgcolor if bgcolor.startswith('#') else f'#{bgcolor}' style = cell.get('style', '') bg_match = re.search(r'background(?:-color)?\s*:\s*([^;]+)', style) if bg_match: color = bg_match.group(1).strip() if color.startswith('#'): return color rgb_match = re.search(r'rgb\s*\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)', color) if rgb_match: r, g, b = int(rgb_match.group(1)), int(rgb_match.group(2)), int(rgb_match.group(3)) return f'#{r:02X}{g:02X}{b:02X}' return None class HtmlToHwpConverter: def __init__(self, visible=True): self.hwp = Hwp(visible=visible) self.cfg = Config() self.sp = StyleParser() self.base_path = "" self.is_first_h1 = True self.image_count = 0 self.table_widths = [] # 鍮 蹂 self.style_map = {} # ㅽ 留ㅽ self.sty_path = None # .sty def _mm(self, mm): return self.hwp.MiliToHwpUnit(mm) def _pt(self, pt): return self.hwp.PointToHwpUnit(pt) def _rgb(self, c): c = c.lstrip('#') return self.hwp.RGBColor(int(c[0:2],16), int(c[2:4],16), int(c[4:6],16)) if len(c)>=6 else self.hwp.RGBColor(0,0,0) def _setup_page(self): try: self.hwp.HAction.GetDefault("PageSetup", self.hwp.HParameterSet.HSecDef.HSet) s = self.hwp.HParameterSet.HSecDef s.PageDef.LeftMargin = self._mm(self.cfg.MARGIN_LEFT) s.PageDef.RightMargin = self._mm(self.cfg.MARGIN_RIGHT) s.PageDef.TopMargin = self._mm(self.cfg.MARGIN_TOP) s.PageDef.BottomMargin = self._mm(self.cfg.MARGIN_BOTTOM) s.PageDef.HeaderLen = self._mm(self.cfg.HEADER_LEN) s.PageDef.FooterLen = self._mm(self.cfg.FOOTER_LEN) self.hwp.HAction.Execute("PageSetup", s.HSet) except: pass def _create_header(self, right_text=""): print(f" 癒몃━留 : {right_text if right_text else '(珥湲고)'}") try: self.hwp.HAction.GetDefault("HeaderFooter", self.hwp.HParameterSet.HHeaderFooter.HSet) self.hwp.HParameterSet.HHeaderFooter.HSet.SetItem("HeaderFooterStyle", 0) self.hwp.HParameterSet.HHeaderFooter.HSet.SetItem("HeaderFooterCtrlType", 0) self.hwp.HAction.Execute("HeaderFooter", self.hwp.HParameterSet.HHeaderFooter.HSet) self.hwp.HAction.Run("ParagraphShapeAlignRight") self._set_font(9, False, '#333333') if right_text: self.hwp.insert_text(right_text) self.hwp.HAction.Run("CloseEx") except Exception as e: print(f" [寃쎄 ] 癒몃━留щ━留 踰 ( ) # 瑗щ━留닿린 self.hwp.HAction.GetDefault("HeaderFooter", self.hwp.HParameterSet.HHead