diff --git a/03.Code/업로드용/converters/html_to_hwp.py b/03.Code/업로드용/converters/html_to_hwp.py new file mode 100644 index 0000000..508fb8b --- /dev/null +++ b/03.Code/업로드용/converters/html_to_hwp.py @@ -0,0 +1,236 @@ +# -*- coding: utf-8 -*- +""" +HTML HWP 蹂 v11 + + + : sizeoption=0 ( ш린) width/height 吏 + + 踰: ctrl 肄 諛⑹쇰 + + 몄 v10 吏 + +pip install pyhwpx beautifulsoup4 pillow +""" + +from pyhwpx import Hwp +from bs4 import BeautifulSoup, NavigableString +import os, re + +# ㅽ ㅽ + 異媛 +from converters.style_analyzer import StyleAnalyzer, StyledElement +from converters.hwp_style_mapping import HwpStyleMapper, DEFAULT_STYLES, ROLE_TO_STYLE_NAME +from converters.hwpx_style_injector import HwpxStyleInjector, inject_styles_to_hwpx + + +# PIL + import ( ш린 ) +try: + from PIL import Image + HAS_PIL = True +except ImportError: + HAS_PIL = False + print("[由] PIL - ш린濡쎌 +") + +class Config: + MARGIN_LEFT, MARGIN_RIGHT, MARGIN_TOP, MARGIN_BOTTOM = 20, 20, 20, 15 + HEADER_LEN, FOOTER_LEN = 10, 10 + MAX_IMAGE_WIDTH = 150 # mm (理 + 鍮 + ) + ASSETS_PATH = r"D:\for python\geulbeot-light\geulbeot-light\output\assets" # 異媛 + +class StyleParser: + def __init__(self): + self.style_map = {} # ㅽ ( HwpStyle) + self.sty_gen = None # ㅽ + 깃린 + self.class_styles = { + 'h1': {'font-size': '20pt', 'color': '#008000'}, + 'h2': {'font-size': '16pt', 'color': '#03581d'}, + 'h3': {'font-size': '13pt', 'color': '#228B22'}, + 'p': {'font-size': '11pt', 'color': '#333333'}, + 'li': {'font-size': '11pt', 'color': '#333333'}, + 'th': {'font-size': '9pt', 'color': '#006400'}, + 'td': {'font-size': '9.5pt', 'color': '#333333'}, + 'toc-lvl-1': {'font-size': '13pt', 'font-weight': '900', 'color': '#006400'}, + 'toc-lvl-2': {'font-size': '11pt', 'color': '#333333'}, + 'toc-lvl-3': {'font-size': '10pt', 'color': '#666666'}, + } + + def get_element_style(self, elem): + style = {} + tag = elem.name if hasattr(elem, 'name') else None + if tag and tag in self.class_styles: style.update(self.class_styles[tag]) + for cls in elem.get('class', []) if hasattr(elem, 'get') else []: + if cls in self.class_styles: style.update(self.class_styles[cls]) + return style + + def parse_size(self, s): + m = re.search(r'([\d.]+)', str(s)) if s else None + return float(m.group(1)) if m else 11 + + def parse_color(self, c): + if not c: return '#000000' + c = str(c).strip().lower() + if re.match(r'^#[0-9a-fA-F]{6}$', c): return c.upper() + m = re.search(r'rgb[a]?\s*\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)', c) + return f'#{int(m.group(1)):02X}{int(m.group(2)):02X}{int(m.group(3)):02X}' if m else '#000000' + + def is_bold(self, style): return style.get('font-weight', '') in ['bold', '700', '800', '900'] + +# 嫄 대━ +# "" + 'H2': re.compile(r'^(\d+)\.(\d+)\s*'), # "1.1 " "" + 'H3': re.compile(r'^(\d+)\.(\d+)\.(\d+)\s*'), # "1.1.1 " "" + 'H4': re.compile(r'^[媛- . " "" + 'H5': re.compile(r'^(\d+)\)\s*'), # "1) " "" + 'H6': re.compile(r'^\((\d+)\)\s*'), # "(1) " "" + 'H7': re.compile(r'^[△™bㅲβ╈㎮ⓥ]\s*'), # " " "" + 'LIST_ITEM': re.compile(r'^[\-]\s*'), # " " "" +} + +def strip_numbering(text: str, role: str) -> str: + """ + 곕 +ㅽ /湲고 嫄 + HWP 媛 湲곕μ + 깊濡 以蹂 諛⑹ + """ + if not text: + return text + + pattern = NUMBERING_PATTERNS.get(role) + if pattern: + return pattern.sub('', text).strip() + + return text.strip() + +# + 鍮 + 대━ ( 異媛) +# + 鍮 + 臾몄 mm 媛 諛 + 깆 + width 異異 + style_match = re.search(r'width\s*:\s*([^;]+)', width_str) + if style_match: + width_str = style_match.group(1).strip() + + # px mm (96 DPI 湲곗) + px_match = re.search(r'([\d.]+)\s*px', width_str) + if px_match: + return float(px_match.group(1)) * 25.4 / 96 + + # mm 洹몃濡 + mm_match = re.search(r'([\d.]+)\s*mm', width_str) + if mm_match: + return float(mm_match.group(1)) + + # % 蹂몃Ц(170mm) 湲곗 怨 + + pct_match = re.search(r'([\d.]+)\s*%', width_str) + if pct_match: + return float(pct_match.group(1)) * 170 / 100 + + # 踰 쇰㈃ px濡 媛 + 二 + num_match = re.search(r'^([\d.]+)$', width_str) + if num_match: + return float(num_match.group(1)) * 25.4 / 96 + + return None + + +def _parse_align(cell): + """ + + """ + align = cell.get('align', '').lower() + if align in ['left', 'center', 'right']: + return align + + style = cell.get('style', '') + align_match = re.search(r'text-align\s*:\s*(\w+)', style) + if align_match: + return align_match.group(1).lower() + + return None + + +def _parse_bg_color(cell): + """ +곌꼍 + bgcolor = cell.get('bgcolor', '') + if bgcolor: + return bgcolor if bgcolor.startswith('#') else f'#{bgcolor}' + + style = cell.get('style', '') + bg_match = re.search(r'background(?:-color)?\s*:\s*([^;]+)', style) + if bg_match: + color = bg_match.group(1).strip() + if color.startswith('#'): + return color + rgb_match = re.search(r'rgb\s*\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)', color) + if rgb_match: + r, g, b = int(rgb_match.group(1)), int(rgb_match.group(2)), int(rgb_match.group(3)) + return f'#{r:02X}{g:02X}{b:02X}' + + return None + + +class HtmlToHwpConverter: + def __init__(self, visible=True): + self.hwp = Hwp(visible=visible) + self.cfg = Config() + self.sp = StyleParser() + self.base_path = "" + self.is_first_h1 = True + self.image_count = 0 + self.table_widths = [] # + 鍮 + 蹂 + self.style_map = {} # ㅽ + 留ㅽ + self.sty_path = None # .sty + + def _mm(self, mm): return self.hwp.MiliToHwpUnit(mm) + def _pt(self, pt): return self.hwp.PointToHwpUnit(pt) + def _rgb(self, c): + c = c.lstrip('#') + return self.hwp.RGBColor(int(c[0:2],16), int(c[2:4],16), int(c[4:6],16)) if len(c)>=6 else self.hwp.RGBColor(0,0,0) + + def _setup_page(self): + try: + self.hwp.HAction.GetDefault("PageSetup", self.hwp.HParameterSet.HSecDef.HSet) + s = self.hwp.HParameterSet.HSecDef + s.PageDef.LeftMargin = self._mm(self.cfg.MARGIN_LEFT) + s.PageDef.RightMargin = self._mm(self.cfg.MARGIN_RIGHT) + s.PageDef.TopMargin = self._mm(self.cfg.MARGIN_TOP) + s.PageDef.BottomMargin = self._mm(self.cfg.MARGIN_BOTTOM) + s.PageDef.HeaderLen = self._mm(self.cfg.HEADER_LEN) + s.PageDef.FooterLen = self._mm(self.cfg.FOOTER_LEN) + self.hwp.HAction.Execute("PageSetup", s.HSet) + except: pass + + def _create_header(self, right_text=""): + print(f" 癒몃━留 + : {right_text if right_text else '(珥湲고)'}") + try: + self.hwp.HAction.GetDefault("HeaderFooter", self.hwp.HParameterSet.HHeaderFooter.HSet) + self.hwp.HParameterSet.HHeaderFooter.HSet.SetItem("HeaderFooterStyle", 0) + self.hwp.HParameterSet.HHeaderFooter.HSet.SetItem("HeaderFooterCtrlType", 0) + self.hwp.HAction.Execute("HeaderFooter", self.hwp.HParameterSet.HHeaderFooter.HSet) + + self.hwp.HAction.Run("ParagraphShapeAlignRight") + self._set_font(9, False, '#333333') + if right_text: + self.hwp.insert_text(right_text) + + self.hwp.HAction.Run("CloseEx") + except Exception as e: + print(f" [寃쎄 ] 癒몃━留щ━留 踰 ( ) + # 瑗щ━留닿린 + self.hwp.HAction.GetDefault("HeaderFooter", self.hwp.HParameterSet.HHead