# -*- coding: utf-8 -*-
"""
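HTML -> HWP converter, v11
- Images: sizeoption=0 (original size) or explicit width/height
- Page numbers: reworked to use the ctrl-code approach
- Everything else: unchanged from v10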
pip install pyhwpx beautifulsoup4 pillow
"""
from pyhwpx import Hwp
from bs4 import BeautifulSoup, NavigableString
import os, re
# Style system modules (added)
from converters.style_analyzer import StyleAnalyzer, StyledElement
from converters.hwp_style_mapping import HwpStyleMapper, DEFAULT_STYLES, ROLE_TO_STYLE_NAME
from converters.hwpx_style_injector import HwpxStyleInjector, inject_styles_to_hwpx
# Optional PIL import (image size detection)
# Pillow is optional: HAS_PIL records whether it could be imported so the
# rest of the module can check the flag before touching ``Image``.
# NOTE(review): the notice text below is mis-encoded in the source this was
# recovered from (and was split across two lines, leaving the string literal
# unterminated). Only the literal has been repaired; restore the intended
# wording from version control.
try:
    from PIL import Image
    HAS_PIL = True
except ImportError:
    HAS_PIL = False
    print("[由] PIL - ш린濡쎌")
class Config:
    """Static layout settings read by the converter.

    The margin and header/footer values are passed through the converter's
    millimetre conversion before being applied to the page definition.
    """

    # Page margins.
    MARGIN_LEFT = 20
    MARGIN_RIGHT = 20
    MARGIN_TOP = 20
    MARGIN_BOTTOM = 15

    # Header / footer lengths.
    HEADER_LEN = 10
    FOOTER_LEN = 10

    # mm (maximum width)
    MAX_IMAGE_WIDTH = 150

    # Asset directory (added).
    # NOTE(review): absolute path on one developer machine; confirm whether
    # this should be configurable before running elsewhere.
    ASSETS_PATH = r"D:\for python\geulbeot-light\geulbeot-light\output\assets"
class StyleParser:
    """Resolve default text styles for HTML elements and parse CSS-like values.

    ``class_styles`` maps a tag name or CSS class name to a dict of style
    properties ('font-size', 'color' and optionally 'font-weight').
    """

    # Font-weight values that are treated as bold.
    _BOLD_WEIGHTS = frozenset({'bold', '700', '800', '900'})
    # A plain decimal number: "12", "9.5", "12." or ".5".
    _NUMBER = re.compile(r'\d+\.?\d*|\.\d+')

    def __init__(self):
        # Style mapping (presumably role -> HwpStyle; not populated here).
        self.style_map = {}
        # Style generator (unset here; presumably assigned by the caller).
        self.sty_gen = None
        self.class_styles = {
            'h1': {'font-size': '20pt', 'color': '#008000'},
            'h2': {'font-size': '16pt', 'color': '#03581d'},
            'h3': {'font-size': '13pt', 'color': '#228B22'},
            'p': {'font-size': '11pt', 'color': '#333333'},
            'li': {'font-size': '11pt', 'color': '#333333'},
            'th': {'font-size': '9pt', 'color': '#006400'},
            'td': {'font-size': '9.5pt', 'color': '#333333'},
            'toc-lvl-1': {'font-size': '13pt', 'font-weight': '900', 'color': '#006400'},
            'toc-lvl-2': {'font-size': '11pt', 'color': '#333333'},
            'toc-lvl-3': {'font-size': '10pt', 'color': '#666666'},
        }

    def get_element_style(self, elem):
        """Return the merged default style dict for *elem*.

        The tag's entry in ``class_styles`` is applied first, then the entry
        of each CSS class in order, so later classes override earlier values.
        Objects without ``name`` / ``get`` simply contribute nothing.
        """
        style = {}
        tag = getattr(elem, 'name', None)
        if tag and tag in self.class_styles:
            style.update(self.class_styles[tag])

        classes = elem.get('class', []) if hasattr(elem, 'get') else []
        if isinstance(classes, str):
            # A plain string would otherwise be iterated character by
            # character; treat it as a space-separated class list.
            classes = classes.split()
        for cls in classes or []:
            if cls in self.class_styles:
                style.update(self.class_styles[cls])
        return style

    def parse_size(self, s):
        """Return the first number found in *s* as a float, or 11 if none.

        The unit suffix is ignored. Malformed numbers such as "1.2.3" yield
        their leading valid part instead of raising ``ValueError``.
        """
        m = self._NUMBER.search(str(s)) if s else None
        return float(m.group(0)) if m else 11

    def parse_color(self, c):
        """Normalise a CSS colour to an upper-case '#RRGGBB' string.

        Accepts '#RRGGBB', the '#RGB' shorthand and 'rgb(...)' / 'rgba(...)'
        (alpha is ignored, channels are clamped to 255). Anything else,
        including empty input, returns '#000000'.
        """
        if not c:
            return '#000000'
        c = str(c).strip().lower()
        if re.fullmatch(r'#[0-9a-f]{6}', c):
            return c.upper()
        if re.fullmatch(r'#[0-9a-f]{3}', c):
            # Expand shorthand: '#abc' -> '#AABBCC'.
            return '#' + ''.join(ch * 2 for ch in c[1:]).upper()
        m = re.search(r'rgba?\s*\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)', c)
        if not m:
            return '#000000'
        # Clamp so out-of-range channels cannot produce a 7+ digit value.
        r, g, b = (min(int(v), 255) for v in m.groups())
        return f'#{r:02X}{g:02X}{b:02X}'

    def is_bold(self, style):
        """Return True when the style's 'font-weight' is bold/700/800/900.

        The value is coerced to text first so integer weights (e.g. 700)
        are recognised as well.
        """
        weight = str(style.get('font-weight', '')).strip().lower()
        return weight in self._BOLD_WEIGHTS
# Numbering-removal utilities.
#
# Maps a role name to the regex matching the hand-written numbering prefix
# used for that role; strip_numbering() removes the matched prefix.
#
# NOTE(review): this table was recovered from a mis-encoded copy of the file.
#   * The opening ``NUMBERING_PATTERNS = {`` line was missing and is restored.
#   * The 'H4' pattern was truncated mid-literal; ``[가-하]\.`` (Korean
#     syllable + dot, e.g. "가. ") is a reconstruction - confirm.
#   * The 'H7' character class was unreadable; the circled digits 2-9 were
#     recovered from the surviving bytes; 1 and 10 are inferred - confirm.
#   * An 'H1' entry may have existed and been lost; none is recreated here.
#   * 'LIST_ITEM' is kept exactly as it survived; other bullet characters may
#     have been dropped from its character class.
NUMBERING_PATTERNS = {
    'H2': re.compile(r'^(\d+)\.(\d+)\s*'),          # "1.1 "   -> ""
    'H3': re.compile(r'^(\d+)\.(\d+)\.(\d+)\s*'),   # "1.1.1 " -> ""
    'H4': re.compile(r'^[가-하]\.\s*'),              # "가. "   -> ""
    'H5': re.compile(r'^(\d+)\)\s*'),               # "1) "    -> ""
    'H6': re.compile(r'^\((\d+)\)\s*'),             # "(1) "   -> ""
    'H7': re.compile(r'^[①②③④⑤⑥⑦⑧⑨⑩]\s*'),        # "① "    -> ""
    'LIST_ITEM': re.compile(r'^[\-]\s*'),           # "- "     -> ""
}
def strip_numbering(text: str, role: str) -> str:
    """Remove the numbering/bullet prefix that belongs to *role* from *text*.

    The prefix pattern is looked up in ``NUMBERING_PATTERNS``. Roles without
    a pattern only have surrounding whitespace trimmed. Falsy *text* (empty
    string or ``None``) is returned unchanged.

    NOTE(review): the original docstring was mis-encoded; it appears to say
    that HWP generates outline numbers itself, so the hand-written ones are
    stripped to avoid duplicates - confirm.
    """
    if not text:
        return text
    prefix_re = NUMBERING_PATTERNS.get(role)
    cleaned = prefix_re.sub('', text) if prefix_re else text
    return cleaned.strip()
# Table width parsing utility (added).
#
# NOTE(review): the ``def`` line and docstring of this helper were unreadable
# in the mis-encoded source. The name ``_parse_width`` and the ``width_str``
# parameter are inferred from the surviving body and from the sibling helpers
# ``_parse_align`` / ``_parse_bg_color`` - confirm against the call sites.
def _parse_width(width_str, body_width_mm=170):
    """Convert an HTML/CSS width value to millimetres.

    Args:
        width_str: A ``width`` attribute value ("120", "120px", "30mm",
            "50%") or a whole ``style`` string containing ``width: ...``.
        body_width_mm: Width that percentages are relative to. Defaults to
            170, the body width the original code hard-coded.

    Returns:
        The width in millimetres as a float, or ``None`` when no width can
        be extracted (including ``None`` / empty input).

    Pixels and bare numbers are converted at 96 DPI.
    """
    if width_str is None:
        return None
    width_str = str(width_str).strip()
    if not width_str:
        return None

    # Pull the value out of a style string. The lookbehind stops this from
    # picking up ``border-width`` / ``max-width`` / ``min-width``.
    style_match = re.search(r'(?<![\w-])width\s*:\s*([^;]+)', width_str, re.IGNORECASE)
    if style_match:
        width_str = style_match.group(1).strip()

    # A well-formed decimal only, so float() cannot raise on e.g. "1.2.3".
    number = r'(\d+\.?\d*|\.\d+)'

    # px -> mm (96 DPI)
    px_match = re.search(number + r'\s*px', width_str, re.IGNORECASE)
    if px_match:
        return float(px_match.group(1)) * 25.4 / 96

    # mm is used as-is
    mm_match = re.search(number + r'\s*mm', width_str, re.IGNORECASE)
    if mm_match:
        return float(mm_match.group(1))

    # % is relative to the body width
    pct_match = re.search(number + r'\s*%', width_str)
    if pct_match:
        return float(pct_match.group(1)) * body_width_mm / 100

    # A bare number is treated as pixels
    num_match = re.fullmatch(number, width_str)
    if num_match:
        return float(num_match.group(1)) * 25.4 / 96

    return None
def _parse_align(cell):
    """Return the horizontal alignment requested for a table cell.

    The ``align`` attribute wins when it is 'left', 'center' or 'right'
    (compared case-insensitively, surrounding whitespace ignored). Otherwise
    the ``text-align`` property of the inline ``style`` is used; its keyword
    is returned lower-cased and is NOT restricted to those three values
    (e.g. 'justify' is passed through).

    Args:
        cell: Any object with a dict-style ``get(key)``.

    Returns:
        The alignment keyword, or ``None`` when neither source provides one.
    """
    align = str(cell.get('align') or '').strip().lower()
    if align in ('left', 'center', 'right'):
        return align

    style = str(cell.get('style') or '')
    # CSS property names are case-insensitive, so match 'TEXT-ALIGN' too.
    align_match = re.search(r'text-align\s*:\s*(\w+)', style, re.IGNORECASE)
    return align_match.group(1).lower() if align_match else None
# A CSS hex colour token: #RGB, #RGBA, #RRGGBB or #RRGGBBAA.
_BG_HEX = re.compile(
    r'#([0-9a-fA-F]{8}|[0-9a-fA-F]{6}|[0-9a-fA-F]{3,4})(?![0-9a-fA-F])'
)


def _hex_to_rrggbb(digits):
    """Turn 3/4/6/8 matched hex digits into '#RRGGBB' (alpha is dropped)."""
    if len(digits) < 6:
        # Shorthand: double each of the three colour digits.
        digits = ''.join(ch * 2 for ch in digits[:3])
    return f'#{digits[:6]}'


def _parse_bg_color(cell):
    """Return a table cell's background colour as '#RRGGBB', or ``None``.

    Sources, in priority order:
      1. the ``bgcolor`` attribute, with or without a leading '#';
      2. ``background-color`` / ``background`` in the inline ``style``,
         given as a hex token or as ``rgb()`` / ``rgba()``.

    Hex digits keep their original letter case; ``rgb()`` values are
    rendered upper-case with channels clamped to 255. Shorthand hex
    ('#abc') is expanded to six digits so the result can always be split
    into three two-digit channels. A ``bgcolor`` that is not a hex value
    (e.g. a colour name) is ignored and the style is consulted instead.

    Args:
        cell: Any object with a dict-style ``get(key)``.
    """
    bgcolor = str(cell.get('bgcolor') or '').strip()
    if bgcolor:
        candidate = bgcolor if bgcolor.startswith('#') else f'#{bgcolor}'
        attr_hex = _BG_HEX.fullmatch(candidate)
        if attr_hex:
            return _hex_to_rrggbb(attr_hex.group(1))

    style = str(cell.get('style') or '')
    # The lookbehind avoids matching inside another identifier.
    bg_match = re.search(
        r'(?<![\w-])background(?:-color)?\s*:\s*([^;]+)', style, re.IGNORECASE
    )
    if not bg_match:
        return None
    value = bg_match.group(1).strip()

    leading_hex = _BG_HEX.match(value)
    if leading_hex:
        return _hex_to_rrggbb(leading_hex.group(1))

    # Accept rgba() as well as rgb(), matching StyleParser.parse_color.
    rgb_match = re.search(
        r'rgba?\s*\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)', value, re.IGNORECASE
    )
    if rgb_match:
        r, g, b = (min(int(v), 255) for v in rgb_match.groups())
        return f'#{r:02X}{g:02X}{b:02X}'

    # Shorthand such as "background: url(x.png) #eee no-repeat".
    any_hex = _BG_HEX.search(value)
    return _hex_to_rrggbb(any_hex.group(1)) if any_hex else None
class HtmlToHwpConverter:
def __init__(self, visible=True):
    """Create the Hwp automation object and reset converter state.

    Args:
        visible: Forwarded to ``Hwp(visible=...)``.
    """
    self.hwp = Hwp(visible=visible)
    self.cfg = Config()
    self.sp = StyleParser()
    self.base_path = ""
    self.is_first_h1 = True
    self.image_count = 0
    # Table width information (presumably filled while tables are
    # processed - confirm against the table-handling code).
    self.table_widths = []
    # Style mapping.
    self.style_map = {}
    # Path of the .sty file (unset until assigned elsewhere).
    self.sty_path = None
def _mm(self, mm): return self.hwp.MiliToHwpUnit(mm)
def _pt(self, pt): return self.hwp.PointToHwpUnit(pt)
def _rgb(self, c):
c = c.lstrip('#')
return self.hwp.RGBColor(int(c[0:2],16), int(c[2:4],16), int(c[4:6],16)) if len(c)>=6 else self.hwp.RGBColor(0,0,0)
def _setup_page(self):
try:
self.hwp.HAction.GetDefault("PageSetup", self.hwp.HParameterSet.HSecDef.HSet)
s = self.hwp.HParameterSet.HSecDef
s.PageDef.LeftMargin = self._mm(self.cfg.MARGIN_LEFT)
s.PageDef.RightMargin = self._mm(self.cfg.MARGIN_RIGHT)
s.PageDef.TopMargin = self._mm(self.cfg.MARGIN_TOP)
s.PageDef.BottomMargin = self._mm(self.cfg.MARGIN_BOTTOM)
s.PageDef.HeaderLen = self._mm(self.cfg.HEADER_LEN)
s.PageDef.FooterLen = self._mm(self.cfg.FOOTER_LEN)
self.hwp.HAction.Execute("PageSetup", s.HSet)
except: pass
def _create_header(self, right_text=""):
print(f" 癒몃━留
: {right_text if right_text else '(珥湲고)'}")
try:
self.hwp.HAction.GetDefault("HeaderFooter", self.hwp.HParameterSet.HHeaderFooter.HSet)
self.hwp.HParameterSet.HHeaderFooter.HSet.SetItem("HeaderFooterStyle", 0)
self.hwp.HParameterSet.HHeaderFooter.HSet.SetItem("HeaderFooterCtrlType", 0)
self.hwp.HAction.Execute("HeaderFooter", self.hwp.HParameterSet.HHeaderFooter.HSet)
self.hwp.HAction.Run("ParagraphShapeAlignRight")
self._set_font(9, False, '#333333')
if right_text:
self.hwp.insert_text(right_text)
self.hwp.HAction.Run("CloseEx")
except Exception as e:
print(f" [寃쎄 ] 癒몃━留щ━留 踰 ( )
# 瑗щ━留닿린
self.hwp.HAction.GetDefault("HeaderFooter", self.hwp.HParameterSet.HHead