Cleanup: Deleting 03.Code/업로드용/converters/html_to_hwp.py
This commit is contained in:
@@ -1,236 +0,0 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
"""
|
||||
HTML HWP 蹂 v11
|
||||
|
||||
|
||||
: sizeoption=0 ( ш린) width/height 吏
|
||||
|
||||
踰: ctrl 肄 諛⑹쇰
|
||||
|
||||
몄 v10 吏
|
||||
|
||||
pip install pyhwpx beautifulsoup4 pillow
|
||||
"""
|
||||
|
||||
from pyhwpx import Hwp
|
||||
from bs4 import BeautifulSoup, NavigableString
|
||||
import os, re
|
||||
|
||||
# ㅽ ㅽ
|
||||
異媛
|
||||
from converters.style_analyzer import StyleAnalyzer, StyledElement
|
||||
from converters.hwp_style_mapping import HwpStyleMapper, DEFAULT_STYLES, ROLE_TO_STYLE_NAME
|
||||
from converters.hwpx_style_injector import HwpxStyleInjector, inject_styles_to_hwpx
|
||||
|
||||
|
||||
# PIL
|
||||
import ( ш린 )
|
||||
try:
|
||||
from PIL import Image
|
||||
HAS_PIL = True
|
||||
except ImportError:
|
||||
HAS_PIL = False
|
||||
print("[由] PIL - ш린濡쎌
|
||||
")
|
||||
|
||||
class Config:
    """Static layout and asset settings for the converter.

    The margin and header/footer values are passed through the converter's
    ``_mm`` helper in ``_setup_page``, so they are expressed in millimetres.
    """

    # Page margins (mm): left, right, top, bottom.
    MARGIN_LEFT, MARGIN_RIGHT, MARGIN_TOP, MARGIN_BOTTOM = 20, 20, 20, 15

    # Header / footer band heights (mm).
    HEADER_LEN, FOOTER_LEN = 10, 10

    # Upper bound for inserted image width (mm).
    MAX_IMAGE_WIDTH = 150

    # Directory holding image assets. NOTE(review): machine-specific absolute
    # path; kept unchanged because callers outside this view may rely on it.
    ASSETS_PATH = r"D:\for python\geulbeot-light\geulbeot-light\output\assets"
|
||||
|
||||
class StyleParser:
    """Resolve default text styles for HTML elements and normalise CSS values.

    Styles are looked up in a built-in table keyed by tag name (``h1``,
    ``p``, ``td`` ...) and by CSS class name (``toc-lvl-1`` ...). The helper
    methods turn CSS-ish size, colour and weight values into plain Python
    values.
    """

    # font-weight values that count as bold.
    _BOLD_WEIGHTS = frozenset({'bold', '700', '800', '900'})

    def __init__(self):
        # presumably a role -> HWP style mapping filled in by callers;
        # nothing in this class populates it -- TODO confirm.
        self.style_map = {}
        # Not assigned anywhere in this class; purpose not visible here.
        self.sty_gen = None
        # Default style per tag name / CSS class name.
        self.class_styles = {
            'h1': {'font-size': '20pt', 'color': '#008000'},
            'h2': {'font-size': '16pt', 'color': '#03581d'},
            'h3': {'font-size': '13pt', 'color': '#228B22'},
            'p': {'font-size': '11pt', 'color': '#333333'},
            'li': {'font-size': '11pt', 'color': '#333333'},
            'th': {'font-size': '9pt', 'color': '#006400'},
            'td': {'font-size': '9.5pt', 'color': '#333333'},
            'toc-lvl-1': {'font-size': '13pt', 'font-weight': '900', 'color': '#006400'},
            'toc-lvl-2': {'font-size': '11pt', 'color': '#333333'},
            'toc-lvl-3': {'font-size': '10pt', 'color': '#666666'},
        }

    def get_element_style(self, elem):
        """Return a fresh dict with the default style for *elem*.

        The tag-name entry is applied first, then each CSS class entry in
        order, so class styles override tag styles. Objects without a
        ``name`` attribute or a ``get`` method simply contribute nothing.
        """
        style = {}
        tag = getattr(elem, 'name', None)
        if tag and tag in self.class_styles:
            style.update(self.class_styles[tag])

        raw = elem.get('class', []) if hasattr(elem, 'get') else []
        # A plain string must be split into class names; iterating it would
        # yield single characters and could wrongly match the 'p' entry.
        classes = raw.split() if isinstance(raw, str) else (raw or [])
        for cls in classes:
            if cls in self.class_styles:
                style.update(self.class_styles[cls])
        return style

    def parse_size(self, s):
        """Return the first number found in *s* as a float (11 if none).

        The unit suffix is ignored. The pattern only accepts well-formed
        decimals, so inputs such as ``"."`` fall back to the default
        instead of raising ``ValueError``.
        """
        if not s:
            return 11
        m = re.search(r'\d+(?:\.\d+)?|\.\d+', str(s))
        return float(m.group(0)) if m else 11

    def parse_color(self, c):
        """Normalise a CSS colour to ``#RRGGBB`` (upper case).

        Accepts ``#rrggbb``, the ``#rgb`` shorthand and ``rgb()/rgba()``
        with integer components (clamped to 255; alpha is ignored).
        Anything else, including named colours, yields ``'#000000'``.
        """
        if not c:
            return '#000000'
        c = str(c).strip().lower()
        if re.match(r'^#[0-9a-f]{6}$', c):
            return c.upper()
        short = re.match(r'^#([0-9a-f])([0-9a-f])([0-9a-f])$', c)
        if short:
            # '#abc' is shorthand for '#aabbcc'.
            return '#' + ''.join(ch * 2 for ch in short.groups()).upper()
        m = re.search(r'rgba?\s*\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)', c)
        if m:
            # Clamp so that each component always formats as two hex digits.
            r, g, b = (min(int(v), 255) for v in m.groups())
            return f'#{r:02X}{g:02X}{b:02X}'
        return '#000000'

    def is_bold(self, style):
        """Return True when the style's ``font-weight`` denotes bold text."""
        weight = str(style.get('font-weight', '')).strip().lower()
        return weight in self._BOLD_WEIGHTS
|
||||
|
||||
# 嫄 대━
|
||||
# ""
|
||||
'H2': re.compile(r'^(\d+)\.(\d+)\s*'), # "1.1 " ""
|
||||
'H3': re.compile(r'^(\d+)\.(\d+)\.(\d+)\s*'), # "1.1.1 " ""
|
||||
'H4': re.compile(r'^[媛- . " ""
|
||||
'H5': re.compile(r'^(\d+)\)\s*'), # "1) " ""
|
||||
'H6': re.compile(r'^\((\d+)\)\s*'), # "(1) " ""
|
||||
'H7': re.compile(r'^[△™bㅲβ╈㎮ⓥ]\s*'), # " " ""
|
||||
'LIST_ITEM': re.compile(r'^[\-]\s*'), # " " ""
|
||||
}
|
||||
|
||||
def strip_numbering(text: str, role: str) -> str:
    """Remove the leading numbering/bullet marker that belongs to *role*.

    The marker pattern is looked up in ``NUMBERING_PATTERNS`` by role name.
    When a pattern exists, every match is removed from *text*; in all cases
    the result is returned with surrounding whitespace stripped. Falsy
    input (``None`` or ``""``) is returned unchanged.
    """
    if not text:
        return text

    numbering_re = NUMBERING_PATTERNS.get(role)
    # Roles without a registered pattern only get whitespace-trimmed.
    cleaned = numbering_re.sub('', text) if numbering_re else text
    return cleaned.strip()
|
||||
|
||||
#
|
||||
鍮
|
||||
대━ ( 異媛)
|
||||
#
|
||||
鍮
|
||||
臾몄 mm 媛 諛
|
||||
깆
|
||||
width 異異
|
||||
style_match = re.search(r'width\s*:\s*([^;]+)', width_str)
|
||||
if style_match:
|
||||
width_str = style_match.group(1).strip()
|
||||
|
||||
# px mm (96 DPI 湲곗)
|
||||
px_match = re.search(r'([\d.]+)\s*px', width_str)
|
||||
if px_match:
|
||||
return float(px_match.group(1)) * 25.4 / 96
|
||||
|
||||
# mm 洹몃濡
|
||||
mm_match = re.search(r'([\d.]+)\s*mm', width_str)
|
||||
if mm_match:
|
||||
return float(mm_match.group(1))
|
||||
|
||||
# % 蹂몃Ц(170mm) 湲곗 怨
|
||||
|
||||
pct_match = re.search(r'([\d.]+)\s*%', width_str)
|
||||
if pct_match:
|
||||
return float(pct_match.group(1)) * 170 / 100
|
||||
|
||||
# 踰 쇰㈃ px濡 媛
|
||||
二
|
||||
num_match = re.search(r'^([\d.]+)$', width_str)
|
||||
if num_match:
|
||||
return float(num_match.group(1)) * 25.4 / 96
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def _parse_align(cell):
|
||||
"""
|
||||
|
||||
"""
|
||||
align = cell.get('align', '').lower()
|
||||
if align in ['left', 'center', 'right']:
|
||||
return align
|
||||
|
||||
style = cell.get('style', '')
|
||||
align_match = re.search(r'text-align\s*:\s*(\w+)', style)
|
||||
if align_match:
|
||||
return align_match.group(1).lower()
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def _parse_bg_color(cell):
|
||||
"""
|
||||
곌꼍
|
||||
bgcolor = cell.get('bgcolor', '')
|
||||
if bgcolor:
|
||||
return bgcolor if bgcolor.startswith('#') else f'#{bgcolor}'
|
||||
|
||||
style = cell.get('style', '')
|
||||
bg_match = re.search(r'background(?:-color)?\s*:\s*([^;]+)', style)
|
||||
if bg_match:
|
||||
color = bg_match.group(1).strip()
|
||||
if color.startswith('#'):
|
||||
return color
|
||||
rgb_match = re.search(r'rgb\s*\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)', color)
|
||||
if rgb_match:
|
||||
r, g, b = int(rgb_match.group(1)), int(rgb_match.group(2)), int(rgb_match.group(3))
|
||||
return f'#{r:02X}{g:02X}{b:02X}'
|
||||
|
||||
return None
|
||||
|
||||
|
||||
class HtmlToHwpConverter:
|
||||
def __init__(self, visible=True):
    """Create the converter and its collaborators.

    Args:
        visible: forwarded to ``Hwp(visible=...)``.
    """
    # Automation object first: constructing it is the only step here with
    # external effects.
    self.hwp = Hwp(visible=visible)
    self.cfg, self.sp = Config(), StyleParser()

    # Conversion state, all starting empty.
    self.base_path = ""
    self.is_first_h1 = True
    self.image_count = 0
    self.table_widths = []  # presumably collected table widths -- TODO confirm
    self.style_map = {}     # presumably a style mapping -- TODO confirm
    self.sty_path = None    # related to a .sty file; not set in this method
|
||||
|
||||
def _mm(self, mm): return self.hwp.MiliToHwpUnit(mm)
|
||||
def _pt(self, pt): return self.hwp.PointToHwpUnit(pt)
|
||||
def _rgb(self, c):
|
||||
c = c.lstrip('#')
|
||||
return self.hwp.RGBColor(int(c[0:2],16), int(c[2:4],16), int(c[4:6],16)) if len(c)>=6 else self.hwp.RGBColor(0,0,0)
|
||||
|
||||
def _setup_page(self):
    """Apply the configured margins and header/footer sizes to the page.

    Loads the defaults of the ``PageSetup`` action, overwrites the margin
    and header/footer lengths with the ``Config`` values (converted with
    ``_mm``), and executes the action. This is best-effort: a failure is
    reported on stdout and does not propagate.
    """
    try:
        self.hwp.HAction.GetDefault("PageSetup", self.hwp.HParameterSet.HSecDef.HSet)
        sec_def = self.hwp.HParameterSet.HSecDef
        page = sec_def.PageDef
        page.LeftMargin = self._mm(self.cfg.MARGIN_LEFT)
        page.RightMargin = self._mm(self.cfg.MARGIN_RIGHT)
        page.TopMargin = self._mm(self.cfg.MARGIN_TOP)
        page.BottomMargin = self._mm(self.cfg.MARGIN_BOTTOM)
        page.HeaderLen = self._mm(self.cfg.HEADER_LEN)
        page.FooterLen = self._mm(self.cfg.FOOTER_LEN)
        self.hwp.HAction.Execute("PageSetup", sec_def.HSet)
    except Exception as e:
        # Was a bare ``except: pass``, which also swallowed
        # KeyboardInterrupt/SystemExit and hid every failure.
        print(f"  [warn] page setup failed: {e}")
|
||||
|
||||
def _create_header(self, right_text=""):
|
||||
print(f" 癒몃━留
|
||||
: {right_text if right_text else '(珥湲고)'}")
|
||||
try:
|
||||
self.hwp.HAction.GetDefault("HeaderFooter", self.hwp.HParameterSet.HHeaderFooter.HSet)
|
||||
self.hwp.HParameterSet.HHeaderFooter.HSet.SetItem("HeaderFooterStyle", 0)
|
||||
self.hwp.HParameterSet.HHeaderFooter.HSet.SetItem("HeaderFooterCtrlType", 0)
|
||||
self.hwp.HAction.Execute("HeaderFooter", self.hwp.HParameterSet.HHeaderFooter.HSet)
|
||||
|
||||
self.hwp.HAction.Run("ParagraphShapeAlignRight")
|
||||
self._set_font(9, False, '#333333')
|
||||
if right_text:
|
||||
self.hwp.insert_text(right_text)
|
||||
|
||||
self.hwp.HAction.Run("CloseEx")
|
||||
except Exception as e:
|
||||
print(f" [寃쎄 ] 癒몃━留щ━留 踰 ( )
|
||||
# 瑗щ━留닿린
|
||||
self.hwp.HAction.GetDefault("HeaderFooter", self.hwp.HParameterSet.HHead
|
||||
Reference in New Issue
Block a user