📦 Initialize Geulbeot structure and merge Prompts & test projects

This commit is contained in:
2026-03-05 11:32:29 +09:00
commit 555a954458
687 changed files with 205247 additions and 0 deletions

View File

@@ -0,0 +1,573 @@
# -*- coding: utf-8 -*-
"""
HTML → HWP 변환기 v11
✅ 이미지: sizeoption=0 (원본 크기) 또는 width/height 지정
✅ 페이지번호: ctrl 코드 방식으로 수정
✅ 나머지는 v10 유지
pip install pyhwpx beautifulsoup4 pillow
"""
import os
import re
import sys

from bs4 import BeautifulSoup, NavigableString
from pyhwpx import Hwp

# PIL is optional; it is only used to read image dimensions.
try:
    from PIL import Image
    HAS_PIL = True
except ImportError:
    HAS_PIL = False
    print("[알림] PIL 없음 - 이미지 원본 크기로 삽입")
class Config:
    """Page-layout constants for the generated document.

    All values are millimetres; the converter passes them through its
    millimetre-to-HWP-unit helper before handing them to HWP.
    """

    # Page margins.
    MARGIN_LEFT = 20
    MARGIN_RIGHT = 20
    MARGIN_TOP = 20
    MARGIN_BOTTOM = 15

    # Header / footer lengths.
    HEADER_LEN = 10
    FOOTER_LEN = 10

    # Upper bound applied to an image's width when it is sized explicitly.
    MAX_IMAGE_WIDTH = 150
class StyleParser:
    """Look up built-in styles for HTML elements and parse CSS-like values.

    Styles come from a fixed table keyed by tag name or CSS class name; no
    stylesheet or inline ``style`` attribute is read.
    """

    _BOLD_WEIGHTS = ('bold', '700', '800', '900')

    def __init__(self):
        # Keys are either tag names (h1, p, td, ...) or class names (toc-lvl-*).
        self.class_styles = {
            'h1': {'font-size': '20pt', 'color': '#008000'},
            'h2': {'font-size': '16pt', 'color': '#03581d'},
            'h3': {'font-size': '13pt', 'color': '#228B22'},
            'p': {'font-size': '11pt', 'color': '#333333'},
            'li': {'font-size': '11pt', 'color': '#333333'},
            'th': {'font-size': '9pt', 'color': '#006400'},
            'td': {'font-size': '9.5pt', 'color': '#333333'},
            'toc-lvl-1': {'font-size': '13pt', 'font-weight': '900', 'color': '#006400'},
            'toc-lvl-2': {'font-size': '11pt', 'color': '#333333'},
            'toc-lvl-3': {'font-size': '10pt', 'color': '#666666'},
        }

    def get_element_style(self, elem):
        """Return the merged style dict for *elem*.

        The tag-name entry is applied first, then every matching class entry
        in order, so class styles override tag styles. Objects without a
        ``name`` attribute or a ``get`` method simply contribute nothing.
        """
        style = {}
        tag = getattr(elem, 'name', None)
        if tag and tag in self.class_styles:
            style.update(self.class_styles[tag])

        classes = elem.get('class', []) if hasattr(elem, 'get') else []
        if isinstance(classes, str):
            # A plain string would otherwise be iterated character by character.
            classes = classes.split()
        for cls in classes or []:
            if cls in self.class_styles:
                style.update(self.class_styles[cls])
        return style

    def parse_size(self, s):
        """Return the first number found in *s* as a float, or 11 if there is none.

        The unit suffix is ignored, so ``'12px'`` and ``'12pt'`` both give 12.0.
        """
        if not s:
            return 11
        # Match one well-formed decimal so inputs such as '...' or '1.2.3'
        # cannot reach float() in a form it rejects.
        m = re.search(r'\d+(?:\.\d*)?|\.\d+', str(s))
        return float(m.group(0)) if m else 11

    def parse_color(self, c):
        """Normalise a colour to upper-case ``#RRGGBB``.

        Accepts 6-digit hex, 3-digit hex shorthand and ``rgb()/rgba()`` with
        integer channels (clamped to 255). Anything else yields ``'#000000'``.
        """
        if not c:
            return '#000000'
        c = str(c).strip().lower()
        if re.fullmatch(r'#[0-9a-f]{6}', c):
            return c.upper()
        if re.fullmatch(r'#[0-9a-f]{3}', c):
            # CSS shorthand: each digit is doubled (#abc -> #aabbcc).
            return '#' + ''.join(ch * 2 for ch in c[1:]).upper()
        m = re.search(r'rgb[a]?\s*\(\s*(\d+)\s*,\s*(\d+)\s*,\s*(\d+)', c)
        if not m:
            return '#000000'
        # Clamp so an out-of-range channel cannot produce a 3-digit hex pair.
        r, g, b = (min(255, int(v)) for v in m.groups())
        return f'#{r:02X}{g:02X}{b:02X}'

    def is_bold(self, style):
        """Return True when the style's ``font-weight`` is bold/700/800/900."""
        weight = str(style.get('font-weight', '')).strip().lower()
        return weight in self._BOLD_WEIGHTS
class HtmlToHwpConverter:
    """Build an HWP document from a report-style HTML file by driving pyhwpx.

    The converter walks the parsed HTML and replays it as HWP actions:
    headings, paragraphs, lists, tables, images and highlight boxes. All
    output goes to the single ``Hwp`` instance created in ``__init__``;
    progress and warnings are reported with ``print``.
    """

    def __init__(self, visible=True):
        """Start HWP and initialise per-document state.

        Args:
            visible: Forwarded to ``Hwp(visible=...)``.
        """
        self.hwp = Hwp(visible=visible)
        self.cfg = Config()
        self.sp = StyleParser()
        self.base_path = ""       # directory of the HTML file; set by convert()
        self.is_first_h1 = True   # first h1 creates the header, later ones open a new section
        self.image_count = 0      # number of images with a non-empty src seen so far

    # ------------------------------------------------------------------
    # Unit / value helpers
    # ------------------------------------------------------------------
    def _mm(self, mm):
        """Convert millimetres to HWP units."""
        return self.hwp.MiliToHwpUnit(mm)

    def _pt(self, pt):
        """Convert points to HWP units."""
        return self.hwp.PointToHwpUnit(pt)

    def _rgb(self, c):
        """Convert a ``#RRGGBB`` string to an HWP colour (black if fewer than 6 digits)."""
        c = c.lstrip('#')
        if len(c) < 6:
            return self.hwp.RGBColor(0, 0, 0)
        return self.hwp.RGBColor(int(c[0:2], 16), int(c[2:4], 16), int(c[4:6], 16))

    @staticmethod
    def _parse_span(value):
        """Parse a colspan/rowspan attribute value.

        Returns 1 for missing, non-numeric or non-positive values so a
        malformed attribute cannot abort the table or misalign its columns.
        """
        try:
            span = int(str(value).strip())
        except (TypeError, ValueError):
            return 1
        return span if span >= 1 else 1

    @staticmethod
    def _fit_image_mm(w_px, h_px, max_width_mm):
        """Convert a pixel size to millimetres (96 DPI) and cap the width.

        When the width exceeds *max_width_mm* both dimensions are scaled by
        the same ratio so the aspect ratio is kept.

        Returns:
            ``(width_mm, height_mm)``.
        """
        w_mm = w_px * 25.4 / 96
        h_mm = h_px * 25.4 / 96
        if w_mm > max_width_mm:
            ratio = max_width_mm / w_mm
            w_mm = max_width_mm
            h_mm = h_mm * ratio
        return w_mm, h_mm

    # ------------------------------------------------------------------
    # Page, header and footer
    # ------------------------------------------------------------------
    def _setup_page(self):
        """Apply the margins and header/footer lengths from ``Config`` (best effort)."""
        try:
            self.hwp.HAction.GetDefault("PageSetup", self.hwp.HParameterSet.HSecDef.HSet)
            s = self.hwp.HParameterSet.HSecDef
            s.PageDef.LeftMargin = self._mm(self.cfg.MARGIN_LEFT)
            s.PageDef.RightMargin = self._mm(self.cfg.MARGIN_RIGHT)
            s.PageDef.TopMargin = self._mm(self.cfg.MARGIN_TOP)
            s.PageDef.BottomMargin = self._mm(self.cfg.MARGIN_BOTTOM)
            s.PageDef.HeaderLen = self._mm(self.cfg.HEADER_LEN)
            s.PageDef.FooterLen = self._mm(self.cfg.FOOTER_LEN)
            self.hwp.HAction.Execute("PageSetup", s.HSet)
        except Exception as e:
            # Page setup is cosmetic; report and continue with HWP defaults.
            print(f" [경고] 페이지 설정: {e}")

    def _open_header_footer(self, ctrl_type, style=0):
        """Execute the "HeaderFooter" action.

        Args:
            ctrl_type: Value for ``HeaderFooterCtrlType``; this class uses 0
                for the header and 1 for the footer.
            style: Value for ``HeaderFooterStyle``.
        """
        self.hwp.HAction.GetDefault("HeaderFooter", self.hwp.HParameterSet.HHeaderFooter.HSet)
        self.hwp.HParameterSet.HHeaderFooter.HSet.SetItem("HeaderFooterStyle", style)
        self.hwp.HParameterSet.HHeaderFooter.HSet.SetItem("HeaderFooterCtrlType", ctrl_type)
        self.hwp.HAction.Execute("HeaderFooter", self.hwp.HParameterSet.HHeaderFooter.HSet)

    def _create_header(self, right_text=""):
        """Create the header and, if given, write *right_text* right-aligned in 9pt."""
        print(f" → 머리말 생성: {right_text if right_text else '(초기화)'}")
        try:
            self._open_header_footer(0)
            self.hwp.HAction.Run("ParagraphShapeAlignRight")
            self._set_font(9, False, '#333333')
            if right_text:
                self.hwp.insert_text(right_text)
            self.hwp.HAction.Run("CloseEx")
        except Exception as e:
            print(f" [경고] 머리말: {e}")

    def _create_footer(self, left_text=""):
        """Create the footer with *left_text* and add a page number at the bottom right.

        Unlike the header helpers this method does not catch exceptions; a
        failure here propagates to the caller.
        """
        print(f" → 꼬리말: {left_text}")
        # 1. Open the footer.
        self._open_header_footer(1)
        # 2. Left-aligned title text in 8pt.
        self.hwp.HAction.Run("ParagraphShapeAlignLeft")
        self._set_font(8, False, '#666666')
        self.hwp.insert_text(left_text)
        # 3. Close the footer.
        self.hwp.HAction.Run("CloseEx")
        # 4. Page number (bottom right).
        self.hwp.HAction.GetDefault("PageNumPos", self.hwp.HParameterSet.HPageNumPos.HSet)
        self.hwp.HParameterSet.HPageNumPos.DrawPos = self.hwp.PageNumPosition("BottomRight")
        self.hwp.HAction.Execute("PageNumPos", self.hwp.HParameterSet.HPageNumPos.HSet)

    def _new_section_with_header(self, header_text):
        """Break into a new section and replace its header text with *header_text*."""
        print(f" → 새 구역 머리말: {header_text}")
        try:
            self.hwp.HAction.Run("BreakSection")
            self._open_header_footer(0)
            # Clear whatever the header currently contains before writing.
            self.hwp.HAction.Run("SelectAll")
            self.hwp.HAction.Run("Delete")
            self.hwp.HAction.Run("ParagraphShapeAlignRight")
            self._set_font(9, False, '#333333')
            self.hwp.insert_text(header_text)
            self.hwp.HAction.Run("CloseEx")
        except Exception as e:
            print(f" [경고] 구역 머리말: {e}")

    def _update_header(self, new_title):
        """Replace the text of the existing header with *new_title*."""
        try:
            # Style 2 is what the original author labelled "edit mode" -- TODO confirm.
            self._open_header_footer(0, style=2)
            # Remove the existing content.
            self.hwp.HAction.Run("SelectAll")
            self.hwp.HAction.Run("Delete")
            # Insert the new content.
            self.hwp.HAction.Run("ParagraphShapeAlignRight")
            self._set_font(9, False, '#333333')
            self.hwp.insert_text(new_title)
            self.hwp.HAction.Run("CloseEx")
        except Exception as e:
            print(f" [경고] 머리말 업데이트: {e}")

    # ------------------------------------------------------------------
    # Character / paragraph / cell formatting
    # ------------------------------------------------------------------
    def _set_font(self, size=11, bold=False, color='#000000'):
        """Set the current font to 맑은 고딕 with the given size, weight and colour."""
        self.hwp.set_font(FaceName='맑은 고딕', Height=size, Bold=bold, TextColor=self._rgb(color))

    def _set_para(self, align='justify', lh=170, left=0, indent=0, before=0, after=0):
        """Set paragraph alignment, line spacing, margins and spacing (best effort).

        Args:
            align: 'left', 'center', 'right' or 'justify'; other values leave
                the alignment untouched.
            lh: Value for ``LineSpacing`` (with ``LineSpaceType`` 0).
            left: Left margin in mm.
            indent: First-line indent in mm.
            before: Space before the paragraph in pt.
            after: Space after the paragraph in pt.
        """
        acts = {
            'left': 'ParagraphShapeAlignLeft',
            'center': 'ParagraphShapeAlignCenter',
            'right': 'ParagraphShapeAlignRight',
            'justify': 'ParagraphShapeAlignJustify',
        }
        if align in acts:
            self.hwp.HAction.Run(acts[align])
        try:
            self.hwp.HAction.GetDefault("ParagraphShape", self.hwp.HParameterSet.HParaShape.HSet)
            p = self.hwp.HParameterSet.HParaShape
            p.LineSpaceType, p.LineSpacing = 0, lh
            p.LeftMargin = self._mm(left)
            p.IndentMargin = self._mm(indent)
            p.SpaceBeforePara = self._pt(before)
            p.SpaceAfterPara = self._pt(after)
            p.BreakNonLatinWord = 0
            self.hwp.HAction.Execute("ParagraphShape", p.HSet)
        except Exception as e:
            print(f" [경고] 문단 모양: {e}")

    def _set_cell_bg(self, color):
        """Fill the current table cell with *color* (best effort)."""
        try:
            self.hwp.HAction.GetDefault("CellBorderFill", self.hwp.HParameterSet.HCellBorderFill.HSet)
            p = self.hwp.HParameterSet.HCellBorderFill
            p.FillAttr.type = self.hwp.BrushType("NullBrush|WinBrush")
            p.FillAttr.WinBrushFaceStyle = self.hwp.HatchStyle("None")
            p.FillAttr.WinBrushHatchColor = self._rgb('#000000')
            p.FillAttr.WinBrushFaceColor = self._rgb(color)
            p.FillAttr.WindowsBrush = 1
            self.hwp.HAction.Execute("CellBorderFill", p.HSet)
        except Exception as e:
            print(f" [경고] 셀 배경: {e}")

    def _underline_box(self, text, size=14, color='#008000'):
        """Write *text* in a 1x1 table that only shows a coloured bottom border.

        If any of the table actions fail, the text is inserted as a plain bold
        paragraph instead.
        """
        try:
            self.hwp.HAction.GetDefault("TableCreate", self.hwp.HParameterSet.HTableCreation.HSet)
            t = self.hwp.HParameterSet.HTableCreation
            t.Rows, t.Cols, t.WidthType, t.HeightType = 1, 1, 0, 0
            t.WidthValue, t.HeightValue = self._mm(168), self._mm(10)
            self.hwp.HAction.Execute("TableCreate", t.HSet)

            self.hwp.HAction.GetDefault("InsertText", self.hwp.HParameterSet.HInsertText.HSet)
            self.hwp.HParameterSet.HInsertText.Text = text
            self.hwp.HAction.Execute("InsertText", self.hwp.HParameterSet.HInsertText.HSet)

            # Select the cell so the character shape applies to the inserted text.
            self.hwp.HAction.Run("TableCellBlock")
            self.hwp.HAction.GetDefault("CharShape", self.hwp.HParameterSet.HCharShape.HSet)
            self.hwp.HParameterSet.HCharShape.Height = self._pt(size)
            self.hwp.HParameterSet.HCharShape.TextColor = self._rgb(color)
            self.hwp.HAction.Execute("CharShape", self.hwp.HParameterSet.HCharShape.HSet)

            # Hide the top, right and left borders ...
            self.hwp.HAction.GetDefault("CellBorder", self.hwp.HParameterSet.HCellBorderFill.HSet)
            c = self.hwp.HParameterSet.HCellBorderFill
            c.BorderTypeTop = self.hwp.HwpLineType("None")
            c.BorderTypeRight = self.hwp.HwpLineType("None")
            c.BorderTypeLeft = self.hwp.HwpLineType("None")
            self.hwp.HAction.Execute("CellBorder", c.HSet)

            # ... and draw the bottom one in the heading colour.
            self.hwp.HAction.GetDefault("CellBorder", self.hwp.HParameterSet.HCellBorderFill.HSet)
            c = self.hwp.HParameterSet.HCellBorderFill
            c.BorderColorBottom = self._rgb(color)
            c.BorderWidthBottom = self.hwp.HwpLineWidth("0.4mm")
            self.hwp.HAction.Execute("CellBorder", c.HSet)

            self.hwp.HAction.Run("Cancel")
            self.hwp.HAction.Run("CloseEx")
            self.hwp.HAction.Run("MoveDocEnd")
        except Exception as e:
            print(f" [경고] 밑줄 상자: {e}")
            self._set_font(size, True, color)
            self.hwp.insert_text(text)
            self.hwp.BreakPara()

    # ------------------------------------------------------------------
    # Content insertion
    # ------------------------------------------------------------------
    def _insert_heading(self, elem):
        """Insert an h1/h2/h3 element.

        An h1 also sets the page header: the first one creates it, later ones
        start a new section with its own header. Any other tag is treated as
        level 1.
        """
        lv = int(elem.name[1]) if elem.name in ['h1', 'h2', 'h3'] else 1
        txt = elem.get_text(strip=True)
        st = self.sp.get_element_style(elem)
        sz = self.sp.parse_size(st.get('font-size', '14pt'))
        cl = self.sp.parse_color(st.get('color', '#008000'))

        if lv == 1:
            if self.is_first_h1:
                self._create_header(txt)
                self.is_first_h1 = False
            else:
                self._new_section_with_header(txt)
            self._set_para('left', 130, before=0, after=0)
            self._underline_box(txt, sz, cl)
            self.hwp.BreakPara()
            self._set_para('left', 130, before=0, after=15)
            self.hwp.BreakPara()
        elif lv == 2:
            self._set_para('left', 150, before=20, after=8)
            self._set_font(sz, True, cl)
            self.hwp.insert_text(txt)
            self.hwp.BreakPara()
        elif lv == 3:
            self._set_para('left', 140, left=3, before=12, after=5)
            self._set_font(sz, True, cl)
            self.hwp.insert_text(txt)
            self.hwp.BreakPara()

    def _insert_paragraph(self, elem):
        """Insert a paragraph, rendering b/strong children in bold.

        Paragraphs with no text are skipped. When the paragraph contains bold
        children it is written child by child; other inline children (span,
        em, a, ...) are written as regular text rather than being dropped.
        """
        txt = elem.get_text(strip=True)
        if not txt:
            return
        st = self.sp.get_element_style(elem)
        sz = self.sp.parse_size(st.get('font-size', '11pt'))
        cl = self.sp.parse_color(st.get('color', '#333333'))
        self._set_para('justify', 170, left=0, indent=3, before=0, after=3)

        if elem.find(['b', 'strong']):
            for ch in elem.children:
                if isinstance(ch, NavigableString):
                    if str(ch).strip():
                        self._set_font(sz, False, cl)
                        self.hwp.insert_text(str(ch))
                    continue
                inner = ch.get_text()
                if not inner:
                    continue
                self._set_font(sz, ch.name in ['b', 'strong'], cl)
                self.hwp.insert_text(inner)
        else:
            self._set_font(sz, self.sp.is_bold(st), cl)
            self.hwp.insert_text(txt)
        self.hwp.BreakPara()

    def _insert_list(self, elem):
        """Insert the direct li children of *elem* as paragraphs.

        Items of an ol get a "1. " style prefix. Items carrying a ``toc-``
        class are laid out as table-of-contents lines with level-dependent
        left margins; other items get a hanging indent after the prefix.
        """
        lt = elem.name
        for i, li in enumerate(elem.find_all('li', recursive=False)):
            st = self.sp.get_element_style(li)
            cls = li.get('class', [])
            txt = li.get_text(strip=True)
            is_toc = any('toc-' in c for c in cls)

            if 'toc-lvl-1' in cls:
                left, bef = 0, 8
            elif 'toc-lvl-2' in cls:
                left, bef = 7, 3
            elif 'toc-lvl-3' in cls:
                left, bef = 14, 1
            else:
                left, bef = 4, 2

            pf = f"{i+1}. " if lt == 'ol' else ""
            sz = self.sp.parse_size(st.get('font-size', '11pt'))
            cl = self.sp.parse_color(st.get('color', '#333333'))
            bd = self.sp.is_bold(st)

            if is_toc:
                self._set_para('left', 170, left=left, indent=0, before=bef, after=1)
                self._set_font(sz, bd, cl)
                self.hwp.insert_text(pf + txt)
                self.hwp.BreakPara()
            else:
                self._set_para('justify', 170, left=left, indent=0, before=bef, after=1)
                self._set_font(sz, bd, cl)
                self.hwp.insert_text(pf)
                # Wrapped lines align with the text that follows the prefix.
                self.hwp.HAction.Run("ParagraphShapeIndentAtCaret")
                self.hwp.insert_text(txt)
                self.hwp.BreakPara()

    def _collect_table_cells(self, table_elem):
        """Flatten an HTML table into a rectangular grid of cell texts.

        Returns:
            ``(rows_data, cell_styles, occupied, max_cols)`` where
            ``rows_data`` is a list of equally long rows of strings,
            ``cell_styles`` maps ``(row, col)`` of real cells to
            ``{'is_header': bool}``, and ``occupied`` holds the ``(row, col)``
            positions covered by another cell's colspan/rowspan.
        """
        rows_data, cell_styles, occupied, max_cols = [], {}, {}, 0
        for ri, tr in enumerate(table_elem.find_all('tr')):
            row, ci = [], 0
            for cell in tr.find_all(['td', 'th']):
                # Skip positions taken by a rowspan from an earlier row.
                while (ri, ci) in occupied:
                    row.append("")
                    ci += 1
                txt = cell.get_text(strip=True)
                cs = self._parse_span(cell.get('colspan', 1))
                rs = self._parse_span(cell.get('rowspan', 1))
                cell_styles[(ri, ci)] = {'is_header': cell.name == 'th' or ri == 0}
                row.append(txt)
                for dr in range(rs):
                    for dc in range(cs):
                        if dr > 0 or dc > 0:
                            occupied[(ri + dr, ci + dc)] = True
                row.extend([""] * (cs - 1))
                ci += cs
            rows_data.append(row)
            max_cols = max(max_cols, len(row))
        for row in rows_data:
            row.extend([""] * (max_cols - len(row)))
        return rows_data, cell_styles, occupied, max_cols

    def _insert_table(self, table_elem):
        """Insert an HTML table as an HWP table of the same grid size.

        Cells are not merged: positions covered by a colspan/rowspan are left
        empty. Header cells (th, or any cell in the first row) get a tinted
        background and bold 9pt text.
        """
        rows_data, cell_styles, occupied, max_cols = self._collect_table_cells(table_elem)
        rc = len(rows_data)
        if rc == 0 or max_cols == 0:
            return
        print(f" 표: {rc}× {max_cols}")

        self._set_para('left', 130, before=5, after=0)
        self.hwp.create_table(rc, max_cols, treat_as_char=True)
        for ri, row in enumerate(rows_data):
            for ci in range(max_cols):
                # Never step right from the final cell, covered or not.
                is_last = ri == rc - 1 and ci == max_cols - 1
                if (ri, ci) in occupied:
                    if not is_last:
                        self.hwp.HAction.Run("MoveRight")
                    continue
                txt = row[ci] if ci < len(row) else ""
                hdr = cell_styles.get((ri, ci), {}).get('is_header', False)
                if hdr:
                    self._set_cell_bg('#E8F5E9')
                self.hwp.HAction.Run("ParagraphShapeAlignCenter")
                self._set_font(9 if hdr else 9.5, hdr, '#006400' if hdr else '#333333')
                self.hwp.insert_text(str(txt))
                if not is_last:
                    self.hwp.HAction.Run("MoveRight")

        self.hwp.HAction.Run("Cancel")
        self.hwp.HAction.Run("CloseEx")
        self.hwp.HAction.Run("MoveDocEnd")
        self._set_para('left', 130, before=5, after=5)
        self.hwp.BreakPara()

    def _insert_image(self, src, caption=""):
        """Insert the image at *src*, followed by an optional caption.

        Relative paths are resolved against ``self.base_path``. A missing file
        or a failed insert leaves a bracketed placeholder line instead.
        Insertion is attempted three ways in order: original size, explicit
        size (needs PIL), then pyhwpx defaults.
        """
        # Guard first so an empty or missing src is neither counted nor logged.
        if not src:
            return
        self.image_count += 1
        print(f" 📷 이미지 #{self.image_count}: {os.path.basename(src)}")

        # Relative path -> absolute path.
        if not os.path.isabs(src):
            full_path = os.path.normpath(os.path.join(self.base_path, src))
        else:
            full_path = src

        if not os.path.exists(full_path):
            print(f" ❌ 파일 없음: {full_path}")
            self._set_font(9, False, '#999999')
            self._set_para('center', 130)
            self.hwp.insert_text(f"[이미지 없음: {os.path.basename(src)}]")
            self.hwp.BreakPara()
            return

        try:
            self._set_para('center', 130, before=5, after=3)
            inserted = False

            # Attempt 1: sizeoption=0 (original size).
            try:
                self.hwp.insert_picture(full_path, sizeoption=0)
                inserted = True
                print(" ✅ 삽입 성공 (원본 크기)")
            except Exception as e1:
                print(f" ⚠ 원본 크기 삽입 실패: {e1}")

            # Attempt 2: sizeoption=1 with an explicit width/height.
            if not inserted and HAS_PIL:
                try:
                    with Image.open(full_path) as img:
                        w_px, h_px = img.size
                    w_mm, h_mm = self._fit_image_mm(w_px, h_px, self.cfg.MAX_IMAGE_WIDTH)
                    # NOTE(review): pyhwpx may expect width/height in mm rather
                    # than HWP units here -- confirm against insert_picture docs.
                    self.hwp.insert_picture(full_path, sizeoption=1,
                                            width=self._mm(w_mm), height=self._mm(h_mm))
                    inserted = True
                    print(f" ✅ 삽입 성공 ({w_mm:.0f}×{h_mm:.0f}mm)")
                except Exception as e2:
                    print(f" ⚠ 지정 크기 삽입 실패: {e2}")

            # Attempt 3: library defaults.
            if not inserted:
                try:
                    self.hwp.insert_picture(full_path)
                    inserted = True
                    print(" ✅ 삽입 성공 (기본)")
                except Exception as e3:
                    print(f" ❌ 삽입 실패: {e3}")
                    self._set_font(9, False, '#FF0000')
                    self.hwp.insert_text(f"[이미지 오류: {os.path.basename(src)}]")

            self.hwp.BreakPara()

            if caption and inserted:
                self._set_font(9.5, True, '#666666')
                self._set_para('center', 130, before=0, after=5)
                self.hwp.insert_text(caption)
                self.hwp.BreakPara()
        except Exception as e:
            print(f" ❌ 오류: {e}")

    def _insert_highlight_box(self, elem):
        """Insert the text of *elem* inside a shaded 1x1 table; empty text is skipped."""
        txt = elem.get_text(strip=True)
        if not txt:
            return
        self._set_para('left', 130, before=5, after=0)
        self.hwp.create_table(1, 1, treat_as_char=True)
        self._set_cell_bg('#E2ECE2')
        self._set_font(11, False, '#333333')
        self.hwp.insert_text(txt)
        self.hwp.HAction.Run("Cancel")
        self.hwp.HAction.Run("CloseEx")
        self.hwp.HAction.Run("MoveDocEnd")
        self._set_para('left', 130, before=0, after=5)
        self.hwp.BreakPara()

    # ------------------------------------------------------------------
    # Traversal and entry points
    # ------------------------------------------------------------------
    def _process(self, elem):
        """Dispatch one HTML node to the matching insert method.

        Text nodes and script/style/template/noscript/head are ignored.
        Container tags are walked recursively unless they carry the
        ``highlight-box`` class. Tags not listed here are skipped.
        """
        if isinstance(elem, NavigableString):
            return
        tag = elem.name
        if not tag or tag in ['script', 'style', 'template', 'noscript', 'head']:
            return

        if tag == 'figure':
            img = elem.find('img')
            if img:
                figcaption = elem.find('figcaption')
                caption = figcaption.get_text(strip=True) if figcaption else ""
                self._insert_image(img.get('src', ''), caption)
            return
        if tag == 'img':
            self._insert_image(elem.get('src', ''))
            return

        if tag in ['h1', 'h2', 'h3']:
            self._insert_heading(elem)
        elif tag == 'p':
            self._insert_paragraph(elem)
        elif tag == 'table':
            self._insert_table(elem)
        elif tag in ['ul', 'ol']:
            self._insert_list(elem)
        elif 'highlight-box' in elem.get('class', []):
            self._insert_highlight_box(elem)
        elif tag in ['div', 'section', 'article', 'main', 'body', 'html', 'span']:
            for ch in elem.children:
                self._process(ch)

    def convert(self, html_path, output_path):
        """Convert the HTML file at *html_path* and save it to *output_path*.

        The file is read as UTF-8. The footer text is the part of the title
        element before the first ':'. If an element with id ``raw-container``
        exists, its ``box-cover``, ``box-toc``, ``box-summary`` and
        ``box-content`` children are rendered in that order with page breaks
        after the first three; otherwise the whole body is rendered.
        """
        print("=" * 60)
        print("HTML → HWP 변환기 v11")
        print(" ✓ 이미지: sizeoption 수정")
        print(" ✓ 페이지번호: 다중 방법 시도")
        print("=" * 60)

        self.base_path = os.path.dirname(os.path.abspath(html_path))
        self.is_first_h1 = True
        self.image_count = 0
        print(f"\n입력: {html_path}")
        print(f"출력: {output_path}\n")

        with open(html_path, 'r', encoding='utf-8') as f:
            soup = BeautifulSoup(f.read(), 'html.parser')

        title_tag = soup.find('title')
        if title_tag:
            full_title = title_tag.get_text(strip=True)
            footer_title = full_title.split(':')[0].strip()  # text before ":"
        else:
            footer_title = ""

        self.hwp.FileNew()
        self._setup_page()
        self._create_footer(footer_title)

        raw = soup.find(id='raw-container')
        if raw:
            cover = raw.find(id='box-cover')
            if cover:
                print(" → 표지")
                for ch in cover.children:
                    self._process(ch)
                self.hwp.HAction.Run("BreakPage")

            toc = raw.find(id='box-toc')
            if toc:
                print(" → 목차")
                self.is_first_h1 = True
                self._underline_box("목 차", 20, '#008000')
                self.hwp.BreakPara()
                self.hwp.BreakPara()
                self._insert_list(toc.find('ul') or toc)
                self.hwp.HAction.Run("BreakPage")

            summary = raw.find(id='box-summary')
            if summary:
                print(" → 요약")
                self.is_first_h1 = True
                self._process(summary)
                self.hwp.HAction.Run("BreakPage")

            content = raw.find(id='box-content')
            if content:
                print(" → 본문")
                self.is_first_h1 = True
                self._process(content)
        else:
            self._process(soup.find('body') or soup)

        self.hwp.SaveAs(output_path)
        print(f"\n✅ 저장: {output_path}")
        print(f" 이미지: {self.image_count}개 처리")

    def close(self):
        """Quit HWP; failures are reported but never raised."""
        try:
            self.hwp.Quit()
        except Exception as e:
            print(f" [경고] 종료: {e}")
def main():
    """Convert one HTML report to HWP from the command line.

    Usage: ``script.py [html_path [output_path]]``. Without arguments the
    built-in sample paths are used. Errors are printed with a traceback, and
    the HWP instance is closed whether or not the conversion succeeded.
    """
    html_path = r"D:\for python\survey_test\output\generated\report.html"
    output_path = r"D:\for python\survey_test\output\generated\report_v12.hwp"

    # Optional positional overrides for the default paths.
    args = sys.argv[1:]
    if len(args) >= 1:
        html_path = args[0]
    if len(args) >= 2:
        output_path = args[1]

    conv = None
    try:
        conv = HtmlToHwpConverter(visible=True)
        conv.convert(html_path, output_path)
        input("\nEnter를 누르면 HWP가 닫힙니다...")  # optional pause before closing
    except Exception as e:
        print(f"\n[에러] {e}")
        import traceback
        traceback.print_exc()
    finally:
        # Close HWP on the failure path too, so no instance is left running.
        if conv is not None:
            conv.close()


if __name__ == "__main__":
    main()