Files
test/converters/html_to_hwp_briefing.py
2026-02-20 11:34:02 +09:00

616 lines
24 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
# -*- coding: utf-8 -*-
"""
HTML → HWP 변환기 (기획서 전용)
✅ 머리말/꼬리말: 보고서 방식 적용 (페이지 번호 포함)
✅ lead-box, section, data-table, strategy-grid, qa-grid, bottom-box 지원
✅ process-container (단계별 프로세스) 지원
✅ badge 스타일 텍스트 변환
✅ Navy 색상 테마
pip install pyhwpx beautifulsoup4
"""
from pyhwpx import Hwp
from bs4 import BeautifulSoup
import os
class Config:
    """Page layout settings.

    The margin and header/footer values are passed through a
    millimetre-to-HWP-unit conversion by the converter's ``_setup_page``,
    so they are expressed in millimetres.
    """

    # Page size (210 x 297 matches A4 in millimetres).
    PAGE_WIDTH = 210
    PAGE_HEIGHT = 297

    # Page margins.
    MARGIN_LEFT = 20
    MARGIN_RIGHT = 20
    MARGIN_TOP = 20
    MARGIN_BOTTOM = 15

    # Space reserved for the running header and footer.
    HEADER_LEN = 10
    FOOTER_LEN = 10

    # Usable text width. Derived so it cannot drift from the page width and
    # side margins; evaluates to the previously hard-coded 170.
    CONTENT_WIDTH = PAGE_WIDTH - MARGIN_LEFT - MARGIN_RIGHT
class HtmlToHwpConverter:
    """Convert a briefing-style HTML document into an HWP document.

    The converter drives a ``pyhwpx.Hwp`` instance: it parses the HTML with
    BeautifulSoup, walks every element carrying the ``sheet`` class and
    replays its content (title, lead box, sections, tables, bottom box) as
    HWP text and tables, then saves the result.
    """

    # Marker glyph shown in front of a badge's text, keyed by the badge's
    # CSS class. The same keys are also colour names in ``self.colors``.
    _BADGE_MARKS = {
        'badge-safe': '✓',
        'badge-caution': '△',
        'badge-risk': '✗',
    }

    # Values written to "HeaderFooterCtrlType": this file uses 0 when it
    # opens the header and 1 when it opens the footer.
    _CTRL_HEADER = 0
    _CTRL_FOOTER = 1

    def __init__(self, visible=True):
        """Start an HWP instance.

        Args:
            visible: Forwarded to ``Hwp(visible=...)``.
        """
        self.hwp = Hwp(visible=visible)
        self.cfg = Config()
        self.colors = {}  # filled by _init_colors() once a document exists
        self.is_first_h1 = True

    # ─────────────────────────────────────────────────────────
    # Initialisation and utilities
    # ─────────────────────────────────────────────────────────
    def _init_colors(self):
        """Build the named colour palette (navy theme) in ``self.colors``."""
        palette = {
            'primary-navy': (26, 54, 93),      # #1a365d
            'secondary-navy': (44, 82, 130),   # #2c5282
            'accent-navy': (49, 130, 206),     # #3182ce
            'dark-gray': (45, 55, 72),         # #2d3748
            'medium-gray': (74, 85, 104),      # #4a5568
            'light-gray': (226, 232, 240),     # #e2e8f0
            'bg-light': (247, 250, 252),       # #f7fafc
            'border-color': (203, 213, 224),   # #cbd5e0
            'badge-safe': (30, 111, 63),       # #1e6f3f
            'badge-caution': (154, 91, 19),    # #9a5b13
            'badge-risk': (161, 43, 43),       # #a12b2b
            'white': (255, 255, 255),
            'black': (0, 0, 0),
        }
        self.colors = {
            name: self.hwp.RGBColor(*rgb) for name, rgb in palette.items()
        }

    def _mm(self, mm):
        """Convert millimetres to HWP units."""
        return self.hwp.MiliToHwpUnit(mm)

    def _pt(self, pt):
        """Convert points to HWP units."""
        return self.hwp.PointToHwpUnit(pt)

    def _rgb(self, hex_color):
        """Convert a ``#rrggbb`` string to an HWP colour value.

        A leading ``#`` is optional. Strings with fewer than six hex digits
        fall back to black.
        """
        c = hex_color.lstrip('#')
        if len(c) < 6:
            return self.hwp.RGBColor(0, 0, 0)
        return self.hwp.RGBColor(int(c[0:2], 16), int(c[2:4], 16), int(c[4:6], 16))

    def _font(self, size=10, color='black', bold=False):
        """Set the current font using a palette colour name.

        Unknown colour names fall back to the palette's black.
        """
        self.hwp.set_font(
            FaceName='맑은 고딕',
            Height=size,
            Bold=bold,
            TextColor=self.colors.get(color, self.colors['black']),
        )

    def _set_font(self, size=11, bold=False, hex_color='#000000'):
        """Set the current font using a hex colour string."""
        self.hwp.set_font(
            FaceName='맑은 고딕',
            Height=size,
            Bold=bold,
            TextColor=self._rgb(hex_color),
        )

    def _align(self, align):
        """Apply paragraph alignment; unknown values are ignored."""
        actions = {
            'left': 'ParagraphShapeAlignLeft',
            'center': 'ParagraphShapeAlignCenter',
            'right': 'ParagraphShapeAlignRight',
            'justify': 'ParagraphShapeAlignJustify',
        }
        if align in actions:
            self.hwp.HAction.Run(actions[align])

    def _para(self, text='', size=10, color='black', bold=False, align='left'):
        """Insert one paragraph; with no text this emits an empty paragraph."""
        self._align(align)
        self._font(size, color, bold)
        if text:
            self.hwp.insert_text(text)
        self.hwp.BreakPara()

    def _exit_table(self):
        """Leave table editing, move to the document end and start a new paragraph."""
        self.hwp.HAction.Run("Cancel")
        self.hwp.HAction.Run("CloseEx")
        self.hwp.HAction.Run("MoveDocEnd")
        self.hwp.BreakPara()

    def _setup_page(self):
        """Apply the margins and header/footer lengths from ``self.cfg``.

        Failures are reported as a warning and do not abort the conversion.
        """
        try:
            self.hwp.HAction.GetDefault("PageSetup", self.hwp.HParameterSet.HSecDef.HSet)
            s = self.hwp.HParameterSet.HSecDef
            s.PageDef.LeftMargin = self._mm(self.cfg.MARGIN_LEFT)
            s.PageDef.RightMargin = self._mm(self.cfg.MARGIN_RIGHT)
            s.PageDef.TopMargin = self._mm(self.cfg.MARGIN_TOP)
            s.PageDef.BottomMargin = self._mm(self.cfg.MARGIN_BOTTOM)
            s.PageDef.HeaderLen = self._mm(self.cfg.HEADER_LEN)
            s.PageDef.FooterLen = self._mm(self.cfg.FOOTER_LEN)
            self.hwp.HAction.Execute("PageSetup", s.HSet)
            print(f"[설정] 여백: 좌우 {self.cfg.MARGIN_LEFT}mm, 상 {self.cfg.MARGIN_TOP}mm, 하 {self.cfg.MARGIN_BOTTOM}mm")
        except Exception as e:
            print(f"[경고] 페이지 설정 실패: {e}")

    # ─────────────────────────────────────────────────────────
    # Header / footer
    # ─────────────────────────────────────────────────────────
    def _open_header_footer(self, ctrl_type):
        """Run the "HeaderFooter" action with the given control type.

        Args:
            ctrl_type: Value for "HeaderFooterCtrlType"
                (``_CTRL_HEADER`` or ``_CTRL_FOOTER``).
        """
        pset = self.hwp.HParameterSet.HHeaderFooter
        self.hwp.HAction.GetDefault("HeaderFooter", pset.HSet)
        pset.HSet.SetItem("HeaderFooterStyle", 0)
        pset.HSet.SetItem("HeaderFooterCtrlType", ctrl_type)
        self.hwp.HAction.Execute("HeaderFooter", pset.HSet)

    def _create_header(self, right_text=""):
        """Create the header with right-aligned text.

        Failures are reported as a warning and do not abort the conversion.
        """
        print(f" → 머리말 생성: {right_text if right_text else '(초기화)'}")
        try:
            self._open_header_footer(self._CTRL_HEADER)
            self.hwp.HAction.Run("ParagraphShapeAlignRight")
            self._set_font(9, False, '#4a5568')
            if right_text:
                self.hwp.insert_text(right_text)
            self.hwp.HAction.Run("CloseEx")
        except Exception as e:
            print(f" [경고] 머리말: {e}")

    def _create_footer(self, left_text=""):
        """Create the footer (left-aligned text) and a bottom-right page number.

        Failures are reported as a warning, matching the header helpers, so a
        footer problem does not abort the whole conversion.
        """
        print(f" → 꼬리말: {left_text}")
        try:
            # 1. Open the footer.
            self._open_header_footer(self._CTRL_FOOTER)
            # 2. Left-aligned title at 8pt.
            self.hwp.HAction.Run("ParagraphShapeAlignLeft")
            self._set_font(8, False, '#4a5568')
            self.hwp.insert_text(left_text)
            # 3. Close the footer.
            self.hwp.HAction.Run("CloseEx")
            # 4. Page number at the bottom right.
            num_pos = self.hwp.HParameterSet.HPageNumPos
            self.hwp.HAction.GetDefault("PageNumPos", num_pos.HSet)
            num_pos.DrawPos = self.hwp.PageNumPosition("BottomRight")
            self.hwp.HAction.Execute("PageNumPos", num_pos.HSet)
        except Exception as e:
            print(f" [경고] 꼬리말: {e}")

    def _new_section_with_header(self, header_text):
        """Break into a new section and replace its header text.

        Failures are reported as a warning and do not abort the conversion.
        """
        print(f" → 새 구역 머리말: {header_text}")
        try:
            self.hwp.HAction.Run("BreakSection")
            self._open_header_footer(self._CTRL_HEADER)
            # Clear whatever the header currently holds before writing.
            self.hwp.HAction.Run("SelectAll")
            self.hwp.HAction.Run("Delete")
            self.hwp.HAction.Run("ParagraphShapeAlignRight")
            self._set_font(9, False, '#4a5568')
            self.hwp.insert_text(header_text)
            self.hwp.HAction.Run("CloseEx")
        except Exception as e:
            print(f" [경고] 구역 머리말: {e}")

    # ─────────────────────────────────────────────────────────
    # Cell background
    # ─────────────────────────────────────────────────────────
    def _set_cell_bg(self, color_name):
        """Fill the current cell with a palette colour (white if unknown)."""
        self.hwp.HAction.GetDefault("CellBorderFill", self.hwp.HParameterSet.HCellBorderFill.HSet)
        pset = self.hwp.HParameterSet.HCellBorderFill
        pset.FillAttr.type = self.hwp.BrushType("NullBrush|WinBrush")
        pset.FillAttr.WinBrushFaceStyle = self.hwp.HatchStyle("None")
        pset.FillAttr.WinBrushHatchColor = self.hwp.RGBColor(0, 0, 0)
        pset.FillAttr.WinBrushFaceColor = self.colors.get(color_name, self.colors['white'])
        pset.FillAttr.WindowsBrush = 1
        self.hwp.HAction.Execute("CellBorderFill", pset.HSet)

    # ─────────────────────────────────────────────────────────
    # HTML element conversion (briefing layout)
    # ─────────────────────────────────────────────────────────
    def _convert_lead_box(self, elem):
        """Render a lead-box as a shaded 1x1 table holding its first ``div`` text."""
        content = elem.find("div")
        if not content:
            return
        # Collapse internal runs of whitespace to single spaces.
        text = ' '.join(content.get_text(strip=True).split())
        print(" → lead-box")
        self.hwp.create_table(1, 1, treat_as_char=True)
        self._set_cell_bg('bg-light')
        self._font(11.5, 'dark-gray', False)
        self.hwp.insert_text(text)
        self._exit_table()

    def _convert_strategy_grid(self, elem):
        """Render up to four strategy-item elements into a 2x2 table."""
        items = elem.find_all(class_="strategy-item")
        if not items:
            return
        print(f" → strategy-grid: {len(items)} items")
        self.hwp.create_table(2, 2, treat_as_char=True)
        for i, item in enumerate(items[:4]):
            if i > 0:
                self.hwp.HAction.Run("MoveRight")
            self._set_cell_bg('bg-light')
            title = item.find(class_="strategy-title")
            if title:
                self._font(10, 'primary-navy', True)
                self.hwp.insert_text(title.get_text(strip=True))
                self.hwp.BreakPara()
            p = item.find("p")
            if p:
                self._font(9.5, 'dark-gray', False)
                self.hwp.insert_text(p.get_text(strip=True))
        self._exit_table()

    def _convert_process_container(self, elem):
        """Render process-step elements as a two-column table (number | content)."""
        steps = elem.find_all(class_="process-step")
        if not steps:
            return
        print(f" → process-container: {len(steps)} steps")
        self.hwp.create_table(len(steps), 2, treat_as_char=True)
        for i, step in enumerate(steps):
            if i > 0:
                self.hwp.HAction.Run("MoveRight")
            # Number cell.
            num = step.find(class_="step-num")
            self._set_cell_bg('primary-navy')
            self._font(10, 'white', True)
            self._align('center')
            if num:
                self.hwp.insert_text(num.get_text(strip=True))
            self.hwp.HAction.Run("MoveRight")
            # Content cell.
            content = step.find(class_="step-content")
            self._set_cell_bg('bg-light')
            self._font(10.5, 'dark-gray', False)
            self._align('left')
            if content:
                self.hwp.insert_text(content.get_text(strip=True))
        self._exit_table()

    def _badge_cell(self, badge):
        """Return ``(display_text, badge_kind)`` for a badge element.

        ``badge_kind`` is the matching key of ``_BADGE_MARKS`` or ``None``
        when the badge carries none of the known classes.
        """
        joined = ' '.join(badge.get('class', []))
        label = badge.get_text(strip=True)
        for kind, mark in self._BADGE_MARKS.items():
            if kind in joined:
                return (f"[{mark} {label}]", kind)
        return (f"[{label}]", None)

    @staticmethod
    def _normalize_rows(rows):
        """Drop empty rows and pad the rest to a common width.

        Args:
            rows: List of rows, each a list of ``(text, badge_kind)`` cells.

        Returns:
            ``(padded_rows, width)``; ``width`` is 0 when nothing remains.
        """
        kept = [list(row) for row in rows if row]
        width = max((len(row) for row in kept), default=0)
        padded = [row + [('', None)] * (width - len(row)) for row in kept]
        return padded, width

    def _convert_data_table(self, table):
        """Render a data-table, turning badges into marked, coloured text.

        The table is sized from the widest row and short rows are padded, so
        the cell cursor stays aligned with the data even for ragged input.
        Header styling is applied only when a ``thead`` supplied header cells.
        """
        collected = []
        has_header = False
        thead = table.find("thead")
        if thead:
            header_cells = [(th.get_text(strip=True), None) for th in thead.find_all("th")]
            has_header = bool(header_cells)
            collected.append(header_cells)
        tbody = table.find("tbody")
        if tbody:
            for tr in tbody.find_all("tr"):
                row = []
                for td in tr.find_all("td"):
                    badge = td.find(class_="badge")
                    if badge:
                        row.append(self._badge_cell(badge))
                    else:
                        row.append((td.get_text(strip=True), None))
                collected.append(row)

        data, cols = self._normalize_rows(collected)
        if not data:
            return
        rows = len(data)
        print(f" → data-table: {rows}×{cols}")
        self.hwp.create_table(rows, cols, treat_as_char=True)
        for row_idx, row in enumerate(data):
            is_header = has_header and row_idx == 0
            for col_idx, (cell_text, badge_kind) in enumerate(row):
                if is_header:
                    self._set_cell_bg('primary-navy')
                    self._font(9, 'white', True)
                elif col_idx == 0:
                    self._set_cell_bg('bg-light')
                    self._font(9.5, 'primary-navy', True)
                elif badge_kind:
                    # Badge kinds double as palette colour names.
                    self._font(9.5, badge_kind, True)
                else:
                    self._font(9.5, 'dark-gray', False)
                self._align('center')
                self.hwp.insert_text(str(cell_text))
                # Advance to the next cell, except after the very last one.
                if not (row_idx == rows - 1 and col_idx == cols - 1):
                    self.hwp.HAction.Run("MoveRight")
        self._exit_table()

    @staticmethod
    def _split_qa(text, q_text):
        """Split a Q&A item's text into ``(question, answer)``.

        Only the first occurrence of the question is removed, so an answer
        that repeats the question's wording is left intact.
        """
        return q_text, text.replace(q_text, '', 1).strip()

    def _convert_qa_grid(self, elem):
        """Render the first two qa-item elements side by side in a 1x2 table."""
        items = elem.find_all(class_="qa-item")
        if not items:
            return
        print(f" → qa-grid: {len(items)} items")
        self.hwp.create_table(1, 2, treat_as_char=True)
        for i, item in enumerate(items[:2]):
            if i > 0:
                self.hwp.HAction.Run("MoveRight")
            self._set_cell_bg('bg-light')
            text = item.get_text(strip=True)
            strong = item.find("strong")
            if strong:
                q_text, a_text = self._split_qa(text, strong.get_text(strip=True))
                self._font(9.5, 'primary-navy', True)
                self.hwp.insert_text(q_text)
                self.hwp.BreakPara()
                self._font(9.5, 'dark-gray', False)
                self.hwp.insert_text(a_text)
            else:
                self._font(9.5, 'dark-gray', False)
                self.hwp.insert_text(text)
        self._exit_table()

    def _convert_bottom_box(self, elem):
        """Render the bottom-box as a 1x2 table (navy left cell, light right cell).

        Nothing is rendered unless both halves are present.
        """
        left = elem.find(class_="bottom-left")
        right = elem.find(class_="bottom-right")
        if not left or not right:
            return
        left_text = ' '.join(left.get_text().split())
        right_text = right.get_text(strip=True)
        print(" → bottom-box")
        self.hwp.create_table(1, 2, treat_as_char=True)
        # Left cell: navy background.
        self._set_cell_bg('primary-navy')
        self._font(10.5, 'white', True)
        self._align('center')
        self.hwp.insert_text(left_text)
        self.hwp.HAction.Run("MoveRight")
        # Right cell: light background.
        self._set_cell_bg('bg-light')
        self._font(10.5, 'primary-navy', True)
        self._align('center')
        self.hwp.insert_text(right_text)
        self._exit_table()

    def _convert_section(self, section):
        """Render one section: title, grids, table, bullet list and Q&A grid."""
        # NOTE(review): the "" prefixes below are empty literals as written;
        # a bullet/marker glyph may have been intended — confirm against the
        # original file.
        title = section.find(class_="section-title")
        if title:
            self._para("" + title.get_text(strip=True), 12, 'primary-navy', True)
        strategy_grid = section.find(class_="strategy-grid")
        if strategy_grid:
            self._convert_strategy_grid(strategy_grid)
        process = section.find(class_="process-container")
        if process:
            self._convert_process_container(process)
        table = section.find("table", class_="data-table")
        if table:
            self._convert_data_table(table)
        ul = section.find("ul")
        if ul:
            for li in ul.find_all("li", recursive=False):
                keyword = li.find(class_="keyword")
                if keyword:
                    # Keyword in bold navy, the remainder in regular gray.
                    kw_text = keyword.get_text(strip=True)
                    full = li.get_text(strip=True)
                    rest = full.replace(kw_text, '', 1).strip()
                    self._font(10.5, 'primary-navy', True)
                    self.hwp.insert_text("" + kw_text + " ")
                    self._font(10.5, 'dark-gray', False)
                    self.hwp.insert_text(rest)
                    self.hwp.BreakPara()
                else:
                    self._para("" + li.get_text(strip=True), 10.5, 'dark-gray')
        qa_grid = section.find(class_="qa-grid")
        if qa_grid:
            self._convert_qa_grid(qa_grid)
        self._para()

    def _convert_sheet(self, sheet, is_first_page=False, footer_title=""):
        """Render one ``sheet`` element (one page of the briefing).

        Args:
            sheet: The parsed element for the page.
            is_first_page: When true, the header and footer are set up first.
            footer_title: Text placed in the footer on the first page.
        """
        # Header/footer are configured on the first page only.
        if is_first_page:
            # Header text comes from the right side of the page-header.
            header = sheet.find(class_="page-header")
            if header:
                right = header.find(class_="header-right")
                header_text = right.get_text(strip=True) if right else ""
                if header_text:
                    self._create_header(header_text)
            # Footer: title plus page number.
            self._create_footer(footer_title)

        # Main title.
        # NOTE(review): "" * N below evaluates to an empty string, so the
        # rule line is currently blank; the rule character appears to be
        # missing — confirm against the original file.
        title = sheet.find(class_="header-title")
        if title:
            title_text = title.get_text(strip=True)
            if '[첨부]' in title_text:
                self._para(title_text, 15, 'primary-navy', True, 'left')
                self._font(10, 'secondary-navy', False)
                self._align('left')
                self.hwp.insert_text("" * 60)
                self.hwp.BreakPara()
            else:
                self._para(title_text, 23, 'primary-navy', True, 'center')
                self._font(10, 'secondary-navy', False)
                self._align('center')
                self.hwp.insert_text("" * 45)
                self.hwp.BreakPara()
            self._para()

        # Lead box.
        lead_box = sheet.find(class_="lead-box")
        if lead_box:
            self._convert_lead_box(lead_box)
            self._para()

        # Sections.
        for section in sheet.find_all(class_="section"):
            self._convert_section(section)

        # Bottom box.
        bottom_box = sheet.find(class_="bottom-box")
        if bottom_box:
            self._para()
            self._convert_bottom_box(bottom_box)

    # ─────────────────────────────────────────────────────────
    # Main conversion entry point
    # ─────────────────────────────────────────────────────────
    def convert(self, html_path, output_path):
        """Convert the HTML file at ``html_path`` and save it to ``output_path``.

        Args:
            html_path: Path of the UTF-8 HTML input.
            output_path: Path passed to ``SaveAs`` for the HWP output.

        Raises:
            FileNotFoundError: If ``html_path`` cannot be opened.
        """
        print("=" * 60)
        print("HTML → HWP 변환기 (기획서 전용)")
        print(" ✓ 머리말/꼬리말: 보고서 방식")
        print(" ✓ Navy 테마, 기획서 요소")
        print("=" * 60)
        print(f"\n[입력] {html_path}")
        with open(html_path, 'r', encoding='utf-8') as f:
            soup = BeautifulSoup(f.read(), 'html.parser')

        # Footer title: the part of <title> before the first colon.
        title_tag = soup.find('title')
        if title_tag:
            full_title = title_tag.get_text(strip=True)
            footer_title = full_title.split(':')[0].strip()
        else:
            footer_title = ""

        self.hwp.FileNew()
        self._init_colors()
        self._setup_page()

        # Convert page by page.
        sheets = soup.find_all(class_="sheet")
        total = len(sheets)
        print(f"[변환] 총 {total} 페이지\n")
        for i, sheet in enumerate(sheets, 1):
            print(f"[{i}/{total}] 페이지 처리 중...")
            self._convert_sheet(sheet, is_first_page=(i == 1), footer_title=footer_title)
            if i < total:
                self.hwp.HAction.Run("BreakPage")

        # Save.
        self.hwp.SaveAs(output_path)
        print(f"\n✅ 저장 완료: {output_path}")

    def close(self):
        """Quit HWP; a failure to quit is reported but never raised."""
        try:
            self.hwp.Quit()
        except Exception as e:
            # Best-effort shutdown, but do not swallow KeyboardInterrupt/SystemExit.
            print(f"[경고] HWP 종료 실패: {e}")
def main():
    """Run the conversion for the hard-coded input/output paths.

    The HWP instance is closed in ``finally`` so it is released even when
    the conversion fails part-way; on success the user is first asked to
    press Enter so the result can be inspected.
    """
    html_path = r"D:\for python\geulbeot-light\geulbeot-light\output\briefing.html"
    output_path = r"D:\for python\geulbeot-light\geulbeot-light\output\briefing.hwp"
    print("=" * 60)
    print("HTML → HWP 변환기 (기획서)")
    print("=" * 60)
    print()
    converter = None
    try:
        converter = HtmlToHwpConverter(visible=True)
        converter.convert(html_path, output_path)
        print("\n" + "=" * 60)
        print("✅ 변환 완료!")
        print("=" * 60)
        input("\nEnter를 누르면 HWP가 닫힙니다...")
    except FileNotFoundError:
        print(f"\n[에러] 파일을 찾을 수 없습니다: {html_path}")
        print("경로를 확인해주세요.")
    except Exception as e:
        print(f"\n[에러] {e}")
        import traceback
        traceback.print_exc()
    finally:
        # Skip when the constructor itself failed and nothing was started.
        if converter is not None:
            converter.close()
# Run the converter only when executed as a script, not on import.
if __name__ == "__main__":
    main()