Upload converters/pipeline/step9_html.py

This commit is contained in:
2026-03-19 09:13:26 +09:00
parent 3254de425d
commit 46ee059753

View File

@@ -0,0 +1,103 @@
# -*- coding: utf-8 -*-
from dotenv import load_dotenv
load_dotenv()
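"""
9_md_to_html_publisher.py
Purpose:
- Reads the generated report_draft.md file and produces the final report.html.
- Converts Markdown to HTML and applies the embedded stylesheet (Word/HWP-like styling).
- NOTE(review): automatic table-of-contents (TOC) generation with links is listed as a
  feature of this step, but no TOC code is visible in this file — confirm before relying on it.
"""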
import os
import re
import json
from pathlib import Path
from datetime import datetime
from typing import List, Dict, Any, Tuple
def log(msg: str):
    """Print *msg* to stdout, prefixed with the current time as ``[HH:MM:SS]``."""
    timestamp = datetime.now().strftime('%H:%M:%S')
    print(f"[{timestamp}] {msg}")
class MarkdownToHtmlConverter:
    """Minimal regex-based Markdown to HTML converter.

    Supported syntax: ATX headings (``#`` to ``######``), ``**bold**`` and
    ``-`` bullet items. Everything else is passed through unchanged, so
    inline HTML in the Markdown source is NOT escaped.
    """

    # "# Title" .. "###### Title": one to six hashes, one space, then the text.
    _HEADING_RE = re.compile(r'^(#{1,6}) (.*)$')
    # Non-greedy so that two bold spans on one line stay separate.
    _BOLD_RE = re.compile(r'\*\*(.*?)\*\*')
    # [ \t] rather than \s: \s also matches "\n", which would let a match
    # swallow blank lines before an item or pull the next line into the item.
    _LIST_ITEM_RE = re.compile(r'^[ \t]*-[ \t]+(.*)$')

    def __init__(self):
        pass

    def convert(self, md_content: str) -> str:
        """Convert Markdown text to an HTML fragment.

        Args:
            md_content: Markdown source text using ``\\n`` line endings.

        Returns:
            HTML fragment. Headings become ``<h1>``..``<h6>``, runs of
            consecutive bullet lines become a single ``<ul>`` of ``<li>``
            elements, and every other line that is followed by a newline in
            the source gets a trailing ``<br>``. Block-level lines (headings,
            list markup) never get a ``<br>``.
        """
        lines = md_content.split('\n')
        last_index = len(lines) - 1
        rendered = []
        in_list = False

        for index, raw_line in enumerate(lines):
            line = self._BOLD_RE.sub(r'<strong>\1</strong>', raw_line)

            item = self._LIST_ITEM_RE.match(line)
            if item:
                if not in_list:
                    # <li> is only valid inside a list container.
                    rendered.append('<ul>')
                    in_list = True
                rendered.append(f'<li>{item.group(1)}</li>')
                continue

            if in_list:
                rendered.append('</ul>')
                in_list = False

            heading = self._HEADING_RE.match(line)
            if heading:
                level = len(heading.group(1))
                rendered.append(f'<h{level}>{heading.group(2)}</h{level}>')
            elif index < last_index:
                # Plain (or blank) line followed by a newline in the source.
                rendered.append(line + '<br>')
            else:
                # Final segment has no newline after it, hence no <br>.
                rendered.append(line)

        if in_list:
            rendered.append('</ul>')
        return '\n'.join(rendered)
def get_html_template(title: str, content: str) -> str:
    """Wrap a rendered HTML body in the full report page (head, styles, body).

    Args:
        title: Plain-text document title. It is HTML-escaped before being
            placed in the ``<title>`` element, so characters such as ``&``
            or ``<`` in a heading cannot break the markup.
        content: Body HTML. Inserted verbatim inside ``<div class="content">``.

    Returns:
        The complete HTML document as a string.
    """
    # Function-scope import: the module-level imports are left untouched.
    from html import escape

    # quote=False: quotes are harmless in element text, only & < > matter.
    safe_title = escape(title, quote=False)
    return f"""<!DOCTYPE html>
<html lang="ko">
<head>
<meta charset="UTF-8">
<title>{safe_title}</title>
<style>
@import url('https://fonts.googleapis.com/css2?family=Noto+Sans+KR:wght@300;400;500;700;900&display=swap');
body {{ font-family: 'Noto Sans KR', sans-serif; line-height: 1.6; padding: 50px; max-width: 900px; margin: auto; color: #333; }}
h1 {{ color: #1a365d; border-bottom: 3px solid #1a365d; padding-bottom: 10px; text-align: center; }}
h2 {{ color: #2c5282; border-left: 5px solid #2c5282; padding-left: 15px; margin-top: 40px; }}
h3 {{ color: #2b6cb0; margin-top: 30px; }}
h4 {{ color: #4a5568; background: #edf2f7; padding: 8px 15px; border-radius: 5px; }}
strong {{ color: #2d3748; }}
.content {{ margin-top: 30px; }}
@media print {{ body {{ padding: 0; }} .no-print {{ display: none; }} }}
</style>
</head>
<body>
<div class="content">
{content}
</div>
</body>
</html>"""
def main(input_dir, output_dir):
    """Render ``<output_dir>/generated/report_draft.md`` to ``report.html``.

    The HTML file is written next to the Markdown draft. If the draft does
    not exist, a message is logged and nothing is written.

    Args:
        input_dir: Not used by this step.
            NOTE(review): presumably kept so all pipeline steps share one
            signature — confirm against the caller.
        output_dir: Root output directory; the draft is read from, and the
            HTML written to, its ``generated`` sub-directory.

    Side effects:
        Rebinds the module globals ``OUTPUT_ROOT`` and ``GEN_DIR``.
    """
    global OUTPUT_ROOT, GEN_DIR
    OUTPUT_ROOT = Path(output_dir)
    GEN_DIR = OUTPUT_ROOT / "generated"

    source_md = GEN_DIR / "report_draft.md"
    target_html = GEN_DIR / "report.html"

    if not source_md.exists():
        log(f"대상 파일 없음: {source_md}")
        return

    log("HTML 변환 작업 시작...")
    markdown_text = source_md.read_text(encoding="utf-8")

    # The first level-1 heading doubles as the page title.
    heading = re.search(r'^# (.*)$', markdown_text, re.MULTILINE)
    if heading:
        title = heading.group(1)
    else:
        title = "보고서"

    body_html = MarkdownToHtmlConverter().convert(markdown_text)
    page = get_html_template(title, body_html)
    target_html.write_text(page, encoding="utf-8")
    log(f"최종 HTML 생성 완료: {target_html}")
if __name__ == "__main__":
    # main() requires input_dir and output_dir; calling it with no arguments
    # raised TypeError, so take both from the command line instead.
    import argparse

    parser = argparse.ArgumentParser(
        description="Convert generated/report_draft.md into generated/report.html."
    )
    parser.add_argument("input_dir", help="pipeline input directory")
    parser.add_argument(
        "output_dir",
        help="pipeline output root containing the 'generated' directory",
    )
    cli_args = parser.parse_args()
    main(cli_args.input_dir, cli_args.output_dir)