# -*- coding: utf-8 -*-
"""Step 9 of the converter pipeline: publish ``report_draft.md`` as HTML.

Reads ``<output_dir>/generated/report_draft.md``, applies a small set of
regex substitutions (headings, bold text, list items, line breaks) and writes
the result, wrapped in a page template, to
``<output_dir>/generated/report.html``.

NOTE(review): the original header comment also advertised automatic
table-of-contents generation and a Word/HWP-like stylesheet. No TOC logic
exists in this module, and the stylesheet text was not present in the source
as received -- see ``_STYLESHEET``.
"""

import json
import os
import re
import sys
from datetime import datetime
from html import escape
from pathlib import Path
from typing import Any, Dict, List, Tuple

# python-dotenv is treated as optional: nothing in this module reads an
# environment variable, so a missing package should not block publishing.
try:
    from dotenv import load_dotenv
except ImportError:
    load_dotenv = None

if load_dotenv is not None:
    load_dotenv()

# NOTE(review): the inline CSS of the original template could not be
# recovered from the source as received. Restore it here; it is injected
# verbatim into the <style> element of the page.
_STYLESHEET = ""


def log(msg: str) -> None:
    """Print *msg* to stdout, prefixed with the current local time (HH:MM:SS)."""
    print(f"[{datetime.now().strftime('%H:%M:%S')}] {msg}")


class MarkdownToHtmlConverter:
    """Minimal regex-based Markdown to HTML converter.

    Supported constructs: ``#`` to ``####`` headings, ``**bold**`` and
    ``-`` list items. Everything else passes through unchanged (no escaping).
    """

    # Applied in order. Deeper headings come first so that ``#### x`` is not
    # picked up by a shorter heading rule.
    # NOTE(review): the tag names in the replacement strings were stripped
    # from the source as received; these are the conventional equivalents --
    # confirm against the original file.
    _RULES = (
        (re.compile(r'^#### (.*)$', re.MULTILINE), r'<h4>\1</h4>'),
        (re.compile(r'^### (.*)$', re.MULTILINE), r'<h3>\1</h3>'),
        (re.compile(r'^## (.*)$', re.MULTILINE), r'<h2>\1</h2>'),
        (re.compile(r'^# (.*)$', re.MULTILINE), r'<h1>\1</h1>'),
        # Bold.
        (re.compile(r'\*\*(.*?)\*\*'), r'<strong>\1</strong>'),
        # List items. ``\s`` also matches newlines, so blank lines directly
        # in front of an item are absorbed into the match.
        (re.compile(r'^\s*-\s+(.*)$', re.MULTILINE), r'<li>\1</li>'),
    )

    def convert(self, md_content: str) -> str:
        """Return *md_content* converted to an HTML fragment.

        Args:
            md_content: Markdown source text.

        Returns:
            The text after all substitution rules have run, with a ``<br>``
            inserted in front of every newline.
        """
        html = md_content
        for pattern, replacement in self._RULES:
            html = pattern.sub(replacement, html)

        # Line breaks: every newline becomes a visible break.
        return html.replace('\n', '<br>\n')


def get_html_template(title: str, content: str) -> str:
    """Wrap an HTML fragment in a complete HTML page.

    Args:
        title: Plain-text document title; HTML-escaped before insertion so
            characters such as ``&`` or ``<`` cannot break the markup.
        content: HTML fragment placed in the page body as-is.

    Returns:
        The full HTML document as a string.
    """
    # NOTE(review): the page skeleton (including the wrapper element and its
    # class name) is reconstructed; only the {title} and {content} slots were
    # visible in the source as received.
    return f"""<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>{escape(title)}</title>
<style>{_STYLESHEET}</style>
</head>
<body>
<div class="container">
{content}
</div>
</body>
</html>
"""


def main(input_dir, output_dir) -> None:
    """Convert ``generated/report_draft.md`` under *output_dir* to HTML.

    Args:
        input_dir: Not used by this step; kept so the signature stays
            unchanged for existing callers.
        output_dir: Directory containing the ``generated`` folder that holds
            ``report_draft.md`` and receives ``report.html``.

    Side effects:
        Sets the module globals ``OUTPUT_ROOT`` and ``GEN_DIR``, writes
        ``report.html`` and logs progress. Returns without writing anything
        when the draft file does not exist.
    """
    global OUTPUT_ROOT, GEN_DIR
    OUTPUT_ROOT = Path(output_dir)
    GEN_DIR = OUTPUT_ROOT / "generated"

    md_path = GEN_DIR / "report_draft.md"
    out_path = GEN_DIR / "report.html"

    if not md_path.exists():
        log(f"대상 파일 없음: {md_path}")
        return

    log("HTML 변환 작업 시작...")
    md_content = md_path.read_text(encoding="utf-8")

    # The first level-1 heading doubles as the page title.
    title_match = re.search(r'^# (.*)$', md_content, re.MULTILINE)
    title = title_match.group(1) if title_match else "보고서"

    html_body = MarkdownToHtmlConverter().convert(md_content)
    final_html = get_html_template(title, html_body)

    out_path.write_text(final_html, encoding="utf-8")
    log(f"최종 HTML 생성 완료: {out_path}")


if __name__ == "__main__":
    # main() requires both directories; the original guard called it with no
    # arguments, which raised TypeError as soon as the script was run.
    if len(sys.argv) != 3:
        sys.exit(f"usage: {Path(sys.argv[0]).name} INPUT_DIR OUTPUT_DIR")
    main(sys.argv[1], sys.argv[2])