#!/usr/bin/env python3
"""HTML / HTM → Markdown (html2text, body_width=0)"""
from __future__ import annotations

from pathlib import Path


def convert_html(html_path: Path, output_dir: Path) -> dict:
    """Convert an HTML file to Markdown and report the outcome as a dict.

    The Markdown is written to ``<output_dir>/<html_path stem>.md``.
    Failures are not raised; they are reported through the returned dict
    (the result shape referred to as the AGENT_GUIDE spec).

    Args:
        html_path: Path of the HTML/HTM file to read.
        output_dir: Directory that receives the ``.md`` file; it is created
            (including parents) when missing.

    Returns:
        A dict with ``status`` (``"ok"`` or ``"error"``), ``input``,
        ``output`` and ``format`` (always ``"html"``). When ``status`` is
        ``"error"`` an ``error`` key holds the exception message.
    """
    html_path = Path(html_path)
    output_dir = Path(output_dir)
    md_path = output_dir / f'{html_path.stem}.md'

    result = {
        "status": "ok",
        "input": str(html_path),
        "output": str(md_path),
        "format": "html",
    }

    try:
        # Directory creation lives inside the try so that a failure here is
        # reported through the result dict like every other failure.
        output_dir.mkdir(parents=True, exist_ok=True)

        # Imported lazily: a missing html2text install becomes an "error"
        # result instead of breaking the import of this module.
        import html2text

        converter = html2text.HTML2Text()
        converter.body_width = 0  # 0 turns off html2text's line wrapping
        converter.ignore_links = False
        converter.ignore_images = False

        # Undecodable bytes are dropped rather than aborting the conversion.
        content = html_path.read_text(encoding='utf-8', errors='ignore')
        md_path.write_text(converter.handle(content), encoding='utf-8')
    except Exception as e:
        # Reporting boundary: every failure is surfaced via the result dict.
        result['status'] = 'error'
        result['error'] = str(e)

    return result