#!/usr/bin/env python3
"""HTML / HTM → Markdown (html2text, body_width=0)"""
from __future__ import annotations
from pathlib import Path
def convert_html(html_path: Path, output_dir: Path) -> dict:
    """Convert an HTML file to Markdown and return an AGENT_GUIDE-spec dict.

    The Markdown is written to ``<output_dir>/<html_path stem>.md``, and
    ``output_dir`` (including missing parents) is created on demand.

    Args:
        html_path: Path of the HTML/HTM file to convert.
        output_dir: Directory that receives the generated ``.md`` file.

    Returns:
        A dict with the keys ``status`` (``"ok"`` or ``"error"``), ``input``,
        ``output`` and ``format`` (always ``"html"``). When anything fails,
        ``status`` is ``"error"`` and an ``error`` key holds the exception
        message; the failure is reported through the dict, not raised.
    """
    html_path = Path(html_path)
    output_dir = Path(output_dir)
    md_path = output_dir / f'{html_path.stem}.md'
    result = {
        "status": "ok",
        "input": str(html_path),
        "output": str(md_path),
        "format": "html",
    }
    try:
        # Created inside the try so an unusable output directory is reported
        # through the result dict like every other failure.
        output_dir.mkdir(parents=True, exist_ok=True)

        # Imported lazily so a missing html2text installation also surfaces
        # as an error result instead of breaking the module import.
        import html2text

        converter = html2text.HTML2Text()
        converter.body_width = 0  # 0 disables hard line wrapping
        converter.ignore_links = False
        converter.ignore_images = False

        # 'utf-8-sig' drops a leading BOM (otherwise it would reach the
        # converter as a stray U+FEFF) and is identical to 'utf-8' when no
        # BOM is present. Undecodable bytes are still skipped (best effort).
        content = html_path.read_text(encoding='utf-8-sig', errors='ignore')
        md_path.write_text(converter.handle(content), encoding='utf-8')
    except Exception as e:
        result['status'] = 'error'
        result['error'] = str(e)
    return result