#!/usr/bin/env python3
"""Convert HWP to Markdown (HwpToPdfConverter.exe → HWPX → MD)."""
from __future__ import annotations

import subprocess
from pathlib import Path

# Converter executable, located relative to the directory two levels above
# this file.
_EXE_PATH = (
    Path(__file__).parent.parent
    / 'HwpToHwpxConverter_260420'
    / 'HwpToPdfConverter.exe'
)


def _exe_hwp_to_hwpx(hwp_path: Path, timeout: int = 30) -> Path | None:
    """Convert an HWP file to HWPX by running the external executable.

    The executable is invoked with the HWP path as its only argument and is
    expected to write a sibling file with the same name and a ``.hwpx``
    suffix (this function only looks for the output at that location).

    Args:
        hwp_path: Path of the ``.hwp`` file to convert.
        timeout: Maximum number of seconds to wait for the executable.

    Returns:
        The path of the ``.hwpx`` file on success, otherwise ``None``.
        Every failure is reported with a printed warning; no exception
        raised while running or checking the executable escapes.
    """
    if not _EXE_PATH.exists():
        print(f' [경고] exe 없음: {_EXE_PATH}')
        return None

    # Fail fast on a missing input instead of launching the executable and
    # waiting (possibly until the timeout) for an output that cannot appear.
    if not hwp_path.is_file():
        print(f' [경고] 입력 파일 없음: {hwp_path}')
        return None

    hwpx_path = hwp_path.with_suffix('.hwpx')
    existed_before = hwpx_path.exists()
    try:
        # The exit code and captured output are intentionally not inspected;
        # success is judged solely by the presence of the output file.
        subprocess.run(
            [str(_EXE_PATH), str(hwp_path)],
            timeout=timeout,
            capture_output=True,
        )
        # A .hwpx that already existed before the run is trusted only when
        # it is strictly newer than the input; otherwise it is a stale
        # leftover rather than a conversion of the current file.
        if hwpx_path.exists() and (
            not existed_before
            or hwpx_path.stat().st_mtime > hwp_path.stat().st_mtime
        ):
            return hwpx_path
        print(' [경고] exe 실행 후 .hwpx 파일 없음')
        return None
    except subprocess.TimeoutExpired:
        print(f' [경고] exe 타임아웃 ({timeout}초)')
        return None
    except Exception as e:
        # Best-effort boundary: any launch or stat failure becomes a warning.
        print(f' [경고] exe 오류: {e}')
        return None


def convert_hwp(hwp_path: Path, output_dir: Path) -> dict:
    """Convert an HWP file to Markdown and return an AGENT_GUIDE-spec dict.

    The file is first converted to HWPX with the external executable, then
    handed to ``converters.hwpx.convert_hwpx`` for the HWPX → Markdown step.

    Args:
        hwp_path: Path of the ``.hwp`` input (anything ``Path()`` accepts).
        output_dir: Directory for the output; created if it does not exist.

    Returns:
        A dict with the keys ``status`` (``"ok"`` or ``"error"``), ``input``,
        ``output`` (``<output_dir>/<input stem>.md``) and ``format``
        (``"hwp"``). On success ``images`` is added, copied from the HWPX
        converter's result; on failure ``error`` holds a message.
    """
    hwp_path = Path(hwp_path)
    output_dir = Path(output_dir)
    output_dir.mkdir(parents=True, exist_ok=True)
    md_path = output_dir / f'{hwp_path.stem}.md'

    result = {
        "status": "ok",
        "input": str(hwp_path),
        "output": str(md_path),
        "format": "hwp",
    }

    try:
        hwpx_path = _exe_hwp_to_hwpx(hwp_path)
        if hwpx_path:
            # Imported lazily so an import failure is reported through the
            # result dict instead of breaking the import of this module.
            from converters.hwpx import convert_hwpx

            r = convert_hwpx(hwpx_path, output_dir)
            # .get() keeps a result without a 'status' key on the normal
            # failure path instead of surfacing a bare KeyError message.
            if r.get('status') == 'ok':
                result['images'] = r.get('images', [])
                return result
            result['error'] = r.get('error', 'hwpx 변환 실패')
        else:
            result['error'] = 'HWP → HWPX 변환 실패 — HwpToPdfConverter.exe 확인 필요'
    except Exception as e:
        # Top-level boundary: callers receive an error dict, never a raise.
        result['error'] = str(e)

    # Shared failure exit, so every failure path prints the same warning.
    result['status'] = 'error'
    print(f' [경고] HWP 변환 실패: {hwp_path.name}')
    return result