- hwp.py: COM/pyhwp 제거, HwpToPdfConverter.exe → hwpx 컨버터 재사용으로 단순화 - hwpx.py, hml.py: 이미지 경로의 공백/대괄호 URL 인코딩(%20, %5B, %5D) 추가 (Obsidian 등 Markdown 뷰어에서 [기본이론] 포함 파일명 이미지 표시 오류 수정) Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
65 lines
2.2 KiB
Python
65 lines
2.2 KiB
Python
#!/usr/bin/env python3
|
|
"""HWP → Markdown (HwpToPdfConverter.exe → HWPX → MD)"""
|
|
from __future__ import annotations
|
|
|
|
import subprocess
|
|
from pathlib import Path
|
|
|
|
# Location of the external converter executable, resolved relative to this
# file: two directory levels up, then inside 'HwpToHwpxConverter_260420'.
# NOTE(review): the executable is named HwpToPdfConverter.exe although this
# module relies on it to produce a .hwpx file — confirm the name is intended.
_EXE_PATH = Path(__file__).parent.parent / 'HwpToHwpxConverter_260420' / 'HwpToPdfConverter.exe'
|
|
|
|
|
|
def _exe_hwp_to_hwpx(hwp_path: Path, timeout: int = 30) -> Path | None:
    """Convert an HWP file to HWPX by running the external converter exe.

    The executable at ``_EXE_PATH`` is invoked with the HWP path as its only
    argument. Afterwards this function looks for a sibling file with the same
    name and a ``.hwpx`` suffix.

    Args:
        hwp_path: Path of the source ``.hwp`` file.
        timeout: Seconds to wait for the converter process before giving up.

    Returns:
        The ``.hwpx`` path when the file is present after the run and is
        newly created, was rewritten by this run, or is newer than the
        source file. ``None`` when the exe is missing, the run times out,
        an error occurs, or no usable ``.hwpx`` is found; each of those
        cases prints a warning.
    """
    if not _EXE_PATH.exists():
        print(f' [경고] exe 없음: {_EXE_PATH}')
        return None

    hwpx_path = hwp_path.with_suffix('.hwpx')
    try:
        # Remember the timestamp of any pre-existing output so that a file
        # rewritten by this run can be recognised even when its mtime is not
        # strictly greater than the source's (coarse timestamps, clock skew).
        try:
            mtime_before = hwpx_path.stat().st_mtime_ns
        except FileNotFoundError:
            mtime_before = None

        proc = subprocess.run(
            [str(_EXE_PATH), str(hwp_path)],
            timeout=timeout,
            capture_output=True,
        )

        try:
            mtime_after = hwpx_path.stat().st_mtime_ns
        except FileNotFoundError:
            print(f' [경고] exe 실행 후 .hwpx 파일 없음 (종료 코드: {proc.returncode})')
            return None

        # ``mtime_after != mtime_before`` is also true when the file did not
        # exist before (None), so a freshly created file is accepted without
        # touching the source file's metadata.
        if mtime_after != mtime_before or mtime_after > hwp_path.stat().st_mtime_ns:
            return hwpx_path

        # The file exists but is an untouched leftover older than the source.
        print(f' [경고] exe 실행 후 .hwpx 파일이 갱신되지 않음 (종료 코드: {proc.returncode})')
        return None
    except subprocess.TimeoutExpired:
        print(f' [경고] exe 타임아웃 ({timeout}초)')
        return None
    except Exception as e:
        # Deliberately broad: conversion is best-effort and the caller only
        # distinguishes "got a path" from "got None".
        print(f' [경고] exe 오류: {e}')
        return None
|
|
|
|
|
|
def convert_hwp(hwp_path: Path, output_dir: Path) -> dict:
    """Convert an HWP file to Markdown and return an AGENT_GUIDE-style dict.

    The HWP file is first turned into HWPX via ``_exe_hwp_to_hwpx``; the
    resulting file is then handed to ``converters.hwpx.convert_hwpx`` together
    with ``output_dir`` (created if it does not exist).

    Args:
        hwp_path: Source ``.hwp`` file.
        output_dir: Directory passed on to the HWPX converter.

    Returns:
        A dict with ``status``, ``input``, ``output`` and ``format`` keys.
        On success ``status`` is ``"ok"`` and ``images`` carries the list
        reported by the HWPX converter; on failure ``status`` is ``"error"``
        and ``error`` holds a message.
    """
    source = Path(hwp_path)
    target_dir = Path(output_dir)
    target_dir.mkdir(parents=True, exist_ok=True)

    report = {
        "status": "ok",
        "input": str(source),
        "output": str(target_dir / f'{source.stem}.md'),
        "format": "hwp",
    }

    try:
        intermediate = _exe_hwp_to_hwpx(source)
        if not intermediate:
            report['error'] = 'HWP → HWPX 변환 실패 — HwpToPdfConverter.exe 확인 필요'
        else:
            # Imported lazily so an import problem is reported through the
            # result dict instead of failing at module import time.
            from converters.hwpx import convert_hwpx

            inner = convert_hwpx(intermediate, target_dir)
            if inner['status'] == 'ok':
                report['images'] = inner.get('images', [])
                return report
            report['error'] = inner.get('error', 'hwpx 변환 실패')

        # Reached only when one of the two conversion stages failed.
        report['status'] = 'error'
        print(f' [경고] HWP 변환 실패: {source.name}')
    except Exception as exc:
        report.update(status='error', error=str(exc))
    return report
|