Files
llm-gateway-sub-backup/workspace/utils/image_converter.py
2025-08-11 18:56:38 +09:00

36 lines
1.1 KiB
Python

import io
from pathlib import Path
from typing import List
import httpx
def _render_pages_as_png(file_bytes: bytes, max_pages: int, dpi: int) -> List[bytes]:
    """Rasterize the leading pages of a document and return them PNG-encoded.

    Synchronous helper: it is meant to be run off the event loop because
    both the rendering and the PNG encoding are blocking work.

    Args:
        file_bytes: Raw document bytes handed to ``pdf2image``.
        max_pages: Upper bound on the number of pages to return.
        dpi: Rendering resolution passed to ``pdf2image``.

    Returns:
        One PNG byte string per rendered page, in page order.

    Raises:
        RuntimeError: If ``pdf2image`` cannot be imported.
    """
    try:
        from pdf2image import convert_from_bytes
    except ImportError as e:
        raise RuntimeError(
            "pdf2image가 필요합니다. `pip install pdf2image poppler-utils`"
        ) from e

    # Ask the renderer only for the pages we keep instead of rasterizing the
    # whole document and discarding the tail. Non-positive limits are not
    # forwarded, so they keep the plain slice semantics used below.
    page_limit = max_pages if max_pages > 0 else None
    pil_images = convert_from_bytes(file_bytes, dpi=dpi, last_page=page_limit)

    images = []
    for im in pil_images[:max_pages]:
        buf = io.BytesIO()
        im.save(buf, format="PNG")
        images.append(buf.getvalue())
    return images


async def prepare_images_from_file(
    file_url: str, filename: str, max_pages: int = 5, dpi: int = 180
) -> List[bytes]:
    """Download a file from a (presigned) URL and return it as image bytes.

    Files whose name ends in ``.pdf``, ``.tif`` or ``.tiff`` are rendered via
    ``pdf2image`` and only the first ``max_pages`` pages are returned, each
    encoded as PNG. Any other file is treated as a single image and its
    downloaded bytes are returned unchanged in a one-element list.

    Args:
        file_url: URL to download the file from.
        filename: File name; only its suffix is used to pick the branch.
        max_pages: Maximum number of pages to convert for multi-page input.
        dpi: Rendering resolution used for the page conversion.

    Returns:
        A list of image byte strings.

    Raises:
        httpx.HTTPStatusError: If the download returns an error status.
        RuntimeError: If conversion is required but ``pdf2image`` is missing.
    """
    import asyncio
    import functools

    async with httpx.AsyncClient() as client:
        # NOTE: timeout=None disables httpx's timeouts, so a stalled download
        # waits indefinitely. Kept as in the original — confirm this is intended.
        resp = await client.get(file_url, timeout=None)
        resp.raise_for_status()
        file_bytes = resp.content

    ext = Path(filename).suffix.lower()
    if ext not in (".pdf", ".tif", ".tiff"):
        # Single image: pass the downloaded bytes through untouched.
        return [file_bytes]

    # NOTE(review): TIFF input is routed through pdf2image exactly as before;
    # confirm that the installed poppler backend actually accepts TIFF data.

    # Rendering is blocking work; run it in the default executor so the event
    # loop stays responsive while pages are being rasterized.
    loop = asyncio.get_running_loop()
    return await loop.run_in_executor(
        None, functools.partial(_render_pages_as_png, file_bytes, max_pages, dpi)
    )