원 레포랑 완전 분리

This commit is contained in:
ai-cell-a100-1
2025-08-11 18:56:38 +09:00
commit 7217d3cbaa
86 changed files with 6631 additions and 0 deletions

View File

@@ -0,0 +1,35 @@
import io
from pathlib import Path
from typing import List
import httpx
async def prepare_images_from_file(
    file_url: str, filename: str, max_pages: int = 5, dpi: int = 180
) -> List[bytes]:
    """Download a file and return its content as a list of image byte strings.

    The file is fetched from ``file_url`` (for example a presigned URL). When
    the extension of ``filename`` is ``.pdf``, ``.tif`` or ``.tiff`` the
    payload is handed to ``pdf2image`` and the leading pages are re-encoded
    as PNG. Any other extension is treated as a single image and the
    downloaded bytes are returned untouched.

    Args:
        file_url: URL the file is downloaded from.
        filename: Name used only to determine the file extension.
        max_pages: Maximum number of leading pages to convert.
        dpi: Rendering resolution passed to ``pdf2image``.

    Returns:
        PNG-encoded pages for the document branch, otherwise a one-element
        list holding the raw downloaded bytes.

    Raises:
        httpx.HTTPStatusError: If the download responds with an error status.
        RuntimeError: If ``pdf2image`` cannot be imported.
    """
    async with httpx.AsyncClient() as client:
        # NOTE(review): timeout=None disables httpx's timeouts for this
        # request, so a stalled download waits indefinitely. Confirm this is
        # intended (e.g. for very large files) or set an explicit limit.
        resp = await client.get(file_url, timeout=None)
        resp.raise_for_status()
        file_bytes = resp.content

    ext = Path(filename).suffix.lower()
    if ext not in (".pdf", ".tif", ".tiff"):
        # Single image: pass the downloaded bytes through unchanged.
        return [file_bytes]

    # Imported lazily so callers that only handle plain images do not need
    # pdf2image installed.
    try:
        from pdf2image import convert_from_bytes
    except ImportError as e:
        raise RuntimeError(
            "pdf2image가 필요합니다. `pip install pdf2image poppler-utils`"
        ) from e

    # Render only the pages that will be kept instead of the whole document.
    # A non-positive max_pages keeps the previous "render everything, then
    # slice" path so the slice below retains its original meaning.
    last_page = max_pages if max_pages > 0 else None

    # NOTE(review): pdf2image is a PDF renderer; confirm that .tif/.tiff
    # payloads actually convert here rather than raising.
    # NOTE(review): this call is synchronous and runs inside the coroutine;
    # consider offloading it if event-loop latency matters.
    pages = convert_from_bytes(
        file_bytes, dpi=dpi, first_page=1, last_page=last_page
    )

    images: List[bytes] = []
    for page in pages[:max_pages]:
        buf = io.BytesIO()
        page.save(buf, format="PNG")
        images.append(buf.getvalue())
    return images