Initial commit

This commit is contained in:
kyy
2025-08-12 10:10:59 +09:00
commit 82d40f625a
20 changed files with 1196 additions and 0 deletions

3
router/__init__.py Normal file
View File

@@ -0,0 +1,3 @@
# Package entry point: re-export the router defined in ``ocr_router.py`` under
# the name ``ocr_router`` so it can be imported directly from the package.
from .ocr_router import router as ocr_router
# Restrict the package's star-import surface to the re-exported router.
__all__ = ["ocr_router"]

127
router/ocr_router.py Normal file
View File

@@ -0,0 +1,127 @@
import json
import os
import tempfile
from datetime import datetime
from typing import List
from celery import chain
from celery.result import AsyncResult
from fastapi import APIRouter, File, HTTPException, UploadFile
from fastapi.responses import JSONResponse
from tasks import (
call_upstage_ocr_api,
celery_app,
parse_ocr_text,
store_ocr_result,
)
from utils.checking_keys import create_key
from utils.redis_utils import get_redis_client
# Router that collects the OCR endpoints defined below; every route is served
# under the "/ocr" prefix and grouped under the "OCR" tag.
router = APIRouter(prefix="/ocr", tags=["OCR"])
# Module-wide Redis client, created once when this module is imported. The
# handlers below use it for the request_id -> task_id mapping hash, the
# per-request status log lists and the stored OCR results.
redis_client = get_redis_client()
# Upload bodies are copied to disk in pieces of this size so that a large file
# is never held in memory in full.
_UPLOAD_CHUNK_SIZE = 1024 * 1024


def _remove_quietly(path):
    """Delete *path* if it is set, ignoring filesystem errors (best-effort cleanup)."""
    if not path:
        return
    try:
        os.remove(path)
    except OSError:
        # Cleanup must never mask the error that triggered it.
        pass


async def _save_upload_to_temp(file):
    """Copy one upload into a named temp file and return the temp file's path.

    The temp file keeps the upload's extension and is created with
    ``delete=False`` so it survives until the OCR task has consumed it. The
    upload is always closed. On any failure the partially written temp file is
    removed and an ``HTTPException`` (500) is raised.
    """
    suffix = os.path.splitext(file.filename)[-1]
    tmp_path = ""
    try:
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_file:
            # Remember the path immediately so a failed read/write can still
            # clean the file up.
            tmp_path = tmp_file.name
            while True:
                chunk = await file.read(_UPLOAD_CHUNK_SIZE)
                if not chunk:
                    break
                tmp_file.write(chunk)
    except Exception as e:
        _remove_quietly(tmp_path)
        raise HTTPException(
            status_code=500, detail=f"파일 저장 실패: {str(e)}"
        ) from e
    finally:
        await file.close()
    return tmp_path


async def _process_ocr_request(files: List[UploadFile], ocr_task):
    """Store each upload on disk and enqueue an OCR task chain for it.

    For every file this:
      1. copies the upload into a temp file,
      2. generates a ``request_id`` and a ``task_id`` with ``create_key()``,
      3. records ``request_id -> task_id`` in the ``ocr_task_mapping`` hash,
      4. appends an initial entry to the ``ocr_status:{request_id}`` list
         (best-effort),
      5. dispatches ``ocr_task`` chained with ``store_ocr_result``.

    The mapping and the initial log entry are written *before* the chain is
    dispatched, so a Redis failure can never delete a temp file that an
    already-queued task still needs.

    Args:
        files: Uploaded files; every entry must have a filename.
        ocr_task: Celery task used as the first step of the chain. It is called
            with ``tmp_path``, ``request_id`` and ``file_name`` keyword
            arguments.

    Returns:
        A ``JSONResponse`` whose ``results`` list holds one entry per file
        (message, request_id, task_id, status_check_url, filename).

    Raises:
        HTTPException: 400 if any upload has no filename; 500 if saving the
            file, writing the task mapping, or dispatching the chain fails.
    """
    # Function-scope import keeps this block self-contained; the module does
    # not import ``logging`` at the top.
    import logging

    logger = logging.getLogger(__name__)

    # Validate the whole batch first: rejecting a nameless file halfway through
    # would return 400 while earlier files had already been queued.
    for file in files:
        if not file.filename:
            raise HTTPException(status_code=400, detail="파일 이름이 없습니다.")

    results = []
    for file in files:
        tmp_path = await _save_upload_to_temp(file)

        request_id = create_key()
        task_id = create_key()
        status_key = f"ocr_status:{request_id}"

        try:
            redis_client.hset("ocr_task_mapping", request_id, task_id)
        except Exception as e:
            # Nothing has been queued yet, so removing the temp file is safe.
            _remove_quietly(tmp_path)
            raise HTTPException(
                status_code=500, detail=f"작업 정보 저장 오류: {str(e)}"
            ) from e

        # The initial status entry is best-effort: failing to write it must not
        # reject the request, but the failure is logged instead of being lost.
        try:
            log_entry = {
                "status": "작업 접수",
                "timestamp": datetime.now().isoformat(),
                "task_id": task_id,
                "initial_file": file.filename,
            }
            redis_client.rpush(status_key, json.dumps(log_entry))
        except Exception:
            logger.warning(
                "Could not write initial status log for request %s",
                request_id,
                exc_info=True,
            )

        task_chain = chain(
            ocr_task.s(
                tmp_path=tmp_path, request_id=request_id, file_name=file.filename
            ),
            store_ocr_result.s(request_id=request_id, task_id=task_id),
        )
        try:
            # The same task_id is later used by the progress endpoint to build
            # an AsyncResult.
            task_chain.apply_async(task_id=task_id)
        except Exception as e:
            # Roll back everything recorded for this request so no temp file or
            # dangling mapping is left behind for a task that never started.
            _remove_quietly(tmp_path)
            try:
                redis_client.hdel("ocr_task_mapping", request_id)
                redis_client.delete(status_key)
            except Exception:
                logger.warning(
                    "Could not roll back Redis entries for request %s",
                    request_id,
                    exc_info=True,
                )
            raise HTTPException(
                status_code=500, detail=f"작업 등록 오류: {str(e)}"
            ) from e

        results.append(
            {
                "message": "OCR 작업이 접수되었습니다.",
                "request_id": request_id,
                "task_id": task_id,
                "status_check_url": f"/ocr/progress/{request_id}",
                "filename": file.filename,
            }
        )
    return JSONResponse(content={"results": results})
@router.post("/paddle", summary="[Paddle] 파일 업로드 기반 비동기 OCR")
async def ocr_paddle_endpoint(files: List[UploadFile] = File(...)):
    # Accepts one or more uploaded files and queues each of them for OCR with
    # the ``parse_ocr_text`` task as the first step of the chain.
    # Documented with comments rather than a docstring because FastAPI
    # publishes a route's docstring as its OpenAPI description.
    response = await _process_ocr_request(files=files, ocr_task=parse_ocr_text)
    return response
@router.post("/upstage", summary="[Upstage] 파일 업로드 기반 비동기 OCR")
async def ocr_upstage_endpoint(files: List[UploadFile] = File(...)):
    # Accepts one or more uploaded files and queues each of them for OCR with
    # the ``call_upstage_ocr_api`` task as the first step of the chain.
    # Documented with comments rather than a docstring because FastAPI
    # publishes a route's docstring as its OpenAPI description.
    response = await _process_ocr_request(
        files=files, ocr_task=call_upstage_ocr_api
    )
    return response
@router.get("/progress/{request_id}", summary="📊 OCR 진행 상태 및 결과 조회")
async def check_progress(request_id: str):
    # Reports the state of one OCR request: the Celery status of its task, the
    # status log entries stored under ``ocr_status:{request_id}``, and - once
    # the task has succeeded - the result stored under ``ocr_result:{task_id}``.
    #
    # Raises HTTPException 404 when no task is mapped to ``request_id`` and 500
    # when the mapping cannot be read from Redis.
    #
    # Documented with comments rather than a docstring because FastAPI
    # publishes a route's docstring as its OpenAPI description.
    try:
        task_id = redis_client.hget("ocr_task_mapping", request_id)
    except Exception as e:
        # Report a Redis failure the same way the upload handler does, instead
        # of letting it escape as an unhandled error.
        raise HTTPException(
            status_code=500, detail=f"작업 정보 조회 오류: {str(e)}"
        ) from e
    if not task_id:
        raise HTTPException(
            status_code=404, detail=f"ID {request_id} 작업을 찾을 수 없습니다."
        )
    # Defensive: if the Redis client is not configured to decode responses it
    # returns bytes, which would corrupt the result key below and cannot be
    # JSON-serialized in the response.
    if isinstance(task_id, bytes):
        task_id = task_id.decode("utf-8")

    status = AsyncResult(task_id, app=celery_app).status

    # Log retrieval is best-effort: a failure is reported inside the payload
    # rather than failing the whole status request.
    try:
        raw_logs = redis_client.lrange(f"ocr_status:{request_id}", 0, -1)
        parsed_logs = [json.loads(entry) for entry in raw_logs]
    except Exception as e:
        parsed_logs = [{"status": "로그 조회 실패", "error": str(e)}]

    # The stored result is only looked up once Celery reports success; it stays
    # None when nothing is stored under the result key.
    final_result = None
    if status == "SUCCESS":
        try:
            result_str = redis_client.get(f"ocr_result:{task_id}")
            if result_str:
                final_result = json.loads(result_str)
        except Exception as e:
            final_result = {"error": f"결과 조회 실패: {str(e)}"}

    return JSONResponse(
        content={
            "request_id": request_id,
            "task_id": task_id,
            "celery_status": status,
            "progress_logs": parsed_logs,
            "final_result": final_result,
        }
    )