128 lines
4.2 KiB
Python
128 lines
4.2 KiB
Python
import json
import logging
import os
import tempfile
from datetime import datetime
from typing import List

from celery import chain
from celery.result import AsyncResult
from fastapi import APIRouter, File, HTTPException, UploadFile
from fastapi.responses import JSONResponse

from tasks import (
    call_upstage_ocr_api,
    celery_app,
    parse_ocr_text,
    store_ocr_result,
)
from utils.checking_keys import create_key
from utils.redis_utils import get_redis_client
|
|
|
|
# Router that groups every OCR endpoint under the "/ocr" URL prefix.
router = APIRouter(tags=["OCR"], prefix="/ocr")

# Redis client shared by all handlers in this module; it is created once at import time.
redis_client = get_redis_client()
|
|
|
|
|
|
def _remove_temp_file(path: str) -> None:
    """Delete the temp file at *path* as best-effort cleanup; never raises on OS errors."""
    if not path:
        return
    try:
        os.remove(path)
    except FileNotFoundError:
        # Already gone: nothing left to clean up.
        pass
    except OSError:
        # Cleanup runs inside error handlers; a failure here must not mask the real error.
        logging.getLogger(__name__).warning(
            "Failed to remove temp file %s", path, exc_info=True
        )


async def _save_upload_to_temp(file: UploadFile) -> str:
    """Copy one upload into a persistent temp file and return that file's path.

    The upload is always closed. If anything fails, the partially written temp
    file is removed and an HTTP 500 is raised.
    """
    tmp_path = ""
    try:
        suffix = os.path.splitext(file.filename)[-1]
        # delete=False: the file must outlive this request so the queued task can read it.
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_file:
            # Remember the path before writing so a failed write can still be cleaned up.
            tmp_path = tmp_file.name
            tmp_file.write(await file.read())
    except Exception as e:
        _remove_temp_file(tmp_path)
        raise HTTPException(
            status_code=500, detail=f"파일 저장 실패: {str(e)}"
        ) from e
    finally:
        await file.close()
    return tmp_path


async def _process_ocr_request(files: List[UploadFile], ocr_task):
    """Queue one asynchronous OCR job per uploaded file.

    For each file the upload is saved to a temp file, a request id and a task id
    are generated, the request-id -> task-id mapping is stored in the
    "ocr_task_mapping" Redis hash, and a Celery chain of ``ocr_task`` followed by
    ``store_ocr_result`` is dispatched under the generated task id.

    Args:
        files: Uploaded files to process.
        ocr_task: Celery task whose signature is built with the keyword
            arguments ``tmp_path``, ``request_id`` and ``file_name``.

    Returns:
        JSONResponse whose ``results`` list holds one entry per file with the
        keys ``message``, ``request_id``, ``task_id``, ``status_check_url`` and
        ``filename``.

    Raises:
        HTTPException: 400 if any upload has no filename; 500 if the upload
            cannot be saved, the mapping cannot be stored, or the task cannot
            be dispatched.
    """
    logger = logging.getLogger(__name__)

    # Validate the whole batch first so a bad file cannot leave earlier files already queued.
    for file in files:
        if not file.filename:
            raise HTTPException(status_code=400, detail="파일 이름이 없습니다.")

    results = []
    for file in files:
        tmp_path = await _save_upload_to_temp(file)

        request_id = create_key()
        task_id = create_key()

        # Store the mapping before dispatching: if this fails, no task has been
        # queued that would later look for the temp file we are about to delete.
        try:
            redis_client.hset("ocr_task_mapping", request_id, task_id)
        except Exception as e:
            _remove_temp_file(tmp_path)
            raise HTTPException(
                status_code=500, detail=f"작업 정보 저장 오류: {str(e)}"
            ) from e

        task_chain = chain(
            ocr_task.s(
                tmp_path=tmp_path, request_id=request_id, file_name=file.filename
            ),
            store_ocr_result.s(request_id=request_id, task_id=task_id),
        )
        try:
            task_chain.apply_async(task_id=task_id)
        except Exception as e:
            # Roll back what was created for this file so nothing dangles.
            _remove_temp_file(tmp_path)
            try:
                redis_client.hdel("ocr_task_mapping", request_id)
            except Exception:
                logger.warning(
                    "Failed to roll back task mapping for request %s",
                    request_id,
                    exc_info=True,
                )
            raise HTTPException(
                status_code=500, detail=f"작업 등록 실패: {str(e)}"
            ) from e

        # The initial status entry is best-effort: the job is already queued, so
        # a logging failure is reported but does not fail the request.
        try:
            log_entry = {
                "status": "작업 접수",
                "timestamp": datetime.now().isoformat(),
                "task_id": task_id,
                "initial_file": file.filename,
            }
            redis_client.rpush(f"ocr_status:{request_id}", json.dumps(log_entry))
        except Exception:
            logger.warning(
                "Failed to record initial status for request %s",
                request_id,
                exc_info=True,
            )

        results.append(
            {
                "message": "OCR 작업이 접수되었습니다.",
                "request_id": request_id,
                "task_id": task_id,
                "status_check_url": f"/ocr/progress/{request_id}",
                "filename": file.filename,
            }
        )
    return JSONResponse(content={"results": results})
|
|
|
|
|
|
# Upload endpoint that queues every received file for OCR through the
# `parse_ocr_text` task and returns the shared submission response.
@router.post("/paddle", summary="[Paddle] 파일 업로드 기반 비동기 OCR")
async def ocr_paddle_endpoint(files: List[UploadFile] = File(...)):
    submission = await _process_ocr_request(files, parse_ocr_text)
    return submission
|
|
|
|
|
|
# Upload endpoint that queues every received file for OCR through the
# `call_upstage_ocr_api` task and returns the shared submission response.
@router.post("/upstage", summary="[Upstage] 파일 업로드 기반 비동기 OCR")
async def ocr_upstage_endpoint(files: List[UploadFile] = File(...)):
    submission = await _process_ocr_request(files, call_upstage_ocr_api)
    return submission
|
|
|
|
|
|
# Progress endpoint: looks up the task id mapped to `request_id`, then reports the
# Celery state, the status log stored under "ocr_status:<request_id>" and, once the
# state is "SUCCESS", the JSON result stored under "ocr_result:<task_id>".
# Responds with 404 when the "ocr_task_mapping" hash has no entry for `request_id`.
@router.get("/progress/{request_id}", summary="📊 OCR 진행 상태 및 결과 조회")
async def check_progress(request_id: str):
    mapped_task_id = redis_client.hget("ocr_task_mapping", request_id)
    if not mapped_task_id:
        raise HTTPException(
            status_code=404,
            detail=f"ID {request_id} 작업을 찾을 수 없습니다.",
        )

    celery_state = AsyncResult(mapped_task_id, app=celery_app).status

    # A failure to read or decode the log is reported in the log slot itself
    # rather than failing the whole request.
    try:
        raw_entries = redis_client.lrange(f"ocr_status:{request_id}", 0, -1)
        history = list(map(json.loads, raw_entries))
    except Exception as exc:
        history = [{"status": "로그 조회 실패", "error": str(exc)}]

    # The stored result is only looked up after the task has succeeded; a missing
    # value leaves the result as None.
    outcome = None
    if celery_state == "SUCCESS":
        try:
            stored = redis_client.get(f"ocr_result:{mapped_task_id}")
            outcome = json.loads(stored) if stored else None
        except Exception as exc:
            outcome = {"error": f"결과 조회 실패: {exc!s}"}

    body = {
        "request_id": request_id,
        "task_id": mapped_task_id,
        "celery_status": celery_state,
        "progress_logs": history,
        "final_result": outcome,
    }
    return JSONResponse(content=body)
|