import json
import logging
import os
import tempfile
from datetime import datetime
from typing import List

from celery import chain
from celery.result import AsyncResult
from fastapi import APIRouter, File, HTTPException, UploadFile
from fastapi.responses import JSONResponse

from tasks import (
    call_upstage_ocr_api,
    celery_app,
    parse_ocr_text,
    store_ocr_result,
)
from utils.checking_keys import create_key
from utils.redis_utils import get_redis_client

logger = logging.getLogger(__name__)

router = APIRouter(prefix="/ocr", tags=["OCR"])
redis_client = get_redis_client()


def _remove_temp_file(path: str) -> None:
    """Delete *path* if it is set and exists; log instead of raising on failure."""
    if not path:
        return
    try:
        os.remove(path)
    except FileNotFoundError:
        # Already gone; nothing left to clean up.
        pass
    except OSError:
        logger.warning("Failed to remove temporary file %s", path, exc_info=True)


async def _save_upload_to_temp(file: UploadFile) -> str:
    """Copy an uploaded file to a persistent temporary file and return its path.

    The temporary file keeps the upload's extension and is created with
    ``delete=False`` so that it outlives this request. The upload handle is
    always closed.

    Raises:
        HTTPException: 500 if the upload cannot be read or written. Any
            partially written temporary file is removed first.
    """
    tmp_path = ""
    try:
        suffix = os.path.splitext(file.filename)[-1]
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp_file:
            # Record the path before writing so a failed write can be cleaned up.
            tmp_path = tmp_file.name
            tmp_file.write(await file.read())
    except Exception as e:
        _remove_temp_file(tmp_path)
        raise HTTPException(
            status_code=500, detail=f"파일 저장 실패: {str(e)}"
        ) from e
    finally:
        await file.close()
    return tmp_path


async def _process_ocr_request(files: List[UploadFile], ocr_task):
    """Queue one OCR task chain per uploaded file and report the created jobs.

    For every file: the upload is saved to a temporary file, a
    ``request_id -> task_id`` entry is written to the ``ocr_task_mapping``
    Redis hash, the chain ``ocr_task -> store_ocr_result`` is dispatched, and
    an initial entry is appended to the ``ocr_status:{request_id}`` list.

    Args:
        files: Uploaded files to process.
        ocr_task: Celery task used as the first step of the chain. It is
            called with ``tmp_path``, ``request_id`` and ``file_name`` keyword
            arguments.

    Returns:
        A ``JSONResponse`` whose ``results`` list holds one entry per file
        with the request id, task id and status URL.

    Raises:
        HTTPException: 400 if any upload has no filename (checked before any
            file is processed); 500 if a file cannot be saved or the task
            mapping cannot be stored.
    """
    # Validate the whole batch first so a bad upload cannot leave earlier
    # files queued while the client only receives an error response.
    for file in files:
        if not file.filename:
            raise HTTPException(status_code=400, detail="파일 이름이 없습니다.")

    results = []
    for file in files:
        tmp_path = await _save_upload_to_temp(file)

        request_id = create_key()
        task_id = create_key()

        # Store the mapping before dispatching: if this fails nothing has been
        # queued yet, so the temporary file can be removed safely.
        try:
            redis_client.hset("ocr_task_mapping", request_id, task_id)
        except Exception as e:
            _remove_temp_file(tmp_path)
            raise HTTPException(
                status_code=500, detail=f"작업 정보 저장 오류: {str(e)}"
            ) from e

        task_chain = chain(
            ocr_task.s(
                tmp_path=tmp_path, request_id=request_id, file_name=file.filename
            ),
            store_ocr_result.s(request_id=request_id, task_id=task_id),
        )
        try:
            task_chain.apply_async(task_id=task_id)
        except Exception:
            # Nothing was queued: undo the side effects, then let the original
            # error propagate unchanged.
            _remove_temp_file(tmp_path)
            try:
                redis_client.hdel("ocr_task_mapping", request_id)
            except Exception:
                logger.warning(
                    "Failed to remove task mapping for request %s",
                    request_id,
                    exc_info=True,
                )
            raise

        log_entry = {
            "status": "작업 접수",
            "timestamp": datetime.now().isoformat(),
            "task_id": task_id,
            "initial_file": file.filename,
        }
        try:
            redis_client.rpush(f"ocr_status:{request_id}", json.dumps(log_entry))
        except Exception:
            # The status log is best-effort; the task is already queued, so
            # record the failure without failing the request.
            logger.warning(
                "Failed to record initial status log for request %s",
                request_id,
                exc_info=True,
            )

        results.append(
            {
                "message": "OCR 작업이 접수되었습니다.",
                "request_id": request_id,
                "task_id": task_id,
                "status_check_url": f"/ocr/progress/{request_id}",
                "filename": file.filename,
            }
        )

    return JSONResponse(content={"results": results})


# Upload endpoint that queues each file with `parse_ocr_text` as the OCR step.
@router.post("/paddle", summary="[Paddle] 파일 업로드 기반 비동기 OCR")
async def ocr_paddle_endpoint(files: List[UploadFile] = File(...)):
    return await _process_ocr_request(files, parse_ocr_text)


# Upload endpoint that queues each file with `call_upstage_ocr_api` as the
# OCR step.
@router.post("/upstage", summary="[Upstage] 파일 업로드 기반 비동기 OCR")
async def ocr_upstage_endpoint(files: List[UploadFile] = File(...)):
    return await _process_ocr_request(files, call_upstage_ocr_api)


# Reports the Celery state, the stored progress log and, once the task has
# succeeded, the stored result for a request id. Responds 404 when the request
# id has no entry in the `ocr_task_mapping` hash.
@router.get("/progress/{request_id}", summary="📊 OCR 진행 상태 및 결과 조회")
async def check_progress(request_id: str):
    task_id = redis_client.hget("ocr_task_mapping", request_id)
    if not task_id:
        raise HTTPException(
            status_code=404, detail=f"ID {request_id} 작업을 찾을 수 없습니다."
        )
    # A Redis client without response decoding returns bytes, which would
    # corrupt the result key below and cannot be JSON-serialised.
    if isinstance(task_id, bytes):
        task_id = task_id.decode("utf-8")

    result = AsyncResult(task_id, app=celery_app)
    status = result.status

    try:
        logs = redis_client.lrange(f"ocr_status:{request_id}", 0, -1)
        parsed_logs = [json.loads(log) for log in logs]
    except Exception as e:
        parsed_logs = [{"status": "로그 조회 실패", "error": str(e)}]

    final_result = None
    if status == "SUCCESS":
        try:
            result_str = redis_client.get(f"ocr_result:{task_id}")
            if result_str:
                final_result = json.loads(result_str)
        except Exception as e:
            final_result = {"error": f"결과 조회 실패: {str(e)}"}

    return JSONResponse(
        content={
            "request_id": request_id,
            "task_id": task_id,
            "celery_status": status,
            "progress_logs": parsed_logs,
            "final_result": final_result,
        }
    )