import json
import logging
from datetime import datetime

from celery.result import AsyncResult
from fastapi import APIRouter, HTTPException
from fastapi.responses import JSONResponse

from tasks import (
    celery_app,
    run_ocr_pipeline,  # newly added chain function
)
from utils.checking_keys import create_key
from utils.redis_utils import get_redis_client

router = APIRouter(prefix="/ocr", tags=["OCR"])
redis_client = get_redis_client()
logger = logging.getLogger(__name__)


@router.post("", summary="πŸ” presigned URL 기반 비동기 OCR 처리")
async def ocr_endpoint(file_requests: dict):
    """
    Request an asynchronous OCR job for a file reachable through a presigned URL.

    Expected keys of the request body:

    - **`file_url`**: presigned URL of the file to run OCR on (required)
    - **`filename`**: name of the original file (required)
    - **`ocr_model`**: OCR model to use (one of `tesseract`, `pp-ocr`,
      `pp-structure`, `upstage`); forwarded to the pipeline unchanged

    Once the request is accepted, a `request_id` and a `task_id` for tracking
    the job are returned immediately.

    Raises:
        HTTPException: 400 when `file_url` or `filename` is missing or empty;
            500 when the request_id -> task_id mapping cannot be stored in Redis.
    """
    file_url = file_requests.get("file_url")
    filename = file_requests.get("filename")
    ocr_model = file_requests.get("ocr_model")

    if not file_url or not filename:
        raise HTTPException(status_code=400, detail="file_url, filename ν•„μˆ˜")

    request_id = create_key()
    task_id = create_key()

    # NOTE(review): presumably this only schedules the Celery chain and returns
    # without waiting for OCR to finish -- confirm against tasks.run_ocr_pipeline.
    run_ocr_pipeline(file_url, filename, request_id, task_id, ocr_model)

    # Store the request_id -> task_id mapping in Redis; the progress endpoint
    # cannot find the job without it, so a failure here is fatal.
    try:
        redis_client.hset("ocr_task_mapping", request_id, task_id)
    except Exception as exc:
        raise HTTPException(
            status_code=500, detail=f"μž‘μ—… 정보 μ €μž₯ 였λ₯˜: {str(exc)}"
        ) from exc

    # Record the initial status entry in Redis. This is best-effort: a failure
    # must not reject the request, but it is logged instead of being hidden.
    try:
        log_entry = {
            "status": "μž‘μ—… μ ‘μˆ˜",
            "timestamp": datetime.now().isoformat(),
            "task_id": task_id,
            "initial_file": filename,
        }
        redis_client.rpush(f"ocr_status:{request_id}", json.dumps(log_entry))
    except Exception:
        logger.warning(
            "Failed to record initial OCR status log for request_id=%s",
            request_id,
            exc_info=True,
        )

    # After registering the job, respond immediately without waiting for the
    # actual OCR processing.
    results = [
        {
            "message": "OCR μž‘μ—…μ΄ μ ‘μˆ˜λ˜μ—ˆμŠ΅λ‹ˆλ‹€.",
            "request_id": request_id,
            "task_id": task_id,
            "status_check_url": f"/ocr/progress/{request_id}",
        }
    ]
    return JSONResponse(content={"results": results})


# The actual OCR result is retrieved separately through the
# GET /ocr/progress/{request_id} endpoint.


@router.get("/progress/{request_id}", summary="πŸ“Š OCR μ§„ν–‰ μƒνƒœ 및 κ²°κ³Ό 쑰회")
async def check_progress(request_id: str):
    """
    Look up the progress and the final result of an OCR job by `request_id`.

    Fields of the response:

    - **`celery_status`**: current state of the Celery task
      (`PENDING`, `STARTED`, `SUCCESS`, `FAILURE`, ...)
    - **`progress_logs`**: step-by-step status entries stored under
      `ocr_status:{request_id}`
    - **`final_result`**: JSON stored under `ocr_result:{task_id}`, or `null`
      when nothing is stored there yet

    Raises:
        HTTPException: 404 when no task is mapped to `request_id`.
    """
    # Resolve the request_id -> task_id mapping.
    task_id = redis_client.hget("ocr_task_mapping", request_id)
    if not task_id:
        raise HTTPException(
            status_code=404, detail=f"Request ID {request_id} μž‘μ—… μ—†μŒ"
        )
    # A Redis client created without decode_responses=True returns bytes, which
    # would corrupt the result key below and cannot be JSON-serialised.
    if isinstance(task_id, bytes):
        task_id = task_id.decode("utf-8")

    # Query the Celery task state.
    status = AsyncResult(task_id, app=celery_app).status

    # Fetch the progress logs; on failure report it in the payload rather than
    # failing the whole request.
    try:
        raw_logs = redis_client.lrange(f"ocr_status:{request_id}", 0, -1)
        parsed_logs = [json.loads(entry) for entry in raw_logs]
    except Exception as exc:
        logger.warning(
            "Failed to load OCR progress logs for request_id=%s",
            request_id,
            exc_info=True,
        )
        parsed_logs = [
            {
                "status": "둜그 κ°€μ Έμ˜€κΈ° μ‹€νŒ¨",
                "error": str(exc),
                "timestamp": datetime.now().isoformat(),
            }
        ]

    # Fetch the final result from Redis, if one has been stored.
    final_result = None
    try:
        result_str = redis_client.get(f"ocr_result:{task_id}")
        if result_str:
            final_result = json.loads(result_str)
    except Exception as exc:
        logger.warning(
            "Failed to load OCR result for task_id=%s", task_id, exc_info=True
        )
        final_result = {"error": f"κ²°κ³Ό 쑰회 μ‹€νŒ¨: {str(exc)}"}

    return JSONResponse(
        content={
            "request_id": request_id,
            "task_id": task_id,
            "celery_status": status,
            "progress_logs": parsed_logs,
            "final_result": final_result,
        }
    )