인퍼런스 레이어 구축

2025-10-23 17:44:32 +09:00
parent 7d5a46b11d
commit 039842b465
24 changed files with 3407 additions and 0 deletions
--- a/api_gateway/Dockerfile
+++ b/api_gateway/Dockerfile
@@ -0,0 +1,18 @@
+# 1. Select the Python base image
+FROM python:3.9-slim
+
+# 2. Set the working directory
+WORKDIR /app
+
+# 3. Copy requirements.txt and install dependencies (done before the source copy)
+COPY requirements.txt .
+RUN pip install --no-cache-dir -r requirements.txt
+
+# 4. Copy the source code
+COPY main.py .
+
+# 5. Expose the port the server listens on
+EXPOSE 8000
+
+# 6. Run the FastAPI app with uvicorn
+CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
--- a/api_gateway/main.py
+++ b/api_gateway/main.py
@@ -0,0 +1,64 @@
+import httpx
+from fastapi import FastAPI, HTTPException, Request
+from fastapi.responses import JSONResponse
+import time
+
+app = FastAPI()
+
+# Maps each accepted "model" name to the URL the gateway POSTs to; the hosts are service names and ports used inside Docker Compose.
+MODEL_SERVICES = {
+    "deepseek_ocr": "http://deepseek_ocr:8000/process",
+    "paddle_vl": "http://paddle_vl:8000/process",
+    "chandra": "http://chandra:8000/process",
+}
+
+@app.post("/api/v1/infer")
+async def infer(request: Request):
+    """
+    사용자로부터 요청을 받아 적절한 모델 서비스로 전달하고,
+    응답 시간과 함께 결과를 반환합니다.
+    """
+    try:
+        payload = await request.json()
+        model_name = payload.get("model")
+
+        if not model_name or model_name not in MODEL_SERVICES:
+            raise HTTPException(status_code=400, detail="A valid 'model' name must be provided.")
+
+        service_url = MODEL_SERVICES[model_name]
+        
+        # 요청에서 'model' 필드는 제거하고 나머지 페이로드를 모델 서비스로 전달
+        downstream_payload = {k: v for k, v in payload.items() if k != 'model'}
+
+        start_time = time.time()
+
+        async with httpx.AsyncClient(timeout=300.0) as client:
+            response = await client.post(service_url, json=downstream_payload)
+            response.raise_for_status() # HTTP 오류 발생 시 예외 처리
+
+        end_time = time.time()
+        
+        # 응답 시간 계산
+        inference_time = end_time - start_time
+
+        # 모델 서비스의 응답에 응답 시간 추가
+        model_response_data = response.json()
+        
+        final_response = {
+            "model_name": model_name,
+            "inference_time_seconds": round(inference_time, 4),
+            "model_output": model_response_data
+        }
+        
+        return JSONResponse(content=final_response)
+
+    except httpx.RequestError as e:
+        raise HTTPException(status_code=503, detail=f"Service unavailable: {e}")
+    except HTTPException as e:
+        raise e # FastAPI에서 발생한 HTTP 예외는 그대로 다시 발생
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"An internal error occurred: {str(e)}")
+
+@app.get("/")
+def health_check():
+    """Return a fixed status payload; the model services are not contacted."""
+    return {"status": "API Gateway is running"}
--- a/api_gateway/requirements.txt
+++ b/api_gateway/requirements.txt
@@ -0,0 +1,3 @@
+fastapi
+uvicorn
+httpx