# llm-gateway-sub-backup/workspace/routers/general_router.py

import asyncio
import json
from typing import Optional

from config.setting import (
    PGN_REDIS_DB,
    PGN_REDIS_HOST,
    PGN_REDIS_PORT,
)
from fastapi import APIRouter, Depends, File, Form, Request, UploadFile
from fastapi.responses import JSONResponse
from redis import Redis
from services.inference_service import InferenceHandler
from utils.checking_files import (
    clone_upload_file,
    validate_all_files,
)
from utils.checking_keys import create_key, get_api_key

# Redis client (dedicated to the LLM Gateway), configured from the
# PGN_REDIS_* settings. decode_responses=True asks redis-py to hand back
# str values instead of raw bytes.
redis_client = Redis(
    host=PGN_REDIS_HOST, port=PGN_REDIS_PORT, db=PGN_REDIS_DB, decode_responses=True
)


# Router for the endpoints defined in this module; every path is mounted
# under the "/general" prefix and grouped under the "General" tag.
router = APIRouter(prefix="/general", tags=["General"])


# Strong references to in-flight background inference tasks. The event loop
# only keeps weak references to tasks, so a task nobody else references may be
# garbage-collected before it finishes.
_background_tasks = set()


# Factory for the shared asynchronous inference endpoints
def register_general_route(
    path: str, mode: str, default_model: str, summary: str, description: str
):
    """Register a POST inference endpoint on ``router`` and return its handler.

    The generated handler validates the uploaded input file, clones the
    uploads, records the ``request_id -> result_id`` mapping in the Redis hash
    ``pipeline_result_mapping``, starts
    ``InferenceHandler.handle_general_background`` as a background task and
    immediately answers with the ``request_id`` and a status URL.

    Args:
        path: Route path relative to the router prefix (e.g. ``"/inner"``).
        mode: Mode forwarded to the inference handler. It is replaced by
            ``"structured"`` when a schema file with a non-empty filename is
            uploaded.
        default_model: Default value of the ``model`` form field.
        summary: Summary passed to the route decorator.
        description: Description passed to the route decorator; also assigned
            to the handler's ``__doc__``.

    Returns:
        The registered ``general_endpoint`` coroutine function.
    """

    @router.post(path, summary=summary, description=description)
    async def general_endpoint(
        request_info: Request,
        input_file: UploadFile = File(...),
        prompt_file: UploadFile = File(...),
        schema_file: Optional[UploadFile] = File(default=None),
        model: Optional[str] = Form(default=default_model),
        api_key: str = Depends(get_api_key),
    ):
        validate_all_files(input_file)

        # Generate unique IDs: one handed to the client, one keying the result
        request_id = create_key()
        result_id = create_key()

        # Presumably cloned so the background task can still read the uploads
        # after this request has completed — confirm in clone_upload_file.
        cloned_input = clone_upload_file(input_file) if input_file else None
        cloned_prompt = clone_upload_file(prompt_file) if prompt_file else None
        cloned_schema = clone_upload_file(schema_file) if schema_file else None

        effective_mode = "structured" if schema_file and schema_file.filename else mode

        # Store the request_id -> result_id mapping BEFORE spawning the task:
        # if Redis is unavailable the request fails here and no orphaned
        # inference job (whose result could never be looked up) is started.
        redis_client.hset("pipeline_result_mapping", request_id, result_id)

        # Run the job in the background.
        # NOTE(review): the Request object is handed to a task that outlives
        # this request — confirm the handler only reads data that stays valid.
        task = asyncio.create_task(
            InferenceHandler.handle_general_background(
                request_id=request_id,
                result_id=result_id,
                input_file=cloned_input,
                schema_file=cloned_schema,
                prompt_file=cloned_prompt,
                mode=effective_mode,
                model=model,
                request_info=request_info,
                api_key=api_key,
            )
        )
        # Keep the task alive until it completes, then drop the reference so
        # the set does not grow without bound.
        _background_tasks.add(task)
        task.add_done_callback(_background_tasks.discard)

        return JSONResponse(
            content={
                "message": "문서 추출 및 생성형 응답 작업이 백그라운드에서 실행 중입니다.",
                "request_id": request_id,
                "status_check_url": f"/general/progress/{request_id}",
            }
        )

    # Attach naming/documentation metadata to the handler.
    # NOTE(review): these assignments run after @router.post has already
    # registered the handler — confirm the route metadata picks them up.
    general_endpoint.__name__ = f"general_{mode}"
    general_endpoint.__doc__ = description
    return general_endpoint


# Register the endpoint for the internal model: POST /general/inner, mode
# "inner", default model "gemma3:27b". The description below is the Markdown
# text passed to the route decorator.
# NOTE(review): the sample "message" in the description's initial-response
# example differs from the message string the generated endpoint returns.
general_inner = register_general_route(
    path="/inner",
    mode="inner",
    default_model="gemma3:27b",
    summary="내부 LLM 기반 범용 추론 요청 (비동기)",
    description="""### **요약**
내부망에 배포된 LLM(Ollama 기반)을 사용하여 문서 기반의 범용 추론을 비동기적으로 요청합니다. 이 엔드포인트는 파일(PDF, 이미지 등)에서 텍스트를 추출하고, 사용자가 제공한 프롬프트를 적용하여 결과를 생성합니다.

### **작동 방식**
1.  **요청 접수**: `input_file`, `prompt_file` 등을 받아 고유한 `request_id`를 생성하고 즉시 반환합니다.
2.  **백그라운드 처리**:
    -   `input_file`이 문서나 이미지일 경우, **OCR API**를 호출하여 텍스트를 추출합니다.
    -   추출된 텍스트와 `prompt_file`의 내용을 조합하여 최종 프롬프트를 구성합니다.
    -   내부 LLM(Ollama)에 추론을 요청합니다.
    -   `schema_file`이 제공되면, LLM이 스키마에 맞는 JSON을 생성하도록 요청합니다.
3.  **상태 및 결과 확인**: 반환된 `request_id`를 사용하여 `GET /general/progress/{request_id}` 엔드포인트에서 작업 진행 상태와 최종 결과를 조회할 수 있습니다.

### **입력 (multipart/form-data)**
-   `input_file` (**필수**): 추론의 기반이 될 문서 파일.
    -   지원 형식: `.pdf`, `.docx`, `.jpg`, `.png`, `.jpeg` 등.
    -   내부적으로 OCR을 통해 텍스트가 자동 추출됩니다.
-   `prompt_file` (**필수**): LLM에 전달할 명령어(프롬프트)가 포함된 `.txt` 파일.
-   `schema_file` (선택): 결과물의 구조를 정의하는 `.json` 스키마 파일. 제공 시, 출력은 이 스키마를 따르는 JSON 형식으로 강제됩니다.
-   `model` (선택): 사용할 내부 LLM 모델 이름. (기본값: `gemma3:27b`)

### **출력 (application/json)**
-   **초기 응답**:
    ```json
    {
      "message": "작업이 백그라운드에서 실행 중입니다.",
      "request_id": "고유한 요청 ID",
      "status_check_url": "/general/progress/고유한 요청 ID"
    }
    ```
-   **최종 결과**: `GET /general/progress/{request_id}`를 통해 확인 가능.
""",
)

# Register the endpoint for external models: POST /general/outer, mode
# "outer", default model "gemini-2.5-flash". The description below is the
# Markdown text passed to the route decorator.
# NOTE(review): the sample "message" in the description's initial-response
# example differs from the message string the generated endpoint returns.
general_outer = register_general_route(
    path="/outer",
    mode="outer",
    default_model="gemini-2.5-flash",
    summary="외부 LLM 기반 범용 추론 요청 (비동기)",
    description="""### **요약**
외부 상용 LLM(예: GPT, Gemini, Claude)을 사용하여 문서 기반의 범용 추론을 비동기적으로 요청합니다. 기능과 작동 방식은 내부 LLM용 엔드포인트와 동일하나, 외부 API를 호출하는 점이 다릅니다.

### **작동 방식**
1.  **요청 접수**: `input_file`, `prompt_file` 등을 받아 고유한 `request_id`를 생성하고 즉시 반환합니다.
2.  **백그라운드 처리**:
    -   `input_file`에서 **OCR API**를 통해 텍스트를 추출합니다.
    -   추출된 텍스트와 `prompt_file`의 내용을 조합하여 최종 프롬프트를 구성합니다.
    -   외부 LLM API(OpenAI, Google, Anthropic 등)에 추론을 요청합니다.
    -   `schema_file`이 제공되면, LLM이 스키마에 맞는 JSON을 생성하도록 요청합니다.
3.  **상태 및 결과 확인**: 반환된 `request_id`를 사용하여 `GET /general/progress/{request_id}` 엔드포인트에서 작업 진행 상태와 최종 결과를 조회할 수 있습니다.

### **입력 (multipart/form-data)**
-   `input_file` (**필수**): 추론의 기반이 될 문서 파일.
    -   지원 형식: `.pdf`, `.docx`, `.jpg`, `.png`, `.jpeg` 등.
-   `prompt_file` (**필수**): LLM에 전달할 프롬프트가 포함된 `.txt` 파일.
-   `schema_file` (선택): 결과물의 구조를 정의하는 `.json` 스키마 파일.
-   `model` (선택): 사용할 외부 LLM 모델 이름. (기본값: `gemini-2.5-flash`)

### **출력 (application/json)**
-   **초기 응답**:
    ```json
    {
      "message": "작업이 백그라운드에서 실행 중입니다.",
      "request_id": "고유한 요청 ID",
      "status_check_url": "/general/progress/고유한 요청 ID"
    }
    ```
-   **최종 결과**: `GET /general/progress/{request_id}`를 통해 확인 가능.
""",
)


# Status log lookup API
@router.get(
    "/progress/{request_id}",
    summary="범용 추론 작업 상태 및 결과 조회",
    description="""### **요약**
`POST /general/inner` 또는 `POST /general/outer` 요청 시 반환된 `request_id`를 사용하여, 해당 작업의 진행 상태와 최종 결과를 조회합니다.

### **작동 방식**
-   `request_id`를 기반으로 Redis에 저장된 작업 로그와 결과 데이터를 조회합니다.
-   작업이 진행 중일 때는 현재까지의 로그를, 완료되었을 때는 로그와 함께 최종 결과(`final_result`)를 반환합니다.

### **입력**
-   `request_id`: 조회할 작업의 고유 ID.

### **출력 (application/json)**
-   **성공 시**:
    ```json
    {
      "request_id": "요청 시 사용된 ID",
      "progress_logs": [
        { "timestamp": "...", "status": "OCR 시작", "details": "..." },
        { "timestamp": "...", "status": "입력 길이 검사 시작", "details": "..." },
        { "timestamp": "...", "status": "LLM 추론 시작", "details": "..." },
        { "timestamp": "...", "status": "LLM 추론 완료 및 후처리 시작", "details": "..." },
        { "timestamp": "...", "status": "후처리 완료 및 결과 반환", "details": "..." }
      ],
      "final_result": {
        "filename": "입력 파일",
        "processed": "LLM의 최종 응답 내용"
      }
    }
    ```
-   **ID가 유효하지 않을 경우 (404 Not Found)**:
    ```json
    {
      "message": "{request_id}에 대한 상태 로그가 없습니다."
    }
    ```
""",
)
async def get_pipeline_status(request_id: str):
    """Return the progress logs and, when present, the final result of a job.

    Args:
        request_id: ID used to build the ``pipeline_status:{request_id}`` key
            and to look up the ``pipeline_result_mapping`` hash.

    Returns:
        A ``JSONResponse``. When the status list is empty or missing, a 404
        carrying a ``message``. Otherwise a payload with ``request_id``,
        ``progress_logs`` (every list entry JSON-decoded) and
        ``final_result``: the decoded value stored under
        ``pipeline_result:{result_id}``, ``None`` when the mapping or the
        result is absent, or an ``error`` object when the stored result is
        not valid JSON.

    Raises:
        json.JSONDecodeError: If a stored log entry is not valid JSON.
    """
    # Fetch the status log entries for this request
    redis_key = f"pipeline_status:{request_id}"
    logs = redis_client.lrange(redis_key, 0, -1)
    if not logs:
        return JSONResponse(
            status_code=404,
            content={"message": f"{request_id}에 대한 상태 로그가 없습니다."},
        )
    # `logs` is guaranteed non-empty past the guard above.
    parsed_logs = [json.loads(log) for log in logs]

    # Resolve the request_id -> result_id mapping
    result_id = redis_client.hget("pipeline_result_mapping", request_id)

    final_result = None
    if result_id:
        # Fetch the final result, if it has been stored
        result_key = f"pipeline_result:{result_id}"
        result_str = redis_client.get(result_key)
        if result_str:
            try:
                final_result = json.loads(result_str)
            except json.JSONDecodeError:
                # Report an undecodable result in-band rather than failing
                # the whole status request.
                final_result = {"error": "결과 디코딩 실패"}

    return JSONResponse(
        content={
            "request_id": request_id,
            "progress_logs": parsed_logs,
            "final_result": final_result,
        }
    )