원 레포랑 완전 분리

2025-08-11 18:56:38 +09:00
commit 7217d3cbaa
86 changed files with 6631 additions and 0 deletions
--- a/workspace/api.py
+++ b/workspace/api.py
@@ -0,0 +1,136 @@
+import logging
+from contextlib import asynccontextmanager
+
+from config.setting import SUMMARY_HTML_DIR
+from fastapi import Depends, FastAPI, HTTPException, Request
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.staticfiles import StaticFiles
+from prometheus_fastapi_instrumentator import Instrumentator
+from routers import *
+from services.api_key_service import load_api_keys_from_file
+from utils.checking_keys import get_admin_key, get_api_key
+from utils.minio_utils import get_minio_client
+from utils.redis_utils import get_redis_client
+
+logging.basicConfig(
+    level=logging.INFO, format="%(asctime)s [%(levelname)s] %(name)s - %(message)s"
+)
+
+
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    """Run startup work for the FastAPI application.
+
+    Loads the API keys from file, then yields control to the application.
+    Nothing is executed after the ``yield`` (no shutdown work).
+
+    Args:
+        app: The FastAPI application instance (not used by this function).
+    """
+    # Load the API keys from file when the application starts.
+    # Go through the logging setup configured at import time instead of
+    # print(), so this line carries the same timestamp/level format as the
+    # rest of the service output.
+    logging.getLogger(__name__).info("Loading API keys from file...")
+    load_api_keys_from_file()
+    yield
+
+
+app = FastAPI(
+    title="LLM GATEWAY",
+    description="LLM 모델이 업로드된 문서를 분석하여 구조화된 JSON으로 변환하는 API 서비스입니다.",
+    docs_url=None,
+    lifespan=lifespan,
+)
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=[
+        "http://172.16.42.101",
+        "http://gsim.hanmaceng.co.kr",
+        "http://gsim.hanmaceng.co.kr:6464",
+        "https://overseas.projectmastercloud.com",
+        "http://localhost:5174",  # 이민규 연구원
+    ],
+    allow_origin_regex=r"http://(localhost:5174|172\.16\.\d{1,3}\.\d{1,3}|gsim\.hanmaceng\.co\.kr)(:\d+)?",
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+# API 키 검증을 위한 의존성 설정
+api_key_dependency = Depends(get_api_key)
+admin_key_dependency = Depends(get_admin_key)
+
+# ✅ Enable the Prometheus metrics exporter
+
+
+# Custom label callback
+def custom_labels(info):
+    """Build the ``job_id`` label for a request.
+
+    Args:
+        info: Instrumentation info object; ``info.request`` is Starlette's
+            Request object.
+
+    Returns:
+        A dict with a single ``"job_id"`` key holding the ``X-Job-ID`` request
+        header, or ``"unknown"`` when the header is absent.
+    """
+    return {"job_id": info.request.headers.get("X-Job-ID", "unknown")}
+
+
+# Build the instrumentator exactly once (it used to be constructed twice,
+# with the first instance thrown away).
+instrumentator = Instrumentator()
+
+# NOTE(review): Instrumentator.add() presumably expects an instrumentation
+# function that records metrics itself and ignores the return value, so the
+# dict returned by custom_labels may never reach any metric — confirm against
+# the prometheus-fastapi-instrumentator documentation.
+instrumentator.add(custom_labels)
+instrumentator.instrument(app).expose(app)
+
+# ✅ 생성된 요약 HTML 결과 파일 서빙 경로 등록
+app.mount(
+    "/view/generated_html", StaticFiles(directory=SUMMARY_HTML_DIR), name="summary_html"
+)
+app.mount(
+    "/static", StaticFiles(directory="/workspace/workspace/static"), name="static"
+)
+
+# 🔑 가이드북, 상태 확인, 문서는 API 키 없이 접근 가능
+app.include_router(guide_router)  # ✅ 가이드북 HTML 서빙
+
+# ⭐️ 관리자 전용: API 키 관리 라우터 (마스터 키 필요, 문서에서 숨김)
+app.include_router(
+    api_key_router,
+    dependencies=[admin_key_dependency],
+    include_in_schema=False,
+)
+
+# 🔑 Every router below requires a regular API key
+app.include_router(model_router, dependencies=[api_key_dependency])  # ✅ model management API
+app.include_router(
+    download_router, dependencies=[api_key_dependency]
+)  # ✅ download API
+app.include_router(
+    general_router, dependencies=[api_key_dependency]
+)  # ✅ general inference API
+app.include_router(
+    extract_router, dependencies=[api_key_dependency]
+)  # ✅ document extraction API
+app.include_router(dummy_router, dependencies=[api_key_dependency])  # ✅ dummy API
+app.include_router(ocr_router, dependencies=[api_key_dependency])  # ✅ OCR API
+# STT-invoked features are temporarily excluded (2025.07.29): the router stays
+# mounted but hidden from the schema. It is registered exactly once; a second
+# include without include_in_schema=False used to duplicate every STT route
+# and put it back into the published schema.
+app.include_router(
+    stt_router, dependencies=[api_key_dependency], include_in_schema=False
+)
+app.include_router(llm_summation, dependencies=[api_key_dependency])
+app.include_router(yolo_router, dependencies=[api_key_dependency])
+
+# /docs URL에 커스터마이징된 Swagger UI 연결
+app.mount(
+    "/docs", StaticFiles(directory="/workspace/swagger-ui", html=True), name="docs"
+)
+
+
+@app.get("/health/API")
+async def health_check():
+    """애플리케이션 상태 확인"""
+    # Liveness probe: a constant payload, no dependency is contacted.
+    # (The docstring above is left verbatim because FastAPI publishes it as
+    # the endpoint description.)
+    payload = {"status": "API ok"}
+    return payload
+
+
+@app.get("/health/Redis")
+def redis_health_check():
+    # Health probe for Redis: obtains the client from get_redis_client() and
+    # issues a PING. Responds {"status": "Redis ok"} on success and HTTP 500
+    # when no client is available or the ping raises.
+    client = get_redis_client()
+    if client is None:
+        raise HTTPException(status_code=500, detail="Redis connection failed")
+    try:
+        client.ping()
+    except Exception as exc:
+        # Chain the original error so the real cause stays visible in
+        # tracebacks while the client still gets the same generic 500.
+        raise HTTPException(status_code=500, detail="Redis ping failed") from exc
+    return {"status": "Redis ok"}
+
+
+@app.get("/health/MinIO")
+def minio_health_check():
+    # Health probe for MinIO: responds {"status": "MinIO ok"} when a client
+    # can be obtained, HTTP 500 otherwise.
+    try:
+        client = get_minio_client()
+    except Exception as e:
+        raise HTTPException(
+            status_code=500, detail=f"MinIO health check failed: {str(e)}"
+        ) from e
+    # Mirror the Redis probe: a missing client is a failure, not "ok".
+    if client is None:
+        raise HTTPException(status_code=500, detail="MinIO connection failed")
+    # NOTE(review): this only proves that get_minio_client() returned a client.
+    # Unless that helper contacts the server itself, no round trip to MinIO is
+    # made here — consider a cheap call such as listing buckets; confirm the
+    # helper's return type first.
+    return {"status": "MinIO ok"}
--- a/workspace/config/init.py
+++ b/workspace/config/init.py
--- a/workspace/config/setting.py
+++ b/workspace/config/setting.py
@@ -0,0 +1,59 @@
+import os
+from pathlib import Path
+
+PROJECT_ROOT = Path(__file__).resolve().parents[1]  # /workspace
+STATIC_DIR = PROJECT_ROOT / "static"
+
+# 프롬프트 & 스키마 경로
+DEFAULT_PROMPT_PATH = STATIC_DIR / "prompt" / "default_prompt_v0.1.txt"
+STRUCTURED_PROMPT_PATH = STATIC_DIR / "prompt" / "structured_prompt_v0.1.txt"
+I18N_PROMPT_PATH = STATIC_DIR / "prompt" / "i18n_test_prompt_kor.txt"
+D6C_PROMPT_PATH = STATIC_DIR / "prompt" / "d6c_test_prompt_eng.txt"
+STRUCTURED_SCHEMA_PATH = STATIC_DIR / "structured_schema.json"
+
+# html 경로
+EXTRACT_DEFAULT_PATH = STATIC_DIR / "html" / "extract_guide.html"
+EXTRACT_STRUCTURED_PATH = STATIC_DIR / "html" / "extraction_structured_guide.html"
+GENERAL_GUIDE_PATH = STATIC_DIR / "html" / "general_guide.html"
+SCHEMA_FILE_PATH = STATIC_DIR / "html" / "schema_file_guide.html"
+SUMMARY_HTML_DIR = STATIC_DIR / "html" / "generated"
+
+# Ollama 모델 API 엔드포인트
+OLLAMA_URL = [
+    "http://pgn_ollama_gemma:11534/api/generate",
+    "http://pgn_ollama_gpt_oss:11634/api/generate",
+    "http://pgn_ollama_qwen:11734/api/generate",
+]
+
+# OCR API URL
+OCR_API_URL = "http://ocr_api_8890:8890/ocr"
+OCR_STATUS_PUBLIC_URL = "http://localhost:8890/ocr/progress"
+
+# OCR Redis 기본 설정
+OCR_REDIS_HOST = "ocr_redis"
+OCR_REDIS_PORT = 6379
+OCR_REDIS_DB = 0
+
+# llm_gateway 서비스 Redis 설정
+PGN_REDIS_HOST = "pgn_redis"
+PGN_REDIS_PORT = 6379
+PGN_REDIS_DB = 2
+
+# ✅ MinIO settings
+# The credentials can be overridden through environment variables so that
+# deployments do not have to rely on values committed to the repository. The
+# previous literals remain as defaults to stay backward compatible.
+# NOTE(review): a secret key is committed here in plain text — rotate it and
+# drop the default once every deployment provides MINIO_SECRET_KEY.
+MINIO_ENDPOINT = "172.16.10.175:9000"  # internal address for the Docker network
+MINIO_ACCESS_KEY = os.getenv("MINIO_ACCESS_KEY", "kyy")
+MINIO_SECRET_KEY = os.getenv("MINIO_SECRET_KEY", "hLAk3aQfH8HTs7ELTcyR")
+MINIO_BUCKET_NAME = "ocr-gateway"  # bucket created in advance
+MINIO_RESULTS_BUCKET_NAME = "ocr-gateway-results"  # bucket for stored results
+
+# 파일 저장 경로
+UPLOAD_DIR = "/workspace/temp_upload"
+CACHED_PROMPT_DIR = "/workspace/cached"
+LOG_DIR = "/workspace/logs"
+os.makedirs(UPLOAD_DIR, exist_ok=True)
+os.makedirs(SUMMARY_HTML_DIR, exist_ok=True)
+os.makedirs(CACHED_PROMPT_DIR, exist_ok=True)
+os.makedirs(LOG_DIR, exist_ok=True)
+
+# 허용된 확장자 목록
+ALLOWED_EXTENSIONS = {".pdf", ".docx", ".jpg", ".jpeg", ".png"}
--- a/workspace/interface/init.py
+++ b/workspace/interface/init.py
--- a/workspace/interface/streamlit_ui.py
+++ b/workspace/interface/streamlit_ui.py
@@ -0,0 +1,34 @@
+import streamlit as st
+from urllib.parse import quote
+import requests
+import os
+
+API_URL = "http://api:8888/upload"  # FastAPI 서비스 이름 기준
+DOWNLOAD_URL = "http://172.16.10.176:8888/download/"
+
+st.set_page_config(page_title="HANMAC PGN Documents", layout="centered")
+
+st.image("/app/app/static/logo.png", use_container_width=True)
+st.markdown(
+    "<h2 style='text-align: center;'>🔹PM Oversea DEMO🔹</h2>",
+    unsafe_allow_html=True
+)
+
+uploaded_files = st.file_uploader("PDF 파일 업로드", type=["pdf"], accept_multiple_files=True)
+
+# Upload the selected PDFs to the API and render one download link per result.
+if st.button("업로드 및 처리") and uploaded_files:
+    files = [("files", (f.name, f.getvalue(), "application/pdf")) for f in uploaded_files]
+    response = None
+    with st.spinner("서버에 업로드 및 처리 중..."):
+        try:
+            # (connect, read) timeout: fail fast when the API is unreachable,
+            # but put no limit on the read because processing may take long.
+            response = requests.post(API_URL, files=files, timeout=(10, None))
+        except requests.RequestException:
+            # Connection problems fall through to the generic error message
+            # below instead of crashing the page with a traceback.
+            response = None
+
+    if response is not None and response.status_code == 200:
+        st.success("처리 완료! 결과를 아래에서 확인하세요.")
+        results = response.json()["results"]
+        for r in results:
+            filename = r["filename"]
+            json_path = r["saved_path"]
+            # Only the base name is exposed; URL-encode it for the link.
+            encoded_filename = quote(os.path.basename(json_path))
+            download_link = f"{DOWNLOAD_URL}{encoded_filename}"
+            st.markdown(f"✅ **{filename}** 처리 완료\n[JSON 다운로드]({download_link})")
+    else:
+        st.error("❌ 처리 중 오류가 발생했습니다.")
--- a/workspace/routers/init.py
+++ b/workspace/routers/init.py
@@ -0,0 +1,25 @@
+from .api_key_router import router as api_key_router
+from .download_router import router as download_router
+from .dummy_router import router as dummy_router
+from .extract_router import router as extract_router
+from .general_router import router as general_router
+from .guide_router import router as guide_router
+from .llm_summation import router as llm_summation
+from .model_router import router as model_router
+from .ocr_router import router as ocr_router
+from .stt_router import router as stt_router
+from .yolo_router import router as yolo_router
+
+__all__ = [
+    "api_key_router",
+    "download_router",
+    "dummy_router",
+    "extract_router",
+    "general_router",
+    "guide_router",
+    "model_router",
+    "ocr_router",
+    "stt_router",
+    "llm_summation",
+    "yolo_router",
+]
--- a/workspace/routers/api_key_router.py
+++ b/workspace/routers/api_key_router.py
@@ -0,0 +1,42 @@
+from fastapi import APIRouter, Body, HTTPException
+from services import api_key_service
+
+router = APIRouter(prefix="/manage", tags=["API Key Management"])
+
+
+@router.post("/keys", summary="Create a new API Key")
+def create_key(
+    client_name: str = Body(
+        ...,
+        embed=True,
+        description="Name of the client or service that will use this key.",
+    ),
+):
+    """
+    새로운 API 키를 생성하고 시스템에 등록합니다.
+    """
+    # Creates and registers a new API key for `client_name`.
+    # (The docstring above is left verbatim because FastAPI publishes it as
+    # the endpoint description.)
+    # Names that are empty or consist only of whitespace are rejected with
+    # HTTP 400; surrounding whitespace is stripped before the key is created.
+    normalized_name = client_name.strip() if client_name else ""
+    if not normalized_name:
+        raise HTTPException(status_code=400, detail="Client name is required.")
+
+    new_key_info = api_key_service.create_api_key(normalized_name)
+    return {"message": "API Key created successfully", "key_info": new_key_info}
+
+
+@router.get("/keys", summary="List all API Keys")
+def list_keys():
+    """
+    현재 시스템에 등록된 모든 API 키의 정보를 조회합니다.
+    """
+    # Returns whatever api_key_service.list_api_keys() reports, wrapped under
+    # the "keys" field. (Docstring kept verbatim: FastAPI publishes it.)
+    return {"keys": api_key_service.list_api_keys()}
+
+
+@router.delete("/keys/{api_key}", summary="Revoke an API Key")
+def revoke_key(api_key: str):
+    """
+    지정된 API 키를 시스템에서 영구적으로 삭제(폐기)합니다.
+    """
+    # Success path first; a falsy result from the service means the key was
+    # not found and is reported as HTTP 404.
+    # (Docstring kept verbatim: FastAPI publishes it.)
+    if api_key_service.revoke_api_key(api_key):
+        return {"message": f"API Key '{api_key}' has been revoked."}
+    raise HTTPException(status_code=404, detail="API Key not found.")
--- a/workspace/routers/download_router.py
+++ b/workspace/routers/download_router.py
@@ -0,0 +1,22 @@
+from fastapi import APIRouter
+from services.download_service import DownloadService
+
+router = APIRouter(tags=["Model Management"])
+
+
+# ✅ GET: download the default prompt file
+@router.get("/default_prompt", summary="기본 프롬프트 파일 다운로드")
+async def download_default_prompt():
+    # Thin wrapper: the response is produced entirely by DownloadService.
+    response = DownloadService.download_default_prompt()
+    return response
+
+
+# ✅ GET: download the structured prompt file
+@router.get("/structured_prompt", summary="구조화 프롬프트 파일 다운로드")
+async def download_structured_prompt():
+    # Thin wrapper: the response is produced entirely by DownloadService.
+    response = DownloadService.download_structured_prompt()
+    return response
+
+
+# ✅ GET: download the structured field-definition (schema) file
+@router.get("/structured_schema", summary="구조화 포맷 정의 파일 다운로드")
+async def download_structured_schema():
+    # Thin wrapper: the response is produced entirely by DownloadService.
+    response = DownloadService.download_structured_schema()
+    return response
--- a/workspace/routers/dummy_router.py
+++ b/workspace/routers/dummy_router.py
@@ -0,0 +1,68 @@
+from typing import Optional
+
+from fastapi import APIRouter, Depends, Form, Request
+from services.inference_service import InferenceHandler
+from utils.checking_keys import get_api_key
+
+router = APIRouter(prefix="/dummy", tags=["Dummy"])
+
+
+# ✅ POST:DUMMY
+@router.post(
+    "/extract/outer",
+    summary="더미 응답 생성",
+    description="""### **요약**
+실제 모델 추론이나 파일 업로드 없이, 지정된 모델의 응답 형식을 테스트하기 위한 더미(dummy) 결과를 생성합니다.
+
+### **작동 방식**
+-   요청 시, 시스템에 미리 저장된 더미 응답(`dummy_response.json`)을 즉시 반환합니다.
+-   실제 OCR, LLM 추론 등 어떠한 백그라운드 작업도 수행하지 않습니다.
+-   네트워크나 모델 성능에 관계없이 API 응답 구조를 빠르게 확인하는 용도로 사용됩니다.
+
+### **입력 (multipart/form-data)**
+-   `model` (선택): 응답 형식의 기준이 될 모델 이름. (기본값: `dummy`)
+    -   이 값은 실제 추론에 사용되지 않으며, 형식 테스트용으로만 기능합니다.
+
+### **출력 (application/json)**
+-   **즉시 반환**:
+    ```json
+    {
+        "filename": "dummy_input.pdf",
+        "dummy_model": {
+            "ocr_model": "dummy",
+            "llm_model": "dummy",
+            "api_url": "dummy"
+        },
+        "time": {
+            "duration_sec": "0.00",
+            "started_at": "...",
+            "ended_at": "..."
+        },
+        "fields": {},
+        "parsed": "dummy",
+        "generated": "dummy",
+        "processed": {
+            "dummy response"
+        }
+    }
+    ```
+""",
+)
+async def extract_outer(
+    request_info: Request,
+    model: Optional[str] = Form(
+        default="dummy", description="실제 추론 없이 포맷 테스트용으로 사용됩니다."
+    ),
+    api_key: str = Depends(get_api_key),
+):
+    """Return the dummy extraction response.
+
+    Awaits ``InferenceHandler.handle_extract_background`` in ``"dummy"`` mode
+    with no request/result ids and no files, and returns its result directly.
+
+    Args:
+        request_info: The incoming request, forwarded to the handler.
+        model: Model name forwarded as the only entry of ``model_list``.
+        api_key: API key resolved by the ``get_api_key`` dependency.
+    """
+    # No ids and no files are supplied: only mode, model and caller context.
+    handler_kwargs = {
+        "request_id": None,
+        "result_id": None,
+        "input_file": None,
+        "schema_file": None,
+        "prompt_file": None,
+        "mode": "dummy",
+        "model_list": [model],
+        "request_info": request_info,
+        "api_key": api_key,
+    }
+    return await InferenceHandler.handle_extract_background(**handler_kwargs)
--- a/workspace/routers/extract_router.py
+++ b/workspace/routers/extract_router.py
@@ -0,0 +1,728 @@
+import asyncio
+import io
+import json
+from typing import Optional
+from urllib.parse import urlparse
+
+import requests
+from config.setting import (
+    D6C_PROMPT_PATH,
+    I18N_PROMPT_PATH,
+    PGN_REDIS_DB,
+    PGN_REDIS_HOST,
+    PGN_REDIS_PORT,
+)
+from fastapi import APIRouter, Depends, File, Form, Request, UploadFile
+from fastapi.responses import JSONResponse
+from redis import Redis
+from services.inference_service import InferenceHandler
+from utils.checking_files import (
+    clone_upload_file,
+    validate_all_files,
+)
+from utils.checking_keys import create_key, get_api_key
+from utils.minio_utils import fetch_result_from_minio
+
+
+# Redis 클라이언트 (LLM Gateway 전용)
+redis_client = Redis(
+    host=PGN_REDIS_HOST, port=PGN_REDIS_PORT, db=PGN_REDIS_DB, decode_responses=True
+)
+
+router = APIRouter(prefix="/extract", tags=["Extraction"])
+
+
+# ✅ Factory for the common asynchronous inference endpoints
+def register_extract_route(
+    path: str, mode: str, default_model: str, summary: str, description: str
+):
+    """Register a POST extraction endpoint on ``router`` and return it.
+
+    The generated endpoint validates the uploaded input file, creates a
+    request id and a result id, stores the request→result mapping in Redis,
+    schedules ``InferenceHandler.handle_extract_background`` as a background
+    task and immediately answers with the request id and the progress URL.
+
+    Args:
+        path: Route path relative to the router prefix.
+        mode: Mode string forwarded to the inference handler.
+        default_model: Default value of the ``model`` form field.
+        summary: Summary passed to the route decorator.
+        description: Description passed to the route decorator and assigned
+            to the endpoint's ``__doc__``.
+
+    Returns:
+        The endpoint coroutine function that was registered.
+    """
+    # asyncio keeps only weak references to tasks, so an un-referenced task
+    # can be garbage-collected mid-execution. Hold strong references here
+    # until each task finishes.
+    pending_tasks = set()
+
+    @router.post(path, summary=summary, description=description)
+    async def extract_endpoint(
+        request_info: Request,
+        input_file: UploadFile = File(...),
+        prompt_file: Optional[UploadFile] = File(
+            default=None,
+            description="⚠️ prompt_file 업로드하지 않을 경우, **'Send empty value'** 체크박스를 반드시 해제해주세요.",
+        ),
+        model: Optional[str] = Form(default=default_model),
+        api_key: str = Depends(get_api_key),
+    ):
+        validate_all_files(input_file)
+
+        # ✅ Create unique ids for this request
+        request_id = create_key()
+        result_id = create_key()
+
+        cloned_input = clone_upload_file(input_file) if input_file else None
+        cloned_prompt = clone_upload_file(prompt_file) if prompt_file else None
+
+        # ✅ Store the request_id → result_id mapping first, so a Redis failure
+        # aborts the request before any background work has been scheduled.
+        redis_client.hset("pipeline_result_mapping", request_id, result_id)
+
+        # ✅ Run the job in the background
+        task = asyncio.create_task(
+            InferenceHandler.handle_extract_background(
+                request_id=request_id,
+                result_id=result_id,
+                input_file=cloned_input,
+                schema_file=None,
+                prompt_file=cloned_prompt,
+                mode=mode,
+                model_list=[model],
+                request_info=request_info,
+                api_key=api_key,
+            )
+        )
+        pending_tasks.add(task)
+        task.add_done_callback(pending_tasks.discard)
+
+        return JSONResponse(
+            content={
+                "message": "문서 추출 및 생성형 응답 작업이 백그라운드에서 실행 중입니다.",
+                "request_id": request_id,
+                "status_check_url": f"/extract/progress/{request_id}",
+            }
+        )
+
+    # Metadata intended for the FastAPI documentation.
+    # NOTE(review): these are assigned after the route was registered, so the
+    # router may already have captured the original name — confirm.
+    extract_endpoint.__name__ = f"extract_{mode}"
+    extract_endpoint.__doc__ = description
+    return extract_endpoint
+
+
+# ✅ 내부 모델용 등록
+extract_inner = register_extract_route(
+    path="/inner",
+    mode="inner",
+    default_model="gemma3:27b",
+    summary="내부 LLM 기반 문서 정보 추출 (비동기)",
+    description="""### **요약**
+내부망에 배포된 LLM(Ollama 기반)을 사용하여 문서(PDF, 이미지 등)에서 정보를 추출하고 응답을 생성합니다. 이 엔드포인트는 사전 정의된 기본 프롬프트를 사용하며, 비동기적으로 처리됩니다.
+
+### **작동 방식**
+1.  **요청 접수**: `input_file`을 받아 고유 `request_id`를 생성하고 즉시 반환합니다.
+2.  **백그라운드 처리**:
+    -   `input_file`에 대해 **OCR API**를 호출하여 텍스트를 추출합니다.
+    -   시스템에 내장된 기본 프롬프트와 추출된 텍스트를 조합합니다. (`prompt_file`을 업로드하여 기본 프롬프트를 대체할 수 있습니다.)
+    -   내부 LLM(Ollama)에 추론을 요청합니다.
+3.  **상태 및 결과 확인**: `GET /extract/progress/{request_id}`로 작업 상태와 최종 결과를 조회합니다.
+
+### **입력 (multipart/form-data)**
+-   `input_file` (**필수**): 정보 추출의 대상이 될 문서 파일.
+    -   지원 형식: `.pdf`, `.docx`, `.jpg`, `.png`, `.jpeg` 등.
+-   `prompt_file` (선택): 기본 프롬프트 대신 사용할 사용자 정의 `.txt` 프롬프트 파일.
+-   `model` (선택): 사용할 내부 LLM 모델 이름. (기본값: `gemma3:27b`)
+
+### **출력 (application/json)**
+-   **초기 응답**:
+    ```json
+    {
+      "message": "작업이 백그라운드에서 실행 중입니다.",
+      "request_id": "고유한 요청 ID",
+      "status_check_url": "/extract/progress/고유한 요청 ID"
+    }
+    ```
+-   **최종 결과**: `GET /extract/progress/{request_id}`를 통해 확인 가능.
+""",
+)
+
+# ✅ 외부 모델용 등록
+extract_outer = register_extract_route(
+    path="/outer",
+    mode="outer",
+    default_model="gemini-2.5-flash",
+    summary="외부 LLM 기반 문서 정보 추출 (비동기)",
+    description="""### **요약**
+외부 상용 LLM(예: GPT, Gemini)을 사용하여 문서에서 정보를 추출하고 응답을 생성합니다. 내부 LLM 엔드포인트와 작동 방식은 동일하나, 외부 API를 호출합니다.
+
+### **작동 방식**
+1.  **요청 접수**: `input_file`을 받아 `request_id`를 생성 후 즉시 반환합니다.
+2.  **백그라운드 처리**:
+    -   `input_file`에서 **OCR API**를 통해 텍스트를 추출합니다.
+    -   내장된 기본 프롬프트(또는 사용자 정의 `prompt_file`)와 텍스트를 조합합니다.
+    -   외부 LLM API(OpenAI, Google 등)에 추론을 요청합니다.
+3.  **상태 및 결과 확인**: `GET /extract/progress/{request_id}`로 작업 상태와 최종 결과를 조회합니다.
+
+### **입력 (multipart/form-data)**
+-   `input_file` (**필수**): 정보 추출 대상 문서 파일.
+-   `prompt_file` (선택): 기본 프롬프트 대신 사용할 `.txt` 파일.
+-   `model` (선택): 사용할 외부 LLM 모델 이름. (기본값: `gemini-2.5-flash`)
+
+### **출력 (application/json)**
+-   **초기 응답**:
+    ```json
+    {
+      "message": "작업이 백그라운드에서 실행 중입니다.",
+      "request_id": "고유한 요청 ID",
+      "status_check_url": "/extract/progress/고유한 요청 ID"
+    }
+    ```
+-   **최종 결과**: `GET /extract/progress/{request_id}`를 통해 확인 가능.
+""",
+)
+
+# ✅ 멀티모달 GPT 테스트용 등록
+extract_outer_gpt = register_extract_route(
+    path="/outer/gpt",
+    mode="multimodal",
+    default_model="gpt-4o",
+    summary="멀티모달 GPT 테스트용",
+    description="""### **요약**
+GPT-4o와 같은 멀티모달 모델을 사용하여, 문서(PDF, 이미지 등)에서 정보를 추출하고 응답을 생성합니다.
+
+### **작동 방식**
+-   다른 추출 엔드포인트와 동일한 비동기 파이프라인을 따릅니다.
+-   추론 단계에서 시스템은 멀티모달 출력을 생성하도록 특화된 프롬프트(`multimodal_prompt`)를 사용합니다.
+-   LLM은 `input_file`의 내용과 `prompt_file`의 구조를 바탕으로 JSON 객체를 생성합니다.
+
+### **입력 (multipart/form-data)**
+-   `input_file` (**필수**): 정보 추출 대상 문서 파일.
+-   `prompt_file` (**선택**): 구조화용 기본 프롬프트 대신 사용할 `.txt` 파일.
+-   `model` (선택): 사용할 LLM 모델 이름.
+
+### **출력 (application/json)**
+-   **초기 응답**: `request_id` 포함.
+-   **최종 결과**: `GET /extract/progress/{request_id}` 조회 시, 지정된 스키마를 따르는 JSON 객체가 반환됩니다.
+""",
+)
+
+# ✅ 멀티모달 Gemini 테스트용 등록
+extract_outer_gemini = register_extract_route(
+    path="/outer/gemini",
+    mode="multimodal",
+    default_model="gemini-2.5-flash",
+    summary="멀티모달 Gemini 테스트용",
+    description="""### **요약**
+Gemini와 같은 멀티모달 모델을 사용하여, 문서(PDF, 이미지 등)에서 정보를 추출하고 응답을 생성합니다.
+
+### **작동 방식**
+-   다른 추출 엔드포인트와 동일한 비동기 파이프라인을 따릅니다.
+-   추론 단계에서 시스템은 멀티모달 출력을 생성하도록 특화된 프롬프트(`multimodal_prompt`)를 사용합니다.
+-   LLM은 `input_file`의 내용과 `prompt_file`의 구조를 바탕으로 JSON 객체를 생성합니다.
+
+### **입력 (multipart/form-data)**
+-   `input_file` (**필수**): 정보 추출 대상 문서 파일.
+-   `prompt_file` (**선택**): 구조화용 기본 프롬프트 대신 사용할 `.txt` 파일.
+-   `model` (선택): 사용할 LLM 모델 이름.
+
+### **출력 (application/json)**
+-   **초기 응답**: `request_id` 포함.
+-   **최종 결과**: `GET /extract/progress/{request_id}` 조회 시, 지정된 스키마를 따르는 JSON 객체가 반환됩니다.
+""",
+)
+
+# asyncio keeps only weak references to tasks; hold strong references to the
+# background jobs started by extract_d6c until they finish.
+_extract_d6c_tasks = set()
+
+
+@router.post(
+    "/inner/d6c",
+    summary="국내 문서 테스트용",
+)
+async def extract_d6c(
+    request_info: Request,
+    input_file: UploadFile = File(...),
+    model: Optional[str] = Form(default="gemma3:27b"),
+    api_key: str = Depends(get_api_key),
+):
+    # Test endpoint: validates the upload, schedules an "inner"-mode extraction
+    # with a prompt file taken from the settings, and immediately returns the
+    # request id plus the progress URL.
+    # (No docstring on purpose: FastAPI would publish it as the description.)
+    validate_all_files(input_file)
+
+    request_id = create_key()
+    result_id = create_key()
+
+    cloned_input = clone_upload_file(input_file) if input_file else None
+
+    # Always use the prompt file configured in the settings.
+    # NOTE(review): this d6c endpoint reads I18N_PROMPT_PATH while /inner/i18n
+    # reads D6C_PROMPT_PATH — confirm whether the cross-wiring is intentional.
+    with open(I18N_PROMPT_PATH, "rb") as f:
+        content = f.read()
+
+    # Wrap the bytes in an in-memory, UploadFile-like object so the background
+    # handler receives the same kind of object as for a real upload.
+    dummy_prompt_file = UploadFile(
+        filename=I18N_PROMPT_PATH.name, file=io.BytesIO(content)
+    )
+    cloned_prompt = clone_upload_file(dummy_prompt_file)
+
+    # Store the request_id → result_id mapping first, so a Redis failure aborts
+    # the request before any background work has been scheduled.
+    redis_client.hset("pipeline_result_mapping", request_id, result_id)
+
+    task = asyncio.create_task(
+        InferenceHandler.handle_extract_background(
+            request_id=request_id,
+            result_id=result_id,
+            input_file=cloned_input,
+            schema_file=None,
+            prompt_file=cloned_prompt,
+            mode="inner",
+            model_list=[model],
+            request_info=request_info,
+            api_key=api_key,
+        )
+    )
+    _extract_d6c_tasks.add(task)
+    task.add_done_callback(_extract_d6c_tasks.discard)
+
+    return JSONResponse(
+        content={
+            "message": "문서 추출 및 생성형 응답 작업이 백그라운드에서 실행 중입니다.",
+            "request_id": request_id,
+            "status_check_url": f"/extract/progress/{request_id}",
+        }
+    )
+
+
+# asyncio keeps only weak references to tasks; hold strong references to the
+# background jobs started by extract_i18n until they finish.
+_extract_i18n_tasks = set()
+
+
+@router.post(
+    "/inner/i18n",
+    summary="해외 문서 테스트용",
+)
+async def extract_i18n(
+    request_info: Request,
+    input_file: UploadFile = File(...),
+    model: Optional[str] = Form(default="gemma3:27b"),
+    api_key: str = Depends(get_api_key),
+):
+    # Test endpoint: validates the upload, schedules an "inner"-mode extraction
+    # with a prompt file taken from the settings, and immediately returns the
+    # request id plus the progress URL.
+    # (No docstring on purpose: FastAPI would publish it as the description.)
+    validate_all_files(input_file)
+
+    request_id = create_key()
+    result_id = create_key()
+
+    cloned_input = clone_upload_file(input_file) if input_file else None
+
+    # Always use the prompt file configured in the settings.
+    # NOTE(review): this i18n endpoint reads D6C_PROMPT_PATH while /inner/d6c
+    # reads I18N_PROMPT_PATH — confirm whether the cross-wiring is intentional.
+    with open(D6C_PROMPT_PATH, "rb") as f:
+        content = f.read()
+
+    # Wrap the bytes in an in-memory, UploadFile-like object so the background
+    # handler receives the same kind of object as for a real upload.
+    dummy_prompt_file = UploadFile(
+        filename=D6C_PROMPT_PATH.name, file=io.BytesIO(content)
+    )
+    cloned_prompt = clone_upload_file(dummy_prompt_file)
+
+    # Store the request_id → result_id mapping first, so a Redis failure aborts
+    # the request before any background work has been scheduled.
+    redis_client.hset("pipeline_result_mapping", request_id, result_id)
+
+    task = asyncio.create_task(
+        InferenceHandler.handle_extract_background(
+            request_id=request_id,
+            result_id=result_id,
+            input_file=cloned_input,
+            schema_file=None,
+            prompt_file=cloned_prompt,
+            mode="inner",
+            model_list=[model],
+            request_info=request_info,
+            api_key=api_key,
+        )
+    )
+    _extract_i18n_tasks.add(task)
+    task.add_done_callback(_extract_i18n_tasks.discard)
+
+    return JSONResponse(
+        content={
+            "message": "문서 추출 및 생성형 응답 작업이 백그라운드에서 실행 중입니다.",
+            "request_id": request_id,
+            "status_check_url": f"/extract/progress/{request_id}",
+        }
+    )
+
+
+# ✅ structured 모드: 구조화 JSON 스키마 기반 추론
+@router.post(
+    "/inner/structured",
+    summary="구조화된 JSON 정보 추출 (비동기)",
+    description="""### **요약**
+사용자가 제공한 `schema_file`에 정의된 JSON 스키마에 따라, 문서에서 정보를 추출하여 구조화된 JSON으로 반환합니다.
+
+### **작동 방식**
+-   다른 추출 엔드포인트와 동일한 비동기 파이프라인을 따릅니다.
+-   추론 단계에서 시스템은 구조화된 출력을 생성하도록 특화된 프롬프트(`structured_prompt`)를 사용합니다.
+-   LLM은 `input_file`의 내용과 `schema_file`의 구조를 바탕으로 JSON 객체를 생성합니다.
+
+### **입력 (multipart/form-data)**
+-   `input_file` (**필수**): 정보 추출 대상 문서 파일.
+-   `schema_file` (**필수**): 원하는 출력 JSON 구조를 정의하는 `.json` 파일.
+-   `prompt_file` (**필수**): 구조화용 기본 프롬프트 대신 사용할 `.txt` 파일.
+-   `model` (선택): 사용할 LLM 모델 이름.
+
+### **출력 (application/json)**
+-   **초기 응답**: `request_id` 포함.
+-   **최종 결과**: `GET /extract/progress/{request_id}` 조회 시, 지정된 스키마를 따르는 JSON 객체가 반환됩니다.
+""",
+)
+async def extract_structured_inner(
+    request_info: Request,
+    input_file: UploadFile = File(...),
+    schema_file: UploadFile = File(...),
+    prompt_file: UploadFile = File(...),
+    model: Optional[str] = Form(default="gemma3:27b"),
+    api_key: str = Depends(get_api_key),
+):
+    """Schedule a ``"structured"``-mode extraction and return its request id.
+
+    Validates ``input_file``, clones the three uploads, stores the
+    request→result mapping in Redis and starts
+    ``InferenceHandler.handle_extract_background`` as a background task.
+
+    Args:
+        request_info: The incoming request, forwarded to the handler.
+        input_file: Document to extract from (required).
+        schema_file: Schema upload forwarded to the handler (required).
+        prompt_file: Prompt upload forwarded to the handler (required).
+        model: Model name forwarded as the only entry of ``model_list``.
+        api_key: API key resolved by the ``get_api_key`` dependency.
+
+    Returns:
+        A JSONResponse with ``message``, ``request_id`` and
+        ``status_check_url``.
+    """
+    validate_all_files(input_file)
+
+    # ✅ Create unique ids for this request
+    request_id = create_key()
+    result_id = create_key()
+
+    cloned_input = clone_upload_file(input_file) if input_file else None
+    cloned_schema = clone_upload_file(schema_file) if schema_file else None
+    cloned_prompt = clone_upload_file(prompt_file) if prompt_file else None
+
+    # ✅ Store the request_id → result_id mapping first, so a Redis failure
+    # aborts the request before any background work has been scheduled.
+    redis_client.hset("pipeline_result_mapping", request_id, result_id)
+
+    # ✅ Run the job in the background
+    task = asyncio.create_task(
+        InferenceHandler.handle_extract_background(
+            request_id=request_id,
+            result_id=result_id,
+            input_file=cloned_input,
+            schema_file=cloned_schema,
+            prompt_file=cloned_prompt,
+            mode="structured",
+            model_list=[model],
+            request_info=request_info,
+            api_key=api_key,
+        )
+    )
+    _structured_inner_tasks.add(task)
+    task.add_done_callback(_structured_inner_tasks.discard)
+
+    return JSONResponse(
+        content={
+            "message": "문서 추출 및 생성형 응답 작업이 백그라운드에서 실행 중입니다.",
+            "request_id": request_id,
+            "status_check_url": f"/extract/progress/{request_id}",
+        }
+    )
+
+
+# asyncio keeps only weak references to tasks; this set holds strong references
+# to the jobs started by extract_structured_inner until they finish. It is
+# defined after the function (the name is resolved at call time) so that the
+# route decorator stays directly attached to the function.
+_structured_inner_tasks = set()
+
+
+# ✅ structured 모드: 구조화 JSON 스키마 기반 추론
+@router.post(
+    "/outer/structured",
+    summary="구조화된 JSON 정보 추출 (비동기)",
+    description="""### **요약**
+사용자가 제공한 `schema_file`에 정의된 JSON 스키마에 따라, 문서에서 정보를 추출하여 구조화된 JSON으로 반환합니다.
+
+### **작동 방식**
+-   다른 추출 엔드포인트와 동일한 비동기 파이프라인을 따릅니다.
+-   추론 단계에서 시스템은 구조화된 출력을 생성하도록 특화된 프롬프트(`structured_prompt`)를 사용합니다.
+-   LLM은 `input_file`의 내용과 `schema_file`의 구조를 바탕으로 JSON 객체를 생성합니다.
+
+### **입력 (multipart/form-data)**
+-   `input_file` (**필수**): 정보 추출 대상 문서 파일.
+-   `schema_file` (**선택**): 원하는 출력 JSON 구조를 정의하는 `.json` 파일.
+-   `prompt_file` (**선택**): 구조화용 기본 프롬프트 대신 사용할 `.txt` 파일.
+-   `model` (선택): 사용할 LLM 모델 이름.
+
+### **출력 (application/json)**
+-   **초기 응답**: `request_id` 포함.
+-   **최종 결과**: `GET /extract/progress/{request_id}` 조회 시, 지정된 스키마를 따르는 JSON 객체가 반환됩니다.
+""",
+)
+async def extract_structured_outer(
+    request_info: Request,
+    input_file: UploadFile = File(...),
+    schema_file: Optional[UploadFile] = File(
+        default=None,
+        description="⚠️ schema_file 업로드하지 않을 경우, **'Send empty value'** 체크박스를 반드시 해제해주세요.",
+    ),
+    prompt_file: Optional[UploadFile] = File(
+        default=None,
+        description="⚠️ prompt_file 업로드하지 않을 경우, **'Send empty value'** 체크박스를 반드시 해제해주세요.",
+    ),
+    model: Optional[str] = Form(default="gemma3:27b"),
+    api_key: str = Depends(get_api_key),
+):
+    """Schedule a ``"structured"``-mode extraction and return its request id.
+
+    Validates ``input_file``, clones whichever uploads were provided, stores
+    the request→result mapping in Redis and starts
+    ``InferenceHandler.handle_extract_background`` as a background task.
+
+    Args:
+        request_info: The incoming request, forwarded to the handler.
+        input_file: Document to extract from (required).
+        schema_file: Optional schema upload; ``None`` is forwarded when absent.
+        prompt_file: Optional prompt upload; ``None`` is forwarded when absent.
+        model: Model name forwarded as the only entry of ``model_list``.
+        api_key: API key resolved by the ``get_api_key`` dependency.
+
+    Returns:
+        A JSONResponse with ``message``, ``request_id`` and
+        ``status_check_url``.
+    """
+    # NOTE(review): the default model is "gemma3:27b", the same as the inner
+    # endpoints, although this is the /outer route — confirm it is intended.
+    validate_all_files(input_file)
+
+    # ✅ Create unique ids for this request
+    request_id = create_key()
+    result_id = create_key()
+
+    cloned_input = clone_upload_file(input_file) if input_file else None
+    cloned_schema = clone_upload_file(schema_file) if schema_file else None
+    cloned_prompt = clone_upload_file(prompt_file) if prompt_file else None
+
+    # ✅ Store the request_id → result_id mapping first, so a Redis failure
+    # aborts the request before any background work has been scheduled.
+    redis_client.hset("pipeline_result_mapping", request_id, result_id)
+
+    # ✅ Run the job in the background
+    task = asyncio.create_task(
+        InferenceHandler.handle_extract_background(
+            request_id=request_id,
+            result_id=result_id,
+            input_file=cloned_input,
+            schema_file=cloned_schema,
+            prompt_file=cloned_prompt,
+            mode="structured",
+            model_list=[model],
+            request_info=request_info,
+            api_key=api_key,
+        )
+    )
+    _structured_outer_tasks.add(task)
+    task.add_done_callback(_structured_outer_tasks.discard)
+
+    return JSONResponse(
+        content={
+            "message": "문서 추출 및 생성형 응답 작업이 백그라운드에서 실행 중입니다.",
+            "request_id": request_id,
+            "status_check_url": f"/extract/progress/{request_id}",
+        }
+    )
+
+
+# asyncio keeps only weak references to tasks; this set holds strong references
+# to the jobs started by extract_structured_outer until they finish. It is
+# defined after the function (the name is resolved at call time) so that the
+# route decorator stays directly attached to the function.
+_structured_outer_tasks = set()
+
+
+# ✅ 상태 로그 조회 API
+@router.get(
+    "/progress/{request_id}",
+    summary="정보 추출 작업 상태 및 결과 조회",
+    description="""### **요약**
+`POST /extract/*` 계열 엔드포인트 요청 시 반환된 `request_id`를 사용하여, 해당 정보 추출 작업의 진행 상태와 최종 결과를 조회합니다.
+
+### **작동 방식**
+-   `request_id`를 기반으로 Redis에 저장된 작업 로그와 결과 데이터를 조회합니다.
+-   작업이 진행 중일 때는 현재까지의 로그를, 완료되었을 때는 로그와 함께 최종 결과(`final_result`)를 반환합니다.
+
+### **입력**
+-   `request_id`: 조회할 작업의 고유 ID.
+
+### **출력 (application/json)**
+-   **성공 시**:
+    ```json
+    {
+      "request_id": "요청 시 사용된 ID",
+      "progress_logs": [
+        { "timestamp": "...", "status": "OCR 시작", "details": "..." },
+        { "timestamp": "...", "status": "입력 길이 검사 시작", "details": "..." },
+        { "timestamp": "...", "status": "LLM 추론 시작", "details": "..." },
+        { "timestamp": "...", "status": "LLM 추론 완료 및 후처리 시작", "details": "..." },
+        { "timestamp": "...", "status": "후처리 완료 및 결과 반환"", "details": "..." }      
+      ],
+      "final_result": {
+        "filename": "입력 파일",
+        "processed": "LLM의 최종 응답 내용"
+      }
+    }
+    ```
+-   **ID가 유효하지 않을 경우 (404 Not Found)**:
+    ```json
+    {
+      "message": "{request_id}에 대한 상태 로그가 없습니다."
+    }
+    ```
+""",
+)
+async def get_pipeline_status(request_id: str):
+    """Return the progress logs and, when available, the final result.
+
+    Lookup order: progress logs in Redis (404 when there are none), then the
+    result stored in Redis under the mapped result id, then
+    ``fetch_result_from_minio(request_id)``.
+
+    Args:
+        request_id: Id returned by one of the POST extraction endpoints.
+
+    Returns:
+        A JSONResponse. On success it carries ``request_id``,
+        ``progress_logs`` and ``final_result``; ``final_result`` is the stored
+        result, a decode-failure message, or an "in progress" message. When
+        the MinIO lookup raises, an HTTP 500 response with the logs and an
+        error message is returned instead of an empty body.
+    """
+    redis_key = f"pipeline_status:{request_id}"
+
+    # 1. Progress logs
+    logs = redis_client.lrange(redis_key, 0, -1)
+    if not logs:
+        return JSONResponse(
+            status_code=404,
+            content={"message": f"{request_id}에 대한 상태 로그가 없습니다."},
+        )
+    parsed_logs = [json.loads(log) for log in logs]
+
+    def build_response(final_result):
+        # Every non-error answer shares this shape.
+        return JSONResponse(
+            content={
+                "request_id": request_id,
+                "progress_logs": parsed_logs,
+                "final_result": final_result,
+            }
+        )
+
+    # 2. request_id → result_id mapping
+    result_id = redis_client.hget("pipeline_result_mapping", request_id)
+
+    # Remembered so a corrupt Redis entry is reported when MinIO has nothing
+    # either (it used to be silently overwritten).
+    decode_failure = None
+    if result_id:
+        # 3. Final result stored in Redis
+        result_str = redis_client.get(f"pipeline_result:{result_id}")
+        if result_str:
+            try:
+                redis_result = json.loads(result_str)
+            except json.JSONDecodeError:
+                decode_failure = {
+                    "message": "[REDIS] 결과 존재하지만, 디코딩에 실패했습니다."
+                }
+            else:
+                return build_response(redis_result)
+        else:
+            print(f"[REDIS] request_id {request_id} 가 Redis에 없습니다.")
+
+    # 4. Fall back to MinIO when Redis has no usable result
+    try:
+        print(f"[MINIO] MinIO에서 결과를 가져오는 중: {request_id}")
+        minio_result = fetch_result_from_minio(request_id)
+    except Exception as e:
+        print(f"[MINIO] MinIO 결과 조회 중 실패했습니다: {e}")
+        # Answer explicitly; falling off the end would send `null` with 200.
+        return JSONResponse(
+            status_code=500,
+            content={
+                "request_id": request_id,
+                "progress_logs": parsed_logs,
+                "final_result": decode_failure,
+                "message": "[MINIO] 결과 조회 중 오류가 발생했습니다.",
+            },
+        )
+
+    if minio_result:
+        return build_response(minio_result)
+    if decode_failure is not None:
+        return build_response(decode_failure)
+    # Nothing stored anywhere yet: the job is still running.
+    return build_response("작업이 진행 중입니다. 결과는 아직 생성되지 않았습니다.")
+
+## 조찬영
+# asyncio keeps only weak references to tasks; hold strong references to the
+# background jobs started by extract2_d6c until they finish.
+_extract2_d6c_tasks = set()
+
+
+@router.post(
+    "/inner2/d6c",
+    summary="국내 문서 테스트용",
+)
+async def extract2_d6c(
+    request_info: Request,
+    minio_url: str = Form(...),
+    model: Optional[str] = Form(default="qwen3:30b"),
+    api_key: str = Depends(get_api_key),
+):
+    # Test endpoint: downloads the document from `minio_url`, then schedules an
+    # "inner"-mode extraction with a prompt file taken from the settings and
+    # immediately returns the request id plus the progress URL. Download
+    # failures are answered with HTTP 400 and a message.
+    # (No docstring on purpose: FastAPI would publish it as the description.)
+    # NOTE(review): `minio_url` is caller-supplied and fetched server-side
+    # without host validation — consider restricting it to the MinIO endpoint.
+    try:
+        # requests is blocking: run it in the default executor so the event
+        # loop keeps serving other requests, and bound it with a
+        # (connect, read) timeout so an unresponsive host cannot hang forever.
+        loop = asyncio.get_running_loop()
+        response = await loop.run_in_executor(
+            None, lambda: requests.get(minio_url, timeout=(10, 60))
+        )
+        response.raise_for_status()  # raises HTTPError on 4xx/5xx responses
+    except requests.exceptions.HTTPError as e:
+        if e.response.status_code == 403:
+            return JSONResponse(
+                status_code=400,
+                content={
+                    "message": "제공된 MinIO URL이 만료되었거나 접근 권한이 없습니다."
+                },
+            )
+        else:
+            return JSONResponse(
+                status_code=400,
+                content={
+                    "message": f"URL에서 파일을 가져오는 데 실패했습니다: {e.response.status_code} {e.response.reason}"
+                },
+            )
+    except requests.exceptions.RequestException as e:
+        return JSONResponse(
+            status_code=400,
+            content={"message": f"URL에 연결하는 중 오류가 발생했습니다: {e}"},
+        )
+
+    # File name = last path segment of the URL, query parameters excluded.
+    parsed_url = urlparse(minio_url)
+    file_name = parsed_url.path.split("/")[-1]
+
+    # Wrap the downloaded bytes in an UploadFile object.
+    input_file = UploadFile(filename=file_name, file=io.BytesIO(response.content))
+    validate_all_files(input_file)
+
+    request_id = create_key()
+    result_id = create_key()
+
+    cloned_input = clone_upload_file(input_file) if input_file else None
+
+    # Always use the prompt file configured in the settings.
+    # NOTE(review): this d6c endpoint reads I18N_PROMPT_PATH — confirm that is
+    # the intended prompt.
+    with open(I18N_PROMPT_PATH, "rb") as f:
+        content = f.read()
+
+    # Wrap the bytes in an in-memory, UploadFile-like object so the background
+    # handler receives the same kind of object as for a real upload.
+    dummy_prompt_file = UploadFile(
+        filename=I18N_PROMPT_PATH.name, file=io.BytesIO(content)
+    )
+    cloned_prompt = clone_upload_file(dummy_prompt_file)
+
+    # Store the request_id → result_id mapping first, so a Redis failure aborts
+    # the request before any background work has been scheduled.
+    redis_client.hset("pipeline_result_mapping", request_id, result_id)
+
+    task = asyncio.create_task(
+        InferenceHandler.handle_extract_background(
+            request_id=request_id,
+            result_id=result_id,
+            input_file=cloned_input,
+            schema_file=None,
+            prompt_file=cloned_prompt,
+            mode="inner",
+            model_list=[model],
+            request_info=request_info,
+            api_key=api_key,
+        )
+    )
+    _extract2_d6c_tasks.add(task)
+    task.add_done_callback(_extract2_d6c_tasks.discard)
+
+    return JSONResponse(
+        content={
+            "message": "문서 추출 및 생성형 응답 작업이 백그라운드에서 실행 중입니다.",
+            "request_id": request_id,
+            "status_check_url": f"/extract/progress/{request_id}",
+        }
+    )
+
+
+@router.post(
+    "/inner2/i18n",
+    summary="해외 문서 테스트용",
+)
+async def extract2_i18n(
+    request_info: Request,
+    minio_url: str = Form(...),
+    model: Optional[str] = Form(default="qwen3:30b"),
+    api_key: str = Depends(get_api_key),
+):
+    try:
+        response = requests.get(minio_url)
+        response.raise_for_status()  # 4xx/5xx 응답에 대해 HTTPError 발생
+    except requests.exceptions.HTTPError as e:
+        if e.response.status_code == 403:
+            return JSONResponse(
+                status_code=400,
+                content={
+                    "message": "제공된 MinIO URL이 만료되었거나 접근 권한이 없습니다."
+                },
+            )
+        else:
+            return JSONResponse(
+                status_code=400,
+                content={
+                    "message": f"URL에서 파일을 가져오는 데 실패했습니다: {e.response.status_code} {e.response.reason}"
+                },
+            )
+    except requests.exceptions.RequestException as e:
+        return JSONResponse(
+            status_code=400,
+            content={"message": f"URL에 연결하는 중 오류가 발생했습니다: {e}"},
+        )
+
+    # URL에서 쿼리 파라미터를 제외한 파일 이름 추출
+    parsed_url = urlparse(minio_url)
+    file_name = parsed_url.path.split("/")[-1]
+
+    # 다운로드한 파일 데이터로 UploadFile 객체 생성
+    input_file = UploadFile(filename=file_name, file=io.BytesIO(response.content))
+    validate_all_files(input_file)
+
+    request_id = create_key()
+    result_id = create_key()
+
+    cloned_input = clone_upload_file(input_file) if input_file else None
+
+    # 설정에 정의된 기본 I18N 프롬프트 파일을 항상 사용
+    with open(D6C_PROMPT_PATH, "rb") as f:
+        content = f.read()
+
+    # 메모리 내 파일 객체 생성
+    spooled_file = io.BytesIO(content)
+
+    # UploadFile과 유사한 객체를 생성하여 백그라운드 핸들러로 전달
+    dummy_prompt_file = UploadFile(filename=D6C_PROMPT_PATH.name, file=spooled_file)
+    cloned_prompt = clone_upload_file(dummy_prompt_file)
+
+    asyncio.create_task(
+        InferenceHandler.handle_extract_background(
+            request_id=request_id,
+            result_id=result_id,
+            input_file=cloned_input,
+            schema_file=None,
+            prompt_file=cloned_prompt,
+            mode="inner",
+            model_list=[model],
+            request_info=request_info,
+            api_key=api_key,
+        )
+    )
+
+    redis_client.hset("pipeline_result_mapping", request_id, result_id)
+
+    return JSONResponse(
+        content={
+            "message": "문서 추출 및 생성형 응답 작업이 백그라운드에서 실행 중입니다.",
+            "request_id": request_id,
+            "status_check_url": f"/extract/progress/{request_id}",
+        }
+    )
+
+
+## 조찬영
--- a/workspace/routers/general_router.py
+++ b/workspace/routers/general_router.py
@@ -0,0 +1,235 @@
+import asyncio
+import json
+from typing import Optional
+
+from config.setting import (
+    PGN_REDIS_DB,
+    PGN_REDIS_HOST,
+    PGN_REDIS_PORT,
+)
+from fastapi import APIRouter, Depends, File, Form, Request, UploadFile
+from fastapi.responses import JSONResponse
+from redis import Redis
+from services.inference_service import InferenceHandler
+from utils.checking_files import (
+    clone_upload_file,
+    validate_all_files,
+)
+from utils.checking_keys import create_key, get_api_key
+
+# Redis 클라이언트 (LLM Gateway 전용)
+redis_client = Redis(
+    host=PGN_REDIS_HOST, port=PGN_REDIS_PORT, db=PGN_REDIS_DB, decode_responses=True
+)
+
+
+router = APIRouter(prefix="/general", tags=["General"])
+
+
+# ✅ 공통 비동기 추론 엔드포인트 생성기
+def register_general_route(
+    path: str, mode: str, default_model: str, summary: str, description: str
+):
+    @router.post(path, summary=summary, description=description)
+    async def general_endpoint(
+        request_info: Request,
+        input_file: UploadFile = File(...),
+        prompt_file: UploadFile = File(...),
+        schema_file: Optional[UploadFile] = File(default=None),
+        model: Optional[str] = Form(default=default_model),
+        api_key: str = Depends(get_api_key),
+    ):
+        validate_all_files(input_file)
+
+        # ✅ 고유한 요청 ID 생성
+        request_id = create_key()
+        result_id = create_key()
+
+        cloned_input = clone_upload_file(input_file) if input_file else None
+        cloned_prompt = clone_upload_file(prompt_file) if prompt_file else None
+        cloned_schema = clone_upload_file(schema_file) if schema_file else None
+
+        effective_mode = "structured" if schema_file and schema_file.filename else mode
+
+        # ✅ 백그라운드에서 작업 실행
+        asyncio.create_task(
+            InferenceHandler.handle_general_background(
+                request_id=request_id,
+                result_id=result_id,
+                input_file=cloned_input,
+                schema_file=cloned_schema,
+                prompt_file=cloned_prompt,
+                mode=effective_mode,
+                model=model,
+                request_info=request_info,
+                api_key=api_key,
+            )
+        )
+
+        # ✅ request_id → result_id 매핑 저장
+        redis_client.hset("pipeline_result_mapping", request_id, result_id)
+
+        return JSONResponse(
+            content={
+                "message": "문서 추출 및 생성형 응답 작업이 백그라운드에서 실행 중입니다.",
+                "request_id": request_id,
+                "status_check_url": f"/general/progress/{request_id}",
+            }
+        )
+
+    # FastAPI 문서화용 정보 부여
+    general_endpoint.__name__ = f"general_{mode}"
+    general_endpoint.__doc__ = description
+    return general_endpoint
+
+
+# ✅ 내부 모델용 등록
+general_inner = register_general_route(
+    path="/inner",
+    mode="inner",
+    default_model="gemma3:27b",
+    summary="내부 LLM 기반 범용 추론 요청 (비동기)",
+    description="""### **요약**
+내부망에 배포된 LLM(Ollama 기반)을 사용하여 문서 기반의 범용 추론을 비동기적으로 요청합니다. 이 엔드포인트는 파일(PDF, 이미지 등)에서 텍스트를 추출하고, 사용자가 제공한 프롬프트를 적용하여 결과를 생성합니다.
+
+### **작동 방식**
+1.  **요청 접수**: `input_file`, `prompt_file` 등을 받아 고유한 `request_id`를 생성하고 즉시 반환합니다.
+2.  **백그라운드 처리**:
+    -   `input_file`이 문서나 이미지일 경우, **OCR API**를 호출하여 텍스트를 추출합니다.
+    -   추출된 텍스트와 `prompt_file`의 내용을 조합하여 최종 프롬프트를 구성합니다.
+    -   내부 LLM(Ollama)에 추론을 요청합니다.
+    -   `schema_file`이 제공되면, LLM이 스키마에 맞는 JSON을 생성하도록 요청합니다.
+3.  **상태 및 결과 확인**: 반환된 `request_id`를 사용하여 `GET /general/progress/{request_id}` 엔드포인트에서 작업 진행 상태와 최종 결과를 조회할 수 있습니다.
+
+### **입력 (multipart/form-data)**
+-   `input_file` (**필수**): 추론의 기반이 될 문서 파일.
+    -   지원 형식: `.pdf`, `.docx`, `.jpg`, `.png`, `.jpeg` 등.
+    -   내부적으로 OCR을 통해 텍스트가 자동 추출됩니다.
+-   `prompt_file` (**필수**): LLM에 전달할 명령어(프롬프트)가 포함된 `.txt` 파일.
+-   `schema_file` (선택): 결과물의 구조를 정의하는 `.json` 스키마 파일. 제공 시, 출력은 이 스키마를 따르는 JSON 형식으로 강제됩니다.
+-   `model` (선택): 사용할 내부 LLM 모델 이름. (기본값: `gemma3:27b`)
+
+### **출력 (application/json)**
+-   **초기 응답**:
+    ```json
+    {
+      "message": "작업이 백그라운드에서 실행 중입니다.",
+      "request_id": "고유한 요청 ID",
+      "status_check_url": "/general/progress/고유한 요청 ID"
+    }
+    ```
+-   **최종 결과**: `GET /general/progress/{request_id}`를 통해 확인 가능.
+""",
+)
+
+# ✅ 외부 모델용 등록
+general_outer = register_general_route(
+    path="/outer",
+    mode="outer",
+    default_model="gemini-2.5-flash",
+    summary="외부 LLM 기반 범용 추론 요청 (비동기)",
+    description="""### **요약**
+외부 상용 LLM(예: GPT, Gemini, Claude)을 사용하여 문서 기반의 범용 추론을 비동기적으로 요청합니다. 기능과 작동 방식은 내부 LLM용 엔드포인트와 동일하나, 외부 API를 호출하는 점이 다릅니다.
+
+### **작동 방식**
+1.  **요청 접수**: `input_file`, `prompt_file` 등을 받아 고유한 `request_id`를 생성하고 즉시 반환합니다.
+2.  **백그라운드 처리**:
+    -   `input_file`에서 **OCR API**를 통해 텍스트를 추출합니다.
+    -   추출된 텍스트와 `prompt_file`의 내용을 조합하여 최종 프롬프트를 구성합니다.
+    -   외부 LLM API(OpenAI, Google, Anthropic 등)에 추론을 요청합니다.
+    -   `schema_file`이 제공되면, LLM이 스키마에 맞는 JSON을 생성하도록 요청합니다.
+3.  **상태 및 결과 확인**: 반환된 `request_id`를 사용하여 `GET /general/progress/{request_id}` 엔드포인트에서 작업 진행 상태와 최종 결과를 조회할 수 있습니다.
+
+### **입력 (multipart/form-data)**
+-   `input_file` (**필수**): 추론의 기반이 될 문서 파일.
+    -   지원 형식: `.pdf`, `.docx`, `.jpg`, `.png`, `.jpeg` 등.
+-   `prompt_file` (**필수**): LLM에 전달할 프롬프트가 포함된 `.txt` 파일.
+-   `schema_file` (선택): 결과물의 구조를 정의하는 `.json` 스키마 파일.
+-   `model` (선택): 사용할 외부 LLM 모델 이름. (기본값: `gemini-2.5-flash`)
+
+### **출력 (application/json)**
+-   **초기 응답**:
+    ```json
+    {
+      "message": "작업이 백그라운드에서 실행 중입니다.",
+      "request_id": "고유한 요청 ID",
+      "status_check_url": "/general/progress/고유한 요청 ID"
+    }
+    ```
+-   **최종 결과**: `GET /general/progress/{request_id}`를 통해 확인 가능.
+""",
+)
+
+
+# ✅ 상태 로그 조회 API
+@router.get(
+    "/progress/{request_id}",
+    summary="범용 추론 작업 상태 및 결과 조회",
+    description="""### **요약**
+`POST /general/inner` 또는 `POST /general/outer` 요청 시 반환된 `request_id`를 사용하여, 해당 작업의 진행 상태와 최종 결과를 조회합니다.
+
+### **작동 방식**
+-   `request_id`를 기반으로 Redis에 저장된 작업 로그와 결과 데이터를 조회합니다.
+-   작업이 진행 중일 때는 현재까지의 로그를, 완료되었을 때는 로그와 함께 최종 결과(`final_result`)를 반환합니다.
+
+### **입력**
+-   `request_id`: 조회할 작업의 고유 ID.
+
+### **출력 (application/json)**
+-   **성공 시**:
+    ```json
+    {
+      "request_id": "요청 시 사용된 ID",
+      "progress_logs": [
+        { "timestamp": "...", "status": "OCR 시작", "details": "..." },
+        { "timestamp": "...", "status": "입력 길이 검사 시작", "details": "..." },
+        { "timestamp": "...", "status": "LLM 추론 시작", "details": "..." },
+        { "timestamp": "...", "status": "LLM 추론 완료 및 후처리 시작", "details": "..." },
+        { "timestamp": "...", "status": "후처리 완료 및 결과 반환"", "details": "..." }
+      ],
+      "final_result": {
+        "filename": "입력 파일",
+        "processed": "LLM의 최종 응답 내용"
+      }
+    }
+    ```
+-   **ID가 유효하지 않을 경우 (404 Not Found)**:
+    ```json
+    {
+      "message": "{request_id}에 대한 상태 로그가 없습니다."
+    }
+    ```
+""",
+)
+async def get_pipeline_status(request_id: str):
+    # 상태 로그 조회
+    redis_key = f"pipeline_status:{request_id}"
+    logs = redis_client.lrange(redis_key, 0, -1)
+    if not logs:
+        return JSONResponse(
+            status_code=404,
+            content={"message": f"{request_id}에 대한 상태 로그가 없습니다."},
+        )
+    parsed_logs = [json.loads(log) for log in logs] if logs else []
+
+    # request_id → result_id 매핑 조회
+    result_id = redis_client.hget("pipeline_result_mapping", request_id)
+
+    final_result = None
+    if result_id:
+        # 최종 결과 조회
+        result_key = f"pipeline_result:{result_id}"
+        result_str = redis_client.get(result_key)
+        if result_str:
+            try:
+                final_result = json.loads(result_str)
+            except json.JSONDecodeError:
+                final_result = {"error": "결과 디코딩 실패"}
+
+    return JSONResponse(
+        content={
+            "request_id": request_id,
+            "progress_logs": parsed_logs,
+            "final_result": final_result,
+        }
+    )
--- a/workspace/routers/guide_router.py
+++ b/workspace/routers/guide_router.py
@@ -0,0 +1,46 @@
+from config.setting import (
+    EXTRACT_DEFAULT_PATH,
+    GENERAL_GUIDE_PATH,
+    SCHEMA_FILE_PATH,
+)
+from fastapi import APIRouter
+from fastapi.responses import FileResponse, HTMLResponse
+
+router = APIRouter(tags=["Guide Book"])
+
+
+# ✅ /schema_json 가이드 HTML
+@router.get(
+    "/schema_file_guide",
+    summary="schema 파일 작성 가이드북 HTML 보기",
+    description=(
+        "📄 본 가이드북은 <strong>/general</strong> 및 <strong>/extract/structured</strong> "
+        "엔드포인트에 첨부되는 <strong>schema_file</strong> 작성법을 설명합니다.<br><br>"
+        "가이드북은 <a href='/schema_file_guide' target='_blank'>여기</a>에서 확인하세요."
+    ),
+    response_class=HTMLResponse,
+)
+async def schema_guide():
+    return FileResponse(SCHEMA_FILE_PATH, media_type="text/html")
+
+
+# ✅ /general 가이드 HTML
+@router.get(
+    "/general_guide",
+    summary="/general 가이드북 HTML 보기",
+    description="가이드북을 <a href='/general_guide' target='_blank'>여기</a>에서 확인하세요.",
+    response_class=HTMLResponse,
+)
+async def general_guide():
+    return FileResponse(GENERAL_GUIDE_PATH, media_type="text/html")
+
+
+# ✅ /extract 가이드 HTML
+@router.get(
+    "/extract_guide",
+    summary="/extract 가이드북 HTML 보기",
+    description="가이드북을 <a href='/extract_guide' target='_blank'>여기</a>에서 확인하세요.",
+    response_class=HTMLResponse,
+)
+async def extract_guide():
+    return FileResponse(EXTRACT_DEFAULT_PATH, media_type="text/html")
--- a/workspace/routers/llm_summation.py
+++ b/workspace/routers/llm_summation.py
@@ -0,0 +1,86 @@
+import logging
+
+from fastapi import APIRouter, BackgroundTasks, Depends
+from pydantic import BaseModel
+from services.report import (
+    ask_ollama_qwen,
+    dialog_ask_gemini,
+    run_all_models,
+    tasks_store,
+    total_summation,
+)
+from utils.checking_keys import create_key
+from utils.logging_utils import EndpointLogger
+
+# ------------------------------------------
+
+# 로깅 설정
+logger = logging.getLogger(__name__)
+
+router = APIRouter(tags=["summary"])
+
+
+# Request body shared by the summary endpoints in this module.
+class SummaryRequest(BaseModel):
+    # Text to summarize; the endpoints pass it to the summarization helpers
+    # and use its length as the logged context_length.
+    text: str
+
+
+@router.post("/summary")  # STT 요약 모델
+async def summarize(
+    request: SummaryRequest, endpoint_logger: EndpointLogger = Depends(EndpointLogger)
+):
+    endpoint_logger.log(
+        model="gpt-4.1-mini, qwen3:custom, gemini-2.5-flash, claude-3-7-sonnet-latest",
+        input_filename="None",
+        prompt_filename="None",
+        context_length=len(request.text),
+    )
+
+    results = await total_summation(request.text)
+    return {"summary_results": results}
+
+
+@router.post("/ollama_summary")  # ollama 모델 전용
+async def ollama_summary(
+    request: SummaryRequest, endpoint_logger: EndpointLogger = Depends(EndpointLogger)
+):
+    endpoint_logger.log(
+        model="qwen3:custom",
+        input_filename="None",
+        prompt_filename="None",
+        context_length=len(request.text),
+    )
+
+    results = await ask_ollama_qwen(request.text)
+    return {"summary_results": results}
+
+
+@router.post("/gemini_summary")
+async def gemini_summary(request: SummaryRequest):
+    results = await dialog_ask_gemini(request.text)
+    return {"summary_results": results}
+
+
+@router.post("/task_summary")  # 모델 별 전체 요약
+async def task_summary(
+    request: SummaryRequest,
+    background_tasks: BackgroundTasks,
+    endpoint_logger: EndpointLogger = Depends(EndpointLogger),
+):
+    endpoint_logger.log(
+        model="gpt-4.1-mini, qwen3:custom, gemini-2.5-flash, claude-3-7-sonnet-latest",
+        input_filename="None",
+        prompt_filename="None",
+        context_length=len(request.text),
+    )
+
+    task_id = create_key()
+    background_tasks.add_task(run_all_models, request.text, task_id)
+    return {"task_id": task_id}
+
+
+@router.get("/task_summary/{task_id}")  # 모델 별 요약 조회
+async def get_status(task_id: str):
+    task = tasks_store.get(task_id)
+    if not task:
+        return {"error": "Invalid task_id"}
+    return task
--- a/workspace/routers/model_router.py
+++ b/workspace/routers/model_router.py
@@ -0,0 +1,17 @@
+from fastapi import APIRouter
+from services.model_service import ModelInfoService
+
+router = APIRouter(tags=["Model Management"])
+
+
+# ✅ GET:사용 가능한 모델 조회 API
+@router.get(
+    "/info",
+    summary="'/extract', '/general' 에서 사용 가능한 모델 목록 확인",
+    description="""
+    ✅ 'inner(내부용)' 와 'outer(외부용)' 모델의 사용 가능한 목록을 확인합니다.<br>
+    ✅ 'Try it out' → 'Execute' 순서로 클릭합니다.<br>
+    """,
+)
+async def get_model_info():
+    return await ModelInfoService.get_model_info()
--- a/workspace/routers/ocr_router.py
+++ b/workspace/routers/ocr_router.py
@@ -0,0 +1,168 @@
+import logging
+
+import httpx
+from config.setting import (
+    MINIO_BUCKET_NAME,
+    OCR_API_URL,
+)
+from fastapi import APIRouter, Depends, File, HTTPException, UploadFile
+from fastapi.responses import JSONResponse
+from utils.checking_files import validate_all_files
+from utils.checking_keys import create_key
+from utils.logging_utils import EndpointLogger
+from utils.minio_utils import upload_file_to_minio_v2  # ✅ MinIO 유틸 함수 import
+
+router = APIRouter(prefix="/ocr", tags=["OCR"])
+logger = logging.getLogger(__name__)
+
+
+@router.post(
+    "",
+    summary="문서 OCR 요청 (비동기)",
+    description="""### **요약**
+문서 파일(PDF, 이미지 등)을 받아 텍스트를 추출하는 OCR(광학 문자 인식) 작업을 비동기적으로 요청합니다.
+
+### **작동 방식**
+1.  **요청 접수**: `file`을 받아 고유 `request_id`를 생성하고 즉시 반환합니다.
+2.  **백그라운드 처리**:
+    -   업로드된 파일을 내부 저장소(MinIO)에 저장합니다.
+    -   별도의 OCR 서버에 텍스트 추출 작업을 요청합니다.
+3.  **상태 및 결과 확인**: 반환된 `request_id`를 사용하여 `GET /ocr/progress/{request_id}`로 작업 상태를, `GET /ocr/result/{request_id}`로 최종 텍스트 결과를 조회할 수 있습니다.
+
+### **입력 (multipart/form-data)**
+-   `file` (**필수**): 텍스트를 추출할 문서 파일.
+    -   지원 형식: `.pdf`, `.jpg`, `.png`, `.jpeg` 등 OCR 서버가 지원하는 형식.
+
+### **출력 (application/json)**
+-   **초기 응답**:
+    ```json
+    [
+      {
+        "request_id": "고유한 요청 ID",
+        "status": "작업 접수",
+        "message": "아래 URL을 통해 작업 상태 및 결과를 확인하세요."
+      }
+    ]
+    ```
+-   **최종 결과**: `GET /ocr/result/{request_id}`를 통해 확인 가능.
+""",
+)
+async def ocr_only(
+    file: UploadFile = File(...),
+    endpoint_logger: EndpointLogger = Depends(EndpointLogger),
+):
+    validate_all_files(file)
+    results = []
+    endpoint_logger.log(
+        model="paddle-ocr",
+        input_filename=file.filename,
+        context_length=0,  # OCR은 context_length가 필요하지 않음
+    )
+
+    async with httpx.AsyncClient() as client:
+        # ✅ 1. 고유 ID 생성
+        request_id = create_key()
+        bucket_name = MINIO_BUCKET_NAME
+        object_name = f"{request_id}/{file.filename}"
+
+        # ✅ 2. MinIO에 파일 업로드 후 presigned URL 생성
+        # presigned_url = upload_file_to_minio(file, request_id)
+        presigned_url = upload_file_to_minio_v2(
+            file=file, bucket_name=bucket_name, object_name=object_name
+        )
+        logger.info(f"[MinIO] ✅ presigned URL 생성 완료: {presigned_url}")
+
+        try:
+            # ✅ 3. OCR API에 presigned URL 전달
+            resp = await client.post(
+                OCR_API_URL,
+                json=[
+                    {
+                        "file_url": presigned_url,
+                        "filename": file.filename,
+                    }
+                ],
+                timeout=None,
+            )
+            resp.raise_for_status()
+
+        # except httpx.ReadTimeout:
+        #     logger.error("[OCR] OCR 서버 지연 가능성")
+        #     raise HTTPException(
+        #         status_code=504, detail="OCR 서버 응답이 지연되고 있습니다."
+        #     )
+
+        # except httpx.HTTPStatusError as e:
+        #     logger.error(
+        #         f"[OCR] ❌ HTTP 에러 발생: {e.response.status_code} - {e.response.text}"
+        #     )
+        #     raise HTTPException(
+        #         status_code=e.response.status_code, detail="OCR 서버 오류 발생"
+        #     )
+
+        except Exception:
+            logger.exception("[OCR] ❌ 예기치 못한 오류 발생")
+            raise HTTPException(
+                status_code=500, detail="OCR 요청 처리 중 내부 오류 발생"
+            )
+
+        # ✅ 4. OCR 응답에서 request_id 추출
+        for item in resp.json().get("results", []):
+            ocr_request_id = item.get("request_id")
+
+            result_item = {
+                "request_id": ocr_request_id,
+                "status": "작업 접수",
+                "message": "아래 URL을 통해 작업 상태 및 결과를 확인하세요.",
+            }
+            results.append(result_item)
+
+    return JSONResponse(content=results)
+
+
+@router.get(
+    "/progress/{request_id}",
+    summary="OCR 작업 상태 조회",
+    description="""### **요약**
+`POST /ocr` 요청 시 반환된 `request_id`를 사용하여 OCR 작업의 현재 진행 상태를 조회합니다.
+
+### **작동 방식**
+-   `request_id`를 OCR 서버에 전달하여 해당 작업의 상태를 가져옵니다.
+-   상태는 보통 'PENDING', 'IN_PROGRESS', 'SUCCESS', 'FAILURE' 등으로 표시됩니다.
+
+### **입력**
+-   `request_id`: 조회할 OCR 작업의 고유 ID.
+
+### **출력 (application/json)**
+-   **성공 시**:
+    ```json
+    {
+      "request_id": "요청 시 사용된 ID",
+      "progress_logs": [
+        { "timestamp": "...", "status": "OCR 시작", "details": "..." },
+        { "timestamp": "...", "status": "입력 길이 검사 시작", "details": "..." },
+        { "timestamp": "...", "status": "LLM 추론 시작", "details": "..." },
+        { "timestamp": "...", "status": "LLM 추론 완료 및 후처리 시작", "details": "..." },
+        { "timestamp": "...", "status": "후처리 완료 및 결과 반환"", "details": "..." }
+      ],
+      "final_result": {
+        "filename": "입력 파일",
+        "parsed": "OCR 결과 내용"
+      }
+    }
+    ```
+-   **ID가 유효하지 않을 경우 (404 Not Found)**:
+    ```json
+    {
+      "detail": "Meeting ID {request_id} 작업 없음"
+    }
+    ```
+""",
+)
+async def get_pipeline_status(request_id: str):
+    try:
+        async with httpx.AsyncClient() as client:
+            response = await client.get(f"{OCR_API_URL}/progress/{request_id}")
+        return JSONResponse(content=response.json(), status_code=response.status_code)
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"OCR 상태 조회 실패: {str(e)}")
--- a/workspace/routers/stt_router.py
+++ b/workspace/routers/stt_router.py
@@ -0,0 +1,144 @@
+# llmgateway/routers/stt_router.py
+import logging
+
+import httpx
+from fastapi import APIRouter, Depends, File, Form, HTTPException, UploadFile
+from fastapi.responses import JSONResponse
+from utils.checking_keys import create_key
+from utils.logging_utils import EndpointLogger
+from utils.minio_utils import upload_file_to_minio_v2
+
+router = APIRouter(tags=["STT Gateway"])
+
+STT_API_BASE_URL = "http://stt_fastapi:8899/ccp"  # docker-compose 내 서비스명 기반
+MULTI_STT_API_BASE_URL = (
+    "http://stt_fastapi:8899/dialog"  # docker-compose 내 서비스명 기반
+)
+logger = logging.getLogger(__name__)
+
+
+# 파일 업로드 → stt_api에 Presigned URL 전달
+@router.post("/audio")
+async def proxy_audio(
+    audio_file: UploadFile = File(...),
+    endpoint_logger: EndpointLogger = Depends(EndpointLogger),
+):
+    request_id = create_key()
+    bucket_name = "stt-gateway"
+    object_name = f"{request_id}/{audio_file.filename}"
+
+    try:
+        # upload_file_to_minio_v2는 presigned URL을 반환합니다.
+        presigned_url = upload_file_to_minio_v2(
+            file=audio_file,
+            bucket_name=bucket_name,
+            object_name=object_name,
+        )
+    except Exception as e:
+        logger.error(f"MinIO upload failed: {e}")
+        raise HTTPException(status_code=500, detail="File upload to storage failed.")
+
+    # 로깅
+    endpoint_logger.log(model="N/A", input_filename=audio_file.filename)
+
+    # stt_fastapi에 Presigned URL 정보 전달
+    try:
+        async with httpx.AsyncClient() as client:
+            payload = {
+                "file_url": presigned_url,
+                "language": "ko",
+            }
+            response = await client.post(f"{STT_API_BASE_URL}/audio", json=payload)
+        return JSONResponse(content=response.json(), status_code=response.status_code)
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"STT API 호출 실패: {str(e)}")
+
+
+# 상태 조회 → stt_api에 중계 및 오류 로깅
+@router.get("/progress/{request_id}")
+async def proxy_progress(request_id: str):
+    try:
+        async with httpx.AsyncClient() as client:
+            response = await client.get(f"{STT_API_BASE_URL}/progress/{request_id}")
+            response.raise_for_status()  # HTTP 오류 발생 시 예외 처리
+            
+            # 응답 데이터 확인 및 로깅
+            data = response.json()
+            if data.get("celery_status") == "FAILURE":
+                # 상세 오류 정보를 포함하여 에러 로그 기록
+                error_details = data.get("progress_logs", [])
+                logger.error(f"[ERROR] STT task failed for request_id {request_id}. Details: {error_details}")
+
+            return JSONResponse(content=data, status_code=response.status_code)
+
+    except httpx.HTTPStatusError as e:
+        logger.error(f"STT progress check failed with status {e.response.status_code} for request_id {request_id}: {e.response.text}")
+        raise HTTPException(status_code=e.response.status_code, detail=f"STT 상태 조회 실패: {e.response.text}")
+    except Exception as e:
+        logger.error(f"An unexpected error occurred while checking STT progress for request_id {request_id}: {e}")
+        raise HTTPException(status_code=500, detail=f"STT 상태 조회 실패: {str(e)}")
+
+
+# 다중 입력 회의 → stt_api에 Presigned URL 전달
+@router.post("/dialog_processing")
+async def proxy_dialog_processing(
+    audio_file: UploadFile = File(...),
+    meeting_tag: str = Form(...),
+    endpoint_logger: EndpointLogger = Depends(EndpointLogger),
+):
+    bucket_name = "stt-gateway"
+    request_id = create_key()
+    object_name = f"{meeting_tag}_{request_id}/{audio_file.filename}"
+
+    try:
+        presigned_url = upload_file_to_minio_v2(
+            file=audio_file,
+            bucket_name=bucket_name,
+            object_name=object_name,
+        )
+    except Exception as e:
+        logger.error(f"MinIO upload failed for dialog_processing: {e}")
+        raise HTTPException(status_code=500, detail="File upload to storage failed.")
+
+    # 로깅
+    endpoint_logger.log(model="N/A", input_filename=audio_file.filename)
+
+    # stt_fastapi에 Presigned URL 정보 전달
+    try:
+        async with httpx.AsyncClient() as client:
+            payload = {
+                "file_url": presigned_url,
+                "meeting_tag": meeting_tag,
+            }
+            resp = await client.post(
+                f"{MULTI_STT_API_BASE_URL}/dialog_processing", json=payload
+            )
+        return JSONResponse(status_code=resp.status_code, content=resp.json())
+    except httpx.RequestError as e:
+        raise HTTPException(status_code=500, detail=f"내부 서버 요청 실패: {e}")
+
+
+@router.get("/start_parallel_stt/{meeting_tag}")
+async def proxy_start_parallel_stt(meeting_tag: str):
+    async with httpx.AsyncClient() as client:
+        try:
+            resp = await client.get(
+                f"{MULTI_STT_API_BASE_URL}/start_parallel_stt/{meeting_tag}"
+            )
+        except httpx.RequestError as e:
+            raise HTTPException(status_code=500, detail=f"내부 서버 요청 실패: {e}")
+
+    return JSONResponse(status_code=resp.status_code, content=resp.json())
+
+
+@router.get("/dialog_result/{task_id}")
+async def proxy_get_progress(task_id: str):
+    async with httpx.AsyncClient() as client:
+        try:
+            resp = await client.get(
+                f"{MULTI_STT_API_BASE_URL}/result/parallel/{task_id}"
+            )
+        except httpx.RequestError as e:
+            raise HTTPException(status_code=500, detail=f"내부 서버 요청 실패: {e}")
+
+    return JSONResponse(status_code=resp.status_code, content=resp.json())
--- a/workspace/routers/yolo_router.py
+++ b/workspace/routers/yolo_router.py
@@ -0,0 +1,80 @@
+# llmgateway/routers/yolo_router.py
+import io
+import logging
+
+import httpx
+from fastapi import APIRouter, Depends, File, HTTPException, Request, UploadFile
+from fastapi.responses import JSONResponse, StreamingResponse
+from utils.checking_keys import create_key
+from utils.logging_utils import EndpointLogger
+from utils.minio_utils import upload_file_to_minio_v2
+
+router = APIRouter(tags=["YOLO Gateway"])
+
+YOLO_BASE_URL = "http://yolo_gateway:8891"  # docker-compose 내 서비스명 기반
+
+logger = logging.getLogger(__name__)
+
+
+@router.post("/detect_view")
+async def proxy_audio(
+    request_info: Request,
+    image_file: UploadFile = File(...),
+    endpoint_logger: EndpointLogger = Depends(EndpointLogger),
+):
+    request_id = create_key()
+    bucket_name = "yolo-gateway"
+    object_name = f"{request_id}/{image_file.filename}"
+
+    try:
+        presigned_url = upload_file_to_minio_v2(
+            file=image_file,
+            bucket_name=bucket_name,
+            object_name=object_name,
+        )
+    except Exception as e:
+        logger.error(f"MinIO upload failed: {e}")
+        raise HTTPException(status_code=500, detail="File upload to storage failed.")
+
+    endpoint_logger.log(
+        model="yolo11x", input_filename=image_file.filename, context_length=0
+    )
+
+    try:
+        async with httpx.AsyncClient() as client:
+            payload = {
+                "request_id": request_id,
+                "file_url": presigned_url,
+            }
+            response = await client.post(f"{YOLO_BASE_URL}/detect", json=payload)
+        return JSONResponse(content=response.json(), status_code=response.status_code)
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"YOLO API 호출 실패: {str(e)}")
+
+
+# YOLO 서버의 이미지 프록시
+@router.get("/detect_view/images/{request_id}")
+async def proxy_get_image(request_id: str):
+    try:
+        async with httpx.AsyncClient() as client:
+            yolo_url = f"{YOLO_BASE_URL}/images/{request_id}"
+            response = await client.get(yolo_url)
+            response.raise_for_status()
+    except httpx.HTTPError as e:
+        raise HTTPException(status_code=500, detail=f"YOLO 이미지 요청 실패: {str(e)}")
+
+    return StreamingResponse(io.BytesIO(response.content), media_type="image/jpeg")
+
+
+# YOLO 서버의 JSON 결과 프록시
+@router.get("/detect_view/results/{request_id}")
+async def proxy_get_results(request_id: str):
+    try:
+        async with httpx.AsyncClient() as client:
+            yolo_url = f"{YOLO_BASE_URL}/results/{request_id}"
+            response = await client.get(yolo_url)
+            response.raise_for_status()
+    except httpx.HTTPError as e:
+        raise HTTPException(status_code=500, detail=f"YOLO 결과 요청 실패: {str(e)}")
+
+    return JSONResponse(content=response.json(), status_code=response.status_code)
--- a/workspace/services/init.py
+++ b/workspace/services/init.py
--- a/workspace/services/api_key_service.py
+++ b/workspace/services/api_key_service.py
@@ -0,0 +1,167 @@
+import json
+import os
+import secrets
+import time
+
+from utils.redis_utils import get_redis_client
+
+# Prefix prepended to an API key to form its Redis storage name
+API_KEY_PREFIX = "api_key:"
+# Absolute path so the file is stored in the Docker container's /workspace directory
+API_KEYS_FILE = "/workspace/api_keys.json"
+
+
+def _read_keys_from_file():
+    """Helper function to read all keys from the JSON file."""
+    if not os.path.exists(API_KEYS_FILE):
+        return {}
+    with open(API_KEYS_FILE, "r") as f:
+        try:
+            return json.load(f)
+        except json.JSONDecodeError:
+            return {}
+
+
+def _write_keys_to_file(keys):
+    """Helper function to write all keys to the JSON file."""
+    with open(API_KEYS_FILE, "w") as f:
+        json.dump(keys, f, indent=4)
+
+
+import redis
+
+
+def load_api_keys_from_file():
+    """
+    JSON 파일에서 API 키를 읽어 Redis에 로드합니다.
+    Redis 연결 실패 시 몇 초간 재시도하여 시작 시점의 문제를 해결합니다.
+    """
+    keys_from_file = _read_keys_from_file()
+    if not keys_from_file:
+        print("API key file not found or empty. Skipping loading.")
+        return
+
+    redis_client = get_redis_client()
+    max_retries = 5
+    retry_delay = 2  # 초
+
+    for i in range(max_retries):
+        try:
+            # Redis 연결 테스트
+            redis_client.ping()
+
+            # 연결 성공 시 키 로드
+            for key_name, key_data in keys_from_file.items():
+                if not redis_client.exists(key_name):
+                    redis_client.hset(key_name, mapping=key_data)
+                    print(f"Loaded API key from file: {key_name}")
+
+            print("Successfully loaded all keys into Redis.")
+            return  # 성공 시 함수 종료
+
+        except redis.exceptions.ConnectionError as e:
+            print(f"Could not connect to Redis (attempt {i+1}/{max_retries}): {e}")
+            if i < max_retries - 1:
+                print(f"Retrying in {retry_delay} seconds...")
+                time.sleep(retry_delay)
+            else:
+                print("Failed to load API keys into Redis after multiple retries.")
+                break
+
+
+def generate_api_key(prefix="sk") -> str:
+    """안전한 API 키를 생성합니다. (예: sk-xxxxxxxx)"""
+    return f"{prefix}-{secrets.token_hex(16)}"
+
+
+def create_api_key(client_name: str, key_prefix="sk") -> dict:
+    """
+    새로운 API 키를 생성하고 Redis와 파일에 저장합니다.
+    """
+    api_key = generate_api_key(prefix=key_prefix)
+    redis_client = get_redis_client()
+
+    key_storage_name = f"{API_KEY_PREFIX}{api_key}"
+    key_data = {
+        "client_name": client_name,
+        "created_at": str(int(time.time())),
+        "is_active": "true",
+    }
+
+    # Redis에 저장 (hset 사용)
+    redis_client.hset(key_storage_name, mapping=key_data)
+
+    # 파일에 즉시 저장
+    all_keys = _read_keys_from_file()
+    all_keys[key_storage_name] = key_data
+    _write_keys_to_file(all_keys)
+
+    return {"api_key": api_key, **key_data}
+
+
+def validate_api_key(api_key: str) -> bool:
+    """
+    제공된 API 키가 유효한지 검증합니다. decode_responses=True로 인해 모든 값은 문자열입니다.
+    1. Redis에서 먼저 확인합니다.
+    2. Redis에 없으면 api_keys.json 파일에서 확인합니다.
+    3. 파일에서 유효한 키를 찾으면 Redis에 다시 동기화합니다.
+    """
+    if not api_key:
+        return False
+
+    redis_client = get_redis_client()
+    key_storage_name = f"{API_KEY_PREFIX}{api_key}"
+
+    # 1. Redis에서 확인 (decode_responses=True이므로 반환값은 문자열)
+    is_active_in_redis = redis_client.hget(key_storage_name, "is_active")
+    if is_active_in_redis == "true":
+        return True
+
+    # 2. Redis에 없으면 파일에서 확인
+    all_keys_from_file = _read_keys_from_file()
+    key_data_from_file = all_keys_from_file.get(key_storage_name)
+
+    if key_data_from_file and key_data_from_file.get("is_active") == "true":
+        # 3. 파일에 유효한 키가 있으면 Redis에 다시 기록 (Self-healing, hset 사용)
+        redis_client.hset(key_storage_name, mapping=key_data_from_file)
+        print(f"Key '{key_storage_name}' not found in Redis, but restored from file.")
+        return True
+
+    return False
+
+
+def revoke_api_key(api_key: str) -> bool:
+    """
+    API 키를 Redis와 파일에서 삭제하여 폐기합니다.
+    """
+    redis_client = get_redis_client()
+    key_storage_name = f"{API_KEY_PREFIX}{api_key}"
+
+    # Redis에서 삭제
+    result = redis_client.delete(key_storage_name)
+
+    if result > 0:
+        # 파일에서도 삭제
+        all_keys = _read_keys_from_file()
+        if key_storage_name in all_keys:
+            del all_keys[key_storage_name]
+            _write_keys_to_file(all_keys)
+        return True
+    return False
+
+
+def list_api_keys() -> list:
+    """
+    저장된 모든 API 키의 목록을 반환합니다.
+    (주의: 실제 환경에서는 키 자체를 노출하지 않는 것이 좋습니다)
+    """
+    redis_client = get_redis_client()
+    keys = []
+
+    # decode_responses=True이므로 모든 키와 값은 문자열.
+    for key_name in redis_client.scan_iter(f"{API_KEY_PREFIX}*"):
+        key_data = redis_client.hgetall(key_name)
+        key_data["api_key"] = key_name.replace(API_KEY_PREFIX, "", 1)
+        keys.append(key_data)
+
+    return keys
--- a/workspace/services/download_service.py
+++ b/workspace/services/download_service.py
@@ -0,0 +1,38 @@
+from fastapi.responses import FileResponse
+from config.setting import DEFAULT_PROMPT_PATH, STRUCTURED_PROMPT_PATH, STRUCTURED_SCHEMA_PATH
+
+class DownloadService:
+    @staticmethod
+    def download_default_prompt():
+        return FileResponse(
+            DEFAULT_PROMPT_PATH,
+            media_type="text/plain",
+            filename="default_prompt.txt",
+            headers=DownloadService._no_cache_headers()
+        )
+
+    @staticmethod
+    def download_structured_prompt():
+        return FileResponse(
+            STRUCTURED_PROMPT_PATH,
+            media_type="text/plain",
+            filename="structured_prompt.txt",
+            headers=DownloadService._no_cache_headers()
+        )
+
+    @staticmethod
+    def download_structured_schema():
+        return FileResponse(
+            STRUCTURED_SCHEMA_PATH,
+            media_type="application/json",
+            filename="structured_schema.json",
+            headers=DownloadService._no_cache_headers()
+        )
+
+    @staticmethod
+    def _no_cache_headers():
+        return {
+            "Cache-Control": "no-store, no-cache, must-revalidate, max-age=0",
+            "Pragma": "no-cache",
+            "Expires": "0"
+        }
--- a/workspace/services/dummy_service.py
+++ b/workspace/services/dummy_service.py
@@ -0,0 +1,20 @@
+import json
+from fastapi.responses import JSONResponse
+from config.setting import STATIC_DIR
+
+class DummyService:
+    @staticmethod
+    async def extract_dummy():
+        """
+        static 디렉터리의 더미 JSON 응답 파일을 반환합니다.
+        """
+        dummy_path = STATIC_DIR / "dummy_response.json"
+        try:
+            with open(dummy_path, "r", encoding="utf-8") as f:
+                dummy_data = json.load(f)
+            return JSONResponse(content=dummy_data)
+        except Exception as e:
+            return JSONResponse(
+                status_code=500,
+                content={"error": f"❌ 더미 파일을 불러오지 못했습니다: {e}"}
+            )
--- a/workspace/services/inference_service.py
+++ b/workspace/services/inference_service.py
@@ -0,0 +1,281 @@
+import asyncio
+import json
+import logging
+import os
+import shutil
+from pathlib import Path
+from typing import List, Optional
+
+from config.setting import (
+    DEFAULT_PROMPT_PATH,
+    MINIO_BUCKET_NAME,
+    PGN_REDIS_DB,
+    PGN_REDIS_HOST,
+    PGN_REDIS_PORT,
+    STRUCTURED_PROMPT_PATH,
+    UPLOAD_DIR,
+)
+from fastapi import HTTPException, UploadFile
+from redis import Redis
+from utils.logging_utils import log_user_request
+from utils.minio_utils import save_result_to_minio, upload_file_to_minio_v2
+from utils.prompt_cache import compute_file_hash, save_prompt_file_if_not_exists
+
+from services.dummy_service import DummyService
+from services.model_service import ModelInfoService
+from services.pipeline_runner import PipelineRunner
+
+# Redis client (dedicated to the LLM Gateway); responses are decoded to str
+redis_client = Redis(
+    host=PGN_REDIS_HOST, port=PGN_REDIS_PORT, db=PGN_REDIS_DB, decode_responses=True
+)
+
+logger = logging.getLogger(__name__)
+
+
+class InferenceHandler:
+    # ☑️ /general-공통처리함수
+    @staticmethod
+    async def handle_general_background(
+        request_id: str,
+        result_id: str,
+        input_file: UploadFile,
+        prompt_file: UploadFile,
+        mode: str,
+        model: str,
+        api_key: str,
+        schema_file: Optional[UploadFile] = None,
+        request_info: Optional[str] = None,
+    ):
+        logger.info(f"[INPUT_FILE_NAME]: {input_file.filename}")
+
+        try:
+            # ✅ prompt_file이 없으면 사용자 에러 응답 반환
+            if not prompt_file or not prompt_file.filename:
+                raise HTTPException(
+                    status_code=400,
+                    detail="❌ 프롬프트 파일(prompt_file)은 반드시 업로드해야 합니다.",
+                )
+
+            # ✅ 파일 저장
+            filename = input_file.filename
+            file_path = os.path.join(UPLOAD_DIR, filename)
+            with open(file_path, "wb") as f:
+                shutil.copyfileobj(input_file.file, f)
+
+            # 🔽 프롬프트 해시 처리 + 캐시 저장 + 내용 읽기
+            file_hash = compute_file_hash(prompt_file)
+            cached_prompt_path = save_prompt_file_if_not_exists(file_hash, prompt_file)
+
+            with open(cached_prompt_path, encoding="utf-8") as f:
+                prompt = f.read()
+
+            custom_mode = True
+
+            # ✅ schema_file 있으면 structured 모드로 전환 및 로딩
+            schema_override = None
+            if schema_file and schema_file.filename:
+                schema_override = json.loads(await schema_file.read())
+                mode = "structured"  # override
+
+            # ✅ 모델 정보 수집
+            info_response = await ModelInfoService.get_model_info()
+            info = json.loads(info_response.body.decode("utf-8"))
+            inner_models = info["models"]["inner_model"]["model_list"]
+            outer_models = info["models"]["outer_model"]["model_list"]
+            model_url_map = await ModelInfoService.get_ollama_model_map()
+
+            # presigned_url = upload_file_to_minio(input_file, request_id)
+            presigned_url = upload_file_to_minio_v2(
+                file=input_file,
+                bucket_name=MINIO_BUCKET_NAME,
+                object_name=f"{request_id}/{filename}",
+            )
+            logger.info(f"[MinIO] presigned URL 생성 완료: {presigned_url}")
+
+            # ✅ run_pipeline 재사용 (schema_override는 일반 추론이므로 None)
+            results_minio = await PipelineRunner.run_pipeline(
+                request_info=request_info,
+                request_id=request_id,
+                file_path=presigned_url,
+                filename=filename,
+                prompt=prompt,
+                prompt_filename=Path(cached_prompt_path).name,
+                custom_mode=custom_mode,
+                mode=mode,
+                model=model,
+                inner_models=inner_models,
+                outer_models=outer_models,
+                model_url_map=model_url_map,
+                api_key=api_key,
+                schema_override=schema_override,
+                prompt_mode="general",
+            )
+            # ✅ 결과 Redis 저장
+            results_redis = {k: v for k, v in results_minio.items() if k != "fields"}
+            redis_key = f"pipeline_result:{result_id}"
+            redis_client.set(
+                redis_key, json.dumps(results_redis, ensure_ascii=False), ex=60 * 60
+            )
+            logger.info(f"[REDIS] 결과 Redis 저장 완료: {result_id}")
+
+        except Exception as e:
+            logger.error(f"[PIPELINE] ❌ result_id={result_id} 처리 실패: {e}")
+            redis_client.set(
+                f"pipeline_result:{result_id}",
+                json.dumps({"error": str(e)}),
+                ex=60 * 60,
+            )
+
+        # ✅ 결과 MinIO 저장 (전체본)
+        try:
+            minio_key = f"{request_id}/{input_file.filename.rsplit('.', 1)[0]}.json"
+            presigned_url = save_result_to_minio(
+                result_dict=results_minio,
+                object_name=minio_key,
+            )
+            logger.info(f"[MinIO] 결과 MinIO 저장 완료: {presigned_url}")
+
+        except Exception as e:
+            logger.error(f"[MinIO] 결과 저장 실패: {e}")
+            raise HTTPException(
+                status_code=500,
+                detail="결과 파일 저장 중 오류가 발생했습니다.",
+            )
+
+    @staticmethod
+    async def handle_extract_background(
+        request_id: str,
+        result_id: str,
+        input_file: UploadFile,
+        schema_file: Optional[UploadFile],
+        prompt_file: Optional[UploadFile],
+        mode: str,
+        model_list: List[str],
+        api_key: str,
+        request_info: Optional[str] = None,
+    ):
+        # ✅ dummy 요청 처리
+        if model_list == ["dummy"]:
+            try:
+                log_user_request(
+                    request_info=request_info,
+                    endpoint="dummy/extract/outer",
+                    input_filename="None",
+                    model="dummy",
+                    prompt_filename="None",
+                    context_length=0,
+                    api_key=api_key,
+                )
+                return await DummyService.extract_dummy()
+            except Exception as e:
+                logger.info(f"Failed to log 'dummy/extract/outer' request: {e}")
+
+        try:
+            if prompt_file and prompt_file.filename:
+                file_hash = compute_file_hash(prompt_file)
+                cached_prompt_path = save_prompt_file_if_not_exists(
+                    file_hash, prompt_file
+                )
+                with open(cached_prompt_path, encoding="utf-8") as f:
+                    prompt = f.read()
+                custom_mode = True
+                prompt_filename = Path(cached_prompt_path).name
+            else:
+                prompt_path = (
+                    STRUCTURED_PROMPT_PATH
+                    if mode == "structured"
+                    else DEFAULT_PROMPT_PATH
+                )
+                with open(prompt_path, encoding="utf-8") as f:
+                    prompt = f.read()
+                custom_mode = False
+                prompt_filename = Path(prompt_path).name
+
+            if schema_file and schema_file.filename:
+                # clone_upload_file()로 복제된 파일은 UploadFile과 달리 await .read() 지원 안 함
+                schema_content = schema_file.file.read()  # 파일 핸들로 읽기
+                schema_override = json.loads(schema_content.decode("utf-8"))
+            else:
+                # with open(STRUCTURED_SCHEMA_PATH, "r", encoding="utf-8") as f:
+                #     schema_override = json.load(f)
+                schema_override = None
+
+            info_response = await ModelInfoService.get_model_info()
+            info = json.loads(info_response.body.decode("utf-8"))
+            inner_models = info["models"]["inner_model"]["model_list"]
+            outer_models = info["models"]["outer_model"]["model_list"]
+            model_url_map = await ModelInfoService.get_ollama_model_map()
+
+            # presigned_url = upload_file_to_minio(input_file, request_id)
+            presigned_url = upload_file_to_minio_v2(
+                file=input_file,
+                bucket_name=MINIO_BUCKET_NAME,
+                object_name=f"{request_id}/{input_file.filename}",
+            )
+            logger.info(f"[MinIO] presigned URL 생성 완료: {presigned_url}")
+
+            tasks = []
+            for model in model_list:
+                tasks.append(
+                    PipelineRunner.run_pipeline(
+                        request_info=request_info,
+                        request_id=request_id,
+                        file_path=presigned_url,
+                        filename=input_file.filename,
+                        prompt=prompt,
+                        prompt_filename=prompt_filename,
+                        custom_mode=custom_mode,
+                        mode=mode,
+                        model=model,
+                        inner_models=inner_models,
+                        outer_models=outer_models,
+                        model_url_map=model_url_map if model in inner_models else {},
+                        api_key=api_key,
+                        schema_override=schema_override,
+                        prompt_mode="extract",
+                    )
+                )
+
+            result_set = await asyncio.gather(*tasks)
+            results_minio = []
+            results_redis = []
+
+            for result in result_set:
+                results_minio.append(result)
+                # 'fields' 키 제외한 버전 생성
+                result_filtered = {k: v for k, v in result.items() if k != "fields"}
+                results_redis.append(result_filtered)
+
+            # ✅ 결과 Redis 저장 (요약본)
+            redis_key = f"pipeline_result:{result_id}"
+            redis_client.set(
+                redis_key,
+                json.dumps(results_redis, ensure_ascii=False),
+                ex=60 * 60,
+            )
+            logger.info(f"[REDIS] 결과 Redis 저장 완료: {result_id}")
+
+        except Exception as e:
+            logger.error(f"[PIPELINE] ❌ result_id={result_id} 처리 실패: {e}")
+            redis_client.set(
+                f"pipeline_result:{result_id}",
+                json.dumps({"error": str(e)}),
+                ex=60 * 60,
+            )
+
+        # ✅ 결과 MinIO 저장 (전체본)
+        try:
+            minio_key = f"{request_id}/{input_file.filename.rsplit('.', 1)[0]}.json"
+            presigned_url = save_result_to_minio(
+                result_dict=results_minio,
+                object_name=minio_key,
+            )
+            logger.info(f"[MinIO] 결과 MinIO 저장 완료: {presigned_url}")
+
+        except Exception as e:
+            logger.error(f"[MinIO] 결과 저장 실패: {e}")
+            raise HTTPException(
+                status_code=500,
+                detail="결과 파일 저장 중 오류가 발생했습니다.",
+            )
--- a/workspace/services/model_service.py
+++ b/workspace/services/model_service.py
@@ -0,0 +1,69 @@
+import logging
+from typing import Dict
+
+import httpx
+from config.setting import OLLAMA_URL
+from fastapi.responses import JSONResponse
+
+logger = logging.getLogger(__name__)
+
+
+class ModelInfoService:
+    OUTER_MODELS = [
+        "claude-sonnet-4-20250514",
+        "claude-3-7-sonnet-20250219",
+        "claude-3-5-haiku-20241022",
+        "gemini-2.5-pro",
+        "gemini-2.5-flash",
+        "gpt-4.1",
+        "gpt-4o",
+    ]
+
+    @staticmethod
+    async def get_ollama_model_map() -> Dict[str, str]:
+        model_url_map = {}
+        for url in OLLAMA_URL:
+            try:
+                async with httpx.AsyncClient(timeout=3.0) as client:
+                    tags_url = url.replace("/api/generate", "/api/tags")
+                    res = await client.get(tags_url)
+                    res.raise_for_status()
+                    models = res.json().get("models", [])
+                    for m in models:
+                        model_url_map[m["name"]] = url
+            except Exception as e:
+                logger.error(f"[ERROR] {url} 모델 조회 실패: {e}")
+        return model_url_map
+
+    @staticmethod
+    async def get_model_info() -> JSONResponse:
+        inner_models = []
+
+        for url in OLLAMA_URL:
+            try:
+                async with httpx.AsyncClient(timeout=3.0) as client:
+                    tags_url = url.replace("/generate", "/tags")
+                    res = await client.get(tags_url)
+                    res.raise_for_status()
+                    data = res.json()
+                    models = [m["name"] for m in data.get("models", [])]
+                    inner_models.extend(models)
+            except Exception as e:
+                logger.error(f"[API-INFO-ERROR] Ollama 모델 조회 실패 ({url}): {e}")
+
+        inner_models = list(set(inner_models))
+
+        return JSONResponse(
+            content={
+                "models": {
+                    "inner_model": {
+                        "default_model": "gpt-oss:20b",  # gemma3:27b
+                        "model_list": inner_models,
+                    },
+                    "outer_model": {
+                        "default_model": "gpt-4.1",
+                        "model_list": ModelInfoService.OUTER_MODELS,
+                    },
+                }
+            }
+        )
--- a/workspace/services/pipeline_runner.py
+++ b/workspace/services/pipeline_runner.py
@@ -0,0 +1,292 @@
+import asyncio
+import json
+import logging
+import time
+from typing import Dict, List, Literal, Optional
+
+import httpx
+import redis
+from config.setting import OCR_API_URL, OCR_REDIS_DB, OCR_REDIS_HOST, OCR_REDIS_PORT
+from utils.checking_files import token_counter
+from utils.image_converter import prepare_images_from_file
+from utils.logging_utils import log_pipeline_status, log_user_request
+from utils.text_generator import (
+    ClaudeGenerator,
+    GeminiGenerator,
+    GptGenerator,
+    OllamaGenerator,
+)
+from utils.text_processor import post_process
+
+logger = logging.getLogger(__name__)
+
+# Redis client used to read OCR results (original note: "DB=1 for Celery results";
+# the DB actually used is whatever OCR_REDIS_DB is set to — TODO confirm)
+redis_client = redis.Redis(
+    host=OCR_REDIS_HOST,
+    port=OCR_REDIS_PORT,
+    db=OCR_REDIS_DB,
+    decode_responses=True,
+)
+
+
+class PipelineRunner:
+    @staticmethod
+    async def run_pipeline(
+        request_info: str,  # ✅ 추가
+        request_id: str,
+        file_path: str,
+        filename: str,
+        prompt: str,
+        prompt_filename: str,  # ✅ 추가
+        custom_mode: bool,
+        mode: str,
+        model: str,
+        inner_models: List[str],
+        outer_models: List[str],
+        model_url_map: Dict[str, str],
+        api_key: str,
+        schema_override: Optional[dict] = None,
+        prompt_mode: Literal["general", "extract"] = "extract",
+    ):
+        start_time = time.time()
+
+        if mode == "multimodal":
+            # 모델 유효성
+            if model not in outer_models:
+                raise ValueError(
+                    f"외부 모델 리스트에 '{model}'이 포함되어 있지 않습니다. outer_models: {outer_models}"
+                )
+            if not (("gpt" in model) or ("gemini" in model)):
+                raise ValueError("멀티모달 E2E는 gpt 계열만 지원합니다.")
+
+            # 입력 파일 → 이미지 바이트 리스트 준비
+            images = await prepare_images_from_file(file_path, filename)
+
+            # 요청 로깅(텍스트가 없으므로 prompt 길이만)
+            context_length = len(prompt)
+            try:
+                log_user_request(
+                    request_info=request_info,
+                    endpoint=f"/{prompt_mode}/{mode}",
+                    input_filename=filename,
+                    model=model,
+                    prompt_filename=prompt_filename,
+                    context_length=context_length,
+                    api_key=api_key,
+                )
+            except Exception as e:
+                logger.info(f"Failed to log '/{prompt_mode}/{mode}' request: {e}")
+
+            # 멀티모달 LLM 호출
+            log_pipeline_status(request_id, "멀티모달 LLM 추론 시작")
+            if "gpt" in model:
+                generator = GptGenerator(model=model)
+                generated_text, llm_model, llm_url = await asyncio.to_thread(
+                    generator.generate_multimodal, images, prompt, schema_override
+                )
+            elif "gemini" in model:
+                generator = GeminiGenerator(model=model)
+                generated_text, llm_model, llm_url = await asyncio.to_thread(
+                    generator.generate_multimodal, images, prompt, schema_override
+                )
+
+            end_time = time.time()
+            log_pipeline_status(request_id, "LLM 추론 완료 및 후처리 시작")
+
+            # 멀티모달은 OCR 텍스트/좌표 없음
+            text = ""
+            coord = None
+            ocr_model = "bypass(multimodal)"
+
+            json_data = post_process(
+                filename,
+                text,
+                generated_text,
+                coord,
+                ocr_model,
+                llm_model,
+                llm_url,
+                mode,
+                start_time,
+                end_time,
+                prompt_mode,
+            )
+            log_pipeline_status(request_id, "후처리 완료 및 결과 반환")
+            return json_data
+
+        try:
+            # OCR API 요청
+            log_pipeline_status(request_id, "OCR API 호출 시작")
+            async with httpx.AsyncClient() as client:
+                # ✅ presigned URL을 OCR API로 전달
+                ocr_resp = await client.post(
+                    OCR_API_URL,
+                    json=[
+                        {
+                            "file_url": file_path,  # presigned URL
+                            "filename": filename,
+                        }
+                    ],
+                    timeout=None,
+                )
+                ocr_resp.raise_for_status()
+
+                # OCR API 응답에서 task_id 추출
+                task_ids_json = ocr_resp.json()
+                print(f"[DEBUG] OCR API 응답: {task_ids_json}")
+                task_ids = [
+                    item.get("task_id") for item in task_ids_json.get("results", [])
+                ]
+                if not task_ids:
+                    raise ValueError("❌ OCR API에서 유효한 task_id를 받지 못했습니다.")
+                task_id = task_ids[0]
+
+            # Redis에서 결과를 5초 간격으로 최대 10회 폴링
+            raw_result = None
+            for attempt in range(10):  # 최대 10회 시도
+                redis_key = f"ocr_result:{task_id}"
+                raw_result = redis_client.get(redis_key)
+                if raw_result:
+                    logger.info(
+                        f"✅ Redis에서 task_id '{task_id}'에 대한 OCR 결과를 찾았습니다."
+                    )
+                    break
+                await asyncio.sleep(5)
+
+            if not raw_result:  # 결과가 없으면 예외 발생
+                error_message = (
+                    "❌ OCR API에서 작업을 완료하지 못했습니다. 페이지 수를 줄여주세요."
+                )
+                logger.error(error_message)
+                raise ValueError(error_message)
+
+            result_data = json.loads(raw_result)
+            text = result_data["parsed"]
+            coord = result_data.get("fields")
+            ocr_model = result_data.get("ocr_model", "OCR API(pytesseract)")
+
+        except Exception as e:
+            logger.error(f"❌ OCR 처리 중 예외 발생: {e}")
+            raise
+
+        # ✅ 입력 길이 검사
+        log_pipeline_status(request_id, "모델 입력 텍스트 길이 검사 시작")
+        token_count = token_counter(prompt, text)
+        context_length = len(prompt + text)
+
+        # 🔽 로그 기록
+        try:
+            log_user_request(
+                request_info=request_info,
+                endpoint=f"/{prompt_mode}/{mode}",
+                input_filename=filename,
+                model=model,
+                prompt_filename=prompt_filename,
+                context_length=context_length,
+                api_key=api_key,
+                # token_count=token_count,
+            )
+        except Exception as e:
+            logger.info(f"Failed to log '/{prompt_mode}/{mode}' request: {e}")
+
+        # ✅ 120K 토큰 초과 검사
+        if token_count > 120000:
+            return post_process(
+                filename,
+                text,
+                f"⚠️ 입력 텍스트가 {token_count} 토큰으로 입력 길이를 초과했습니다. 모델 호출 생략합니다.",
+                coord,
+                ocr_model,
+                "N/A",
+                "N/A",
+                mode,
+                start_time,
+                time.time(),
+                prompt_mode,
+            )
+
+        # 2. 내부 모델 처리 (Ollama)
+        if mode in ("inner", "all", "structured"):
+            if model in inner_models:
+                log_pipeline_status(request_id, "내부 LLM 추론 시작")
+                api_url = model_url_map.get(model)
+                if not api_url:
+                    raise ValueError(
+                        f"❌ 모델 '{model}'이 로드된 Ollama 서버를 찾을 수 없습니다."
+                    )
+
+                generator = OllamaGenerator(model=model, api_url=api_url)
+
+                if mode == "structured":
+                    generated_text, llm_model, llm_url = await asyncio.to_thread(
+                        generator.structured_generate,
+                        text,
+                        prompt,
+                        custom_mode,
+                        schema_override,
+                    )
+                else:
+                    generated_text, llm_model, llm_url = await asyncio.to_thread(
+                        generator.generate, text, prompt, custom_mode, prompt_mode
+                    )
+            else:
+                raise ValueError(
+                    f"내부 모델 리스트에 '{model}'이 포함되어 있지 않습니다. inner_models: {inner_models}"
+                )
+
+        # 3. 외부 모델 처리
+        elif mode in ("outer", "all", "structured"):
+            if model in outer_models:
+                log_pipeline_status(request_id, "외부 LLM 추론 시작")
+                if "claude" in model:
+                    generator = ClaudeGenerator(model=model)
+                elif "gemini" in model:
+                    generator = GeminiGenerator(model=model)
+                elif "gpt" in model:
+                    generator = GptGenerator(model=model)
+                else:
+                    raise ValueError(
+                        "지원되지 않는 외부 모델입니다. ['gemini', 'claude', 'gpt'] 중 선택하세요."
+                    )
+
+                if mode == "structured":
+                    generated_text, llm_model, llm_url = await asyncio.to_thread(
+                        generator.structured_generate,
+                        text,
+                        prompt,
+                        custom_mode,
+                        schema_override,
+                    )
+                else:
+                    generated_text, llm_model, llm_url = await asyncio.to_thread(
+                        generator.generate, text, prompt, custom_mode, prompt_mode
+                    )
+            else:
+                raise ValueError(
+                    f"외부 모델 리스트에 '{model}'이 포함되어 있지 않습니다. outer_models: {outer_models}"
+                )
+        else:
+            raise ValueError(
+                f"❌ 지원되지 않는 모드입니다. 'inner', 'outer', 'all', 'structured' 중에서 선택하세요. (입력: {mode})"
+            )
+
+        log_pipeline_status(request_id, "LLM 추론 완료 및 후처리 시작")
+        end_time = time.time()
+
+        # 4. 후처리
+        json_data = post_process(
+            filename,
+            text,
+            generated_text,
+            coord,
+            ocr_model,
+            llm_model,
+            llm_url,
+            mode,
+            start_time,
+            end_time,
+            prompt_mode,
+        )
+
+        log_pipeline_status(request_id, "후처리 완료 및 결과 반환")
+        return json_data
--- a/workspace/services/prompt.py
+++ b/workspace/services/prompt.py
@@ -0,0 +1,36 @@
+# Prompt asking the model to summarize an STT meeting transcript into a fixed
+# five-item form and answer in JSON. "{context}" is the placeholder for the
+# transcript and is filled with str.format(context=...).
+SUMMARY_PROMPT_TEMPLATE = """
+/no_think
+너는 방금 끝난 회의의 내용을 정리해서 팀원들에게 공유해야 하는 프로젝트 매니저야.
+아래의 STT 회의록 초안은 오타나 문맥 오류가 있을 수 있어. 무리하게 해석하기보다는 문맥상 가장 자연스럽고 합리적인 내용으로 정리하고, 애매한 부분은 그 불확실성을 그대로 언급해도 괜찮아.
+아래 양식 외의 내용은 절대 포함하지 마. 각 항목의 제목과 번호는 반드시 그대로 유지해.
+출력은 json으로 해
+
+# 양식
+1. 회의 주요 키워드 (5개 내외로 작성)
+2. 논의된 주요 안건 목록(Action Items)
+3. 각 안건별 핵심 논의 내용 요약
+4. 최종적으로 합의된 결정 사항들
+5. 다음 회의에서 논의할 내용이나 미결 사항 (있다면 작성)
+
+# 내용
+{context}
+"""
+
+# Variant of the meeting-summary prompt for speaker-labelled transcripts
+# ("SPEAKER_01", "SPEAKER_02", ...). Same five-item form and JSON output;
+# "{context}" is the placeholder for the transcript.
+# NOTE(review): the line "각 화자의 발언을 고려하여" ends mid-sentence (with a
+# trailing space) — looks like an unfinished instruction; confirm the intended text.
+ONLY_GEMINI_PROMPT_TEMPLATE = """
+다음은 여러 명이 참여한 회의의 전사 기록이다. 각 발화자는 "SPEAKER_01", "SPEAKER_02" 와 같은 형식으로 구분되어 있다.
+같은 내용이지만 SPEAKER의 순서가 다를 수 도 있다.
+아래의 STT 회의록 초안은 오타나 문맥 오류가 있을 수 있어. 무리하게 해석하기보다는 문맥상 가장 자연스럽고 합리적인 내용으로 정리하고, 애매한 부분은 그 불확실성을 그대로 언급해도 괜찮아.
+각 화자의 발언을 고려하여 
+아래 양식 외의 내용은 절대 포함하지 마. 각 항목의 제목과 번호는 반드시 그대로 유지해.
+출력은 json으로 해
+
+# 양식
+1. 회의 주요 키워드 (5개 내외로 작성)
+2. 논의된 주요 안건 목록(Action Items)
+3. 각 안건별 핵심 논의 내용 요약
+4. 최종적으로 합의된 결정 사항들
+5. 다음 회의에서 논의할 내용이나 미결 사항 (있다면 작성)
+
+# 내용
+{context}
+"""
--- a/workspace/services/report.py
+++ b/workspace/services/report.py
@@ -0,0 +1,198 @@
+import asyncio
+import json
+import logging
+import os
+import re
+
+import httpx
+from anthropic import AsyncAnthropic
+from dotenv import load_dotenv
+from google.generativeai import GenerativeModel  # gemini
+from openai import AsyncOpenAI
+
+from services.prompt import ONLY_GEMINI_PROMPT_TEMPLATE, SUMMARY_PROMPT_TEMPLATE
+
+# Load settings from a .env file (python-dotenv) before the os.getenv() API-key lookups in this module run.
+load_dotenv()
+
+logger = logging.getLogger(__name__)
+# In-memory task registry: task_id -> per-model status/result dict plus a "finished" flag.
+# Written by run_all_models() and run_model_task(); nothing in this module removes entries.
+tasks_store = {}
+# Model identifiers for each backend.
+ask_gpt_name = "gpt-4.1-mini"
+ask_ollama_qwen_name = "qwen3:custom"
+ask_gemini_name = "gemini-2.5-flash"
+ask_claude_name = "claude-3-7-sonnet-latest"
+
+
+def parse_json_safe(text: str):
+    """Parse a model reply as JSON, tolerating a surrounding Markdown code fence.
+
+    Args:
+        text: Raw reply text. It may be wrapped in a ```json ... ``` fence (or a bare
+            ``` fence), optionally with whitespace around the fence.
+
+    Returns:
+        The decoded JSON value, or ``{"raw_text": text}`` when the reply cannot be
+        parsed (``text`` is the fence-stripped reply if a fence was found).
+    """
+    try:
+        # str.strip("```json") removes a *set of characters* from both ends, not the
+        # fence itself, so match and unwrap the fence explicitly.
+        fenced = re.fullmatch(
+            r"\s*```(?:json)?\s*(.*?)\s*```\s*", text, re.DOTALL | re.IGNORECASE
+        )
+        if fenced:
+            text = fenced.group(1)
+        return json.loads(text)
+    except Exception:
+        # Best effort by design: callers receive the unparsed text instead of an error.
+        return {"raw_text": text}
+
+
+async def ask_gpt4(text: str):
+    """Summarize *text* with the OpenAI chat model named by ``ask_gpt_name``.
+
+    Args:
+        text: Transcript inserted into SUMMARY_PROMPT_TEMPLATE.
+
+    Returns:
+        ``(ask_gpt_name, result)`` where ``result`` is the reply passed through
+        parse_json_safe(), or ``{"error": <message>}`` if anything raised.
+    """
+    try:
+        openai_client = AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY"))
+        user_message = {
+            "role": "user",
+            "content": SUMMARY_PROMPT_TEMPLATE.format(context=text),
+        }
+        completion = await openai_client.chat.completions.create(
+            model=ask_gpt_name,
+            messages=[user_message],
+            temperature=0,
+        )
+        answer = completion.choices[0].message.content
+        return ask_gpt_name, parse_json_safe(answer)
+    except Exception as exc:
+        # Failures are reported in the result so one backend cannot break the others.
+        logger.error(f"ask_gpt4 error: {exc}")
+        return ask_gpt_name, {"error": str(exc)}
+
+
+def fix_incomplete_json(text: str) -> str:
+    """Append the closing braces needed to match the number of "{" in *text*.
+
+    Braces are counted textually (including any inside string values); the text is
+    returned unchanged when it has no surplus of opening braces.
+    """
+    missing = text.count("{") - text.count("}")
+    if missing <= 0:
+        return text
+    return text + "}" * missing
+
+
+async def ask_ollama_qwen(text: str):
+    """Summarize *text* with the Qwen model served by the Ollama instance.
+
+    Args:
+        text: Transcript inserted into SUMMARY_PROMPT_TEMPLATE.
+
+    Returns:
+        ``(ask_ollama_qwen_name, result)``. ``result`` is the JSON object found in the
+        model reply, or a dict with an ``error`` key (plus ``raw_text`` when a reply was
+        received but contained no valid JSON). Exceptions are logged and reported the
+        same way, never raised.
+    """
+    try:
+        async with httpx.AsyncClient() as client:
+            res = await client.post(
+                "http://172.16.10.176:11434/api/generate",
+                json={
+                    "model": ask_ollama_qwen_name,
+                    "prompt": SUMMARY_PROMPT_TEMPLATE.format(context=text),
+                },
+                timeout=300,
+            )
+            # Report HTTP failures as an error result instead of parsing an error body.
+            res.raise_for_status()
+            raw_text = res.text
+
+        # 1. The body is handled as one JSON object per line (streamed reply);
+        #    collect the "response" fragment of every line that decodes.
+        fragments = []
+        for line in raw_text.splitlines():
+            line = line.strip()
+            if not line:
+                continue
+            try:
+                obj = json.loads(line)
+            except json.JSONDecodeError:
+                # Skip lines that are not valid JSON.
+                continue
+            if isinstance(obj, dict):
+                fragments.append(obj.get("response") or "")
+        full_response = "".join(fragments)
+
+        # 2. Drop <think> blocks after decoding: inside the raw body the tags sit in
+        #    JSON-encoded strings (possibly escaped), and braces in thinking text would
+        #    otherwise widen the greedy JSON match below.
+        full_response = re.sub(
+            r"<think>.*?</think>", "", full_response, flags=re.DOTALL
+        )
+        full_response = re.sub(r"</?think>", "", full_response)
+
+        # 3. Extract the JSON object from the assembled reply.
+        json_match = re.search(r"\{.*\}", full_response, re.DOTALL)
+        if not json_match:
+            return ask_ollama_qwen_name, {
+                "error": "No JSON found in response",
+                "raw_text": full_response,
+            }
+        try:
+            return ask_ollama_qwen_name, json.loads(json_match.group(0))
+        except json.JSONDecodeError:
+            return ask_ollama_qwen_name, {
+                "error": "Invalid JSON in response",
+                "raw_text": full_response,
+            }
+
+    except Exception as e:
+        logger.error(f"ask_ollama_qwen error: {e}")
+        return ask_ollama_qwen_name, {"error": str(e)}
+
+
+async def ask_gemini(text: str):
+    """Summarize *text* with the Gemini model named by ``ask_gemini_name``.
+
+    Args:
+        text: Transcript inserted into SUMMARY_PROMPT_TEMPLATE.
+
+    Returns:
+        ``(ask_gemini_name, result)`` where ``result`` is the reply passed through
+        parse_json_safe(), or ``{"error": <message>}`` if anything raised.
+    """
+    try:
+        model = GenerativeModel(model_name=ask_gemini_name)
+        # generate_content() is a synchronous call; run it in a worker thread so the
+        # event loop keeps serving the other model requests started by asyncio.gather().
+        response = await asyncio.to_thread(
+            model.generate_content, SUMMARY_PROMPT_TEMPLATE.format(context=text)
+        )
+        return ask_gemini_name, parse_json_safe(response.text)
+    except Exception as e:
+        logger.error(f"ask_gemini error: {e}")
+        return ask_gemini_name, {"error": str(e)}
+
+
+async def dialog_ask_gemini(text: str):
+    """Summarize a speaker-labelled transcript with Gemini.
+
+    Same as ask_gemini() but uses ONLY_GEMINI_PROMPT_TEMPLATE.
+
+    Args:
+        text: Transcript inserted into ONLY_GEMINI_PROMPT_TEMPLATE.
+
+    Returns:
+        ``(ask_gemini_name, result)`` where ``result`` is the reply passed through
+        parse_json_safe(), or ``{"error": <message>}`` if anything raised.
+    """
+    try:
+        model = GenerativeModel(model_name=ask_gemini_name)
+        # generate_content() is a synchronous call; run it in a worker thread so the
+        # event loop is not blocked while waiting for the reply.
+        response = await asyncio.to_thread(
+            model.generate_content,
+            ONLY_GEMINI_PROMPT_TEMPLATE.format(context=text),
+        )
+        return ask_gemini_name, parse_json_safe(response.text)
+    except Exception as e:
+        # Name this function in the log so it can be told apart from ask_gemini().
+        logger.error(f"dialog_ask_gemini error: {e}")
+        return ask_gemini_name, {"error": str(e)}
+
+
+async def ask_claude(text: str):
+    """Summarize *text* with the Anthropic model named by ``ask_claude_name``.
+
+    Args:
+        text: Transcript inserted into SUMMARY_PROMPT_TEMPLATE.
+
+    Returns:
+        ``(ask_claude_name, result)`` where ``result`` is the first content block's
+        text passed through parse_json_safe(), or ``{"error": <message>}`` if
+        anything raised.
+    """
+    try:
+        anthropic_client = AsyncAnthropic(api_key=os.getenv("ANTHROPIC_API_KEY"))
+        user_message = {
+            "role": "user",
+            "content": SUMMARY_PROMPT_TEMPLATE.format(context=text),
+        }
+        reply = await anthropic_client.messages.create(
+            model=ask_claude_name,
+            messages=[user_message],
+            max_tokens=12800,
+            stream=False,
+        )
+        return ask_claude_name, parse_json_safe(reply.content[0].text)
+    except Exception as exc:
+        # Failures are reported in the result so one backend cannot break the others.
+        logger.error(f"ask_claude error: {exc}")
+        return ask_claude_name, {"error": str(exc)}
+
+
+async def total_summation(text: str) -> dict:
+    """Query the GPT, Qwen, Gemini and Claude backends concurrently.
+
+    Returns:
+        A dict mapping each model name to that backend's result.
+    """
+    pairs = await asyncio.gather(
+        ask_gpt4(text),
+        ask_ollama_qwen(text),
+        ask_gemini(text),
+        ask_claude(text),
+    )
+    return {model_name: result for model_name, result in pairs}
+
+
+async def run_model_task(model_func, text, key, task_id):
+    """Run one backend and record its outcome in ``tasks_store[task_id][key]``.
+
+    Args:
+        model_func: Coroutine function returning ``(model_name, result)``.
+        text: Input forwarded to ``model_func``.
+        key: Slot name inside the task entry (e.g. "gpt4").
+        task_id: Key of the task entry in ``tasks_store``.
+    """
+    try:
+        model_name, result = await model_func(text)
+        outcome = dict(status="completed", model_name=model_name, result=result)
+        tasks_store[task_id][key] = outcome
+    except Exception as exc:
+        # Record the failure instead of propagating it to the gather() in the caller.
+        tasks_store[task_id][key] = dict(status="failed", error=str(exc))
+
+
+async def run_all_models(text: str, task_id: str):
+    """Run all four backends concurrently, tracking progress in ``tasks_store``.
+
+    The task entry is created with every model marked "pending", each slot is
+    filled in by run_model_task(), and "finished" is set to True once all are done.
+    """
+    runners = {
+        "gpt4": ask_gpt4,
+        "qwen3": ask_ollama_qwen,
+        "gemini": ask_gemini,
+        "claude": ask_claude,
+    }
+
+    # Set up the initial state before any request starts.
+    state = {key: {"status": "pending", "result": None} for key in runners}
+    state["finished"] = False
+    tasks_store[task_id] = state
+
+    await asyncio.gather(
+        *(run_model_task(func, text, key, task_id) for key, func in runners.items())
+    )
+
+    tasks_store[task_id]["finished"] = True
--- a/workspace/static/dummy_response.json
+++ b/workspace/static/dummy_response.json
@@ -0,0 +1,42 @@
+[
+  {
+    "filename": "250107_out_SYJV-250001_Advanced Mobilization.pdf",
+    "outer_model": {
+      "ocr_model": "OCR not used",
+      "llm_model": "gpt-4.1",
+      "api_url": "OpenAI Python SDK"
+    },
+    "time": {
+      "duration_sec": "8.24",
+      "started_at": 1747614863.8500028,
+      "ended_at": 1747614872.089025
+    },
+    "fields": [],
+    "parsed": "SEOYOUNG JOINT VENTURE \n \n \nRef. No. SYJV-250001 \nJan / 07 / 2025 \n \nMr. BENJAMIN A. BAUTISTA \nProject Director \nRoads Management Cluster 1 (Bilateral) – UPMO \nDepartment of Public Works and Highways \n2nd Street, Port Area, Manila \n \nThru \n: \nANTONIO ERWIN R. ARANAZ \n \n \nProject Manager \n \nSubject \n: \nAdvanced Mobilization of Experts \n \n \nConsulting Services for the Independent Design Check of the Panay-Guimaras-\nNegros Island Bridges Project [Loan Agreement No.: PHL-23] \n \nDear Mr. Bautista, \n \nWith reference to the above-mentioned consulting services, we respectfully inform the \nadvanced mobilization of Experts. We, SEOYOUNG JV, listed below the mobilized experts in \naccordance with the provisions of the time schedule. \n \nIt will be appreciated if we can receive your response the soonest possible time. Your \nfavorable consideration hereof is highly appreciated. \n \n \nVery Truly Yours,  \n \n \nJONG HAK, KIM \nTeam Leader \nIDC Services for PGN Bridges Project, SEOYOUNG JV \n \nEnclosures :  \n 1. Mobilization of International Key Experts in Home (Korea) \n 2. Mobilization of International Non-Key Experts in Home (Korea) \n 3. Mobilization of Local Key Experts in Field (Philippines) \n 4. Mobilization of Local Non-Key Experts in Field (Philippines) \n 5. CVs of Experts \n \n \nSEOYOUNG JOINT VENTURE \n \n \n1. Mobilization of International Key Experts \nNo. 
\nName \nPosition \nActual Date of \nMobilization \n1 \nKIM, JONG HAK \nTeam Leader \nJan 05, 2025 \n2 \nLEE, SANG HEE \nBridge Structural Engineer \nJan 05, 2025 \n3 \nJANG, SEI CHANG \nBridge Analysis Engineer \nJan 05, 2025 \n4 \nLEE, JIN WOO \nBridge Foundation Engineer \nJan 05, 2025 \n5 \nLEE, KEUN HO \nBridge Seismic Engineer \nJan 05, 2025 \n6 \nKIM, YOUNG SOO \nBridge Engineer (Pylon) \nJan 05, 2025 \n7 \nSONG, HYE GUM \nBridge Engineer (Cable) \nJan 05, 2025 \n8 \nLEE, JAE SUNG \nBridge Engineer (Wind) \nJan 05, 2025 \n9 \nSHIN, GYOUNG SEOB \nGeotechnical Engineer \nJan 05, 2025 \n \n2. Mobilization of International Non-Key Experts \nNo. \nName \nPosition \nActual Date of \nMobilization \n1 \nKOH, JONG UP \nHighway Engineer \nJan 05, 2025 \n2 \nPARK, JAE JIN \nTraffic Analysis Specialist \nJan 05, 2025 \n3 \nSONG, YONG CHUL \nOffshore Engineer \nJan 05, 2025 \n4 \nHA, MIN KYU \nDrainage Design Engineer \nJan 05, 2025 \n5 \nJANG, MYUNG HEE \nGeologist \nJan 05, 2025 \n6 \nKIM, IK HWAN \nQuantity Engineer \nJan 05, 2025 \n \n3. Mobilization of Local Key Experts \nNo. \nName \nPosition \nActual Date of \nMobilization \n1 \nMark Anthony V. Apelo \nBridge Engineer (Analysis) \nJan 05, 2025 \n2 \nMelodina F. Tuano \nBridge Engineer (Substructure) \nJan 05, 2025 \n3 \nAurora T. Fabro \nBridge Engineer (Superstructure1) \nJan 05, 2025 \n4 \nRogelio T Sumbe \nBridge Engineer (Superstructure2) \nJan 05, 2025 \n5 \nGuillermo Gregorio A. Mina \nBridge Engineer (Foundation) \nJan 05, 2025 \n \n4. Mobilization of Local Non-Key Experts \nNo. \nName \nPosition \nActual Date of \nMobilization \n1 \nElvira G. Guirindola \nHighway Engineer 1 \nJan 05, 2025 \n2 \nDaniel S. Baptista \nHighway Engineer 2 \nJan 05, 2025 \n3 \nMario M. Quimboy \nQuantity Engineer 1 \nJan 05, 2025 \n4 \nAnaliza C. Bauda \nQuantity Engineer 2 \nJan 05, 2025 \n \n",
+    "generated": "```json\n{\n  \"공문 번호\": \"SYJV-250001\",\n  \"공문 일자\": \"Jan / 07 / 2025\",\n  \"수신처\": \"Department of Public Works and Highways\",\n  \"수신자\": \"Project Director\",\n  \"수신자 약자\": \"PD\",\n  \"발신처\": \"SEOYOUNG JOINT VENTURE\",\n  \"발신자\": \"Team Leader\",\n  \"발신자 약자\": \"TL\",\n  \"공문 제목\": \"Advanced Mobilization of Experts\",\n  \"공문 제목 요약\": \"전문가 사전 동원 보고\",\n  \"공문 내용 요약\": \"프로젝트에 필요한 전문가들이 사전 동원되었음을 알림\",\n  \"공문간 연계\": \"없음\",\n  \"공문 종류\": \"기술/성과물\",\n  \"공문 유형\": \"보고\",\n  \"첨부문서 제목\": [\n    \"Mobilization of International Key Experts in Home (Korea)\",\n    \"Mobilization of International Non-Key Experts in Home (Korea)\",\n    \"Mobilization of Local Key Experts in Field (Philippines)\",\n    \"Mobilization of Local Non-Key Experts in Field (Philippines)\",\n    \"CVs of Experts\"\n  ],\n  \"첨부문서 수\": 5\n}\n```",
+    "processed": {
+      "공문번호": "SYJV-250001",
+      "공문일자": "Jan / 07 / 2025",
+      "수신처": "Department of Public Works and Highways",
+      "수신자": "Project Director",
+      "수신자약자": "PD",
+      "발신처": "SEOYOUNG JOINT VENTURE",
+      "발신자": "Team Leader",
+      "발신자약자": "TL",
+      "공문제목": "Advanced Mobilization of Experts",
+      "공문제목요약": "전문가 사전 동원 보고",
+      "공문내용요약": "프로젝트에 필요한 전문가들이 사전 동원되었음을 알림",
+      "공문간연계": "없음",
+      "공문종류": "기술/성과물",
+      "공문유형": "보고",
+      "첨부문서제목": [
+        "Mobilization of International Key Experts in Home (Korea)",
+        "Mobilization of International Non-Key Experts in Home (Korea)",
+        "Mobilization of Local Key Experts in Field (Philippines)",
+        "Mobilization of Local Non-Key Experts in Field (Philippines)",
+        "CVs of Experts"
+      ],
+      "첨부문서수": 5
+    }
+  }
+]
--- a/workspace/static/html/extract_guide.html
+++ b/workspace/static/html/extract_guide.html
@@ -0,0 +1,83 @@
+<!DOCTYPE html>
+<html lang="ko">
+<head>
+    <meta charset="UTF-8">
+    <title>📄 공문 추출·번역 API 가이드</title>
+    <style>
+        body { font-family: 'Arial', sans-serif; margin: 40px; line-height: 1.6; }
+        h1, h2 { color: #2c3e50; }
+        code, pre { background: #f4f4f4; padding: 10px; display: block; white-space: pre-wrap; border-left: 4px solid #3498db; }
+        .warn { color: #c0392b; font-weight: bold; }
+    </style>
+</head>
+<body>
+<h1>📄 문서 추출·번역 API 가이드</h1>
+<p>
+🔹 아래는 <strong>/extract</strong> 계열 API에 프롬프트를 작성하고 사용하는 방법에 대한 안내입니다.
+</p>
+
+<h3>📌 사용 가능한 API 종류</h3>
+<P>
+ 🔹 <strong>/extract/inner</strong>: 내부 모델을 사용<br>
+ 🔹 <strong>/extract/outer</strong>: 외부 모델을 사용<br>
+ 🔹 <strong>/extract/all</strong>: 내부 + 외부 모델을 동시에 사용<br>
+ 🔹 <strong>/extract/structured</strong>: 고정된 JSON 필드로 정형 응답
+</p>
+
+<hr>
+
+<h2>✅ "/extract/inner", "/extract/outer", "/extract/all"</h2>
+<p>
+ 🔹 문서 추출 항목을 다양하게 변경하며 시도할 경우에 사용합니다.<br>
+ 🔹 해당 API의 업로드 파일은 2가지로 구성됩니다:
+</p>
+<img src="static/image/FastAPI_extract_swagger.png" width="600" style="border: 2px solid #ccc; border-radius: 4px;"/>
+<h3>📌 API 첨부 파일 설명</h3>
+<ul>
+    <li><strong>files</strong>: <span class="warn">(필수)</span> PDF, 이미지 등 추론 대상 파일을 업로드합니다.</li>
+    <li><strong>prompt_file</strong>: <span class="warn">(선택)</span> 질문이 포함된 프롬프트 텍스트(.txt)를 업로드합니다.
+        <ul>
+            <li><strong>업로드⭕</strong>: 사용자 정의 프롬프트 사용</li>
+            <li><strong>업로드❌</strong>: 내부에 정의된 기본 프롬프트를 사용</li>
+        </ul>
+    </li>
+</ul>
+
+<p class="warn">Tip. 프롬프트 업로드⭕ 경우, <strong>"JSON으로 작성해주세요"</strong> 문구는 자동으로 삽입되므로 직접 <strong>작성할 필요가 없습니다.</strong></p>
+<p>→ 따라서, <strong>프롬프트 작성은 아래처럼 항목 설명만 작성</strong>하면 됩니다:</p>
+
+<code>  1. 공문번호: 문서 번호를 기입하세요.
+  2. 공문일자: 공문 발행일을 작성하세요.
+  3. 수신처: 수신 기관이나 부서명을 작성하세요.
+  4. 수신자: 수신자의 이름 또는 직책을 기입하세요.
+  ...</code>
+
+<hr>
+
+<h2>✅ "/extract/structured"</h2>
+<p>
+ 🔹 문서 추출 항목을 고정하여 정해진 필드 형식으로 응답 받기 위해 사용합니다.<br>
+ 🔹 해당 API의 업로드 파일은 3가지로 구성됩니다:
+</p>
+<img src="static/image/FastAPI_extract_structured_swagger.png" width="600" style="border: 2px solid #ccc; border-radius: 4px;"/>
+<h3>📌 API 첨부 파일 설명</h3>
+<ul>
+    <li><strong>files</strong>: <span class="warn">(필수)</span> PDF, 이미지 등 추론 대상 파일을 업로드합니다.</li>
+    <li><strong>schema_file</strong>: <span class="warn">(선택)</span> 응답 구조를 정의한 스키마 파일(.json)을 업로드합니다
+        <ul>
+            <li><strong>업로드⭕</strong>: 사용자 정의 필드 사용</li>
+            <li><strong>업로드❌</strong>: 내부에 정의된 기본 필드를 사용</li>
+        </ul>
+    </li>    
+    <li><strong>prompt_file</strong>: <span class="warn">(선택)</span> 질문이 포함된 프롬프트 텍스트(.txt)를 업로드합니다.
+        <ul>
+            <li><strong>업로드⭕</strong>: 사용자 정의 프롬프트 사용</li>
+            <li><strong>업로드❌</strong>: 내부에 정의된 기본 프롬프트를 사용</li>
+        </ul>
+    </li>
+</ul>
+
+<p class="warn">※ schema json 작성은 "Guide Book" 첫 번째인 "schema_file_guide"를 참고해주세요.</p>
+
+</body>
+</html>
--- a/workspace/static/html/extraction_structured_guide.html
+++ b/workspace/static/html/extraction_structured_guide.html
@@ -0,0 +1,32 @@
+<!DOCTYPE html>
+<html lang="ko">
+<head>
+    <meta charset="UTF-8">
+    <title>📄 /extract/structured 프롬프트 가이드</title>
+    <style>
+        body { font-family: 'Arial', sans-serif; margin: 40px; line-height: 1.6; }
+        h1, h2 { color: #2c3e50; }
+        code, pre { background: #f4f4f4; padding: 10px; display: block; white-space: pre-wrap; border-left: 4px solid #3498db; }
+        .warn { color: #c0392b; font-weight: bold; }
+    </style>
+</head>
+<body>
+    <h1>📄 /extract/structured 프롬프트 가이드</h1>
+    <p>아래는 <strong>/extract</strong> 계열 API에 프롬프트를 작성하고 사용하는 방법에 대한 안내입니다.</p>
+
+    <hr>
+
+    <h2>✅ 항목은 고정하되, 항목별 '지시문' 을 수정하고 싶은 경우</h2>
+    <h3>🖥️ 사용 API: <strong>/extract/structured</strong></h3>
+    <p>🔹 항목은 16개로 <strong>고정</strong>되어 있으며 <strong>추가/삭제/변경 불가</strong>합니다.</p>
+    <p>🔹 <strong>각 항목에 대한 '지시문' 설명만 작성</strong>할 수 있습니다.</p>
+
+    <code>
+1. 공문번호: 공문서 상단에 표기된 문서 번호를 추출합니다.
+2. 공문일자: 공문이 발행된 날짜를 추출합니다.
+3. 수신처: 문서를 수신하는 기관 또는 부서를 식별합니다.
+...
+16. 첨부문서수: 찾은 첨부문서 개수를 알려주세요.
+    </code>
+</body>
+</html>
--- a/workspace/static/html/general_guide.html
+++ b/workspace/static/html/general_guide.html
@@ -0,0 +1,176 @@
+<!DOCTYPE html>
+<html lang="ko">
+<head>
+    <meta charset="UTF-8">
+    <title>🧾 일반 추론 API 가이드</title>
+    <style>
+        body { font-family: 'Arial', sans-serif; margin: 40px; line-height: 1.6; }
+        h1, h2 { color: #2c3e50; }
+        code, pre { background: #f4f4f4; padding: 10px; display: block; white-space: pre-wrap; border-left: 4px solid #3498db; }
+        .warn { color: #c0392b; font-weight: bold; }
+    </style>
+</head>
+<body>
+<h1>🧾 일반 추론 API 가이드</h1>
+<p>
+🔹 <strong>/general</strong> 계열 API를 활용하여 문서 기반 질문-응답 요약을 수행하는 방법을 안내합니다.<br>
+🔹 공문 외에 다양한 도메인에 적용 가능하며, 사용자는 <strong>URL(Markdown)</strong> 또는 <strong>JSON</strong> 구조로 답변을 받습니다.
+</p>
+
+<h3>📌 사용 가능한 API 종류</h3>
+<p>
+🔹 <strong>/general/inner</strong>: 내부 모델을 사용하여 일반 요약 수행<br>
+🔹 <strong>/general/outer</strong>: 외부 모델(GPT, Claude, Gemini 등)을 사용하여 요약 수행
+</p>
+
+<hr>
+
+<h2>✅ 프롬프트 작성 예시</h2>
+<p>
+🔹 <strong>프롬프트 파일은 반드시 업로드</strong>해야 합니다.<br>
+🔹 [예시] 질문은 다음과 같이 구성할 수 있습니다:
+</p>
+
+<code>문서 분석
+
+[Q1] 이 문서의 주요 내용을 요약해주세요.
+
+[Q2] 발신자와 수신자 정보를 정리해주세요.
+
+[Q3] 문서에서 요청하는 주요 조치를 요약해주세요.
+
+[Q4] 날짜, 장소, 인명 등 주요 엔티티를 추출해주세요.
+
+[Q5] 이 문서의 목적이나 배경을 기술해주세요.
+</code>
+
+<hr>
+<h2>✅ Schema JSON 작성 예시</h2>
+<p>
+🔹 <strong>schema_file은 선택사항</strong>이며, JSON 형식으로 답변 받기 위해선 작성이 필요합니다.<br>
+🔹 추출이 필요한 항목과 항목의 답변을 정의할 때 사용합니다.<br>
+🔹 특수 항목은 <strong>enum</strong> 또는 <strong>type</strong> 값을 지정할 수 있습니다.
+</p>
+
+<pre>
+{
+    "title": "DocumentSummary",
+    "type": "object",
+    "properties": {
+        "공문번호": { "type": "string" },
+        "공문일자": { "type": "string" },
+        "수신처": { "type": "string" },
+        "수신자": { "type": "string" },
+        "수신자_약자": { "type": "string" },
+        "발신처": { "type": "string" },
+        "발신자": { "type": "string" },
+        "발신자_약자": { "type": "string" },
+        "공문제목": { "type": "string" },
+        "공문제목요약": { "type": "string" },
+        "공문내용요약": { "type": "string" },
+        "공문간연계": { "type": "string" },
+        "공문종류": {
+        "type": "string",
+        "enum": ["행정/일반", "기술/성과물", "회의/기타"]
+        },
+        "공문유형": {
+        "type": "string",
+        "enum": ["보고", "요청", "지시", "회신", "계약"]
+        },
+        "첨부문서제목": { "type": "string" },
+        "첨부문서수": { "type": "integer" }
+    },
+    "required": [
+        "공문번호", "공문일자", "수신처", "수신자", "수신자_약자",
+        "발신처", "발신자", "발신자_약자", "공문제목", "공문제목요약",
+        "공문내용요약", "공문종류", "공문유형", "첨부문서제목", "첨부문서수"
+    ]
+}</pre>
+<h3>📌 주요 키·속성 설명</h3>
+<p>🔹 위 JSON 예시는 <strong>Schema 구조</strong>를 정의하는 방식으로 작성되어 있으며, 각 키의 의미는 다음과 같습니다:</p>
+<ul>
+    <li><strong>title</strong>: 스키마의 이름 또는 제목을 정의합니다. 주로 문서나 데이터 객체의 이름을 지정하는 데 사용됩니다.<br>
+        [예시]: <strong>"title": "DocumentSummary"</strong> → 이 JSON은 DocumentSummary라는 이름의 구조입니다.</li>
+    <br>  
+    <li><strong>type</strong>: 이 JSON 구조 자체가 어떤 형태의 데이터인지 정의합니다.<br>
+        [예시]: <strong>"type": "object"</strong> → 이 스키마는 key-value 쌍으로 이루어진 객체(object)입니다.</li>
+    <br>
+    <li><strong>properties</strong>: 객체 안에 포함된 각 필드(속성)를 정의하는 부분입니다.<br>
+        이 안에는 각각의 필드 이름(key)과 해당 값의 <strong>type</strong> 및 <strong>enum</strong> 등 상세 정보가 포함됩니다.<br>
+        [예시]: <strong>"공문번호": { "type": "string" }</strong> → 공문번호는 문자열 타입이어야 함을 의미합니다.</li>
+    <br>
+    <ul>
+        <li><strong>type</strong>: 해당 값의 데이터 유형을 지정합니다. 주요 유형은 다음과 같습니다:
+            <ul>
+                <li><strong>string</strong>: 문자열 (예: "서울특별시")</li>
+                <li><strong>integer</strong>: 정수 (예: 3, 25)</li>
+                <li><strong>boolean</strong>: 참/거짓 값 (예: true, false)</li>
+            </ul>
+        </li>
+        <li><strong>enum</strong>: 해당 필드가 가질 수 있는 값을 목록으로 제한합니다. 지정된 값 외에는 허용되지 않습니다.
+            <br>[예시]: <strong>"공문종류": { "type": "string", "enum": ["행정/일반", "기술/성과물", "회의/기타"] }</strong>
+        </li>
+    </ul>
+    <br>
+    
+    <li><strong>required</strong>: 필수로 입력되어야 하는 항목들의 리스트입니다.<br>
+        이 배열에 나열된 필드가 누락될 경우, JSON이 유효하지 않은 것으로 간주됩니다.<br>
+        [예시]: <strong>"required": ["공문번호", "공문일자", ...]</strong> → 이 필드들은 반드시 포함되어야 합니다.</li>
+</ul>
+<p class="warn">Tip. schema json을 사용하는 경우, <strong>프롬프트의 각 항목에 대한 지시문(description)을 각각 설정</strong>해주면 더 좋습니다.</p>
+<code>  1. 공문번호: 문서 번호를 기입하세요. (예시: Ref. No. SYJV-250031)
+  2. 공문일자: 공문 발행일을 작성하세요. (예시: Mar / 28 / 2025)
+  3. 수신처: 수신 기관이나 부서명을 작성하세요. (예시: Department of Public Works and Highways)
+  ...
+  16. 첨부문서수: 첨부문서제목을 바탕으로 문서의 개수를 작성하세요.
+</code>
+
+<hr>
+
+<h2>✅ 사용 절차 안내</h2>
+<p>
+🔹 해당 API에 업로드 가능한 파일은 3가지로 구성됩니다:
+</p>
+<img src="static/image/FastAPI_general.png" alt="FastAPI general 입력 화면 예시" width="600" style="border: 2px solid #ccc; border-radius: 4px;"/>
+<h3>📌 API 첨부 파일 설명</h3>
+<ul>
+    <li><strong>input_file</strong>: <span class="warn">(필수)</span> PDF, 이미지 등 추론 대상 파일을 업로드합니다.</li>
+    <li><strong>prompt_file</strong>: <span class="warn">(필수)</span> 질문이 포함된 프롬프트 텍스트(.txt)를 업로드합니다.</li>
+    <li><strong>schema_file</strong>: <span class="warn">(선택)</span> 응답 구조를 정의한 스키마 파일(.json)을 업로드합니다.</li>
+</ul>
+
+<hr>
+
+<h2>1️⃣ Markdown 형식 응답 예시(schema file 미업로드)</h2>
+<p>
+🔹 모델은 질문에 대해 <strong>줄글 형식의 응답을 생성</strong>하며, 응답 JSON에는 다음 필드가 포함됩니다:
+</p>
+<img src="static/image/FastAPI_general_response.png" alt="FastAPI general 결과 화면 예시" width="600" style="border: 2px solid #ccc; border-radius: 4px;"/>
+<h3>📌 주요 답변 키 설명</h3>
+<ul>
+    <li><strong>generated</strong>: 마크다운 형식의 응답 텍스트</li>
+    <li><strong>summary_html</strong>: 마크다운을 HTML로 변환하여 저장한 URL</li>
+    🔗<a href="http://172.16.10.176:8888/view/generated_html/Contract_for_Main_Office.html" target="_blank">
+    http://172.16.10.176:8888/view/generated_html/Contract_for_Main_Office.html
+    </a>
+</ul>
+<img src="static/image/FastAPI_general_result.png" alt="FastAPI general 결과 화면 예시" width="600" style="border: 2px solid #ccc; border-radius: 4px;"/>
+
+<hr>
+
+<h2>2️⃣ 구조화 JSON 형식 응답 예시(schema file 업로드)</h2>
+<p>
+🔹 /general API에 <strong>schema_file</strong>을 함께 업로드한 경우, 모델은 지정된 JSON Schema에 따라 항목별 응답을 생성합니다.
+</p>
+<img src="static/image/FastAPI_general_JSONresult.png" alt="FastAPI structured 응답 예시" width="600" style="border: 2px solid #ccc; border-radius: 4px;"/>
+<h3>📌 주요 답변 키 설명</h3>
+<ul>
+    <li><strong>generated</strong>: JSON 구조의 응답 데이터</li>
+    <li><strong>processed</strong>: 구조화된 응답이므로 별도의 후처리는 생략되며, 안내 메시지만 포함됩니다.</li>
+</ul>
+<p class="warn">※ Claude 모델은 <strong>영문 필드명만 허용</strong>합니다.</p>
+
+</body>
+</html>
--- a/workspace/static/html/schema_file_guide.html
+++ b/workspace/static/html/schema_file_guide.html
@@ -0,0 +1,98 @@
+<!DOCTYPE html>
+<html lang="ko">
+<head>
+    <meta charset="UTF-8">
+    <title>🧾 스키마 파일 작성 가이드</title>
+    <style>
+        body { font-family: 'Arial', sans-serif; margin: 40px; line-height: 1.6; }
+        h1, h2 { color: #2c3e50; }
+        code, pre { background: #f4f4f4; padding: 10px; display: block; white-space: pre-wrap; border-left: 4px solid #3498db; }
+        .warn { color: #c0392b; font-weight: bold; }
+    </style>
+</head>
+<body>
+<h1>🧾 JSON Schema file 작성 가이드</h1>
+<p>
+🔹 JSON Schema는 AI 모델이 생성해야 할 <strong>응답의 구조를 정의</strong>할 때 사용됩니다.<br>
+🔹 schema_file을 설정하면 문서에서 추출해야 할 항목과 각 항목의 데이터 형식을 명확하게 지정할 수 있습니다.
+</p>
+<h3>📌 사용 되는 API 종류</h3>
+<p>
+🔹 <strong>/extract/structured</strong>
+</p>
+
+<hr>
+<h2>✅ Schema JSON 작성 예시</h2>
+<p>🔹 [예시] 공문 요약을 위한 JSON Schema 작성 예시입니다:</p>
+
+<pre>{
+  "title": "DocumentSummary",
+  "type": "object",
+  "properties": {
+    "공문번호": { "type": "string" },
+    "공문일자": { "type": "string" },
+    "수신처": { "type": "string" },
+    "수신자": { "type": "string" },
+    "수신자_약자": { "type": "string" },
+    "발신처": { "type": "string" },
+    "발신자": { "type": "string" },
+    "발신자_약자": { "type": "string" },
+    "공문제목": { "type": "string" },
+    "공문제목요약": { "type": "string" },
+    "공문내용요약": { "type": "string" },
+    "공문간연계": { "type": "string" },
+    "공문종류": {
+      "type": "string",
+      "enum": ["행정/일반", "기술/성과물", "회의/기타"]
+    },
+    "공문유형": {
+      "type": "string",
+      "enum": ["보고", "요청", "지시", "회신", "계약"]
+    },
+    "첨부문서제목": { "type": "string" },
+    "첨부문서수": { "type": "integer" }
+  },
+  "required": [
+    "공문번호", "공문일자", "수신처", "수신자", "수신자_약자",
+    "발신처", "발신자", "발신자_약자", "공문제목", "공문제목요약",
+    "공문내용요약", "공문종류", "공문유형", "첨부문서제목", "첨부문서수"
+  ]
+}</pre>
+
+<hr>
+
+<h3>📌 주요 키 설명</h3>
+<p>🔹 위 JSON 예시는 <strong>Schema 구조</strong>를 정의하는 방식으로 작성되어 있으며, 각 키의 의미는 다음과 같습니다:</p>
+<ul>
+  <li><strong>title</strong>: JSON 스키마의 이름 또는 제목을 정의합니다. 일반적으로 문서나 데이터 객체의 이름으로 사용됩니다.</li>
+  <li><strong>type</strong>: 이 JSON 전체 구조가 어떤 데이터 형태인지 지정합니다. 예: object, array, string 등.</li>
+  <li><strong>properties</strong>: 객체 내부에 포함된 각 항목(필드)을 정의하는 공간입니다. 각 항목에 대해 <strong>type</strong>이나 <strong>enum</strong>을 지정할 수 있습니다.</li>
+  <li><strong>required</strong>: 필수로 입력되어야 할 항목을 배열 형태로 나열합니다. 이 <strong>항목들이 누락되면 JSON 유효성 검사에서 실패</strong>하게 됩니다.</li>
+</ul>
+
+<hr>
+
+<h3>📌 필드 속성 설명</h3>
+<p>🔹 각 항목에 정의되는 <strong>type</strong>과 <strong>enum</strong>의 의미는 다음과 같습니다:</p>
+<ul>
+  <li><strong>type</strong>: 해당 필드의 데이터 유형을 명시합니다. 주요 유형은 다음과 같습니다:
+    <ul>
+      <li><strong>string</strong>: 문자열 값 (예: "서울특별시")</li>
+      <li><strong>integer</strong>: 정수 값 (예: 3, 25)</li>
+      <li><strong>boolean</strong>: 참/거짓 논리값 (예: true, false)</li>
+    </ul>
+  </li>
+  <li><strong>enum</strong>: 해당 항목이 가질 수 있는 값을 제한할 때 사용합니다. 배열로 허용 가능한 값을 정의하며, 그 외 값은 허용되지 않습니다.<br>
+    예: <strong>"공문종류"는 "행정/일반", "기술/성과물", "회의/기타" 중 하나여야 함</strong>
+  </li>
+</ul>
+
+<p class="warn">Tip. 프롬프트 작성 시 각 항목에 대한 <strong>지시문(description)</strong>을 따로 설정하면 AI 응답의 품질이 더욱 향상됩니다.</p>
+<code>  1. 공문번호: 문서 번호를 기입하세요. (예시: Ref. No. SYJV-250031)
+  2. 공문일자: 공문 발행일을 작성하세요. (예시: Mar / 28 / 2025)
+  3. 수신처: 수신 기관이나 부서명을 작성하세요. (예시: Department of Public Works and Highways)
+  ...
+  16. 첨부문서수: 첨부문서제목을 바탕으로 문서의 개수를 작성하세요.
+</code>
+</body>
+</html>
--- a/workspace/static/image/FastAPI_extract_structured_swagger.png
+++ b/workspace/static/image/FastAPI_extract_structured_swagger.png
--- a/workspace/static/image/FastAPI_extract_swagger.png
+++ b/workspace/static/image/FastAPI_extract_swagger.png
--- a/workspace/static/image/FastAPI_general.png
+++ b/workspace/static/image/FastAPI_general.png
--- a/workspace/static/image/FastAPI_general_JSONresult.png
+++ b/workspace/static/image/FastAPI_general_JSONresult.png
--- a/workspace/static/image/FastAPI_general_response.png
+++ b/workspace/static/image/FastAPI_general_response.png
--- a/workspace/static/image/FastAPI_general_result.png
+++ b/workspace/static/image/FastAPI_general_result.png
--- a/workspace/static/image/logo.png
+++ b/workspace/static/image/logo.png
--- a/workspace/static/prompt/d6c_test_prompt_eng.txt
+++ b/workspace/static/prompt/d6c_test_prompt_eng.txt
@@ -0,0 +1,30 @@
+Instructions:
+- Accuracy is critically important.
+- The reference number must be extracted only from the line that starts with "Ref. No."
+- For items marked as “Korean”, the answer must be written in Korean.
+- Output only the following 13 fields, no more, no less.
+- If the information is unknown, write "확인필요". If it is clearly missing, write "없음".
+
+1. 공문 번호: Extract only the "Ref. No." line in the format "ENG-NUM"
+2. 공문 일자: YYYY.MM.DD
+3. 수신자: Extract the job title of the recipient only
+4. 수신자 약자: Abbreviation of recipient's title
+5. 발신자: Extract the job title of the sender only from the signature block at the end of the document
+- Do not extract contact persons mentioned in the body
+- Do not include the organization names
+6. 발신자 약자: Abbreviation of sender's title
+7. 공문 제목: Extract only the first line or the first bold phrase directly following the “Subject:” label, Do NOT include secondary lines or explanatory phrases, such as project names or descriptions.
+8. 공문 제목 요약: Write a 10–20 character summary in Korean
+9. 공문 내용 요약: Provide a brief summary in Korean   
+10. 공문간 연계: Write "있음" only if the content of the document explicitly mentions, references, or responds to another document, Do not infer linkage based on date similarity, numbering (e.g., “PH-00”), or reference format alone.
+If no explicit mention of another document is found in the content, write "없음"
+11. 공문 종류: Choose one of the following
+-행정/일반=for administrative topics such as personnel, dispatch, budget, contracts
+-기술/성과물=for technical discussions, schedules, deliverables, technical meetings
+12. 공문 유형: Choose one from
+-보고=One-way communication of facts or plans
+-요청=Requests or inquiries to the recipient
+-지시=Orders or commands from authority
+-회신=Replies or feedback to prior documents
+-계약=Official correspondence related to contract terms
+13. 첨부문서 수: Provide the number only
--- a/workspace/static/prompt/default_prompt_v0.1.txt
+++ b/workspace/static/prompt/default_prompt_v0.1.txt
@@ -0,0 +1,30 @@
+다음은 스캔된 문서에서 OCR로 추출된 원시 텍스트입니다.  
+오타나 줄바꿈 오류가 있을 수 있으니 의미를 유추하여 정확한 정보를 추출해주세요.
+정확성이 매우 중요하므로 반드시 공문에 포함된 텍스트만 사용하여 작성해주세요.  
+
+다음 주어진 항목을 반드시 JSON 형식(```json)으로 작성해주세요:
+
+1. 공문 번호: 공문 번호로 Ref. No.를 의미합니다. 없는 경우는 없음으로 표기해주세요. (예시: Ref. No. SYJV-250031)
+2. 공문 일자: 공문에 적혀 있는 날짜입니다. 번역하지 않고 그대로 표기해주세요. (예시: Mar / 28 / 2025)
+3. 수신처: 공문을 받는 사람이 속한 조직명 (예시: Department of Public Works and Highways)
+4. 수신자: 공문을 받은 사람의 직책 (예시: Project Director)
+5. 수신자(약자): 수신자 직책 약자 (예시: PD)
+6. 발신처: 공문을 보낸 사람이 속한 조직명 (예시: SEOYOUNG JOINT VENTURE)
+7. 발신자: 공문을 보낸 사람의 직책 (예시: Team Leader)
+8. 발신자(약자): 발신자 직책 약자 (예시: TL)
+9. 공문 제목: 공문의 제목으로 SUBJECT 의미합니다. 적당한 길이로 끊어야 하는데 윗 문장이 프로젝트 이름으로 판단되는 경우, 9.1 프로젝트 항목을 신설해 리턴 (예시: Submission of Comment Matrix for Design Deliverable)
+10. 공문 제목 요약: 공문 제목을 10~20자 사이로 요약해주세요. 반드시 한글로 작성합니다.
+11. 공문 내용 요약: 공문 내용을 요약해주세요. 반드시 한글로 작성합니다.
+12. 공문간 연계: 연계된 공문이 있으면 공문번호를 알려주세요. 공문번호만 필요합니다. 없는 경우는 없음으로 표기해주세요.
+13. 공문 종류: 공문 종류는 공문의 내용을 분석해서 다음 3가지 중 반드시 하나를 선택합니다.
+  * 행정/일반 – 인사, 파견, 조직, 비용(예산), 계약 등 경영/행정 관련 
+  * 기술/성과물 – 일정 협의, 작업계획, 성과물 제출, 기술적 업무 회의, 성과물 전달 등 
+  * 회의/기타 – 회의록 등 위에 내용 이외의 것
+14. 공문 유형: 공문 유형은 공문의 내용을 분석해서 다음의 5가지 중 반드시 하나를 선택합니다.
+  * 보고 : 완료된 사실이나 계획을 일방적으로 알리는 공문
+  * 요청 : 상대방의 행동 또는 답변을 유도하는 공문
+  * 지시 : 권한 있는 주체가 수행을 명령하는 공문
+  * 회신 : 기존 공문에 대해 응답하거나 의견을 제공하는 공문
+  * 계약 : 계약조건 변경과 관련된 공식 공문
+15. 첨부문서제목: 공문의 첨부 문서는 Enclosures: 를 의미합니다. 없는 경우는 없음으로 표기해주세요. (예시: 1. Comment Matrix_4.4.2 Draft Detailed Engineer Design Report (Section A) )
+16. 첨부문서수: 찾은 첨부문서 개수를 알려주세요.
--- a/workspace/static/prompt/i18n_test_prompt_kor.txt
+++ b/workspace/static/prompt/i18n_test_prompt_kor.txt
@@ -0,0 +1,24 @@
+주의:
+- **정확성이 매우 중요합니다.**
+- 한글로 작성하라고 명시된 항목은 반드시 한글로 작성해야 합니다.
+- 반드시 아래 **1~10번 항목만** 출력하며, 절대 누락하지 마세요.
+- 항목을 알 수 없으면 "확인필요", 항목이 문서에 존재하지 않으면 "없음"이라고 작성하세요.
+
+1. 공문 번호
+2. 공문 일자: YYYY.MM.DD 
+3. 수신자
+4. 발신자: 담당
+5. 공문 제목
+6. 공문 내용 요약: **한글로** 간단하게 요약
+7. 공문간 연계: 다른 공문과의 연관이 명시되어 있으면 "있음", 없으면 "없음"으로 작성
+8. 공문 종류: 아래 중 하나를 선택
+   - 행정/일반: 인사, 파견, 조직, 예산, 계약 등 행정 관련 내용
+   - 기술/성과물: 일정, 작업계획, 성과물 제출, 기술 업무 등
+9. 공문 유형: 아래 중 하나를 선택
+   - 보고: 완료된 사실이나 계획을 알리는 경우
+   - 요청: 상대방의 행동이나 응답을 요구하는 경우
+   - 지시: 권한 있는 주체가 수행을 명령하는 경우
+   - 회신: 기존 공문에 대한 응답이나 의견인 경우
+   - 계약: 계약 조건 변경과 관련된 공문
+10. 첨부문서 수: 숫자만 작성
+
--- a/workspace/static/prompt/structured_prompt_v0.1.txt
+++ b/workspace/static/prompt/structured_prompt_v0.1.txt
@@ -0,0 +1,28 @@
+다음은 스캔된 문서에서 OCR로 추출된 원시 텍스트입니다.  
+오타나 줄바꿈 오류가 있을 수 있으니 의미를 유추하여 정확한 정보를 추출해주세요.
+정확성이 매우 중요하므로 반드시 공문에 포함된 텍스트만 사용하여 작성해주세요.  
+
+1. 공문 번호: 공문 번호로 Ref. No.를 의미합니다. 없는 경우는 없음으로 표기해주세요. (예시: Ref. No. SYJV-250031)
+2. 공문 일자: 공문에 적혀 있는 날짜입니다. 번역하지 않고 그대로 표기해주세요. (예시: Mar / 28 / 2025)
+3. 수신처: 공문을 받는 사람이 속한 조직명 (예시: Department of Public Works and Highways)
+4. 수신자: 공문을 받은 사람의 직책 (예시: Project Director)
+5. 수신자(약자): 수신자 직책 약자 (예시: PD)
+6. 발신처: 공문을 보낸 사람이 속한 조직명 (예시: SEOYOUNG JOINT VENTURE)
+7. 발신자: 공문을 보낸 사람의 직책 (예시: Team Leader)
+8. 발신자(약자): 발신자 직책 약자 (예시: TL)
+9. 공문 제목: 공문의 제목으로 SUBJECT를 의미합니다. 적당한 길이로 끊어야 하는데 윗 문장이 프로젝트 이름으로 판단되는 경우, 9.1 프로젝트 항목을 신설해 리턴 (예시: Submission of Comment Matrix for Design Deliverable)
+10. 공문 제목 요약: 공문 제목을 10~20자 사이로 요약해주세요. 반드시 한글로 작성합니다.
+11. 공문 내용 요약: 공문 내용을 요약해주세요. 반드시 한글로 작성합니다.
+12. 공문간 연계: 연계된 공문이 있으면 공문번호를 알려주세요. 공문번호만 필요합니다. 없는 경우는 없음으로 표기해주세요.
+13. 공문 종류: 공문 종류는 공문의 내용을 분석해서 다음 3가지 중 반드시 하나를 선택합니다.
+  * 행정/일반 – 인사, 파견, 조직, 비용(예산), 계약 등 경영/행정 관련 
+  * 기술/성과물 – 일정 협의, 작업계획, 성과물 제출, 기술적 업무 회의, 성과물 전달 등 
+  * 회의/기타 – 회의록 등 위에 내용 이외의 것
+14. 공문 유형: 공문 유형은 공문의 내용을 분석해서 다음의 5가지 중 반드시 하나를 선택합니다.
+  * 보고 : 완료된 사실이나 계획을 일방적으로 알리는 공문
+  * 요청 : 상대방의 행동 또는 답변을 유도하는 공문
+  * 지시 : 권한 있는 주체가 수행을 명령하는 공문
+  * 회신 : 기존 공문에 대해 응답하거나 의견을 제공하는 공문
+  * 계약 : 계약조건 변경과 관련된 공식 공문
+15. 첨부문서제목: 공문의 첨부 문서는 Enclosures: 를 의미합니다. 없는 경우는 없음으로 표기해주세요. (예시: 1. Comment Matrix_4.4.2 Draft Detailed Engineer Design Report (Section A) )
+16. 첨부문서수: 찾은 첨부문서 개수를 알려주세요.
--- a/workspace/static/structured_schema.json
+++ b/workspace/static/structured_schema.json
@@ -0,0 +1,34 @@
+{
+    "title": "DocumentSummary",
+    "type": "object",
+    "properties": {
+      "공문번호": { "type": "string" },
+      "공문일자": { "type": "string" },
+      "수신처": { "type": "string" },
+      "수신자": { "type": "string" },
+      "수신자_약자": { "type": "string" },
+      "발신처": { "type": "string" },
+      "발신자": { "type": "string" },
+      "발신자_약자": { "type": "string" },
+      "공문제목": { "type": "string" },
+      "공문제목요약": { "type": "string" },
+      "공문내용요약": { "type": "string" },
+      "공문간연계": { "type": "string" },
+      "공문종류": {
+        "type": "string",
+        "enum": ["행정/일반", "기술/성과물", "회의/기타"]
+      },
+      "공문유형": {
+        "type": "string",
+        "enum": ["보고", "요청", "지시", "회신", "계약"]
+      },
+      "첨부문서제목": { "type": "string" },
+      "첨부문서수": { "type": "integer" }
+    },
+    "required": [
+      "공문번호", "공문일자", "수신처", "수신자", "수신자_약자",
+      "발신처", "발신자", "발신자_약자", "공문제목", "공문제목요약",
+      "공문내용요약", "공문종류", "공문유형", "첨부문서제목", "첨부문서수"
+    ]
+  }
+  
--- a/workspace/utils/init.py
+++ b/workspace/utils/init.py
--- a/workspace/utils/checking_files.py
+++ b/workspace/utils/checking_files.py
@@ -0,0 +1,57 @@
+import os
+from io import BytesIO
+
+import tiktoken
+from config.setting import ALLOWED_EXTENSIONS
+from fastapi import HTTPException, UploadFile
+
+
+def validate_all_files(*upload_files: UploadFile):
+    for upload_file in upload_files:
+        if not upload_file:
+            continue
+
+        _, ext = os.path.splitext(upload_file.filename.lower())
+
+        if ext not in ALLOWED_EXTENSIONS:
+            raise HTTPException(
+                status_code=400,
+                detail=(
+                    f"파일 '{upload_file.filename}'은(는) 지원하지 않는 확장자입니다. "
+                    f"허용된 확장자는 {', '.join(ALLOWED_EXTENSIONS)} 입니다."
+                ),
+            )
+
+
+def token_counter(prompt: str, text: str) -> int:
+    try:
+        enc = tiktoken.get_encoding("cl100k_base")  # OpenAI 기반 tokenizer
+        token_count = len(enc.encode(prompt + text))
+    except Exception:
+        token_count = len(prompt + text) // 4  # fallback: 대략적 추정
+    return token_count
+
+
+# ✅ UploadFile을 대신할 수 있는 간단한 래퍼 클래스
+class SimpleUploadFile:
+    def __init__(
+        self,
+        filename: str,
+        content: bytes,
+        content_type: str = "application/octet-stream",
+    ):
+        self.filename = filename
+        self.file = BytesIO(content)
+        self.content_type = content_type
+
+
+# ✅ UploadFile 객체 복사 → SimpleUploadFile로 변환
+def clone_upload_file(upload_file: UploadFile) -> SimpleUploadFile:
+    file_bytes = upload_file.file.read()
+    upload_file.file.seek(0)
+
+    return SimpleUploadFile(
+        filename=upload_file.filename,
+        content=file_bytes,
+        content_type=upload_file.content_type,
+    )
--- a/workspace/utils/checking_keys.py
+++ b/workspace/utils/checking_keys.py
@@ -0,0 +1,78 @@
+import logging
+import os
+
+from dotenv import load_dotenv
+from fastapi import HTTPException, Security
+from fastapi.security import APIKeyHeader
+from services.api_key_service import validate_api_key
+from snowflake import SnowflakeGenerator
+
+logger = logging.getLogger(__name__)
+load_dotenv()
+
+# .env 파일에서 관리자 API 키를 로드
+ADMIN_API_KEY = os.getenv("ADMIN_API_KEY")
+
+# 헤더 설정
+api_key_header = APIKeyHeader(
+    name="X-API-KEY", auto_error=False, description="Client-specific API Key"
+)
+admin_api_key_header = APIKeyHeader(
+    name="X-Admin-KEY", auto_error=False, description="Key for administrative tasks"
+)
+
+
+def get_api_key(api_key: str = Security(api_key_header)):
+    """요청 헤더의 X-API-KEY가 유효한지 Redis를 통해 검증합니다."""
+    if not validate_api_key(api_key):
+        logger.warning(f"유효하지 않은 API 키로 접근 시도: {api_key}")
+        raise HTTPException(status_code=401, detail="Invalid or missing API Key")
+    return api_key
+
+
+def get_admin_key(admin_key: str = Security(admin_api_key_header)):
+    """관리자용 API 키를 검증합니다."""
+    if not ADMIN_API_KEY:
+        logger.error(
+            "ADMIN_API_KEY가 서버에 설정되지 않았습니다. 관리자 API를 사용할 수 없습니다."
+        )
+        raise HTTPException(status_code=500, detail="Server configuration error")
+
+    if not admin_key or admin_key != ADMIN_API_KEY:
+        logger.warning("유효하지 않은 관리자 키로 관리 API 접근 시도.")
+        raise HTTPException(status_code=403, detail="Not authorized for this operation")
+    return admin_key
+
+
+class APIKeyLoader:
+    @staticmethod
+    def load_gemini_key() -> str:
+        key = os.getenv("GEMINI_API_KEY")
+        if not key:
+            logger.error("GEMINI_API_KEY 환경 변수가 설정되지 않았습니다.")
+            raise ValueError("GEMINI_API_KEY 환경 변수가 설정되지 않았습니다.")
+        return key
+
+    @staticmethod
+    def load_claude_key() -> str:
+        key = os.getenv("ANTHROPIC_API_KEY")
+        if not key:
+            logger.error("ANTHROPIC_API_KEY 환경 변수가 설정되지 않았습니다.")
+            raise ValueError("ANTHROPIC_API_KEY 환경 변수가 설정되지 않았습니다.")
+        return key
+
+    @staticmethod
+    def load_gpt_key() -> str:
+        key = os.getenv("OPENAI_API_KEY")
+        if not key:
+            logger.error("OPENAI_API_KEY 환경 변수가 설정되지 않았습니다.")
+            raise ValueError("OPENAI_API_KEY 환경 변수가 설정되지 않았습니다.")
+        return key
+
+
+def create_key(node: int = 1) -> str:
+    """
+    Snowflake 알고리즘 기반 고유 키 생성기 (request_id용)
+    """
+    generator = SnowflakeGenerator(node)
+    return str(next(generator))
--- a/workspace/utils/image_converter.py
+++ b/workspace/utils/image_converter.py
@@ -0,0 +1,35 @@
+import io
+from pathlib import Path
+from typing import List
+
+import httpx
+
+
+async def prepare_images_from_file(
+    file_url: str, filename: str, max_pages: int = 5, dpi: int = 180
+) -> List[bytes]:
+    """presigned URL → bytes. PDF이면 앞쪽 max_pages 페이지만 이미지로 변환하여 bytes 리스트 반환"""
+    async with httpx.AsyncClient() as client:
+        resp = await client.get(file_url, timeout=None)
+        resp.raise_for_status()
+        file_bytes = resp.content
+
+    ext = Path(filename).suffix.lower()
+    if ext in [".pdf", ".tif", ".tiff"]:
+        try:
+            from pdf2image import convert_from_bytes
+        except ImportError as e:
+            raise RuntimeError(
+                "pdf2image가 필요합니다. `pip install pdf2image poppler-utils`"
+            ) from e
+
+        pil_images = convert_from_bytes(file_bytes, dpi=dpi)
+        images = []
+        for i, im in enumerate(pil_images[:max_pages]):
+            buf = io.BytesIO()
+            im.save(buf, format="PNG")
+            images.append(buf.getvalue())
+        return images
+    else:
+        # 단일 이미지
+        return [file_bytes]
--- a/workspace/utils/logging_utils.py
+++ b/workspace/utils/logging_utils.py
@@ -0,0 +1,182 @@
+import csv
+import json
+import logging
+import time
+from datetime import datetime
+from pathlib import Path
+
+import redis
+from config.setting import PGN_REDIS_DB, PGN_REDIS_HOST, PGN_REDIS_PORT
+from fastapi import Depends, Request
+
+from utils.checking_keys import get_api_key
+from utils.request_utils import get_client_ip, get_swagger_port
+
+logger = logging.getLogger(__name__)
+
+redis_client = redis.Redis(
+    host=PGN_REDIS_HOST, port=PGN_REDIS_PORT, db=PGN_REDIS_DB, decode_responses=True
+)
+
+
+def log_user_request(
+    request_info: str,
+    endpoint: str,
+    input_filename: str,
+    model: str,
+    prompt_filename: str,
+    context_length: int,
+    api_key: str,
+):
+    client_ip = get_client_ip(request_info)
+    swagger_port = get_swagger_port(request_info)
+
+    # ✅ 1. CSV 파일 저장
+    logs_dir = Path("./logs")
+    logs_dir.mkdir(exist_ok=True)
+
+    csv_path = logs_dir / f"{client_ip}.csv"
+    new_file = not csv_path.exists()
+
+    with open(csv_path, mode="a", newline="", encoding="utf-8") as f:
+        writer = csv.writer(f)
+        if new_file:
+            writer.writerow(
+                [
+                    "timestamp",
+                    "swagger_port",
+                    "endpoint",
+                    "input_filename",
+                    "prompt_filename",
+                    "model",
+                    "context_length",
+                    "api_key",
+                ]
+            )
+        writer.writerow(
+            [
+                time.strftime("%Y-%m-%d %H:%M:%S"),
+                swagger_port,
+                endpoint,
+                input_filename,
+                prompt_filename,
+                model,
+                context_length,
+                api_key,
+            ]
+        )
+
+    # ✅ 2. Loki용 JSON 로그 출력
+    logger.info(
+        json.dumps(
+            {
+                "ip": client_ip,
+                "swagger_port": swagger_port,
+                "endpoint": endpoint,
+                "input_filename": input_filename,
+                "prompt_filename": prompt_filename,
+                "model": model,
+                "context_length": context_length,
+                "api_key": api_key,
+                "event": "inference_log",
+            }
+        )
+    )
+
+
+def log_generation_info(custom_mode: bool, user_prompt: str = ""):
+    logger.info(f"[GENERATE-PROMPT-USED] 사용자 정의 프롬프트 사용유무: {custom_mode}")
+
+    if custom_mode:
+        logger.info(f"[GENERATE-USER-PROMPT]\n{user_prompt}")
+    else:
+        logger.info("[GENERATE-DEFAULT-PROMPT] Default_prompt")
+
+
+def log_pipeline_status(request_id: str, status_message: str, step_info: dict = None):
+    log_entry = {
+        "status": status_message,
+        "timestamp": datetime.now().isoformat(),
+        "step_info": step_info,
+    }
+    redis_client.rpush(f"pipeline_status:{request_id}", json.dumps(log_entry))
+
+
+def ns_to_sec(ns: int) -> float:
+    """나노초를 초로 변환"""
+    return round(ns / 1e9, 3)  # 소수점 3자리
+
+
+def log_ollama_stats(res: dict):
+    """Ollama 응답 JSON 내 추론 통계를 한 줄 JSON 로그로 출력 (초 단위 변환 + token/s 포함)"""
+    # 원본 값
+    total_duration = res.get("total_duration")
+    load_duration = res.get("load_duration")
+    prompt_eval_count = res.get("prompt_eval_count")
+    prompt_eval_duration = res.get("prompt_eval_duration")
+    eval_count = res.get("eval_count")
+    eval_duration = res.get("eval_duration")
+
+    # 초 단위로 변환
+    stats = {
+        "model": res.get("model"),
+        "total_duration_ns": total_duration,
+        "total_duration_sec": ns_to_sec(total_duration),
+        "load_duration_ns": load_duration,
+        "load_duration_sec": ns_to_sec(load_duration),
+        "prompt_eval_count": prompt_eval_count,
+        "prompt_eval_duration_ns": prompt_eval_duration,
+        "prompt_eval_duration_sec": ns_to_sec(prompt_eval_duration),
+        "eval_count": eval_count,
+        "eval_duration_ns": eval_duration,
+        "eval_duration_sec": ns_to_sec(eval_duration),
+    }
+
+    # token/s 계산
+    if eval_count and eval_duration:
+        stats["generation_speed_tok_per_sec"] = round(
+            eval_count / (eval_duration / 1e9), 2
+        )
+
+    logger.info("[OLLAMA-STATS] " + json.dumps(stats, ensure_ascii=False))
+
+
+class EndpointLogger:
+    def __init__(self, request: Request, api_key: str = Depends(get_api_key)):
+        self.request = request
+        self.api_key = api_key
+
+    def log(
+        self,
+        model: str,
+        input_filename: str = "N/A",
+        prompt_filename: str = "N/A",
+        context_length: int = 0,
+    ):
+        try:
+            log_user_request(
+                request_info=self.request,
+                endpoint=self.request.url.path,
+                input_filename=input_filename,
+                model=model,
+                prompt_filename=prompt_filename,
+                context_length=context_length,
+                api_key=self.api_key,
+            )
+        except Exception as e:
+            logger.warning(
+                f"Failed to log request for endpoint {self.request.url.path}: {e}"
+            )
+
+class HealthCheckFilter(logging.Filter):
+    def filter(self, record: logging.LogRecord) -> bool:
+        # The access log record for uvicorn has the data in `args`.
+        # record.args = (client_addr, method, path, http_version, status_code)
+        # e.g. ('127.0.0.1:37894', 'GET', '/health/API', '1.1', 200)
+        if isinstance(record.args, tuple) and len(record.args) == 5:
+            method = record.args[1]
+            path = record.args[2]
+            status_code = record.args[4]
+            if method == 'GET' and isinstance(path, str) and path.startswith('/health') and status_code == 200:
+                return False
+        return True
--- a/workspace/utils/minio_utils.py
+++ b/workspace/utils/minio_utils.py
@@ -0,0 +1,164 @@
+import io
+import json
+import logging
+from datetime import timedelta
+from typing import Optional
+
+from config.setting import (
+    MINIO_ACCESS_KEY,
+    MINIO_ENDPOINT,
+    MINIO_RESULTS_BUCKET_NAME,
+    MINIO_SECRET_KEY,
+)
+from fastapi import UploadFile
+from minio import Minio
+from minio.error import S3Error
+
+# MinIO 클라이언트 전역 생성
+minio_client = Minio(
+    MINIO_ENDPOINT,
+    access_key=MINIO_ACCESS_KEY,
+    secret_key=MINIO_SECRET_KEY,
+    secure=False,
+)
+
+logger = logging.getLogger(__name__)
+
+
+def get_minio_client():
+    """
+    MinIO 클라이언트를 반환합니다. 연결 확인을 위해 list_buckets() 호출로 테스트합니다.
+    """
+    try:
+        client = Minio(
+            MINIO_ENDPOINT,
+            access_key=MINIO_ACCESS_KEY,
+            secret_key=MINIO_SECRET_KEY,
+            secure=False,  # HTTPS 사용 여부에 맞게 설정
+        )
+
+        # ✅ 연결 테스트 (버킷 목록 조회)
+        client.list_buckets()
+
+        return client
+    except Exception as e:
+        raise RuntimeError(f"MinIO 연결 실패: {e}")
+
+
+def save_result_to_minio(result_dict: dict, object_name: str) -> str:
+    """
+    결과 JSON(dict)을 BytesIO로 인코딩하여 MinIO에 저장하고 presigned URL 반환
+    """
+    try:
+        # JSON -> BytesIO
+        result_bytes = io.BytesIO(
+            json.dumps(result_dict, ensure_ascii=False).encode("utf-8")
+        )
+        result_bytes.seek(0)
+
+        # MinIO에 업로드
+        minio_client.put_object(
+            bucket_name=MINIO_RESULTS_BUCKET_NAME,
+            object_name=object_name,
+            data=result_bytes,
+            length=result_bytes.getbuffer().nbytes,
+            content_type="application/json",
+        )
+
+        # presigned URL 생성
+        presigned_url = minio_client.presigned_get_object(
+            MINIO_RESULTS_BUCKET_NAME,
+            object_name,
+        )
+        return presigned_url
+
+    except Exception as e:
+        logger.error(f"❌ MinIO 작업 실패: {e}")
+        raise
+
+
+def upload_file_to_minio_v2(
+    file: UploadFile, bucket_name: str, object_name: str
+) -> str:
+    """
+    파일을 MinIO에 업로드하고, presigned URL을 반환합니다.
+
+    Args:
+        file (UploadFile): FastAPI의 UploadFile 객체
+        bucket_name (str): 업로드할 버킷 이름
+        object_name (str): 저장될 객체 이름 (경로 포함 가능)
+
+    Returns:
+        str: 생성된 presigned URL
+    """
+    try:
+        # 1. 버킷 존재 확인 및 생성
+        found = minio_client.bucket_exists(bucket_name)
+        if not found:
+            minio_client.make_bucket(bucket_name)
+            logger.info(f"✅ 버킷 '{bucket_name}' 생성 완료.")
+
+        # 2. 파일 업로드
+        file.file.seek(0)  # 파일 포인터를 처음으로 이동
+        minio_client.put_object(
+            bucket_name,
+            object_name,
+            file.file,
+            length=-1,  # 파일 크기를 모를 때 -1로 설정
+            part_size=10 * 1024 * 1024,  # 10MB 단위로 청크 업로드
+        )
+        logger.info(f"✅ '{object_name}' -> '{bucket_name}' 업로드 성공.")
+
+        # 3. Presigned URL 생성
+        presigned_url = minio_client.presigned_get_object(
+            bucket_name,
+            object_name,
+            expires=timedelta(days=7),  # URL 만료 기간 (예: 7일, 필요에 따라 조절 가능)
+        )
+        logger.info(f"✅ Presigned URL 생성 완료: {presigned_url}")
+
+        return presigned_url
+
+    except Exception as e:
+        logger.error(f"❌ MinIO 작업 실패: {e}")
+        raise  # 실패 시 예외를 다시 발생시켜 호출 측에서 처리하도록 함
+
+
+def fetch_result_from_minio(request_id: str) -> Optional[dict]:
+    try:
+        # MinIO에서 객체 목록 가져오기 (폴더 내 전체 파일 조회)
+        objects = minio_client.list_objects(
+            bucket_name=MINIO_RESULTS_BUCKET_NAME,
+            prefix=f"{request_id}/",
+            recursive=True,
+        )
+
+        json_obj = next(
+            (obj for obj in objects if obj.object_name.endswith(".json")), None
+        )
+        if not json_obj:
+            logger.warning(
+                f"[MINIO] request_id: {request_id} 경로에 .json 파일이 존재하지 않습니다."
+            )
+            return None
+
+        object_name = json_obj.object_name
+        print(
+            f"[MINIO] request_id: {request_id}에 대한 결과를 가져옵니다. 대상 파일: {object_name}"
+        )
+        # 객체 다운로드
+        response = minio_client.get_object(MINIO_RESULTS_BUCKET_NAME, object_name)
+        content = response.read()
+
+        # JSON 디코드
+        result_dict = json.loads(content.decode("utf-8"))
+
+        logger.info(f"[MINIO] 결과 JSON 로드 성공: {object_name}")
+        return result_dict
+
+    except S3Error as e:
+        logger.error(f"[MINIO] S3Error 발생: {e}")
+        return None
+    except Exception as e:
+        logger.error(f"[MINIO] 기타 오류 발생: {e}")
+        return None
--- a/workspace/utils/prompt_cache.py
+++ b/workspace/utils/prompt_cache.py
@@ -0,0 +1,32 @@
+import hashlib
+import os
+
+from workspace.config.setting import CACHED_PROMPT_DIR
+
+
+# ✅ 프롬프트 캐시 저장 디렉토리가 없으면 자동 생성
+def ensure_cache_dir():
+    os.makedirs(CACHED_PROMPT_DIR, exist_ok=True)
+
+
+# ✅ 파일에서 바이트를 읽어옴 (UploadFile 또는 SimpleUploadFile 모두 대응)
+def read_file_bytes(upload_file) -> bytes:
+    upload_file.file.seek(0)
+    return upload_file.file.read()
+
+
+# ✅ SHA-256 해시 생성
+def compute_file_hash(upload_file) -> str:
+    content = read_file_bytes(upload_file)
+    return hashlib.sha256(content).hexdigest()
+
+
+# ✅ {해시}.txt 형태로 저장
+def save_prompt_file_if_not_exists(file_hash: str, upload_file) -> str:
+    ensure_cache_dir()
+    file_path = os.path.join(CACHED_PROMPT_DIR, f"{file_hash}.txt")
+    if not os.path.exists(file_path):
+        content = read_file_bytes(upload_file)
+        with open(file_path, "wb") as f:
+            f.write(content)
+    return file_path
--- a/workspace/utils/redis_utils.py
+++ b/workspace/utils/redis_utils.py
@@ -0,0 +1,22 @@
+# utils/redis_utils.py
+
+import redis
+from config.setting import PGN_REDIS_DB, PGN_REDIS_HOST, PGN_REDIS_PORT
+
+
+def get_redis_client():
+    """
+    Redis 클라이언트를 반환합니다. decode_responses=True 설정으로 문자열을 자동 디코딩합니다.
+    """
+    try:
+        redis_client = redis.Redis(
+            host=PGN_REDIS_HOST,
+            port=PGN_REDIS_PORT,
+            db=PGN_REDIS_DB,
+            decode_responses=True,
+        )
+        # 연결 확인 (ping)
+        redis_client.ping()
+        return redis_client
+    except redis.ConnectionError as e:
+        raise RuntimeError(f"Redis 연결 실패: {e}")
--- a/workspace/utils/request_utils.py
+++ b/workspace/utils/request_utils.py
@@ -0,0 +1,27 @@
+from fastapi import Request
+
+
+# 🔽 사용자 IP 확인
+def get_client_ip(request: Request) -> str:
+    xff = request.headers.get("X-Forwarded-For")
+    if xff:
+        return xff.split(",")[0].strip()  # 첫 번째 값(실제 클라이언트 IP)
+    xri = request.headers.get("X-Real-IP")
+    if xri:
+        return xri  # Nginx가 전달한 원래 클라이언트 IP
+    return request.client.host  # 마지막 fallback (프록시/NAT IP)
+
+
+# 🔽 요청 SWAGGER 포트 확인
+def get_swagger_port(request: Request) -> str:
+    # 우선순위: X-Forwarded-Port → request.url.port → Host 헤더 → 기본 포트 추정
+    port = request.headers.get("X-Forwarded-Port")
+    if port:
+        return port
+    if request.url.port:
+        return str(request.url.port)
+    host_header = request.headers.get("host")
+    if host_header and ":" in host_header:
+        return host_header.split(":")[1]
+    # 마지막으로 기본 포트(HTTPS 443, HTTP 80) 추정
+    return "443" if request.headers.get("X-Forwarded-Proto") == "https" else "80"
--- a/workspace/utils/text_formatter.py
+++ b/workspace/utils/text_formatter.py
@@ -0,0 +1,21 @@
+class PromptFormatter:
+    SYSTEM_PROMPT = """
+다음은 스캔된 공문서에서 OCR로 추출된 원시 텍스트입니다.  
+오타나 줄바꿈 오류가 있을 수 있으니 의미를 유추하여 정확한 정보를 추출해주세요.
+
+다음 주어진 항목을 JSON 형식(```json)으로 작성해주세요:
+"""
+
+    @staticmethod
+    def format(text: str, user_prompt: str = None, custom_mode: bool = False, prompt_mode: str = "extract") -> str:
+        if custom_mode and prompt_mode == "extract":
+            return (
+                f"{PromptFormatter.SYSTEM_PROMPT}\n\n"
+                f"{user_prompt}\n\n"
+                f"다음은 OCR로 추출된 원시 텍스트입니다:\n\n{text}"
+            )
+        else:
+            return (
+                f"{user_prompt}\n\n"
+                f"다음은 OCR로 추출된 원시 텍스트입니다:\n\n{text}"
+            )
--- a/workspace/utils/text_generator.py
+++ b/workspace/utils/text_generator.py
@@ -0,0 +1,479 @@
+import copy
+import json
+import logging
+from collections import OrderedDict
+from typing import Optional
+
+import anthropic
+import google.generativeai as genai
+import requests
+from anthropic._exceptions import BadRequestError, OverloadedError
+from fastapi import HTTPException
+from google.api_core.exceptions import ResourceExhausted
+from openai import OpenAI
+
+from utils.checking_keys import APIKeyLoader
+from utils.logging_utils import log_generation_info, log_ollama_stats
+from utils.text_formatter import PromptFormatter
+
+logger = logging.getLogger(__name__)
+
+
+# ✅ 1. Ollama Gen
+class OllamaGenerator:
+    def __init__(
+        self, model="gemma3:27b", api_url="http://pgn_ollama_gemma:11534/api/generate"
+    ):
+        self.model = model
+        self.api_url = api_url
+
+    # ✅ 1-1. Gen-General
+    def generate(
+        self, text, user_prompt=None, custom_mode=False, prompt_mode: str = "extract"
+    ):
+        log_generation_info(custom_mode, user_prompt)
+        prompt = PromptFormatter.format(text, user_prompt, custom_mode, prompt_mode)
+
+        # /no_think 자동 부착
+        if "qwen" in self.model.lower():
+            prompt += " /no_think"
+
+        payload = {"model": self.model, "prompt": prompt, "stream": False}
+        try:
+            response = requests.post(self.api_url, json=payload)
+            response.raise_for_status()
+
+            res = response.json()
+            if "response" not in res:
+                raise ValueError(
+                    "[GENERATE-OLLAMA-ERROR] LLM 응답에 'response' 키가 없습니다."
+                )
+
+            log_ollama_stats(res)
+
+            return res["response"], self.model, self.api_url
+        # ☑️ GEMINI API 초과 시, exception
+        except Exception as e:
+            logger.error(f"[OLLAMA-ERROR] 서버 연결 실패: {e}")
+            raise HTTPException(
+                status_code=500,
+                detail="Ollama 서빙 서버에 연결할 수 없습니다.\n서버가 실행 중인지 확인하세요.",
+            )
+
+    # ✅ 1-2. Gen-Structure
+    def structured_generate(
+        self,
+        text,
+        user_prompt=None,
+        custom_mode=False,
+        schema_override: Optional[dict] = None,
+    ):
+        log_generation_info(custom_mode, user_prompt)
+        prompt = PromptFormatter.format(text, user_prompt, custom_mode)
+
+        payload = {
+            "model": self.model,
+            "prompt": prompt,
+            "format": schema_override,
+            "stream": False,
+        }
+
+        response = requests.post(self.api_url, json=payload)
+        response.raise_for_status()
+
+        res = response.json()
+        if "response" not in res:
+            raise ValueError(
+                "[GENERATE-OLLAMA-ERROR] LLM 응답에 'response' 키가 없습니다."
+            )
+
+        # ✅ 추론 통계 정보 로그 추가
+        log_ollama_stats(res)
+
+        # ✅ 클래스 검증 제거 → JSON 파싱만 수행
+        try:
+            structured = json.loads(res["response"])
+            return structured, self.model, self.api_url
+        except json.JSONDecodeError as e:
+            logger.error(f"[PARSE-ERROR] LLM 응답이 JSON으로 파싱되지 않음: {e}")
+            raise ValueError("LLM 응답이 JSON 형식이 아닙니다.")
+
+
+# ✅ 2. Gemini Gen
+class GeminiGenerator:
+    def __init__(self, model="gemini-2.5-pro-exp-03-25"):
+        self.api_key = APIKeyLoader.load_gemini_key()
+        genai.configure(api_key=self.api_key)
+        self.model = genai.GenerativeModel(model)
+
+    def clean_schema_for_gemini(self, schema: dict) -> dict:
+        # Gemini는 title 등 일부 필드를 허용하지 않음
+        cleaned = dict(schema)  # shallow copy
+        cleaned.pop("title", None)
+        cleaned.pop("$schema", None)
+        # 기타 필요 시 추가 제거
+        return cleaned
+
+    # ✅ 2-1. Gen-General
+    def generate(
+        self, text, user_prompt=None, custom_mode=False, prompt_mode: str = "extract"
+    ):
+        log_generation_info(custom_mode, user_prompt)
+        prompt = PromptFormatter.format(text, user_prompt, custom_mode, prompt_mode)
+
+        try:
+            response = self.model.generate_content(prompt)
+
+            if not response.text:
+                raise ValueError(
+                    "[GENERATE-GEMINI-ERROR] LLM 응답에 'response' 가 없습니다."
+                )
+            return (
+                response.text,
+                self.model.model_name.split("/")[-1],
+                "google.generativeai SDK",
+            )
+
+        # ☑️ GEMINI API 초과 시, exception
+        except ResourceExhausted as e:
+            logger.error(f"[GEMINI-ERROR] 할당량 초과: {e}")
+            raise HTTPException(
+                status_code=500,
+                detail="Gemini 모델의 일일 API 사용량이 초과되었습니다.\n'claude-3-7-sonnet-20250219' 또는 'gpt-4.1' 모델로 다시 시도하세요.",
+            )
+
+    # ✅ 2-2. Gen-Structure
+    def structured_generate(
+        self,
+        text,
+        user_prompt=None,
+        custom_mode=False,
+        schema_override: Optional[dict] = None,
+    ):
+        log_generation_info(custom_mode, user_prompt)
+        prompt = PromptFormatter.format(text, user_prompt, custom_mode)
+
+        response_schema = (
+            self.clean_schema_for_gemini(schema_override) if schema_override else None
+        )
+
+        try:
+            response = self.model.generate_content(
+                contents=prompt,
+                generation_config=genai.GenerationConfig(
+                    response_mime_type="application/json",
+                    response_schema=response_schema,
+                ),
+            )
+
+            if not response.text:
+                raise ValueError(
+                    "❌ Gemini 응답에서 구조화된 데이터를 파싱하지 못했습니다."
+                )
+
+            parsed = json.loads(response.text)
+            if isinstance(parsed, list) and isinstance(parsed[0], dict):
+                structured = parsed[0]
+
+            elif isinstance(parsed, dict):
+                structured = parsed
+
+            elif isinstance(parsed, list) and isinstance(parsed[0], str):
+                structured = json.loads(parsed[0])
+
+            else:
+                raise ValueError("❌ 응답 형식이 예상과 다릅니다.")
+
+            # ✅ 필드 순서 정렬
+            if schema_override and "properties" in schema_override:
+                ordered_keys = list(schema_override["properties"].keys())
+                structured = OrderedDict(
+                    (key, structured.get(key)) for key in ordered_keys
+                )
+
+            return (
+                structured,
+                self.model.model_name.split("/")[-1],
+                "google.generativeai SDK",
+            )
+
+        # ☑️ GEMINI API 초과 시, exception
+        except ResourceExhausted as e:
+            logger.error(f"[GEMINI-STRUCTURED-ERROR] 할당량 초과: {e}")
+            raise HTTPException(
+                status_code=500,
+                detail="'Gemini' 모델의 일일 API 사용량이 초과되었습니다.\n'claude-3-7-sonnet-20250219' 또는 'gpt-4.1' 모델로 다시 시도하세요.",
+            )
+
+        except json.JSONDecodeError as e:
+            logger.error(f"[GEMINI-STRUCTURED-PARSE-ERROR] 응답 JSON 파싱 실패: {e}")
+            raise ValueError("Gemini 응답이 JSON 형식이 아닙니다.")
+
+    def generate_multimodal(self, images, prompt, schema_override=None):
+        import io
+        from PIL import Image
+
+        content = [prompt]
+        for image_bytes in images:
+            try:
+                img = Image.open(io.BytesIO(image_bytes))
+                content.append(img)
+            except Exception as e:
+                logger.error(f"[GEMINI-MULTIMODAL-ERROR] 이미지 처리 실패: {e}")
+                raise HTTPException(
+                    status_code=400, detail=f"이미지 파일을 처리할 수 없습니다: {e}"
+                )
+
+        try:
+            response = self.model.generate_content(content)
+
+            if not response.text:
+                raise ValueError(
+                    "[GENERATE-GEMINI-ERROR] LLM 응답에 'response' 가 없습니다."
+                )
+            return (
+                response.text,
+                self.model.model_name.split("/")[-1],
+                "google.generativeai SDK",
+            )
+
+        except ResourceExhausted as e:
+            logger.error(f"[GEMINI-MULTIMODAL-ERROR] 할당량 초과: {e}")
+            raise HTTPException(
+                status_code=500,
+                detail="Gemini 모델의 일일 API 사용량이 초과되었습니다.\n'claude-3-7-sonnet-20250219' 또는 'gpt-4.1' 모델로 다시 시도하세요.",
+            )
+        except Exception as e:
+            logger.error(f"[GEMINI-MULTIMODAL-ERROR] Gemini 응답 파싱 실패: {e}")
+            raise HTTPException(
+                status_code=500, detail=f"❌ Gemini 응답 생성에 실패했습니다: {e}"
+            )
+
+# ✅ 3. Cluade Gen
+class ClaudeGenerator:
+    """Generate text or structured data through the Anthropic Messages API.
+
+    Both generation methods return a ``(result, model_name, sdk_label)`` tuple.
+    """
+
+    def __init__(self, model="claude-3-7-sonnet-20250219"):
+        self.api_key = APIKeyLoader.load_claude_key()
+        self.client = anthropic.Anthropic(api_key=self.api_key)
+        self.model = model
+
+    # ✅ 3-1. Gen-General
+    def generate(
+        self, text, user_prompt=None, custom_mode=False, prompt_mode: str = "extract"
+    ):
+        """Send a single-turn prompt and return the text of the first content block.
+
+        Args:
+            text: Source text handed to ``PromptFormatter.format``.
+            user_prompt: Optional caller-supplied prompt.
+            custom_mode: Forwarded to the prompt formatter and the generation log.
+            prompt_mode: Prompt style forwarded to the prompt formatter.
+
+        Returns:
+            ``(generated_text, model_name, "anthropic.Anthropic SDK")``.
+
+        Raises:
+            ValueError: If the first content block carries no text.
+            HTTPException: 500 when the SDK raises ``BadRequestError`` or
+                ``OverloadedError``.
+        """
+        log_generation_info(custom_mode, user_prompt)
+        prompt = PromptFormatter.format(text, user_prompt, custom_mode, prompt_mode)
+
+        try:
+            response = self.client.messages.create(
+                model=self.model,
+                max_tokens=4096,
+                messages=[{"role": "user", "content": prompt}],
+            )
+
+            generated = response.content[0].text
+            if not generated:
+                raise ValueError(
+                    "[GENERATE-CLAUDE-ERROR] LLM 응답에 'response' 가 없습니다."
+                )
+
+            return generated, self.model, "anthropic.Anthropic SDK"
+
+        # ☑️ Claude rejected the request or reported that it is overloaded.
+        except (BadRequestError, OverloadedError) as e:
+            # Tagged like this method's ValueError above rather than with the
+            # structured-generation tag.
+            logger.error("[GENERATE-CLAUDE-ERROR] Claude API 에러 발생: %s", e)
+            raise HTTPException(
+                status_code=500,
+                detail="Claude 모델의 일일 API 사용량이 초과되었습니다.\n'gemini-2.5-pro-exp-03-25' 또는 'gpt-4.1' 모델로 다시 시도하세요.",
+            ) from e
+
+    # ✅ 3-2. Gen-Structure
+    def structured_generate(
+        self,
+        text,
+        user_prompt=None,
+        custom_mode=False,
+        schema_override: Optional[dict] = None,
+    ):
+        """Force a tool call so the model answers with schema-shaped data.
+
+        Args:
+            text: Source text handed to ``PromptFormatter.format``.
+            user_prompt: Optional caller-supplied prompt.
+            custom_mode: Forwarded to the prompt formatter and the generation log.
+            schema_override: JSON Schema used as the tool's ``input_schema``.
+                Its top-level property names must be ASCII-only.
+
+        Returns:
+            ``(structured_input, model_name, "anthropic.Anthropic SDK")`` where
+            ``structured_input`` is the ``input`` of the first content block.
+
+        Raises:
+            HTTPException: 400 when a property name contains a non-ASCII
+                character or the schema cannot be inspected; 500 when the SDK
+                raises ``BadRequestError`` or ``OverloadedError``.
+        """
+        log_generation_info(custom_mode, user_prompt)
+        prompt = PromptFormatter.format(text, user_prompt, custom_mode)
+
+        # ✅ Claude requires every JSON Schema key to be written in English.
+        if schema_override:
+            try:
+                has_non_ascii_key = any(
+                    any(ord(ch) > 127 for ch in key)
+                    for key in schema_override.get("properties", {}).keys()
+                )
+            except Exception as e:
+                # Only genuine inspection failures (malformed schema) land here.
+                raise HTTPException(
+                    status_code=400, detail=f"스키마 처리 중 오류 발생: {str(e)}"
+                ) from e
+
+            # Raised outside the try block so this message reaches the client
+            # unchanged instead of being re-wrapped by the generic handler.
+            if has_non_ascii_key:
+                raise HTTPException(
+                    status_code=400,
+                    detail="❌ Claude 모델은 JSON Schema의 필드명이 영어로만 구성되어 있어야 합니다. 필드명을 영문으로 수정해 주세요.",
+                )
+
+        tools = [
+            {
+                "name": "build_text_analysis_result",
+                "description": "Extract structured fields from OCR text in document format",
+                "input_schema": schema_override,
+            }
+        ]
+        try:
+            response = self.client.messages.create(
+                model=self.model,
+                max_tokens=4096,
+                messages=[{"role": "user", "content": prompt}],
+                tools=tools,
+                tool_choice={"type": "tool", "name": "build_text_analysis_result"},
+            )
+
+            structured = response.content[0].input
+            return structured, self.model, "anthropic.Anthropic SDK"
+
+        # ☑️ Claude rejected the request or reported that it is overloaded.
+        except (BadRequestError, OverloadedError) as e:
+            logger.error("[CLAUDE-STRUCTURED-ERROR] Claude API 에러 발생: %s", e)
+            raise HTTPException(
+                status_code=500,
+                detail="Claude 모델의 일일 API 사용량이 초과되었습니다.\n'gemini-2.5-pro-exp-03-25' 또는 'gpt-4.1' 모델로 다시 시도하세요.",
+            ) from e
+
+
+
+# ✅ 4. GPT Gen
+class GptGenerator:
+    """Generate text or structured data through the OpenAI Python SDK.
+
+    Every generation method returns a ``(result, model_name, sdk_label)`` tuple.
+    """
+
+    def __init__(self, model="gpt-4o"):
+        # ✅ Load the OpenAI API key and validate it before building the client.
+        raw = APIKeyLoader.load_gpt_key()
+        if not raw:
+            raise RuntimeError("OPENAI_API_KEY가 설정되지 않았습니다.")
+        self.api_key = raw.strip()
+        # "sk-proj-" keys also begin with "sk-", so one prefix check covers both.
+        if not self.api_key.startswith("sk-"):
+            raise RuntimeError("유효하지 않은 OpenAI API Key 형식입니다.")
+
+        self.client = OpenAI(api_key=self.api_key)
+        self.model = model
+
+    def enforce_strict_schema(self, schema: dict) -> dict:
+        """Return a copy of ``schema`` adjusted for strict function calling.
+
+        Every key under ``properties`` is listed in ``required`` and
+        ``additionalProperties`` defaults to ``False`` when absent. The input
+        is never mutated.
+
+        ``required`` keeps its original entries first, in their original order,
+        followed by the remaining property names in declaration order, so the
+        result is identical from run to run.
+
+        NOTE(review): only the top level is normalised; nested object schemas
+        are passed through unchanged.
+        """
+        strict_schema = copy.deepcopy(schema)
+
+        # ✅ Complete `required`; dict.fromkeys de-duplicates and keeps order.
+        already_required = strict_schema.get("required", [])
+        property_names = strict_schema.get("properties", {})
+        strict_schema["required"] = list(
+            dict.fromkeys([*already_required, *property_names])
+        )
+
+        # ✅ Guarantee additionalProperties without overriding an explicit value.
+        strict_schema.setdefault("additionalProperties", False)
+
+        return strict_schema
+
+    # ✅ 4-1. Gen-General
+    def generate(
+        self, text, user_prompt=None, custom_mode=False, prompt_mode: str = "extract"
+    ):
+        """Send the formatted prompt through the Responses API and return its text.
+
+        Returns:
+            ``(generated_text, model_name, "OpenAI Python SDK")``.
+
+        Raises:
+            RuntimeError: If the API call fails, or if the response has no text
+                at ``output[0].content[0]``.
+        """
+        log_generation_info(custom_mode, user_prompt)
+        prompt = PromptFormatter.format(text, user_prompt, custom_mode, prompt_mode)
+
+        try:
+            response = self.client.responses.create(model=self.model, input=prompt)
+        except Exception as e:
+            logger.error("[GENERATE-GPT-ERROR] OpenAI API 호출 중 예외 발생: %s", e)
+            raise RuntimeError("GPT 생성 요청 중 오류가 발생했습니다.") from e
+
+        try:
+            # Attribute or index errors on an unexpected response shape are
+            # treated the same way as an empty answer.
+            if (
+                not response.output
+                or not response.output[0].content
+                or not response.output[0].content[0].text
+            ):
+                raise ValueError("LLM 응답에 'response'가 없습니다.")
+        except Exception as e:
+            logger.error(
+                "[GENERATE-GPT-ERROR] 응답 파싱 실패: %s | 원본 응답: %s", e, response
+            )
+            raise RuntimeError("GPT 응답 파싱 중 오류가 발생했습니다.") from e
+
+        return response.output[0].content[0].text, self.model, "OpenAI Python SDK"
+
+    # ✅ 4-2. Gen-Structure
+    def structured_generate(
+        self,
+        text,
+        user_prompt=None,
+        custom_mode=False,
+        schema_override: Optional[dict] = None,
+    ):
+        """Force a ``build_summary`` function call and return its parsed arguments.
+
+        Args:
+            text: Source text handed to ``PromptFormatter.format``.
+            user_prompt: Optional caller-supplied prompt.
+            custom_mode: Forwarded to the prompt formatter and the generation log.
+            schema_override: JSON Schema for the function parameters; it is run
+                through ``enforce_strict_schema``. An empty schema is sent when
+                it is missing or empty.
+
+        Returns:
+            ``(structured_dict, model_name, "OpenAI Function Calling")``.
+
+        Raises:
+            HTTPException: 500 on any failure while calling the API or decoding
+                the tool-call arguments.
+        """
+        log_generation_info(custom_mode, user_prompt)
+        prompt = PromptFormatter.format(text, user_prompt, custom_mode)
+
+        schema = self.enforce_strict_schema(schema_override) if schema_override else {}
+
+        # ✅ Pass schema_override to the model as a function-calling tool.
+        tools = [
+            {
+                "type": "function",
+                "function": {
+                    "name": "build_summary",
+                    "description": "Extract structured document summary from OCR text.",
+                    "parameters": schema,
+                    "strict": True,
+                },
+            }
+        ]
+
+        try:
+            response = self.client.beta.chat.completions.parse(
+                model=self.model,
+                messages=[
+                    {
+                        "role": "system",
+                        "content": "You are an assistant that extracts structured document summary from OCR text.",
+                    },
+                    {"role": "user", "content": prompt},
+                ],
+                tools=tools,
+                tool_choice={"type": "function", "function": {"name": "build_summary"}},
+            )
+
+            tool_call = response.choices[0].message.tool_calls[0]
+            structured = json.loads(tool_call.function.arguments)
+
+            return structured, self.model, "OpenAI Function Calling"
+
+        except Exception as e:
+            logger.error("[GPT-STRUCTURED-ERROR] GPT 응답 파싱 실패: %s", e)
+            raise HTTPException(
+                status_code=500, detail="❌ GPT 구조화 응답 생성에 실패했습니다."
+            ) from e
+
+    def generate_multimodal(self, images, prompt, schema_override=None):
+        """Send a prompt plus images to Chat Completions and return the reply text.
+
+        Args:
+            images: Iterable of raw image bytes; each one is embedded as a
+                base64 data URL declared as ``image/png``.
+            prompt: Text part of the user message.
+            schema_override: Accepted but not used by this method.
+
+        Returns:
+            ``(generated_text, model_name, "OpenAI Python SDK")``.
+
+        Raises:
+            HTTPException: 500 on any failure while calling the API.
+        """
+        import base64
+
+        content = [{"type": "text", "text": prompt}]
+        content.extend(
+            {
+                "type": "image_url",
+                "image_url": {
+                    "url": f"data:image/png;base64,{base64.b64encode(image_bytes).decode('utf-8')}"
+                },
+            }
+            for image_bytes in images
+        )
+
+        messages = [{"role": "user", "content": content}]
+
+        try:
+            response = self.client.chat.completions.create(
+                model=self.model, messages=messages, max_tokens=4096
+            )
+            generated_text = response.choices[0].message.content
+            return generated_text, self.model, "OpenAI Python SDK"
+        except Exception as e:
+            logger.error("[GPT-MULTIMODAL-ERROR] GPT-4o 응답 파싱 실패: %s", e)
+            raise HTTPException(
+                status_code=500, detail="❌ GPT-4o 응답 생성에 실패했습니다."
+            ) from e
--- a/workspace/utils/text_processor.py
+++ b/workspace/utils/text_processor.py
@@ -0,0 +1,134 @@
+import json
+import logging
+import re
+from pathlib import Path
+from typing import Literal
+
+import markdown2
+from workspace.config.setting import SUMMARY_HTML_DIR
+
+logger = logging.getLogger(__name__)
+
+
+def safe_filename(filename: str) -> str:
+    """Derive an ``.html`` file name from ``filename``.
+
+    Directory parts and the final extension are dropped, then every character
+    that is neither a word character nor a hyphen becomes an underscore.
+    """
+    stem = Path(filename).stem
+    sanitized = re.compile(r"[^\w\-]").sub("_", stem)
+    return sanitized + ".html"
+
+
+def _clean_field_key(key):
+    """Strip everything except Hangul and ASCII letters from a field name."""
+    return re.sub(r"[^ㄱ-ㅎ가-힣a-zA-Z]", "", key)
+
+
+def _parse_json_code_block(generated_text):
+    """Return cleaned fields from the first ```json fenced object, or ``{}``."""
+    try:
+        logger.debug("[PROCESS-JSON] JSON 코드블럭 형식 후처리 진행합니다.")
+        json_block = re.search(
+            r"```json\s*(\{.*?\})\s*```", generated_text, re.DOTALL
+        )
+        if json_block:
+            parsed_json = json.loads(json_block.group(1))
+            return {_clean_field_key(k): v for k, v in parsed_json.items()}
+    except Exception as e:
+        # Best effort: a malformed block leaves the processed result empty.
+        # The exception is passed through a %s placeholder so that logging can
+        # format the record.
+        logger.error("[PROCESS-ERROR] JSON 코드블럭 파싱 실패: %s", e)
+    return {}
+
+
+def _parse_numbered_fields(generated_text):
+    """Parse ``"1.제목: 값"`` / ``"1. 제목: 값"`` style output into a dict."""
+    fields = {}
+    # Split only on newlines that are followed by a numbered "key:" header.
+    blocks = re.split(r"\n(?=\d+\.\s*[^:\n]+:)", generated_text.strip())
+
+    for block in blocks:
+        if ":" not in block:
+            continue
+        key_line, _, raw_value = block.partition(":")
+        key = re.sub(r"^\d+\.\s*", "", key_line).strip()
+
+        # Drop leading punctuation or markup in front of the value.
+        value = re.sub(r"^[^\w가-힣a-zA-Z]+", "", raw_value.strip()).strip()
+
+        fields[_clean_field_key(key)] = value
+
+    return fields
+
+
+def post_process(
+    file_name,
+    text,
+    generated_text,
+    coord,
+    ocr_model,
+    llm_model,
+    llm_url,
+    mode,
+    start_time,
+    end_time,
+    prompt_mode: Literal["general", "extract"] = "extract",
+):
+    """Build the response payload for one LLM run, post-processing its output.
+
+    The ``processed`` entry depends on the mode:
+
+    * ``mode == "structured"``: a fixed notice; the output is not parsed.
+    * ``prompt_mode == "general"``: the output is rendered from Markdown to
+      HTML, written under ``SUMMARY_HTML_DIR``, and a URL to it is returned.
+    * otherwise (extract): fields are parsed from a ```json fenced object when
+      one is present, a fixed notice is returned when the output is the
+      "input too long" message, and numbered ``"1. key: value"`` lines are
+      parsed in every other case.
+
+    Args:
+        file_name: Name of the processed file; also used for the HTML name.
+        text: OCR text, returned under ``parsed``.
+        generated_text: Raw LLM output, returned under ``generated``.
+        coord: Returned unchanged under ``fields``.
+        ocr_model: OCR model identifier.
+        llm_model: LLM model identifier.
+        llm_url: Returned under ``api_url``.
+        mode: Run mode; also used as the ``"{mode}_model"`` key prefix.
+        start_time: Start timestamp (numeric).
+        end_time: End timestamp (numeric).
+        prompt_mode: ``"general"`` or ``"extract"``.
+
+    Returns:
+        dict with ``filename``, ``{mode}_model``, ``time``, ``fields``,
+        ``parsed``, ``generated`` and ``processed``.
+    """
+    # ✅ Structured mode skips post-processing.
+    if mode == "structured":
+        result_dict = {
+            "message": "✅ 구조화된 JSON 모델 출력입니다. post_process 후처리 생략됨.",
+            "note": "generated 필드 참조 바랍니다.",
+        }
+
+    # ✅ General (free-text) inference mode.
+    elif prompt_mode == "general":
+        html_content = markdown2.markdown(generated_text.strip())
+        html_filename = safe_filename(file_name)
+        html_path = SUMMARY_HTML_DIR / html_filename
+        html_path.write_text(html_content, encoding="utf-8")
+
+        summary_url = f"http://172.16.10.176:8888/view/generated_html/{html_filename}"
+
+        result_dict = {
+            "message": "✅ 줄글로 생성된 모델 출력입니다. post_process 후처리 생략됨.",
+            "note": "아래 url에 접속하여 markdown 형식으로 응답 확인하세요.",
+            "summary_html": summary_url,
+        }
+
+    # ✅ Extraction-based post-processing (extract).
+    elif "```json" in generated_text:
+        result_dict = _parse_json_code_block(generated_text)
+
+    # ✅ Detect the "input too long, model call skipped" message.
+    elif "입력 텍스트가" in generated_text and "모델 호출 생략" in generated_text:
+        result_dict = {
+            "message": "⚠️ 입력 텍스트가 너무 깁니다. LLM 모델 호출을 생략했습니다.",
+            "note": "OCR로 추출된 원본 텍스트(parsed)를 참고해 주세요.",
+        }
+
+    else:
+        logger.debug("[PROCESS-STRING] JSON 코드블럭 형식이 아닙니다.")
+        result_dict = _parse_numbered_fields(generated_text)
+
+    json_data = {
+        "filename": file_name,
+        f"{mode}_model": {
+            "ocr_model": ocr_model,
+            "llm_model": llm_model,
+            "api_url": llm_url,
+        },
+        "time": {
+            "duration_sec": f"{end_time - start_time:.2f}",
+            "started_at": start_time,
+            "ended_at": end_time,
+        },
+        "fields": coord,
+        "parsed": text,
+        "generated": generated_text,
+        "processed": result_dict,
+    }
+
+    # final_result
+    logger.info(json.dumps(json_data["processed"], indent=2, ensure_ascii=False))
+
+    return json_data
+
+
+def ocr_process(filename, ocr_model, coord, text, start_time, end_time):
+    """Assemble the response payload for an OCR-only run.
+
+    Returns a dict with the file name, the OCR model, timing information
+    (duration formatted to two decimals), ``coord`` under ``fields`` and the
+    OCR text under ``parsed``.
+    """
+    elapsed = end_time - start_time
+    timing = {
+        "duration_sec": f"{elapsed:.2f}",
+        "started_at": start_time,
+        "ended_at": end_time,
+    }
+    model_info = {"ocr_model": ocr_model}
+
+    return {
+        "filename": filename,
+        "model": model_info,
+        "time": timing,
+        "fields": coord,
+        "parsed": text,
+    }