first commit

2025-10-27 09:39:16 +09:00
commit a756bc9c11
69 changed files with 5714 additions and 0 deletions
--- a/.env
+++ b/.env
@@ -0,0 +1,46 @@
+APP_VERSION=v1.2508.1
+
+# This is an example environment file.
+# Copy this file to .env and fill in the actual values.
+
+# SECURITY: never commit real credentials. Any key that was ever committed to
+# version control must be treated as leaked and rotated at its provider.
+
+# --- LLM Gateway API Keys ---
+# The ADMIN_API_KEY is required for administrative functions of the application.
+ADMIN_API_KEY=your-admin-api-key
+USER_API_TEST_KEY=your-user-test-api-key
+
+# The following keys are for making requests to external LLM services.
+# They are not used directly by the gateway's core logic but should be set for full functionality.
+GEMINI_API_KEY=your-gemini-api-key
+ANTHROPIC_API_KEY=your-anthropic-api-key
+OPENAI_API_KEY=your-openai-api-key
+
+
+# --- LLM Gateway Redis ---
+# Must match docker-compose.yml. Alternatives: llm_gateway_redis, or 127.0.0.1 with port 6382 for a local uvicorn run.
+REDIS_HOST=llm_gateway_test_redis
+REDIS_PORT=6379
+REDIS_DB=2
+
+
+# --- MinIO Storage ---
+# Connection details for your MinIO instance.
+MINIO_ENDPOINT=172.16.10.175:9000
+MINIO_ACCESS_KEY=your-minio-access-key
+MINIO_SECRET_KEY=your-minio-secret-key
+MINIO_BUCKET_NAME=ocr-gateway
+MINIO_RESULTS_BUCKET_NAME=ocr-gateway-results
+
+# --- OCR Service (Optional) ---
+# Local uvicorn run: OCR_API_URL=http://localhost:8890/ocr, OCR_REDIS_HOST=localhost, OCR_REDIS_PORT=6381.
+OCR_API_URL=http://ocr_gateway_test:8880/ocr
+OCR_REDIS_HOST=ocr_gateway_test_redis
+OCR_REDIS_PORT=6379
+OCR_REDIS_DB=0
+
+
+# --- Ollama Services (Optional) ---
+# Comma-separated list of Ollama API endpoints.
+OLLAMA_URLS="http://localhost:11534/api/generate,http://localhost:11634/api/generate,http://localhost:11734/api/generate"
--- a/.gitattributes
+++ b/.gitattributes
@@ -0,0 +1,32 @@
+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text
+*.pdf filter=lfs diff=lfs merge=lfs -text
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,30 @@
+# 캐시 및 임시 파일 무시
+__pycache__/
+**/__pycache__/
+**/**/__pycache__/
+*.py[cod]
+.ruff_cache/
+.pytest_cache/
+# 로그/업로드 디렉토리 무시
+src/static/html/generated/
+minio/
+logs/
+cached/
+temp_upload/
+test/
+
+# Loki 관련 무시
+loki/
+**/loki/
+
+# 기타
+.DS_Store
+api_keys.json
+docker-compose_minio.yml
+
+# Environment files hold credentials: ignore .env and the port-specific .env.8888 / .env.8889
+.env
+.env.888[89]
+
+.venv
+TODO.md
--- a/.python-version
+++ b/.python-version
@@ -0,0 +1 @@
+3.12
--- a/36
+++ b/36
@@ -0,0 +1,36 @@
+FROM astral/uv:0.8.9-python3.12-bookworm
+
+WORKDIR /workspace
+
+# Documentation only: matches the default port in CMD and the port published by docker-compose.yml.
+EXPOSE 8877
+
+ENV PYTHONUNBUFFERED=1
+
+# Install system packages (curl is what the docker-compose.yml healthcheck calls).
+RUN apt-get update && \
+    apt-get install -y --no-install-recommends \
+        curl poppler-utils vim && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/*
+
+# Create the unprivileged runtime user and hand it /opt and /workspace in a single layer.
+RUN groupadd -g 1001 user && \
+    useradd -r -u 1001 -g user user && \
+    chown -R user:user /opt /workspace
+
+# Install dependencies before copying the sources so this layer stays cached across code changes.
+COPY pyproject.toml .
+RUN uv pip install --system --no-cache -r pyproject.toml
+
+# Copy the local src directory to /workspace/src, owned by the runtime user (no extra chown layer).
+COPY --chown=user:user src ./src
+
+# Run from the source directory and put it on PYTHONPATH so `api:app` is importable.
+WORKDIR /workspace/src
+ENV PYTHONPATH=/workspace/src
+USER user
+
+# The shell is only needed to expand ${PORT}; `exec` replaces it so uvicorn
+# runs as PID 1 and receives SIGTERM from `docker stop`.
+CMD ["sh", "-c", "exec uvicorn api:app --workers 4 --host 0.0.0.0 --port ${PORT:-8877} --log-config config/log_config.yaml"]
--- a/README.md
+++ b/README.md
@@ -0,0 +1,169 @@
+# LLM Gateway API
+
+## 프로젝트 개요
+
+이 프로젝트는 `ocr_gateway_test`와 연동하여 OCR 모델을 테스트하는 데 사용되는 API 게이트웨이입니다.
+
+## 설치 및 실행 방법
+
+이 프로젝트는 Docker Compose를 사용하여 각 서비스를 컨테이너 환경에서 실행합니다. 로컬에 Python 가상 환경(`venv`)을 구성할 필요가 없습니다.
+
+1.  **Docker 설치**: 시스템에 Docker와 Docker Compose가 설치되어 있어야 합니다.
+
+2.  **환경 변수 설정**: 프로젝트 루트의 `.env` 파일을 각 환경에 맞게 설정합니다.
+
+3.  **서비스 실행**: 아래 명령어를 사용하여 모든 서비스를 백그라운드에서 실행합니다.
+
+    ```bash
+    docker compose up -d
+    ```
+
+4.  **서비스 로그 확인**: 아래 명령어로 서비스 로그를 확인할 수 있습니다.
+
+    ```bash
+    docker compose logs -f
+    ```
+
+5.  **서비스 종료**: 실행 중인 서비스를 중지하려면 아래 명령어를 사용합니다.
+    ```bash
+    docker compose down
+    ```
+
+## 프로젝트 구조
+
+```
+llm_gateway_test/
+├───.env
+├───.gitattributes
+├───.gitignore
+├───.python-version
+├───api_keys.json
+├───docker-compose.yml
+├───Dockerfile
+├───pyproject.toml
+├───README.md
+├───requirements.lock
+├───uv.lock
+├───logs/
+└───src/
+    ├───api_keys.json
+    ├───api.py                      # FastAPI 애플리케이션 진입점
+    ├───config/                     # 설정 관련 모듈
+    │   ├───__init__.py
+    │   ├───log_config.yaml
+    │   └───setting.py
+    ├───interface/                  # 사용자 인터페이스 (Streamlit 등)
+    │   ├───__init__.py
+    │   └───streamlit_ui.py
+    ├───routers/                    # FastAPI 라우터 (API 엔드포인트 정의)
+    │   ├───__init__.py
+    │   ├───api_key_router.py
+    │   ├───download_router.py
+    │   ├───dummy_router.py
+    │   ├───extract_router.py
+    │   ├───general_router.py
+    │   ├───guide_router.py
+    │   ├───llm_summation.py
+    │   ├───model_router.py
+    │   ├───ocr_router.py
+    │   ├───stt_router.py
+    │   └───yolo_router.py
+    ├───services/                   # 비즈니스 로직 처리 서비스
+    │   ├───__init__.py
+    │   ├───api_key_service.py
+    │   ├───download_service.py
+    │   ├───dummy_service.py
+    │   ├───inference_service.py
+    │   ├───model_service.py
+    │   ├───pipeline_runner.py
+    │   ├───prompt.py
+    │   └───report.py
+    ├───static/                     # 정적 파일 (가이드 HTML, 예제 등)
+    └───utils/                      # 공용 유틸리티 모듈
+```
+
+## 주요 API 엔드포인트
+
+### 상태 확인 및 가이드
+
+| 경로             | 메서드 | 설명                       |
+| ---------------- | ------ | -------------------------- |
+| `/health/*`      | GET    | 서버 상태 확인             |
+| `/info`          | GET    | 사용 가능한 모델 목록 조회 |
+| `/general_guide` | GET    | 범용 추론 가이드 HTML 제공 |
+| `/extract_guide` | GET    | 문서 추출 가이드 HTML 제공 |
+
+### 범용 추론 (General)
+
+| 경로                             | 메서드 | 설명                             |
+| -------------------------------- | ------ | -------------------------------- |
+| `/general/inner`                 | POST   | 내부 LLM 기반 범용 추론 (비동기) |
+| `/general/outer`                 | POST   | 외부 LLM 기반 범용 추론 (비동기) |
+| `/general/progress/{request_id}` | GET    | 범용 추론 작업 상태 및 결과 조회 |
+
+### 문서 정보 추출 (Extract)
+
+| 경로                             | 메서드 | 설명                                  |
+| -------------------------------- | ------ | ------------------------------------- |
+| `/extract/inner`                 | POST   | 내부 LLM 기반 문서 정보 추출 (비동기) |
+| `/extract/outer`                 | POST   | 외부 LLM 기반 문서 정보 추출 (비동기) |
+| `/extract/progress/{request_id}` | GET    | 문서 추출 작업 상태 및 결과 조회      |
+
+### 텍스트 추출 (OCR)
+
+| 경로                         | 메서드 | 설명                             |
+| ---------------------------- | ------ | -------------------------------- |
+| `/ocr`                       | POST   | 문서 파일 OCR 작업 요청 (비동기) |
+| `/ocr/progress/{request_id}` | GET    | OCR 작업 진행 상태 조회          |
+| `/ocr/result/{request_id}`   | GET    | OCR 작업 결과 조회               |
+
+### 음성-텍스트 변환 (STT)
+
+| 경로                     | 메서드 | 설명                                     |
+| ------------------------ | ------ | ---------------------------------------- |
+| `/audio`                 | POST   | 음성 파일을 STT API로 전달하여 변환 요청 |
+| `/progress/{request_id}` | GET    | STT 작업 진행 상태 조회                  |
+| `/result/{request_id}`   | GET    | STT 작업 결과 조회                       |
+
+## 결과 JSON 구조 예시
+
+문서 추출 API (`/extract/*`) 호출 시 반환되는 최종 결과의 JSON 구조 예시입니다. (실제 응답은 다를 수 있습니다.)
+
+```json
+{
+  "request_id": "요청 식별자",
+  "progress_logs": [
+    {"status": "작업 접수", "timestamp": "2025-07-21T10:00:00Z"},
+    {"status": "OCR 작업 시작", "timestamp": "2025-07-21T10:00:05Z"},
+    {"status": "텍스트 추출 중", "timestamp": "2025-07-21T10:00:06Z"}
+  ],
+  "final_result": {
+    "filename": "example.pdf",
+    "model": {
+      "ocr_model": "tesseract",
+      "llm_model": "gemma:7b"
+    },
+    "time": {
+      "duration_sec": 25.5,
+      "started_at": 1721556000.0,
+      "ended_at": 1721556025.5
+    },
+    "fields": {
+      "추출된 텍스트": [[x1, y1], [x2, y2], [x3, y3], [x4, y4]]
+    },
+    "parsed": "OCR 모델로 추출한 원본 텍스트입니다.",
+    "generated": "LLM이 요약 및 번역한 텍스트입니다.",
+    "processed": {
+      "제목": "문서의 제목",
+      "한글제목": "번역된 한국어 제목",
+      "본문": "문서의 영문 본문",
+      "한글본문": "번역된 한국어 본문",
+      "날짜": "문서에 명시된 날짜",
+      "보낸사람": "발신인 정보",
+      "받는사람": "수신인 정보",
+      "연관공문": "참조 또는 연관된 문서",
+      "문서유형": "문서의 분류 (예: 보고서, 계약서)"
+    }
+  }
+}
+```
--- a/docker-compose.yml
+++ b/docker-compose.yml
@@ -0,0 +1,68 @@
+services:
+  llm_gateway_test:
+    build:
+      context: .
+    image: llm_gateway
+    container_name: llm_gateway_test
+    ports:
+      - "8877:8877"
+    env_file:
+      - .env
+    environment:
+      - TZ=Asia/Seoul
+    stdin_open: true
+    restart: always
+    tty: true
+    networks:
+      - llm_gateway_test_net
+    depends_on:
+      redis:
+        condition: service_healthy
+
+    # ✅ Mount the log directory from the host
+    volumes:
+      - ./logs:/workspace/src/logs
+
+    # ✅ Make sure the log directory exists at startup, then run uvicorn
+    command: >
+      sh -lc "mkdir -p /workspace/src/logs &&
+              exec uvicorn api:app --workers 4 --host 0.0.0.0 --port 8877
+              --log-config config/log_config.yaml"
+
+    healthcheck:
+      test:
+        [
+          "CMD-SHELL",
+          "curl -f http://localhost:8877/health/API && curl -f http://localhost:8877/health/Redis && curl -f http://localhost:8877/health/MinIO",
+        ]
+      interval: 60s
+      timeout: 5s
+      retries: 3
+      start_period: 10s
+
+  redis:
+    image: redis:7-alpine
+    container_name: llm_gateway_test_redis
+    command:
+      [
+        "redis-server",
+        "--maxmemory",
+        "256mb",
+        "--maxmemory-policy",
+        "allkeys-lru",
+      ]
+    ports:
+      - "6382:6379"
+    restart: always
+    networks:
+      - llm_gateway_test_net
+    healthcheck:
+      test: ["CMD", "redis-cli", "ping"]
+      interval: 10s
+      timeout: 5s
+      retries: 5
+
+networks:
+  llm_gateway_test_net:
+    name: llm_gateway_test_net
+    driver: bridge
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -0,0 +1,70 @@
+[project]
+name = "llm_gateway"
+version = "0.1.0"
+description = "LLM Gateway to provide unified interface for various LLM services."
+authors = [{ name = "Your Name", email = "your@email.com" }]
+requires-python = ">=3.12"
+dependencies = [
+    "PyMuPDF==1.23.7",
+    "pdf2image",
+    "pytesseract",
+    "Pillow",
+    "python-docx",
+    "uvicorn[standard]",
+    "fastapi",
+    "python-multipart",
+    "markdown2",
+    "sqlalchemy",
+    "psycopg2-binary",
+    "aiofiles",
+    "streamlit",
+    "requests",
+    "httpx",
+    "python-dotenv",
+    "google-generativeai",
+    "anthropic",
+    "openai",
+    "ollama",
+    "prometheus-fastapi-instrumentator",
+    "tiktoken",
+    "redis",
+    "celery",
+    "flower",
+    "snowflake-id",
+    "minio",
+    "pytest>=8.4.1",
+]
+
+[project.optional-dependencies]
+dev = [
+    "ruff",
+    "pytest",
+    "pytest-asyncio",
+    "httpx",
+]
+
+[tool.uv]
+cache-keys = [{ file = "pyproject.toml" }, { file = "uv.lock" }]
+
+[tool.ruff]
+line-length = 120
+indent-width = 4
+
+[tool.ruff.lint]
+select = ["E4", "E7", "E9", "F"]
+ignore = []
+fixable = ["ALL"]
+unfixable = []
+dummy-variable-rgx = "^(_+|(_+[a-zA-Z0-9_]*[a-zA-Z0-9]+?))$"
+
+[tool.ruff.format]
+quote-style = "double"
+indent-style = "space"
+skip-magic-trailing-comma = false
+line-ending = "auto"
+docstring-code-format = false
+docstring-code-line-length = "dynamic"
+
+[tool.pytest.ini_options]
+# Add 'src' to the pythonpath to allow pytest to find the modules inside the src directory.
+pythonpath = ["src", "."]
--- a/src/api.py
+++ b/src/api.py
@@ -0,0 +1,139 @@
+import logging
+from contextlib import asynccontextmanager
+from pathlib import Path
+
+from fastapi import Depends, FastAPI, HTTPException, Request
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import HTMLResponse
+from fastapi.staticfiles import StaticFiles
+from prometheus_fastapi_instrumentator import Instrumentator
+
+from config.setting import APP_VERSION, SUMMARY_HTML_DIR
+from routers import (
+    api_key_router,
+    download_router,
+    dummy_router,
+    extract_router,
+    general_router,
+    guide_router,
+    llm_summation,
+    model_router,
+    ocr_router,
+    stt_router,
+    yolo_router,
+)
+from services.api_key_service import load_api_keys_from_file
+from utils.checking_keys import get_admin_key, get_api_key
+from utils.minio_utils import get_minio_client
+from utils.redis_utils import get_redis_client
+
+# 현재 api.py 파일의 디렉토리 (src/)를 기준으로 경로를 설정합니다.
+BASE_DIR = Path(__file__).parent
+STATIC_DIR = BASE_DIR / "static"
+SWAGGER_UI_HTML_PATH = STATIC_DIR / "html" / "swagger_ui.html"
+
+
+logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(levelname)s] %(name)s - %(message)s")
+
+
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    # 애플리케이션 시작 시 파일에서 API 키 로드
+    print("Loading API keys from file...")
+    load_api_keys_from_file()
+    yield
+
+
+app = FastAPI(
+    title="LLM GATEWAY TEST",
+    description="LLM Gateway 테스트 서버",
+    version=APP_VERSION,
+    docs_url=None,  # Disable default docs
+    lifespan=lifespan,
+    favicon_url="/static/image/favicon.ico",
+)
+
+
+# Add the application version to the headers of every HTTP response.
+@app.middleware("http")
+async def add_version_header(request: Request, call_next):
+    response = await call_next(request)
+    response.headers["X-App-Version"] = APP_VERSION
+    return response
+
+
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=[
+        "http://172.16.42.101",
+        "http://gsim.hanmaceng.co.kr",
+        "http://gsim.hanmaceng.co.kr:6464",
+        "https://overseas.projectmastercloud.com",
+        "http://localhost:5174",
+    ],
+    allow_origin_regex=r"http://(localhost:5174|172\.16\.\d{1,3}\.\d{1,3}|gsim\.hanmaceng\.co\.kr)(:\d+)?",
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+# API 키 검증을 위한 의존성 설정
+api_key_dependency = Depends(get_api_key)
+admin_key_dependency = Depends(get_admin_key)
+
+# Prometheus Metrics Exporter 활성화
+instrumentator = Instrumentator()
+instrumentator.instrument(app).expose(app)
+
+# 정적 파일 서빙
+app.mount("/view/generated_html", StaticFiles(directory=SUMMARY_HTML_DIR), name="summary_html")
+app.mount("/static", StaticFiles(directory=STATIC_DIR), name="static")
+
+# 라우터 포함
+app.include_router(guide_router)
+app.include_router(api_key_router, dependencies=[admin_key_dependency], include_in_schema=False)
+app.include_router(model_router, dependencies=[api_key_dependency])
+app.include_router(download_router, dependencies=[api_key_dependency])
+app.include_router(general_router, dependencies=[api_key_dependency])
+app.include_router(extract_router, dependencies=[api_key_dependency])
+app.include_router(dummy_router, dependencies=[api_key_dependency])
+app.include_router(ocr_router, dependencies=[api_key_dependency])
+app.include_router(stt_router, dependencies=[api_key_dependency])
+app.include_router(llm_summation, dependencies=[api_key_dependency])
+app.include_router(yolo_router, dependencies=[api_key_dependency])
+
+
+# /docs URL에 커스터마이징된 Swagger UI 연결
+@app.get("/docs", response_class=HTMLResponse, include_in_schema=False)
+async def get_custom_swagger_ui():
+    try:
+        with open(SWAGGER_UI_HTML_PATH, "r", encoding="utf-8") as f:
+            return HTMLResponse(content=f.read())
+    except FileNotFoundError:
+        raise HTTPException(status_code=404, detail="Swagger UI HTML file not found.")
+
+
+@app.get("/health/API")
+async def health_check():
+    return {"status": "API ok"}
+
+
+@app.get("/health/Redis")
+def redis_health_check():
+    client = get_redis_client()
+    if client is None:
+        raise HTTPException(status_code=500, detail="Redis connection failed")
+    try:
+        client.ping()
+        return {"status": "Redis ok"}
+    except Exception:
+        raise HTTPException(status_code=500, detail="Redis ping failed")
+
+
+@app.get("/health/MinIO")
+def minio_health_check():
+    try:
+        get_minio_client()
+        return {"status": "MinIO ok"}
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"MinIO health check failed: {str(e)}")
--- a/src/config/init.py
+++ b/src/config/init.py
--- a/src/config/log_config.yaml
+++ b/src/config/log_config.yaml
@@ -0,0 +1,80 @@
+version: 1
+disable_existing_loggers: False
+
+formatters:
+  access:
+    (): uvicorn.logging.AccessFormatter
+    format: '%(asctime)s [%(levelname)s] %(client_addr)s - "%(request_line)s" %(status_code)s'
+  standard:
+    format: "%(asctime)s | %(levelname)s | %(name)s | %(message)s"
+
+filters:
+  health_check_filter:
+    (): utils.logging_utils.HealthCheckFilter
+
+handlers:
+  # ▶ 접근 로그 (콘솔)
+  access_console:
+    class: logging.StreamHandler
+    level: INFO
+    formatter: access
+    filters: [health_check_filter]
+
+  # ▶ 접근 로그 (파일)
+  access_file:
+    class: logging.handlers.RotatingFileHandler
+    level: INFO
+    formatter: access
+    filename: logs/access.log # => /workspace/src/logs/access.log
+    maxBytes: 10485760 # 10MB
+    backupCount: 5
+    filters: [health_check_filter]
+
+  # ▶ 애플리케이션/에러 로그 (콘솔)
+  app_console:
+    class: logging.StreamHandler
+    level: INFO
+    formatter: standard
+
+  # ▶ 애플리케이션/에러 로그 (파일)
+  app_file:
+    class: logging.handlers.RotatingFileHandler
+    level: INFO
+    formatter: standard
+    filename: logs/app.log # => /workspace/src/logs/app.log
+    maxBytes: 10485760
+    backupCount: 5
+
+loggers:
+  # uvicorn 접근 로그: 요청/응답
+  uvicorn.access:
+    level: INFO
+    handlers: [access_console, access_file]
+    propagate: false
+
+  # uvicorn 런타임/에러
+  uvicorn:
+    level: INFO
+    handlers: [app_console, app_file]
+    propagate: false
+
+  uvicorn.error:
+    level: INFO
+    handlers: [app_console, app_file]
+    propagate: false
+
+  # FastAPI/Starlette 내부 로거
+  fastapi:
+    level: INFO
+    handlers: [app_console, app_file]
+    propagate: false
+
+  starlette:
+    level: INFO
+    handlers: [app_console, app_file]
+    propagate: false
+
+# 루트 로거(여러분 코드에서 logging.getLogger 사용분 포함)
+root:
+  level: INFO
+  handlers: [app_console, app_file]
--- a/src/config/setting.py
+++ b/src/config/setting.py
@@ -0,0 +1,82 @@
+import logging
+import os
+from pathlib import Path
+
+from dotenv import load_dotenv
+
+# 로깅 기본 설정
+logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
+
+# .env 파일 로드 (상위 디렉토리까지 탐색)
+load_dotenv()
+
+# -----------------------------------------------------------------------------
+# 경로 설정 (Path Configuration)
+# -----------------------------------------------------------------------------
+PROJECT_ROOT = Path(__file__).resolve().parents[2]  # directory that contains src/ (two levels above src/config/)
+STATIC_DIR = PROJECT_ROOT / "src" / "static"
+
+# 프롬프트 & 스키마 경로
+DEFAULT_PROMPT_PATH = STATIC_DIR / "prompt" / "default_prompt_v0.1.txt"
+STRUCTURED_PROMPT_PATH = STATIC_DIR / "prompt" / "structured_prompt_v0.1.txt"
+I18N_PROMPT_PATH = STATIC_DIR / "prompt" / "i18n_test_prompt_kor.txt"
+D6C_PROMPT_PATH = STATIC_DIR / "prompt" / "d6c_test_prompt_eng.txt"
+STRUCTURED_SCHEMA_PATH = STATIC_DIR / "structured_schema.json"
+
+# HTML 가이드 경로
+EXTRACT_DEFAULT_PATH = STATIC_DIR / "html" / "extract_guide.html"
+EXTRACT_STRUCTURED_PATH = STATIC_DIR / "html" / "extraction_structured_guide.html"
+GENERAL_GUIDE_PATH = STATIC_DIR / "html" / "general_guide.html"
+SCHEMA_FILE_PATH = STATIC_DIR / "html" / "schema_file_guide.html"
+SUMMARY_HTML_DIR = STATIC_DIR / "html" / "generated"
+
+
+# # Swagger UI 경로
+# SWAGGER_UI_DIR = os.getenv("SWAGGER_UI_DIR", str(STATIC_DIR / "swagger-ui"))
+# logging.info(f"Final SWAGGER_UI_DIR path: {SWAGGER_UI_DIR}")
+
+# -----------------------------------------------------------------------------
+# 외부 서비스 및 인프라 설정 (External Services & Infrastructure)
+# 이 값들은 모두 환경 변수를 통해 주입받습니다.
+# -----------------------------------------------------------------------------
+
+# --- Application Version ---
+APP_VERSION = os.getenv("APP_VERSION", "v1.2508.0")
+
+# --- Ollama ---
+# 여러 개의 Ollama 엔드포인트를 콤마(,)로 구분하여 문자열로 전달
+# 예: "http://host1:11534/api/generate,http://host2:11634/api/generate"
+_ollama_urls_str = os.getenv(
+    "OLLAMA_URLS",
+    "http://ollama_gemma:11534/api/generate,http://ollama_gptoss:11634/api/generate,http://ollama_qwen:11734/api/generate",
+)
+OLLAMA_URLS = [url.strip() for url in _ollama_urls_str.split(",") if url.strip()]
+
+# --- OCR ---
+OCR_API_URL = os.getenv("OCR_API_URL", "http://ocr_gateway_test:8880/ocr")
+OCR_REDIS_HOST = os.getenv("OCR_REDIS_HOST", "ocr_gateway_test_redis")
+OCR_REDIS_PORT = int(os.getenv("OCR_REDIS_PORT", 6379))
+OCR_REDIS_DB = int(os.getenv("OCR_REDIS_DB", 0))
+
+# --- LLM Gateway Redis ---
+# docker-compose.yml의 서비스 이름을 기본값으로 사용
+PGN_REDIS_HOST = os.getenv("REDIS_HOST", "llm_gateway_test_redis")
+PGN_REDIS_PORT = int(os.getenv("REDIS_PORT", 6379))
+PGN_REDIS_DB = int(os.getenv("REDIS_DB", 2))
+
+# --- MinIO ---
+MINIO_ENDPOINT = os.getenv("MINIO_ENDPOINT", "172.16.10.175:9000")
+MINIO_ACCESS_KEY = os.getenv("MINIO_ACCESS_KEY", "kyy")
+MINIO_SECRET_KEY = os.getenv("MINIO_SECRET_KEY", "hLAk3aQfH8HTs7ELTcyR")
+MINIO_BUCKET_NAME = os.getenv("MINIO_BUCKET_NAME", "ocr-gateway")
+MINIO_RESULTS_BUCKET_NAME = os.getenv("MINIO_RESULTS_BUCKET_NAME", "ocr-gateway-results")
+
+# -----------------------------------------------------------------------------
+# 애플리케이션 동작 설정 (Application Behavior)
+# -----------------------------------------------------------------------------
+ALLOWED_EXTENSIONS = {".pdf", ".docx", ".jpg", ".jpeg", ".png"}
+
+# -----------------------------------------------------------------------------
+# 디렉토리 생성 (Directory Creation)
+# -----------------------------------------------------------------------------
+os.makedirs(SUMMARY_HTML_DIR, exist_ok=True)
--- a/src/interface/init.py
+++ b/src/interface/init.py
--- a/src/interface/streamlit_ui.py
+++ b/src/interface/streamlit_ui.py
@@ -0,0 +1,31 @@
+import streamlit as st
+from urllib.parse import quote
+import requests
+import os
+
+API_URL = "http://api:8888/upload"  # FastAPI 서비스 이름 기준
+DOWNLOAD_URL = "http://172.16.10.176:8888/download/"
+
+st.set_page_config(page_title="HANMAC PGN Documents", layout="centered")
+
+st.image("/app/app/static/logo.png", use_container_width=True)
+st.markdown("<h2 style='text-align: center;'>🔹PM Oversea DEMO🔹</h2>", unsafe_allow_html=True)
+
+uploaded_files = st.file_uploader("PDF 파일 업로드", type=["pdf"], accept_multiple_files=True)
+
+if st.button("업로드 및 처리") and uploaded_files:
+    files = [("files", (f.name, f.getvalue(), "application/pdf")) for f in uploaded_files]
+    with st.spinner("서버에 업로드 및 처리 중..."):
+        response = requests.post(API_URL, files=files)
+
+    if response.status_code == 200:
+        st.success("처리 완료! 결과를 아래에서 확인하세요.")
+        results = response.json()["results"]
+        for r in results:
+            filename = r["filename"]
+            json_path = r["saved_path"]
+            encoded_filename = quote(os.path.basename(json_path))
+            download_link = f"{DOWNLOAD_URL}{encoded_filename}"
+            st.markdown(f"✅ **{filename}** 처리 완료\n[JSON 다운로드]({download_link})")
+    else:
+        st.error("❌ 처리 중 오류가 발생했습니다.")
--- a/src/routers/init.py
+++ b/src/routers/init.py
@@ -0,0 +1,25 @@
+from .api_key_router import router as api_key_router
+from .download_router import router as download_router
+from .dummy_router import router as dummy_router
+from .extract_router import router as extract_router
+from .general_router import router as general_router
+from .guide_router import router as guide_router
+from .llm_summation import router as llm_summation
+from .model_router import router as model_router
+from .ocr_router import router as ocr_router
+from .stt_router import router as stt_router
+from .yolo_router import router as yolo_router
+
+__all__ = [
+    "api_key_router",
+    "download_router",
+    "dummy_router",
+    "extract_router",
+    "general_router",
+    "guide_router",
+    "model_router",
+    "ocr_router",
+    "stt_router",
+    "llm_summation",
+    "yolo_router",
+]
--- a/src/routers/api_key_router.py
+++ b/src/routers/api_key_router.py
@@ -0,0 +1,43 @@
+from fastapi import APIRouter, Body, HTTPException
+
+from services import api_key_service
+
+router = APIRouter(prefix="/manage", tags=["API Key Management"])
+
+
+@router.post("/keys", summary="Create a new API Key")
+def create_key(
+    client_name: str = Body(
+        ...,
+        embed=True,
+        description="Name of the client or service that will use this key.",
+    ),
+):
+    """
+    새로운 API 키를 생성하고 시스템에 등록합니다.
+    """
+    if not client_name:
+        raise HTTPException(status_code=400, detail="Client name is required.")
+
+    new_key_info = api_key_service.create_api_key(client_name)
+    return {"message": "API Key created successfully", "key_info": new_key_info}
+
+
+@router.get("/keys", summary="List all API Keys")
+def list_keys():
+    """
+    현재 시스템에 등록된 모든 API 키의 정보를 조회합니다.
+    """
+    keys = api_key_service.list_api_keys()
+    return {"keys": keys}
+
+
+@router.delete("/keys/{api_key}", summary="Revoke an API Key")
+def revoke_key(api_key: str):
+    """
+    지정된 API 키를 시스템에서 영구적으로 삭제(폐기)합니다.
+    """
+    success = api_key_service.revoke_api_key(api_key)
+    if not success:
+        raise HTTPException(status_code=404, detail="API Key not found.")
+    return {"message": f"API Key '{api_key}' has been revoked."}
--- a/src/routers/download_router.py
+++ b/src/routers/download_router.py
@@ -0,0 +1,23 @@
+from fastapi import APIRouter
+
+from services.download_service import DownloadService
+
+router = APIRouter(tags=["Model Management"])
+
+
+# ✅ GET: download the default prompt file
+@router.get("/default_prompt", summary="기본 프롬프트 파일 다운로드")
+async def download_default_prompt():
+    return DownloadService.download_default_prompt()
+
+
+# ✅ GET: download the structured prompt file
+@router.get("/structured_prompt", summary="구조화 프롬프트 파일 다운로드")
+async def download_structured_prompt():
+    return DownloadService.download_structured_prompt()
+
+
+# ✅ GET: download the structured field definition (schema) file
+@router.get("/structured_schema", summary="구조화 포맷 정의 파일 다운로드")
+async def download_structured_schema():
+    return DownloadService.download_structured_schema()
--- a/src/routers/dummy_router.py
+++ b/src/routers/dummy_router.py
@@ -0,0 +1,69 @@
+from typing import Optional
+
+from fastapi import Depends, Form, Request
+
+from services.inference_service import InferenceHandler
+from utils.checking_keys import get_api_key
+from utils.custom_router import CustomAPIRouter
+
+router = CustomAPIRouter(prefix="/dummy", tags=["Dummy"])
+
+
+# ✅ POST:DUMMY
+@router.post(
+    "/extract/outer",
+    summary="더미 응답 생성",
+    description="""### **요약**
+실제 모델 추론이나 파일 업로드 없이, 지정된 모델의 응답 형식을 테스트하기 위한 더미(dummy) 결과를 생성합니다.
+
+### **작동 방식**
+-   요청 시, 시스템에 미리 저장된 더미 응답(`dummy_response.json`)을 즉시 반환합니다.
+-   실제 OCR, LLM 추론 등 어떠한 백그라운드 작업도 수행하지 않습니다.
+-   네트워크나 모델 성능에 관계없이 API 응답 구조를 빠르게 확인하는 용도로 사용됩니다.
+
+### **입력 (multipart/form-data)**
+-   `model` (선택): 응답 형식의 기준이 될 모델 이름. (기본값: `dummy`)
+    -   이 값은 실제 추론에 사용되지 않으며, 형식 테스트용으로만 기능합니다.
+
+### **출력 (application/json)**
+-   **즉시 반환**:
+    ```json
+    {
+        "filename": "dummy_input.pdf",
+        "dummy_model": {
+            "ocr_model": "dummy",
+            "llm_model": "dummy",
+            "api_url": "dummy"
+        },
+        "time": {
+            "duration_sec": "0.00",
+            "started_at": "...",
+            "ended_at": "..."
+        },
+        "fields": {},
+        "parsed": "dummy",
+        "generated": "dummy",
+        "processed": {
+            "dummy response"
+        }
+    }
+    ```
+""",
+)
+async def extract_outer(
+    request_info: Request,
+    model: Optional[str] = Form(default="dummy", description="실제 추론 없이 포맷 테스트용으로 사용됩니다."),
+    api_key: str = Depends(get_api_key),
+):
+    return await InferenceHandler.handle_extract_background(
+        request_id=None,
+        result_id=None,
+        input_file=None,
+        schema_file=None,
+        prompt_file=None,
+        mode="dummy",
+        model_list=[model],
+        ocr_model="N/A",
+        request_info=request_info,
+        api_key=api_key,
+    )
--- a/src/routers/extract_router.py
+++ b/src/routers/extract_router.py
@@ -0,0 +1,811 @@
+import asyncio
+import io
+import json
+from typing import Optional
+from urllib.parse import urlparse
+
+import requests
+from fastapi import Depends, File, Form, Request, UploadFile
+from fastapi.responses import JSONResponse
+from redis import Redis
+
+from config.setting import (
+    D6C_PROMPT_PATH,
+    I18N_PROMPT_PATH,
+    PGN_REDIS_DB,
+    PGN_REDIS_HOST,
+    PGN_REDIS_PORT,
+)
+from services.inference_service import InferenceHandler
+from utils.checking_files import (
+    clone_upload_file,
+    validate_all_files,
+)
+from utils.checking_keys import create_key, get_api_key
+from utils.custom_router import CustomAPIRouter
+from utils.minio_utils import fetch_result_from_minio
+
+# Redis 클라이언트 (LLM Gateway 전용)
+redis_client = Redis(host=PGN_REDIS_HOST, port=PGN_REDIS_PORT, db=PGN_REDIS_DB, decode_responses=True)
+
+router = CustomAPIRouter(prefix="/extract", tags=["Extraction"])
+
+
+# ✅ 공통 비동기 추론 엔드포인트 생성기
+def register_extract_route(path: str, mode: str, default_llm: str, default_ocr: str, summary: str, description: str):
+    @router.post(path, summary=summary, description=description)
+    async def extract_endpoint(
+        request_info: Request,
+        input_file: UploadFile = File(...),
+        prompt_file: Optional[UploadFile] = File(
+            default=None,
+            description="⚠️ prompt_file 업로드하지 않을 경우, **'Send empty value'** 체크박스를 반드시 해제해주세요.",
+        ),
+        llm_model: Optional[str] = Form(default=default_llm),
+        ocr_model: Optional[str] = Form(default=default_ocr),
+        api_key: str = Depends(get_api_key),
+    ):
+        validate_all_files(input_file)
+
+        # ✅ 고유한 요청 ID 생성
+        request_id = create_key()
+        result_id = create_key()
+
+        cloned_input = clone_upload_file(input_file) if input_file else None
+        cloned_prompt = clone_upload_file(prompt_file) if prompt_file else None
+
+        # ✅ 백그라운드에서 작업 실행
+        asyncio.create_task(
+            InferenceHandler.handle_extract_background(
+                request_id=request_id,
+                result_id=result_id,
+                input_file=cloned_input,
+                schema_file=None,
+                prompt_file=cloned_prompt,
+                mode=mode,
+                model_list=[llm_model],
+                ocr_model=ocr_model,
+                request_info=request_info,
+                api_key=api_key,
+            )
+        )
+
+        # ✅ request_id → result_id 매핑 저장
+        redis_client.hset("pipeline_result_mapping", request_id, result_id)
+
+        return JSONResponse(
+            content={
+                "message": "문서 추출 및 생성형 응답 작업이 백그라운드에서 실행 중입니다.",
+                "request_id": request_id,
+                "status_check_url": f"/extract/progress/{request_id}",
+            }
+        )
+
+    # FastAPI 문서화용 정보 부여
+    extract_endpoint.__name__ = f"extract_{mode}"
+    extract_endpoint.__doc__ = description
+    return extract_endpoint
+
+
+# ✅ 내부 모델용 등록
+extract_inner = register_extract_route(
+    path="/inner",
+    mode="inner",
+    default_llm="gemma3:27b",
+    default_ocr="upstage",
+    summary="내부 LLM 기반 문서 정보 추출 (비동기)",
+    description="""### **요약**
+내부망에 배포된 LLM(Ollama 기반)을 사용하여 문서(PDF, 이미지 등)에서 정보를 추출하고 응답을 생성합니다. 이 엔드포인트는 사전 정의된 기본 프롬프트를 사용하며, 비동기적으로 처리됩니다.
+
+### **작동 방식**
+1.  **요청 접수**: `input_file`을 받아 고유 `request_id`를 생성하고 즉시 반환합니다.
+2.  **백그라운드 처리**:
+    -   `input_file`에 대해 **OCR API**를 호출하여 텍스트를 추출합니다.
+    -   시스템에 내장된 기본 프롬프트와 추출된 텍스트를 조합합니다. (`prompt_file`을 업로드하여 기본 프롬프트를 대체할 수 있습니다.)
+    -   내부 LLM(Ollama)에 추론을 요청합니다.
+3.  **상태 및 결과 확인**: `GET /extract/progress/{request_id}`로 작업 상태와 최종 결과를 조회합니다.
+
+### **입력 (multipart/form-data)**
+-   `input_file` (**필수**): 정보 추출의 대상이 될 문서 파일.
+    -   지원 형식: `.pdf`, `.docx`, `.jpg`, `.png`, `.jpeg` 등.
+-   `prompt_file` (선택): 기본 프롬프트 대신 사용할 사용자 정의 `.txt` 프롬프트 파일.
+-   `model` (선택): 사용할 내부 LLM 모델 이름. (기본값: `gemma3:27b`)
+
+### **출력 (application/json)**
+-   **초기 응답**:
+    ```json
+    {
+      "message": "작업이 백그라운드에서 실행 중입니다.",
+      "request_id": "고유한 요청 ID",
+      "status_check_url": "/extract/progress/고유한 요청 ID"
+    }
+    ```
+-   **최종 결과**: `GET /extract/progress/{request_id}`를 통해 확인 가능.
+""",
+)
+
+# ✅ 외부 모델용 등록
+extract_outer = register_extract_route(
+    path="/outer",
+    mode="outer",
+    default_llm="gemini-2.5-flash",
+    default_ocr="upstage",
+    summary="외부 LLM 기반 문서 정보 추출 (비동기)",
+    description="""### **요약**
+외부 상용 LLM(예: GPT, Gemini)을 사용하여 문서에서 정보를 추출하고 응답을 생성합니다. 내부 LLM 엔드포인트와 작동 방식은 동일하나, 외부 API를 호출합니다.
+
+### **작동 방식**
+1.  **요청 접수**: `input_file`을 받아 `request_id`를 생성 후 즉시 반환합니다.
+2.  **백그라운드 처리**:
+    -   `input_file`에서 **OCR API**를 통해 텍스트를 추출합니다.
+    -   내장된 기본 프롬프트(또는 사용자 정의 `prompt_file`)와 텍스트를 조합합니다.
+    -   외부 LLM API(OpenAI, Google 등)에 추론을 요청합니다.
+3.  **상태 및 결과 확인**: `GET /extract/progress/{request_id}`로 작업 상태와 최종 결과를 조회합니다.
+
+### **입력 (multipart/form-data)**
+-   `input_file` (**필수**): 정보 추출 대상 문서 파일.
+-   `prompt_file` (선택): 기본 프롬프트 대신 사용할 `.txt` 파일.
+-   `model` (선택): 사용할 외부 LLM 모델 이름. (기본값: `gemini-2.5-flash`)
+
+### **출력 (application/json)**
+-   **초기 응답**:
+    ```json
+    {
+      "message": "작업이 백그라운드에서 실행 중입니다.",
+      "request_id": "고유한 요청 ID",
+      "status_check_url": "/extract/progress/고유한 요청 ID"
+    }
+    ```
+-   **최종 결과**: `GET /extract/progress/{request_id}`를 통해 확인 가능.
+""",
+)
+
+# ✅ 멀티모달 GPT 테스트용 등록
+extract_outer_gpt = register_extract_route(
+    path="/outer/gpt",
+    mode="multimodal",
+    default_llm="gpt-4.1",
+    default_ocr="Not Used OCR",
+    summary="멀티모달 GPT 테스트용",
+    description="""### **요약**
+GPT-4o와 같은 멀티모달 모델을 사용하여, 문서(PDF, 이미지 등)에서 정보를 추출하고 응답을 생성합니다.
+
+### **작동 방식**
+-   다른 추출 엔드포인트와 동일한 비동기 파이프라인을 따릅니다.
+-   추론 단계에서 시스템은 멀티모달 출력을 생성하도록 특화된 프롬프트(`multimodal_prompt`)를 사용합니다.
+-   LLM은 `input_file`의 내용과 `prompt_file`의 구조를 바탕으로 JSON 객체를 생성합니다.
+
+### **입력 (multipart/form-data)**
+-   `input_file` (**필수**): 정보 추출 대상 문서 파일.
+-   `prompt_file` (**선택**): 구조화용 기본 프롬프트 대신 사용할 `.txt` 파일.
+-   `model` (선택): 사용할 LLM 모델 이름.
+
+### **출력 (application/json)**
+-   **초기 응답**: `request_id` 포함.
+-   **최종 결과**: `GET /extract/progress/{request_id}` 조회 시, 지정된 스키마를 따르는 JSON 객체가 반환됩니다.
+""",
+)
+
+# ✅ 멀티모달 Gemini 테스트용 등록
+extract_outer_gemini = register_extract_route(
+    path="/outer/gemini",
+    mode="multimodal",
+    default_llm="gemini-2.5-flash",
+    default_ocr="Not Used OCR",
+    summary="멀티모달 Gemini 테스트용",
+    description="""### **요약**
+Gemini와 같은 멀티모달 모델을 사용하여, 문서(PDF, 이미지 등)에서 정보를 추출하고 응답을 생성합니다.
+
+### **작동 방식**
+-   다른 추출 엔드포인트와 동일한 비동기 파이프라인을 따릅니다.
+-   추론 단계에서 시스템은 멀티모달 출력을 생성하도록 특화된 프롬프트(`multimodal_prompt`)를 사용합니다.
+-   LLM은 `input_file`의 내용과 `prompt_file`의 구조를 바탕으로 JSON 객체를 생성합니다.
+
+### **입력 (multipart/form-data)**
+-   `input_file` (**필수**): 정보 추출 대상 문서 파일.
+-   `prompt_file` (**선택**): 구조화용 기본 프롬프트 대신 사용할 `.txt` 파일.
+-   `model` (선택): 사용할 LLM 모델 이름.
+
+### **출력 (application/json)**
+-   **초기 응답**: `request_id` 포함.
+-   **최종 결과**: `GET /extract/progress/{request_id}` 조회 시, 지정된 스키마를 따르는 JSON 객체가 반환됩니다.
+""",
+)
+
+
+@router.post(
+    "/inner/d6c",
+    summary="국내 문서 테스트용",
+)
+async def extract_d6c(
+    request_info: Request,
+    input_file: UploadFile = File(...),
+    llm_model: Optional[str] = Form(default="gemma3:27b"),
+    ocr_model: Optional[str] = Form(default="upstage"),
+    api_key: str = Depends(get_api_key),
+):
+    validate_all_files(input_file)
+
+    request_id = create_key()
+    result_id = create_key()
+
+    cloned_input = clone_upload_file(input_file) if input_file else None
+
+    # 설정에 정의된 기본 I18N 프롬프트 파일을 항상 사용
+    with open(I18N_PROMPT_PATH, "rb") as f:
+        content = f.read()
+
+    # 메모리 내 파일 객체 생성
+    spooled_file = io.BytesIO(content)
+
+    # UploadFile과 유사한 객체를 생성하여 백그라운드 핸들러로 전달
+    dummy_prompt_file = UploadFile(filename=I18N_PROMPT_PATH.name, file=spooled_file)
+    cloned_prompt = clone_upload_file(dummy_prompt_file)
+
+    asyncio.create_task(
+        InferenceHandler.handle_extract_background(
+            request_id=request_id,
+            result_id=result_id,
+            input_file=cloned_input,
+            schema_file=None,
+            prompt_file=cloned_prompt,
+            mode="inner",
+            model_list=[llm_model],
+            ocr_model=ocr_model,
+            request_info=request_info,
+            api_key=api_key,
+        )
+    )
+
+    redis_client.hset("pipeline_result_mapping", request_id, result_id)
+
+    return JSONResponse(
+        content={
+            "message": "문서 추출 및 생성형 응답 작업이 백그라운드에서 실행 중입니다.",
+            "request_id": request_id,
+            "status_check_url": f"/extract/progress/{request_id}",
+        }
+    )
+
+
+@router.post(
+    "/inner/i18n",
+    summary="해외 문서 테스트용",
+)
+async def extract_i18n(
+    request_info: Request,
+    input_file: UploadFile = File(...),
+    llm_model: Optional[str] = Form(default="gemma3:27b"),
+    ocr_model: Optional[str] = Form(default="upstage"),
+    api_key: str = Depends(get_api_key),
+):
+    validate_all_files(input_file)
+
+    request_id = create_key()
+    result_id = create_key()
+
+    cloned_input = clone_upload_file(input_file) if input_file else None
+
+    # 설정에 정의된 기본 I18N 프롬프트 파일을 항상 사용
+    with open(D6C_PROMPT_PATH, "rb") as f:
+        content = f.read()
+
+    # 메모리 내 파일 객체 생성
+    spooled_file = io.BytesIO(content)
+
+    # UploadFile과 유사한 객체를 생성하여 백그라운드 핸들러로 전달
+    dummy_prompt_file = UploadFile(filename=D6C_PROMPT_PATH.name, file=spooled_file)
+    cloned_prompt = clone_upload_file(dummy_prompt_file)
+
+    asyncio.create_task(
+        InferenceHandler.handle_extract_background(
+            request_id=request_id,
+            result_id=result_id,
+            input_file=cloned_input,
+            schema_file=None,
+            prompt_file=cloned_prompt,
+            mode="inner",
+            model_list=[llm_model],
+            ocr_model=ocr_model,
+            request_info=request_info,
+            api_key=api_key,
+        )
+    )
+
+    redis_client.hset("pipeline_result_mapping", request_id, result_id)
+
+    return JSONResponse(
+        content={
+            "message": "문서 추출 및 생성형 응답 작업이 백그라운드에서 실행 중입니다.",
+            "request_id": request_id,
+            "status_check_url": f"/extract/progress/{request_id}",
+        }
+    )
+
+
+# ✅ structured 모드: 구조화 JSON 스키마 기반 추론
+@router.post(
+    "/inner/structured",
+    summary="구조화된 JSON 정보 추출 (비동기)",
+    description="""### **요약**
+사용자가 제공한 `schema_file`에 정의된 JSON 스키마에 따라, 문서에서 정보를 추출하여 구조화된 JSON으로 반환합니다.
+
+### **작동 방식**
+-   다른 추출 엔드포인트와 동일한 비동기 파이프라인을 따릅니다.
+-   추론 단계에서 시스템은 구조화된 출력을 생성하도록 특화된 프롬프트(`structured_prompt`)를 사용합니다.
+-   LLM은 `input_file`의 내용과 `schema_file`의 구조를 바탕으로 JSON 객체를 생성합니다.
+
+### **입력 (multipart/form-data)**
+-   `input_file` (**필수**): 정보 추출 대상 문서 파일.
+-   `schema_file` (**필수**): 원하는 출력 JSON 구조를 정의하는 `.json` 파일.
+-   `prompt_file` (**필수**): 구조화용 기본 프롬프트 대신 사용할 `.txt` 파일.
+-   `model` (선택): 사용할 LLM 모델 이름.
+
+### **출력 (application/json)**
+-   **초기 응답**: `request_id` 포함.
+-   **최종 결과**: `GET /extract/progress/{request_id}` 조회 시, 지정된 스키마를 따르는 JSON 객체가 반환됩니다.
+""",
+)
+async def extract_structured_inner(
+    request_info: Request,
+    input_file: UploadFile = File(...),
+    schema_file: UploadFile = File(...),
+    prompt_file: UploadFile = File(...),
+    llm_model: Optional[str] = Form(default="gemma3:27b"),
+    ocr_model: Optional[str] = Form(default="upstage"),
+    api_key: str = Depends(get_api_key),
+):
+    validate_all_files(input_file)
+
+    # ✅ 고유한 요청 ID 생성
+    request_id = create_key()
+    result_id = create_key()
+
+    cloned_input = clone_upload_file(input_file) if input_file else None
+    cloned_schema = clone_upload_file(schema_file) if schema_file else None
+    cloned_prompt = clone_upload_file(prompt_file) if prompt_file else None
+
+    # ✅ 백그라운드에서 작업 실행
+    asyncio.create_task(
+        InferenceHandler.handle_extract_background(
+            request_id=request_id,
+            result_id=result_id,
+            input_file=cloned_input,
+            schema_file=cloned_schema,
+            prompt_file=cloned_prompt,
+            mode="structured",
+            model_list=[llm_model],
+            ocr_model=ocr_model,
+            request_info=request_info,
+            api_key=api_key,
+        )
+    )
+
+    # ✅ request_id → result_id 매핑 저장
+    redis_client.hset("pipeline_result_mapping", request_id, result_id)
+
+    return JSONResponse(
+        content={
+            "message": "문서 추출 및 생성형 응답 작업이 백그라운드에서 실행 중입니다.",
+            "request_id": request_id,
+            "status_check_url": f"/extract/progress/{request_id}",
+        }
+    )
+
+
+# ✅ structured 모드: 구조화 JSON 스키마 기반 추론
+@router.post(
+    "/outer/structured",
+    include_in_schema=False,  # Swagger UI / ReDoc 에서 숨김
+    summary="구조화된 JSON 정보 추출 (비동기)",
+    description="""### **요약**
+사용자가 제공한 `schema_file`에 정의된 JSON 스키마에 따라, 문서에서 정보를 추출하여 구조화된 JSON으로 반환합니다.
+
+### **작동 방식**
+-   다른 추출 엔드포인트와 동일한 비동기 파이프라인을 따릅니다.
+-   추론 단계에서 시스템은 구조화된 출력을 생성하도록 특화된 프롬프트(`structured_prompt`)를 사용합니다.
+-   LLM은 `input_file`의 내용과 `schema_file`의 구조를 바탕으로 JSON 객체를 생성합니다.
+
+### **입력 (multipart/form-data)**
+-   `input_file` (**필수**): 정보 추출 대상 문서 파일.
+-   `schema_file` (**선택**): 원하는 출력 JSON 구조를 정의하는 `.json` 파일.
+-   `prompt_file` (**선택**): 구조화용 기본 프롬프트 대신 사용할 `.txt` 파일.
+-   `model` (선택): 사용할 LLM 모델 이름.
+
+### **출력 (application/json)**
+-   **초기 응답**: `request_id` 포함.
+-   **최종 결과**: `GET /extract/progress/{request_id}` 조회 시, 지정된 스키마를 따르는 JSON 객체가 반환됩니다.
+""",
+)
+async def extract_structured_outer(
+    request_info: Request,
+    input_file: UploadFile = File(...),
+    schema_file: UploadFile = File(...),
+    prompt_file: UploadFile = File(...),
+    llm_model: Optional[str] = Form(default="gemini-2.5-flash"),
+    ocr_model: Optional[str] = Form(default="upstage"),
+    api_key: str = Depends(get_api_key),
+):
+    validate_all_files(input_file)
+
+    # ✅ 고유한 요청 ID 생성
+    request_id = create_key()
+    result_id = create_key()
+
+    cloned_input = clone_upload_file(input_file) if input_file else None
+    cloned_schema = clone_upload_file(schema_file) if schema_file else None
+    cloned_prompt = clone_upload_file(prompt_file) if prompt_file else None
+
+    # ✅ 백그라운드에서 작업 실행
+    asyncio.create_task(
+        InferenceHandler.handle_extract_background(
+            request_id=request_id,
+            result_id=result_id,
+            input_file=cloned_input,
+            schema_file=cloned_schema,
+            prompt_file=cloned_prompt,
+            mode="structured",
+            model_list=[llm_model],
+            ocr_model=ocr_model,
+            request_info=request_info,
+            api_key=api_key,
+        )
+    )
+
+    # ✅ request_id → result_id 매핑 저장
+    redis_client.hset("pipeline_result_mapping", request_id, result_id)
+
+    return JSONResponse(
+        content={
+            "message": "문서 추출 및 생성형 응답 작업이 백그라운드에서 실행 중입니다.",
+            "request_id": request_id,
+            "status_check_url": f"/extract/progress/{request_id}",
+        }
+    )
+
+
+# ✅ 상태 로그 조회 API
+@router.get(
+    "/progress/{request_id}",
+    summary="정보 추출 작업 상태 및 결과 조회",
+    description="""### **요약**
+`POST /extract/*` 계열 엔드포인트 요청 시 반환된 `request_id`를 사용하여, 해당 정보 추출 작업의 진행 상태와 최종 결과를 조회합니다.
+
+### **작동 방식**
+-   `request_id`를 기반으로 Redis에 저장된 작업 로그와 결과 데이터를 조회합니다.
+-   작업이 진행 중일 때는 현재까지의 로그를, 완료되었을 때는 로그와 함께 최종 결과(`final_result`)를 반환합니다.
+
+### **입력**
+-   `request_id`: 조회할 작업의 고유 ID.
+
+### **출력 (application/json)**
+-   **성공 시**:
+    ```json
+    {
+      "request_id": "요청 시 사용된 ID",
+      "progress_logs": [
+        { "timestamp": "...", "status": "OCR 시작", "details": "..." },
+        { "timestamp": "...", "status": "입력 길이 검사 시작", "details": "..." },
+        { "timestamp": "...", "status": "LLM 추론 시작", "details": "..." },
+        { "timestamp": "...", "status": "LLM 추론 완료 및 후처리 시작", "details": "..." },
+        { "timestamp": "...", "status": "후처리 완료 및 결과 반환"", "details": "..." }      
+      ],
+      "final_result": {
+        "filename": "입력 파일",
+        "processed": "LLM의 최종 응답 내용"
+      }
+    }
+    ```
+-   **ID가 유효하지 않을 경우 (404 Not Found)**:
+    ```json
+    {
+      "message": "{request_id}에 대한 상태 로그가 없습니다."
+    }
+    ```
+""",
+)
+async def get_pipeline_status(request_id: str):
+    redis_key = f"pipeline_status:{request_id}"
+
+    # 1. 상태 로그 조회
+    logs = redis_client.lrange(redis_key, 0, -1)
+    if not logs:
+        return JSONResponse(
+            status_code=404,
+            content={"message": f"{request_id}에 대한 상태 로그가 없습니다."},
+        )
+    parsed_logs = [json.loads(log) for log in logs] if logs else []
+
+    # 2. request_id → result_id 매핑 조회
+    result_id = redis_client.hget("pipeline_result_mapping", request_id)
+
+    final_result = None
+    if result_id:
+        # 3. Redis에서 최종 결과 조회
+        result_key = f"pipeline_result:{result_id}"
+        result_str = redis_client.get(result_key)
+        if result_str:
+            try:
+                final_result = json.loads(result_str)
+                return JSONResponse(
+                    content={
+                        "request_id": request_id,
+                        "progress_logs": parsed_logs,
+                        "final_result": final_result,
+                    }
+                )
+            except json.JSONDecodeError:
+                final_result = {"massage": "[REDIS] 결과 존재하지만, 디코딩에 실패했습니다."}
+        else:
+            print(f"[REDIS] request_id {request_id} 가 Redis에 없습니다.")
+
+    # 4. Redis에 결과가 없으면 MinIO에서 조회
+    try:
+        print(f"[MINIO] MinIO에서 결과를 가져오는 중: {request_id}")
+        result_str = fetch_result_from_minio(request_id)
+        if result_str:
+            try:
+                final_result = json.loads(result_str)
+            except json.JSONDecodeError:
+                final_result = {"error": "Result found in MinIO but failed to decode JSON."}
+
+            return JSONResponse(
+                content={
+                    "request_id": request_id,
+                    "progress_logs": parsed_logs,
+                    "final_result": final_result,
+                }
+            )
+        else:
+            return JSONResponse(
+                content={
+                    "request_id": request_id,
+                    "progress_logs": parsed_logs,
+                    "final_result": None,
+                }
+            )
+    except Exception as e:
+        print(f"[MINIO] MinIO 결과 조회 중 실패했습니다: {e}")
+        return JSONResponse(
+            content={
+                "request_id": request_id,
+                "progress_logs": parsed_logs,
+                "final_result": {"error": f"Failed to fetch result from MinIO: {e}"},
+            }
+        )
+
+
+## 조찬영
+@router.post(
+    "/inner2/d6c",
+    summary="국내 문서 테스트용",
+)
+async def extract2_d6c(
+    request_info: Request,
+    minio_url: str = Form(...),
+    llm_model: Optional[str] = Form(default="qwen3:30b"),
+    ocr_model: Optional[str] = Form(default="upstage"),
+    api_key: str = Depends(get_api_key),
+):
+    try:
+        response = requests.get(minio_url)
+        response.raise_for_status()  # 4xx/5xx 응답에 대해 HTTPError 발생
+    except requests.exceptions.HTTPError as e:
+        if e.response.status_code == 403:
+            return JSONResponse(
+                status_code=400,
+                content={"message": "제공된 MinIO URL이 만료되었거나 접근 권한이 없습니다."},
+            )
+        else:
+            return JSONResponse(
+                status_code=400,
+                content={
+                    "message": f"URL에서 파일을 가져오는 데 실패했습니다: {e.response.status_code} {e.response.reason}"
+                },
+            )
+    except requests.exceptions.RequestException as e:
+        return JSONResponse(
+            status_code=400,
+            content={"message": f"URL에 연결하는 중 오류가 발생했습니다: {e}"},
+        )
+
+    # URL에서 쿼리 파라미터를 제외한 파일 이름 추출
+    parsed_url = urlparse(minio_url)
+    file_name = parsed_url.path.split("/")[-1]
+
+    # 다운로드한 파일 데이터로 UploadFile 객체 생성
+    input_file = UploadFile(filename=file_name, file=io.BytesIO(response.content))
+    validate_all_files(input_file)
+
+    request_id = create_key()
+    result_id = create_key()
+
+    cloned_input = clone_upload_file(input_file) if input_file else None
+
+    # 설정에 정의된 기본 I18N 프롬프트 파일을 항상 사용
+    with open(I18N_PROMPT_PATH, "rb") as f:
+        content = f.read()
+
+    # 메모리 내 파일 객체 생성
+    spooled_file = io.BytesIO(content)
+
+    # UploadFile과 유사한 객체를 생성하여 백그라운드 핸들러로 전달
+    dummy_prompt_file = UploadFile(filename=I18N_PROMPT_PATH.name, file=spooled_file)
+    cloned_prompt = clone_upload_file(dummy_prompt_file)
+
+    asyncio.create_task(
+        InferenceHandler.handle_extract_background(
+            request_id=request_id,
+            result_id=result_id,
+            input_file=cloned_input,
+            schema_file=None,
+            prompt_file=cloned_prompt,
+            mode="inner",
+            model_list=[llm_model],
+            ocr_model=ocr_model,
+            request_info=request_info,
+            api_key=api_key,
+        )
+    )
+
+    redis_client.hset("pipeline_result_mapping", request_id, result_id)
+
+    return JSONResponse(
+        content={
+            "message": "문서 추출 및 생성형 응답 작업이 백그라운드에서 실행 중입니다.",
+            "request_id": request_id,
+            "status_check_url": f"/extract/progress/{request_id}",
+        }
+    )
+
+
+@router.post(
+    "/inner2/i18n",
+    summary="해외 문서 테스트용",
+)
+async def extract2_i18n(
+    request_info: Request,
+    minio_url: str = Form(...),
+    llm_model: Optional[str] = Form(default="qwen3:30b"),
+    ocr_model: Optional[str] = Form(default="upstage"),
+    api_key: str = Depends(get_api_key),
+):
+    try:
+        response = requests.get(minio_url)
+        response.raise_for_status()  # 4xx/5xx 응답에 대해 HTTPError 발생
+    except requests.exceptions.HTTPError as e:
+        if e.response.status_code == 403:
+            return JSONResponse(
+                status_code=400,
+                content={"message": "제공된 MinIO URL이 만료되었거나 접근 권한이 없습니다."},
+            )
+        else:
+            return JSONResponse(
+                status_code=400,
+                content={
+                    "message": f"URL에서 파일을 가져오는 데 실패했습니다: {e.response.status_code} {e.response.reason}"
+                },
+            )
+    except requests.exceptions.RequestException as e:
+        return JSONResponse(
+            status_code=400,
+            content={"message": f"URL에 연결하는 중 오류가 발생했습니다: {e}"},
+        )
+
+    # URL에서 쿼리 파라미터를 제외한 파일 이름 추출
+    parsed_url = urlparse(minio_url)
+    file_name = parsed_url.path.split("/")[-1]
+
+    # 다운로드한 파일 데이터로 UploadFile 객체 생성
+    input_file = UploadFile(filename=file_name, file=io.BytesIO(response.content))
+    validate_all_files(input_file)
+
+    request_id = create_key()
+    result_id = create_key()
+
+    cloned_input = clone_upload_file(input_file) if input_file else None
+
+    # 설정에 정의된 기본 I18N 프롬프트 파일을 항상 사용
+    with open(D6C_PROMPT_PATH, "rb") as f:
+        content = f.read()
+
+    # 메모리 내 파일 객체 생성
+    spooled_file = io.BytesIO(content)
+
+    # UploadFile과 유사한 객체를 생성하여 백그라운드 핸들러로 전달
+    dummy_prompt_file = UploadFile(filename=D6C_PROMPT_PATH.name, file=spooled_file)
+    cloned_prompt = clone_upload_file(dummy_prompt_file)
+
+    asyncio.create_task(
+        InferenceHandler.handle_extract_background(
+            request_id=request_id,
+            result_id=result_id,
+            input_file=cloned_input,
+            schema_file=None,
+            prompt_file=cloned_prompt,
+            mode="inner",
+            model_list=[llm_model],
+            ocr_model=ocr_model,
+            request_info=request_info,
+            api_key=api_key,
+        )
+    )
+
+    redis_client.hset("pipeline_result_mapping", request_id, result_id)
+
+    return JSONResponse(
+        content={
+            "message": "문서 추출 및 생성형 응답 작업이 백그라운드에서 실행 중입니다.",
+            "request_id": request_id,
+            "status_check_url": f"/extract/progress/{request_id}",
+        }
+    )
+
+
+## 조찬영
+
+
+@router.post(
+    "/test",
+    summary="테스트용 엔드포인트 (비동기)",
+    description="""### **요약**
+개발 및 테스트 목적으로 사용되는 간단한 비동기 엔드포인트입니다. 문자열 입력을 받아 백그라운드에서 처리하고, `request_id`를 즉시 반환합니다.
+
+### **작동 방식**
+1.  **요청 접수**: `input_file`, `prompt_file` 등을 받아 고유 `request_id`를 생성하고 즉시 반환합니다.
+2.  **백그라운드 처리**:
+    -   입력받은 값들을 그대로 결과로 저장하는 간단한 작업을 5초 동안 수행합니다.
+3.  **상태 및 결과 확인**: `GET /extract/progress/{request_id}`로 작업 상태와 최종 결과를 조회합니다.
+
+### **입력 (multipart/form-data)**
+-   `input_file` (**필수**): 파일 경로 또는 임의의 문자열.
+-   `prompt_file` (**필수**): 파일 경로 또는 임의의 문자열.
+-   `llm_model` (선택): 사용할 LLM 모델 이름. (기본값: `gemini-2.5-flash`)
+
+### **출력 (application/json)**
+-   **초기 응답**:
+    ```json
+    {
+      "message": "테스트 작업이 백그라운드에서 실행 중입니다.",
+      "request_id": "고유한 요청 ID",
+      "status_check_url": "/extract/progress/고유한 요청 ID"
+    }
+    ```
+-   **최종 결과**: `GET /extract/progress/{request_id}`를 통해 확인 가능.
+""",
+)
+async def test_endpoint(
+    request_info: Request,
+    input_file: str = Form(...),
+    prompt_file: str = Form(...),
+    llm_model: str = Form("gemini-2.5-flash"),
+    api_key: str = Depends(get_api_key),
+):
+    # ✅ 고유한 요청 ID 생성
+    request_id = create_key()
+    result_id = create_key()
+
+    # ✅ 백그라운드에서 작업 실행
+    asyncio.create_task(
+        InferenceHandler.handle_test_background(
+            request_id=request_id,
+            result_id=result_id,
+            input_file=input_file,
+            prompt_file=prompt_file,
+            llm_model=llm_model,
+            request_info=request_info,
+            api_key=api_key,
+            mode="outer",
+        )
+    )
+
+    # ✅ request_id → result_id 매핑 저장
+    redis_client.hset("pipeline_result_mapping", request_id, result_id)
+
+    return JSONResponse(
+        content={
+            "message": "테스트 작업이 백그라운드에서 실행 중입니다.",
+            "request_id": request_id,
+            "status_check_url": f"/extract/progress/{request_id}",
+        }
+    )
--- a/src/routers/general_router.py
+++ b/src/routers/general_router.py
@@ -0,0 +1,305 @@
+import asyncio
+import json
+from typing import Optional
+
+from fastapi import Depends, File, Form, Request, UploadFile
+from fastapi.responses import JSONResponse
+from redis import Redis
+
+from config.setting import (
+    PGN_REDIS_DB,
+    PGN_REDIS_HOST,
+    PGN_REDIS_PORT,
+)
+from services.inference_service import InferenceHandler
+from utils.checking_files import (
+    clone_upload_file,
+    validate_all_files,
+)
+from utils.checking_keys import create_key, get_api_key
+from utils.custom_router import CustomAPIRouter
+from utils.minio_utils import fetch_result_from_minio
+
+# Redis 클라이언트 (LLM Gateway 전용)
+redis_client = Redis(host=PGN_REDIS_HOST, port=PGN_REDIS_PORT, db=PGN_REDIS_DB, decode_responses=True)
+
+
+router = CustomAPIRouter(prefix="/general", tags=["General"])
+
+
+# ✅ 공통 비동기 추론 엔드포인트 생성기
+def register_general_route(path: str, mode: str, default_llm: str, default_ocr: str, summary: str, description: str):
+    @router.post(path, summary=summary, description=description)
+    async def general_endpoint(
+        request_info: Request,
+        input_file: UploadFile = File(...),
+        prompt_file: UploadFile = File(...),
+        llm_model: Optional[str] = Form(default=default_llm),
+        ocr_model: Optional[str] = Form(default=default_ocr),
+        api_key: str = Depends(get_api_key),
+    ):
+        validate_all_files(input_file)
+
+        # ✅ 고유한 요청 ID 생성
+        request_id = create_key()
+        result_id = create_key()
+
+        cloned_input = clone_upload_file(input_file) if input_file else None
+        cloned_prompt = clone_upload_file(prompt_file) if prompt_file else None
+
+        # ✅ 백그라운드에서 작업 실행
+        asyncio.create_task(
+            InferenceHandler.handle_general_background(
+                request_id=request_id,
+                result_id=result_id,
+                input_file=cloned_input,
+                prompt_file=cloned_prompt,
+                llm_model=llm_model,
+                ocr_model=ocr_model,
+                mode=mode,
+                request_info=request_info,
+                api_key=api_key,
+            )
+        )
+
+        # ✅ request_id → result_id 매핑 저장
+        redis_client.hset("pipeline_result_mapping", request_id, result_id)
+
+        return JSONResponse(
+            content={
+                "message": "문서 추출 및 생성형 응답 작업이 백그라운드에서 실행 중입니다.",
+                "request_id": request_id,
+                "status_check_url": f"/general/progress/{request_id}",
+            }
+        )
+
+    # FastAPI 문서화용 정보 부여
+    general_endpoint.__name__ = f"general_{mode}"
+    general_endpoint.__doc__ = description
+    return general_endpoint
+
+
+# ✅ 내부 모델용 등록
+general_inner = register_general_route(
+    path="/inner",
+    mode="inner",
+    default_llm="gemma3:27b",
+    default_ocr="upstage",
+    summary="내부 LLM 기반 범용 추론 요청 (비동기)",
+    description="""### **요약**
+내부망에 배포된 LLM(Ollama 기반)을 사용하여 문서 기반의 범용 추론을 비동기적으로 요청합니다. 이 엔드포인트는 파일(PDF, 이미지 등)에서 텍스트를 추출하고, 사용자가 제공한 프롬프트를 적용하여 결과를 생성합니다.
+
+### **작동 방식**
+1.  **요청 접수**: `input_file`, `prompt_file` 등을 받아 고유한 `request_id`를 생성하고 즉시 반환합니다.
+2.  **백그라운드 처리**:
+    -   `input_file`이 문서나 이미지일 경우, **OCR API**를 호출하여 텍스트를 추출합니다.
+    -   추출된 텍스트와 `prompt_file`의 내용을 조합하여 최종 프롬프트를 구성합니다.
+    -   내부 LLM(Ollama)에 추론을 요청합니다.
+3.  **상태 및 결과 확인**: 반환된 `request_id`를 사용하여 `GET /general/progress/{request_id}` 엔드포인트에서 작업 진행 상태와 최종 결과를 조회할 수 있습니다.
+
+### **입력 (multipart/form-data)**
+-   `input_file` (**필수**): 추론의 기반이 될 문서 파일.
+    -   지원 형식: `.pdf`, `.docx`, `.jpg`, `.png`, `.jpeg` 등.
+    -   내부적으로 OCR을 통해 텍스트가 자동 추출됩니다.
+-   `prompt_file` (**필수**): LLM에 전달할 명령어(프롬프트)가 포함된 `.txt` 파일.
+-   `model` (선택): 사용할 내부 LLM 모델 이름. (기본값: `gemma3:27b`)
+
+### **출력 (application/json)**
+-   **초기 응답**:
+    ```json
+    {
+      "message": "작업이 백그라운드에서 실행 중입니다.",
+      "request_id": "고유한 요청 ID",
+      "status_check_url": "/general/progress/고유한 요청 ID"
+    }
+    ```
+-   **최종 결과**: `GET /general/progress/{request_id}`를 통해 확인 가능.
+""",
+)
+
+# ✅ 외부 모델용 등록
+general_outer = register_general_route(
+    path="/outer",
+    mode="outer",
+    default_llm="gemini-2.5-flash",
+    default_ocr="upstage",
+    summary="외부 LLM 기반 범용 추론 요청 (비동기)",
+    description="""### **요약**
+외부 상용 LLM(예: GPT, Gemini, Claude)을 사용하여 문서 기반의 범용 추론을 비동기적으로 요청합니다. 기능과 작동 방식은 내부 LLM용 엔드포인트와 동일하나, 외부 API를 호출하는 점이 다릅니다.
+
+### **작동 방식**
+1.  **요청 접수**: `input_file`, `prompt_file` 등을 받아 고유한 `request_id`를 생성하고 즉시 반환합니다.
+2.  **백그라운드 처리**:
+    -   `input_file`에서 **OCR API**를 통해 텍스트를 추출합니다.
+    -   추출된 텍스트와 `prompt_file`의 내용을 조합하여 최종 프롬프트를 구성합니다.
+    -   외부 LLM API(OpenAI, Google, Anthropic 등)에 추론을 요청합니다.
+3.  **상태 및 결과 확인**: 반환된 `request_id`를 사용하여 `GET /general/progress/{request_id}` 엔드포인트에서 작업 진행 상태와 최종 결과를 조회할 수 있습니다.
+
+### **입력 (multipart/form-data)**
+-   `input_file` (**필수**): 추론의 기반이 될 문서 파일.
+    -   지원 형식: `.pdf`, `.docx`, `.jpg`, `.png`, `.jpeg` 등.
+-   `prompt_file` (**필수**): LLM에 전달할 프롬프트가 포함된 `.txt` 파일.
+-   `model` (선택): 사용할 외부 LLM 모델 이름. (기본값: `gemini-2.5-flash`)
+
+### **출력 (application/json)**
+-   **초기 응답**:
+    ```json
+    {
+      "message": "작업이 백그라운드에서 실행 중입니다.",
+      "request_id": "고유한 요청 ID",
+      "status_check_url": "/general/progress/고유한 요청 ID"
+    }
+    ```
+-   **최종 결과**: `GET /general/progress/{request_id}`를 통해 확인 가능.
+""",
+)
+
+# ✅ 멀티모달 모델용 등록
+general_e2e = register_general_route(
+    path="/outer/e2e",
+    mode="multimodal",
+    default_llm="gemini-2.5-flash",
+    default_ocr="Not Used OCR",
+    summary="멀티모달 LLM 기반 범용 추론 요청 (비동기)",
+    description="""### **요약**
+멀티모달 상용 LLM(예: GPT, Gemini)을 사용하여 문서 기반의 범용 추론을 비동기적으로 요청합니다.
+
+### **작동 방식**
+1.  **요청 접수**: `input_file`, `prompt_file` 등을 받아 고유한 `request_id`를 생성하고 즉시 반환합니다.
+2.  **백그라운드 처리**:
+    -   추론 단계에서 시스템은 멀티모달 출력을 생성하도록 특화된 프롬프트를 사용합니다.
+    -   외부 멀티모달 LLM API(OpenAI, Google 등)에 추론을 요청합니다.
+3.  **상태 및 결과 확인**: 반환된 `request_id`를 사용하여 `GET /general/progress/{request_id}` 엔드포인트에서 작업 진행 상태와 최종 결과를 조회할 수 있습니다.
+
+### **입력 (multipart/form-data)**
+-   `input_file` (**필수**): 추론의 기반이 될 문서 파일.
+    -   지원 형식: `.pdf`, `.docx`, `.jpg`, `.png`, `.jpeg` 등.
+-   `prompt_file` (**필수**): LLM에 전달할 프롬프트가 포함된 `.txt` 파일.
+-   `model` (선택): 사용할 외부 LLM 모델 이름. (기본값: `gemini-2.5-flash`)
+
+### **출력 (application/json)**
+-   **초기 응답**:
+    ```json
+    {
+      "message": "작업이 백그라운드에서 실행 중입니다.",
+      "request_id": "고유한 요청 ID",
+      "status_check_url": "/general/progress/고유한 요청 ID"
+    }
+    ```
+-   **최종 결과**: `GET /general/progress/{request_id}`를 통해 확인 가능.
+""",
+)
+
+
+# ✅ 상태 로그 조회 API
+@router.get(
+    "/progress/{request_id}",
+    summary="범용 추론 작업 상태 및 결과 조회",
+    description="""### **요약**
+`POST /general/inner`, `POST /general/outer` 또는 `POST /general/outer/e2e` 요청 시 반환된 `request_id`를 사용하여, 해당 작업의 진행 상태와 최종 결과를 조회합니다.
+
+### **작동 방식**
+-   `request_id`를 기반으로 Redis에 저장된 작업 로그와 결과 데이터를 조회합니다.
+-   작업이 진행 중일 때는 현재까지의 로그를, 완료되었을 때는 로그와 함께 최종 결과(`final_result`)를 반환합니다.
+
+### **입력**
+-   `request_id`: 조회할 작업의 고유 ID.
+
+### **출력 (application/json)**
+-   **성공 시**:
+    ```json
+    {
+      "request_id": "요청 시 사용된 ID",
+      "progress_logs": [
+        { "timestamp": "...", "status": "OCR 시작", "details": "..." },
+        { "timestamp": "...", "status": "입력 길이 검사 시작", "details": "..." },
+        { "timestamp": "...", "status": "LLM 추론 시작", "details": "..." },
+        { "timestamp": "...", "status": "LLM 추론 완료 및 후처리 시작", "details": "..." },
+        { "timestamp": "...", "status": "후처리 완료 및 결과 반환", "details": "..." }
+      ],
+      "final_result": {
+        "filename": "입력 파일",
+        "processed": "LLM의 최종 응답 내용"
+      }
+    }
+    ```
+-   **ID가 유효하지 않을 경우 (404 Not Found)**:
+    ```json
+    {
+      "message": "{request_id}에 대한 상태 로그가 없습니다."
+    }
+    ```
+""",
+)
+async def get_pipeline_status(request_id: str):
+    """Return the progress logs and, once stored, the final result of a job.
+
+    The result is looked up in Redis first (through the request_id ->
+    result_id mapping) and then in MinIO. When the Redis payload cannot be
+    decoded and MinIO holds no object, ``final_result`` is a ``{"message": ...}``
+    note about the decode failure rather than ``None``.
+
+    Args:
+        request_id: ID handed out when the job was submitted.
+
+    Returns:
+        JSONResponse carrying ``request_id``, ``progress_logs`` and
+        ``final_result``; a 404 JSONResponse when no progress logs exist.
+    """
+    # NOTE(review): redis_client and fetch_result_from_minio are called without
+    # await inside this async handler; if they perform blocking I/O they stall
+    # the event loop - confirm and consider running them in a thread pool.
+
+    # 1. Progress logs; an empty list is treated as an unknown request_id.
+    logs = redis_client.lrange(f"pipeline_status:{request_id}", 0, -1)
+    if not logs:
+        return JSONResponse(
+            status_code=404,
+            content={"message": f"{request_id}에 대한 상태 로그가 없습니다."},
+        )
+    parsed_logs = [json.loads(log) for log in logs]
+
+    def _respond(result):
+        # Every non-404 answer shares this payload shape.
+        return JSONResponse(
+            content={
+                "request_id": request_id,
+                "progress_logs": parsed_logs,
+                "final_result": result,
+            }
+        )
+
+    # 2. request_id -> result_id mapping
+    result_id = redis_client.hget("pipeline_result_mapping", request_id)
+
+    final_result = None
+    if result_id:
+        # 3. Final result cached in Redis
+        result_str = redis_client.get(f"pipeline_result:{result_id}")
+        if result_str:
+            try:
+                return _respond(json.loads(result_str))
+            except json.JSONDecodeError:
+                # Not returned yet: MinIO may still hold a readable copy. This
+                # note is the answer only if MinIO has no object either.
+                final_result = {"message": "[REDIS] 결과 존재하지만, 디코딩에 실패했습니다."}
+        else:
+            print(f"[REDIS] request_id {request_id} 가 Redis에 없습니다.")
+
+    # 4. Fall back to MinIO when Redis produced no usable result.
+    try:
+        print(f"[MINIO] MinIO에서 결과를 가져오는 중: {request_id}")
+        result_str = fetch_result_from_minio(request_id)
+        if result_str:
+            try:
+                final_result = json.loads(result_str)
+            except json.JSONDecodeError:
+                # The stored object exists but is not valid JSON.
+                final_result = {"error": "Result found in MinIO but failed to decode JSON."}
+        # Without a MinIO object, final_result is still None (no result stored
+        # yet) or the Redis decode note set above.
+        return _respond(final_result)
+    except Exception as e:
+        print(f"[MINIO] MinIO 결과 조회 중 실패했습니다: {e}")
+        # Report the lookup failure alongside the logs instead of raising.
+        return _respond({"error": f"Failed to fetch result from MinIO: {e}"})
--- a/src/routers/guide_router.py
+++ b/src/routers/guide_router.py
@@ -0,0 +1,47 @@
+from fastapi import APIRouter
+from fastapi.responses import FileResponse, HTMLResponse
+
+from config.setting import (
+    EXTRACT_DEFAULT_PATH,
+    GENERAL_GUIDE_PATH,
+    SCHEMA_FILE_PATH,
+)
+
+router = APIRouter(tags=["Guide Book"])
+
+
+# ✅ /schema_json 가이드 HTML
+@router.get(
+    "/schema_file_guide",
+    summary="schema 파일 작성 가이드북 HTML 보기",
+    description=(
+        "📄 본 가이드북은 <strong>/general</strong> 및 <strong>/extract/structured</strong> "
+        "엔드포인트에 첨부되는 <strong>schema_file</strong> 작성법을 설명합니다.<br><br>"
+        "가이드북은 <a href='/schema_file_guide' target='_blank'>여기</a>에서 확인하세요."
+    ),
+    response_class=HTMLResponse,
+)
+async def schema_guide():
+    return FileResponse(SCHEMA_FILE_PATH, media_type="text/html")
+
+
+# ✅ /general 가이드 HTML
+@router.get(
+    "/general_guide",
+    summary="/general 가이드북 HTML 보기",
+    description="가이드북을 <a href='/general_guide' target='_blank'>여기</a>에서 확인하세요.",
+    response_class=HTMLResponse,
+)
+async def general_guide():
+    return FileResponse(GENERAL_GUIDE_PATH, media_type="text/html")
+
+
+# ✅ /extract 가이드 HTML
+@router.get(
+    "/extract_guide",
+    summary="/extract 가이드북 HTML 보기",
+    description="가이드북을 <a href='/extract_guide' target='_blank'>여기</a>에서 확인하세요.",
+    response_class=HTMLResponse,
+)
+async def extract_guide():
+    return FileResponse(EXTRACT_DEFAULT_PATH, media_type="text/html")
--- a/src/routers/llm_summation.py
+++ b/src/routers/llm_summation.py
@@ -0,0 +1,84 @@
+import logging
+
+from fastapi import BackgroundTasks, Depends
+from pydantic import BaseModel
+
+from services.report import (
+    ask_ollama_qwen,
+    dialog_ask_gemini,
+    run_all_models,
+    tasks_store,
+    total_summation,
+)
+from utils.checking_keys import create_key
+from utils.custom_router import CustomAPIRouter
+from utils.logging_utils import EndpointLogger
+
+# ------------------------------------------
+
+# 로깅 설정
+logger = logging.getLogger(__name__)
+
+router = CustomAPIRouter(tags=["summary"])
+
+
+class SummaryRequest(BaseModel):
+    text: str
+
+
+@router.post("/summary")  # STT 요약 모델
+async def summarize(request: SummaryRequest, endpoint_logger: EndpointLogger = Depends(EndpointLogger)):
+    endpoint_logger.log(
+        llm_model="gpt-4.1-mini, qwen3:custom, gemini-2.5-flash, claude-3-7-sonnet-latest",
+        input_filename="None",
+        prompt_filename="None",
+        context_length=len(request.text),
+    )
+
+    results = await total_summation(request.text)
+    return {"summary_results": results}
+
+
+@router.post("/ollama_summary")  # ollama 모델 전용
+async def ollama_summary(request: SummaryRequest, endpoint_logger: EndpointLogger = Depends(EndpointLogger)):
+    endpoint_logger.log(
+        llm_model="qwen3:custom",
+        input_filename="None",
+        prompt_filename="None",
+        context_length=len(request.text),
+    )
+
+    results = await ask_ollama_qwen(request.text)
+    return {"summary_results": results}
+
+
+@router.post("/gemini_summary")
+async def gemini_summary(request: SummaryRequest):
+    results = await dialog_ask_gemini(request.text)
+    return {"summary_results": results}
+
+
+@router.post("/task_summary")  # 모델 별 전체 요약
+async def task_summary(
+    request: SummaryRequest,
+    background_tasks: BackgroundTasks,
+    endpoint_logger: EndpointLogger = Depends(EndpointLogger),
+):
+    endpoint_logger.log(
+        llm_model="gpt-4.1-mini, qwen3:custom, gemini-2.5-flash, claude-3-7-sonnet-latest",
+        input_filename="None",
+        prompt_filename="None",
+        context_length=len(request.text),
+    )
+
+    task_id = create_key()
+    background_tasks.add_task(run_all_models, request.text, task_id)
+    return {"task_id": task_id}
+
+
+@router.get("/task_summary/{task_id}")  # 모델 별 요약 조회
+async def get_status(task_id: str):
+    task = tasks_store.get(task_id)
+    if not task:
+        return {"error": "Invalid task_id"}
+    return task
--- a/src/routers/model_router.py
+++ b/src/routers/model_router.py
@@ -0,0 +1,17 @@
+from services.model_service import ModelInfoService
+from utils.custom_router import CustomAPIRouter
+
+router = CustomAPIRouter(tags=["Model Management"])
+
+
+# ✅ GET:사용 가능한 모델 조회 API
+@router.get(
+    "/info",
+    summary="'/extract', '/general' 에서 사용 가능한 모델 목록 확인",
+    description="""
+    ✅ 'inner(내부용)' 와 'outer(외부용)' 모델의 사용 가능한 목록을 확인합니다.<br>
+    ✅ 'Try it out' → 'Execute' 순서로 클릭합니다.<br>
+    """,
+)
+async def get_model_info():
+    return await ModelInfoService.get_model_info(mode="info")
--- a/src/routers/ocr_router.py
+++ b/src/routers/ocr_router.py
@@ -0,0 +1,150 @@
+import logging
+from typing import Optional
+
+import httpx
+from fastapi import Depends, File, Form, HTTPException, UploadFile
+from fastapi.responses import JSONResponse
+
+from config.setting import (
+    MINIO_BUCKET_NAME,
+    OCR_API_URL,
+)
+from utils.checking_files import validate_all_files
+from utils.checking_keys import create_key
+from utils.custom_router import CustomAPIRouter
+from utils.logging_utils import EndpointLogger
+from utils.minio_utils import upload_file_to_minio_v2  # ✅ MinIO 유틸 함수 import
+
+router = CustomAPIRouter(prefix="/ocr", tags=["OCR"])
+logger = logging.getLogger(__name__)
+
+
+@router.post(
+    "",
+    summary="문서 OCR 요청 (비동기)",
+    description="""### **요약**
+문서 파일(PDF, 이미지 등)을 받아 텍스트를 추출하는 OCR(광학 문자 인식) 작업을 비동기적으로 요청합니다.
+
+### **작동 방식**
+1.  **요청 접수**: `file`을 받아 고유 `request_id`를 생성하고 즉시 반환합니다.
+2.  **백그라운드 처리**:
+    -   업로드된 파일을 내부 저장소(MinIO)에 저장합니다.
+    -   별도의 OCR 서버에 텍스트 추출 작업을 요청합니다.
+3.  **상태 및 결과 확인**: 반환된 `request_id`를 사용하여 `GET /ocr/progress/{request_id}`로 작업 상태를, `GET /ocr/result/{request_id}`로 최종 텍스트 결과를 조회할 수 있습니다.
+
+### **입력 (multipart/form-data)**
+-   `file` (**필수**): 텍스트를 추출할 문서 파일.
+    -   지원 형식: `.pdf`, `.jpg`, `.png`, `.jpeg` 등 OCR 서버가 지원하는 형식.
+
+### **출력 (application/json)**
+-   **초기 응답**:
+    ```json
+    [
+      {
+        "request_id": "고유한 요청 ID",
+        "status": "작업 접수",
+        "message": "아래 URL을 통해 작업 상태 및 결과를 확인하세요."
+      }
+    ]
+    ```
+-   **최종 결과**: `GET /ocr/result/{request_id}`를 통해 확인 가능.
+""",
+)
+async def ocr_only(
+    file: UploadFile = File(...),
+    ocr_model: Optional[str] = Form(default="upstage"),
+    endpoint_logger: EndpointLogger = Depends(EndpointLogger),
+):
+    """Upload ``file`` to MinIO and hand its presigned URL to the OCR server.
+
+    Returns a JSONResponse holding one entry per item of the OCR server's
+    ``results`` list, each carrying the ``request_id`` issued by that server.
+
+    Raises:
+        HTTPException: 500 when the MinIO upload or the OCR request fails.
+    """
+    validate_all_files(file)
+    # context_length is not meaningful for an OCR-only request, hence 0.
+    endpoint_logger.log(ocr_model=ocr_model, input_filename=file.filename, context_length=0)
+
+    # 1. Unique object path for this upload.
+    object_name = f"{create_key()}/{file.filename}"
+
+    # 2. Upload to MinIO; a failure becomes a 500 with a generic detail.
+    try:
+        presigned_url = upload_file_to_minio_v2(file=file, bucket_name=MINIO_BUCKET_NAME, object_name=object_name)
+    except Exception as e:
+        logger.error(f"MinIO upload failed: {e}")
+        raise HTTPException(status_code=500, detail="File upload to storage failed.")
+    # Log the object path only: the presigned URL itself grants access to the file.
+    logger.info(f"[MinIO] ✅ presigned URL 생성 완료: {object_name}")
+
+    # 3. Forward the presigned URL to the OCR API.
+    try:
+        logger.debug(f"OCR_API_URL: {OCR_API_URL}")
+        async with httpx.AsyncClient() as client:
+            resp = await client.post(
+                OCR_API_URL,
+                json={"file_url": presigned_url, "filename": file.filename, "ocr_model": ocr_model},
+                timeout=None,  # NOTE(review): unbounded wait; presumably OCR can be slow - confirm
+            )
+        resp.raise_for_status()
+        ocr_items = resp.json().get("results", [])
+    except Exception:
+        logger.exception("[OCR] ❌ 예기치 못한 오류 발생")
+        raise HTTPException(status_code=500, detail="OCR 요청 처리 중 내부 오류 발생")
+
+    # 4. Expose the request_id the OCR server issued for each item.
+    results = [
+        {"request_id": item.get("request_id"), "status": "작업 접수", "message": "아래 URL을 통해 작업 상태 및 결과를 확인하세요."}
+        for item in ocr_items
+    ]
+    return JSONResponse(content=results)
+
+
+@router.get(
+    "/progress/{request_id}",
+    summary="OCR 작업 상태 조회",
+    description="""### **요약**
+`POST /ocr` 요청 시 반환된 `request_id`를 사용하여 OCR 작업의 현재 진행 상태를 조회합니다.
+
+### **작동 방식**
+-   `request_id`를 OCR 서버에 전달하여 해당 작업의 상태를 가져옵니다.
+-   상태는 보통 'PENDING', 'IN_PROGRESS', 'SUCCESS', 'FAILURE' 등으로 표시됩니다.
+
+### **입력**
+-   `request_id`: 조회할 OCR 작업의 고유 ID.
+
+### **출력 (application/json)**
+-   **성공 시**:
+    ```json
+    {
+      "request_id": "요청 시 사용된 ID",
+      "progress_logs": [
+        { "timestamp": "...", "status": "OCR 시작", "details": "..." },
+        { "timestamp": "...", "status": "입력 길이 검사 시작", "details": "..." },
+        { "timestamp": "...", "status": "LLM 추론 시작", "details": "..." },
+        { "timestamp": "...", "status": "LLM 추론 완료 및 후처리 시작", "details": "..." },
+        { "timestamp": "...", "status": "후처리 완료 및 결과 반환", "details": "..." }
+      ],
+      "final_result": {
+        "filename": "입력 파일",
+        "parsed": "OCR 결과 내용"
+      }
+    }
+    ```
+-   **ID가 유효하지 않을 경우 (404 Not Found)**:
+    ```json
+    {
+      "detail": "Meeting ID {request_id} 작업 없음"
+    }
+    ```
+""",
+)
+async def get_pipeline_status(request_id: str):
+    try:
+        async with httpx.AsyncClient() as client:
+            response = await client.get(f"{OCR_API_URL}/progress/{request_id}")
+        return JSONResponse(content=response.json(), status_code=response.status_code)
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"OCR 상태 조회 실패: {str(e)}")
--- a/src/routers/stt_router.py
+++ b/src/routers/stt_router.py
@@ -0,0 +1,140 @@
+# llmgateway/routers/stt_proxy.py
+import logging
+
+import httpx
+from fastapi import Depends, File, Form, HTTPException, UploadFile
+from fastapi.responses import JSONResponse
+
+from utils.checking_keys import create_key
+from utils.custom_router import CustomAPIRouter
+from utils.logging_utils import EndpointLogger
+from utils.minio_utils import upload_file_to_minio_v2
+
+router = CustomAPIRouter(tags=["STT Gateway"])
+
+STT_API_BASE_URL = "http://stt_fastapi:8899/ccp"  # docker-compose 내 서비스명 기반
+MULTI_STT_API_BASE_URL = "http://stt_fastapi:8899/dialog"  # docker-compose 내 서비스명 기반
+logger = logging.getLogger(__name__)
+
+
+# 파일 업로드 → stt_api에 Presigned URL 전달
+@router.post("/audio")
+async def proxy_audio(
+    audio_file: UploadFile = File(...),
+    endpoint_logger: EndpointLogger = Depends(EndpointLogger),
+):
+    request_id = create_key()
+    bucket_name = "stt-gateway"
+    object_name = f"{request_id}/{audio_file.filename}"
+
+    try:
+        # upload_file_to_minio_v2는 presigned URL을 반환합니다.
+        presigned_url = upload_file_to_minio_v2(
+            file=audio_file,
+            bucket_name=bucket_name,
+            object_name=object_name,
+        )
+    except Exception as e:
+        logger.error(f"MinIO upload failed: {e}")
+        raise HTTPException(status_code=500, detail="File upload to storage failed.")
+
+    # 로깅
+    endpoint_logger.log(input_filename=audio_file.filename)
+
+    # stt_fastapi에 Presigned URL 정보 전달
+    try:
+        async with httpx.AsyncClient() as client:
+            payload = {
+                "file_url": presigned_url,
+                "language": "ko",
+            }
+            response = await client.post(f"{STT_API_BASE_URL}/audio", json=payload)
+        return JSONResponse(content=response.json(), status_code=response.status_code)
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"STT API 호출 실패: {str(e)}")
+
+
+# 상태 조회 → stt_api에 중계 및 오류 로깅
+@router.get("/progress/{request_id}")
+async def proxy_progress(request_id: str):
+    try:
+        async with httpx.AsyncClient() as client:
+            response = await client.get(f"{STT_API_BASE_URL}/progress/{request_id}")
+            response.raise_for_status()  # HTTP 오류 발생 시 예외 처리
+
+            # 응답 데이터 확인 및 로깅
+            data = response.json()
+            if data.get("celery_status") == "FAILURE":
+                # 상세 오류 정보를 포함하여 에러 로그 기록
+                error_details = data.get("progress_logs", [])
+                logger.error(f"[ERROR] STT task failed for request_id {request_id}. Details: {error_details}")
+
+            return JSONResponse(content=data, status_code=response.status_code)
+
+    except httpx.HTTPStatusError as e:
+        logger.error(
+            f"STT progress check failed with status {e.response.status_code} for request_id {request_id}: {e.response.text}"
+        )
+        raise HTTPException(status_code=e.response.status_code, detail=f"STT 상태 조회 실패: {e.response.text}")
+    except Exception as e:
+        logger.error(f"An unexpected error occurred while checking STT progress for request_id {request_id}: {e}")
+        raise HTTPException(status_code=500, detail=f"STT 상태 조회 실패: {str(e)}")
+
+
+# 다중 입력 회의 → stt_api에 Presigned URL 전달
+@router.post("/dialog_processing")
+async def proxy_dialog_processing(
+    audio_file: UploadFile = File(...),
+    meeting_tag: str = Form(...),
+    endpoint_logger: EndpointLogger = Depends(EndpointLogger),
+):
+    bucket_name = "stt-gateway"
+    request_id = create_key()
+    object_name = f"{meeting_tag}_{request_id}/{audio_file.filename}"
+
+    try:
+        presigned_url = upload_file_to_minio_v2(
+            file=audio_file,
+            bucket_name=bucket_name,
+            object_name=object_name,
+        )
+    except Exception as e:
+        logger.error(f"MinIO upload failed for dialog_processing: {e}")
+        raise HTTPException(status_code=500, detail="File upload to storage failed.")
+
+    # 로깅
+    endpoint_logger.log(input_filename=audio_file.filename)
+
+    # stt_fastapi에 Presigned URL 정보 전달
+    try:
+        async with httpx.AsyncClient() as client:
+            payload = {
+                "file_url": presigned_url,
+                "meeting_tag": meeting_tag,
+            }
+            resp = await client.post(f"{MULTI_STT_API_BASE_URL}/dialog_processing", json=payload)
+        return JSONResponse(status_code=resp.status_code, content=resp.json())
+    except httpx.RequestError as e:
+        raise HTTPException(status_code=500, detail=f"내부 서버 요청 실패: {e}")
+
+
+@router.get("/start_parallel_stt/{meeting_tag}")
+async def proxy_start_parallel_stt(meeting_tag: str):
+    async with httpx.AsyncClient() as client:
+        try:
+            resp = await client.get(f"{MULTI_STT_API_BASE_URL}/start_parallel_stt/{meeting_tag}")
+        except httpx.RequestError as e:
+            raise HTTPException(status_code=500, detail=f"내부 서버 요청 실패: {e}")
+
+    return JSONResponse(status_code=resp.status_code, content=resp.json())
+
+
+@router.get("/dialog_result/{task_id}")
+async def proxy_get_progress(task_id: str):
+    async with httpx.AsyncClient() as client:
+        try:
+            resp = await client.get(f"{MULTI_STT_API_BASE_URL}/result/parallel/{task_id}")
+        except httpx.RequestError as e:
+            raise HTTPException(status_code=500, detail=f"내부 서버 요청 실패: {e}")
+
+    return JSONResponse(status_code=resp.status_code, content=resp.json())
--- a/src/routers/yolo_router.py
+++ b/src/routers/yolo_router.py
@@ -0,0 +1,80 @@
+# src/routers/yolo_router.py
+import io
+import logging
+
+import httpx
+from fastapi import Depends, File, HTTPException, Request, UploadFile
+from fastapi.responses import JSONResponse, StreamingResponse
+
+from utils.checking_keys import create_key
+from utils.custom_router import CustomAPIRouter
+from utils.logging_utils import EndpointLogger
+from utils.minio_utils import upload_file_to_minio_v2
+
+router = CustomAPIRouter(tags=["YOLO Gateway"])
+
+YOLO_BASE_URL = "http://yolo_gateway:8891"  # docker-compose 내 서비스명 기반
+
+logger = logging.getLogger(__name__)
+
+
+@router.post("/detect_view")
+async def proxy_audio(
+    request_info: Request,
+    image_file: UploadFile = File(...),
+    endpoint_logger: EndpointLogger = Depends(EndpointLogger),
+):
+    request_id = create_key()
+    bucket_name = "yolo-gateway"
+    object_name = f"{request_id}/{image_file.filename}"
+
+    try:
+        presigned_url = upload_file_to_minio_v2(
+            file=image_file,
+            bucket_name=bucket_name,
+            object_name=object_name,
+        )
+    except Exception as e:
+        logger.error(f"MinIO upload failed: {e}")
+        raise HTTPException(status_code=500, detail="File upload to storage failed.")
+
+    endpoint_logger.log(llm_model="yolo11x", input_filename=image_file.filename, context_length=0)
+
+    try:
+        async with httpx.AsyncClient() as client:
+            payload = {
+                "request_id": request_id,
+                "file_url": presigned_url,
+            }
+            response = await client.post(f"{YOLO_BASE_URL}/detect", json=payload)
+        return JSONResponse(content=response.json(), status_code=response.status_code)
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=f"YOLO API 호출 실패: {str(e)}")
+
+
+# Proxy for images served by the YOLO server
+@router.get("/detect_view/images/{request_id}")
+async def proxy_get_image(request_id: str):
+    """Relay the YOLO image for ``request_id`` as JPEG; upstream HTTP error statuses (e.g. 404) are passed through."""
+    try:
+        async with httpx.AsyncClient() as client:
+            response = await client.get(f"{YOLO_BASE_URL}/images/{request_id}")
+            response.raise_for_status()
+    except httpx.HTTPError as e:
+        status = e.response.status_code if isinstance(e, httpx.HTTPStatusError) else 500
+        raise HTTPException(status_code=status, detail=f"YOLO 이미지 요청 실패: {str(e)}")
+    return StreamingResponse(io.BytesIO(response.content), media_type="image/jpeg")
+
+
+# Proxy for JSON results served by the YOLO server
+@router.get("/detect_view/results/{request_id}")
+async def proxy_get_results(request_id: str):
+    """Relay the YOLO JSON result for ``request_id``; upstream HTTP error statuses (e.g. 404) are passed through."""
+    try:
+        async with httpx.AsyncClient() as client:
+            response = await client.get(f"{YOLO_BASE_URL}/results/{request_id}")
+            response.raise_for_status()
+    except httpx.HTTPError as e:
+        status = e.response.status_code if isinstance(e, httpx.HTTPStatusError) else 500
+        raise HTTPException(status_code=status, detail=f"YOLO 결과 요청 실패: {str(e)}")
+    return JSONResponse(content=response.json(), status_code=response.status_code)
--- a/src/services/init.py
+++ b/src/services/init.py
--- a/src/services/api_key_service.py
+++ b/src/services/api_key_service.py
@@ -0,0 +1,171 @@
+import json
+import os
+import secrets
+import time
+
+import redis
+
+from utils.redis_utils import get_redis_client
+
+# Redis에 API 키를 저장할 때 사용할 접두사
+API_KEY_PREFIX = "api_key:"
+# Relative path, so it is resolved against the process's current working directory.
+API_KEYS_FILE = "api_keys.json"
+
+
+def _read_keys_from_file():
+    """Helper function to read all keys from the JSON file."""
+    if not os.path.exists(API_KEYS_FILE):
+        # 파일이 없으면 빈 파일을 생성하고 안내 메시지 출력
+        print(f"API key file not found at {API_KEYS_FILE}. Creating a new empty file.")
+        _write_keys_to_file({})
+        return {}
+
+    with open(API_KEYS_FILE, "r") as f:
+        try:
+            return json.load(f)
+        except json.JSONDecodeError:
+            print(f"Warning: Could not decode JSON from {API_KEYS_FILE}. Treating as empty.")
+            return {}
+
+
+def _write_keys_to_file(keys):
+    """Helper function to write all keys to the JSON file."""
+    with open(API_KEYS_FILE, "w") as f:
+        json.dump(keys, f, indent=4)
+
+
+def load_api_keys_from_file():
+    """
+    Seed Redis with the API keys stored in the JSON key file.
+
+    Keys that already exist in Redis are left untouched. A Redis connection
+    error is retried a fixed number of times with a pause in between, which
+    covers a Redis instance that is not reachable yet at startup.
+    """
+    keys_from_file = _read_keys_from_file()
+    if not keys_from_file:
+        print("API key file is empty. Skipping loading keys to Redis.")
+        return
+
+    redis_client = get_redis_client()
+    max_retries, retry_delay = 5, 2  # attempts, seconds between attempts
+
+    for attempt in range(1, max_retries + 1):
+        try:
+            redis_client.ping()  # connectivity probe before writing anything
+
+            for key_name, key_data in keys_from_file.items():
+                if redis_client.exists(key_name):
+                    continue  # never overwrite what Redis already holds
+                redis_client.hset(key_name, mapping=key_data)
+                print(f"Loaded API key from file: {key_name}")
+
+            print("Successfully loaded all keys into Redis.")
+            return
+
+        except redis.exceptions.ConnectionError as e:
+            print(f"Could not connect to Redis (attempt {attempt}/{max_retries}): {e}")
+            if attempt == max_retries:
+                print("Failed to load API keys into Redis after multiple retries.")
+                break
+            print(f"Retrying in {retry_delay} seconds...")
+            time.sleep(retry_delay)
+
+
+def generate_api_key(prefix="sk") -> str:
+    """Build a random API key shaped ``<prefix>-<32 hex characters>`` (e.g. sk-xxxxxxxx)."""
+    return "{}-{}".format(prefix, secrets.token_hex(16))
+
+
+def create_api_key(client_name: str, key_prefix="sk") -> dict:
+    """
+    새로운 API 키를 생성하고 Redis와 파일에 저장합니다.
+    """
+    api_key = generate_api_key(prefix=key_prefix)
+    redis_client = get_redis_client()
+
+    key_storage_name = f"{API_KEY_PREFIX}{api_key}"
+    key_data = {
+        "client_name": client_name,
+        "created_at": str(int(time.time())),
+        "is_active": "true",
+    }
+
+    # Redis에 저장 (hset 사용)
+    redis_client.hset(key_storage_name, mapping=key_data)
+
+    # 파일에 즉시 저장
+    all_keys = _read_keys_from_file()
+    all_keys[key_storage_name] = key_data
+    _write_keys_to_file(all_keys)
+
+    return {"api_key": api_key, **key_data}
+
+
+def validate_api_key(api_key: str) -> bool:
+    """
+    Check whether ``api_key`` is an active key.
+
+    1. Redis is consulted first and is authoritative whenever it holds an
+       ``is_active`` value for the key (compared as a string; the client is
+       assumed to be created with decode_responses=True).
+    2. Only when Redis has no such value is api_keys.json consulted; an active
+       key found there is written back to Redis (self-healing).
+
+    Returns True for an active key, False otherwise (including empty input).
+    """
+    if not api_key:
+        return False
+
+    redis_client = get_redis_client()
+    key_storage_name = f"{API_KEY_PREFIX}{api_key}"
+
+    is_active_in_redis = redis_client.hget(key_storage_name, "is_active")
+    if is_active_in_redis is not None:
+        # Present in Redis: never let the file copy re-activate a disabled key.
+        return is_active_in_redis == "true"
+
+    key_data_from_file = _read_keys_from_file().get(key_storage_name)
+    if not key_data_from_file or key_data_from_file.get("is_active") != "true":
+        return False
+    redis_client.hset(key_storage_name, mapping=key_data_from_file)
+    print(f"Key '{key_storage_name}' not found in Redis, but restored from file.")
+    return True
+
+
+def revoke_api_key(api_key: str) -> bool:
+    """
+    Revoke an API key by deleting it from both Redis and api_keys.json.
+
+    The file entry is removed even when Redis no longer holds the key;
+    otherwise validate_api_key() would restore it from the file on next use.
+    Returns True if the key was removed from at least one of the two stores.
+    """
+    redis_client = get_redis_client()
+    key_storage_name = f"{API_KEY_PREFIX}{api_key}"
+
+    removed_from_redis = redis_client.delete(key_storage_name) > 0
+    all_keys = _read_keys_from_file()
+    removed_from_file = key_storage_name in all_keys
+    if removed_from_file:
+        del all_keys[key_storage_name]
+        _write_keys_to_file(all_keys)
+    return removed_from_redis or removed_from_file
+
+
+def list_api_keys() -> list:
+    """
+    Return every stored API key as a dict of its Redis hash fields plus "api_key".
+    (Caution: in a real deployment the key itself should preferably not be exposed.)
+    """
+    redis_client = get_redis_client()
+
+    def _describe(storage_name):
+        # Hash fields of one key, extended with the key stripped of its storage prefix.
+        entry = redis_client.hgetall(storage_name)
+        entry["api_key"] = storage_name.replace(API_KEY_PREFIX, "", 1)
+        return entry
+
+    # Keys and values are handled as str here (decode_responses=True is assumed).
+    return [_describe(name) for name in redis_client.scan_iter(f"{API_KEY_PREFIX}*")]
--- a/src/services/download_service.py
+++ b/src/services/download_service.py
@@ -0,0 +1,36 @@
+from fastapi.responses import FileResponse
+
+from config.setting import DEFAULT_PROMPT_PATH, STRUCTURED_PROMPT_PATH, STRUCTURED_SCHEMA_PATH
+
+
+class DownloadService:
+    """Serves the configured prompt and schema files as non-cacheable downloads."""
+
+    @staticmethod
+    def _no_cache_headers():
+        return {"Cache-Control": "no-store, no-cache, must-revalidate, max-age=0", "Pragma": "no-cache", "Expires": "0"}
+
+    @staticmethod
+    def _attachment(path, media_type, filename):
+        # Single place where the FileResponse for a download is assembled.
+        return FileResponse(
+            path,
+            media_type=media_type,
+            filename=filename,
+            headers=DownloadService._no_cache_headers(),
+        )
+
+    @staticmethod
+    def download_default_prompt():
+        """Send the default prompt file as ``default_prompt.txt``."""
+        return DownloadService._attachment(DEFAULT_PROMPT_PATH, "text/plain", "default_prompt.txt")
+
+    @staticmethod
+    def download_structured_prompt():
+        """Send the structured prompt file as ``structured_prompt.txt``."""
+        return DownloadService._attachment(STRUCTURED_PROMPT_PATH, "text/plain", "structured_prompt.txt")
+
+    @staticmethod
+    def download_structured_schema():
+        """Send the structured schema file as ``structured_schema.json``."""
+        return DownloadService._attachment(STRUCTURED_SCHEMA_PATH, "application/json", "structured_schema.json")
--- a/src/services/dummy_service.py
+++ b/src/services/dummy_service.py
@@ -0,0 +1,20 @@
+import json
+
+from fastapi.responses import JSONResponse
+
+from config.setting import STATIC_DIR
+
+
+class DummyService:
+    @staticmethod
+    async def extract_dummy():
+        """
+        static 디렉터리의 더미 JSON 응답 파일을 반환합니다.
+        """
+        dummy_path = STATIC_DIR / "dummy_response.json"
+        try:
+            with open(dummy_path, "r", encoding="utf-8") as f:
+                dummy_data = json.load(f)
+            return JSONResponse(content=dummy_data)
+        except Exception as e:
+            return JSONResponse(status_code=500, content={"error": f"❌ 더미 파일을 불러오지 못했습니다: {e}"})
--- a/src/services/inference_service.py
+++ b/src/services/inference_service.py
@@ -0,0 +1,327 @@
+import asyncio
+import json
+import logging
+from pathlib import Path
+from typing import List, Optional
+
+from fastapi import HTTPException, UploadFile
+from redis import Redis
+
+from config.setting import (
+    DEFAULT_PROMPT_PATH,
+    MINIO_BUCKET_NAME,
+    PGN_REDIS_DB,
+    PGN_REDIS_HOST,
+    PGN_REDIS_PORT,
+    STRUCTURED_PROMPT_PATH,
+)
+from services.dummy_service import DummyService
+from services.model_service import ModelInfoService
+from services.pipeline_runner import PipelineRunner
+from utils.fileio import read_any_file
+from utils.logging_utils import log_pipeline_status, log_user_request
+from utils.minio_utils import save_result_to_minio, upload_file_to_minio_v2
+
+# Redis client dedicated to the LLM Gateway (connection settings come from PGN_REDIS_*).
+# The handlers below use it to store pipeline results under "pipeline_result:<result_id>" keys.
+redis_client = Redis(host=PGN_REDIS_HOST, port=PGN_REDIS_PORT, db=PGN_REDIS_DB, decode_responses=True)
+
+logger = logging.getLogger(__name__)
+
+
+class InferenceHandler:
+    # ☑️ /general-공통처리함수
+    @staticmethod
+    async def handle_general_background(
+        request_id: str,
+        result_id: str,
+        input_file: UploadFile,
+        prompt_file: UploadFile,
+        llm_model: str,
+        ocr_model: str,
+        mode: str,
+        api_key: str,
+        request_info: Optional[str] = None,
+    ):
+        """Run the general (free-form prompt) pipeline for one uploaded file.
+
+        Steps: read the prompt, look up the available models, upload the input
+        file to MinIO, run `PipelineRunner.run_pipeline`, store the result
+        (without its "fields" entry) in Redis under `pipeline_result:<result_id>`
+        for one hour, then store the full result in MinIO.
+
+        If the model lookup, the upload or the pipeline fails, the error is
+        written to the same Redis key as `{"error": ...}` and the function
+        returns, instead of continuing with unbound variables.
+
+        Args:
+            request_id: Identifier used for MinIO object names and status logs.
+            result_id: Identifier used to build the Redis result key.
+            input_file: Uploaded document to process.
+            prompt_file: Uploaded prompt; mandatory for this handler.
+            llm_model: Name of the LLM to run.
+            ocr_model: Name of the OCR model forwarded to the pipeline.
+            mode: Pipeline mode forwarded to `run_pipeline`.
+            api_key: Caller API key, forwarded for request logging.
+            request_info: Optional request metadata, forwarded for request logging.
+
+        Raises:
+            HTTPException: 400 when no prompt file was uploaded; 500 when the
+                final result cannot be saved to MinIO.
+        """
+        logger.info(f"[INPUT_FILE_NAME]: {input_file.filename}")
+
+        # A prompt file is mandatory for the general endpoint.
+        if not prompt_file or not prompt_file.filename:
+            raise HTTPException(
+                status_code=400,
+                detail="❌ 프롬프트 파일(prompt_file)은 반드시 업로드해야 합니다.",
+            )
+
+        redis_key = f"pipeline_result:{result_id}"
+
+        def record_failure(error: Exception) -> None:
+            # Publish the failure where the caller polls for the result.
+            redis_client.set(redis_key, json.dumps({"error": str(error)}), ex=60 * 60)
+
+        # Best effort: an unreadable prompt degrades to an empty prompt.
+        try:
+            raw = await read_any_file(prompt_file)
+            prompt = raw.decode("utf-8", errors="replace")
+            prompt_filename = getattr(prompt_file, "filename", "prompt.txt")
+        except Exception as e:
+            logger.warning(f"Failed to process prompt file: {e}")
+            prompt, prompt_filename = "", getattr(prompt_file, "filename", "unknown.txt")
+
+        # The pipeline cannot validate the model without these lists, so stop here on failure.
+        try:
+            info_response = await ModelInfoService.get_model_info(mode)
+            info = json.loads(info_response.body.decode("utf-8"))
+            inner_models = info["models"]["inner_model"]["model_list"]
+            outer_models = info["models"]["outer_model"]["model_list"]
+            model_url_map: dict[str, str] = {}
+            if mode in ["inner", "structured"]:
+                model_url_map = await ModelInfoService.get_ollama_model_map()
+        except Exception as e:
+            logger.warning(f"[MODEL] 모델 정보 조회 실패: {e}")
+            record_failure(e)
+            return
+
+        # The pipeline reads the input through this presigned URL, so stop here on failure.
+        try:
+            presigned_url = upload_file_to_minio_v2(
+                file=input_file,
+                bucket_name=MINIO_BUCKET_NAME,
+                object_name=f"{request_id}/{input_file.filename}",
+            )
+            logger.info(f"[MinIO] presigned URL 생성 완료: {presigned_url}")
+        except Exception as e:
+            logger.error(f"[MinIO] presigned URL 생성 실패 {e}")
+            record_failure(e)
+            return
+
+        results_minio = None
+        try:
+            # NOTE(review): the original comment says general inference needs no schema
+            # (None), yet the string "N/A" is passed; kept as-is — confirm how the
+            # generators treat a non-dict schema_override.
+            results_minio = await PipelineRunner.run_pipeline(
+                request_info=request_info,
+                request_id=request_id,
+                file_path=presigned_url,
+                filename=input_file.filename,
+                prompt=prompt,
+                prompt_filename=prompt_filename,
+                custom_mode=True,
+                mode=mode,
+                llm_model=llm_model,
+                ocr_model=ocr_model,
+                inner_models=inner_models,
+                outer_models=outer_models,
+                model_url_map=model_url_map,
+                api_key=api_key,
+                schema_override="N/A",
+                prompt_mode="general",
+            )
+            # Redis gets the summary version: everything except the "fields" entry.
+            results_redis = {k: v for k, v in results_minio.items() if k != "fields"}
+            redis_client.set(redis_key, json.dumps(results_redis, ensure_ascii=False), ex=60 * 60)
+            logger.info(f"[REDIS] 결과 Redis 저장 완료: {result_id}")
+
+        except Exception as e:
+            logger.error(f"[PIPELINE] ❌ result_id={result_id} 처리 실패: {e}")
+            record_failure(e)
+
+        # Nothing to archive when the pipeline itself produced no result.
+        if results_minio is None:
+            return
+
+        # Store the full result in MinIO.
+        try:
+            minio_key = f"{request_id}/{input_file.filename.rsplit('.', 1)[0]}.json"
+            presigned_url = save_result_to_minio(
+                result_dict=results_minio,
+                object_name=minio_key,
+            )
+            logger.info(f"[MinIO] 결과 MinIO 저장 완료: {presigned_url}")
+
+        except Exception as e:
+            logger.error(f"[MinIO] 결과 저장 실패: {e}")
+            raise HTTPException(
+                status_code=500,
+                detail="결과 파일 저장 중 오류가 발생했습니다.",
+            )
+
+    @staticmethod
+    async def handle_extract_background(
+        request_id: str,
+        result_id: str,
+        input_file: UploadFile,
+        schema_file: Optional[UploadFile],
+        prompt_file: Optional[UploadFile],
+        mode: str,
+        model_list: List[str],
+        ocr_model: str,
+        api_key: str,
+        request_info: Optional[str] = None,
+    ):
+        """Run the extraction pipeline for one uploaded file against every model in `model_list`.
+
+        Steps: resolve the prompt (uploaded file or the built-in default),
+        parse the optional schema file, look up the available models, upload
+        the input file to MinIO, run `PipelineRunner.run_pipeline` once per
+        model concurrently, store the results (without their "fields" entries)
+        in Redis under `pipeline_result:<result_id>` for one hour, then store
+        the full results in MinIO.
+
+        If the schema cannot be parsed, or the model lookup, the upload or the
+        pipeline fails, the error is written to the same Redis key as
+        `{"error": ...}` and the function returns.
+
+        Args:
+            request_id: Identifier used for MinIO object names and status logs.
+            result_id: Identifier used to build the Redis result key.
+            input_file: Uploaded document to process.
+            schema_file: Optional uploaded JSON schema overriding the default.
+            prompt_file: Optional uploaded prompt; the built-in prompt for the
+                mode is used when it is missing.
+            mode: Pipeline mode forwarded to `run_pipeline`.
+            model_list: LLM names to run; `["dummy"]` short-circuits to the dummy response.
+            ocr_model: Name of the OCR model forwarded to the pipeline.
+            api_key: Caller API key, forwarded for request logging.
+            request_info: Optional request metadata, forwarded for request logging.
+
+        Returns:
+            The `DummyService.extract_dummy()` response for a dummy request, otherwise None.
+
+        Raises:
+            HTTPException: 500 when the final results cannot be saved to MinIO.
+        """
+        # Dummy request: log it (best effort) and always answer with the canned response.
+        # A logging failure must not push a "dummy" model into the real pipeline.
+        if model_list == ["dummy"]:
+            try:
+                log_user_request(
+                    request_info=request_info,
+                    endpoint="dummy/extract/outer",
+                    input_filename="None",
+                    llm_model="dummy",
+                    ocr_model="dummy",
+                    prompt_filename="None",
+                    context_length=0,
+                    api_key=api_key,
+                )
+            except Exception as e:
+                logger.info(f"Failed to log 'dummy/extract/outer' request: {e}")
+            return await DummyService.extract_dummy()
+
+        redis_key = f"pipeline_result:{result_id}"
+
+        def record_failure(error: Exception) -> None:
+            # Publish the failure where the caller polls for the result.
+            redis_client.set(redis_key, json.dumps({"error": str(error)}), ex=60 * 60)
+
+        # Best effort: an unreadable prompt degrades to an empty prompt.
+        try:
+            if prompt_file and getattr(prompt_file, "filename", None):
+                raw = await read_any_file(prompt_file)
+                prompt = raw.decode("utf-8", errors="replace")
+                custom_mode = True
+                prompt_filename = getattr(prompt_file, "filename", "prompt.txt")
+            else:
+                prompt_path = STRUCTURED_PROMPT_PATH if mode == "structured" else DEFAULT_PROMPT_PATH
+                with open(prompt_path, encoding="utf-8") as f:
+                    prompt = f.read()
+                custom_mode = False
+                prompt_filename = Path(prompt_path).name
+        except Exception as e:
+            logger.warning(f"Failed to process prompt file: {e}")
+            prompt = ""
+            prompt_filename = ""
+            custom_mode = False
+
+        # A schema the caller uploaded but that cannot be parsed fails the request
+        # with a readable error rather than being silently ignored.
+        try:
+            if schema_file and schema_file.filename:
+                schema_content = schema_file.file.read()
+                schema_override = json.loads(schema_content.decode("utf-8"))
+            else:
+                schema_override = None
+        except Exception as e:
+            logger.warning(f"Failed to process schema file: {e}")
+            record_failure(e)
+            return
+
+        # The pipeline cannot validate the models without these lists, so stop here on failure.
+        try:
+            info_response = await ModelInfoService.get_model_info(mode)
+            info = json.loads(info_response.body.decode("utf-8"))
+            inner_models = info["models"]["inner_model"]["model_list"]
+            outer_models = info["models"]["outer_model"]["model_list"]
+            model_url_map: dict[str, str] = {}
+            if mode in ["inner", "structured"]:
+                model_url_map = await ModelInfoService.get_ollama_model_map()
+        except Exception as e:
+            logger.warning(f"[MODEL] 모델 정보 조회 실패: {e}")
+            record_failure(e)
+            return
+
+        # The pipeline reads the input through this presigned URL, so stop here on failure.
+        try:
+            presigned_url = upload_file_to_minio_v2(
+                file=input_file,
+                bucket_name=MINIO_BUCKET_NAME,
+                object_name=f"{request_id}/{input_file.filename}",
+            )
+            logger.info(f"[MinIO] presigned URL 생성 완료: {presigned_url}")
+        except Exception as e:
+            logger.error(f"[MinIO] presigned URL 생성 실패 {e}")
+            record_failure(e)
+            return
+
+        results_minio = None
+        try:
+            # One pipeline run per requested model, executed concurrently.
+            tasks = [
+                PipelineRunner.run_pipeline(
+                    request_info=request_info,
+                    request_id=request_id,
+                    file_path=presigned_url,
+                    filename=input_file.filename,
+                    prompt=prompt,
+                    prompt_filename=prompt_filename,
+                    custom_mode=custom_mode,
+                    mode=mode,
+                    llm_model=llm_model,
+                    ocr_model=ocr_model,
+                    inner_models=inner_models,
+                    outer_models=outer_models,
+                    # Only inner (Ollama) models need the server map.
+                    model_url_map=model_url_map if llm_model in inner_models else {},
+                    api_key=api_key,
+                    schema_override=schema_override,
+                    prompt_mode="extract",
+                )
+                for llm_model in model_list
+            ]
+
+            result_set = await asyncio.gather(*tasks)
+            results_minio = list(result_set)
+
+            # Redis gets the summary version: every result without its "fields" entry.
+            results_redis = [{k: v for k, v in result.items() if k != "fields"} for result in result_set]
+            redis_client.set(
+                redis_key,
+                json.dumps(results_redis, ensure_ascii=False),
+                ex=60 * 60,
+            )
+            logger.info(f"[REDIS] 결과 Redis 저장 완료: {result_id}")
+
+        except Exception as e:
+            logger.error(f"[PIPELINE] ❌ result_id={result_id} 처리 실패: {e}")
+            record_failure(e)
+
+        # Nothing to archive when the pipeline itself produced no result.
+        if results_minio is None:
+            return
+
+        # Store the full results in MinIO.
+        try:
+            minio_key = f"{request_id}/{input_file.filename.rsplit('.', 1)[0]}.json"
+            presigned_url = save_result_to_minio(
+                result_dict=results_minio,
+                object_name=minio_key,
+            )
+            logger.info(f"[MinIO] 결과 MinIO 저장 완료: {presigned_url}")
+
+        except Exception as e:
+            logger.error(f"[MinIO] 결과 저장 실패: {e}")
+            raise HTTPException(
+                status_code=500,
+                detail="결과 파일 저장 중 오류가 발생했습니다.",
+            )
+
+    @staticmethod
+    async def handle_test_background(
+        request_id: str,
+        result_id: str,
+        input_file: str,
+        prompt_file: str,
+        llm_model: str,
+        api_key: str,
+        mode: str,
+        request_info: Optional[str] = None,
+    ):
+        """Run the test pipeline and publish its result in Redis.
+
+        Looks up the list of outer models, runs
+        `PipelineRunner.run_test_pipeline`, and stores the result under
+        `pipeline_result:<result_id>` for one hour. Any failure, including a
+        failed model lookup, is logged, reported through `log_pipeline_status`,
+        and stored under the same key as `{"error": ...}`.
+
+        Args:
+            request_id: Identifier used in logs and pipeline status entries.
+            result_id: Identifier used to build the Redis result key.
+            input_file: Forwarded unchanged to the test pipeline.
+            prompt_file: Forwarded unchanged to the test pipeline.
+            llm_model: Name of the outer LLM to run.
+            api_key: Caller API key, forwarded to the test pipeline.
+            mode: Pipeline mode forwarded to the model lookup and the pipeline.
+            request_info: Optional request metadata, forwarded to the test pipeline.
+        """
+        redis_key = f"pipeline_result:{result_id}"
+
+        def record_failure(error: Exception) -> None:
+            # Same reporting for every failure path: log, status entry, Redis error payload.
+            logger.error(f"[{request_id}] Test pipeline failed: {error}")
+            log_pipeline_status(request_id, "테스트 파이프라인 실패", {"error": str(error)})
+            redis_client.set(redis_key, json.dumps({"error": str(error)}), ex=60 * 60)
+
+        # Without the outer model list the pipeline cannot validate llm_model,
+        # so report the real cause instead of failing later on an unbound name.
+        try:
+            info_response = await ModelInfoService.get_model_info(mode)
+            info = json.loads(info_response.body.decode("utf-8"))
+            outer_models = info["models"]["outer_model"]["model_list"]
+        except Exception as e:
+            logger.warning(f"[MODEL] 모델 정보 조회 실패: {e}")
+            record_failure(e)
+            return
+
+        try:
+            logger.info(f"[{request_id}] Invoking test pipeline runner.")
+
+            # 1. Run the pipeline.
+            results = await PipelineRunner.run_test_pipeline(
+                request_id=request_id,
+                input_file=input_file,
+                prompt_file=prompt_file,
+                llm_model=llm_model,
+                outer_models=outer_models,
+                mode=mode,
+                api_key=api_key,
+                request_info=request_info,
+            )
+
+            # 2. Store the result in Redis.
+            redis_client.set(redis_key, json.dumps(results, ensure_ascii=False), ex=60 * 60)
+            logger.info(f"[{request_id}] Test pipeline result saved to Redis: {result_id}")
+
+        except Exception as e:
+            record_failure(e)
--- a/src/services/model_service.py
+++ b/src/services/model_service.py
@@ -0,0 +1,71 @@
+import logging
+from typing import Dict
+
+import httpx
+from fastapi.responses import JSONResponse
+
+from config.setting import OLLAMA_URLS
+
+logger = logging.getLogger(__name__)
+
+
+class ModelInfoService:
+    """Reports which LLMs the gateway can use.
+
+    Inner models are discovered by querying each configured Ollama server
+    (`OLLAMA_URLS`); outer models are the fixed `OUTER_MODELS` list.
+    """
+
+    # Externally hosted models accepted by the gateway.
+    OUTER_MODELS = [
+        "claude-sonnet-4-20250514",
+        "claude-3-7-sonnet-20250219",
+        "claude-3-5-haiku-20241022",
+        "gemini-2.5-pro",
+        "gemini-2.5-flash",
+        "gpt-4.1",
+        "gpt-4o",
+    ]
+
+    @staticmethod
+    async def _fetch_model_names(client, url):
+        """Return the model names listed by the Ollama server behind `url`.
+
+        The configured URL points at the generate endpoint; the tags endpoint
+        is derived by swapping the path segment. Raises on HTTP or parsing
+        errors so each caller can log the failure in its own way.
+        """
+        tags_url = url.replace("/generate", "/tags")
+        res = await client.get(tags_url)
+        res.raise_for_status()
+        return [m["name"] for m in res.json().get("models", [])]
+
+    @staticmethod
+    async def get_ollama_model_map() -> Dict[str, str]:
+        """Map every discovered Ollama model name to the configured URL that serves it.
+
+        Servers that cannot be queried are logged and skipped. When several
+        servers list the same model, the last one in `OLLAMA_URLS` wins.
+        """
+        model_url_map: Dict[str, str] = {}
+        async with httpx.AsyncClient(timeout=3.0) as client:
+            for url in OLLAMA_URLS:
+                try:
+                    for name in await ModelInfoService._fetch_model_names(client, url):
+                        model_url_map[name] = url
+                except Exception as e:
+                    logger.error(f"[ERROR] {url} 모델 조회 실패: {e}")
+        return model_url_map
+
+    @staticmethod
+    async def get_model_info(mode: str) -> JSONResponse:
+        """Return the available inner and outer models with their defaults.
+
+        Ollama servers are only queried for modes "inner", "structured" and
+        "info"; for any other mode the inner model list is empty. Servers
+        that cannot be queried are logged and skipped.
+
+        Returns:
+            A `JSONResponse` whose body has the shape
+            `{"models": {"inner_model": {...}, "outer_model": {...}}}`, each
+            entry holding `default_model` and `model_list`.
+        """
+        inner_models = []
+        if mode in ["inner", "structured", "info"]:
+            async with httpx.AsyncClient(timeout=3.0) as client:
+                for url in OLLAMA_URLS:
+                    try:
+                        inner_models.extend(await ModelInfoService._fetch_model_names(client, url))
+                    except Exception as e:
+                        logger.warning(f"[OLLAMA] Ollama 모델 조회 실패 ({url}): {e}")
+
+            # De-duplicate while keeping discovery order, so the response is deterministic.
+            inner_models = list(dict.fromkeys(inner_models))
+            logger.info(f"[OLLAMA] 사용 가능한 내부 모델 목록: {inner_models}")
+
+        return JSONResponse(
+            content={
+                "models": {
+                    "inner_model": {
+                        "default_model": "gpt-oss:20b",  # gemma3:27b
+                        "model_list": inner_models,
+                    },
+                    "outer_model": {
+                        "default_model": "gpt-4.1",
+                        "model_list": ModelInfoService.OUTER_MODELS,
+                    },
+                }
+            }
+        )
--- a/src/services/pipeline_runner.py
+++ b/src/services/pipeline_runner.py
@@ -0,0 +1,366 @@
+import asyncio
+import json
+import logging
+import time
+from typing import Dict, List, Literal, Optional
+
+import httpx
+import redis
+
+from config.setting import OCR_API_URL, OCR_REDIS_DB, OCR_REDIS_HOST, OCR_REDIS_PORT
+from utils.checking_files import token_counter
+from utils.image_converter import prepare_images_from_file
+from utils.logging_utils import log_pipeline_status, log_user_request
+from utils.text_generator import (
+    ClaudeGenerator,
+    GeminiGenerator,
+    GptGenerator,
+    OllamaGenerator,
+)
+from utils.text_processor import post_process, test_post_process
+
+logger = logging.getLogger(__name__)
+
+# Redis client for the OCR service's store (connection settings come from OCR_REDIS_*).
+# run_pipeline polls it for "ocr_result:<task_id>" entries.
+# NOTE(review): the previous comment said "Celery results, DB=1", but the database
+# number actually comes from OCR_REDIS_DB — confirm the intended value.
+redis_client = redis.Redis(
+    host=OCR_REDIS_HOST,
+    port=OCR_REDIS_PORT,
+    db=OCR_REDIS_DB,
+    decode_responses=True,
+)
+
+
+class PipelineRunner:
+    @staticmethod
+    async def run_pipeline(
+        request_info: str,
+        request_id: str,
+        file_path: str,
+        filename: str,
+        prompt: str,
+        prompt_filename: str,
+        custom_mode: bool,
+        mode: str,
+        llm_model: str,
+        ocr_model: str,
+        inner_models: List[str],
+        outer_models: List[str],
+        model_url_map: Dict[str, str],
+        api_key: str,
+        schema_override: Optional[dict] = None,
+        prompt_mode: Literal["general", "extract"] = "extract",
+    ):
+        """Run OCR followed by LLM inference for one file and return the post-processed result.
+
+        Mode "multimodal" skips OCR: the file is converted to images and sent
+        straight to a gpt or gemini model. Every other mode posts the file URL
+        to the OCR API, polls Redis for the OCR result, and then runs the
+        selected LLM on the OCR text:
+
+        * "inner": `llm_model` must be in `inner_models` (Ollama).
+        * "outer": `llm_model` must be in `outer_models`.
+        * "all" / "structured": `llm_model` may be in either list; the inner
+          list is checked first.
+
+        Args:
+            request_info: Request metadata forwarded to request logging.
+            request_id: Identifier used for pipeline status logs.
+            file_path: Presigned URL of the input file.
+            filename: Original name of the input file.
+            prompt: Prompt text sent to the LLM.
+            prompt_filename: Name of the prompt file, for request logging.
+            custom_mode: Forwarded to the generator.
+            mode: "multimodal", "inner", "outer", "all" or "structured".
+            llm_model: Name of the LLM to run.
+            ocr_model: Name of the OCR model requested from the OCR API.
+            inner_models: Model names served by Ollama.
+            outer_models: Names of the externally hosted models.
+            model_url_map: Ollama model name -> server URL.
+            api_key: Caller API key, for request logging.
+            schema_override: Schema forwarded to structured/multimodal generation.
+            prompt_mode: "general" or "extract"; forwarded to generation and post-processing.
+
+        Returns:
+            The value produced by `post_process`. When the input exceeds the
+            token limit, the LLM call is skipped and the result carries a
+            warning message in place of generated text.
+
+        Raises:
+            ValueError: For an unsupported mode, a model missing from the
+                relevant list, a missing Ollama server, or a missing OCR result.
+            Exception: Errors from the OCR request are logged and re-raised.
+        """
+        max_input_tokens = 120000
+        ocr_poll_attempts = 10
+        ocr_poll_interval_seconds = 5
+
+        start_time = time.time()
+
+        if mode == "multimodal":
+            # Model validation.
+            if llm_model not in outer_models:
+                raise ValueError(
+                    f"외부 모델 리스트에 '{llm_model}'이 포함되어 있지 않습니다. outer_models: {outer_models}"
+                )
+            if not (("gpt" in llm_model) or ("gemini" in llm_model)):
+                # The check accepts gpt and gemini, so the message names both.
+                raise ValueError("멀티모달 E2E는 gpt 또는 gemini 계열만 지원합니다.")
+
+            # Input file -> images for the multimodal model.
+            images = await prepare_images_from_file(file_path, filename)
+
+            # Request logging (no OCR text here, so only the prompt length counts).
+            context_length = len(prompt)
+            try:
+                log_user_request(
+                    request_info=request_info,
+                    endpoint=f"/{prompt_mode}/{mode}",
+                    input_filename=filename,
+                    llm_model=llm_model,
+                    ocr_model=ocr_model,
+                    prompt_filename=prompt_filename,
+                    context_length=context_length,
+                    api_key=api_key,
+                )
+            except Exception as e:
+                logger.info(f"Failed to log '/{prompt_mode}/{mode}' request: {e}")
+
+            # Multimodal LLM call; the guard above guarantees gpt or gemini.
+            log_pipeline_status(request_id, "멀티모달 LLM 추론 시작")
+            if "gpt" in llm_model:
+                generator = GptGenerator(model=llm_model)
+            else:
+                generator = GeminiGenerator(model=llm_model)
+            generated_text, llm_model, llm_url = await asyncio.to_thread(
+                generator.generate_multimodal, images, prompt, schema_override
+            )
+
+            end_time = time.time()
+            log_pipeline_status(request_id, "LLM 추론 완료 및 후처리 시작")
+
+            # Multimodal runs have no OCR text or coordinates.
+            text = ""
+            coord = None
+            ocr_model = "bypass(multimodal)"
+
+            json_data = post_process(
+                filename,
+                text,
+                generated_text,
+                coord,
+                ocr_model,
+                llm_model,
+                llm_url,
+                mode,
+                start_time,
+                end_time,
+                prompt_mode,
+            )
+            log_pipeline_status(request_id, "후처리 완료 및 결과 반환")
+            return json_data
+
+        try:
+            # OCR API request.
+            log_pipeline_status(request_id, "OCR API 호출 시작")
+            async with httpx.AsyncClient() as client:
+                # Hand the presigned URL to the OCR API.
+                # NOTE(review): timeout=None waits indefinitely for the OCR API — confirm this is intended.
+                ocr_resp = await client.post(
+                    OCR_API_URL,
+                    json={
+                        "file_url": file_path,
+                        "filename": filename,
+                        "ocr_model": ocr_model,
+                    },
+                    timeout=None,
+                )
+                ocr_resp.raise_for_status()
+
+                # Extract the task id from the OCR API response, ignoring entries without one.
+                task_ids_json = ocr_resp.json()
+                logger.debug(f"[DEBUG] OCR API 응답: {task_ids_json}")
+                task_ids = [item.get("task_id") for item in task_ids_json.get("results", [])]
+                task_ids = [tid for tid in task_ids if tid]
+                if not task_ids:
+                    raise ValueError("❌ OCR API에서 유효한 task_id를 받지 못했습니다.")
+                task_id = task_ids[0]
+
+            # Poll Redis for the OCR result at a fixed interval, up to the attempt limit.
+            raw_result = None
+            redis_key = f"ocr_result:{task_id}"
+            log_pipeline_status(request_id, f"OCR 진행상태 폴링 시작 (task_id={task_id})")
+            for attempt in range(ocr_poll_attempts):
+                logger.debug(f"[DEBUG] OCR 결과 폴링 시도 {attempt + 1}/{ocr_poll_attempts}...")
+                raw_result = redis_client.get(redis_key)
+                if raw_result:
+                    logger.info(f"✅ Redis에서 task_id '{task_id}'에 대한 OCR 결과를 찾았습니다.")
+                    break
+                # No point in waiting once the last attempt has failed.
+                if attempt < ocr_poll_attempts - 1:
+                    await asyncio.sleep(ocr_poll_interval_seconds)
+
+            if not raw_result:
+                error_message = "❌ OCR API에서 작업을 완료하지 못했습니다. 페이지 수를 줄여주세요."
+                logger.error(error_message)
+                raise ValueError(error_message)
+
+            result_data = json.loads(raw_result)
+            text = result_data["parsed"]
+            coord = result_data.get("fields")
+            ocr_model = result_data.get("model", {}).get("ocr_model", "OCR API")
+
+        except Exception as e:
+            logger.error(f"❌ OCR 처리 중 예외 발생: {e}")
+            raise
+
+        # Input length check.
+        log_pipeline_status(request_id, "모델 입력 텍스트 길이 검사 시작")
+        token_count = token_counter(prompt, text)
+        context_length = len(prompt + text)
+
+        # Request logging (best effort).
+        try:
+            log_user_request(
+                request_info=request_info,
+                endpoint=f"/{prompt_mode}/{mode}",
+                input_filename=filename,
+                llm_model=llm_model,
+                ocr_model=ocr_model,
+                prompt_filename=prompt_filename,
+                context_length=context_length,
+                api_key=api_key,
+            )
+        except Exception as e:
+            logger.info(f"Failed to log '/{prompt_mode}/{mode}' request: {e}")
+
+        # Over the token limit: skip the model call and return a warning instead.
+        if token_count > max_input_tokens:
+            return post_process(
+                filename,
+                text,
+                f"⚠️ 입력 텍스트가 {token_count} 토큰으로 입력 길이를 초과했습니다. 모델 호출 생략합니다.",
+                coord,
+                ocr_model,
+                "N/A",
+                "N/A",
+                mode,
+                start_time,
+                time.time(),
+                prompt_mode,
+            )
+
+        # Pick the generator by mode AND list membership, so that "all" and
+        # "structured" can reach outer models instead of always failing the inner check.
+        inner_allowed = mode in ("inner", "all", "structured")
+        outer_allowed = mode in ("outer", "all", "structured")
+
+        if inner_allowed and llm_model in inner_models:
+            # Inner model (Ollama).
+            log_pipeline_status(request_id, "내부 LLM 추론 시작")
+            api_url = model_url_map.get(llm_model)
+            if not api_url:
+                raise ValueError(f"❌ 모델 '{llm_model}'이 로드된 Ollama 서버를 찾을 수 없습니다.")
+            generator = OllamaGenerator(model=llm_model, api_url=api_url)
+        elif outer_allowed and llm_model in outer_models:
+            # Outer model.
+            log_pipeline_status(request_id, "외부 LLM 추론 시작")
+            if "claude" in llm_model:
+                generator = ClaudeGenerator(model=llm_model)
+            elif "gemini" in llm_model:
+                generator = GeminiGenerator(model=llm_model)
+            elif "gpt" in llm_model:
+                generator = GptGenerator(model=llm_model)
+            else:
+                raise ValueError("지원되지 않는 외부 모델입니다. ['gemini', 'claude', 'gpt'] 중 선택하세요.")
+        elif inner_allowed and outer_allowed:
+            raise ValueError(
+                f"내부/외부 모델 리스트에 '{llm_model}'이 포함되어 있지 않습니다. "
+                f"inner_models: {inner_models}, outer_models: {outer_models}"
+            )
+        elif inner_allowed:
+            raise ValueError(
+                f"내부 모델 리스트에 '{llm_model}'이 포함되어 있지 않습니다. inner_models: {inner_models}"
+            )
+        elif outer_allowed:
+            raise ValueError(
+                f"외부 모델 리스트에 '{llm_model}'이 포함되어 있지 않습니다. outer_models: {outer_models}"
+            )
+        else:
+            raise ValueError(
+                f"❌ 지원되지 않는 모드입니다. 'inner', 'outer', 'all', 'structured' 중에서 선택하세요. (입력: {mode})"
+            )
+
+        # The generators are synchronous, so run them in a worker thread.
+        if mode == "structured":
+            generated_text, llm_model, llm_url = await asyncio.to_thread(
+                generator.structured_generate,
+                text,
+                prompt,
+                custom_mode,
+                schema_override,
+            )
+        else:
+            generated_text, llm_model, llm_url = await asyncio.to_thread(
+                generator.generate, text, prompt, custom_mode, prompt_mode
+            )
+
+        log_pipeline_status(request_id, "LLM 추론 완료 및 후처리 시작")
+        end_time = time.time()
+
+        # Post-processing.
+        json_data = post_process(
+            filename,
+            text,
+            generated_text,
+            coord,
+            ocr_model,
+            llm_model,
+            llm_url,
+            mode,
+            start_time,
+            end_time,
+            prompt_mode,
+        )
+
+        log_pipeline_status(request_id, "후처리 완료 및 결과 반환")
+        return json_data
+
+    @staticmethod
+    async def run_test_pipeline(
+        request_id: str,
+        input_file: str,
+        prompt_file: str,
+        llm_model: str,
+        outer_models: List[str],
+        mode: str,
+        api_key: str,
+        request_info: Optional[str],
+    ):
+        """Run an outer LLM on already-available input, without the OCR stage.
+
+        `input_file` and `prompt_file` are handed to `generator.generate` in
+        the positions where `run_pipeline` passes the OCR text and the prompt
+        text. NOTE(review): presumably they carry text content rather than
+        file paths — confirm against the caller.
+
+        Args:
+            request_id: Identifier used in logs and pipeline status entries.
+            input_file: First argument to `generate`; also passed to `test_post_process` as `text`.
+            prompt_file: Second argument to `generate`.
+            llm_model: Name of the outer model; must be in `outer_models`.
+            outer_models: Names of the externally hosted models.
+            mode: Must be "outer", "all" or "structured".
+            api_key: Accepted for interface parity; not used in this method.
+            request_info: Accepted for interface parity; not used in this method.
+
+        Returns:
+            The value produced by `test_post_process`.
+
+        Raises:
+            ValueError: For an unsupported mode or model.
+            Exception: Generation and post-processing errors are logged and re-raised.
+        """
+        log_pipeline_status(request_id, "테스트 파이프라인 시작", {"input": input_file})
+        logger.info(f"[{request_id}] Running test pipeline with model: {llm_model}")
+
+        # 0) Mode/model guards. Only outer-capable modes are accepted here,
+        #    so the message lists exactly those.
+        if mode not in ("outer", "all", "structured"):
+            raise ValueError(
+                f"❌ 지원되지 않는 모드입니다. 'outer', 'all', 'structured' 중에서 선택하세요. (입력: {mode})"
+            )
+        if llm_model not in outer_models:
+            raise ValueError(f"외부 모델 리스트에 '{llm_model}'이 포함되어 있지 않습니다. outer_models: {outer_models}")
+
+        # 1) Pick the outer LLM generator.
+        log_pipeline_status(request_id, "외부 LLM 추론 시작", {"model": llm_model})
+        if "claude" in llm_model:
+            generator = ClaudeGenerator(model=llm_model)
+        elif "gemini" in llm_model:
+            generator = GeminiGenerator(model=llm_model)
+        elif "gpt" in llm_model:
+            generator = GptGenerator(model=llm_model)
+        else:
+            raise ValueError("지원되지 않는 외부 모델입니다. ['gemini', 'claude', 'gpt'] 중 선택하세요.")
+
+        # 2) Generate (offloaded to a thread because the SDK call is synchronous).
+        try:
+            generated_text, llm_model_resolved, llm_url = await asyncio.to_thread(
+                generator.generate, input_file, prompt_file, custom_mode=True, prompt_mode="extract"
+            )
+            logger.info(f"[{request_id}] LLM generate succeeded.")
+        except Exception as e:
+            logger.error(f"[{request_id}] LLM generate failed: {e}")
+            log_pipeline_status(request_id, "LLM 생성 실패", {"error": str(e)})
+            raise
+
+        log_pipeline_status(request_id, "LLM 추론 완료 및 후처리 시작")
+
+        # 3) Post-process. There is no OCR stage or timing, so those fields are "N/A".
+        try:
+            json_data = test_post_process(
+                text=input_file,
+                generated_text=generated_text,
+                coord="N/A",
+                ocr_model="N/A",
+                llm_model=llm_model_resolved,
+                llm_url=llm_url,
+                mode=mode,
+                start_time="N/A",
+                end_time="N/A",
+                prompt_mode="extract",
+            )
+        except Exception as e:
+            logger.error(f"[{request_id}] test_post_process 실패: {e}")
+            log_pipeline_status(request_id, "후처리 실패", {"error": str(e)})
+            raise
+
+        log_pipeline_status(request_id, "후처리 완료 및 결과 반환")
+        return json_data
--- a/src/services/prompt.py
+++ b/src/services/prompt.py
@@ -0,0 +1,36 @@
+# Prompt (in Korean) asking the model to act as a project manager and turn an STT
+# meeting transcript into a five-section summary, output as JSON.
+# The only placeholder is {context}, which callers fill with the transcript via str.format.
+SUMMARY_PROMPT_TEMPLATE = """
+/no_think
+너는 방금 끝난 회의의 내용을 정리해서 팀원들에게 공유해야 하는 프로젝트 매니저야.
+아래의 STT 회의록 초안은 오타나 문맥 오류가 있을 수 있어. 무리하게 해석하기보다는 문맥상 가장 자연스럽고 합리적인 내용으로 정리하고, 애매한 부분은 그 불확실성을 그대로 언급해도 괜찮아.
+아래 양식 외의 내용은 절대 포함하지 마. 각 항목의 제목과 번호는 반드시 그대로 유지해.
+출력은 json으로 해
+
+# 양식
+1. 회의 주요 키워드 (5개 내외로 작성)
+2. 논의된 주요 안건 목록(Action Items)
+3. 각 안건별 핵심 논의 내용 요약
+4. 최종적으로 합의된 결정 사항들
+5. 다음 회의에서 논의할 내용이나 미결 사항 (있다면 작성)
+
+# 내용
+{context}
+"""
+
+# Variant of the summary prompt (in Korean) for a speaker-labelled transcript
+# ("SPEAKER_01", "SPEAKER_02", ...); same five-section JSON output.
+# The only placeholder is {context}.
+# NOTE(review): the line "각 화자의 발언을 고려하여 " ends mid-sentence — confirm the
+# intended wording before changing it, since this text is sent to the model as-is.
+ONLY_GEMINI_PROMPT_TEMPLATE = """
+다음은 여러 명이 참여한 회의의 전사 기록이다. 각 발화자는 "SPEAKER_01", "SPEAKER_02" 와 같은 형식으로 구분되어 있다.
+같은 내용이지만 SPEAKER의 순서가 다를 수 도 있다.
+아래의 STT 회의록 초안은 오타나 문맥 오류가 있을 수 있어. 무리하게 해석하기보다는 문맥상 가장 자연스럽고 합리적인 내용으로 정리하고, 애매한 부분은 그 불확실성을 그대로 언급해도 괜찮아.
+각 화자의 발언을 고려하여 
+아래 양식 외의 내용은 절대 포함하지 마. 각 항목의 제목과 번호는 반드시 그대로 유지해.
+출력은 json으로 해
+
+# 양식
+1. 회의 주요 키워드 (5개 내외로 작성)
+2. 논의된 주요 안건 목록(Action Items)
+3. 각 안건별 핵심 논의 내용 요약
+4. 최종적으로 합의된 결정 사항들
+5. 다음 회의에서 논의할 내용이나 미결 사항 (있다면 작성)
+
+# 내용
+{context}
+"""
--- a/src/services/report.py
+++ b/src/services/report.py
@@ -0,0 +1,196 @@
+import asyncio
+import json
+import logging
+import os
+import re
+
+import httpx
+from anthropic import AsyncAnthropic
+from dotenv import load_dotenv
+from google.generativeai import GenerativeModel  # gemini
+from openai import AsyncOpenAI
+
+from services.prompt import ONLY_GEMINI_PROMPT_TEMPLATE, SUMMARY_PROMPT_TEMPLATE
+
+# Load key=value pairs from a .env file into the process environment so the
+# os.getenv() lookups in the client functions below can see them.
+load_dotenv()
+
+logger = logging.getLogger(__name__)
+# Module-level, in-memory store; the task functions in this module write
+# entries as tasks_store[task_id][key].
+tasks_store = {}
+# Model identifiers passed to the respective provider APIs and returned to
+# callers as the first element of each (model_name, result) tuple.
+ask_gpt_name = "gpt-4.1-mini"
+ask_ollama_qwen_name = "qwen3:custom"
+ask_gemini_name = "gemini-2.5-flash"
+ask_claude_name = "claude-3-7-sonnet-latest"
+
+
+def parse_json_safe(text: str):
+    """응답 텍스트가 JSON 포맷이 아닐 수도 있으니 안전하게 파싱 시도"""
+    try:
+        # 혹시 ```json ... ``` 형식 포함 시 제거
+        if text.startswith("```json"):
+            text = text.strip("```json").strip("```").strip()
+        return json.loads(text)
+    except Exception:
+        return {"raw_text": text}
+
+
+async def ask_gpt4(text: str):
+    try:
+        client = AsyncOpenAI(api_key=os.getenv("OPENAI_API_KEY"))
+        response = await client.chat.completions.create(
+            model=ask_gpt_name,
+            messages=[
+                {
+                    "role": "user",
+                    "content": SUMMARY_PROMPT_TEMPLATE.format(context=text),
+                }
+            ],
+            temperature=0,
+        )
+        return ask_gpt_name, parse_json_safe(response.choices[0].message.content)
+    except Exception as e:
+        logger.error(f"ask_gpt4 error: {e}")
+        return ask_gpt_name, {"error": str(e)}
+
+
+def fix_incomplete_json(text: str) -> str:
+    open_braces = text.count("{")
+    close_braces = text.count("}")
+    if open_braces > close_braces:
+        text += "}" * (open_braces - close_braces)
+    return text
+
+
+async def ask_ollama_qwen(text: str):
+    try:
+        async with httpx.AsyncClient() as client:
+            res = await client.post(
+                "http://172.16.10.176:11434/api/generate",
+                json={
+                    "model": "qwen3:custom",
+                    "prompt": SUMMARY_PROMPT_TEMPLATE.format(context=text),
+                },
+                timeout=300,
+            )
+            raw_text = res.text
+
+            # 1. <think> 태그 제거
+            raw_text = re.sub(r"</?think>", "", raw_text)
+
+            # 2. 각 줄별 JSON 파싱 시도 (스트림 JSON 형식)
+            json_objects = []
+            for line in raw_text.splitlines():
+                line = line.strip()
+                if not line:
+                    continue
+                try:
+                    obj = json.loads(line)
+                    json_objects.append(obj)
+                except json.JSONDecodeError:
+                    # 무시하거나 로그 남기기
+                    pass
+
+            # 3. 여러 JSON 조각 중 'response' 필드 내용만 합치기 (필요시)
+            full_response = "".join(obj.get("response", "") for obj in json_objects)
+
+            # 4. 합쳐진 response에서 JSON 부분만 추출
+            json_match = re.search(r"\{.*\}", full_response, re.DOTALL)
+            if json_match:
+                json_str = json_match.group(0)
+                try:
+                    parsed_json = json.loads(json_str)
+                    return "qwen3:custom", parsed_json
+                except json.JSONDecodeError:
+                    return "qwen3:custom", {
+                        "error": "Invalid JSON in response",
+                        "raw_text": full_response,
+                    }
+            else:
+                return "qwen3:custom", {
+                    "error": "No JSON found in response",
+                    "raw_text": full_response,
+                }
+
+    except Exception as e:
+        return "qwen3:custom", {"error": str(e)}
+
+
+async def ask_gemini(text: str):
+    try:
+        model = GenerativeModel(model_name=ask_gemini_name)
+        response = model.generate_content(SUMMARY_PROMPT_TEMPLATE.format(context=text))
+        return ask_gemini_name, parse_json_safe(response.text)
+    except Exception as e:
+        logger.error(f"ask_gemini error: {e}")
+        return ask_gemini_name, {"error": str(e)}
+
+
+async def dialog_ask_gemini(text: str):
+    try:
+        model = GenerativeModel(model_name=ask_gemini_name)
+        response = model.generate_content(ONLY_GEMINI_PROMPT_TEMPLATE.format(context=text))
+        return ask_gemini_name, parse_json_safe(response.text)
+    except Exception as e:
+        logger.error(f"ask_gemini error: {e}")
+        return ask_gemini_name, {"error": str(e)}
+
+
+async def ask_claude(text: str):
+    try:
+        client = AsyncAnthropic(api_key=os.getenv("ANTHROPIC_API_KEY"))
+        response = await client.messages.create(
+            model=ask_claude_name,
+            messages=[
+                {
+                    "role": "user",
+                    "content": SUMMARY_PROMPT_TEMPLATE.format(context=text),
+                }
+            ],
+            max_tokens=12800,
+            stream=False,
+        )
+        raw = response.content[0].text
+        return ask_claude_name, parse_json_safe(raw)
+    except Exception as e:
+        logger.error(f"ask_claude error: {e}")
+        return ask_claude_name, {"error": str(e)}
+
+
+async def total_summation(text: str) -> dict:
+    tasks = [ask_gpt4(text), ask_ollama_qwen(text), ask_gemini(text), ask_claude(text)]
+    results = await asyncio.gather(*tasks)
+    return dict(results)
+
+
+async def run_model_task(model_func, text, key, task_id):
+    try:
+        model_name, result = await model_func(text)
+        tasks_store[task_id][key] = {
+            "status": "completed",
+            "model_name": model_name,
+            "result": result,
+        }
+    except Exception as e:
+        tasks_store[task_id][key] = {
+            "status": "failed",
+            "error": str(e),
+        }
+
+
+async def run_all_models(text: str, task_id: str):
+    # 초기 상태 세팅
+    tasks_store[task_id] = {
+        "gpt4": {"status": "pending", "result": None},
+        "qwen3": {"status": "pending", "result": None},
+        "gemini": {"status": "pending", "result": None},
+        "claude": {"status": "pending", "result": None},
+        "finished": False,
+    }
+
+    await asyncio.gather(
+        run_model_task(ask_gpt4, text, "gpt4", task_id),
+        run_model_task(ask_ollama_qwen, text, "qwen3", task_id),
+        run_model_task(ask_gemini, text, "gemini", task_id),
+        run_model_task(ask_claude, text, "claude", task_id),
+    )
+
+    tasks_store[task_id]["finished"] = True
--- a/src/static/dummy_response.json
+++ b/src/static/dummy_response.json
@@ -0,0 +1,42 @@
+[
+  {
+    "filename": "250107_out_SYJV-250001_Advanced Mobilization.pdf",
+    "outer_model": {
+      "ocr_model": "OCR not used",
+      "llm_model": "gpt-4.1",
+      "api_url": "OpenAI Python SDK"
+    },
+    "time": {
+      "duration_sec": "8.24",
+      "started_at": 1747614863.8500028,
+      "ended_at": 1747614872.089025
+    },
+    "fields": [],
+    "parsed": "SEOYOUNG JOINT VENTURE \n \n \nRef. No. SYJV-250001 \nJan / 07 / 2025 \n \nMr. BENJAMIN A. BAUTISTA \nProject Director \nRoads Management Cluster 1 (Bilateral) – UPMO \nDepartment of Public Works and Highways \n2nd Street, Port Area, Manila \n \nThru \n: \nANTONIO ERWIN R. ARANAZ \n \n \nProject Manager \n \nSubject \n: \nAdvanced Mobilization of Experts \n \n \nConsulting Services for the Independent Design Check of the Panay-Guimaras-\nNegros Island Bridges Project [Loan Agreement No.: PHL-23] \n \nDear Mr. Bautista, \n \nWith reference to the above-mentioned consulting services, we respectfully inform the \nadvanced mobilization of Experts. We, SEOYOUNG JV, listed below the mobilized experts in \naccordance with the provisions of the time schedule. \n \nIt will be appreciated if we can receive your response the soonest possible time. Your \nfavorable consideration hereof is highly appreciated. \n \n \nVery Truly Yours,  \n \n \nJONG HAK, KIM \nTeam Leader \nIDC Services for PGN Bridges Project, SEOYOUNG JV \n \nEnclosures :  \n 1. Mobilization of International Key Experts in Home (Korea) \n 2. Mobilization of International Non-Key Experts in Home (Korea) \n 3. Mobilization of Local Key Experts in Field (Philippines) \n 4. Mobilization of Local Non-Key Experts in Field (Philippines) \n 5. CVs of Experts \n \n \nSEOYOUNG JOINT VENTURE \n \n \n1. Mobilization of International Key Experts \nNo. 
\nName \nPosition \nActual Date of \nMobilization \n1 \nKIM, JONG HAK \nTeam Leader \nJan 05, 2025 \n2 \nLEE, SANG HEE \nBridge Structural Engineer \nJan 05, 2025 \n3 \nJANG, SEI CHANG \nBridge Analysis Engineer \nJan 05, 2025 \n4 \nLEE, JIN WOO \nBridge Foundation Engineer \nJan 05, 2025 \n5 \nLEE, KEUN HO \nBridge Seismic Engineer \nJan 05, 2025 \n6 \nKIM, YOUNG SOO \nBridge Engineer (Pylon) \nJan 05, 2025 \n7 \nSONG, HYE GUM \nBridge Engineer (Cable) \nJan 05, 2025 \n8 \nLEE, JAE SUNG \nBridge Engineer (Wind) \nJan 05, 2025 \n9 \nSHIN, GYOUNG SEOB \nGeotechnical Engineer \nJan 05, 2025 \n \n2. Mobilization of International Non-Key Experts \nNo. \nName \nPosition \nActual Date of \nMobilization \n1 \nKOH, JONG UP \nHighway Engineer \nJan 05, 2025 \n2 \nPARK, JAE JIN \nTraffic Analysis Specialist \nJan 05, 2025 \n3 \nSONG, YONG CHUL \nOffshore Engineer \nJan 05, 2025 \n4 \nHA, MIN KYU \nDrainage Design Engineer \nJan 05, 2025 \n5 \nJANG, MYUNG HEE \nGeologist \nJan 05, 2025 \n6 \nKIM, IK HWAN \nQuantity Engineer \nJan 05, 2025 \n \n3. Mobilization of Local Key Experts \nNo. \nName \nPosition \nActual Date of \nMobilization \n1 \nMark Anthony V. Apelo \nBridge Engineer (Analysis) \nJan 05, 2025 \n2 \nMelodina F. Tuano \nBridge Engineer (Substructure) \nJan 05, 2025 \n3 \nAurora T. Fabro \nBridge Engineer (Superstructure1) \nJan 05, 2025 \n4 \nRogelio T Sumbe \nBridge Engineer (Superstructure2) \nJan 05, 2025 \n5 \nGuillermo Gregorio A. Mina \nBridge Engineer (Foundation) \nJan 05, 2025 \n \n4. Mobilization of Local Non-Key Experts \nNo. \nName \nPosition \nActual Date of \nMobilization \n1 \nElvira G. Guirindola \nHighway Engineer 1 \nJan 05, 2025 \n2 \nDaniel S. Baptista \nHighway Engineer 2 \nJan 05, 2025 \n3 \nMario M. Quimboy \nQuantity Engineer 1 \nJan 05, 2025 \n4 \nAnaliza C. Bauda \nQuantity Engineer 2 \nJan 05, 2025 \n \n",
+    "generated": "```json\n{\n  \"공문 번호\": \"SYJV-250001\",\n  \"공문 일자\": \"Jan / 07 / 2025\",\n  \"수신처\": \"Department of Public Works and Highways\",\n  \"수신자\": \"Project Director\",\n  \"수신자 약자\": \"PD\",\n  \"발신처\": \"SEOYOUNG JOINT VENTURE\",\n  \"발신자\": \"Team Leader\",\n  \"발신자 약자\": \"TL\",\n  \"공문 제목\": \"Advanced Mobilization of Experts\",\n  \"공문 제목 요약\": \"전문가 사전 동원 보고\",\n  \"공문 내용 요약\": \"프로젝트에 필요한 전문가들이 사전 동원되었음을 알림\",\n  \"공문간 연계\": \"없음\",\n  \"공문 종류\": \"기술/성과물\",\n  \"공문 유형\": \"보고\",\n  \"첨부문서 제목\": [\n    \"Mobilization of International Key Experts in Home (Korea)\",\n    \"Mobilization of International Non-Key Experts in Home (Korea)\",\n    \"Mobilization of Local Key Experts in Field (Philippines)\",\n    \"Mobilization of Local Non-Key Experts in Field (Philippines)\",\n    \"CVs of Experts\"\n  ],\n  \"첨부문서 수\": 5\n}\n```",
+    "processed": {
+      "공문번호": "SYJV-250001",
+      "공문일자": "Jan / 07 / 2025",
+      "수신처": "Department of Public Works and Highways",
+      "수신자": "Project Director",
+      "수신자약자": "PD",
+      "발신처": "SEOYOUNG JOINT VENTURE",
+      "발신자": "Team Leader",
+      "발신자약자": "TL",
+      "공문제목": "Advanced Mobilization of Experts",
+      "공문제목요약": "전문가 사전 동원 보고",
+      "공문내용요약": "프로젝트에 필요한 전문가들이 사전 동원되었음을 알림",
+      "공문간연계": "없음",
+      "공문종류": "기술/성과물",
+      "공문유형": "보고",
+      "첨부문서제목": [
+        "Mobilization of International Key Experts in Home (Korea)",
+        "Mobilization of International Non-Key Experts in Home (Korea)",
+        "Mobilization of Local Key Experts in Field (Philippines)",
+        "Mobilization of Local Non-Key Experts in Field (Philippines)",
+        "CVs of Experts"
+      ],
+      "첨부문서수": 5
+    }
+  }
+]
--- a/src/static/html/extract_guide.html
+++ b/src/static/html/extract_guide.html
@@ -0,0 +1,83 @@
+<!DOCTYPE html>
+<html lang="ko">
+<head>
+    <meta charset="UTF-8">
+    <title>📄 공문 추출·번역 API 가이드</title>
+    <style>
+        body { font-family: 'Arial', sans-serif; margin: 40px; line-height: 1.6; }
+        h1, h2 { color: #2c3e50; }
+        code, pre { background: #f4f4f4; padding: 10px; display: block; white-space: pre-wrap; border-left: 4px solid #3498db; }
+        .warn { color: #c0392b; font-weight: bold; }
+    </style>
+</head>
+<body>
+<h1>📄 문서 추출·번역 API 가이드</h1>
+<p>
+🔹 아래는 <strong>/extract</strong> 계열 API에 프롬프트를 작성하고 사용하는 방법에 대한 안내입니다.
+</p>
+
+<h3>📌 사용 가능한 API 종류</h3>
+<P>
+ 🔹 <strong>/extract/inner</strong>: 내부 모델을 사용<br>
+ 🔹 <strong>/extract/outer</strong>: 외부 모델을 사용<br>
+ 🔹 <strong>/extract/all</strong>: 내부 + 외부 모델을 동시에 사용<br>
+ 🔹 <strong>/extract/structured</strong>: 고정된 JSON 필드로 정형 응답
+</p>
+
+<hr>
+
+<h2>✅ "/extract/inner", "/extract/outer", "/extract/all"</h2>
+<p>
+ 🔹 문서 추출 항목을 다양하게 변경하며 시도할 경우에 사용합니다.<br>
+ 🔹 해당 API의 업로드 파일은 2가지로 구성됩니다:
+</p>
+<img src="static/image/FastAPI_extract_swagger.png" width="600" style="border: 2px solid #ccc; border-radius: 4px;"/>
+<h3>📌 API 첨부 파일 설명</h3>
+<ul>
+    <li><strong>files</strong>: <span class="warn">(필수)</span> PDF, 이미지 등 추론 대상 파일을 업로드합니다.</li>
+    <li><strong>prompt_file</strong>: <span class="warn">(선택)</span> 질문이 포함된 프롬프트 텍스트(.txt)를 업로드합니다.
+        <ul>
+            <li><strong>업로드⭕</strong>: 사용자 정의 프롬프트 사용</li>
+            <li><strong>업로드❌</strong>: 내부에 정의된 기본 프롬프트를 사용</li>
+        </ul>
+    </li>
+</ul>
+
+<p class="warn">Tip. 프롬프트 업로드⭕ 경우, <strong>"JSON으로 작성해주세요"</strong> 문구는 자동으로 삽입되므로 직접 <strong>작성할 필요가 없습니다.</strong></p>
+<p>→ 따라서, <strong>프롬프트 작성은 아래처럼 항목 설명만 작성</strong>하면 됩니다:</p>
+
+<code>  1. 공문번호: 문서 번호를 기입하세요.
+  2. 공문일자: 공문 발행일을 작성하세요.
+  3. 수신처: 수신 기관이나 부서명을 작성하세요.
+  4. 수신자: 수신자의 이름 또는 직책을 기입하세요.
+  ...</code>
+
+<hr>
+
+<h2>✅ "/extract/structured"</h2>
+<p>
+ 🔹 문서 추출 항목을 고정하여 정해진 필드 형식으로 응답 받기 위해 사용합니다.<br>
+ 🔹 해당 API의 업로드 파일은 3가지로 구성됩니다:
+</p>
+<img src="static/image/FastAPI_extract_structured_swagger.png" width="600" style="border: 2px solid #ccc; border-radius: 4px;"/>
+<h3>📌 API 첨부 파일 설명</h3>
+<ul>
+    <li><strong>files</strong>: <span class="warn">(필수)</span> PDF, 이미지 등 추론 대상 파일을 업로드합니다.</li>
+    <li><strong>schema_file</strong>: <span class="warn">(선택)</span> 응답 구조를 정의한 스키마 파일(.json)을 업로드합니다
+        <ul>
+            <li><strong>업로드⭕</strong>: 사용자 정의 필드 사용</li>
+            <li><strong>업로드❌</strong>: 내부에 정의된 기본 필드를 사용</li>
+        </ul>
+    </li>    
+    <li><strong>prompt_file</strong>: <span class="warn">(선택)</span> 질문이 포함된 프롬프트 텍스트(.txt)를 업로드합니다.
+        <ul>
+            <li><strong>업로드⭕</strong>: 사용자 정의 프롬프트 사용</li>
+            <li><strong>업로드❌</strong>: 내부에 정의된 기본 프롬프트를 사용</li>
+        </ul>
+    </li>
+</ul>
+
+<p class="warn">※ schema json 작성은 "Guide Book" 첫 번째인 "schema_file_guide"를 참고해주세요.</p>
+
+</body>
+</html>
--- a/src/static/html/extraction_structured_guide.html
+++ b/src/static/html/extraction_structured_guide.html
@@ -0,0 +1,32 @@
+<!DOCTYPE html>
+<html lang="ko">
+<head>
+    <meta charset="UTF-8">
+    <title>📄 /extract/structured 프롬프트 가이드</title>
+    <style>
+        body { font-family: 'Arial', sans-serif; margin: 40px; line-height: 1.6; }
+        h1, h2 { color: #2c3e50; }
+        code, pre { background: #f4f4f4; padding: 10px; display: block; white-space: pre-wrap; border-left: 4px solid #3498db; }
+        .warn { color: #c0392b; font-weight: bold; }
+    </style>
+</head>
+<body>
+    <h1>📄 /extract/structured 프롬프트 가이드</h1>
+    <p>아래는 <strong>/extract</strong> 계열 API에 프롬프트를 작성하고 사용하는 방법에 대한 안내입니다.</p>
+
+    <hr>
+
+    <h2>✅ 항목은 고정하되, 항목별 '지시문' 을 수정하고 싶은 경우</h2>
+    <h3>🖥️ 사용 API: <strong>/extract/structured</strong></h3>
+    <p>🔹 항목은 16개로 <strong>고정</strong>되어 있으며 <strong>추가/삭제/변경 불가</strong>합니다.</p>
+    <p>🔹 <strong>각 항목에 대한 '지시문' 설명만 작성</strong>할 수 있습니다.</p>
+
+    <code>
+1. 공문번호: 공문서 상단에 표기된 문서 번호를 추출합니다.
+2. 공문일자: 공문이 발행된 날짜를 추출합니다.
+3. 수신처: 문서를 수신하는 기관 또는 부서를 식별합니다.
+...
+16. 첨부문서수: 찾은 첨부문서 개수를 알려주세요.
+    </code>
+</body>
+</html>
--- a/src/static/html/general_guide.html
+++ b/src/static/html/general_guide.html
@@ -0,0 +1,176 @@
+<!DOCTYPE html>
+<html lang="ko">
+<head>
+    <meta charset="UTF-8">
+    <title>🧾 일반 추론 API 가이드</title>
+    <style>
+        body { font-family: 'Arial', sans-serif; margin: 40px; line-height: 1.6; }
+        h1, h2 { color: #2c3e50; }
+        code, pre { background: #f4f4f4; padding: 10px; display: block; white-space: pre-wrap; border-left: 4px solid #3498db; }
+        .warn { color: #c0392b; font-weight: bold; }
+    </style>
+</head>
+<body>
+<h1>🧾 일반 추론 API 가이드</h1>
+<p>
+🔹 <strong>/general</strong> 계열 API를 활용하여 문서 기반 질문-응답 요약을 수행하는 방법을 안내합니다.<br>
+🔹 공문 외에 다양한 도메인에 적용 가능하며, 사용자는 <strong>URL(Markdown)</strong> 또는 <strong>JSON</strong> 구조로 답변을 받습니다.
+</p>
+
+<h3>📌 사용 가능한 API 종류</h3>
+<p>
+🔹 <strong>/general/inner</strong>: 내부 모델을 사용하여 일반 요약 수행<br>
+🔹 <strong>/general/outer</strong>: 외부 모델(GPT, Claude, Gemini 등)을 사용하여 요약 수행
+</p>
+
+<hr>
+
+<h2>✅ 프롬프트 작성 예시</h2>
+<p>
+🔹 <strong>프롬프트 파일은 반드시 업로드</strong>해야 합니다.<br>
+🔹 [예시] 질문은 다음과 같이 구성할 수 있습니다:
+</p>
+
+<code>문서 분석
+
+[Q1] 이 문서의 주요 내용을 요약해주세요.
+
+[Q2] 발신자와 수신자 정보를 정리해주세요.
+
+[Q3] 문서에서 요청하는 주요 조치를 요약해주세요.
+
+[Q4] 날짜, 장소, 인명 등 주요 엔티티를 추출해주세요.
+
+[Q5] 이 문서의 목적이나 배경을 기술해주세요.
+</code>
+
+<hr>
+<h2>✅ Schema JSON 작성 예시</h2>
+<p>
+🔹 <strong>schema_file은 선택사항</strong>이며, JSON 형식으로 답변 받기 위해선 작성이 필요합니다.<br>
+🔹 추출이 필요한 항목과 항목의 답변을 정의할 때 사용합니다.<br>
+🔹 특수 항목은 <strong>enum</strong> 또는 <strong>type</strong> 값을 지정할 수 있습니다.
+</p>
+
+<pre>
+{
+    "title": "DocumentSummary",
+    "type": "object",
+    "properties": {
+        "공문번호": { "type": "string" },
+        "공문일자": { "type": "string" },
+        "수신체": { "type": "string" },
+        "수신자": { "type": "string" },
+        "수신자_약자": { "type": "string" },
+        "발신체": { "type": "string" },
+        "발신자": { "type": "string" },
+        "발신자_약자": { "type": "string" },
+        "공문제목": { "type": "string" },
+        "공문제목요약": { "type": "string" },
+        "공문내용요약": { "type": "string" },
+        "공문간연계": { "type": "string" },
+        "공문종류": {
+        "type": "string",
+        "enum": ["행정/일반", "기술/성과물", "회의/기타"]
+        },
+        "공문유형": {
+        "type": "string",
+        "enum": ["보고", "요청", "지시", "회신", "계약"]
+        },
+        "첨부문서제목": { "type": "string" },
+        "첨부문서수": { "type": "integer" }
+    },
+    "required": [
+        "공문번호", "공문일자", "수신체", "수신자", "수신자_약자",
+        "발신체", "발신자", "발신자_약자", "공문제목", "공문제목요약",
+        "공문내용요약", "공문종류", "공문유형", "첨부문서제목", "첨부문서수"
+    ]
+}</pre>
+<h3>📌 주요 키·속성 설명</h3>
+<p>🔹 위 JSON 예시는 <strong>Schema 구조</strong>를 정의하는 방식으로 작성되어 있으며, 각 키의 의미는 다음과 같습니다:</p>
+<ul>
+    <li><strong>title</strong>: 스키마의 이름 또는 제목을 정의합니다. 주로 문서나 데이터 객체의 이름을 지정하는 데 사용됩니다.<br>
+        [예시]: <strong>"title": "DocumentSummary"</strong> → 이 JSON은 DocumentSummary라는 이름의 구조입니다.</li>
+    <br>  
+    <li><strong>type</strong>: 이 JSON 구조 자체가 어떤 형태의 데이터인지 정의합니다.<br>
+        [예시]: <strong>"type": "object"</strong> → 이 스키마는 key-value 쌍으로 이루어진 객체(object)입니다.</li>
+    <br>
+    <li><strong>properties</strong>: 객체 안에 포함된 각 필드(속성)를 정의하는 부분입니다.<br>
+        이 안에는 각각의 필드 이름(key)과 해당 값의 <strong>type</strong> 및 <strong>enum</strong> 등 상세 정보가 포함됩니다.<br>
+        [예시]: <strong>"공문번호": { "type": "string" }</strong> → 공문번호는 문자열 타입이어야 함을 의미합니다.</li>
+    <br>
+    <ul>
+        <li><strong>type</strong>: 해당 값의 데이터 유형을 지정합니다. 주요 유형은 다음과 같습니다:
+            <ul>
+                <li><strong>string</strong>: 문자열 (예: "서울특별시")</li>
+                <li><strong>integer</strong>: 정수 (예: 3, 25)</li>
+                <li><strong>boolean</strong>: 참/거짓 값 (예: true, false)</li>
+            </ul>
+        </li>
+        <li><strong>enum</strong>: 해당 필드가 가질 수 있는 값을 목록으로 제한합니다. 지정된 값 외에는 허용되지 않습니다.
+            <br>[예시]: <strong>"공문종류": { "type": "string", "enum": ["행정/일반", "기술/성과물", "회의/기타"] }</strong>
+        </li>
+    </ul>
+    <br>
+    
+    <li><strong>required</strong>: 필수로 입력되어야 하는 항목들의 리스트입니다.<br>
+        이 배열에 나열된 필드가 누락될 경우, JSON이 유효하지 않은 것으로 간주됩니다.<br>
+        [예시]: <strong>"required": ["공문번호", "공문일자", ...]</strong> → 이 필드들은 반드시 포함되어야 합니다.</li>
+</ul>
+<p class="warn">Tip. schema json을 사용하는 경우, <strong>프롬프트의 각 항목에 대한 지시문(description)을 각분으로 설정</strong>해주면 더 좋습니다.</p>
+<code>  1. 공문번호: 문서 번호를 기입하세요. (예시: Ref. No. SYJV-250031)
+  2. 공문일자: 공문 발행일을 작성하세요. (예시: Mar / 28 / 2025)
+  3. 수신처: 수신 기관이나 부서명을 작성하세요. (예시: Department of Public Works and Highways)
+  ...
+  16. 첨부문서수: 첨부문서제목을 바탕으로 문서의 개수를 작성하세요.
+</code>
+
+<hr>
+
+<h2>✅ 사용 절차 안내</h2>
+<p>
+🔹 해당 API에 업로드 가능한 파일은 3가지로 구성됩니다:
+</p>
+<img src="static/image/FastAPI_general.png" alt="FastAPI general 입력 화면 예시" width="600" style="border: 2px solid #ccc; border-radius: 4px;"/>
+<h3>📌 API 첨부 파일 설명</h3>
+<ul>
+    <li><strong>input_file</strong>: <span class="warn">(필수)</span> PDF, 이미지 등 추론 대상 파일을 업로드합니다.</li>
+    <li><strong>prompt_file</strong>: <span class="warn">(필수)</span> 질문이 포함된 프롬프트 텍스트(.txt)를 업로드합니다.</li>
+    <li><strong>schema_file</strong>: <span class="warn">(선택)</span> 응답 구조를 정의한 스키마 파일(.json)을 업로드합니다.</li>
+</ul>
+
+<hr>
+
+<h2>1️⃣ Markdown 형식 응답 예시(schema file 미업로드)</h2>
+<p>
+🔹 모델은 질문에 대해 <strong>줄글 형식의 응답을 생성</strong>하며, 응답 JSON에는 다음 필드가 포함됩니다:
+</p>
+<img src="static/image/FastAPI_general_response.png" alt="FastAPI general 결과 화면 예시" width="600" style="border: 2px solid #ccc; border-radius: 4px;"/>
+<h3>📌 주요 답변 키 설명</h3>
+<ul>
+    <li><strong>generated</strong>: 마크다운 형식의 응답 텍스트</li>
+    <li><strong>summary_html</strong>: 마크다운을 HTML로 변환하여 저장한 URL</li>
+    🔗<a href="http://172.16.10.176:8888/view/generated_html/Contract_for_Main_Office.html" target="_blank">
+    http://172.16.10.176:8888/view/generated_html/Contract_for_Main_Office.html
+    </a>
+</ul>
+<img src="static/image/FastAPI_general_result.png" alt="FastAPI general 결과 화면 예시" width="600" style="border: 2px solid #ccc; border-radius: 4px;"/>
+
+<hr>
+
+<h2>2️⃣ 구조화 JSON 형식 응답 예시(schema file 업로드)</h2>
+<p>
+🔹 /general API에 <strong>schema_file</strong>을 함께 업로드한 경우, 모델은 지정된 JSON Schema에 따라 항목별 응답을 생성합니다.
+</p>
+<img src="static/image/FastAPI_general_JSONresult.png" alt="FastAPI structured 응답 예시" width="600" style="border: 2px solid #ccc; border-radius: 4px;"/>
+<h3>📌 주요 답변 키 설명</h3>
+<ul>
+    <li><strong>generated</strong>: JSON 구조의 응답 데이터</li>
+    <li><strong>processed</strong>: 구조화된 응답이므로 별도의 후처리는 생략되며, 안내 메시지만 포함됩니다.</li>
+</ul>
+<p class="warn">※ Claude 모델은 <strong>영문 필드명만 허용</strong>합니다.</p>
+
+</body>
+</html>
--- a/src/static/html/schema_file_guide.html
+++ b/src/static/html/schema_file_guide.html
@@ -0,0 +1,98 @@
+<!DOCTYPE html>
+<html lang="ko">
+<head>
+    <meta charset="UTF-8">
+    <title>🧾 스키마 파일 작성 가이드</title>
+    <style>
+        body { font-family: 'Arial', sans-serif; margin: 40px; line-height: 1.6; }
+        h1, h2 { color: #2c3e50; }
+        code, pre { background: #f4f4f4; padding: 10px; display: block; white-space: pre-wrap; border-left: 4px solid #3498db; }
+        .warn { color: #c0392b; font-weight: bold; }
+    </style>
+</head>
+<body>
+<h1>🧾 JSON Schema file 작성 가이드</h1>
+<p>
+🔹 JSON Schema는 AI 모델이 생성해야 할 <strong>응답의 구조를 정의</strong>할 때 사용됩니다.<br>
+🔹 schema_file을 설정하면 문서에서 추출해야 할 항목과 각 항목의 데이터 형식을 명확하게 지정할 수 있습니다.
+</p>
+<h3>📌 사용 되는 API 종류</h3>
+<p>
+🔹 <strong>/extract/structured</strong>
+</p>
+
+<hr>
+<h2>✅ Schema JSON 작성 예시</h2>
+<p>🔹 [예시] 공문 요약을 위한 JSON Schema 작성 예시입니다:</p>
+
+<pre>{
+  "title": "DocumentSummary",
+  "type": "object",
+  "properties": {
+    "공문번호": { "type": "string" },
+    "공문일자": { "type": "string" },
+    "수신체": { "type": "string" },
+    "수신자": { "type": "string" },
+    "수신자_약자": { "type": "string" },
+    "발신체": { "type": "string" },
+    "발신자": { "type": "string" },
+    "발신자_약자": { "type": "string" },
+    "공문제목": { "type": "string" },
+    "공문제목요약": { "type": "string" },
+    "공문내용요약": { "type": "string" },
+    "공문간연계": { "type": "string" },
+    "공문종류": {
+      "type": "string",
+      "enum": ["행정/일반", "기술/성과물", "회의/기타"]
+    },
+    "공문유형": {
+      "type": "string",
+      "enum": ["보고", "요청", "지시", "회신", "계약"]
+    },
+    "첨부문서제목": { "type": "string" },
+    "첨부문서수": { "type": "integer" }
+  },
+  "required": [
+    "공문번호", "공문일자", "수신체", "수신자", "수신자_약자",
+    "발신체", "발신자", "발신자_약자", "공문제목", "공문제목요약",
+    "공문내용요약", "공문종류", "공문유형", "첨부문서제목", "첨부문서수"
+  ]
+}</pre>
+
+<hr>
+
+<h3>📌 주요 키 설명</h3>
+<p>🔹 위 JSON 예시는 <strong>Schema 구조</strong>를 정의하는 방식으로 작성되어 있으며, 각 키의 의미는 다음과 같습니다:</p>
+<ul>
+  <li><strong>title</strong>: JSON 스키마의 이름 또는 제목을 정의합니다. 일반적으로 문서나 데이터 객체의 이름으로 사용됩니다.</li>
+  <li><strong>type</strong>: 이 JSON 전체 구조가 어떤 데이터 형태인지 지정합니다. 예: object, array, string 등.</li>
+  <li><strong>properties</strong>: 객체 내부에 포함된 각 항목(필드)을 정의하는 공간입니다. 각 항목에 대해 <strong>type</strong>이나 <strong>enum</strong>을 지정할 수 있습니다.</li>
+  <li><strong>required</strong>: 필수로 입력되어야 할 항목을 배열 형태로 나열합니다. 이 <strong>항목들이 누락되면 JSON 유효성 검사에서 실패</strong>하게 됩니다.</li>
+</ul>
+
+<hr>
+
+<h3>📌 필드 속성 설명</h3>
+<p>🔹 각 항목에 정의되는 <strong>type</strong>과 <strong>enum</strong>의 의미는 다음과 같습니다:</p>
+<ul>
+  <li><strong>type</strong>: 해당 필드의 데이터 유형을 명시합니다. 주요 유형은 다음과 같습니다:
+    <ul>
+      <li><strong>string</strong>: 문자열 값 (예: "서울특별시")</li>
+      <li><strong>integer</strong>: 정수 값 (예: 3, 25)</li>
+      <li><strong>boolean</strong>: 참/거짓 논리값 (예: true, false)</li>
+    </ul>
+  </li>
+  <li><strong>enum</strong>: 해당 항목이 가질 수 있는 값을 제한할 때 사용합니다. 배열로 허용 가능한 값을 정의하며, 그 외 값은 허용되지 않습니다.<br>
+    예: <strong>"공문종류"는 "행정/일반", "기술/성과물", "회의/기타" 중 하나여야 함</strong>
+  </li>
+</ul>
+
+<p class="warn">Tip. 프롬프트 작성 시 각 항목에 대한 <strong>지시문(description)</strong>을 따로 설정하면 AI 응답의 품질이 더욱 향상됩니다.</p>
+<code>  1. 공문번호: 문서 번호를 기입하세요. (예시: Ref. No. SYJV-250031)
+  2. 공문일자: 공문 발행일을 작성하세요. (예시: Mar / 28 / 2025)
+  3. 수신처: 수신 기관이나 부서명을 작성하세요. (예시: Department of Public Works and Highways)
+  ...
+  16. 첨부문서수: 첨부문서제목을 바탕으로 문서의 개수를 작성하세요.
+</code>
+</body>
+</html>
--- a/src/static/html/swagger_ui.html
+++ b/src/static/html/swagger_ui.html
@@ -0,0 +1,48 @@
+<!DOCTYPE html>
+<html lang="en">
+<head>
+  <meta charset="UTF-8">
+  <title>HANMAC AI Cell</title>
+  <link rel="stylesheet" type="text/css" href="https://cdn.jsdelivr.net/npm/swagger-ui-dist/swagger-ui.css" />
+  <link rel="icon" href="/static/image/favicon.ico" type="image/x-icon">
+  <style>
+    body {
+      margin: 0;
+      padding: 0;
+    }
+    #logo-bar {
+      display: flex;
+      align-items: center;
+      background-color: #f5f5f5;
+      padding: 10px 20px;
+      border-bottom: 1px solid #ccc;
+    }
+    #logo-bar img {
+      height: 40px;
+      margin-right: 15px;
+    }
+    #logo-bar h1 {
+      font-size: 20px;
+      font-weight: bold;
+    }
+  </style>
+</head>
+<body>
+  <!-- ✅ 로고 영역 -->
+  <div id="logo-bar">
+    <img src="/static/image/logo.png" alt="HANMAC Logo" />
+    <h1>HANMAC AI</h1>
+  </div>
+
+  <!-- Swagger UI -->
+  <div id="swagger-ui"></div>
+
+  <script src="https://cdn.jsdelivr.net/npm/swagger-ui-dist/swagger-ui-bundle.js?v=3"></script>
+  <script>
+    SwaggerUIBundle({
+      url: "/openapi.json",
+      dom_id: "#swagger-ui",
+    });
+  </script>
+</body>
+</html>
--- a/src/static/image/FastAPI_extract_structured_swagger.png
+++ b/src/static/image/FastAPI_extract_structured_swagger.png
--- a/src/static/image/FastAPI_extract_swagger.png
+++ b/src/static/image/FastAPI_extract_swagger.png
--- a/src/static/image/FastAPI_general.png
+++ b/src/static/image/FastAPI_general.png
--- a/src/static/image/FastAPI_general_JSONresult.png
+++ b/src/static/image/FastAPI_general_JSONresult.png
--- a/src/static/image/FastAPI_general_response.png
+++ b/src/static/image/FastAPI_general_response.png
--- a/src/static/image/FastAPI_general_result.png
+++ b/src/static/image/FastAPI_general_result.png
--- a/src/static/image/f-logo.png
+++ b/src/static/image/f-logo.png
--- a/src/static/image/favicon.ico
+++ b/src/static/image/favicon.ico
--- a/src/static/image/logo.png
+++ b/src/static/image/logo.png
--- a/src/static/prompt/d6c_test_prompt_eng.txt
+++ b/src/static/prompt/d6c_test_prompt_eng.txt
@@ -0,0 +1,30 @@
+Instructions:
+- Accuracy is critically important.
+- The reference number must be extracted only from the line that starts with "Ref. No."
+- For items marked as “Korean”, the answer must be written in Korean.
+- Output only the following 13 fields, no more, no less.
+- If the information is unknown, write "확인필요". If it is clearly missing, write "없음".
+
+1. 공문 번호: Extract only the "Ref. No." line in the format "ENG-NUM"
+2. 공문 일자: YYYY.MM.DD
+3. 수신자: Extract the job title of the recipient only
+4. 수신자 약자: Abbreviation of recipient's title
+5. 발신자: Extract the job title of the sender only from the signature block at the end of the document
+- Do not extract contact persons mentioned in the body
+- Do not include the organization names
+6. 발신자 약자: Abbreviation of sender's title
+7. 공문 제목: Extract only the first line or the first bold phrase directly following the “Subject:” label, Do NOT include secondary lines or explanatory phrases, such as project names or descriptions.
+8. 공문 제목 요약: Write a 10–20 character summary in Korean
+9. 공문 내용 요약: Provide a brief summary in Korean   
+10. 공문간 연계: Write "있음" only if the content of the document explicitly mentions, references, or responds to another document, Do not infer linkage based on date similarity, numbering (e.g., “PH-00”), or reference format alone.
+If no explicit mention of another document is found in the content, write "없음"
+11. 공문 종류: Choose one of the following
+-행정/일반=for administrative topics such as personnel, dispatch, budget, contracts
+-기술/성과물=for technical discussions, schedules, deliverables, technical meetings
+12. 공문 유형: Choose one from
+-보고=One-way communication of facts or plans
+-요청=Requests or inquiries to the recipient
+-지시=Orders or commands from authority
+-회신=Replies or feedback to prior documents
+-계약=Official correspondence related to contract terms
+13. 첨부문서 수: Provide the number only
--- a/src/static/prompt/default_prompt_v0.1.txt
+++ b/src/static/prompt/default_prompt_v0.1.txt
@@ -0,0 +1,31 @@
+다음은 스캔된 문서에서 OCR로 추출된 원시 텍스트입니다.  
+오타나 줄바꿈 오류가 있을 수 있으니 의미를 유추하여 정확한 정보를 추출해주세요.
+정확성이 매우 중요하므로 반드시 공문에 포함된 텍스트만 사용하여 작성해주세요.  
+
+다음 주어진 항목을 반드시 JSON 형식(```json)으로 작성해주세요:
+
+1. 공문 번호: 공문 번호로 Ref. No.를 의미합니다. 없는 경우는 없음으로 표기해주세요. (예시: Ref. No. SYJV-250031)
+2. 공문 일자: 공문에 적혀 있는 날짜입니다. 번역하지 않고 그대로 표기해주세요. (예시: Mar / 28 / 2025)
+3. 수신처: 공문을 받는 사람이 속한 조직명 (예시: Department of Public Works and Highways)
+4. 수신자: 공문을 받은 사람의 직책 (예시: Project Director)
+5. 수신자(약자): 수신자 직책 약자 (예시: PD)
+6. 발신처: 공문을 보낸 사람이 속한 조직명 (예시: SEOYOUNG JOINT VENTURE)
+7. 발신자: 공문을 보낸 사람의 직책 (예시: Team Leader)
+8. 발신자(약자): 발신자 직책 약자 (예시: TL)
+9. 공문 제목: 공문의 제목으로 SUBJECT 의미합니다. 적당한 길이로 끊어야 하는데 윗 문장이 프로젝트 이름으로 판단되는 경우, 9.1 프로젝트 항목을 신설해 리턴 (예시: Submission of Comment Matrix for Design Deliverable)
+10. 공문 제목 요약: 공문 제목을 10~20자 사이로 요약해주세요. 반드시 한글로 작성합니다.
+11. 공문 내용 요약: 공문 내용을 요약해주세요. 반드시 한글로 작성합니다.
+12. 공문간 연계: 연계된 공문이 있으면 공문번호를 알려주세요. 공문번호만 필요합니다. 없는 경우는 없음으로 표기해주세요.
+13. 공문 종류: 공문 종류는 공문의 내용을 분석해서 다음 3가지 중 반드시 하나를 선택합니다.
+  * 행정/일반 – 인사, 파견, 조직, 비용(예산), 계약 등 경영/행정 관련 
+  * 기술/성과물 – 일정 협의, 작업계획, 성과물 제출, 기술적 업무 회의, 성과물 전달 등 
+  * 회의/기타 – 회의록 등 위에 내용 이외의 것
+14. 공문 유형: 공문 유형은 공문의 내용을 분석해서 다음의 5가지 중 반드시 하나를 선택합니다.
+  * 보고 : 완료된 사실이나 계획을 일방적으로 알리는 공문
+  * 요청 : 상대방의 행동 또는 답변을 유도하는 공문
+  * 지시 : 권한 있는 주체가 수행을 명령하는 공문
+  * 회신 : 기존 공문에 대해 응답하거나 의견을 제공하는 공문
+  * 계약 : 계약조건 변경과 관련된 공식 공문
+15. 첨부문서제목: 공문의 첨부 문서는 Enclosures: 를 의미합니다. 없는 경우는 없음으로 표기해주세요. (예시: 1. Comment Matrix_4.4.2 Draft Detailed Engineer Design Report (Section A) )
+16. 첨부문서수: 찾은 첨부문서 개수를 알려주세요.
+17. 번역본: 원문 본문 전체를 의미 왜곡 없이 한국어로 번역해 주세요. 원문이 이미 한국어라면 원문을 그대로 사용합니다. 고유명사/기관명/직책/Ref. No./날짜/첨부명 등은 원문 표기(대소문자·구두점 포함) 유지하고, 목록·번호·줄바꿈 등 서식은 가능한 한 보존하세요. OCR 하이픈 분리/비정상 줄바꿈은 자연스럽게 복구합니다.
--- a/src/static/prompt/i18n_test_prompt_kor.txt
+++ b/src/static/prompt/i18n_test_prompt_kor.txt
@@ -0,0 +1,24 @@
+주의:
+- **정확성이 매우 중요합니다.**
+- 한글로 작성하라고 명시된 항목은 반드시 한글로 작성해야 합니다.
+- 반드시 아래 **1~10번 항목만** 출력하며, 절대 누락하지 마세요.
+- 항목을 알 수 없으면 "확인필요", 항목이 문서에 존재하지 않으면 "없음"이라고 작성하세요.
+
+1. 공문 번호
+2. 공문 일자: YYYY.MM.DD 
+3. 수신자
+4. 발신자: 담당
+5. 공문 제목
+6. 공문 내용 요약: **한글로** 간단하게 요약
+7. 공문간 연계: 다른 공문과의 연관이 명시되어 있으면 "있음", 없으면 "없음"으로 작성
+8. 공문 종류: 아래 중 하나를 선택
+   - 행정/일반: 인사, 파견, 조직, 예산, 계약 등 행정 관련 내용
+   - 기술/성과물: 일정, 작업계획, 성과물 제출, 기술 업무 등
+9. 공문 유형: 아래 중 하나를 선택
+   - 보고: 완료된 사실이나 계획을 알리는 경우
+   - 요청: 상대방의 행동이나 응답을 요구하는 경우
+   - 지시: 권한 있는 주체가 수행을 명령하는 경우
+   - 회신: 기존 공문에 대한 응답이나 의견인 경우
+   - 계약: 계약 조건 변경과 관련된 공문
+10. 첨부문서 수: 숫자만 작성
+
--- a/src/static/prompt/structured_prompt_v0.1.txt
+++ b/src/static/prompt/structured_prompt_v0.1.txt
@@ -0,0 +1,29 @@
+다음은 스캔된 문서에서 OCR로 추출된 원시 텍스트입니다.  
+오타나 줄바꿈 오류가 있을 수 있으니 의미를 유추하여 정확한 정보를 추출해주세요.
+정확성이 매우 중요하므로 반드시 공문에 포함된 텍스트만 사용하여 작성해주세요.  
+
+1. 공문 번호: 공문 번호로 Ref. No.를 의미합니다. 없는 경우는 없음으로 표기해주세요. (예시: Ref. No. SYJV-250031)
+2. 공문 일자: 공문에 적혀 있는 날짜입니다. 번역하지 않고 그대로 표기해주세요. (예시: Mar / 28 / 2025)
+3. 수신처: 공문을 받는 사람이 속한 조직명 (예시: Department of Public Works and Highways)
+4. 수신자: 공문을 받은 사람의 직책 (예시: Project Director)
+5. 수신자(약자): 수신자 직책 약자 (예시: PD)
+6. 발신처: 공문을 보낸 사람이 속한 조직명 (예시: SEOYOUNG JOINT VENTURE)
+7. 발신자: 공문을 보낸 사람의 직책 (예시: Team Leader)
+8. 발신자(약자): 발신자 직책 약자 (예시: TL)
+9. 공문 제목: 공문의 제목으로 SUBJECT를 의미합니다. 적당한 길이로 끊어야 하는데 윗 문장이 프로젝트 이름으로 판단되는 경우, 9.1 프로젝트 항목을 신설해 리턴 (예시: Submission of Comment Matrix for Design Deliverable)
+10. 공문 제목 요약: 공문 제목을 10~20자 사이로 요약해주세요. 반드시 한글로 작성합니다.
+11. 공문 내용 요약: 공문 내용을 요약해주세요. 반드시 한글로 작성합니다.
+12. 공문간 연계: 연계된 공문이 있으면 공문번호를 알려주세요. 공문번호만 필요합니다. 없는 경우는 없음으로 표기해주세요.
+13. 공문 종류: 공문 종류는 공문의 내용을 분석해서 다음 3가지 중 반드시 하나를 선택합니다.
+  * 행정/일반 – 인사, 파견, 조직, 비용(예산), 계약 등 경영/행정 관련 
+  * 기술/성과물 – 일정 협의, 작업계획, 성과물 제출, 기술적 업무 회의, 성과물 전달 등 
+  * 회의/기타 – 회의록 등 위에 내용 이외의 것
+14. 공문 유형: 공문 유형은 공문의 내용을 분석해서 다음의 5가지 중 반드시 하나를 선택합니다.
+  * 보고 : 완료된 사실이나 계획을 일방적으로 알리는 공문
+  * 요청 : 상대방의 행동 또는 답변을 유도하는 공문
+  * 지시 : 권한 있는 주체가 수행을 명령하는 공문
+  * 회신 : 기존 공문에 대해 응답하거나 의견을 제공하는 공문
+  * 계약 : 계약조건 변경과 관련된 공식 공문
+15. 첨부문서제목: 공문의 첨부 문서는 Enclosures: 를 의미합니다. 없는 경우는 없음으로 표기해주세요. (예시: 1. Comment Matrix_4.4.2 Draft Detailed Engineer Design Report (Section A) )
+16. 첨부문서수: 찾은 첨부문서 개수를 알려주세요.
+17. 번역본: 원문 본문 전체를 의미 왜곡 없이 한국어로 번역해 주세요. 원문이 이미 한국어라면 원문을 그대로 사용합니다. 고유명사/기관명/직책/Ref. No./날짜/첨부명 등은 원문 표기(대소문자·구두점 포함) 유지하고, 목록·번호·줄바꿈 등 서식은 가능한 한 보존하세요. OCR 하이픈 분리/비정상 줄바꿈은 자연스럽게 복구합니다.
--- a/src/static/structured_schema.json
+++ b/src/static/structured_schema.json
@@ -0,0 +1,34 @@
+{
+    "title": "DocumentSummary",
+    "type": "object",
+    "properties": {
+      "공문번호": { "type": "string" },
+      "공문일자": { "type": "string" },
+      "수신처": { "type": "string" },
+      "수신자": { "type": "string" },
+      "수신자_약자": { "type": "string" },
+      "발신처": { "type": "string" },
+      "발신자": { "type": "string" },
+      "발신자_약자": { "type": "string" },
+      "공문제목": { "type": "string" },
+      "공문제목요약": { "type": "string" },
+      "공문내용요약": { "type": "string" },
+      "공문간연계": { "type": "string" },
+      "공문종류": {
+        "type": "string",
+        "enum": ["행정/일반", "기술/성과물", "회의/기타"]
+      },
+      "공문유형": {
+        "type": "string",
+        "enum": ["보고", "요청", "지시", "회신", "계약"]
+      },
+      "첨부문서제목": { "type": "string" },
+      "첨부문서수": { "type": "integer" }
+    },
+    "required": [
+      "공문번호", "공문일자", "수신처", "수신자", "수신자_약자",
+      "발신처", "발신자", "발신자_약자", "공문제목", "공문제목요약",
+      "공문내용요약", "공문종류", "공문유형", "첨부문서제목", "첨부문서수"
+    ]
+  }
+  
--- a/src/static/sw.js
+++ b/src/static/sw.js
@@ -0,0 +1 @@
+// This is a placeholder file to prevent 404 errors from browsers trying to fetch a service worker.
--- a/src/utils/init.py
+++ b/src/utils/init.py
--- a/src/utils/checking_files.py
+++ b/src/utils/checking_files.py
@@ -0,0 +1,58 @@
+import os
+from io import BytesIO
+
+import tiktoken
+from fastapi import HTTPException, UploadFile
+
+from config.setting import ALLOWED_EXTENSIONS
+
+
+def validate_all_files(*upload_files: UploadFile):
+    """Reject the request if any uploaded file has a disallowed extension.
+
+    Falsy entries (for example optional uploads that were not supplied) are
+    skipped. The file name is lower-cased before its extension is compared
+    against ``ALLOWED_EXTENSIONS``.
+
+    Args:
+        *upload_files: Uploaded files to validate.
+
+    Raises:
+        HTTPException: 400 for the first file whose extension is not in
+            ``ALLOWED_EXTENSIONS``. A file without a name is treated as having
+            no extension and goes through the same check.
+    """
+    for upload_file in upload_files:
+        if not upload_file:
+            continue
+
+        # ``filename`` can be None for nameless multipart parts; use an empty
+        # name instead of crashing on ``None.lower()``.
+        filename = upload_file.filename or ""
+        _, ext = os.path.splitext(filename.lower())
+
+        if ext not in ALLOWED_EXTENSIONS:
+            raise HTTPException(
+                status_code=400,
+                detail=(
+                    f"파일 '{upload_file.filename}'은(는) 지원하지 않는 확장자입니다. "
+                    f"허용된 확장자는 {', '.join(ALLOWED_EXTENSIONS)} 입니다."
+                ),
+            )
+
+
+def token_counter(prompt: str, text: str) -> int:
+    """Estimate how many tokens ``prompt`` and ``text`` occupy together.
+
+    The two strings are concatenated and encoded with tiktoken's
+    ``cl100k_base`` encoding. If that raises for any reason, the count falls
+    back to a rough estimate of one token per four characters.
+    """
+    combined = prompt + text
+    try:
+        # OpenAI-style tokenizer
+        return len(tiktoken.get_encoding("cl100k_base").encode(combined))
+    except Exception:
+        # Fallback: coarse approximation
+        return len(combined) // 4
+
+
+# ✅ Simple wrapper class that can stand in for UploadFile
+class SimpleUploadFile:
+    """In-memory object exposing ``filename``, ``file`` and ``content_type``."""
+
+    def __init__(
+        self,
+        filename: str,
+        content: bytes,
+        content_type: str = "application/octet-stream",
+    ):
+        # Wrap the raw bytes so that ``.file`` offers a file-like interface.
+        buffer = BytesIO(content)
+
+        self.filename = filename
+        self.file = buffer
+        self.content_type = content_type
+
+
+# ✅ Copy an UploadFile object into a SimpleUploadFile
+def clone_upload_file(upload_file: UploadFile) -> SimpleUploadFile:
+    """Return an in-memory copy of ``upload_file``.
+
+    The source stream is rewound before reading, so the whole content is
+    copied even if part of it was already consumed. It is rewound again
+    afterwards so the original can still be read from the start.
+
+    Args:
+        upload_file: The upload whose ``file`` stream, ``filename`` and
+            ``content_type`` are copied.
+
+    Returns:
+        A ``SimpleUploadFile`` holding the copied bytes.
+    """
+    source = upload_file.file
+    source.seek(0)
+    file_bytes = source.read()
+    source.seek(0)
+
+    return SimpleUploadFile(
+        filename=upload_file.filename,
+        content=file_bytes,
+        content_type=upload_file.content_type,
+    )
--- a/src/utils/checking_keys.py
+++ b/src/utils/checking_keys.py
@@ -0,0 +1,73 @@
+import logging
+import os
+
+from dotenv import load_dotenv
+from fastapi import HTTPException, Security
+from fastapi.security import APIKeyHeader
+from snowflake import SnowflakeGenerator
+
+from services.api_key_service import validate_api_key
+
+logger = logging.getLogger(__name__)
+load_dotenv()
+
+# .env 파일에서 관리자 API 키를 로드
+ADMIN_API_KEY = os.getenv("ADMIN_API_KEY")
+
+# 헤더 설정
+api_key_header = APIKeyHeader(name="X-API-KEY", auto_error=False, description="Client-specific API Key")
+admin_api_key_header = APIKeyHeader(name="X-Admin-KEY", auto_error=False, description="Key for administrative tasks")
+
+
+def get_api_key(api_key: str = Security(api_key_header)):
+    """Validate the ``X-API-KEY`` header value using ``validate_api_key``.
+
+    Returns:
+        The header value when ``validate_api_key`` accepts it.
+
+    Raises:
+        HTTPException: 401 when ``validate_api_key`` rejects the value.
+    """
+    if not validate_api_key(api_key):
+        # Log only a short prefix: a rejected value can still be a real secret
+        # (a revoked key, or a key that belongs to another environment).
+        masked = f"{api_key[:4]}***" if api_key else "<missing>"
+        logger.warning(f"유효하지 않은 API 키로 접근 시도: {masked}")
+        raise HTTPException(status_code=401, detail="Invalid or missing API Key")
+    return api_key
+
+
+def get_admin_key(admin_key: str = Security(admin_api_key_header)):
+    """Validate the ``X-Admin-KEY`` header against ``ADMIN_API_KEY``.
+
+    Returns:
+        The supplied admin key when it matches.
+
+    Raises:
+        HTTPException: 500 when ``ADMIN_API_KEY`` is not configured, 403 when
+            the header is missing or does not match.
+    """
+    import hmac
+
+    if not ADMIN_API_KEY:
+        logger.error("ADMIN_API_KEY가 서버에 설정되지 않았습니다. 관리자 API를 사용할 수 없습니다.")
+        raise HTTPException(status_code=500, detail="Server configuration error")
+
+    # Compare as bytes in constant time: ``!=`` stops at the first differing
+    # character, and ``compare_digest`` rejects non-ASCII ``str`` arguments.
+    supplied = (admin_key or "").encode("utf-8")
+    expected = ADMIN_API_KEY.encode("utf-8")
+    if not admin_key or not hmac.compare_digest(supplied, expected):
+        logger.warning("유효하지 않은 관리자 키로 관리 API 접근 시도.")
+        raise HTTPException(status_code=403, detail="Not authorized for this operation")
+    return admin_key
+
+
+class APIKeyLoader:
+    """Reads LLM provider API keys from environment variables."""
+
+    @staticmethod
+    def _require_env(var_name: str) -> str:
+        """Return the value of ``var_name``; log and raise if it is unset or empty."""
+        value = os.getenv(var_name)
+        if value:
+            return value
+        message = f"{var_name} 환경 변수가 설정되지 않았습니다."
+        logger.error(message)
+        raise ValueError(message)
+
+    @staticmethod
+    def load_gemini_key() -> str:
+        """Return ``GEMINI_API_KEY``; raises ``ValueError`` when it is not set."""
+        return APIKeyLoader._require_env("GEMINI_API_KEY")
+
+    @staticmethod
+    def load_claude_key() -> str:
+        """Return ``ANTHROPIC_API_KEY``; raises ``ValueError`` when it is not set."""
+        return APIKeyLoader._require_env("ANTHROPIC_API_KEY")
+
+    @staticmethod
+    def load_gpt_key() -> str:
+        """Return ``OPENAI_API_KEY``; raises ``ValueError`` when it is not set."""
+        return APIKeyLoader._require_env("OPENAI_API_KEY")
+
+
+import threading
+
+# One generator per node id, shared by all callers in this process. A fresh
+# generator per call would restart its sequence counter every time.
+_snowflake_generators: dict = {}
+_snowflake_lock = threading.Lock()
+
+
+def create_key(node: int = 1) -> str:
+    """Generate a Snowflake-based unique key (used as a request_id).
+
+    The generator for each ``node`` is created once and reused, and access is
+    serialised with a lock, so consecutive calls draw from one sequence.
+    NOTE(review): uniqueness across several worker processes still requires a
+    distinct ``node`` per process; confirm how workers are deployed.
+
+    Args:
+        node: Snowflake instance/node id passed to ``SnowflakeGenerator``.
+
+    Returns:
+        The next id from the node's generator, converted to ``str``.
+    """
+    with _snowflake_lock:
+        generator = _snowflake_generators.get(node)
+        if generator is None:
+            generator = _snowflake_generators[node] = SnowflakeGenerator(node)
+        return str(next(generator))
--- a/src/utils/custom_router.py
+++ b/src/utils/custom_router.py
@@ -0,0 +1,70 @@
+import json
+import time
+from typing import Callable
+
+from fastapi import APIRouter, Request, Response
+from fastapi.responses import JSONResponse
+from fastapi.routing import APIRoute
+
+from config.setting import APP_VERSION
+
+
+class TimedRoute(APIRoute):
+    """Route class that stamps JSON object responses with version and timing.
+
+    For responses whose ``media_type`` is ``application/json`` and whose body
+    decodes to a JSON object, ``app_version`` and ``process_time`` are added
+    (existing keys are not overwritten). Every other response is passed
+    through with its body unchanged.
+    """
+
+    def get_route_handler(self) -> Callable:
+        """Wrap the stock route handler with timing and JSON field injection."""
+        original_route_handler = super().get_route_handler()
+
+        async def custom_route_handler(request: Request) -> Response:
+            start = time.perf_counter()
+            response: Response = await original_route_handler(request)
+            duration = time.perf_counter() - start
+
+            # Only JSON responses are candidates for field injection.
+            if getattr(response, "media_type", None) != "application/json":
+                return response
+
+            # 1) Collect the body. A streaming response can be iterated only
+            #    once, so remember whether its iterator has been drained.
+            body_iter = getattr(response, "body_iterator", None)
+            drained = body_iter is not None
+            if drained:
+                chunks = []
+                async for chunk in body_iter:
+                    # Streaming chunks may be str; normalise to bytes.
+                    chunks.append(chunk.encode("utf-8") if isinstance(chunk, str) else chunk)
+                body_bytes = b"".join(chunks)
+            else:
+                body_bytes = getattr(response, "body", b"")
+
+            def passthrough() -> Response:
+                """Return the response unmodified, rebuilding it if it was drained."""
+                if not drained:
+                    return response
+                # The original iterator is exhausted; returning it would send
+                # an empty body, so re-wrap the collected bytes instead.
+                headers = dict(response.headers)
+                headers.pop("content-length", None)
+                return Response(
+                    content=body_bytes,
+                    status_code=response.status_code,
+                    headers=headers,
+                    media_type=response.media_type,
+                    background=response.background,
+                )
+
+            # 2) Try to parse; on failure leave the body untouched.
+            try:
+                payload = json.loads(body_bytes.decode("utf-8")) if body_bytes else None
+            except Exception:
+                return passthrough()
+
+            # 3) Fields are injected into JSON objects only. Arrays, scalars
+            #    and empty bodies are returned as they were.
+            if not isinstance(payload, dict):
+                return passthrough()
+            payload.setdefault("app_version", APP_VERSION)
+            payload.setdefault("process_time", f"{duration:.4f}")
+
+            # 4) Rebuild as a JSONResponse so Content-Length matches the new
+            #    body; keep status code, headers and media type.
+            new_headers = dict(response.headers)
+            # Drop length/encoding headers so they are recomputed.
+            for h in ("content-length", "Content-Length", "content-encoding", "Content-Encoding"):
+                new_headers.pop(h, None)
+
+            return JSONResponse(
+                content=payload,
+                status_code=response.status_code,
+                headers=new_headers,
+                media_type=response.media_type,
+                # Carry over background tasks attached to the original
+                # response; otherwise they would never run.
+                background=response.background,
+            )
+
+        return custom_route_handler
+
+
+class CustomAPIRouter(APIRouter):
+    """APIRouter whose ``route_class`` is set to ``TimedRoute``."""
+
+    def __init__(self, *args, **kwargs):
+        # Initialise the base router with the caller's arguments first ...
+        super().__init__(*args, **kwargs)
+        # ... then set the route class unconditionally, which replaces any
+        # ``route_class`` supplied through ``kwargs``.
+        self.route_class = TimedRoute
--- a/src/utils/fileio.py
+++ b/src/utils/fileio.py
@@ -0,0 +1,40 @@
+import inspect
+from typing import Any
+
+
+async def read_any_file(obj: Any) -> bytes:
+    """Read the content of several kinds of file-like or raw inputs.
+
+    Checks are applied in this order and the first match wins:
+      1. ``None`` gives ``b""``.
+      2. A Starlette ``UploadFile`` (if Starlette is importable) is awaited.
+      3. Any object with ``read()`` is read; an awaitable result is awaited.
+      4. Any object whose ``.file`` has ``read()`` is read synchronously.
+      5. Any object with ``.content``: bytes-like content is returned as is,
+         anything else is stringified and UTF-8 encoded.
+      6. ``bytes``/``bytearray`` are returned as ``bytes``; ``str`` is
+         UTF-8 encoded.
+
+    Raises:
+        TypeError: when none of the cases above applies.
+    """
+    if obj is None:
+        return b""
+
+    # Starlette is optional here; without it the isinstance check is skipped.
+    try:
+        from starlette.datastructures import UploadFile as StarletteUploadFile
+    except Exception:
+        StarletteUploadFile = None
+
+    if StarletteUploadFile and isinstance(obj, StarletteUploadFile):
+        return await obj.read()
+
+    # Generic read(): works for both sync and async implementations.
+    if hasattr(obj, "read"):
+        result = obj.read()
+        return (await result) if inspect.isawaitable(result) else result
+
+    # Wrapper objects that keep the stream under ``.file``.
+    inner = getattr(obj, "file", None)
+    if hasattr(obj, "file") and hasattr(inner, "read"):
+        return inner.read()
+
+    # Wrapper objects that keep the payload under ``.content``.
+    if hasattr(obj, "content"):
+        payload = obj.content
+        if isinstance(payload, (bytes, bytearray)):
+            return payload
+        return str(payload).encode("utf-8", "replace")
+
+    # Raw values.
+    if isinstance(obj, (bytes, bytearray)):
+        return bytes(obj)
+    if isinstance(obj, str):
+        return obj.encode("utf-8", "replace")
+
+    raise TypeError(f"Unsupported file-like type: {type(obj)}")
--- a/src/utils/image_converter.py
+++ b/src/utils/image_converter.py
@@ -0,0 +1,31 @@
+import io
+from pathlib import Path
+from typing import List
+
+import httpx
+
+
+async def prepare_images_from_file(file_url: str, filename: str, max_pages: int = 5, dpi: int = 180) -> List[bytes]:
+    """Download a file and return it as a list of image byte strings.
+
+    The file is fetched from ``file_url`` (a presigned URL). When the
+    extension of ``filename`` is ``.pdf``, ``.tif`` or ``.tiff`` the content is
+    handed to ``pdf2image`` and at most ``max_pages`` leading pages are
+    returned as PNG bytes. Any other file is returned unchanged as a
+    one-element list.
+
+    Args:
+        file_url: URL to download the file from.
+        filename: Original file name; only its extension is inspected.
+        max_pages: Maximum number of leading pages to convert.
+        dpi: Rendering resolution passed to ``pdf2image``.
+
+    Raises:
+        httpx.HTTPStatusError: when the download returns an error status.
+        RuntimeError: when ``pdf2image`` is not installed.
+    """
+    async with httpx.AsyncClient() as client:
+        # timeout=None disables httpx's default timeout for this download.
+        resp = await client.get(file_url, timeout=None)
+        resp.raise_for_status()
+        file_bytes = resp.content
+
+    ext = Path(filename).suffix.lower()
+    if ext in [".pdf", ".tif", ".tiff"]:
+        # NOTE(review): TIFF input is sent through the PDF converter as well;
+        # confirm that pdf2image/poppler accepts TIFF in this deployment.
+        try:
+            from pdf2image import convert_from_bytes
+        except ImportError as e:
+            raise RuntimeError("pdf2image가 필요합니다. `pip install pdf2image poppler-utils`") from e
+
+        # Ask the converter to stop at ``max_pages`` so that long documents
+        # are not fully rasterised just to be sliced afterwards.
+        last_page = max_pages if max_pages > 0 else None
+        pil_images = convert_from_bytes(file_bytes, dpi=dpi, last_page=last_page)
+
+        images = []
+        for im in pil_images[:max_pages]:
+            buf = io.BytesIO()
+            im.save(buf, format="PNG")
+            images.append(buf.getvalue())
+        return images
+    else:
+        # Single image: pass the downloaded bytes through.
+        return [file_bytes]
--- a/src/utils/logging_utils.py
+++ b/src/utils/logging_utils.py
@@ -0,0 +1,143 @@
+import json
+import logging
+from datetime import datetime
+
+import redis
+from fastapi import Depends, Request
+
+from config.setting import PGN_REDIS_DB, PGN_REDIS_HOST, PGN_REDIS_PORT
+from utils.checking_keys import get_api_key
+from utils.request_utils import get_client_ip, get_swagger_port
+
+logger = logging.getLogger(__name__)
+
+redis_client = redis.Redis(host=PGN_REDIS_HOST, port=PGN_REDIS_PORT, db=PGN_REDIS_DB, decode_responses=True)
+
+
+def log_user_request(
+    request_info: Request,
+    endpoint: str,
+    input_filename: str,
+    llm_model: str,
+    ocr_model: str,
+    prompt_filename: str,
+    context_length: int,
+    api_key: str,
+):
+    """Emit one JSON log line describing an inference request.
+
+    Args:
+        request_info: The incoming request; it is handed to ``get_client_ip``
+            and ``get_swagger_port`` to derive the ``ip`` and ``swagger_port``
+            fields.
+        endpoint: Path of the endpoint being served.
+        input_filename: Name of the uploaded input file.
+        llm_model: LLM model identifier.
+        ocr_model: OCR model identifier.
+        prompt_filename: Name of the prompt file used.
+        context_length: Context length recorded for the request.
+        api_key: API key of the caller. NOTE(review): it is logged in full;
+            confirm that the log store is allowed to hold it.
+    """
+    client_ip = get_client_ip(request_info)
+    swagger_port = get_swagger_port(request_info)
+
+    # ✅ JSON log line for Loki. ensure_ascii=False keeps non-ASCII file names
+    # readable, matching the other JSON log in this module.
+    logger.info(
+        json.dumps(
+            {
+                "ip": client_ip,
+                "swagger_port": swagger_port,
+                "endpoint": endpoint,
+                "input_filename": input_filename,
+                "prompt_filename": prompt_filename,
+                "llm_model": llm_model,
+                "ocr_model": ocr_model,
+                "context_length": context_length,
+                "api_key": api_key,
+                "event": "inference_log",
+            },
+            ensure_ascii=False,
+        )
+    )
+
+
+def log_generation_info(custom_mode: bool, user_prompt: str = ""):
+    """Log whether a user-defined prompt is in use and, if so, its text.
+
+    Two INFO lines are written: the first states ``custom_mode``; the second
+    is the full ``user_prompt`` in custom mode, or a fixed default marker.
+    """
+    logger.info(f"[GENERATE-PROMPT-USED] 사용자 정의 프롬프트 사용유무: {custom_mode}")
+
+    detail = (
+        f"[GENERATE-USER-PROMPT]\n{user_prompt}"
+        if custom_mode
+        else "[GENERATE-DEFAULT-PROMPT] Default_prompt"
+    )
+    logger.info(detail)
+
+
+def log_pipeline_status(request_id: str, status_message: str, step_info: dict = None):
+    """Append a status entry for ``request_id`` to its Redis list.
+
+    The entry is a JSON object with ``status``, an ISO-format local
+    ``timestamp`` and ``step_info``, pushed to the right end of the list
+    ``pipeline_status:<request_id>``.
+    """
+    serialized = json.dumps(
+        {
+            "status": status_message,
+            "timestamp": datetime.now().isoformat(),
+            "step_info": step_info,
+        }
+    )
+    list_key = f"pipeline_status:{request_id}"
+    redis_client.rpush(list_key, serialized)
+
+
+def ns_to_sec(ns: "int | None") -> "float | None":
+    """Convert nanoseconds to seconds, rounded to three decimal places.
+
+    ``None`` is passed through as ``None`` so that callers can convert
+    optional fields without guarding each one.
+    """
+    if ns is None:
+        return None
+    return round(ns / 1e9, 3)
+
+
+def log_ollama_stats(res: dict):
+    """Log the inference statistics of an Ollama response as one JSON line.
+
+    Each ``*_duration`` field is reported both as the raw value (``*_ns``) and
+    converted to seconds (``*_sec``). A field that is absent from ``res`` is
+    reported as ``null`` instead of raising. When both ``eval_count`` and
+    ``eval_duration`` are non-zero, a tokens-per-second figure is added.
+
+    Args:
+        res: Parsed JSON body of an Ollama response.
+    """
+
+    def _sec(ns):
+        # Missing statistics must not break logging of the remaining fields.
+        return None if ns is None else ns_to_sec(ns)
+
+    # Raw values
+    total_duration = res.get("total_duration")
+    load_duration = res.get("load_duration")
+    prompt_eval_count = res.get("prompt_eval_count")
+    prompt_eval_duration = res.get("prompt_eval_duration")
+    eval_count = res.get("eval_count")
+    eval_duration = res.get("eval_duration")
+
+    # Raw values plus their conversion to seconds
+    stats = {
+        "model": res.get("model"),
+        "total_duration_ns": total_duration,
+        "total_duration_sec": _sec(total_duration),
+        "load_duration_ns": load_duration,
+        "load_duration_sec": _sec(load_duration),
+        "prompt_eval_count": prompt_eval_count,
+        "prompt_eval_duration_ns": prompt_eval_duration,
+        "prompt_eval_duration_sec": _sec(prompt_eval_duration),
+        "eval_count": eval_count,
+        "eval_duration_ns": eval_duration,
+        "eval_duration_sec": _sec(eval_duration),
+    }
+
+    # Tokens per second
+    if eval_count and eval_duration:
+        stats["generation_speed_tok_per_sec"] = round(eval_count / (eval_duration / 1e9), 2)
+
+    logger.info("[OLLAMA-STATS] " + json.dumps(stats, ensure_ascii=False))
+
+
+class EndpointLogger:
+    """Dependency object that logs a request together with its API key."""
+
+    def __init__(self, request: Request, api_key: str = Depends(get_api_key)):
+        self.request = request
+        self.api_key = api_key
+
+    def log(
+        self,
+        ocr_model: str = "N/A",
+        llm_model: str = "N/A",
+        input_filename: str = "N/A",
+        prompt_filename: str = "N/A",
+        context_length: int = 0,
+    ):
+        """Forward the request details to ``log_user_request``.
+
+        Failures while logging are downgraded to a warning so that they do not
+        interrupt the endpoint.
+        """
+        try:
+            fields = {
+                "request_info": self.request,
+                "endpoint": self.request.url.path,
+                "input_filename": input_filename,
+                "llm_model": llm_model,
+                "ocr_model": ocr_model,
+                "prompt_filename": prompt_filename,
+                "context_length": context_length,
+                "api_key": self.api_key,
+            }
+            log_user_request(**fields)
+        except Exception as e:
+            logger.warning(f"Failed to log request for endpoint {self.request.url.path}: {e}")
+
+
+class HealthCheckFilter(logging.Filter):
+    """Logging filter that hides successful ``GET /health*`` access records."""
+
+    def filter(self, record: logging.LogRecord) -> bool:
+        """Return ``False`` (drop) only for a 200 ``GET`` on a ``/health`` path.
+
+        The uvicorn access log record carries its data in ``args`` as
+        ``(client_addr, method, path, http_version, status_code)``, e.g.
+        ``('127.0.0.1:37894', 'GET', '/health/API', '1.1', 200)``.
+        Records with any other ``args`` shape are always kept.
+        """
+        args = record.args
+        if not (isinstance(args, tuple) and len(args) == 5):
+            return True
+
+        _, method, path, _, status_code = args
+        is_ok_health_probe = (
+            method == "GET"
+            and isinstance(path, str)
+            and path.startswith("/health")
+            and status_code == 200
+        )
+        return not is_ok_health_probe
--- a/src/utils/minio_utils.py
+++ b/src/utils/minio_utils.py
@@ -0,0 +1,155 @@
+import io
+import json
+import logging
+from datetime import timedelta
+from typing import Optional
+
+from fastapi import UploadFile
+from minio import Minio
+from minio.error import S3Error
+
+from config.setting import (
+    MINIO_ACCESS_KEY,
+    MINIO_ENDPOINT,
+    MINIO_RESULTS_BUCKET_NAME,
+    MINIO_SECRET_KEY,
+)
+
+# MinIO 클라이언트 전역 생성
+minio_client = Minio(
+    MINIO_ENDPOINT,
+    access_key=MINIO_ACCESS_KEY,
+    secret_key=MINIO_SECRET_KEY,
+    secure=False,
+)
+
+logger = logging.getLogger(__name__)
+
+
+def get_minio_client():
+    """Create a MinIO client and verify that the server is reachable.
+
+    A new ``Minio`` client is built from the configured endpoint and
+    credentials, and ``list_buckets()`` is called once as a connection test.
+
+    Returns:
+        The connected ``Minio`` client.
+
+    Raises:
+        RuntimeError: when the client cannot be created or the connection test
+            fails; the original exception is attached as the cause.
+    """
+    try:
+        client = Minio(
+            MINIO_ENDPOINT,
+            access_key=MINIO_ACCESS_KEY,
+            secret_key=MINIO_SECRET_KEY,
+            secure=False,  # set according to whether HTTPS is used
+        )
+
+        # ✅ Connection test (list buckets)
+        client.list_buckets()
+
+        return client
+    except Exception as e:
+        raise RuntimeError(f"MinIO 연결 실패: {e}") from e
+
+
+def save_result_to_minio(result_dict: dict, object_name: str) -> str:
+    """Store a result dict as a JSON object in MinIO and return a presigned URL.
+
+    The dict is serialised with ``ensure_ascii=False``, UTF-8 encoded and
+    uploaded to ``MINIO_RESULTS_BUCKET_NAME`` under ``object_name``.
+
+    Returns:
+        A presigned GET URL for the uploaded object.
+
+    Raises:
+        Exception: any serialisation or MinIO error is logged and re-raised.
+    """
+    try:
+        # JSON -> bytes -> in-memory stream positioned at the start
+        encoded = json.dumps(result_dict, ensure_ascii=False).encode("utf-8")
+        stream = io.BytesIO(encoded)
+
+        # Upload to MinIO
+        minio_client.put_object(
+            bucket_name=MINIO_RESULTS_BUCKET_NAME,
+            object_name=object_name,
+            data=stream,
+            length=len(encoded),
+            content_type="application/json",
+        )
+
+        # Create the presigned URL
+        return minio_client.presigned_get_object(
+            MINIO_RESULTS_BUCKET_NAME,
+            object_name,
+        )
+
+    except Exception as e:
+        logger.error(f"❌ MinIO 작업 실패: {e}")
+        raise
+
+
+def upload_file_to_minio_v2(file: UploadFile, bucket_name: str, object_name: str) -> str:
+    """
+    Upload a file to MinIO and return a presigned URL for it.
+
+    The bucket is created when it does not exist yet. The file is streamed
+    from its beginning in 10 MB parts.
+
+    Args:
+        file (UploadFile): FastAPI UploadFile object
+        bucket_name (str): Name of the bucket to upload into
+        object_name (str): Name of the stored object (may contain a path)
+
+    Returns:
+        str: The generated presigned URL (valid for 7 days)
+
+    Raises:
+        Exception: any MinIO error is logged and re-raised to the caller.
+    """
+    try:
+        # 1. Make sure the bucket exists
+        found = minio_client.bucket_exists(bucket_name)
+        if not found:
+            minio_client.make_bucket(bucket_name)
+            logger.info(f"✅ 버킷 '{bucket_name}' 생성 완료.")
+
+        # 2. Upload the file
+        file.file.seek(0)  # rewind the file pointer
+        minio_client.put_object(
+            bucket_name,
+            object_name,
+            file.file,
+            length=-1,  # -1 because the size is not known up front
+            part_size=10 * 1024 * 1024,  # upload in 10 MB parts
+        )
+        logger.info(f"✅ '{object_name}' -> '{bucket_name}' 업로드 성공.")
+
+        # 3. Create the presigned URL
+        presigned_url = minio_client.presigned_get_object(
+            bucket_name,
+            object_name,
+            expires=timedelta(days=7),  # URL lifetime; adjust as needed
+        )
+        # The query string carries the signature that grants access, so only
+        # the part before "?" is written to the log.
+        logger.info(f"✅ Presigned URL 생성 완료: {presigned_url.split('?', 1)[0]}")
+
+        return presigned_url
+
+    except Exception as e:
+        logger.error(f"❌ MinIO 작업 실패: {e}")
+        raise  # re-raise so that the caller can handle the failure
+
+
+def fetch_result_from_minio(request_id: str) -> Optional[dict]:
+    """Load the result JSON stored for ``request_id`` from MinIO.
+
+    Objects under the prefix ``<request_id>/`` in ``MINIO_RESULTS_BUCKET_NAME``
+    are listed recursively and the first one whose name ends with ``.json`` is
+    downloaded and decoded.
+
+    Returns:
+        The decoded JSON content, or ``None`` when no ``.json`` object exists
+        or any error occurs (errors are logged, not raised).
+    """
+    try:
+        # List every object below the request's folder
+        objects = minio_client.list_objects(
+            bucket_name=MINIO_RESULTS_BUCKET_NAME,
+            prefix=f"{request_id}/",
+            recursive=True,
+        )
+
+        json_obj = next((obj for obj in objects if obj.object_name.endswith(".json")), None)
+        if not json_obj:
+            logger.warning(f"[MINIO] request_id: {request_id} 경로에 .json 파일이 존재하지 않습니다.")
+            return None
+
+        object_name = json_obj.object_name
+        logger.info(f"[MINIO] request_id: {request_id}에 대한 결과를 가져옵니다. 대상 파일: {object_name}")
+
+        # Download the object; always hand the HTTP connection back to the
+        # pool, even when reading fails.
+        response = minio_client.get_object(MINIO_RESULTS_BUCKET_NAME, object_name)
+        try:
+            content = response.read()
+        finally:
+            response.close()
+            response.release_conn()
+
+        # Decode the JSON
+        result_dict = json.loads(content.decode("utf-8"))
+
+        logger.info(f"[MINIO] 결과 JSON 로드 성공: {object_name}")
+        return result_dict
+
+    except S3Error as e:
+        logger.error(f"[MINIO] S3Error 발생: {e}")
+        return None
+    except Exception as e:
+        logger.error(f"[MINIO] 기타 오류 발생: {e}")
+        return None
--- a/src/utils/redis_utils.py
+++ b/src/utils/redis_utils.py
@@ -0,0 +1,23 @@
+# utils/redis_utils.py
+
+import redis
+
+from config.setting import PGN_REDIS_DB, PGN_REDIS_HOST, PGN_REDIS_PORT
+
+
+def get_redis_client():
+    """
+    Return a Redis client whose connection has been verified with ``ping()``.
+
+    The client is created with ``decode_responses=True`` so that replies are
+    decoded to strings automatically.
+
+    Raises:
+        RuntimeError: when Redis cannot be reached; the underlying
+            ``redis.ConnectionError`` is attached as the cause.
+    """
+    try:
+        redis_client = redis.Redis(
+            host=PGN_REDIS_HOST,
+            port=PGN_REDIS_PORT,
+            db=PGN_REDIS_DB,
+            decode_responses=True,
+        )
+        # Connection check (ping)
+        redis_client.ping()
+        return redis_client
+    except redis.ConnectionError as e:
+        raise RuntimeError(f"Redis 연결 실패: {e}") from e
--- a/src/utils/request_utils.py
+++ b/src/utils/request_utils.py
@@ -0,0 +1,27 @@
+from fastapi import Request
+
+
+# 🔽 Determine the client IP
+def get_client_ip(request: Request) -> str:
+    """Return the client IP address for ``request``.
+
+    Lookup order: first entry of ``X-Forwarded-For``, then ``X-Real-IP``, then
+    the peer address of the connection. ``"unknown"`` is returned when the
+    request carries no peer information at all.
+    NOTE(review): both headers are taken at face value, so the result is only
+    trustworthy behind a proxy that sets them; confirm the deployment.
+    """
+    xff = request.headers.get("X-Forwarded-For")
+    if xff:
+        return xff.split(",")[0].strip()  # first value (the original client)
+    xri = request.headers.get("X-Real-IP")
+    if xri:
+        return xri  # client IP as forwarded by the proxy
+    # Last fallback (proxy/NAT address). ``request.client`` can be None, in
+    # which case there is no address to report.
+    client = request.client
+    return client.host if client else "unknown"
+
+
+# 🔽 Determine the port the request was addressed to
+def get_swagger_port(request: Request) -> str:
+    """Return the port of the incoming request as a string.
+
+    Priority: ``X-Forwarded-Port`` header, then ``request.url.port``, then a
+    port found in the ``Host`` header, and finally a default derived from
+    ``X-Forwarded-Proto`` (``"443"`` for https, otherwise ``"80"``).
+    """
+    port = request.headers.get("X-Forwarded-Port")
+    if port:
+        return port
+    if request.url.port:
+        return str(request.url.port)
+    host_header = request.headers.get("host")
+    if host_header:
+        # Skip a bracketed IPv6 literal ("[::1]:8000") so that only the colon
+        # introducing the port is considered.
+        after_host = host_header.rsplit("]", 1)[-1]
+        if ":" in after_host:
+            return after_host.rsplit(":", 1)[1]
+    # Finally assume the scheme's default port (HTTPS 443, HTTP 80)
+    return "443" if request.headers.get("X-Forwarded-Proto") == "https" else "80"
--- a/src/utils/text_formatter.py
+++ b/src/utils/text_formatter.py
@@ -0,0 +1,16 @@
+class PromptFormatter:
+    """Builds the final LLM prompt from a user prompt and OCR text."""
+
+    # Preamble placed in front of custom prompts in "extract" mode.
+    SYSTEM_PROMPT = """
+다음은 스캔된 공문서에서 OCR로 추출된 원시 텍스트입니다.  
+오타나 줄바꿈 오류가 있을 수 있으니 의미를 유추하여 정확한 정보를 추출해주세요.
+
+다음 주어진 항목을 JSON 형식(```json)으로 작성해주세요:
+"""
+
+    @staticmethod
+    def format(text: str, user_prompt: str = None, custom_mode: bool = False, prompt_mode: str = "extract") -> str:
+        """Combine ``user_prompt`` and ``text`` into a single prompt string.
+
+        The user prompt is always followed by a fixed heading and the OCR
+        text. ``SYSTEM_PROMPT`` is prepended only when ``custom_mode`` is true
+        and ``prompt_mode`` is ``"extract"``.
+        """
+        core = f"{user_prompt}\n\n다음은 OCR로 추출된 원시 텍스트입니다:\n\n{text}"
+        if not (custom_mode and prompt_mode == "extract"):
+            return core
+        return f"{PromptFormatter.SYSTEM_PROMPT}\n\n{core}"
--- a/src/utils/text_generator.py
+++ b/src/utils/text_generator.py
@@ -0,0 +1,437 @@
+import copy
+import json
+import logging
+from collections import OrderedDict
+from typing import Optional
+
+import anthropic
+import google.generativeai as genai
+import requests
+from anthropic._exceptions import BadRequestError, OverloadedError
+from fastapi import HTTPException
+from google.api_core.exceptions import ResourceExhausted
+from openai import OpenAI
+
+from .checking_keys import APIKeyLoader
+from .logging_utils import log_generation_info, log_ollama_stats
+from .text_formatter import PromptFormatter
+
+logger = logging.getLogger(__name__)
+
+
+# ✅ 1. Ollama Gen
+class OllamaGenerator:
+    """Text generator that posts non-streaming requests to an Ollama generate endpoint."""
+
+    def __init__(self, model="gemma3:27b", api_url="http://pgn_ollama_gemma:11534/api/generate"):
+        self.model = model
+        self.api_url = api_url
+
+    # ✅ 1-1. Gen-General
+    def generate(self, text, user_prompt=None, custom_mode=False, prompt_mode: str = "extract"):
+        """Generate free-form text for ``text`` using the configured model.
+
+        Returns:
+            Tuple ``(response_text, model, api_url)``.
+
+        Raises:
+            HTTPException: 500 for any failure inside the request, including
+                network errors, HTTP error statuses and a reply without a
+                ``response`` key. The original exception is kept as the cause.
+        """
+        log_generation_info(custom_mode, user_prompt)
+        prompt = PromptFormatter.format(text, user_prompt, custom_mode, prompt_mode)
+
+        # Append /no_think automatically for qwen models
+        if "qwen" in self.model.lower():
+            prompt += " /no_think"
+
+        payload = {"model": self.model, "prompt": prompt, "stream": False}
+        try:
+            response = requests.post(self.api_url, json=payload)
+            response.raise_for_status()
+
+            res = response.json()
+            if "response" not in res:
+                raise ValueError("[GENERATE-OLLAMA-ERROR] LLM 응답에 'response' 키가 없습니다.")
+
+            log_ollama_stats(res)
+
+            return res["response"], self.model, self.api_url
+        # ☑️ Every failure above is reported to the client as one 500 error
+        except Exception as e:
+            logger.error(f"[OLLAMA-ERROR] 서버 연결 실패: {e}")
+            raise HTTPException(
+                status_code=500,
+                detail="Ollama 서빙 서버에 연결할 수 없습니다.\n서버가 실행 중인지 확인하세요.",
+            ) from e
+
+    # ✅ 1-2. Gen-Structure
+    def structured_generate(
+        self,
+        text,
+        user_prompt=None,
+        custom_mode=False,
+        schema_override: Optional[dict] = None,
+    ):
+        """Generate JSON output constrained by ``schema_override``.
+
+        The schema is sent as the ``format`` field of the request and the
+        model's reply is parsed as JSON.
+
+        Returns:
+            Tuple ``(parsed_json, model, api_url)``.
+
+        Raises:
+            requests.HTTPError: when the server answers with an error status.
+            ValueError: when the reply has no ``response`` key or is not
+                valid JSON.
+        """
+        log_generation_info(custom_mode, user_prompt)
+        prompt = PromptFormatter.format(text, user_prompt, custom_mode)
+
+        payload = {
+            "model": self.model,
+            "prompt": prompt,
+            "format": schema_override,
+            "stream": False,
+        }
+
+        response = requests.post(self.api_url, json=payload)
+        response.raise_for_status()
+
+        res = response.json()
+        if "response" not in res:
+            raise ValueError("[GENERATE-OLLAMA-ERROR] LLM 응답에 'response' 키가 없습니다.")
+
+        # ✅ Log the inference statistics
+        log_ollama_stats(res)
+
+        # ✅ No model-class validation; only JSON parsing is performed
+        try:
+            structured = json.loads(res["response"])
+            return structured, self.model, self.api_url
+        except json.JSONDecodeError as e:
+            logger.error(f"[PARSE-ERROR] LLM 응답이 JSON으로 파싱되지 않음: {e}")
+            raise ValueError("LLM 응답이 JSON 형식이 아닙니다.") from e
+
+
+# ✅ 2. Gemini Gen
+class GeminiGenerator:
+    """Text generator backed by the ``google.generativeai`` SDK."""
+
+    def __init__(self, model="gemini-2.5-pro-exp-03-25"):
+        self.api_key = APIKeyLoader.load_gemini_key()
+        genai.configure(api_key=self.api_key)
+        self.model = genai.GenerativeModel(model)
+
+    def clean_schema_for_gemini(self, schema: dict) -> dict:
+        """Return a shallow copy of ``schema`` without top-level ``title``/``$schema``."""
+        # Gemini does not accept some fields such as title
+        cleaned = dict(schema)  # shallow copy
+        cleaned.pop("title", None)
+        cleaned.pop("$schema", None)
+        # Remove further fields here if needed
+        return cleaned
+
+    # ✅ 2-1. Gen-General
+    def generate(self, text, user_prompt=None, custom_mode=False, prompt_mode: str = "extract"):
+        """Generate free-form text for ``text``.
+
+        Returns:
+            Tuple ``(response_text, model_name, "google.generativeai SDK")``.
+
+        Raises:
+            ValueError: when the response text is empty.
+            HTTPException: 500 when the SDK raises ``ResourceExhausted``.
+        """
+        log_generation_info(custom_mode, user_prompt)
+        prompt = PromptFormatter.format(text, user_prompt, custom_mode, prompt_mode)
+
+        try:
+            response = self.model.generate_content(prompt)
+
+            if not response.text:
+                raise ValueError("[GENERATE-GEMINI-ERROR] LLM 응답에 'response' 가 없습니다.")
+            return (
+                response.text,
+                self.model.model_name.split("/")[-1],
+                "google.generativeai SDK",
+            )
+
+        # ☑️ Raised when the Gemini API quota is exceeded
+        except ResourceExhausted as e:
+            logger.error(f"[GEMINI-ERROR] 할당량 초과: {e}")
+            raise HTTPException(
+                status_code=500,
+                detail="Gemini 모델의 일일 API 사용량이 초과되었습니다.\n'claude-3-7-sonnet-20250219' 또는 'gpt-4.1' 모델로 다시 시도하세요.",
+            ) from e
+
+    # ✅ 2-2. Gen-Structure
+    def structured_generate(
+        self,
+        text,
+        user_prompt=None,
+        custom_mode=False,
+        schema_override: Optional[dict] = None,
+    ):
+        """Generate JSON output, optionally constrained by ``schema_override``.
+
+        The reply is parsed as JSON. A dict is used directly; for a list, its
+        first element is used (a string element is parsed as JSON again). When
+        the schema has ``properties``, the result is rebuilt with exactly those
+        keys in schema order, missing ones being ``None``.
+
+        Returns:
+            Tuple ``(structured, model_name, "google.generativeai SDK")``.
+
+        Raises:
+            ValueError: when the response is empty, is not valid JSON, or has
+                an unexpected shape (including an empty list).
+            HTTPException: 500 when the SDK raises ``ResourceExhausted``.
+        """
+        log_generation_info(custom_mode, user_prompt)
+        prompt = PromptFormatter.format(text, user_prompt, custom_mode)
+
+        response_schema = self.clean_schema_for_gemini(schema_override) if schema_override else None
+
+        try:
+            response = self.model.generate_content(
+                contents=prompt,
+                generation_config=genai.GenerationConfig(
+                    response_mime_type="application/json",
+                    response_schema=response_schema,
+                ),
+            )
+
+            if not response.text:
+                raise ValueError("❌ Gemini 응답에서 구조화된 데이터를 파싱하지 못했습니다.")
+
+            parsed = json.loads(response.text)
+            # For a non-empty list look at its first element; an empty list
+            # matches no branch below and is reported as an unexpected shape
+            # instead of failing with IndexError.
+            is_list = isinstance(parsed, list)
+            candidate = parsed[0] if is_list and parsed else parsed
+
+            if isinstance(candidate, dict):
+                structured = candidate
+            elif is_list and isinstance(candidate, str):
+                structured = json.loads(candidate)
+            else:
+                raise ValueError("❌ 응답 형식이 예상과 다릅니다.")
+
+            # ✅ Order the fields as in the schema
+            if schema_override and "properties" in schema_override:
+                ordered_keys = list(schema_override["properties"].keys())
+                structured = OrderedDict((key, structured.get(key)) for key in ordered_keys)
+
+            return (
+                structured,
+                self.model.model_name.split("/")[-1],
+                "google.generativeai SDK",
+            )
+
+        # ☑️ Raised when the Gemini API quota is exceeded
+        except ResourceExhausted as e:
+            logger.error(f"[GEMINI-STRUCTURED-ERROR] 할당량 초과: {e}")
+            raise HTTPException(
+                status_code=500,
+                detail="'Gemini' 모델의 일일 API 사용량이 초과되었습니다.\n'claude-3-7-sonnet-20250219' 또는 'gpt-4.1' 모델로 다시 시도하세요.",
+            ) from e
+
+        except json.JSONDecodeError as e:
+            logger.error(f"[GEMINI-STRUCTURED-PARSE-ERROR] 응답 JSON 파싱 실패: {e}")
+            raise ValueError("Gemini 응답이 JSON 형식이 아닙니다.") from e
+
+    def generate_multimodal(self, images, prompt, schema_override=None):
+        """Generate text from ``prompt`` plus a sequence of image byte strings.
+
+        Each entry of ``images`` is opened with Pillow and appended to the
+        request content after the prompt. ``schema_override`` is accepted but
+        not used by this method.
+
+        Returns:
+            Tuple ``(response_text, model_name, "google.generativeai SDK")``.
+
+        Raises:
+            HTTPException: 400 when an image cannot be opened; 500 when the
+                quota is exhausted or generation fails for any other reason
+                (including an empty response).
+        """
+        import io
+
+        from PIL import Image
+
+        content = [prompt]
+        for image_bytes in images:
+            try:
+                img = Image.open(io.BytesIO(image_bytes))
+                content.append(img)
+            except Exception as e:
+                logger.error(f"[GEMINI-MULTIMODAL-ERROR] 이미지 처리 실패: {e}")
+                raise HTTPException(status_code=400, detail=f"이미지 파일을 처리할 수 없습니다: {e}") from e
+
+        try:
+            response = self.model.generate_content(content)
+
+            if not response.text:
+                raise ValueError("[GENERATE-GEMINI-ERROR] LLM 응답에 'response' 가 없습니다.")
+            return (
+                response.text,
+                self.model.model_name.split("/")[-1],
+                "google.generativeai SDK",
+            )
+
+        except ResourceExhausted as e:
+            logger.error(f"[GEMINI-MULTIMODAL-ERROR] 할당량 초과: {e}")
+            raise HTTPException(
+                status_code=500,
+                detail="Gemini 모델의 일일 API 사용량이 초과되었습니다.\n'claude-3-7-sonnet-20250219' 또는 'gpt-4.1' 모델로 다시 시도하세요.",
+            ) from e
+        except Exception as e:
+            logger.error(f"[GEMINI-MULTIMODAL-ERROR] Gemini 응답 파싱 실패: {e}")
+            raise HTTPException(status_code=500, detail=f"❌ Gemini 응답 생성에 실패했습니다: {e}") from e
+
+
+# ✅ 3. Claude Gen
+class ClaudeGenerator:
+    """Generator that calls the Anthropic Messages API.
+
+    Both generation methods return a ``(result, model name, SDK label)`` tuple.
+    """
+
+    def __init__(self, model="claude-3-7-sonnet-20250219"):
+        self.api_key = APIKeyLoader.load_claude_key()
+        self.client = anthropic.Anthropic(api_key=self.api_key)
+        self.model = model
+
+    # ✅ 3-1. Gen-General
+    def generate(self, text, user_prompt=None, custom_mode=False, prompt_mode: str = "extract"):
+        """Generate free-form text for ``text`` using the formatted prompt.
+
+        Returns:
+            Tuple of (response text, model name, "anthropic.Anthropic SDK").
+
+        Raises:
+            ValueError: When the first content block has empty text.
+            HTTPException: 500 when the API raises BadRequestError or OverloadedError.
+        """
+        log_generation_info(custom_mode, user_prompt)
+        prompt = PromptFormatter.format(text, user_prompt, custom_mode, prompt_mode)
+
+        try:
+            response = self.client.messages.create(
+                model=self.model,
+                max_tokens=4096,
+                messages=[{"role": "user", "content": prompt}],
+            )
+
+            if not response.content[0].text:
+                raise ValueError("[GENERATE-CLAUDE-ERROR] LLM 응답에 'response' 가 없습니다.")
+
+            return response.content[0].text, self.model, "anthropic.Anthropic SDK"
+
+        # ☑️ Claude API rejected the request or is overloaded
+        # NOTE(review): the message below reports a quota problem even for BadRequestError — confirm intent.
+        except (BadRequestError, OverloadedError) as e:
+            logger.error(f"[CLAUDE-STRUCTURED-ERROR] Claude API 에러 발생: {e}")
+            raise HTTPException(
+                status_code=500,
+                detail="Claude 모델의 일일 API 사용량이 초과되었습니다.\n'gemini-2.5-pro-exp-03-25' 또는 'gpt-4.1' 모델로 다시 시도하세요.",
+            )
+
+    # ✅ 3-2. Gen-Structure
+    def structured_generate(
+        self,
+        text,
+        user_prompt=None,
+        custom_mode=False,
+        schema_override: Optional[dict] = None,
+    ):
+        """Generate structured output by forcing a single tool call.
+
+        ``schema_override`` is passed as the tool's ``input_schema``; its
+        property names must be ASCII-only.
+
+        Returns:
+            Tuple of (tool input of the first content block, model name,
+            "anthropic.Anthropic SDK").
+
+        Raises:
+            HTTPException: 400 when a property name contains a non-ASCII
+                character or the schema cannot be inspected; 500 when the API
+                raises BadRequestError or OverloadedError.
+        """
+        log_generation_info(custom_mode, user_prompt)
+        prompt = PromptFormatter.format(text, user_prompt, custom_mode)
+
+        # ✅ Claude requires every JSON Schema key to be English (ASCII)
+        if schema_override:
+            try:
+                for k in schema_override.get("properties", {}).keys():
+                    if any(ord(ch) > 127 for ch in k):
+                        # A non-ASCII character (e.g. Hangul) was found in the field name
+                        raise HTTPException(
+                            status_code=400,
+                            detail="❌ Claude 모델은 JSON Schema의 필드명이 영어로만 구성되어 있어야 합니다. 필드명을 영문으로 수정해 주세요.",
+                        )
+            except HTTPException:
+                # Let the validation error through untouched; the generic handler
+                # below would otherwise re-wrap it and garble its message.
+                raise
+            except Exception as e:
+                raise HTTPException(status_code=400, detail=f"스키마 처리 중 오류 발생: {str(e)}")
+
+        tools = [
+            {
+                "name": "build_text_analysis_result",
+                "description": "Extract structured fields from OCR text in document format",
+                "input_schema": schema_override,
+            }
+        ]
+        try:
+            response = self.client.messages.create(
+                model=self.model,
+                max_tokens=4096,
+                messages=[{"role": "user", "content": prompt}],
+                tools=tools,
+                tool_choice={"type": "tool", "name": "build_text_analysis_result"},
+            )
+
+            structured = response.content[0].input
+            return structured, self.model, "anthropic.Anthropic SDK"
+
+        # ☑️ Claude API rejected the request or is overloaded
+        except (BadRequestError, OverloadedError) as e:
+            logger.error(f"[CLAUDE-STRUCTURED-ERROR] Claude API 에러 발생: {e}")
+            raise HTTPException(
+                status_code=500,
+                detail="Claude 모델의 일일 API 사용량이 초과되었습니다.\n'gemini-2.5-pro-exp-03-25' 또는 'gpt-4.1' 모델로 다시 시도하세요.",
+            )
+
+
+# ✅ 4. GPT Gen
+class GptGenerator:
+    """Generator that calls the OpenAI API.
+
+    All generation methods return a ``(result, model name, SDK label)`` tuple.
+    """
+
+    def __init__(self, model="gpt-4o"):
+        # ✅ Load and validate the OpenAI API key
+        raw = APIKeyLoader.load_gpt_key()
+        if not raw:
+            raise RuntimeError("OPENAI_API_KEY가 설정되지 않았습니다.")
+        self.api_key = raw.strip()
+        # "sk-proj-" keys also start with "sk-", so one prefix check covers both.
+        if not self.api_key.startswith("sk-"):
+            raise RuntimeError("유효하지 않은 OpenAI API Key 형식입니다.")
+
+        self.client = OpenAI(api_key=self.api_key)
+        self.model = model
+
+    def enforce_strict_schema(self, schema: dict) -> dict:
+        """Return a deep copy of ``schema`` adjusted for strict function calling.
+
+        Every top-level property is listed in ``required`` and
+        ``additionalProperties`` defaults to ``False`` when absent. The input
+        schema is not modified.
+        """
+        strict_schema = copy.deepcopy(schema)
+
+        # ✅ Complete "required": keep the declared entries first, then append
+        # the remaining properties in declaration order. dict.fromkeys removes
+        # duplicates while keeping order, so the result is the same on every run.
+        props = strict_schema.get("properties", {})
+        declared_required = strict_schema.get("required", [])
+        strict_schema["required"] = list(dict.fromkeys([*declared_required, *props]))
+
+        # ✅ Guarantee additionalProperties is present
+        if "additionalProperties" not in strict_schema:
+            strict_schema["additionalProperties"] = False
+
+        return strict_schema
+
+    # ✅ 4-1. Gen-General
+    def generate(self, text, user_prompt=None, custom_mode=False, prompt_mode: str = "extract"):
+        """Generate free-form text through the Responses API.
+
+        Returns:
+            Tuple of (response text, model name, "OpenAI Python SDK").
+
+        Raises:
+            RuntimeError: When the API call fails or the response has no text.
+        """
+        log_generation_info(custom_mode, user_prompt)
+        prompt = PromptFormatter.format(text, user_prompt, custom_mode, prompt_mode)
+
+        try:
+            response = self.client.responses.create(model=self.model, input=prompt)
+        except Exception as e:
+            logger.error(f"[GENERATE-GPT-ERROR] OpenAI API 호출 중 예외 발생: {e}")
+            raise RuntimeError("GPT 생성 요청 중 오류가 발생했습니다.") from e
+
+        try:
+            if not response.output or not response.output[0].content or not response.output[0].content[0].text:
+                raise ValueError("LLM 응답에 'response'가 없습니다.")
+        except Exception as e:
+            logger.error(f"[GENERATE-GPT-ERROR] 응답 파싱 실패: {e} | 원본 응답: {response}")
+            raise RuntimeError("GPT 응답 파싱 중 오류가 발생했습니다.") from e
+
+        return response.output[0].content[0].text, self.model, "OpenAI Python SDK"
+
+    # ✅ 4-2. Gen-Structure
+    def structured_generate(
+        self,
+        text,
+        user_prompt=None,
+        custom_mode=False,
+        schema_override: Optional[dict] = None,
+    ):
+        """Generate structured output by forcing the ``build_summary`` tool call.
+
+        Returns:
+            Tuple of (parsed tool-call arguments, model name,
+            "OpenAI Function Calling").
+
+        Raises:
+            HTTPException: 500 when the call or the argument parsing fails.
+        """
+        log_generation_info(custom_mode, user_prompt)
+        prompt = PromptFormatter.format(text, user_prompt, custom_mode)
+
+        schema = self.enforce_strict_schema(schema_override) if schema_override else {}
+
+        # ✅ Pass schema_override through function calling
+        tools = [
+            {
+                "type": "function",
+                "function": {
+                    "name": "build_summary",
+                    "description": "Extract structured document summary from OCR text.",
+                    "parameters": schema,
+                    "strict": True,
+                },
+            }
+        ]
+
+        try:
+            response = self.client.beta.chat.completions.parse(
+                model=self.model,
+                messages=[
+                    {
+                        "role": "system",
+                        "content": "You are an assistant that extracts structured document summary from OCR text.",
+                    },
+                    {"role": "user", "content": prompt},
+                ],
+                tools=tools,
+                tool_choice={"type": "function", "function": {"name": "build_summary"}},
+            )
+
+            tool_call = response.choices[0].message.tool_calls[0]
+            arguments_json = tool_call.function.arguments
+            structured = json.loads(arguments_json)
+
+            return structured, self.model, "OpenAI Function Calling"
+
+        except Exception as e:
+            logger.error(f"[GPT-STRUCTURED-ERROR] GPT 응답 파싱 실패: {e}")
+            raise HTTPException(status_code=500, detail="❌ GPT 구조화 응답 생성에 실패했습니다.")
+
+    def generate_multimodal(self, images, prompt, schema_override=None):
+        """Send a prompt plus base64-encoded images to the chat completions API.
+
+        Each image is embedded as a ``data:image/png;base64,...`` URL.
+        ``schema_override`` is accepted but not used by this method.
+
+        Returns:
+            Tuple of (message content, model name, "OpenAI Python SDK").
+
+        Raises:
+            HTTPException: 500 when the request or response handling fails.
+        """
+        import base64
+
+        content = [{"type": "text", "text": prompt}]
+        for image_bytes in images:
+            base64_image = base64.b64encode(image_bytes).decode("utf-8")
+            content.append(
+                {
+                    "type": "image_url",
+                    "image_url": {"url": f"data:image/png;base64,{base64_image}"},
+                }
+            )
+
+        messages = [{"role": "user", "content": content}]
+
+        try:
+            response = self.client.chat.completions.create(model=self.model, messages=messages, max_tokens=4096)
+            generated_text = response.choices[0].message.content
+            return generated_text, self.model, "OpenAI Python SDK"
+        except Exception as e:
+            logger.error(f"[GPT-MULTIMODAL-ERROR] GPT-4o 응답 파싱 실패: {e}")
+            raise HTTPException(status_code=500, detail="❌ GPT-4o 응답 생성에 실패했습니다.")
--- a/src/utils/text_processor.py
+++ b/src/utils/text_processor.py
@@ -0,0 +1,212 @@
+import datetime
+import json
+import logging
+import re
+import unicodedata
+from pathlib import Path
+from typing import Literal
+
+import markdown2
+
+from config.setting import SUMMARY_HTML_DIR
+
+logger = logging.getLogger(__name__)
+
+
+def safe_filename(filename: str) -> str:
+    """Turn an arbitrary file name into a sanitized ``<name>.html`` file name.
+
+    The extension is dropped, the stem is NFKC-normalized, spaces and any
+    character other than word characters, ``-``, ``.`` and Hangul syllables
+    become ``_``, runs of ``_`` are collapsed, and leading/trailing ``._-``
+    are stripped. If nothing is left, a timestamped ``result_...`` name is used.
+    """
+    # Drop the extension
+    print(f"[FILE NAME] {filename}")
+    base = Path(filename).stem
+    base = unicodedata.normalize("NFKC", base)
+    base = base.replace(" ", "_")
+    base = re.sub(r"[^\w\-\.가-힣]", "_", base, flags=re.UNICODE)
+    base = re.sub(r"_+", "_", base).strip("._-")
+
+    # Fall back to a safe default when the sanitized name is empty
+    if not base:
+        # The file imports the `datetime` module, so the class must be reached
+        # as datetime.datetime; a bare datetime.now() raises AttributeError.
+        base = f"result_{datetime.datetime.now().strftime('%Y%m%d_%H%M%S')}"
+
+    return f"{base}.html"
+
+
+def post_process(
+    file_name,
+    text,
+    generated_text,
+    coord,
+    ocr_model,
+    llm_model,
+    llm_url,
+    mode,
+    start_time,
+    end_time,
+    prompt_mode: Literal["general", "extract"] = "extract",
+):
+    """Build the response payload for an OCR + LLM run.
+
+    The ``processed`` entry depends on the mode:
+      * ``mode == "structured"``: a fixed notice; the output is not parsed.
+      * ``prompt_mode == "general"``: the text is rendered from markdown to
+        HTML, written under ``SUMMARY_HTML_DIR``, and a link is returned.
+      * otherwise: fields are parsed from a ```` ```json ```` block or from
+        numbered ``N. key: value`` lines.
+
+    ``start_time`` and ``end_time`` must support subtraction and ``:.2f``
+    formatting (presumably epoch seconds — confirm against the caller).
+
+    Returns:
+        dict with ``filename``, ``{mode}_model``, ``time``, ``fields``,
+        ``parsed``, ``generated`` and ``processed`` keys.
+    """
+    result_dict = {}
+
+    # ✅ Structured mode: skip post-processing
+    if mode == "structured":
+        result_dict = {
+            "message": "✅ 구조화된 JSON 모델 출력입니다. post_process 후처리 생략됨.",
+            "note": "generated 필드 참조 바랍니다.",
+        }
+
+    # ✅ General (free-form) inference mode
+    elif prompt_mode == "general":
+        html_content = markdown2.markdown(generated_text.strip())
+        html_filename = safe_filename(file_name)
+        html_path = SUMMARY_HTML_DIR / html_filename
+        html_path.write_text(html_content, encoding="utf-8")
+
+        # NOTE(review): host and port are hard-coded here.
+        summary_url = f"http://172.16.10.176:8888/view/generated_html/{html_filename}"
+
+        result_dict = {
+            "message": "✅ 줄글로 생성된 모델 출력입니다. post_process 후처리 생략됨.",
+            "note": "아래 url에 접속하여 markdown 형식으로 응답 확인하세요.",
+            "summary_html": summary_url,
+        }
+
+    # ✅ Extraction-based post-processing (extract)
+    else:
+        # ✅ Output wrapped in a ```json code block
+        if "```json" in generated_text:
+            try:
+                logger.debug("[PROCESS-JSON] JSON 코드블럭 형식 후처리 진행합니다.")
+                json_block = re.search(r"```json\s*(\{.*?\})\s*```", generated_text, re.DOTALL)
+                if json_block:
+                    parsed_json = json.loads(json_block.group(1))
+                    # Keep only Hangul and ASCII letters in each key
+                    result_dict = {re.sub(r"[^ㄱ-ㅎ가-힣a-zA-Z]", "", k): v for k, v in parsed_json.items()}
+            except Exception as e:
+                # Parsing is best effort: log and fall through with an empty result.
+                # The %s placeholder is required; without it the logging module
+                # fails to format the record and the message is lost.
+                logger.error("[PROCESS-ERROR] JSON 코드블럭 파싱 실패: %s", e)
+
+        # ✅ Detect the "input too long, model call skipped" message
+        elif "입력 텍스트가" in generated_text and "모델 호출 생략" in generated_text:
+            result_dict = {
+                "message": "⚠️ 입력 텍스트가 너무 깁니다. LLM 모델 호출을 생략했습니다.",
+                "note": "OCR로 추출된 원본 텍스트(parsed)를 참고해 주세요.",
+            }
+
+        else:
+            # ✅ Handle the "1.title:" / "1. title:" numbered format
+            logger.debug("[PROCESS-STRING] JSON 코드블럭 형식이 아닙니다.")
+            blocks = re.split(r"\n(?=\d+\.\s*[^:\n]+:)", generated_text.strip())
+
+            for block in blocks:
+                if ":" in block:
+                    key_line, *rest = block.split(":", 1)
+                    key = re.sub(r"^\d+\.\s*", "", key_line).strip()
+                    cleaned_key = re.sub(r"[^ㄱ-ㅎ가-힣a-zA-Z]", "", key)
+
+                    value = rest[0].strip() if rest else ""
+                    # Strip leading punctuation/symbols from the value
+                    value = re.sub(r"^[^\w가-힣a-zA-Z]+", "", value).strip()
+
+                    result_dict[cleaned_key] = value
+
+    json_data = {
+        "filename": file_name,
+        f"{mode}_model": {
+            "ocr_model": ocr_model,
+            "llm_model": llm_model,
+            "api_url": llm_url,
+        },
+        "time": {
+            "duration_sec": f"{end_time - start_time:.2f}",
+            "started_at": start_time,
+            "ended_at": end_time,
+        },
+        "fields": coord,
+        "parsed": text,
+        "generated": generated_text,
+        "processed": result_dict,
+    }
+
+    # final_result
+    logger.info(json.dumps(json_data["processed"], indent=2, ensure_ascii=False))
+
+    return json_data
+
+
+def ocr_process(filename, ocr_model, coord, text, start_time, end_time):
+    """Build the response payload for an OCR-only run.
+
+    Returns a dict with ``filename``, ``model``, ``time``, ``fields`` and
+    ``parsed`` keys; ``time.duration_sec`` is ``end_time - start_time``
+    formatted with two decimals.
+    """
+    elapsed = end_time - start_time
+    timing = {
+        "duration_sec": f"{elapsed:.2f}",
+        "started_at": start_time,
+        "ended_at": end_time,
+    }
+
+    return {
+        "filename": filename,
+        "model": {"ocr_model": ocr_model},
+        "time": timing,
+        "fields": coord,
+        "parsed": text,
+    }
+
+
+def test_post_process(
+    text,
+    generated_text,
+    coord,
+    ocr_model,
+    llm_model,
+    llm_url,
+    mode,
+    start_time,
+    end_time,
+    prompt_mode: Literal["general", "extract"] = "extract",
+):
+    """Build an extraction-style payload without a file name or duration.
+
+    Fields are parsed from a ```` ```json ```` block or from numbered
+    ``N. key: value`` lines. ``filename`` and ``time.duration_sec`` are
+    reported as ``"N/A"``. ``prompt_mode`` is accepted but not used.
+
+    Returns:
+        dict with ``filename``, ``{mode}_model``, ``time``, ``fields``,
+        ``parsed``, ``generated`` and ``processed`` keys.
+    """
+    # Start empty so every branch below, including a failed or missing JSON
+    # block, leaves a defined value instead of raising UnboundLocalError.
+    result_dict = {}
+
+    # ✅ Output wrapped in a ```json code block
+    if "```json" in generated_text:
+        try:
+            logger.debug("[PROCESS-JSON] JSON 코드블럭 형식 후처리 진행합니다.")
+            json_block = re.search(r"```json\s*(\{.*?\})\s*```", generated_text, re.DOTALL)
+            if json_block:
+                parsed_json = json.loads(json_block.group(1))
+                # Keep only Hangul and ASCII letters in each key
+                result_dict = {re.sub(r"[^ㄱ-ㅎ가-힣a-zA-Z]", "", k): v for k, v in parsed_json.items()}
+        except Exception as e:
+            # The %s placeholder is required; without it the logging module
+            # fails to format the record and the message is lost.
+            logger.error("[PROCESS-ERROR] JSON 코드블럭 파싱 실패: %s", e)
+
+    # ✅ Detect the "input too long, model call skipped" message
+    elif "입력 텍스트가" in generated_text and "모델 호출 생략" in generated_text:
+        result_dict = {
+            "message": "⚠️ 입력 텍스트가 너무 깁니다. LLM 모델 호출을 생략했습니다.",
+            "note": "OCR로 추출된 원본 텍스트(parsed)를 참고해 주세요.",
+        }
+
+    else:
+        # ✅ Handle the "1.title:" / "1. title:" numbered format
+        logger.debug("[PROCESS-STRING] JSON 코드블럭 형식이 아닙니다.")
+        blocks = re.split(r"\n(?=\d+\.\s*[^:\n]+:)", generated_text.strip())
+
+        for block in blocks:
+            if ":" in block:
+                key_line, *rest = block.split(":", 1)
+                key = re.sub(r"^\d+\.\s*", "", key_line).strip()
+                cleaned_key = re.sub(r"[^ㄱ-ㅎ가-힣a-zA-Z]", "", key)
+
+                value = rest[0].strip() if rest else ""
+                # Strip leading punctuation/symbols from the value
+                value = re.sub(r"^[^\w가-힣a-zA-Z]+", "", value).strip()
+
+                result_dict[cleaned_key] = value
+
+    json_data = {
+        "filename": "N/A",
+        f"{mode}_model": {
+            "ocr_model": ocr_model,
+            "llm_model": llm_model,
+            "api_url": llm_url,
+        },
+        "time": {
+            "duration_sec": "N/A",
+            "started_at": start_time,
+            "ended_at": end_time,
+        },
+        "fields": coord,
+        "parsed": text,
+        "generated": generated_text,
+        "processed": result_dict,
+    }
+
+    # final_result
+    logger.info(json.dumps(json_data["processed"], indent=2, ensure_ascii=False))
+
+    return json_data
				`@@ -0,0 +1 @@`
				`// This is a placeholder file to prevent 404 errors from browsers trying to fetch a service worker.`