Deepseek-OCR 환경 설정

2025-10-27 13:50:19 +09:00
parent 153129649d
commit 96b47df624
21 changed files with 62 additions and 3481 deletions
--- a/model_services/deepseek_ocr/Dockerfile
+++ b/model_services/deepseek_ocr/Dockerfile
@@ -1,32 +1,39 @@
-# 1. vLLM 호환을 위해 NVIDIA CUDA 베이스 이미지 선택
-# 참고: vLLM 버전에 따라 적절한 CUDA 버전을 선택해야 할 수 있습니다.
-FROM nvidia/cuda:12.1.0-devel-ubuntu22.04
+# Base image: PyTorch 2.6.0 + CUDA 12.6 + cuDNN 9 (devel variant)
+FROM pytorch/pytorch:2.6.0-cuda12.6-cudnn9-devel

-# 2. 환경 변수 설정 및 기본 패키지 설치
-ENV DEBIAN_FRONTEND=noninteractive
-RUN apt-get update && apt-get install -y \
-    python3.9 \
-    python3.9-pip \
-    git \
+# Base environment variables (CUDA paths, Hugging Face cache location, pip/Python behaviour)
+ENV DEBIAN_FRONTEND=noninteractive \
+    HF_HOME=/workspace/.cache/huggingface \
+    CUDA_HOME=/usr/local/cuda \
+    LD_LIBRARY_PATH=/usr/local/cuda/lib64:${LD_LIBRARY_PATH} \
+    PIP_DISABLE_PIP_VERSION_CHECK=1 \
+    PYTHONUNBUFFERED=1 \
+    TRITON_PTXAS_PATH=/usr/local/cuda/bin/ptxas \
+    TORCH_CUDA_ARCH_LIST="8.0"
+
+WORKDIR /workspace
+
+# Install the required build tools (git, compiler toolchain, ninja)
+RUN apt-get update && apt-get install -y --no-install-recommends \
+      git build-essential ninja-build \
    && rm -rf /var/lib/apt/lists/*

-# python3.9을 기본 python/pip으로 설정
-RUN update-alternatives --install /usr/bin/python python /usr/bin/python3.9 1 && \
-    update-alternatives --install /usr/bin/pip pip /usr/bin/pip 1
+# Upgrade pip, setuptools and wheel (no pip cache is kept in the layer)
+RUN python -m pip install --no-cache-dir -U pip setuptools wheel

-# 3. 작업 디렉토리 설정
-WORKDIR /app
+# Remove preinstalled libraries, then reinstall the pinned versions
+RUN pip uninstall -y vllm torch torchvision torchaudio triton flash-attn || true
+RUN pip install --no-cache-dir torch==2.6.0 torchvision==0.21.0 torchaudio==2.6.0

-# 4. 소스 코드 및 의존성 파일 복사
-# (main.py, requirements.txt, deepseek_ocr.py, process/, config.py 등 모든 파일)
-COPY . .
+# Install project dependencies
+COPY requirements.txt /tmp/requirements.txt
+RUN pip install --no-cache-dir -r /tmp/requirements.txt

-# 5. Python 의존성 설치
-# vLLM은 torch를 필요로 하므로 함께 설치합니다.
-RUN pip install --no-cache-dir -r requirements.txt
+# Install the pinned vLLM version
+RUN pip install --no-cache-dir vllm==0.8.5

-# 6. 서비스 포트 노출
-EXPOSE 8000
+# Build and install FlashAttention from source (no prebuilt wheel, no build isolation)
+RUN pip cache purge && \
+    pip install --no-cache-dir --no-build-isolation --no-binary=flash-attn flash-attn==2.7.3

-# 7. FastAPI 서버 실행
-CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]
+WORKDIR /workspace
--- a/model_services/deepseek_ocr/config.py
+++ b/model_services/deepseek_ocr/config.py
@@ -8,25 +8,24 @@
 BASE_SIZE = 1024
 IMAGE_SIZE = 640
 CROP_MODE = True
-MIN_CROPS= 2
-MAX_CROPS= 6 # max:9; If your GPU memory is small, it is recommended to set it to 6.
-MAX_CONCURRENCY = 100 # If you have limited GPU memory, lower the concurrency count.
-NUM_WORKERS = 64 # image pre-process (resize/padding) workers 
+MIN_CROPS = 2
+MAX_CROPS = 6  # max:9; If your GPU memory is small, it is recommended to set it to 6.
+MAX_CONCURRENCY = 100  # If you have limited GPU memory, lower the concurrency count.
+NUM_WORKERS = 64  # image pre-process (resize/padding) workers
 PRINT_NUM_VIS_TOKENS = False
 SKIP_REPEAT = True
-MODEL_PATH = 'deepseek-ai/DeepSeek-OCR' # change to your model path
+MODEL_PATH = "deepseek-ai/DeepSeek-OCR"  # change to your model path

 # TODO: change INPUT_PATH
-# .pdf: run_dpsk_ocr_pdf.py; 
-# .jpg, .png, .jpeg: run_dpsk_ocr_image.py; 
+# .pdf: run_dpsk_ocr_pdf.py;
+# .jpg, .png, .jpeg: run_dpsk_ocr_image.py;
 # Omnidocbench images path: run_dpsk_ocr_eval_batch.py

-FILE_NAME='2025-27484-M21472.pdf'

-INPUT_PATH = f'/workspace/2025-27484-M21472.pdf' 
-OUTPUT_PATH = '/workspace/output/'
+INPUT_PATH = "/workspace/2018-0802140959-217049.pdf"  # file to process; the notes above map its extension to a run script
+OUTPUT_PATH = "/workspace/output/"  # output directory; presumably written by the run_dpsk_ocr_* scripts (verify)

-PROMPT = '<image>\n<|grounding|>Convert the document to markdown.'
+PROMPT = "<image>\n<|grounding|>Convert the document to markdown."
 # PROMPT = '<image>\nFree OCR.'
 # TODO commonly used prompts
 # document: <image>\n<|grounding|>Convert the document to markdown.
--- a/model_services/deepseek_ocr/docker-compose.yml
+++ b/model_services/deepseek_ocr/docker-compose.yml
@@ -0,0 +1,21 @@
+services:
+  deepseek_ocr_vllm:
+    build:
+      context: .
+      dockerfile: Dockerfile
+    image: deepseek-ocr-vllm:cu126
+    container_name: deepseek_ocr_vllm
+    working_dir: /workspace
+    volumes:
+      - ./:/workspace  # bind-mount the project directory; HF_HOME below lives inside it
+    gpus: all
+    shm_size: "8g"
+    ipc: "host"
+    environment:
+      - HF_HOME=/workspace/.cache/huggingface
+      - CUDA_HOME=/usr/local/cuda
+      # LD_LIBRARY_PATH is left to the image ENV: Compose would expand a variable reference here from the host environment, not the container.
+      - PIP_DISABLE_PIP_VERSION_CHECK=1
+      - PYTHONUNBUFFERED=1
+    tty: true
+    entrypoint: ["/bin/bash"]
--- a/model_services/deepseek_ocr/main.py
+++ b/model_services/deepseek_ocr/main.py
@@ -1,112 +0,0 @@
-import os
-import base64
-import io
-import time
-
-from fastapi import FastAPI
-from pydantic import BaseModel
-from PIL import Image
-
-# vLLM 및 모델 관련 import
-from vllm import AsyncLLMEngine, SamplingParams
-from vllm.engine.arg_utils import AsyncEngineArgs
-from vllm.model_executor.models.registry import ModelRegistry
-
-# DeepSeek-OCR 관련 로컬 import
-from deepseek_ocr import DeepseekOCRForCausalLM
-from process.image_process import DeepseekOCRProcessor
-from process.ngram_norepeat import NoRepeatNGramLogitsProcessor
-
-# --- Configuration ---
-# Docker 환경에서는 환경 변수를 사용하거나, Dockerfile에서 모델을 다운로드하는 것이 좋습니다.
-# 여기서는 config.py의 기본값을 사용하되, 환경 변수로 재정의할 수 있도록 합니다.
-MODEL_PATH = os.environ.get("MODEL_PATH", "deepseek-ai/deepseek-vl-7b-base") 
-# 참고: 실제 `config.py`는 로컬 경로를 사용하므로, 허깅페이스 모델 ID로 대체합니다.
-# 이 모델을 사용하려면 인터넷 연결이 필요하며, 처음 실행 시 다운로드됩니다.
-
-# --- Model Initialization ---
-
-# 1. 커스텀 모델 등록
-ModelRegistry.register_model("DeepseekOCRForCausalLM", DeepseekOCRForCausalLM)
-
-# 2. vLLM 엔진 설정
-engine_args = AsyncEngineArgs(
-    model=MODEL_PATH,
-    hf_overrides={"architectures": ["DeepseekOCRForCausalLM"]},
-    max_model_len=8192,
-    enforce_eager=False,
-    trust_remote_code=True,
-    tensor_parallel_size=1, # 단일 GPU 사용
-    gpu_memory_utilization=0.90, # GPU 메모리 사용률
-)
-engine = AsyncLLMEngine.from_engine_args(engine_args)
-
-# 3. Deepseek OCR 프로세서 초기화
-processor = DeepseekOCRProcessor()
-
-# 4. FastAPI 앱 초기화
-app = FastAPI()
-
-# --- Pydantic Models ---
-class InferenceRequest(BaseModel):
-    # Base64로 인코딩된 이미지 문자열
-    base64_image: str
-
-class InferenceResponse(BaseModel):
-    text: str
-
-# --- API Endpoints ---
-
-@app.get("/")
-def health_check():
-    return {"status": "DeepSeek-OCR service is running"}
-
-@app.post("/process", response_model=InferenceResponse)
-async def process_image(request: InferenceRequest):
-    """
-    Base64 인코딩된 이미지를 받아 OCR 추론을 수행합니다.
-    """
-    try:
-        # 1. Base64 이미지 디코딩
-        image_data = base64.b64decode(request.base64_image)
-        image = Image.open(io.BytesIO(image_data)).convert('RGB')
-
-        # 2. 이미지 전처리
-        prompt = "<image>"
-        image_features = processor.tokenize_with_images(
-            images=[image], 
-            bos=True, 
-            eos=True, 
-            cropping=False # CROP_MODE 기본값 사용
-        )
-
-        # 3. 샘플링 파라미터 설정 (기존 스크립트 참조)
-        logits_processors = [NoRepeatNGramLogitsProcessor(ngram_size=30, window_size=90, whitelist_token_ids={128821, 128822})]
-        sampling_params = SamplingParams(
-            temperature=0.0,
-            max_tokens=8192,
-            logits_processors=logits_processors,
-            skip_special_tokens=False,
-        )
-
-        # 4. vLLM으로 추론 실행
-        request_id = f"dpsk-request-{int(time.time())}"
-        vllm_request = {
-            "prompt": prompt,
-            "multi_modal_data": {"image": image_features}
-        }
-        
-        final_output = None
-        async for request_output in engine.generate(vllm_request, sampling_params, request_id):
-            # 스트리밍 결과의 마지막 최종본을 사용
-            final_output = request_output
-
-        if final_output and final_output.outputs:
-            generated_text = final_output.outputs[0].text
-            return InferenceResponse(text=generated_text)
-        else:
-            raise Exception("Model generated no output.")
-
-    except Exception as e:
-        # 실제 운영 환경에서는 로깅을 추가하는 것이 좋습니다.
-        return {"error": f"An error occurred: {str(e)}"}, 500