diff --git a/DeepSeek-OCR-master/DeepSeek-OCR-hf/run_dpsk_ocr.py b/DeepSeek-OCR-master/DeepSeek-OCR-hf/run_dpsk_ocr.py
index 1fcdbd6..08517f7 100644
--- a/DeepSeek-OCR-master/DeepSeek-OCR-hf/run_dpsk_ocr.py
+++ b/DeepSeek-OCR-master/DeepSeek-OCR-hf/run_dpsk_ocr.py
@@ -17,8 +17,8 @@ model = model.eval().cuda().to(torch.bfloat16)
 
 # prompt = "<image>\nFree OCR. "
 prompt = "<image>\n<|grounding|>Convert the document to markdown. "
-image_file = 'your_image.jpg'
-output_path = 'your/output/dir'
+image_file = '/workspace/2025-27484-M21472.pdf'
+output_path = '/workspace/output_hf'
 
diff --git a/DeepSeek-OCR-master/DeepSeek-OCR-vllm/config.py b/DeepSeek-OCR-master/DeepSeek-OCR-vllm/config.py
index e2adfd1..37e48a1 100644
--- a/DeepSeek-OCR-master/DeepSeek-OCR-vllm/config.py
+++ b/DeepSeek-OCR-master/DeepSeek-OCR-vllm/config.py
@@ -21,8 +21,10 @@ MODEL_PATH = 'deepseek-ai/DeepSeek-OCR'  # change to your model path
 
 # .jpg, .png, .jpeg: run_dpsk_ocr_image.py;
 # Omnidocbench images path: run_dpsk_ocr_eval_batch.py
-INPUT_PATH = ''
-OUTPUT_PATH = ''
+FILE_NAME = '2025-27484-M21472.pdf'
+
+INPUT_PATH = f'/workspace/{FILE_NAME}'
+OUTPUT_PATH = '/workspace/output/'
 PROMPT = '<image>\n<|grounding|>Convert the document to markdown.'
 # PROMPT = '<image>\nFree OCR.'
 
diff --git a/DeepSeek-OCR-master/DeepSeek-OCR-vllm/requirements.txt b/DeepSeek-OCR-master/DeepSeek-OCR-vllm/requirements.txt
new file mode 100644
index 0000000..68d3283
--- /dev/null
+++ b/DeepSeek-OCR-master/DeepSeek-OCR-vllm/requirements.txt
@@ -0,0 +1,10 @@
+transformers
+tokenizers
+PyMuPDF
+img2pdf
+einops
+easydict
+addict
+Pillow
+numpy
+matplotlib
\ No newline at end of file
diff --git a/DeepSeek_OCR_paper.pdf b/DeepSeek_OCR_paper.pdf
deleted file mode 100644
index c147cc8..0000000
Binary files a/DeepSeek_OCR_paper.pdf and /dev/null differ
diff --git a/Dockerfile.hf b/Dockerfile.hf
new file mode 100644
index 0000000..57cf292
--- /dev/null
+++ b/Dockerfile.hf
@@ -0,0 +1,48 @@
+# PyTorch 2.6.0 + CUDA 12.6 + cuDNN9
+FROM pytorch/pytorch:2.6.0-cuda12.6-cudnn9-devel
+
+ENV DEBIAN_FRONTEND=noninteractive \
+    HF_HOME=/workspace/.cache/huggingface \
+    CUDA_HOME=/usr/local/cuda \
+    LD_LIBRARY_PATH=/usr/local/cuda/lib64:${LD_LIBRARY_PATH} \
+    PIP_DISABLE_PIP_VERSION_CHECK=1 \
+    PYTHONUNBUFFERED=1 \
+    HF_HUB_DISABLE_TELEMETRY=1
+
+ARG TORCH_CUDA_ARCH_LIST=8.0
+ENV TORCH_CUDA_ARCH_LIST=${TORCH_CUDA_ARCH_LIST}
+ENV TRITON_PTXAS_PATH=/usr/local/cuda/bin/ptxas
+
+WORKDIR /workspace
+
+# Build tools
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    git build-essential ninja-build cmake \
+    && rm -rf /var/lib/apt/lists/*
+
+RUN python -m pip install -U pip setuptools wheel packaging ninja
+
+# Reinstall explicitly pinned versions for consistency
+RUN pip install --no-cache-dir \
+    torch==2.6.0 torchvision==0.21.0 torchaudio==2.6.0
+
+# Dependency stack compatible with DeepSeek-OCR (HF)
+RUN pip install --no-cache-dir \
+    "transformers==4.43.3" \
+    "accelerate==0.33.0" \
+    "tokenizers==0.19.1" \
+    "numpy==1.26.4" \
+    "safetensors>=0.4.2" \
+    "einops" "timm>=0.9"
+
+# flash-attn 2.7.3 (built from source against CUDA 12.6)
+RUN pip install --no-cache-dir --no-build-isolation --no-binary=flash-attn flash-attn==2.7.3
+
+# Remove vLLM (avoid conflicts if it ever gets pulled in)
+RUN pip uninstall -y vllm || true
+
+# Pin OpenCV to a version compatible with numpy 1.26
+RUN pip install --no-cache-dir "opencv-python-headless==4.8.1.78"
+
+# Application source
+COPY DeepSeek-OCR-master/DeepSeek-OCR-hf/ /workspace/DeepSeek-OCR-hf/
diff --git a/Dockerfile.vllm b/Dockerfile.vllm
new file mode 100644
index 0000000..dac8525
--- /dev/null
+++ b/Dockerfile.vllm
@@ -0,0 +1,41 @@
+# PyTorch 2.6.0 + CUDA 12.6 + cuDNN9
+FROM pytorch/pytorch:2.6.0-cuda12.6-cudnn9-devel
+
+# Base environment variables
+ENV DEBIAN_FRONTEND=noninteractive \
+    HF_HOME=/workspace/.cache/huggingface \
+    CUDA_HOME=/usr/local/cuda \
+    LD_LIBRARY_PATH=/usr/local/cuda/lib64:${LD_LIBRARY_PATH} \
+    PIP_DISABLE_PIP_VERSION_CHECK=1 \
+    PYTHONUNBUFFERED=1
+
+WORKDIR /workspace
+
+# Required build tools
+RUN apt-get update && apt-get install -y --no-install-recommends \
+    git build-essential ninja-build \
+    && rm -rf /var/lib/apt/lists/*
+
+# Upgrade pip tooling
+RUN python -m pip install -U pip setuptools wheel packaging ninja
+
+# (The base image already ships torch 2.6.0.)
+# Use the line below to pin the torchvision/torchaudio versions.
+# The cu126 index may be unavailable, so the build continues even if this step fails.
+RUN pip install --no-cache-dir \
+    torchvision==0.21.0 torchaudio==2.6.0 \
+    --extra-index-url https://download.pytorch.org/whl/cu126 || \
+    echo "Skip reinstalling torchvision/torchaudio (fallback to base image)"
+
+# Install project dependencies
+COPY requirements.txt /tmp/requirements.txt
+RUN pip install --no-cache-dir -r /tmp/requirements.txt
+
+# Pin vLLM 0.8.5 (a CUDA 12.x wheel is selected automatically)
+RUN pip install --no-cache-dir vllm==0.8.5
+
+# FlashAttention (built from source against CUDA 12.6)
+RUN pip install --no-cache-dir flash-attn==2.7.3 --no-build-isolation
+
+# Keep the working directory
+WORKDIR /workspace
diff --git a/docker-compose.yml b/docker-compose.yml
new file mode 100644
index 0000000..2aabfd5
--- /dev/null
+++ b/docker-compose.yml
@@ -0,0 +1,42 @@
+services:
+  deepseek_ocr_vllm:
+    build:
+      context: .
+      dockerfile: Dockerfile.vllm
+    image: deepseek-ocr-vllm:cu126
+    container_name: deepseek_ocr_vllm
+    working_dir: /workspace
+    volumes:
+      - ./DeepSeek-OCR-master/DeepSeek-OCR-vllm:/workspace
+    gpus: all
+    shm_size: "8g"
+    ipc: "host"
+    environment:
+      - HF_HOME=/workspace/.cache/huggingface
+      - CUDA_HOME=/usr/local/cuda
+      - LD_LIBRARY_PATH=/usr/local/cuda/lib64:${LD_LIBRARY_PATH}
+      - PIP_DISABLE_PIP_VERSION_CHECK=1
+      - PYTHONUNBUFFERED=1
+    tty: true
+    entrypoint: ["/bin/bash"]
+
+  # deepseek_ocr_hf:
+  #   build:
+  #     context: .
+  #     dockerfile: Dockerfile.hf
+  #   image: deepseek-ocr-hf:cu126
+  #   container_name: deepseek_ocr_hf
+  #   working_dir: /workspace
+  #   volumes:
+  #     - ./DeepSeek-OCR-master/DeepSeek-OCR-hf:/workspace
+  #   gpus: all
+  #   shm_size: "8g"
+  #   ipc: "host"
+  #   environment:
+  #     - HF_HOME=/workspace/.cache/huggingface
+  #     - CUDA_HOME=/usr/local/cuda
+  #     - LD_LIBRARY_PATH=/usr/local/cuda/lib64:${LD_LIBRARY_PATH}
+  #     - PIP_DISABLE_PIP_VERSION_CHECK=1
+  #     - PYTHONUNBUFFERED=1
+  #   tty: true
+  #   entrypoint: ["/bin/bash"]
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index dc0cb84..68d3283 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,5 +1,5 @@
-transformers==4.46.3
-tokenizers==0.20.3
+transformers
+tokenizers
 PyMuPDF
 img2pdf
 einops
@@ -7,3 +7,4 @@ easydict
 addict
 Pillow
 numpy
+matplotlib
\ No newline at end of file
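
Usage sketch (not part of the patch): the commands below assume the deepseek_ocr_vllm service defined in the docker-compose.yml added above, and assume run_dpsk_ocr_pdf.py as the PDF entry point in DeepSeek-OCR-vllm; adjust the script name to match your input type (see the comments in config.py).

# Build the vLLM image and open an interactive shell in the container
docker compose build deepseek_ocr_vllm
docker compose run --rm deepseek_ocr_vllm

# Inside the container, /workspace is the mounted DeepSeek-OCR-vllm directory,
# so the input PDF must be placed there to match INPUT_PATH in config.py.
python run_dpsk_ocr_pdf.py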