From 732e7c8cc018f42cef9b95fe3159af8fb43cba97 Mon Sep 17 00:00:00 2001
From: kyy <b24053@hanmaceng.co.kr>
Date: Thu, 6 Nov 2025 15:12:26 +0900
Subject: [PATCH] =?UTF-8?q?=EC=84=9C=EB=B9=99=20=EC=84=A4=EC=A0=95=20?=
 =?UTF-8?q?=EB=B3=80=EC=88=98=20=EB=B0=8F=20=ED=8C=8C=EC=9D=BC=20=EC=B6=94?=
 =?UTF-8?q?=EA=B0=80?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .env                      | 29 +++++++++++++++++++++++++++++
 config/engine_settings.py | 35 +++++++++++++++++++++++++++++++++++
 requirements.txt          |  1 +
 services/ocr_engine.py    | 37 ++++++++++++++++++-------------------
 4 files changed, 83 insertions(+), 19 deletions(-)
 create mode 100644 .env
 create mode 100644 config/engine_settings.py

diff --git a/.env b/.env
new file mode 100644
index 0000000..e38ada4
--- /dev/null
+++ b/.env
@@ -0,0 +1,29 @@
+# --------------------------------------------------------------------------
+# vLLM Engine Configuration
+# --------------------------------------------------------------------------
+# 이 파일의 주석(#)을 제거하고 값을 수정하여 기본 엔진 설정을 재정의할 수 있습니다.
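+# See config/engine_settings.py for the engine variables; MODEL_PATH is consumed via config/model_settings.py instead (confirm that module reads this variable).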
+
+# Hugging Face 모델 경로 또는 로컬 경로
+# MODEL_PATH="deepseek-ai/DeepSeek-OCR"
+
+# 텐서 병렬 처리 크기 (Multi-GPU 환경에서 사용)
+# TENSOR_PARALLEL_SIZE=1
+
+# 최대 GPU 메모리 사용률 (0.0 ~ 1.0)
+# GPU_MEMORY_UTILIZATION=0.15
+
+# KV 캐시 블록 크기
+# BLOCK_SIZE=256
+
+# 최대 모델 길이
+# MAX_MODEL_LEN=8192
+
+# Eager 모드 강제 실행 여부 (True / False)
+# ENFORCE_EAGER=False
+
+# 원격 코드 신뢰 여부 (True / False)
+# TRUST_REMOTE_CODE=True
+
+# 사용자 정의 모델 아키텍처 (쉼표로 구분)
+# ARCHITECTURES="DeepseekOCRForCausalLM"
diff --git a/config/engine_settings.py b/config/engine_settings.py
new file mode 100644
index 0000000..000d739
--- /dev/null
+++ b/config/engine_settings.py
@@ -0,0 +1,35 @@
+import os
+
+
+def _str_to_bool(value: str) -> bool:
+    """Return True when the env-var string is a truthy spelling (true/1/t/yes/y/on); case and surrounding whitespace are ignored."""
+    return value.strip().lower() in ("true", "1", "t", "yes", "y", "on")
+
+
+# --------------------------------------------------------------------------
+# vLLM Engine Configuration
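+# Each default below can be overridden by an environment variable of the same name. This module only reads os.getenv at import time, so .env values apply only if that file is loaded into the environment first (e.g. python-dotenv's load_dotenv()).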
+# --------------------------------------------------------------------------
+
+# Custom model architectures (used as hf_overrides["architectures"] when building the engine args)
+# Comma-separated when more than one: "Arch1,Arch2". Blank entries are dropped; an empty value falls back to the default.
+_architectures_str = os.getenv("ARCHITECTURES") or "DeepseekOCRForCausalLM"
+ARCHITECTURES = [arch.strip() for arch in _architectures_str.split(",") if arch.strip()]
+
+# KV cache block size (passed as block_size to AsyncEngineArgs)
+BLOCK_SIZE = int(os.getenv("BLOCK_SIZE", "256"))
+
+# Maximum model length (passed as max_model_len to AsyncEngineArgs)
+MAX_MODEL_LEN = int(os.getenv("MAX_MODEL_LEN", "8192"))
+
+# Whether to force eager mode (passed as enforce_eager to AsyncEngineArgs)
+ENFORCE_EAGER = _str_to_bool(os.getenv("ENFORCE_EAGER", "False"))
+
+# Whether to trust remote code (passed as trust_remote_code to AsyncEngineArgs)
+TRUST_REMOTE_CODE = _str_to_bool(os.getenv("TRUST_REMOTE_CODE", "True"))
+
+# Tensor parallel size (passed as tensor_parallel_size to AsyncEngineArgs)
+TENSOR_PARALLEL_SIZE = int(os.getenv("TENSOR_PARALLEL_SIZE", "1"))
+
+# GPU memory utilization fraction. NOTE(review): the hard-coded value this replaces in services/ocr_engine.py was 0.75 — confirm the 0.15 default is intended.
+GPU_MEMORY_UTILIZATION = float(os.getenv("GPU_MEMORY_UTILIZATION", "0.15"))
diff --git a/requirements.txt b/requirements.txt
index 33b2cef..c993bd8 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -11,3 +11,4 @@ matplotlib
 fastapi
 uvicorn[standard]
 python-multipart
+python-dotenv
diff --git a/services/ocr_engine.py b/services/ocr_engine.py
index 25e8c3f..d3240a8 100644
--- a/services/ocr_engine.py
+++ b/services/ocr_engine.py
@@ -4,7 +4,7 @@ import logging
 import re
 
 import fitz
-from config.model_settings import CROP_MODE, MODEL_PATH, PROMPT
+from config import engine_settings, model_settings
 from PIL import Image, ImageOps
 from vllm import AsyncLLMEngine, SamplingParams
 from vllm.engine.arg_utils import AsyncEngineArgs
@@ -30,14 +30,14 @@ async def init_engine():
         return
 
     engine_args = AsyncEngineArgs(
-        model=MODEL_PATH,
-        hf_overrides={"architectures": ["DeepseekOCRForCausalLM"]},
-        block_size=256,
-        max_model_len=8192,
-        enforce_eager=False,
-        trust_remote_code=True,
-        tensor_parallel_size=1,
-        gpu_memory_utilization=0.75,
+        model=model_settings.MODEL_PATH,
+        hf_overrides={"architectures": engine_settings.ARCHITECTURES},
+        block_size=engine_settings.BLOCK_SIZE,
+        max_model_len=engine_settings.MAX_MODEL_LEN,
+        enforce_eager=engine_settings.ENFORCE_EAGER,
+        trust_remote_code=engine_settings.TRUST_REMOTE_CODE,
+        tensor_parallel_size=engine_settings.TENSOR_PARALLEL_SIZE,
+        gpu_memory_utilization=engine_settings.GPU_MEMORY_UTILIZATION,
     )
     _engine = AsyncLLMEngine.from_engine_args(engine_args)
 
@@ -75,7 +75,7 @@ def _postprocess_text(text: str, page_num: int = 0) -> str:
     matches_other = []
     for match_tuple in matches:
         full_match_str = match_tuple[0]  # 전체 매치된 부분
-        ref_content = match_tuple[1]     # <|ref|> 안의 내용
+        ref_content = match_tuple[1]  # <|ref|> 안의 내용
 
         if "image" in ref_content:
             matches_images.append(full_match_str)
@@ -105,7 +105,6 @@ def _postprocess_text(text: str, page_num: int = 0) -> str:
     return processed_text
 
 
-
 # --------------------------------------------------------------------------
 # 3. 핵심 처리 함수
 # --------------------------------------------------------------------------
@@ -115,14 +114,14 @@ async def _process_single_image(image: Image.Image, page_num: int = 0) -> str:
     """단일 PIL 이미지를 받아 OCR을 수행하고 후처리된 텍스트를 반환합니다."""
     if _engine is None:
         raise RuntimeError("vLLM engine not initialized yet")
-    if "<image>" not in PROMPT:
+    if "<image>" not in model_settings.PROMPT:
         raise ValueError("프롬프트에 '<image>' 토큰이 없어 OCR을 수행할 수 없습니다.")
 
     image_features = processor.tokenize_with_images(
-        images=[image], bos=True, eos=True, cropping=CROP_MODE
+        images=[image], bos=True, eos=True, cropping=model_settings.CROP_MODE
     )
 
-    request = {"prompt": PROMPT, "multi_modal_data": {"image": image_features}}
+    request = {"prompt": model_settings.PROMPT, "multi_modal_data": {"image": image_features}}
     request_id = f"request-{asyncio.get_running_loop().time()}"
 
     raw_output = ""
@@ -168,7 +167,10 @@ async def process_document(file_bytes: bytes, content_type: str, filename: str)
     if content_type == "application/octet-stream":
         if filename.lower().endswith(".pdf"):
             inferred_content_type = "application/pdf"
-        elif any(filename.lower().endswith(ext) for ext in [".jpg", ".jpeg", ".png", ".bmp", ".gif", ".webp"]):
+        elif any(
+            filename.lower().endswith(ext)
+            for ext in [".jpg", ".jpeg", ".png", ".bmp", ".gif", ".webp"]
+        ):
             inferred_content_type = "image/jpeg"  # 구체적인 타입은 중요하지 않음
 
     if inferred_content_type.startswith("image/"):
@@ -189,9 +191,7 @@ async def process_document(file_bytes: bytes, content_type: str, filename: str)
             raise ValueError(f"PDF 파일을 처리하는 데 실패했습니다: {e}")
 
         # 각 페이지를 비동기적으로 처리 (페이지 번호 전달)
-        tasks = [
-            _process_single_image(img, page_num=i) for i, img in enumerate(images)
-        ]
+        tasks = [_process_single_image(img, page_num=i) for i, img in enumerate(images)]
         page_results = await asyncio.gather(*tasks)
 
         full_text = "\n<--- Page Split --->\n".join(page_results)
@@ -202,4 +202,3 @@ async def process_document(file_bytes: bytes, content_type: str, filename: str)
             f"지원하지 않는 파일 형식입니다: {content_type}. "
             "이미지(JPEG, PNG 등) 또는 PDF 파일을 업로드해주세요."
         )
-