Initial commit

2025-10-20 10:48:36 +08:00
commit 093b215067
24 changed files with 3434 additions and 0 deletions
--- a/DeepSeek-OCR-master/DeepSeek-OCR-hf/run_dpsk_ocr.py
+++ b/DeepSeek-OCR-master/DeepSeek-OCR-hf/run_dpsk_ocr.py
@@ -0,0 +1,34 @@
+from transformers import AutoModel, AutoTokenizer
+import torch
+import os
+
+# Example script: loads the model and runs model.infer() on one image file.
+os.environ["CUDA_VISIBLE_DEVICES"] = '0'  # make only GPU index 0 visible to CUDA
+# NOTE(review): set after `import torch`; presumably it must precede the first CUDA call (.cuda() below) - confirm.
+
+model_name = 'deepseek-ai/DeepSeek-OCR'  # presumably a Hugging Face Hub repo id - verify
+
+# trust_remote_code=True lets transformers execute Python code shipped with the model repository.
+tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+model = AutoModel.from_pretrained(model_name, _attn_implementation='flash_attention_2', trust_remote_code=True, use_safetensors=True)
+model = model.eval().cuda().to(torch.bfloat16)  # eval mode, move to the CUDA device, cast to bfloat16
+
+
+# Inputs for the infer() call at the bottom; the commented-out line is an alternative prompt.
+# prompt = "<image>\nFree OCR. "
+prompt = "<image>\n<|grounding|>Convert the document to markdown. "
+image_file = 'your_image.jpg'  # placeholder: replace with the path of the image to process
+output_path = 'your/output/dir'  # placeholder: replace with a real output directory
+
+
+# Signature of model.infer as recorded by the script's author, for reference:
+# infer(self, tokenizer, prompt='', image_file='', output_path = ' ', base_size = 1024, image_size = 640, crop_mode = True, test_compress = False, save_results = False):
+# Named argument presets recorded by the author:
+# Tiny: base_size = 512, image_size = 512, crop_mode = False
+# Small: base_size = 640, image_size = 640, crop_mode = False
+# Base: base_size = 1024, image_size = 1024, crop_mode = False
+# Large: base_size = 1280, image_size = 1280, crop_mode = False
+
+# Gundam: base_size = 1024, image_size = 640, crop_mode = True
+# The call below uses the "Gundam" values. NOTE(review): effect of save_results/test_compress is not visible here - check infer().
+res = model.infer(tokenizer, prompt=prompt, image_file=image_file, output_path = output_path, base_size = 1024, image_size = 640, crop_mode=True, save_results = True, test_compress = True)