Skip to content

Commit 2caba44

Browse files
Revert to DeepSeek-OCR and disable scale-to-zero
- Reverted from DeepSeek-OCR-2 to DeepSeek-OCR (weight mismatch issue)
- Disabled scale-to-zero: min=1, max=20, desired=1
- Kept g5.xlarge with bfloat16 for better performance

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent bdf38a6 commit 2caba44

5 files changed

Lines changed: 19 additions & 20 deletions

File tree

docker/Dockerfile

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# DeepSeek-OCR-2 vLLM Docker Image
1+
# DeepSeek-OCR vLLM Docker Image
22
# Based on official vLLM OpenAI image for better compatibility
33
# Supports BF16 inference on g5.xlarge (A10G GPU)
44

@@ -15,11 +15,11 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
1515
git ca-certificates curl && \
1616
rm -rf /var/lib/apt/lists/*
1717

18-
# Fetch upstream DeepSeek-OCR-2 sources at build time
18+
# Fetch upstream DeepSeek-OCR sources at build time
1919
RUN git clone --depth 1 https://github.com/deepseek-ai/DeepSeek-OCR.git /app/DeepSeek-OCR-src
2020

2121
# Copy the DeepSeek-OCR vLLM implementation (correct nested path)
22-
# Note: DeepSeek-OCR-2 uses the same vLLM implementation structure
22+
# Copy the vLLM implementation for DeepSeek-OCR
2323
RUN cp -r /app/DeepSeek-OCR-src/DeepSeek-OCR-master/DeepSeek-OCR-vllm /app/DeepSeek-OCR-vllm
2424

2525
# Copy custom files to replace the originals (transparent replacement approach)
@@ -35,7 +35,7 @@ COPY custom_run_dpsk_ocr_eval_batch.py ./DeepSeek-OCR-vllm/run_dpsk_ocr_eval_bat
3535
# Copy the startup script
3636
COPY start_server.py .
3737

38-
# Upgrade pip and install core dependencies with specific versions for DeepSeek-OCR-2
38+
# Upgrade pip and install core dependencies
3939
RUN pip install --no-cache-dir --upgrade pip && \
4040
pip install --no-cache-dir \
4141
torch==2.6.0 \
@@ -76,8 +76,8 @@ ENV HF_HOME="/app/models"
7676
ENV TRANSFORMERS_CACHE="/app/models"
7777
ENV HUGGINGFACE_HUB_CACHE="/app/models"
7878

79-
# Default model configuration for DeepSeek-OCR-2
80-
ENV MODEL_PATH="deepseek-ai/DeepSeek-OCR-2"
79+
# Default model configuration for DeepSeek-OCR
80+
ENV MODEL_PATH="deepseek-ai/DeepSeek-OCR"
8181
ENV VLLM_TORCH_DTYPE="bfloat16"
8282

8383
# Make the scripts executable

docker/custom_config.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Custom configuration for DeepSeek-OCR-2 vLLM
1+
# Custom configuration for DeepSeek-OCR vLLM
22
# This file replaces the original config.py during Docker build
33
# Modify the PROMPT value below to change the default prompt used by the OCR service
44

@@ -9,10 +9,10 @@
99
# Small: base_size = 640, image_size = 640, crop_mode = False
1010
# Base: base_size = 1024, image_size = 1024, crop_mode = False
1111
# Large: base_size = 1280, image_size = 1280, crop_mode = False
12-
# Gundam: base_size = 1024, image_size = 768, crop_mode = True (recommended for OCR-2)
12+
# Gundam: base_size = 1024, image_size = 640, crop_mode = True
1313

1414
BASE_SIZE = 1024
15-
IMAGE_SIZE = 768 # Updated for DeepSeek-OCR-2 (larger than OCR-1)
15+
IMAGE_SIZE = 512 # Size chosen for DeepSeek-OCR in this revert; NOTE(review): differs from the Gundam image_size listed above - confirm intended
1616
CROP_MODE = True
1717
MIN_CROPS = 2
1818
MAX_CROPS = 6 # max:9; If your GPU memory is small, it is recommended to set it to 6.
@@ -21,9 +21,9 @@
2121
PRINT_NUM_VIS_TOKENS = False
2222
SKIP_REPEAT = True
2323

24-
# DeepSeek-OCR-2 Model Configuration
24+
# DeepSeek-OCR Model Configuration
2525
# Use environment variables for flexibility (Golden AMI may override)
26-
MODEL_PATH = os.environ.get('MODEL_PATH', 'deepseek-ai/DeepSeek-OCR-2')
26+
MODEL_PATH = os.environ.get('MODEL_PATH', 'deepseek-ai/DeepSeek-OCR')
2727
VLLM_TORCH_DTYPE = os.environ.get('VLLM_TORCH_DTYPE', 'bfloat16') # BF16 for g5 (A10G GPU)
2828

2929
# Check for pre-cached model in Golden AMI location

docker/start_server.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -125,7 +125,7 @@ def initialize_model():
125125
global llm, sampling_params
126126

127127
if llm is None:
128-
print("Initializing DeepSeek-OCR-2 model...")
128+
print("Initializing DeepSeek-OCR model...")
129129
print(f"Model path from config: {MODEL_PATH}")
130130

131131
# Get environment variable overrides
@@ -163,7 +163,7 @@ def initialize_model():
163163

164164
# Initialize vLLM engine with the Hugging Face repository ID
165165
llm = LLM(
166-
model=model_path, # Use HF repository ID: "deepseek-ai/DeepSeek-OCR-2"
166+
model=model_path, # Use HF repository ID: "deepseek-ai/DeepSeek-OCR"
167167
hf_overrides={"architectures": ["DeepseekOCRForCausalLM"]},
168168
enforce_eager=True,
169169
trust_remote_code=True,

src/constructs/deepseek-ocr-ecs.ts

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -246,7 +246,7 @@ export class DeepSeekOcrEc2GpuConstruct extends Construct {
246246
{
247247
capacityProvider: capacityProvider.capacityProviderName,
248248
weight: 1,
249-
base: 0, // Scale-to-zero: no base capacity required
249+
base: 1, // Always keep at least 1 task running
250250
},
251251
],
252252
placementStrategies: [
@@ -388,13 +388,13 @@ export class DeepSeekOcrEc2GpuConstruct extends Construct {
388388
// GPU configuration
389389
gpuCount: 1, // Request 1 GPU
390390

391-
// Environment variables - DeepSeek-OCR-2 with BF16 on g5.xlarge
391+
// Environment variables - DeepSeek-OCR with BF16 on g5.xlarge
392392
environment: {
393393
// GPU settings
394394
CUDA_VISIBLE_DEVICES: '0',
395395

396-
// DeepSeek-OCR-2 model configuration
397-
MODEL_PATH: 'deepseek-ai/DeepSeek-OCR-2', // HuggingFace repo ID for OCR-2
396+
// DeepSeek-OCR model configuration
397+
MODEL_PATH: 'deepseek-ai/DeepSeek-OCR', // HuggingFace repo ID
398398
VLLM_TORCH_DTYPE: 'bfloat16', // BF16 for A10G GPU (g5 instances)
399399

400400
// Model caching directories - check Golden AMI cache first

src/stacks/ecs.stack.ts

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42,13 +42,12 @@ export class EcsStack extends cdk.Stack {
4242
kmsKey.grantEncryptDecrypt(taskRole);
4343

4444
// ECS Cluster Stack - g5.xlarge for DeepSeek-OCR with BF16 support
45-
// Scale-to-zero enabled: min=0, max=20, desired=0
4645
const ecsClusterConstruct = new DeepSeekOcrEc2GpuConstruct(this, 'EcsGpuService', {
4746
vpc,
4847
securityGroups,
49-
minCapacity: 0, // Scale-to-zero: no minimum instances
48+
minCapacity: 1, // Always keep at least 1 instance running
5049
maxCapacity: 20, // Allow scaling up to 20 instances
51-
desiredCapacity: 0, // Scale-to-zero: start with 0 instances
50+
desiredCapacity: 1, // Start with 1 instance
5251
dockerBuildContext: path.join(__dirname, '../../docker'),
5352
kmsKey,
5453
instanceType: ec2.InstanceType.of(ec2.InstanceClass.G5, ec2.InstanceSize.XLARGE), // A10G GPU for BF16

0 commit comments

Comments
 (0)