Skip to content

Commit bdf38a6

Browse files
feat: Upgrade to DeepSeek-OCR-2, g5.xlarge, scale-to-zero, and Golden AMI
Infrastructure upgrades for better performance and cost optimization:
- Upgrade model from DeepSeek-OCR to DeepSeek-OCR-2 with BF16 support
- Switch from g4dn.xlarge (T4) to g5.xlarge (A10G) for BF16 inference
- Enable scale-to-zero (min=0, max=20, desired=0) to save costs when idle
- Add Golden AMI support for ~5 min cold starts (vs 25-65 min)
- Add Packer template for building Golden AMI with pre-cached model
- Add GitHub Actions workflow for automated AMI builds

Changes:
- Update Dockerfile with torch==2.6.0, transformers==4.46.3, tokenizers==0.20.3
- Update custom_config.py with IMAGE_SIZE=768 and bfloat16 dtype
- Update start_server.py to detect Golden AMI cache and GPU type
- Update CDK construct with goldenAmiId prop and g5 instance support
- Add packer/deepseek-ocr-golden.pkr.hcl
- Add .github/workflows/build-ami.yml

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
1 parent 65c9c47 commit bdf38a6

7 files changed

Lines changed: 583 additions & 50 deletions

File tree

.github/workflows/build-ami.yml

Lines changed: 202 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,202 @@
# GitHub Actions workflow to build the DeepSeek-OCR Golden AMI.
#
# Triggers:
#   - push to main touching packer/ or docker/    -> validate, build, open CDK PR
#   - pull request to main touching those paths   -> validate only
#   - manual run (workflow_dispatch)              -> validate, build, open CDK PR

name: Build Golden AMI

on:
  push:
    branches:
      - main
    paths:
      - 'packer/**'
      - 'docker/**'
  pull_request:
    branches:
      - main
    paths:
      - 'packer/**'
      - 'docker/**'
  workflow_dispatch:
    inputs:
      aws_region:
        description: 'AWS Region for AMI'
        required: false
        default: 'us-east-1'
      instance_type:
        description: 'Instance type for Packer build'
        required: false
        default: 'g5.xlarge'

# Least-privilege default for GITHUB_TOKEN. Jobs that need more (OIDC,
# pushing a branch, opening a PR) declare their own permissions below.
permissions:
  contents: read

env:
  # The dispatch input is only present on manual runs; fall back otherwise.
  AWS_REGION: ${{ github.event.inputs.aws_region || 'us-east-1' }}
  PACKER_VERSION: '1.10.0'

jobs:
  # Static validation of the Packer template; runs for every trigger.
  validate:
    name: Validate Packer Template
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      # NOTE(review): `@main` is a mutable ref; consider pinning this action
      # to a release tag or commit SHA.
      - name: Setup Packer
        uses: hashicorp/setup-packer@main
        with:
          version: ${{ env.PACKER_VERSION }}

      - name: Initialize Packer
        working-directory: packer
        run: packer init deepseek-ocr-golden.pkr.hcl

      - name: Validate Packer template
        working-directory: packer
        run: packer validate deepseek-ocr-golden.pkr.hcl

  # Builds the AMI and exposes its ID/name as job outputs.
  build:
    name: Build Golden AMI
    runs-on: ubuntu-latest
    needs: validate
    # Only build on push to main or manual trigger (not on PRs)
    if: github.event_name == 'push' || github.event_name == 'workflow_dispatch'

    permissions:
      id-token: write  # required to assume the AWS role via OIDC
      contents: read

    outputs:
      ami_id: ${{ steps.build.outputs.ami_id }}
      ami_name: ${{ steps.build.outputs.ami_name }}

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Configure AWS credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: ${{ secrets.AWS_PACKER_ROLE_ARN }}
          aws-region: ${{ env.AWS_REGION }}

      # NOTE(review): `@main` is a mutable ref; consider pinning this action
      # to a release tag or commit SHA.
      - name: Setup Packer
        uses: hashicorp/setup-packer@main
        with:
          version: ${{ env.PACKER_VERSION }}

      - name: Initialize Packer
        working-directory: packer
        run: packer init deepseek-ocr-golden.pkr.hcl

      - name: Build AMI
        id: build
        working-directory: packer
        env:
          PKR_VAR_aws_region: ${{ env.AWS_REGION }}
          PKR_VAR_instance_type: ${{ github.event.inputs.instance_type || 'g5.xlarge' }}
          PKR_VAR_vpc_id: ${{ secrets.PACKER_VPC_ID }}
          PKR_VAR_subnet_id: ${{ secrets.PACKER_SUBNET_ID }}
        run: |
          # Without pipefail the pipeline below would report tee's exit
          # status and hide a failed packer build.
          set -euo pipefail

          # Run Packer build
          packer build -machine-readable deepseek-ocr-golden.pkr.hcl | tee build.log

          # Extract AMI ID from manifest (artifact_id is "<region>:<ami-id>")
          AMI_ID=$(jq -r '.builds[0].artifact_id | split(":")[1]' manifest.json)
          AMI_NAME=$(jq -r '.builds[0].custom_data.ami_name // "unknown"' manifest.json)

          # Refuse to publish "null" or a malformed value as the AMI ID;
          # downstream jobs write this straight into cdk.json.
          if [[ ! "$AMI_ID" =~ ^ami-[0-9a-f]+$ ]]; then
            echo "::error::Could not extract an AMI ID from manifest.json (got '$AMI_ID')"
            exit 1
          fi

          {
            echo "ami_id=$AMI_ID"
            echo "ami_name=$AMI_NAME"
          } >> "$GITHUB_OUTPUT"

          # AWS_REGION comes from the workflow-level env; it is read as a
          # shell variable rather than spliced into the script text.
          {
            echo "### AMI Build Complete :rocket:"
            echo ""
            echo "| Property | Value |"
            echo "|----------|-------|"
            echo "| AMI ID | \`$AMI_ID\` |"
            echo "| Region | $AWS_REGION |"
          } >> "$GITHUB_STEP_SUMMARY"

      - name: Upload manifest artifact
        uses: actions/upload-artifact@v4
        with:
          name: ami-manifest
          path: packer/manifest.json
          retention-days: 90

  # Opens a PR that points cdk.json's goldenAmiId context at the new AMI.
  update-cdk-context:
    name: Update CDK Context
    runs-on: ubuntu-latest
    needs: build
    if: needs.build.outputs.ami_id != ''

    permissions:
      contents: write
      pull-requests: write

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Update cdk.json with new AMI ID
        env:
          # Passed through the environment instead of being interpolated
          # into the script body.
          AMI_ID: ${{ needs.build.outputs.ami_id }}
        run: |
          set -euo pipefail

          # Update cdk.json if it exists and has goldenAmiId context
          if [ -f cdk.json ]; then
            # Check if goldenAmiId exists in context
            if jq -e '.context.goldenAmiId' cdk.json > /dev/null 2>&1; then
              jq --arg ami "$AMI_ID" '.context.goldenAmiId = $ami' cdk.json > cdk.json.tmp
              mv cdk.json.tmp cdk.json
              echo "Updated cdk.json with goldenAmiId: $AMI_ID"
            else
              echo "goldenAmiId not found in cdk.json context, skipping update"
            fi
          fi

      - name: Create Pull Request
        uses: peter-evans/create-pull-request@v6
        with:
          commit-message: "chore: update Golden AMI to ${{ needs.build.outputs.ami_id }}"
          title: "chore: Update Golden AMI to ${{ needs.build.outputs.ami_id }}"
          body: |
            ## Golden AMI Update

            A new Golden AMI has been built and is ready for deployment.

            | Property | Value |
            |----------|-------|
            | AMI ID | `${{ needs.build.outputs.ami_id }}` |
            | Region | ${{ env.AWS_REGION }} |
            | Build | [${{ github.run_id }}](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}) |

            ### Changes Included
            This AMI includes:
            - Pre-baked DeepSeek-OCR-2 model (~6GB)
            - NVIDIA drivers and container toolkit
            - vLLM and flash-attn dependencies
            - ECS agent configuration

            ### Deployment
            After merging, deploy with:
            ```bash
            STAGE=dev npm run deploy:dev
            ```
          branch: chore/update-golden-ami
          delete-branch: true

  # Writes a success/failure note to the run summary. `always()` makes the
  # job run even when build failed; when build was skipped neither step fires.
  notify:
    name: Notify Build Status
    runs-on: ubuntu-latest
    needs: [build]
    if: always()

    steps:
      - name: Build succeeded
        if: needs.build.result == 'success'
        env:
          AMI_ID: ${{ needs.build.outputs.ami_id }}
        run: |
          {
            echo "### :white_check_mark: Golden AMI Build Succeeded"
            echo ""
            echo "AMI ID: \`$AMI_ID\`"
          } >> "$GITHUB_STEP_SUMMARY"

      - name: Build failed
        if: needs.build.result == 'failure'
        run: |
          {
            echo "### :x: Golden AMI Build Failed"
            echo ""
            echo "Please check the build logs for details."
          } >> "$GITHUB_STEP_SUMMARY"

docker/Dockerfile

Lines changed: 17 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
1-
# DeepSeek-OCR vLLM Docker Image
1+
# DeepSeek-OCR-2 vLLM Docker Image
22
# Based on official vLLM OpenAI image for better compatibility
3+
# Supports BF16 inference on g5.xlarge (A10G GPU)
34

45
FROM vllm/vllm-openai:v0.8.5
56

@@ -14,15 +15,13 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
1415
git ca-certificates curl && \
1516
rm -rf /var/lib/apt/lists/*
1617

17-
# Fetch upstream DeepSeek-OCR sources at build time
18+
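# Fetch upstream DeepSeek-OCR sources at build time
# (NOTE: the clone below still targets the DeepSeek-OCR repository; no separate OCR-2 source tree is fetched)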
1819
RUN git clone --depth 1 https://github.com/deepseek-ai/DeepSeek-OCR.git /app/DeepSeek-OCR-src
1920

2021
# Copy the DeepSeek-OCR vLLM implementation (correct nested path)
22+
# Note: DeepSeek-OCR-2 uses the same vLLM implementation structure
2123
RUN cp -r /app/DeepSeek-OCR-src/DeepSeek-OCR-master/DeepSeek-OCR-vllm /app/DeepSeek-OCR-vllm
2224

23-
# Optional sanity check (can be removed once verified)
24-
# RUN ls -la /app/DeepSeek-OCR-src && ls -la /app/DeepSeek-OCR-src/DeepSeek-OCR-master
25-
2625
# Copy custom files to replace the originals (transparent replacement approach)
2726
COPY custom_config.py ./DeepSeek-OCR-vllm/config.py
2827
COPY custom_image_process.py ./DeepSeek-OCR-vllm/process/image_process.py
@@ -36,9 +35,14 @@ COPY custom_run_dpsk_ocr_eval_batch.py ./DeepSeek-OCR-vllm/run_dpsk_ocr_eval_bat
3635
# Copy the startup script
3736
COPY start_server.py .
3837

38+
# Upgrade pip and install core dependencies with specific versions for DeepSeek-OCR-2
39+
RUN pip install --no-cache-dir --upgrade pip && \
40+
pip install --no-cache-dir \
41+
torch==2.6.0 \
42+
transformers==4.46.3 \
43+
tokenizers==0.20.3
44+
3945
# Install Python dependencies (explicit list to avoid conflicts)
40-
# If you prefer upstream requirements, you can add:
41-
# RUN pip install --no-cache-dir -r /app/DeepSeek-OCR-src/requirements.txt
4246
RUN pip install --no-cache-dir \
4347
PyMuPDF \
4448
img2pdf \
@@ -54,26 +58,28 @@ RUN pip install --no-cache-dir \
5458
uvicorn[standard]==0.24.0 \
5559
python-multipart==0.0.6
5660

57-
# Install flash-attn for optimal performance
61+
# Install flash-attn for optimal performance with BF16
5862
# Note: This requires CUDA toolkit and may fail on some systems
5963
RUN pip install --no-cache-dir flash-attn==2.7.3 --no-build-isolation || \
6064
(echo "WARNING: flash-attn installation failed. The model will still work but may be slower." && \
6165
echo "This is expected if CUDA development tools are not available in the base image.")
6266

63-
# Downgrade tokenizers to compatible version if needed
64-
RUN pip install --no-cache-dir tokenizers==0.13.3 || echo "Using existing tokenizers version"
65-
6667
# Add the DeepSeek-OCR directory to PYTHONPATH
6768
ENV PYTHONPATH="/app/DeepSeek-OCR-vllm:${PYTHONPATH}"
6869

6970
# Create directories for outputs and model cache
7071
RUN mkdir -p /app/outputs /app/models
7172

7273
# Set default Hugging Face cache directory
74+
# These can be overridden to use Golden AMI pre-cached models
7375
ENV HF_HOME="/app/models"
7476
ENV TRANSFORMERS_CACHE="/app/models"
7577
ENV HUGGINGFACE_HUB_CACHE="/app/models"
7678

79+
# Default model configuration for DeepSeek-OCR-2
80+
ENV MODEL_PATH="deepseek-ai/DeepSeek-OCR-2"
81+
ENV VLLM_TORCH_DTYPE="bfloat16"
82+
7783
# Make the scripts executable
7884
RUN chmod +x /app/start_server.py
7985

docker/custom_config.py

Lines changed: 22 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,38 @@
1-
# Custom configuration for DeepSeek-OCR vLLM
1+
# Custom configuration for DeepSeek-OCR-2 vLLM
22
# This file replaces the original config.py during Docker build
33
# Modify the PROMPT value below to change the default prompt used by the OCR service
44

5-
# TODO: change modes
5+
import os
6+
7+
# Processing modes for different use cases:
68
# Tiny: base_size = 512, image_size = 512, crop_mode = False
79
# Small: base_size = 640, image_size = 640, crop_mode = False
810
# Base: base_size = 1024, image_size = 1024, crop_mode = False
911
# Large: base_size = 1280, image_size = 1280, crop_mode = False
10-
# Gundam: base_size = 1024, image_size = 640, crop_mode = True
12+
# Gundam: base_size = 1024, image_size = 768, crop_mode = True (recommended for OCR-2)
1113

1214
BASE_SIZE = 1024
13-
IMAGE_SIZE = 640
15+
IMAGE_SIZE = 768 # Updated for DeepSeek-OCR-2 (larger than OCR-1)
1416
CROP_MODE = True
15-
MIN_CROPS= 2
16-
MAX_CROPS= 6 # max:9; If your GPU memory is small, it is recommended to set it to 6.
17-
MAX_CONCURRENCY = 100 # If you have limited GPU memory, lower the concurrency count.
18-
NUM_WORKERS = 64 # image pre-process (resize/padding) workers
17+
MIN_CROPS = 2
18+
MAX_CROPS = 6 # max:9; If your GPU memory is small, it is recommended to set it to 6.
19+
MAX_CONCURRENCY = 100 # If you have limited GPU memory, lower the concurrency count.
20+
NUM_WORKERS = 64 # image pre-process (resize/padding) workers
1921
PRINT_NUM_VIS_TOKENS = False
2022
SKIP_REPEAT = True
2123

22-
# IMPORTANT: Use the Hugging Face repository ID, not a local path
23-
# vLLM will download and cache the model automatically
24-
MODEL_PATH = 'deepseek-ai/DeepSeek-OCR' # Hugging Face repository ID
25-
VLLM_TORCH_DTYPE = 'half'
24+
# DeepSeek-OCR-2 Model Configuration
25+
# Use environment variables for flexibility (Golden AMI may override)
26+
MODEL_PATH = os.environ.get('MODEL_PATH', 'deepseek-ai/DeepSeek-OCR-2')
27+
VLLM_TORCH_DTYPE = os.environ.get('VLLM_TORCH_DTYPE', 'bfloat16') # BF16 for g5 (A10G GPU)
28+
29+
# Check for pre-cached model in Golden AMI location
30+
GOLDEN_AMI_MODEL_CACHE = '/mnt/ecs-data/models'
31+
if os.path.exists(GOLDEN_AMI_MODEL_CACHE):
32+
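# Prefer the Golden AMI pre-cached model if available.
# NOTE: setdefault only applies when the variable is not already set; the Dockerfile
# sets HF_HOME, TRANSFORMERS_CACHE and HUGGINGFACE_HUB_CACHE, so inside the image
# these calls are no-ops unless those variables are unset at runtime.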
33+
os.environ.setdefault('HF_HOME', GOLDEN_AMI_MODEL_CACHE)
34+
os.environ.setdefault('TRANSFORMERS_CACHE', GOLDEN_AMI_MODEL_CACHE)
35+
os.environ.setdefault('HUGGINGFACE_HUB_CACHE', GOLDEN_AMI_MODEL_CACHE)
2636

2737
INPUT_PATH = ''
2838
OUTPUT_PATH = ''

docker/start_server.py

Lines changed: 24 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -125,26 +125,45 @@ def initialize_model():
125125
global llm, sampling_params
126126

127127
if llm is None:
128-
print("Initializing DeepSeek-OCR model...")
128+
print("Initializing DeepSeek-OCR-2 model...")
129129
print(f"Model path from config: {MODEL_PATH}")
130130

131131
# Get environment variable overrides
132132
model_path = os.environ.get('MODEL_PATH', MODEL_PATH)
133133
print(f"Final model path: {model_path}")
134134

135-
# Set up model download directory if specified
136-
hf_home = os.environ.get('HF_HOME', '/app/models')
135+
# Check for Golden AMI pre-cached model first
136+
golden_ami_cache = '/mnt/ecs-data/models'
137+
default_cache = '/app/models'
138+
139+
if os.path.exists(golden_ami_cache) and os.listdir(golden_ami_cache):
140+
hf_home = golden_ami_cache
141+
print(f"Using Golden AMI pre-cached models at: {golden_ami_cache}")
142+
else:
143+
hf_home = os.environ.get('HF_HOME', default_cache)
144+
print(f"Using standard model cache: {hf_home}")
145+
137146
os.environ['HF_HOME'] = hf_home
138147
os.environ['TRANSFORMERS_CACHE'] = hf_home
139148
os.environ['HUGGINGFACE_HUB_CACHE'] = hf_home
140149
print(f"Model cache directory: {hf_home}")
141150

151+
# Get dtype from environment (default: bfloat16 for g5/A10G)
142152
dtype = os.environ.get('VLLM_TORCH_DTYPE', VLLM_TORCH_DTYPE)
143153
print(f"dtype: {dtype}")
144154

155+
# Validate dtype for current GPU
156+
if torch.cuda.is_available():
157+
gpu_name = torch.cuda.get_device_name(0)
158+
print(f"GPU detected: {gpu_name}")
159+
# A10G (g5) supports bfloat16, T4 (g4dn) does not
160+
if 'T4' in gpu_name and dtype == 'bfloat16':
161+
print("WARNING: T4 GPU detected but bfloat16 requested. Falling back to float16.")
162+
dtype = 'float16'
163+
145164
# Initialize vLLM engine with the Hugging Face repository ID
146165
llm = LLM(
147-
model=model_path, # Use HF repository ID: "deepseek-ai/DeepSeek-OCR"
166+
model=model_path, # Use HF repository ID: "deepseek-ai/DeepSeek-OCR-2"
148167
hf_overrides={"architectures": ["DeepseekOCRForCausalLM"]},
149168
enforce_eager=True,
150169
trust_remote_code=True,
@@ -155,7 +174,7 @@ def initialize_model():
155174
gpu_memory_utilization=0.9,
156175
disable_mm_preprocessor_cache=True,
157176
download_dir=hf_home, # Specify where to download and cache the model
158-
dtype=dtype, # Use float16 for Tesla T4 and similar GPUs
177+
dtype=dtype, # Use bfloat16 for A10G (g5), float16 for T4 (g4dn)
159178
)
160179

161180
# Set up sampling parameters

0 commit comments

Comments
 (0)