aws-samples
diff --git a/‎.github/workflows/build-ami.yml‎
Lines changed: 202 additions & 0 deletions b/‎.github/workflows/build-ami.yml‎
Lines changed: 202 additions & 0 deletions
diff --git a/‎docker/Dockerfile‎
Lines changed: 17 additions & 11 deletions b/‎docker/Dockerfile‎
Lines changed: 17 additions & 11 deletions
diff --git a/‎docker/custom_config.py‎
Lines changed: 22 additions & 12 deletions b/‎docker/custom_config.py‎
Lines changed: 22 additions & 12 deletions
diff --git a/‎docker/start_server.py‎
Lines changed: 24 additions & 5 deletions b/‎docker/start_server.py‎
Lines changed: 24 additions & 5 deletions
@@ -0,0 +1,202 @@
+# GitHub Actions workflow to build DeepSeek-OCR Golden AMI
+#
+# Triggers:
+#   - push / pull_request against main touching packer/ or docker/
+#   - manual workflow_dispatch with optional region / instance type overrides
+
+name: Build Golden AMI
+
+on:
+  push:
+    branches:
+      - main
+    paths:
+      - 'packer/**'
+      - 'docker/**'
+  pull_request:
+    branches:
+      - main
+    paths:
+      - 'packer/**'
+      - 'docker/**'
+  workflow_dispatch:
+    inputs:
+      aws_region:
+        description: 'AWS Region for AMI'
+        required: false
+        default: 'us-east-1'
+      instance_type:
+        description: 'Instance type for Packer build'
+        required: false
+        default: 'g5.xlarge'
+
+# Least-privilege default for GITHUB_TOKEN. Jobs that need more (OIDC, pushing
+# a branch, opening a PR) declare their own `permissions` block.
+permissions:
+  contents: read
+
+# Serialise runs per ref so two AMI builds never overlap. In-flight runs are
+# not cancelled: interrupting a Packer build may leave its temporary builder
+# instance behind.
+concurrency:
+  group: build-ami-${{ github.ref }}
+  cancel-in-progress: false
+
+env:
+  # push / pull_request events carry no inputs, so fall back to us-east-1.
+  AWS_REGION: ${{ github.event.inputs.aws_region || 'us-east-1' }}
+  PACKER_VERSION: '1.10.0'
+
+jobs:
+  # Static validation of the Packer template. This job has no `if`, so it runs
+  # for every trigger of the workflow, pull requests included.
+  validate:
+    name: Validate Packer Template
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      # Pinned to a release tag: a branch ref such as @main can change between
+      # runs without any change to this repository.
+      - name: Setup Packer
+        uses: hashicorp/setup-packer@v3.1.0
+        with:
+          version: ${{ env.PACKER_VERSION }}
+
+      # `packer init` installs the plugins the template declares; validate
+      # needs them to be present.
+      - name: Initialize Packer
+        working-directory: packer
+        run: packer init deepseek-ocr-golden.pkr.hcl
+
+      - name: Validate Packer template
+        working-directory: packer
+        run: packer validate deepseek-ocr-golden.pkr.hcl
+
+  # Builds the AMI with Packer and publishes its ID / name as job outputs.
+  build:
+    name: Build Golden AMI
+    runs-on: ubuntu-latest
+    needs: validate
+    # Only build on push to main or manual trigger (not on PRs)
+    if: github.event_name == 'push' || github.event_name == 'workflow_dispatch'
+
+    # id-token: write allows the job to request an OIDC token, which
+    # configure-aws-credentials exchanges for the role given in role-to-assume.
+    permissions:
+      id-token: write
+      contents: read
+
+    outputs:
+      ami_id: ${{ steps.build.outputs.ami_id }}
+      ami_name: ${{ steps.build.outputs.ami_name }}
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Configure AWS credentials
+        uses: aws-actions/configure-aws-credentials@v4
+        with:
+          role-to-assume: ${{ secrets.AWS_PACKER_ROLE_ARN }}
+          aws-region: ${{ env.AWS_REGION }}
+
+      # Pinned to a release tag rather than the mutable @main branch.
+      - name: Setup Packer
+        uses: hashicorp/setup-packer@v3.1.0
+        with:
+          version: ${{ env.PACKER_VERSION }}
+
+      - name: Initialize Packer
+        working-directory: packer
+        run: packer init deepseek-ocr-golden.pkr.hcl
+
+      - name: Build AMI
+        id: build
+        working-directory: packer
+        env:
+          PKR_VAR_aws_region: ${{ env.AWS_REGION }}
+          PKR_VAR_instance_type: ${{ github.event.inputs.instance_type || 'g5.xlarge' }}
+          PKR_VAR_vpc_id: ${{ secrets.PACKER_VPC_ID }}
+          PKR_VAR_subnet_id: ${{ secrets.PACKER_SUBNET_ID }}
+        run: |
+          # The default shell is `bash -e` without pipefail, so a failing
+          # `packer build` would otherwise be hidden by the exit status of tee.
+          set -euo pipefail
+
+          # Run Packer build
+          packer build -machine-readable deepseek-ocr-golden.pkr.hcl | tee build.log
+
+          # Extract AMI ID from manifest (second ":"-separated field of artifact_id)
+          AMI_ID=$(jq -r '.builds[0].artifact_id | split(":")[1]' manifest.json)
+          AMI_NAME=$(jq -r '.builds[0].custom_data.ami_name // "unknown"' manifest.json)
+
+          # Never publish something that is not an AMI ID (e.g. the string "null"):
+          # downstream jobs only check that the output is non-empty.
+          case "$AMI_ID" in
+            ami-*) ;;
+            *)
+              echo "::error::Could not read an AMI ID from manifest.json (got '$AMI_ID')"
+              exit 1
+              ;;
+          esac
+
+          {
+            echo "ami_id=$AMI_ID"
+            echo "ami_name=$AMI_NAME"
+          } >> "$GITHUB_OUTPUT"
+
+          # The region is read from the step environment instead of being
+          # templated into the script text.
+          {
+            echo "### AMI Build Complete :rocket:"
+            echo ""
+            echo "| Property | Value |"
+            echo "|----------|-------|"
+            echo "| AMI ID | \`$AMI_ID\` |"
+            echo "| Region | $PKR_VAR_aws_region |"
+          } >> "$GITHUB_STEP_SUMMARY"
+
+      - name: Upload manifest artifact
+        uses: actions/upload-artifact@v4
+        with:
+          name: ami-manifest
+          path: packer/manifest.json
+          retention-days: 90
+
+  # Writes the freshly built AMI ID into cdk.json (context key goldenAmiId)
+  # and proposes the change as a pull request.
+  update-cdk-context:
+    name: Update CDK Context
+    runs-on: ubuntu-latest
+    needs: build
+    if: needs.build.outputs.ami_id != ''
+
+    # contents: write to push the update branch, pull-requests: write to open the PR.
+    permissions:
+      contents: write
+      pull-requests: write
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+
+      - name: Update cdk.json with new AMI ID
+        env:
+          # Passed through the environment instead of being templated into the script text.
+          AMI_ID: ${{ needs.build.outputs.ami_id }}
+        run: |
+          set -euo pipefail
+
+          # Update cdk.json if it exists and has goldenAmiId context
+          if [ ! -f cdk.json ]; then
+            echo "cdk.json not found, skipping update"
+            exit 0
+          fi
+
+          # Check if goldenAmiId exists in context
+          if jq -e '.context.goldenAmiId' cdk.json > /dev/null 2>&1; then
+            # Write to a temp file first: jq cannot edit its input in place.
+            jq --arg ami "$AMI_ID" '.context.goldenAmiId = $ami' cdk.json > cdk.json.tmp
+            mv cdk.json.tmp cdk.json
+            echo "Updated cdk.json with goldenAmiId: $AMI_ID"
+          else
+            echo "goldenAmiId not found in cdk.json context, skipping update"
+          fi
+
+      - name: Create Pull Request
+        uses: peter-evans/create-pull-request@v6
+        with:
+          commit-message: "chore: update Golden AMI to ${{ needs.build.outputs.ami_id }}"
+          title: "chore: Update Golden AMI to ${{ needs.build.outputs.ami_id }}"
+          body: |
+            ## Golden AMI Update
+
+            A new Golden AMI has been built and is ready for deployment.
+
+            | Property | Value |
+            |----------|-------|
+            | AMI ID | `${{ needs.build.outputs.ami_id }}` |
+            | Region | ${{ env.AWS_REGION }} |
+            | Build | [${{ github.run_id }}](${{ github.server_url }}/${{ github.repository }}/actions/runs/${{ github.run_id }}) |
+
+            ### Changes Included
+            This AMI includes:
+            - Pre-baked DeepSeek-OCR-2 model (~6GB)
+            - NVIDIA drivers and container toolkit
+            - vLLM and flash-attn dependencies
+            - ECS agent configuration
+
+            ### Deployment
+            After merging, deploy with:
+            ```bash
+            STAGE=dev npm run deploy:dev
+            ```
+          branch: chore/update-golden-ami
+          delete-branch: true
+
+  # Writes a one-line outcome of the build job to the run summary.
+  notify:
+    name: Notify Build Status
+    runs-on: ubuntu-latest
+    needs: [build]
+    # always(): run even when build failed, was cancelled, or was skipped.
+    if: always()
+
+    steps:
+      - name: Build succeeded
+        if: needs.build.result == 'success'
+        env:
+          AMI_ID: ${{ needs.build.outputs.ami_id }}
+        run: |
+          echo "### :white_check_mark: Golden AMI Build Succeeded" >> "$GITHUB_STEP_SUMMARY"
+          echo "" >> "$GITHUB_STEP_SUMMARY"
+          echo "AMI ID: \`$AMI_ID\`" >> "$GITHUB_STEP_SUMMARY"
+
+      - name: Build failed
+        if: needs.build.result == 'failure'
+        run: |
+          echo "### :x: Golden AMI Build Failed" >> "$GITHUB_STEP_SUMMARY"
+          echo "" >> "$GITHUB_STEP_SUMMARY"
+          echo "Please check the build logs for details." >> "$GITHUB_STEP_SUMMARY"
+
+      # Covers the remaining results (skipped, cancelled), e.g. pull request
+      # runs where the build job's own `if` excludes it.
+      - name: Build skipped or cancelled
+        if: needs.build.result != 'success' && needs.build.result != 'failure'
+        env:
+          BUILD_RESULT: ${{ needs.build.result }}
+        run: |
+          echo "### :information_source: Golden AMI Build Did Not Complete" >> "$GITHUB_STEP_SUMMARY"
+          echo "" >> "$GITHUB_STEP_SUMMARY"
+          echo "Build job result: \`$BUILD_RESULT\`" >> "$GITHUB_STEP_SUMMARY"
@@ -1,5 +1,6 @@
-# DeepSeek-OCR vLLM Docker Image
+# DeepSeek-OCR-2 vLLM Docker Image
 # Based on official vLLM OpenAI image for better compatibility
+# Supports BF16 inference on g5.xlarge (A10G GPU)
 
 FROM vllm/vllm-openai:v0.8.5
 
@@ -14,15 +15,13 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
     git ca-certificates curl && \
     rm -rf /var/lib/apt/lists/*
 
-# Fetch upstream DeepSeek-OCR sources at build time
+# Fetch upstream DeepSeek-OCR sources at build time (the same repository supplies the vLLM code used for DeepSeek-OCR-2)
 RUN git clone --depth 1 https://github.com/deepseek-ai/DeepSeek-OCR.git /app/DeepSeek-OCR-src
 
 # Copy the DeepSeek-OCR vLLM implementation (correct nested path)
+# Note: DeepSeek-OCR-2 uses the same vLLM implementation structure
 RUN cp -r /app/DeepSeek-OCR-src/DeepSeek-OCR-master/DeepSeek-OCR-vllm /app/DeepSeek-OCR-vllm
 
-# Optional sanity check (can be removed once verified)
-# RUN ls -la /app/DeepSeek-OCR-src && ls -la /app/DeepSeek-OCR-src/DeepSeek-OCR-master
-
 # Copy custom files to replace the originals (transparent replacement approach)
 COPY custom_config.py ./DeepSeek-OCR-vllm/config.py
 COPY custom_image_process.py ./DeepSeek-OCR-vllm/process/image_process.py
@@ -36,9 +35,14 @@ COPY custom_run_dpsk_ocr_eval_batch.py ./DeepSeek-OCR-vllm/run_dpsk_ocr_eval_bat
 # Copy the startup script
 COPY start_server.py .
 
+# Upgrade pip and install core dependencies with specific versions for DeepSeek-OCR-2
+RUN pip install --no-cache-dir --upgrade pip && \
+    pip install --no-cache-dir \
+    torch==2.6.0 \
+    transformers==4.46.3 \
+    tokenizers==0.20.3
+
 # Install Python dependencies (explicit list to avoid conflicts)
-# If you prefer upstream requirements, you can add:
-# RUN pip install --no-cache-dir -r /app/DeepSeek-OCR-src/requirements.txt
 RUN pip install --no-cache-dir \
     PyMuPDF \
     img2pdf \
@@ -54,26 +58,28 @@ RUN pip install --no-cache-dir \
     uvicorn[standard]==0.24.0 \
     python-multipart==0.0.6
 
-# Install flash-attn for optimal performance
+# Install flash-attn for optimal performance with BF16
 # Note: This requires CUDA toolkit and may fail on some systems
 RUN pip install --no-cache-dir flash-attn==2.7.3 --no-build-isolation || \
     (echo "WARNING: flash-attn installation failed. The model will still work but may be slower." && \
      echo "This is expected if CUDA development tools are not available in the base image.")
 
-# Downgrade tokenizers to compatible version if needed
-RUN pip install --no-cache-dir tokenizers==0.13.3 || echo "Using existing tokenizers version"
-
 # Add the DeepSeek-OCR directory to PYTHONPATH
 ENV PYTHONPATH="/app/DeepSeek-OCR-vllm:${PYTHONPATH}"
 
 # Create directories for outputs and model cache
 RUN mkdir -p /app/outputs /app/models
 
 # Set default Hugging Face cache directory
+# These can be overridden to use Golden AMI pre-cached models
 ENV HF_HOME="/app/models"
 ENV TRANSFORMERS_CACHE="/app/models"
 ENV HUGGINGFACE_HUB_CACHE="/app/models"
 
+# Default model configuration for DeepSeek-OCR-2
+ENV MODEL_PATH="deepseek-ai/DeepSeek-OCR-2"
+ENV VLLM_TORCH_DTYPE="bfloat16"
+
 # Make the scripts executable
 RUN chmod +x /app/start_server.py
 
 
@@ -1,28 +1,38 @@
-# Custom configuration for DeepSeek-OCR vLLM
+# Custom configuration for DeepSeek-OCR-2 vLLM
 # This file replaces the original config.py during Docker build
 # Modify the PROMPT value below to change the default prompt used by the OCR service
 
-# TODO: change modes
+import os
+
+# Processing modes for different use cases:
 # Tiny: base_size = 512, image_size = 512, crop_mode = False
 # Small: base_size = 640, image_size = 640, crop_mode = False
 # Base: base_size = 1024, image_size = 1024, crop_mode = False
 # Large: base_size = 1280, image_size = 1280, crop_mode = False
-# Gundam: base_size = 1024, image_size = 640, crop_mode = True
+# Gundam: base_size = 1024, image_size = 768, crop_mode = True (recommended for OCR-2)
 
 BASE_SIZE = 1024
-IMAGE_SIZE = 640
+IMAGE_SIZE = 768  # Updated for DeepSeek-OCR-2 (larger than OCR-1)
 CROP_MODE = True
-MIN_CROPS= 2
-MAX_CROPS= 6 # max:9; If your GPU memory is small, it is recommended to set it to 6.
-MAX_CONCURRENCY = 100 # If you have limited GPU memory, lower the concurrency count.
-NUM_WORKERS = 64 # image pre-process (resize/padding) workers
+MIN_CROPS = 2
+MAX_CROPS = 6  # max:9; If your GPU memory is small, it is recommended to set it to 6.
+MAX_CONCURRENCY = 100  # If you have limited GPU memory, lower the concurrency count.
+NUM_WORKERS = 64  # image pre-process (resize/padding) workers
 PRINT_NUM_VIS_TOKENS = False
 SKIP_REPEAT = True
 
-# IMPORTANT: Use the Hugging Face repository ID, not a local path
-# vLLM will download and cache the model automatically
-MODEL_PATH = 'deepseek-ai/DeepSeek-OCR'  # Hugging Face repository ID
-VLLM_TORCH_DTYPE = 'half'
+# DeepSeek-OCR-2 Model Configuration
+# Use environment variables for flexibility (Golden AMI may override)
+MODEL_PATH = os.environ.get('MODEL_PATH', 'deepseek-ai/DeepSeek-OCR-2')
+VLLM_TORCH_DTYPE = os.environ.get('VLLM_TORCH_DTYPE', 'bfloat16')  # BF16 for g5 (A10G GPU)
+
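+# Check for pre-cached model in Golden AMI location.
+# NOTE(review): setdefault() only fills variables that are not already set, so
+# when HF_HOME / TRANSFORMERS_CACHE / HUGGINGFACE_HUB_CACHE are exported by the
+# environment (e.g. Dockerfile ENV) the values below are ignored - confirm intent.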
+GOLDEN_AMI_MODEL_CACHE = '/mnt/ecs-data/models'
+if os.path.exists(GOLDEN_AMI_MODEL_CACHE):
+    # Use Golden AMI pre-cached model if available
+    os.environ.setdefault('HF_HOME', GOLDEN_AMI_MODEL_CACHE)
+    os.environ.setdefault('TRANSFORMERS_CACHE', GOLDEN_AMI_MODEL_CACHE)
+    os.environ.setdefault('HUGGINGFACE_HUB_CACHE', GOLDEN_AMI_MODEL_CACHE)
 
 INPUT_PATH = ''
 OUTPUT_PATH = ''
 
@@ -125,26 +125,45 @@ def initialize_model():
     global llm, sampling_params
 
     if llm is None:
-        print("Initializing DeepSeek-OCR model...")
+        print("Initializing DeepSeek-OCR-2 model...")
         print(f"Model path from config: {MODEL_PATH}")
 
         # Get environment variable overrides
         model_path = os.environ.get('MODEL_PATH', MODEL_PATH)
         print(f"Final model path: {model_path}")
 
-        # Set up model download directory if specified
-        hf_home = os.environ.get('HF_HOME', '/app/models')
+        # Check for Golden AMI pre-cached model first
+        golden_ami_cache = '/mnt/ecs-data/models'
+        default_cache = '/app/models'
+
+        if os.path.exists(golden_ami_cache) and os.listdir(golden_ami_cache):
+            hf_home = golden_ami_cache
+            print(f"Using Golden AMI pre-cached models at: {golden_ami_cache}")
+        else:
+            hf_home = os.environ.get('HF_HOME', default_cache)
+            print(f"Using standard model cache: {hf_home}")
+
         os.environ['HF_HOME'] = hf_home
         os.environ['TRANSFORMERS_CACHE'] = hf_home
         os.environ['HUGGINGFACE_HUB_CACHE'] = hf_home
         print(f"Model cache directory: {hf_home}")
 
+        # Get dtype from environment (default: bfloat16 for g5/A10G)
         dtype = os.environ.get('VLLM_TORCH_DTYPE', VLLM_TORCH_DTYPE)
         print(f"dtype: {dtype}")
 
+        # Validate dtype for current GPU
+        if torch.cuda.is_available():
+            gpu_name = torch.cuda.get_device_name(0)
+            print(f"GPU detected: {gpu_name}")
+            # A10G (g5) supports bfloat16, T4 (g4dn) does not
+            if 'T4' in gpu_name and dtype == 'bfloat16':
+                print("WARNING: T4 GPU detected but bfloat16 requested. Falling back to float16.")
+                dtype = 'float16'
+
         # Initialize vLLM engine with the Hugging Face repository ID
         llm = LLM(
-            model=model_path,  # Use HF repository ID: "deepseek-ai/DeepSeek-OCR"
+            model=model_path,  # Use HF repository ID: "deepseek-ai/DeepSeek-OCR-2"
             hf_overrides={"architectures": ["DeepseekOCRForCausalLM"]},
             enforce_eager=True,
             trust_remote_code=True,
@@ -155,7 +174,7 @@ def initialize_model():
             gpu_memory_utilization=0.9,
             disable_mm_preprocessor_cache=True,
             download_dir=hf_home,  # Specify where to download and cache the model
-            dtype=dtype,  # Use float16 for Tesla T4 and similar GPUs
+            dtype=dtype,  # Use bfloat16 for A10G (g5), float16 for T4 (g4dn)
         )
 
         # Set up sampling parameters