aws · Yadan-Wei · Apr 7, 2026 · Apr 2, 2026 · Apr 2, 2026 · Apr 2, 2026
diff --git a/.github/actions/build-image/action.yml b/.github/actions/build-image/action.yml
@@ -69,6 +69,10 @@ inputs:
     description: 'Transformers library version (e.g., 4.28.1)'
     required: false
     default: ''
+  runtime-base:
+    description: 'Pre-built runtime base image URI. When set, skips compile stages.'
+    required: false
+    default: ''
 
 outputs:
   image-uri:
@@ -120,3 +124,4 @@ runs:
         INFERENCE_TOOLKIT_VERSION: ${{ inputs.inference-toolkit-version }}
         TORCHSERVE_VERSION: ${{ inputs.torchserve-version }}
         TRANSFORMERS_VERSION: ${{ inputs.transformers-version }}
+        RUNTIME_BASE: ${{ inputs.runtime-base }}
diff --git a/.github/config/vllm-omni-ec2-amzn2023.yml b/.github/config/vllm-omni-ec2-amzn2023.yml
@@ -0,0 +1,26 @@
+# vLLM-Omni EC2 AL2023 Image Configuration
+
+image:
+  name: "vllm-omni-ec2-amzn2023"
+  description: "vLLM-Omni for EC2 instances (AL2023, omni-modality serving)"
+
+common:
+  framework: "vllm-omni"
+  framework_version: "0.18.0"
+  job_type: "general"
+  python_version: "py312"
+  cuda_version: "cu129"
+  os_version: "amzn2023"
+  customer_type: "ec2"
+  arch_type: "x86"
+  prod_image: "vllm-omni:0.18-gpu-py312-ec2"
+  device_type: "gpu"
+  contributor: "None"
+
+release:
+  release: false
+  force_release: false
+  public_registry: false
+  private_registry: true
+  enable_soci: true
+  environment: production
diff --git a/.github/config/vllm-omni-model-tests.yml b/.github/config/vllm-omni-model-tests.yml
@@ -0,0 +1,57 @@
+# vLLM-Omni Model Test Configuration
+# Tests for omni-modality models (TTS, image generation, video, omni-chat)
+#
+# Each model defines its test_request (sent to /invocations via middleware)
+# and the route for the SageMaker routing middleware.
+#
+# Each model's s3_model archive is pre-cached in S3 and downloaded by the download-model action.
+
+s3_prefix: "s3://dlc-cicd-models/omni-models"
+
+smoke-test:
+  codebuild-fleet:
+    # --- TTS models (route: /v1/audio/speech) ---
+    - name: "qwen3-tts-1.7b-customvoice"
+      s3_model: "qwen3-tts-1.7b-customvoice.tar.gz"
+      fleet: "x86-g6xl-runner"
+      extra_args: ""
+      route: "/v1/audio/speech"
+      test_request: '{"input": "Hello, how are you?", "voice": "vivian", "language": "English"}'
+      validate: "binary_size_gt:1000"
+
+    # --- Image generation models (route: /v1/images/generations) ---
+    - name: "flux2-klein-4b"
+      s3_model: "flux2-klein-4b.tar.gz"
+      fleet: "x86-g6xl-runner"
+      extra_args: ""
+      route: "/v1/images/generations"
+      test_request: '{"prompt": "a red apple on a white table", "size": "512x512", "n": 1}'
+      validate: "json_field:data[0].b64_json"
+
+    # --- Video generation models (route: /v1/videos) ---
+    - name: "wan2.1-t2v-1.3b"
+      s3_model: "wan2.1-t2v-1.3b.tar.gz"
+      fleet: "x86-g6exl-runner"
+      extra_args: ""
+      route: "/v1/videos"
+      content_type: "multipart/form-data"
+      test_request: 'prompt=a dog running on a beach&num_frames=17&num_inference_steps=4&size=480x320&seed=42'
+      validate: "json_field:id"
+
+    # --- Omni chat models (route: /v1/chat/completions, fallthrough) ---
+    # bagel-7b-mot is commented out for now because the model is too large to run here.
+    # - name: "bagel-7b-mot"
+    #   s3_model: "bagel-7b-mot.tar.gz"
+    #   fleet: "x86-g6e4xl-runner"
+    #   extra_args: ""
+    #   route: "/v1/chat/completions"
+    #   test_request: '{"messages": [{"role": "user", "content": [{"type": "text", "text": "<|im_start|>A cute cat<|im_end|>"}]}], "modalities": ["image"], "height": 512, "width": 512, "num_inference_steps": 4, "seed": 42}'
+    #   validate: "json_field:choices[0].message.content"
+
+    - name: "qwen2.5-omni-3b"
+      s3_model: "qwen2.5-omni-3b.tar.gz"
+      fleet: "x86-g6e12xl-runner"
+      extra_args: ""
+      route: "/v1/chat/completions"
+      test_request: '{"messages": [{"role": "user", "content": "Say hello in one sentence."}], "max_tokens": 64}'
+      validate: "json_field:choices[0].message.content"
diff --git a/.github/config/vllm-omni-sagemaker-amzn2023.yml b/.github/config/vllm-omni-sagemaker-amzn2023.yml
@@ -0,0 +1,26 @@
+# vLLM-Omni SageMaker AL2023 Image Configuration
+
+image:
+  name: "vllm-omni-sagemaker-amzn2023"
+  description: "vLLM-Omni for SageMaker (AL2023, omni-modality serving)"
+
+common:
+  framework: "vllm-omni"
+  framework_version: "0.18.0"
+  job_type: "general"
+  python_version: "py312"
+  cuda_version: "cu129"
+  os_version: "amzn2023"
+  customer_type: "sagemaker"
+  arch_type: "x86"
+  prod_image: "vllm-omni:0.18-gpu-py312-sagemaker"
+  device_type: "gpu"
+  contributor: "None"
+
+release:
+  release: false
+  force_release: false
+  public_registry: false
+  private_registry: true
+  enable_soci: true
+  environment: production
diff --git a/.github/scripts/build_image.sh b/.github/scripts/build_image.sh
@@ -26,6 +26,7 @@ CUSTOMER_TYPE="${CUSTOMER_TYPE:-}"
 INFERENCE_TOOLKIT_VERSION="${INFERENCE_TOOLKIT_VERSION:-}"
 TORCHSERVE_VERSION="${TORCHSERVE_VERSION:-}"
 TRANSFORMERS_VERSION="${TRANSFORMERS_VERSION:-}"
+RUNTIME_BASE="${RUNTIME_BASE:-}"
 
 # Resolve image URI
 CI_IMAGE_URI="${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com/ci:${TAG_PR}"
@@ -67,6 +68,13 @@ BUILD_CMD="docker buildx build --progress plain \
   --build-arg FRAMEWORK=\"${FRAMEWORK}\" \
   --build-arg FRAMEWORK_VERSION=\"${FRAMEWORK_VERSION}\""
 
+# When RUNTIME_BASE is set, pass it as a build arg so the pre-built runtime base is used (skips compile stages)
+if [[ -n "${RUNTIME_BASE}" ]]; then
+  echo "Using pre-built runtime base: ${RUNTIME_BASE}"
+  BUILD_CMD="${BUILD_CMD} \
+  --build-arg RUNTIME_BASE=\"${RUNTIME_BASE}\""
+fi
+
 # Add SageMaker labels if customer-type is 'sagemaker'
 if [[ "${CUSTOMER_TYPE}" == "sagemaker" ]]; then
   BUILD_CMD="${BUILD_CMD} \

diff --git a/.github/workflows/pr-base-v1.yml b/.github/workflows/pr-base-v1.yml
@@ -1,17 +1,8 @@
 name: PR - Base v1
 
+# PR trigger disabled while work focuses on omni workflows; run manually via workflow_dispatch.
 on:
-  pull_request:
-    branches: [main]
-    types: [opened, reopened, synchronize]
-    paths:
-      - "docker/base/**"
-      - "scripts/common/**"
-      - "test/cuda/**"
-      - "test/security/data/ecr_scan_allowlist/base/**"
-      - ".github/config/base-v1.yml"
-      - ".github/workflows/pr-base-v1.yml"
-      - "!docs/**"
+  workflow_dispatch: {}
 
 permissions:
   contents: read

diff --git a/.github/workflows/pr-base-v2.yml b/.github/workflows/pr-base-v2.yml
@@ -1,17 +1,8 @@
 name: PR - Base v2
 
+# PR trigger disabled while work focuses on omni workflows; run manually via workflow_dispatch.
 on:
-  pull_request:
-    branches: [main]
-    types: [opened, reopened, synchronize]
-    paths:
-      - "docker/base/**"
-      - "scripts/common/**"
-      - "test/cuda/**"
-      - "test/security/data/ecr_scan_allowlist/base/**"
-      - ".github/config/base-v2.yml"
-      - ".github/workflows/pr-base-v2.yml"
-      - "!docs/**"
+  workflow_dispatch: {}
 
 permissions:
   contents: read

diff --git a/.github/workflows/pr-docs.yml b/.github/workflows/pr-docs.yml
@@ -1,11 +1,8 @@
 name: PR - Documentations
 
+# PR trigger disabled while work focuses on omni workflows; run manually via workflow_dispatch.
 on:
-  pull_request:
-    branches: [main]
-    types: [opened, reopened, synchronize]
-    paths:
-      - "**docs**"
+  workflow_dispatch: {}
 
 permissions:
   contents: read

diff --git a/.github/workflows/pr-lambda.yml b/.github/workflows/pr-lambda.yml
@@ -1,18 +1,8 @@
 name: PR - Lambda
 
+# PR trigger disabled while work focuses on omni workflows; run manually via workflow_dispatch.
 on:
-  pull_request:
-    branches: [main]
-    types: [opened, reopened, synchronize]
-    paths:
-      - "docker/lambda/**"
-      - "scripts/lambda/**"
-      - "scripts/common/**"
-      - "scripts/telemetry/**"
-      - "test/lambda/**"
-      - "test/security/data/ecr_scan_allowlist/lambda/**"
-      - ".github/workflows/pr-lambda.yml"
-      - "!docs/**"
+  workflow_dispatch: {}
 
 permissions:
   contents: read

diff --git a/.github/workflows/pr-pytorch-ec2.yml b/.github/workflows/pr-pytorch-ec2.yml
@@ -1,15 +1,8 @@
 name: PR - PyTorch EC2
 
+# PR trigger disabled while work focuses on omni workflows; run manually via workflow_dispatch.
 on:
-  pull_request:
-    branches: [main]
-    types: [opened, reopened, synchronize]
-    paths:
-      - "docker/pytorch/**"
-      - "scripts/pytorch/**"
-      - "test/pytorch/**"
-      - ".github/workflows/pr-pytorch-ec2.yml"
-      - "!docs/**"
+  workflow_dispatch: {}
 
 permissions:
   contents: read

diff --git a/.github/workflows/pr-ray-ec2-cpu.yml b/.github/workflows/pr-ray-ec2-cpu.yml
@@ -1,12 +1,8 @@
 name: PR - Ray EC2 CPU
 
+# PR trigger disabled while work focuses on omni workflows; run manually via workflow_dispatch.
 on:
-  pull_request:
-    branches: [main]
-    types: [opened, reopened, synchronize]
-    paths:
-      - "**ray**"
-      - "!docs/**"
+  workflow_dispatch: {}
 
 permissions:
   contents: read

diff --git a/.github/workflows/pr-ray-ec2-gpu.yml b/.github/workflows/pr-ray-ec2-gpu.yml
@@ -1,12 +1,8 @@
 name: PR - Ray EC2 GPU
 
+# PR trigger disabled while work focuses on omni workflows; run manually via workflow_dispatch.
 on:
-  pull_request:
-    branches: [main]
-    types: [opened, reopened, synchronize]
-    paths:
-      - "**ray**"
-      - "!docs/**"
+  workflow_dispatch: {}
 
 permissions:
   contents: read

diff --git a/.github/workflows/pr-ray-sagemaker-cpu.yml b/.github/workflows/pr-ray-sagemaker-cpu.yml
@@ -1,12 +1,8 @@
 name: PR - Ray SageMaker CPU
 
+# PR trigger disabled while work focuses on omni workflows; run manually via workflow_dispatch.
 on:
-  pull_request:
-    branches: [main]
-    types: [opened, reopened, synchronize]
-    paths:
-      - "**ray**"
-      - "!docs/**"
+  workflow_dispatch: {}
 
 permissions:
   contents: read

diff --git a/.github/workflows/pr-ray-sagemaker-gpu.yml b/.github/workflows/pr-ray-sagemaker-gpu.yml
@@ -1,12 +1,8 @@
 name: PR - Ray SageMaker GPU
 
+# PR trigger disabled while work focuses on omni workflows; run manually via workflow_dispatch.
 on:
-  pull_request:
-    branches: [main]
-    types: [opened, reopened, synchronize]
-    paths:
-      - "**ray**"
-      - "!docs/**"
+  workflow_dispatch: {}
 
 permissions:
   contents: read

diff --git a/.github/workflows/pr-sagemaker-xgboost.yml b/.github/workflows/pr-sagemaker-xgboost.yml
@@ -1,14 +1,8 @@
 name: PR - SageMaker XGBoost
 
+# PR trigger disabled while work focuses on omni workflows; run manually via workflow_dispatch.
 on:
-  pull_request:
-    branches: [main]
-    types: [opened, reopened, synchronize]
-    paths:
-      - "docker/xgboost/**"
-      - ".github/config/sagemaker-xgboost.yml"
-      - ".github/workflows/pr-sagemaker-xgboost.yml"
-      - "!docs/**"
+  workflow_dispatch: {}
 
 permissions:
   contents: read

diff --git a/.github/workflows/pr-sglang-ec2-amzn2023.yml b/.github/workflows/pr-sglang-ec2-amzn2023.yml
@@ -1,22 +1,8 @@
 name: PR - SGLang EC2 AMZN2023
 
+# PR trigger disabled while work focuses on omni workflows; run manually via workflow_dispatch.
 on:
-  pull_request:
-    branches: [main]
-    types: [opened, reopened, synchronize]
-    paths:
-      - "docker/sglang/Dockerfile.amzn2023"
-      - "scripts/sglang/dockerd_entrypoint.sh"
-      - "scripts/sglang/sagemaker_entrypoint.sh"
-      - "scripts/common/**"
-      - "scripts/telemetry/**"
-      - ".github/config/sglang-ec2-amzn2023.yml"
-      - ".github/config/sglang-model-tests.yml"
-      - ".github/workflows/pr-sglang-ec2-amzn2023.yml"
-      - ".github/workflows/reusable-sglang-model-tests.yml"
-      - "test/sanity/**"
-      - "test/telemetry/**"
-      - "test/sglang/scripts/**"
+  workflow_dispatch: {}
 
 permissions:
   contents: read

diff --git a/.github/workflows/pr-sglang-ec2.yml b/.github/workflows/pr-sglang-ec2.yml
@@ -1,13 +1,8 @@
 name: PR - SGLang EC2
 
+# PR trigger disabled while work focuses on omni workflows; run manually via workflow_dispatch.
 on:
-  pull_request:
-    branches: [main]
-    types: [opened, reopened, synchronize]
-    paths:
-      - "**sglang**"
-      - "!docs/**"
-      - "!**amzn2023**"
+  workflow_dispatch: {}
 
 permissions:
   contents: read

diff --git a/.github/workflows/pr-sglang-sagemaker-amzn2023.yml b/.github/workflows/pr-sglang-sagemaker-amzn2023.yml
@@ -1,24 +1,8 @@
 name: PR - SGLang SageMaker AMZN2023
 
+# PR trigger disabled while work focuses on omni workflows; run manually via workflow_dispatch.
 on:
-  pull_request:
-    branches: [main]
-    types: [opened, reopened, synchronize]
-    paths:
-      - "docker/sglang/Dockerfile.amzn2023"
-      - "scripts/sglang/dockerd_entrypoint.sh"
-      - "scripts/sglang/sagemaker_entrypoint.sh"
-      - "scripts/common/**"
-      - "scripts/telemetry/**"
-      - ".github/config/sglang-sagemaker-amzn2023.yml"
-      - ".github/workflows/pr-sglang-sagemaker-amzn2023.yml"
-      - ".github/workflows/reusable-sglang-sagemaker-tests.yml"
-      - ".github/workflows/reusable-sglang-model-tests.yml"
-      - ".github/config/sglang-model-tests.yml"
-      - "test/sanity/**"
-      - "test/telemetry/**"
-      - "test/sglang/sagemaker/**"
-      - "test/sglang/scripts/**"
+  workflow_dispatch: {}
 
 permissions:
   contents: read