From bdb218438dfcf3c79acb93ed2f8e6d62fded2c81 Mon Sep 17 00:00:00 2001
From: Yadan Wei <yadanwei@amazon.com>
Date: Wed, 1 Apr 2026 23:41:31 -0700
Subject: [PATCH 01/58] feat: add vLLM-Omni EC2 and SageMaker DLC images

- Add omni-deps, builder-oss-omni, omni-base, ec2, sagemaker stages to Dockerfile.amzn2023
- Install vllm-omni as pure Python layer on top of vLLM runtime
- Add omni entrypoints (vllm serve --omni) for EC2 and SageMaker
- Add PR workflows for both EC2 and SageMaker omni images
- Add reusable model smoke tests (Qwen3-TTS, FLUX.2-klein-4B)
- Add SageMaker endpoint integration test with Qwen3-TTS
- System deps: espeak-ng, ffmpeg, sox, libsox-fmt-all for audio/TTS
- OSS compliance runs against omni venv separately

Signed-off-by: Yadan Wei <yadanwei@amazon.com>
---
 .github/config/vllm-omni-ec2-amzn2023.yml     |  26 ++
 .github/config/vllm-omni-model-tests.yml      |  18 ++
 .../config/vllm-omni-sagemaker-amzn2023.yml   |  26 ++
 .../workflows/pr-vllm-omni-ec2-amzn2023.yml   | 229 ++++++++++++++++
 .../pr-vllm-omni-sagemaker-amzn2023.yml       | 256 ++++++++++++++++++
 .../reusable-vllm-omni-model-tests.yml        | 103 +++++++
 docker/vllm/Dockerfile.amzn2023               | 127 +++++++++
 scripts/vllm/omni_dockerd_entrypoint.sh       |   6 +
 scripts/vllm/omni_sagemaker_entrypoint.sh     |  41 +++
 .../sagemaker/test_sm_omni_endpoint.py        | 125 +++++++++
 .../scripts/vllm_omni_ec2_smoke_test.sh       |  67 +++++
 .../scripts/vllm_omni_sagemaker_smoke_test.sh |  93 +++++++
 12 files changed, 1117 insertions(+)
 create mode 100644 .github/config/vllm-omni-ec2-amzn2023.yml
 create mode 100644 .github/config/vllm-omni-model-tests.yml
 create mode 100644 .github/config/vllm-omni-sagemaker-amzn2023.yml
 create mode 100644 .github/workflows/pr-vllm-omni-ec2-amzn2023.yml
 create mode 100644 .github/workflows/pr-vllm-omni-sagemaker-amzn2023.yml
 create mode 100644 .github/workflows/reusable-vllm-omni-model-tests.yml
 create mode 100755 scripts/vllm/omni_dockerd_entrypoint.sh
 create mode 100755 scripts/vllm/omni_sagemaker_entrypoint.sh
 create mode 100644 test/vllm-omni/sagemaker/test_sm_omni_endpoint.py
 create mode 100755 test/vllm-omni/scripts/vllm_omni_ec2_smoke_test.sh
 create mode 100755 test/vllm-omni/scripts/vllm_omni_sagemaker_smoke_test.sh

diff --git a/.github/config/vllm-omni-ec2-amzn2023.yml b/.github/config/vllm-omni-ec2-amzn2023.yml
new file mode 100644
index 000000000000..00f051d150a1
--- /dev/null
+++ b/.github/config/vllm-omni-ec2-amzn2023.yml
@@ -0,0 +1,26 @@
+# vLLM-Omni EC2 AL2023 Image Configuration
+
+image:
+  name: "vllm-omni-ec2-amzn2023"
+  description: "vLLM-Omni for EC2 instances (AL2023, omni-modality serving)"
+
+common:  # read by the load-config job of pr-vllm-omni-ec2-amzn2023.yml
+  framework: "vllm-omni"
+  framework_version: "0.18.0"
+  job_type: "general"  # exported by the PR workflow as the `container-type` output
+  python_version: "py312"
+  cuda_version: "cu129"
+  os_version: "amzn2023"
+  customer_type: "ec2"
+  arch_type: "x86"
+  prod_image: "vllm-omni:0.18-gpu-py312-ec2"  # exported as `prod-image`; no job in the PR workflow consumes it
+  device_type: "gpu"
+  contributor: "None"
+
+release:  # not read by the PR workflow; NOTE(review): presumably consumed by a release pipeline - confirm
+  release: false
+  force_release: false
+  public_registry: false
+  private_registry: true
+  enable_soci: true
+  environment: production
diff --git a/.github/config/vllm-omni-model-tests.yml b/.github/config/vllm-omni-model-tests.yml
new file mode 100644
index 000000000000..fe4c2744d9fd
--- /dev/null
+++ b/.github/config/vllm-omni-model-tests.yml
@@ -0,0 +1,18 @@
+# vLLM-Omni Model Test Configuration
+# Tests for omni-modality models (TTS, image generation)
+
+s3_prefix: "s3://dlc-cicd-models/llm-models"  # joined with each entry's s3_model to form its s3_path
+
+smoke-test:
+  codebuild-fleet:  # each entry becomes one matrix job in reusable-vllm-omni-model-tests.yml
+    - name: "qwen3-tts-1.7b-customvoice"  # also the /models/<name> directory the archive is extracted into
+      s3_model: "qwen3-tts-1.7b-customvoice.tar"
+      type: tts  # passed as the second argument to the smoke-test scripts
+      fleet: "x86-g6xl-runner"  # CodeBuild fleet the matrix job runs on
+      extra_args: "--enforce-eager --gpu-memory-utilization 0.8"  # NOTE(review): never referenced by the reusable workflow - confirm how it is applied
+
+    - name: "flux2-klein-4b"
+      s3_model: "flux2-klein-4b.tar"
+      type: diffusion
+      fleet: "x86-g6xl-runner"
+      extra_args: ""
diff --git a/.github/config/vllm-omni-sagemaker-amzn2023.yml b/.github/config/vllm-omni-sagemaker-amzn2023.yml
new file mode 100644
index 000000000000..87b9e3b35f17
--- /dev/null
+++ b/.github/config/vllm-omni-sagemaker-amzn2023.yml
@@ -0,0 +1,26 @@
+# vLLM-Omni SageMaker AL2023 Image Configuration
+
+image:
+  name: "vllm-omni-sagemaker-amzn2023"
+  description: "vLLM-Omni for SageMaker (AL2023, omni-modality serving)"
+
+common:  # read by the load-config job of pr-vllm-omni-sagemaker-amzn2023.yml
+  framework: "vllm-omni"
+  framework_version: "0.18.0"
+  job_type: "general"  # exported by the PR workflow as the `container-type` output
+  python_version: "py312"
+  cuda_version: "cu129"
+  os_version: "amzn2023"
+  customer_type: "sagemaker"
+  arch_type: "x86"
+  prod_image: "vllm-omni:0.18-gpu-py312-sagemaker"  # exported as `prod-image`; no job in the PR workflow consumes it
+  device_type: "gpu"
+  contributor: "None"
+
+release:  # not read by the PR workflow; NOTE(review): presumably consumed by a release pipeline - confirm
+  release: false
+  force_release: false
+  public_registry: false
+  private_registry: true
+  enable_soci: true
+  environment: production
diff --git a/.github/workflows/pr-vllm-omni-ec2-amzn2023.yml b/.github/workflows/pr-vllm-omni-ec2-amzn2023.yml
new file mode 100644
index 000000000000..3f6a627232e2
--- /dev/null
+++ b/.github/workflows/pr-vllm-omni-ec2-amzn2023.yml
@@ -0,0 +1,229 @@
+name: PR - vLLM-Omni EC2 AMZN2023
+
+on:
+  pull_request:
+    branches: [main]
+    types: [opened, reopened, synchronize]
+    paths:  # superset of the check-changes filters: workflow-file-only edits start a run but never set build-change
+      - "docker/vllm/Dockerfile.amzn2023"
+      - "scripts/vllm/omni_*"
+      - "scripts/common/**"
+      - "scripts/telemetry/**"
+      - ".github/config/vllm-omni-ec2-amzn2023.yml"
+      - ".github/config/vllm-omni-model-tests.yml"
+      - ".github/workflows/pr-vllm-omni-ec2-amzn2023.yml"
+      - ".github/workflows/reusable-vllm-omni-model-tests.yml"
+      - "test/vllm-omni/**"
+      - "test/telemetry/**"
+
+permissions:
+  contents: read
+  pull-requests: read
+
+env:
+  FORCE_COLOR: "1"
+  CONFIG_FILE: ".github/config/vllm-omni-ec2-amzn2023.yml"  # loaded and parsed by the load-config job
+
+jobs:
+  gatekeeper:  # Every other job depends on this gate, directly or through load-config/check-changes.
+    runs-on: ubuntu-latest
+    concurrency:
+      group: ${{ github.workflow }}-gate-${{ github.event.pull_request.number }}
+      cancel-in-progress: true
+    steps:
+      - name: Checkout base branch (safe)
+        uses: actions/checkout@v5
+        with:
+          ref: ${{ github.event.pull_request.base.sha }}  # base commit, so the local gate action below is not the PR's copy
+          fetch-depth: 1
+
+      - name: Run permission gate (from base)
+        uses: ./.github/actions/pr-permission-gate
+
+  load-config:  # Republishes the `common` section of CONFIG_FILE as job outputs for the jobs below.
+    needs: [gatekeeper]
+    if: success()
+    runs-on: ubuntu-latest
+    outputs:
+      framework: ${{ steps.parse.outputs.framework }}
+      framework-version: ${{ steps.parse.outputs.framework-version }}
+      python-version: ${{ steps.parse.outputs.python-version }}
+      cuda-version: ${{ steps.parse.outputs.cuda-version }}
+      os-version: ${{ steps.parse.outputs.os-version }}
+      container-type: ${{ steps.parse.outputs.container-type }}
+      device-type: ${{ steps.parse.outputs.device-type }}
+      arch-type: ${{ steps.parse.outputs.arch-type }}
+      contributor: ${{ steps.parse.outputs.contributor }}
+      customer-type: ${{ steps.parse.outputs.customer-type }}
+      prod-image: ${{ steps.parse.outputs.prod-image }}
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v5
+
+      - name: Load configuration
+        id: load
+        uses: ./.github/actions/load-config
+        with:
+          config-file: ${{ env.CONFIG_FILE }}
+
+      - name: Parse configuration
+        id: parse
+        env:
+          CONFIG_JSON: ${{ steps.load.outputs.config }}
+        run: |
+          # The config reaches the script through env, never by interpolation, so quotes in it cannot alter the shell.
+          jq -r '.common |
+            "framework=\(.framework)", "framework-version=\(.framework_version)",
+            "python-version=\(.python_version)", "cuda-version=\(.cuda_version)",
+            "os-version=\(.os_version)", "container-type=\(.job_type)",
+            "device-type=\(.device_type // "gpu")", "arch-type=\(.arch_type // "x86")",
+            "contributor=\(.contributor // "None")", "customer-type=\(.customer_type // "")",
+            "prod-image=\(.prod_image)"
+          ' <<< "$CONFIG_JSON" >> "$GITHUB_OUTPUT"
+
+  check-changes:  # Runs pre-commit, then classifies the PR's changed paths into the two outputs below.
+    needs: [gatekeeper]
+    if: success()
+    runs-on: ubuntu-latest
+    concurrency:
+      group: ${{ github.workflow }}-check-changes-${{ github.event.pull_request.number }}
+      cancel-in-progress: true
+    outputs:
+      build-change: ${{ steps.changes.outputs.build-change }}
+      telemetry-test-change: ${{ steps.changes.outputs.telemetry-test-change }}
+    steps:
+      - name: Checkout DLC source
+        uses: actions/checkout@v5
+
+      - name: Setup python
+        uses: actions/setup-python@v6
+        with:
+          python-version: "3.12"
+
+      - name: Run pre-commit
+        uses: pre-commit/action@v3.0.1
+        with:
+          extra_args: --all-files  # lint the whole tree, not only the files touched by the PR
+
+      - name: Detect file changes
+        id: changes
+        uses: dorny/paths-filter@v3
+        with:
+          filters: |
+            build-change:
+              - "docker/vllm/Dockerfile.amzn2023"
+              - "scripts/vllm/omni_*"
+              - "scripts/common/**"
+              - "scripts/telemetry/**"
+              - ".github/config/vllm-omni-ec2-amzn2023.yml"
+              - ".github/config/vllm-omni-model-tests.yml"
+              - "test/vllm-omni/**"
+            telemetry-test-change:
+              - "test/telemetry/**"
+
+  build-image:  # Builds the PR image on a CodeBuild fleet; its ci-image output feeds every test job.
+    needs: [check-changes, load-config]
+    if: needs.check-changes.outputs.build-change == 'true'  # skipped unless a build-relevant path changed
+    runs-on:
+      - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
+        fleet:x86-vllm-build-runner
+        buildspec-override:true
+    timeout-minutes: 720  # 12 hours
+    concurrency:
+      group: ${{ github.workflow }}-build-image-${{ github.event.pull_request.number }}
+      cancel-in-progress: true
+    outputs:
+      ci-image: ${{ steps.build.outputs.image-uri }}
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v5
+
+      - name: Build image
+        id: build
+        uses: ./.github/actions/build-image
+        with:
+          framework: ${{ needs.load-config.outputs.framework }}
+          target: vllm-omni-ec2-amzn2023
+          base-image: nvidia/cuda:12.9.1-devel-amzn2023
+          framework-version: ${{ needs.load-config.outputs.framework-version }}
+          container-type: ${{ needs.load-config.outputs.container-type }}
+          aws-account-id: ${{ vars.CI_AWS_ACCOUNT_ID }}
+          aws-region: ${{ vars.AWS_REGION }}
+          tag-pr: vllm-omni-${{ needs.load-config.outputs.framework-version }}-gpu-${{ needs.load-config.outputs.python-version }}-${{ needs.load-config.outputs.cuda-version }}-${{ needs.load-config.outputs.os-version }}-ec2-pr-${{ github.event.pull_request.number }}
+          dockerfile-path: docker/vllm/Dockerfile.amzn2023
+          arch-type: ${{ needs.load-config.outputs.arch-type }}
+          device-type: ${{ needs.load-config.outputs.device-type }}
+          cuda-version: ${{ needs.load-config.outputs.cuda-version }}
+          python-version: ${{ needs.load-config.outputs.python-version }}
+          os-version: ${{ needs.load-config.outputs.os-version }}
+          contributor: ${{ needs.load-config.outputs.contributor }}
+          customer-type: ${{ needs.load-config.outputs.customer-type }}
+
+  sanity-test:  # Calls the reusable sanity workflow with the image built in this run.
+    needs: [check-changes, build-image, load-config]
+    if: |
+      always() && !failure() && !cancelled() &&
+      needs.check-changes.outputs.build-change == 'true'
+    concurrency:
+      group: ${{ github.workflow }}-sanity-test-${{ github.event.pull_request.number }}
+      cancel-in-progress: true
+    uses: ./.github/workflows/reusable-sanity-tests.yml
+    with:
+      image-uri: ${{ needs.build-image.outputs.ci-image }}
+      aws-account-id: ${{ vars.CI_AWS_ACCOUNT_ID }}
+      aws-region: ${{ vars.AWS_REGION }}
+      framework: ${{ needs.load-config.outputs.framework }}
+      framework-version: ${{ needs.load-config.outputs.framework-version }}
+      python-version: ${{ needs.load-config.outputs.python-version }}
+      cuda-version: ${{ needs.load-config.outputs.cuda-version }}
+      os-version: ${{ needs.load-config.outputs.os-version }}
+      customer-type: ${{ needs.load-config.outputs.customer-type }}
+      arch-type: ${{ needs.load-config.outputs.arch-type }}
+      device-type: ${{ needs.load-config.outputs.device-type }}
+      contributor: ${{ needs.load-config.outputs.contributor }}
+      container-type: ${{ needs.load-config.outputs.container-type }}
+
+  security-test:  # Calls the reusable security workflow with the image built in this run.
+    needs: [build-image, load-config]
+    if: success()  # a skipped or failed build-image skips this job as well
+    concurrency:
+      group: ${{ github.workflow }}-security-test-${{ github.event.pull_request.number }}
+      cancel-in-progress: true
+    uses: ./.github/workflows/reusable-security-tests.yml
+    with:
+      image-uri: ${{ needs.build-image.outputs.ci-image }}
+      aws-account-id: ${{ vars.CI_AWS_ACCOUNT_ID }}
+      aws-region: ${{ vars.AWS_REGION }}
+      framework: ${{ needs.load-config.outputs.framework }}
+      framework-version: ${{ needs.load-config.outputs.framework-version }}
+
+  telemetry-test:  # Needs a freshly built image: this workflow has no fallback image-uri, so a skipped build must skip this job.
+    needs: [check-changes, build-image, load-config]
+    if: |
+      always() && !failure() && !cancelled() && needs.build-image.result == 'success' &&
+      (needs.check-changes.outputs.build-change == 'true' || needs.check-changes.outputs.telemetry-test-change == 'true')
+    concurrency:
+      group: ${{ github.workflow }}-telemetry-test-${{ github.event.pull_request.number }}
+      cancel-in-progress: false
+    uses: ./.github/workflows/reusable-telemetry-tests.yml
+    with:
+      image-uri: ${{ needs.build-image.outputs.ci-image }}
+      aws-account-id: ${{ vars.CI_AWS_ACCOUNT_ID }}
+      aws-region: ${{ vars.AWS_REGION }}
+      framework: ${{ needs.load-config.outputs.framework }}
+      framework-version: ${{ needs.load-config.outputs.framework-version }}
+      container-type: ${{ needs.load-config.outputs.container-type }}
+
+  omni-model-smoke-tests:  # Runs the per-model smoke-test matrix against the image built in this run.
+    needs: [build-image, load-config]
+    if: success()  # a skipped or failed build-image skips this job as well
+    concurrency:
+      group: ${{ github.workflow }}-omni-model-tests-${{ github.event.pull_request.number }}
+      cancel-in-progress: true
+    uses: ./.github/workflows/reusable-vllm-omni-model-tests.yml
+    with:
+      image-uri: ${{ needs.build-image.outputs.ci-image }}
+      aws-account-id: ${{ vars.CI_AWS_ACCOUNT_ID }}
+      aws-region: ${{ vars.AWS_REGION }}
+    secrets: inherit  # NOTE(review): the called workflow declares no secrets - confirm this is needed
diff --git a/.github/workflows/pr-vllm-omni-sagemaker-amzn2023.yml b/.github/workflows/pr-vllm-omni-sagemaker-amzn2023.yml
new file mode 100644
index 000000000000..2e4b6f23f809
--- /dev/null
+++ b/.github/workflows/pr-vllm-omni-sagemaker-amzn2023.yml
@@ -0,0 +1,256 @@
+name: PR - vLLM-Omni SageMaker AMZN2023
+
+on:
+  pull_request:
+    branches: [main]
+    types: [opened, reopened, synchronize]
+    paths:  # superset of the check-changes filters: workflow-file-only edits start a run but never set build-change
+      - "docker/vllm/Dockerfile.amzn2023"
+      - "scripts/vllm/omni_*"
+      - "scripts/common/**"
+      - "scripts/telemetry/**"
+      - ".github/config/vllm-omni-sagemaker-amzn2023.yml"
+      - ".github/config/vllm-omni-model-tests.yml"
+      - ".github/workflows/pr-vllm-omni-sagemaker-amzn2023.yml"
+      - ".github/workflows/reusable-vllm-omni-model-tests.yml"
+      - "test/vllm-omni/**"
+      - "test/telemetry/**"
+
+permissions:
+  contents: read
+  pull-requests: read
+
+env:
+  FORCE_COLOR: "1"
+  CONFIG_FILE: ".github/config/vllm-omni-sagemaker-amzn2023.yml"  # loaded and parsed by the load-config job
+
+jobs:
+  gatekeeper:  # Every other job depends on this gate, directly or through load-config/check-changes.
+    runs-on: ubuntu-latest
+    concurrency:
+      group: ${{ github.workflow }}-gate-${{ github.event.pull_request.number }}
+      cancel-in-progress: true
+    steps:
+      - name: Checkout base branch (safe)
+        uses: actions/checkout@v5
+        with:
+          ref: ${{ github.event.pull_request.base.sha }}  # base commit, so the local gate action below is not the PR's copy
+          fetch-depth: 1
+
+      - name: Run permission gate (from base)
+        uses: ./.github/actions/pr-permission-gate
+
+  load-config:  # Republishes the `common` section of CONFIG_FILE as job outputs for the jobs below.
+    needs: [gatekeeper]
+    if: success()
+    runs-on: ubuntu-latest
+    outputs:
+      framework: ${{ steps.parse.outputs.framework }}
+      framework-version: ${{ steps.parse.outputs.framework-version }}
+      python-version: ${{ steps.parse.outputs.python-version }}
+      cuda-version: ${{ steps.parse.outputs.cuda-version }}
+      os-version: ${{ steps.parse.outputs.os-version }}
+      container-type: ${{ steps.parse.outputs.container-type }}
+      device-type: ${{ steps.parse.outputs.device-type }}
+      arch-type: ${{ steps.parse.outputs.arch-type }}
+      contributor: ${{ steps.parse.outputs.contributor }}
+      customer-type: ${{ steps.parse.outputs.customer-type }}
+      prod-image: ${{ steps.parse.outputs.prod-image }}
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v5
+
+      - name: Load configuration
+        id: load
+        uses: ./.github/actions/load-config
+        with:
+          config-file: ${{ env.CONFIG_FILE }}
+
+      - name: Parse configuration
+        id: parse
+        env:
+          CONFIG_JSON: ${{ steps.load.outputs.config }}
+        run: |
+          # The config reaches the script through env, never by interpolation, so quotes in it cannot alter the shell.
+          jq -r '.common |
+            "framework=\(.framework)", "framework-version=\(.framework_version)",
+            "python-version=\(.python_version)", "cuda-version=\(.cuda_version)",
+            "os-version=\(.os_version)", "container-type=\(.job_type)",
+            "device-type=\(.device_type // "gpu")", "arch-type=\(.arch_type // "x86")",
+            "contributor=\(.contributor // "None")", "customer-type=\(.customer_type // "")",
+            "prod-image=\(.prod_image)"
+          ' <<< "$CONFIG_JSON" >> "$GITHUB_OUTPUT"
+
+  check-changes:  # Runs pre-commit, then classifies the PR's changed paths into the two outputs below.
+    needs: [gatekeeper]
+    if: success()
+    runs-on: ubuntu-latest
+    concurrency:
+      group: ${{ github.workflow }}-check-changes-${{ github.event.pull_request.number }}
+      cancel-in-progress: true
+    outputs:
+      build-change: ${{ steps.changes.outputs.build-change }}
+      telemetry-test-change: ${{ steps.changes.outputs.telemetry-test-change }}
+    steps:
+      - name: Checkout DLC source
+        uses: actions/checkout@v5
+
+      - name: Setup python
+        uses: actions/setup-python@v6
+        with:
+          python-version: "3.12"
+
+      - name: Run pre-commit
+        uses: pre-commit/action@v3.0.1
+        with:
+          extra_args: --all-files  # lint the whole tree, not only the files touched by the PR
+
+      - name: Detect file changes
+        id: changes
+        uses: dorny/paths-filter@v3
+        with:
+          filters: |
+            build-change:
+              - "docker/vllm/Dockerfile.amzn2023"
+              - "scripts/vllm/omni_*"
+              - "scripts/common/**"
+              - "scripts/telemetry/**"
+              - ".github/config/vllm-omni-sagemaker-amzn2023.yml"
+              - ".github/config/vllm-omni-model-tests.yml"
+              - "test/vllm-omni/**"
+            telemetry-test-change:
+              - "test/telemetry/**"
+
+  build-image:  # Builds the PR image on a CodeBuild fleet; its ci-image output feeds every test job.
+    needs: [check-changes, load-config]
+    if: needs.check-changes.outputs.build-change == 'true'  # skipped unless a build-relevant path changed
+    runs-on:
+      - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
+        fleet:x86-vllm-build-runner
+        buildspec-override:true
+    timeout-minutes: 720  # 12 hours
+    concurrency:
+      group: ${{ github.workflow }}-build-image-${{ github.event.pull_request.number }}
+      cancel-in-progress: true
+    outputs:
+      ci-image: ${{ steps.build.outputs.image-uri }}
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v5
+
+      - name: Build image
+        id: build
+        uses: ./.github/actions/build-image
+        with:
+          framework: ${{ needs.load-config.outputs.framework }}
+          target: vllm-omni-sagemaker-amzn2023
+          base-image: nvidia/cuda:12.9.1-devel-amzn2023
+          framework-version: ${{ needs.load-config.outputs.framework-version }}
+          container-type: ${{ needs.load-config.outputs.container-type }}
+          aws-account-id: ${{ vars.CI_AWS_ACCOUNT_ID }}
+          aws-region: ${{ vars.AWS_REGION }}
+          tag-pr: vllm-omni-${{ needs.load-config.outputs.framework-version }}-gpu-${{ needs.load-config.outputs.python-version }}-${{ needs.load-config.outputs.cuda-version }}-${{ needs.load-config.outputs.os-version }}-sagemaker-pr-${{ github.event.pull_request.number }}
+          dockerfile-path: docker/vllm/Dockerfile.amzn2023
+          arch-type: ${{ needs.load-config.outputs.arch-type }}
+          device-type: ${{ needs.load-config.outputs.device-type }}
+          cuda-version: ${{ needs.load-config.outputs.cuda-version }}
+          python-version: ${{ needs.load-config.outputs.python-version }}
+          os-version: ${{ needs.load-config.outputs.os-version }}
+          contributor: ${{ needs.load-config.outputs.contributor }}
+          customer-type: ${{ needs.load-config.outputs.customer-type }}
+
+  sanity-test:  # Calls the reusable sanity workflow with the image built in this run.
+    needs: [check-changes, build-image, load-config]
+    if: |
+      always() && !failure() && !cancelled() &&
+      needs.check-changes.outputs.build-change == 'true'
+    concurrency:
+      group: ${{ github.workflow }}-sanity-test-${{ github.event.pull_request.number }}
+      cancel-in-progress: true
+    uses: ./.github/workflows/reusable-sanity-tests.yml
+    with:
+      image-uri: ${{ needs.build-image.outputs.ci-image }}
+      aws-account-id: ${{ vars.CI_AWS_ACCOUNT_ID }}
+      aws-region: ${{ vars.AWS_REGION }}
+      framework: ${{ needs.load-config.outputs.framework }}
+      framework-version: ${{ needs.load-config.outputs.framework-version }}
+      python-version: ${{ needs.load-config.outputs.python-version }}
+      cuda-version: ${{ needs.load-config.outputs.cuda-version }}
+      os-version: ${{ needs.load-config.outputs.os-version }}
+      customer-type: ${{ needs.load-config.outputs.customer-type }}
+      arch-type: ${{ needs.load-config.outputs.arch-type }}
+      device-type: ${{ needs.load-config.outputs.device-type }}
+      contributor: ${{ needs.load-config.outputs.contributor }}
+      container-type: ${{ needs.load-config.outputs.container-type }}
+
+  security-test:  # Calls the reusable security workflow with the image built in this run.
+    needs: [build-image, load-config]
+    if: success()  # a skipped or failed build-image skips this job as well
+    concurrency:
+      group: ${{ github.workflow }}-security-test-${{ github.event.pull_request.number }}
+      cancel-in-progress: true
+    uses: ./.github/workflows/reusable-security-tests.yml
+    with:
+      image-uri: ${{ needs.build-image.outputs.ci-image }}
+      aws-account-id: ${{ vars.CI_AWS_ACCOUNT_ID }}
+      aws-region: ${{ vars.AWS_REGION }}
+      framework: ${{ needs.load-config.outputs.framework }}
+      framework-version: ${{ needs.load-config.outputs.framework-version }}
+
+  telemetry-test:  # Needs a freshly built image: this workflow has no fallback image-uri, so a skipped build must skip this job.
+    needs: [check-changes, build-image, load-config]
+    if: |
+      always() && !failure() && !cancelled() && needs.build-image.result == 'success' &&
+      (needs.check-changes.outputs.build-change == 'true' || needs.check-changes.outputs.telemetry-test-change == 'true')
+    concurrency:
+      group: ${{ github.workflow }}-telemetry-test-${{ github.event.pull_request.number }}
+      cancel-in-progress: false
+    uses: ./.github/workflows/reusable-telemetry-tests.yml
+    with:
+      image-uri: ${{ needs.build-image.outputs.ci-image }}
+      aws-account-id: ${{ vars.CI_AWS_ACCOUNT_ID }}
+      aws-region: ${{ vars.AWS_REGION }}
+      framework: ${{ needs.load-config.outputs.framework }}
+      framework-version: ${{ needs.load-config.outputs.framework-version }}
+      container-type: ${{ needs.load-config.outputs.container-type }}
+
+  omni-model-smoke-tests:  # Runs the per-model smoke-test matrix against the image built in this run.
+    needs: [build-image, load-config]
+    if: success()  # a skipped or failed build-image skips this job as well
+    concurrency:
+      group: ${{ github.workflow }}-omni-model-tests-${{ github.event.pull_request.number }}
+      cancel-in-progress: true
+    uses: ./.github/workflows/reusable-vllm-omni-model-tests.yml
+    with:
+      image-uri: ${{ needs.build-image.outputs.ci-image }}
+      aws-account-id: ${{ vars.CI_AWS_ACCOUNT_ID }}
+      aws-region: ${{ vars.AWS_REGION }}
+    secrets: inherit  # NOTE(review): the called workflow declares no secrets - confirm this is needed
+
+  sagemaker-endpoint-test:  # Runs the pytest SageMaker endpoint test against the image built in this run.
+    needs: [build-image, load-config]
+    if: success()  # a skipped or failed build-image skips this job as well
+    runs-on:
+      - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
+        fleet:default-runner
+        buildspec-override:true
+    concurrency:
+      group: ${{ github.workflow }}-sm-endpoint-${{ github.event.pull_request.number }}
+      cancel-in-progress: true
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v5
+
+      - name: Setup Python
+        run: |
+          uv venv --python 3.12  # NOTE(review): assumes uv is preinstalled on the runner - confirm
+          source .venv/bin/activate
+          uv pip install -r test/requirements.txt
+
+      - name: Run SageMaker endpoint test
+        run: |
+          source .venv/bin/activate  # each run step starts a new shell, so the venv is re-activated
+          PYTHONPATH=$(pwd)/test:$PYTHONPATH pytest test/vllm-omni/sagemaker/test_sm_omni_endpoint.py -v \
+            --image-uri ${{ needs.build-image.outputs.ci-image }} \
+            --aws-region ${{ vars.AWS_REGION }}
diff --git a/.github/workflows/reusable-vllm-omni-model-tests.yml b/.github/workflows/reusable-vllm-omni-model-tests.yml
new file mode 100644
index 000000000000..3ab8bdd18236
--- /dev/null
+++ b/.github/workflows/reusable-vllm-omni-model-tests.yml
@@ -0,0 +1,103 @@
+name: Reusable vLLM-Omni Model Smoke Tests
+
+permissions:
+  contents: read
+
+on:
+  workflow_call:  # called by the EC2 and SageMaker vLLM-Omni PR workflows
+    inputs:
+      image-uri:  # pulled and run by every smoke-test matrix job
+        description: "Image URI to test"
+        required: true
+        type: string
+      aws-account-id:
+        description: "AWS account ID for ECR authentication"
+        required: true
+        type: string
+      aws-region:
+        description: "AWS region for ECR authentication"
+        required: true
+        type: string
+
+jobs:
+  load-models:  # Turns .github/config/vllm-omni-model-tests.yml into the JSON matrix consumed by smoke-test.
+    runs-on: ubuntu-latest
+    outputs:
+      matrix: ${{ steps.parse.outputs.matrix }}
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v5
+
+      - name: Parse model config
+        id: parse
+        run: |
+          python3 -c "
+          import yaml, json
+          with open('.github/config/vllm-omni-model-tests.yml') as f:
+              cfg = yaml.safe_load(f)
+          prefix = cfg.get('s3_prefix', '')
+          models = cfg.get('smoke-test', {}).get('codebuild-fleet', [])
+          for m in models:
+              m['s3_path'] = f\"{prefix}/{m['s3_model']}\"
+          print(f'matrix={json.dumps(models)}')
+          " >> "$GITHUB_OUTPUT"  # a single `matrix=<json list>` line becomes the step output
+
+  smoke-test:  # One matrix job per model: fetch the weights, then run both smoke scripts inside the image.
+    needs: load-models
+    if: needs.load-models.outputs.matrix != '[]'
+    strategy:
+      fail-fast: false
+      matrix:
+        model: ${{ fromJson(needs.load-models.outputs.matrix) }}
+    runs-on:
+      - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
+        fleet:${{ matrix.model.fleet }}
+        buildspec-override:true
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v5
+
+      - name: ECR login
+        uses: ./.github/actions/ecr-authenticate
+        with:
+          aws-account-id: ${{ inputs.aws-account-id }}
+          aws-region: ${{ inputs.aws-region }}
+          image-uri: ${{ inputs.image-uri }}
+
+      - name: Download model from S3
+        run: |
+          mkdir -p /models/${{ matrix.model.name }}
+          aws s3 cp ${{ matrix.model.s3_path }} /tmp/${{ matrix.model.s3_model }}
+          tar xf /tmp/${{ matrix.model.s3_model }} -C /models/${{ matrix.model.name }}
+          rm /tmp/${{ matrix.model.s3_model }}
+          echo "Model extracted to /models/${{ matrix.model.name }}"
+          ls /models/${{ matrix.model.name }}/ | head -10
+
+      - name: Pull image
+        run: docker pull ${{ inputs.image-uri }}
+
+      - name: Run EC2 smoke test
+        run: |
+          IMAGE="${{ inputs.image-uri }}"
+          CONTAINER_ID=$(docker run -d --rm --gpus all \
+            --shm-size=4g \
+            -v "/models/${{ matrix.model.name }}:/models/${{ matrix.model.name }}" \
+            -v "$(pwd)/test/vllm-omni/scripts:/workspace/test" \
+            --entrypoint /bin/bash \
+            "${IMAGE}" -c 'sleep infinity')
+          trap 'docker kill "${CONTAINER_ID}" 2>/dev/null || true' EXIT  # reap the container even when the test fails
+          docker exec "${CONTAINER_ID}" bash /workspace/test/vllm_omni_ec2_smoke_test.sh \
+            "/models/${{ matrix.model.name }}" "${{ matrix.model.type }}"
+
+      - name: Run SageMaker smoke test
+        run: |
+          IMAGE="${{ inputs.image-uri }}"
+          CONTAINER_ID=$(docker run -d --rm --gpus all \
+            --shm-size=4g \
+            -v "/models/${{ matrix.model.name }}:/models/${{ matrix.model.name }}" \
+            -v "$(pwd)/test/vllm-omni/scripts:/workspace/test" \
+            --entrypoint /bin/bash \
+            "${IMAGE}" -c 'sleep infinity')
+          trap 'docker kill "${CONTAINER_ID}" 2>/dev/null || true' EXIT  # reap the container even when the test fails
+          docker exec "${CONTAINER_ID}" bash /workspace/test/vllm_omni_sagemaker_smoke_test.sh \
+            "/models/${{ matrix.model.name }}" "${{ matrix.model.type }}"
diff --git a/docker/vllm/Dockerfile.amzn2023 b/docker/vllm/Dockerfile.amzn2023
index 2c580138665a..67751836984f 100644
--- a/docker/vllm/Dockerfile.amzn2023
+++ b/docker/vllm/Dockerfile.amzn2023
@@ -339,4 +339,131 @@ RUN dnf upgrade -y --security --releasever latest --setopt=install_weak_deps=Fal
 COPY ./scripts/vllm/sagemaker_entrypoint.sh /usr/local/bin/sagemaker_entrypoint.sh
 RUN chmod +x /usr/local/bin/sagemaker_entrypoint.sh
 
+ENTRYPOINT ["/usr/local/bin/sagemaker_entrypoint.sh"]
+
+# =============================================================================
+# STAGE: omni-deps — install vllm-omni on top of runtime venv
+# =============================================================================
+FROM runtime AS omni-deps
+
+ARG VLLM_OMNI_VERSION=0.18.0
+
+# System deps for omni-modality (TTS, audio, image)
+RUN dnf install -y --setopt=install_weak_deps=False \
+  espeak-ng ffmpeg sox libsox-fmt-all \
+  && dnf clean all && rm -rf /var/cache/dnf
+
+# Install vllm-omni (pure Python, no compilation)
+RUN --mount=type=cache,target=/root/.cache/uv uv pip install vllm-omni==${VLLM_OMNI_VERSION}
+
+# =============================================================================
+# STAGE: builder-oss-omni — OSS compliance for omni venv
+# =============================================================================
+FROM nvidia/cuda:${CUDA_VERSION}-runtime-amzn2023 AS builder-oss-omni
+ARG PYTHON_VERSION
+RUN dnf install -y --allowerasing python${PYTHON_VERSION} curl && dnf clean all
+COPY --from=omni-deps /opt/venv /opt/venv
+COPY scripts/common/setup_oss_compliance.sh /tmp/setup_oss_compliance.sh
+RUN PATH="/opt/venv/bin:${PATH}" bash /tmp/setup_oss_compliance.sh python${PYTHON_VERSION} \
+  && touch /root/THIRD_PARTY_SOURCE_CODE_URLS
+
+# =============================================================================
+# STAGE: omni-base — DLC overlay for vLLM-Omni
+# =============================================================================
+FROM omni-deps AS omni-base
+
+ARG PYTHON="python3"
+ARG PYTHON_VERSION=3.12
+ARG CUDA_VERSION
+
+LABEL maintainer="Amazon AI"
+LABEL dlc_major_version="1"
+
+ENV LANG=C.UTF-8 \
+  LC_ALL=C.UTF-8 \
+  DLC_CONTAINER_TYPE=general \
+  PYTHONDONTWRITEBYTECODE=1 \
+  PYTHONUNBUFFERED=1 \
+  PYTHONIOENCODING=UTF-8 \
+  LD_LIBRARY_PATH="/opt/amazon/ofi-nccl/lib64:/opt/amazon/openmpi/lib64:/opt/amazon/efa/lib64:/usr/local/cuda/lib64:${LD_LIBRARY_PATH}" \
+  PATH="/opt/amazon/openmpi/bin:/opt/amazon/efa/bin:/usr/local/cuda/bin:${PATH}"
+
+WORKDIR /
+
+# Install DLC Python dependencies
+RUN uv pip install --no-cache-dir botocore
+
+# Patch CVEs
+RUN uv pip install --no-cache-dir \
+  "pillow>=12.1.1" \
+  "xgrammar>=0.1.32" \
+  "PyJWT>=2.12.0" \
+  "cbor2>=5.9.0"
+
+COPY ./scripts/telemetry/deep_learning_container.py /usr/local/bin/deep_learning_container.py
+COPY ./scripts/telemetry/bash_telemetry.sh.template /tmp/bash_telemetry.sh.template
+
+ARG FRAMEWORK
+ARG FRAMEWORK_VERSION
+ARG CONTAINER_TYPE
+
+# telemetry: render the template from the build args; no trailing "|| true", which would hide a failure of any step
+RUN chmod +x /usr/local/bin/deep_learning_container.py \
+  && sed -e "s/{{FRAMEWORK}}/${FRAMEWORK}/g" \
+    -e "s/{{FRAMEWORK_VERSION}}/${FRAMEWORK_VERSION}/g" \
+    -e "s/{{CONTAINER_TYPE}}/${CONTAINER_TYPE}/g" \
+    /tmp/bash_telemetry.sh.template >/usr/local/bin/bash_telemetry.sh \
+  && chmod +x /usr/local/bin/bash_telemetry.sh \
+  && rm /tmp/bash_telemetry.sh.template \
+  && echo 'source /usr/local/bin/bash_telemetry.sh' >>/etc/bashrc \
+  && echo 'source /usr/local/bin/bash_telemetry.sh' >>/root/.bashrc \
+  && ln -sf /opt/venv/bin/python3 /usr/bin/python \
+  && rm -rf /tmp/tmp* \
+  && rm -rf /tmp/uv* \
+  && rm -rf /var/cache/dnf \
+  && rm -rf /root/.cache
+
+# OSS compliance (from omni-specific builder)
+COPY --from=builder-oss-omni /root/THIRD_PARTY_SOURCE_CODE_URLS /root/THIRD_PARTY_SOURCE_CODE_URLS
+COPY --from=builder-oss-omni /root/PYTHON_PACKAGES_LICENSES /root/PYTHON_PACKAGES_LICENSES
+COPY --from=builder-oss-omni /root/LINUX_PACKAGES_LICENSES /root/LINUX_PACKAGES_LICENSES
+COPY --from=builder-oss-omni /root/BUILD_FROM_SOURCE_PACKAGES_LICENCES /root/BUILD_FROM_SOURCE_PACKAGES_LICENCES
+COPY --from=builder-oss-omni /usr/local/bin/testOSSCompliance /usr/local/bin/testOSSCompliance
+
+# install EFA: add the cuda-rhel9 repo for libnccl, run the EFA installer, then remove libnccl-devel and nvdisasm (NOTE(review): repo uses gpgcheck=0 — confirm intended)
+COPY ./scripts/common/install_efa_amzn2023.sh install_efa_amzn2023.sh
+ARG EFA_VERSION="1.47.0"
+RUN echo -e '[cuda-rhel9]\nname=cuda-rhel9\nbaseurl=https://developer.download.nvidia.com/compute/cuda/repos/rhel9/x86_64\nenabled=1\ngpgcheck=0' >/etc/yum.repos.d/cuda-rhel9.repo \
+  && dnf install -y --setopt=install_weak_deps=False libnccl libnccl-devel \
+  && ldconfig \
+  && bash install_efa_amzn2023.sh ${EFA_VERSION} \
+  && rm install_efa_amzn2023.sh \
+  && dnf remove -y libnccl-devel \
+  && dnf clean all && rm -rf /var/cache/dnf \
+  && rm -rf /usr/local/cuda/bin/nvdisasm*
+
+# ====================== omni ec2 =========================================
+FROM omni-base AS vllm-omni-ec2-amzn2023
+
+ARG CACHE_REFRESH=0
+RUN dnf upgrade -y --security --releasever latest --setopt=install_weak_deps=False \
+  && dnf clean all && rm -rf /var/cache/dnf /tmp/* \
+  && ln -sf /opt/venv/bin/python3 /usr/bin/python3
+
+COPY ./scripts/vllm/omni_dockerd_entrypoint.sh /usr/local/bin/dockerd_entrypoint.sh
+RUN chmod +x /usr/local/bin/dockerd_entrypoint.sh
+
+ENTRYPOINT ["/usr/local/bin/dockerd_entrypoint.sh"]
+
+# ====================== omni sagemaker =========================================
+FROM omni-base AS vllm-omni-sagemaker-amzn2023
+
+ARG CACHE_REFRESH=0
+RUN dnf upgrade -y --security --releasever latest --setopt=install_weak_deps=False \
+  && dnf clean all && rm -rf /var/cache/dnf /tmp/* \
+  && ln -sf /opt/venv/bin/python3 /usr/bin/python3
+
+COPY ./scripts/vllm/omni_sagemaker_entrypoint.sh /usr/local/bin/sagemaker_entrypoint.sh
+RUN chmod +x /usr/local/bin/sagemaker_entrypoint.sh
+
 ENTRYPOINT ["/usr/local/bin/sagemaker_entrypoint.sh"]
\ No newline at end of file
diff --git a/scripts/vllm/omni_dockerd_entrypoint.sh b/scripts/vllm/omni_dockerd_entrypoint.sh
new file mode 100755
index 000000000000..82166d04814c
--- /dev/null
+++ b/scripts/vllm/omni_dockerd_entrypoint.sh
@@ -0,0 +1,6 @@
+#!/usr/bin/env bash
+# Best-effort telemetry: run the script and ignore any failure,
+# including the script being absent; all of its output is discarded.
+bash /usr/local/bin/bash_telemetry.sh >/dev/null 2>&1 || true
+
+exec vllm serve --omni "$@"
diff --git a/scripts/vllm/omni_sagemaker_entrypoint.sh b/scripts/vllm/omni_sagemaker_entrypoint.sh
new file mode 100755
index 000000000000..0d8e8b3cd691
--- /dev/null
+++ b/scripts/vllm/omni_sagemaker_entrypoint.sh
@@ -0,0 +1,41 @@
+#!/bin/bash
+# Best-effort telemetry: run the script and ignore any failure,
+# including the script being absent; all of its output is discarded.
+bash /usr/local/bin/bash_telemetry.sh >/dev/null 2>&1 || true
+
+PREFIX="SM_VLLM_"
+ARG_PREFIX="--"
+
+ARGS=(--port 8080)
+
+# SM_VLLM_MODEL unset or empty: fall back to a non-empty /opt/ml/model, then to HF_MODEL_ID
+if [ -z "${SM_VLLM_MODEL}" ]; then
+    if [ -d "/opt/ml/model" ] && [ "$(ls -A /opt/ml/model 2>/dev/null)" ]; then
+        echo "INFO: SM_VLLM_MODEL not set, auto-detected model at /opt/ml/model"
+        ARGS+=(--model /opt/ml/model)
+    elif [ -n "${HF_MODEL_ID}" ]; then
+        echo "INFO: SM_VLLM_MODEL not set, using HF_MODEL_ID=${HF_MODEL_ID}"
+        ARGS+=(--model "${HF_MODEL_ID}")
+    else
+        echo "WARNING: No model specified. Set SM_VLLM_MODEL, HF_MODEL_ID, or mount a model to /opt/ml/model."
+    fi
+fi
+
+while IFS='=' read -r key value; do  # e.g. SM_VLLM_MAX_MODEL_LEN=4096 becomes: --max-model-len 4096
+    arg_name=$(echo "${key#"${PREFIX}"}" | tr '[:upper:]' '[:lower:]' | tr '_' '-')
+
+    # Value "true" (any case) -> bare flag; "false" -> dropped; anything else -> flag, plus the value if non-empty
+    lower_value=$(echo "$value" | tr '[:upper:]' '[:lower:]')
+    if [ "$lower_value" = "true" ]; then
+        ARGS+=("${ARG_PREFIX}${arg_name}")
+    elif [ "$lower_value" = "false" ]; then
+        continue
+    else
+        ARGS+=("${ARG_PREFIX}${arg_name}")
+        if [ -n "$value" ]; then
+            ARGS+=("$value")
+        fi
+    fi
+done < <(env | grep "^${PREFIX}")  # one KEY=VALUE line per SM_VLLM_* environment variable
+
+exec vllm serve --omni "${ARGS[@]}"
diff --git a/test/vllm-omni/sagemaker/test_sm_omni_endpoint.py b/test/vllm-omni/sagemaker/test_sm_omni_endpoint.py
new file mode 100644
index 000000000000..a8825af8b0dc
--- /dev/null
+++ b/test/vllm-omni/sagemaker/test_sm_omni_endpoint.py
@@ -0,0 +1,125 @@
+"""Integration test for vLLM-Omni SageMaker endpoint"""
+
+import json
+import logging
+from pprint import pformat
+
+import pytest
+from sagemaker.model import Model
+from sagemaker.predictor import Predictor
+from sagemaker.serializers import JSONSerializer
+from test_utils import clean_string, random_suffix_name, wait_for_status
+from test_utils.constants import INFERENCE_AMI_VERSION, SAGEMAKER_ROLE
+from test_utils.huggingface_helper import get_hf_token
+
+LOGGER = logging.getLogger(__name__)
+LOGGER.setLevel(logging.INFO)
+
+ENDPOINT_WAIT_PERIOD = 60
+ENDPOINT_WAIT_LENGTH = 30
+ENDPOINT_INSERVICE = "InService"
+
+
+def get_endpoint_status(sagemaker_client, endpoint_name):
+    """Return the ``EndpointStatus`` field of ``describe_endpoint`` for ``endpoint_name``."""
+    return sagemaker_client.describe_endpoint(EndpointName=endpoint_name)["EndpointStatus"]
+
+
+@pytest.fixture(scope="function")
+def model_id(request):
+    return request.param  # value supplied by @pytest.mark.parametrize("model_id", ..., indirect=True)
+
+
+@pytest.fixture(scope="function")
+def instance_type(request):
+    return request.param  # value supplied by @pytest.mark.parametrize("instance_type", ..., indirect=True)
+
+
+@pytest.fixture(scope="function")
+def model_package(aws_session, image_uri, model_id):
+    """Yield a sagemaker ``Model`` for ``model_id``; call ``delete_model`` on teardown."""
+    sm_client = aws_session.sagemaker
+    repo_name = model_id.split("/")[1]
+    model_name = random_suffix_name(f"vllm-omni-{clean_string(repo_name, '_./')}", 50)
+
+    try:
+        LOGGER.info(f"Creating SageMaker model: {model_name}")
+        container_env = {
+            "SM_VLLM_MODEL": model_id,
+            "SM_VLLM_ENFORCE_EAGER": "true",
+            "HF_TOKEN": get_hf_token(aws_session),
+        }
+        yield Model(
+            name=model_name,
+            image_uri=image_uri,
+            role=SAGEMAKER_ROLE,
+            predictor_cls=Predictor,
+            env=container_env,
+        )
+    finally:
+        LOGGER.info(f"Deleting model: {model_name}")
+        sm_client.delete_model(ModelName=model_name)
+
+
+@pytest.fixture(scope="function")
+def model_endpoint(aws_session, model_package, instance_type):
+    """Deploy the model and yield its predictor; teardown removes endpoint and config."""
+    sagemaker_client = aws_session.sagemaker
+    cleaned_instance = clean_string(instance_type, "_./")
+    endpoint_name = random_suffix_name(f"vllm-omni-{cleaned_instance}", 50)
+    try:
+        LOGGER.info("Starting endpoint deployment...")
+        predictor = model_package.deploy(
+            instance_type=instance_type,
+            initial_instance_count=1,
+            endpoint_name=endpoint_name,
+            inference_ami_version=INFERENCE_AMI_VERSION,
+            serializer=JSONSerializer(),
+            wait=True,
+        )
+        LOGGER.info(f"Waiting for endpoint {ENDPOINT_INSERVICE} status...")
+        assert wait_for_status(
+            ENDPOINT_INSERVICE,
+            ENDPOINT_WAIT_PERIOD,
+            ENDPOINT_WAIT_LENGTH,
+            get_endpoint_status,
+            sagemaker_client,
+            endpoint_name,
+        )
+        yield predictor
+    finally:
+        LOGGER.info(f"Deleting endpoint: {endpoint_name}")
+        try:
+            sagemaker_client.delete_endpoint(EndpointName=endpoint_name)
+        finally:  # still delete the config when delete_endpoint raised, so it is not orphaned
+            sagemaker_client.delete_endpoint_config(EndpointConfigName=endpoint_name)
+
+
+@pytest.mark.parametrize("instance_type", ["ml.g6.xlarge"], indirect=True)
+@pytest.mark.parametrize("model_id", ["Qwen/Qwen3-TTS-12Hz-1.7B-CustomVoice"], indirect=True)
+def test_vllm_omni_tts_endpoint(model_endpoint):
+    """Send one chat-style TTS request and require a JSON body containing "choices"."""
+    payload = {
+        "messages": [{"role": "user", "content": "Hello, this is a test."}],
+        "extra_body": {
+            "task_type": "CustomVoice",
+            "language": "English",
+            "speaker": "Ryan",
+        },
+    }
+    LOGGER.info(f"Sending TTS inference request: {pformat(payload)}")
+
+    response = model_endpoint.predict(payload)
+    if isinstance(response, bytes):
+        response = response.decode("utf-8")
+    assert response, "Model response is empty"
+    if isinstance(response, str):
+        try:
+            response = json.loads(response)
+        except json.JSONDecodeError as err:
+            # A raw string would turn the "choices" check below into a substring match.
+            pytest.fail(f"Response is not valid JSON ({err}): {response!r}")
+
+    LOGGER.info(f"TTS response received: {pformat(response)}")
+    assert "choices" in response, f"No choices in response: {response}"
+    LOGGER.info("TTS endpoint test PASSED")
diff --git a/test/vllm-omni/scripts/vllm_omni_ec2_smoke_test.sh b/test/vllm-omni/scripts/vllm_omni_ec2_smoke_test.sh
new file mode 100755
index 000000000000..45dfb2913c83
--- /dev/null
+++ b/test/vllm-omni/scripts/vllm_omni_ec2_smoke_test.sh
@@ -0,0 +1,67 @@
+#!/bin/bash
+# Smoke test for vLLM-Omni EC2 images
+# Validates that omni models can load and produce output
+set -eux
+
+nvidia-smi
+
+MODEL_PATH="${1:?Usage: $0 <model-path> <model-type>}"
+MODEL_TYPE="${2:?Usage: $0 <model-path> <model-type>}"
+
+echo "=== Testing vLLM-Omni: ${MODEL_TYPE} model at ${MODEL_PATH} ==="
+
+if [ "${MODEL_TYPE}" = "tts" ]; then
+    # Qwen3-TTS offline inference test
+    python3 -c "
+import os
+os.environ['VLLM_WORKER_MULTIPROC_METHOD'] = 'spawn'
+from vllm_omni.entrypoints.omni import Omni
+
+omni = Omni(model='${MODEL_PATH}')
+additional_information = {
+    'task_type': ['CustomVoice'],
+    'text': ['Hello, this is a test of the text to speech system.'],
+    'language': ['English'],
+    'speaker': ['Ryan'],
+    'instruct': [''],
+    'max_new_tokens': [2048],
+}
+inputs = {
+    'prompt_token_ids': [0] * 512,
+    'additional_information': additional_information,
+}
+outputs = omni.generate([inputs])
+for out in outputs:
+    mm = out.request_output.outputs[0].multimodal_output
+    assert 'audio' in mm, 'No audio in output'
+    assert mm['sr'], 'No sample rate in output'
+    print(f'Audio generated: sr={mm[\"sr\"]}, chunks={len(mm[\"audio\"])}')
+print('TTS smoke test PASSED')
+"
+
+elif [ "${MODEL_TYPE}" = "diffusion" ]; then
+    # FLUX.2-klein image generation test
+    python3 -c "
+import os
+os.environ['VLLM_WORKER_MULTIPROC_METHOD'] = 'spawn'
+from vllm_omni.entrypoints.omni import Omni
+
+omni = Omni(model='${MODEL_PATH}')
+prompt = 'a red apple on a white table'
+outputs = omni.generate(prompt)
+images = outputs[0].request_output.images
+assert len(images) > 0, 'No images generated'
+images[0].save('/tmp/omni_test_output.png')
+assert os.path.exists('/tmp/omni_test_output.png'), 'Output image not saved'
+size = os.path.getsize('/tmp/omni_test_output.png')
+assert size > 1000, f'Output image too small: {size} bytes'
+print(f'Image generated: {images[0].size}, file size: {size} bytes')
+print('Diffusion smoke test PASSED')
+"
+
+else
+    echo "ERROR: Unknown model type: ${MODEL_TYPE}"
+    exit 1
+fi
+
+echo "=== vLLM-Omni ${MODEL_TYPE} test PASSED ==="
diff --git a/test/vllm-omni/scripts/vllm_omni_sagemaker_smoke_test.sh b/test/vllm-omni/scripts/vllm_omni_sagemaker_smoke_test.sh
new file mode 100755
index 000000000000..0d395fd9285e
--- /dev/null
+++ b/test/vllm-omni/scripts/vllm_omni_sagemaker_smoke_test.sh
@@ -0,0 +1,93 @@
+#!/bin/bash
+# Smoke test for vLLM-Omni SageMaker images
+# Validates the server starts with --omni and responds to requests
+set -eux
+
+nvidia-smi
+
+MODEL_PATH="${1:?Usage: $0 <model-path> <model-type>}"
+MODEL_TYPE="${2:?Usage: $0 <model-path> <model-type>}"
+PORT=8091
+
+echo "=== Testing vLLM-Omni SageMaker: ${MODEL_TYPE} at ${MODEL_PATH} ==="
+
+# Start server in background
+vllm serve --omni --model "${MODEL_PATH}" --port ${PORT} --enforce-eager &
+SERVER_PID=$!
+
+cleanup() {
+    echo "Stopping server (PID ${SERVER_PID})..."
+    kill ${SERVER_PID} 2>/dev/null || true
+    wait ${SERVER_PID} 2>/dev/null || true
+}
+trap cleanup EXIT
+
+# Wait for server to be ready
+echo "Waiting for server to start..."
+for i in $(seq 1 120); do
+    if curl -s http://localhost:${PORT}/health >/dev/null 2>&1; then
+        echo "Server ready after ${i}s"
+        break
+    fi
+    if ! kill -0 ${SERVER_PID} 2>/dev/null; then
+        echo "ERROR: Server process died"
+        exit 1
+    fi
+    sleep 1
+done
+
+# Verify health endpoint
+curl -sf http://localhost:${PORT}/health || { echo "Health check failed"; exit 1; }
+
+# Verify models endpoint
+curl -sf http://localhost:${PORT}/v1/models | python3 -c "
+import sys, json
+data = json.load(sys.stdin)
+assert len(data['data']) > 0, 'No models listed'
+print(f'Model loaded: {data[\"data\"][0][\"id\"]}')
+"
+
+if [ "${MODEL_TYPE}" = "tts" ]; then
+    # TTS via chat completions API
+    RESPONSE=$(curl -sf http://localhost:${PORT}/v1/chat/completions \
+      -H "Content-Type: application/json" \
+      -d '{
+        "messages": [{"role": "user", "content": "Hello world"}],
+        "extra_body": {
+          "task_type": "CustomVoice",
+          "language": "English",
+          "speaker": "Ryan"
+        }
+      }')
+    echo "${RESPONSE}" | python3 -c "
+import sys, json
+data = json.load(sys.stdin)
+assert 'choices' in data, 'No choices in response'
+print('TTS serving test PASSED')
+"
+
+elif [ "${MODEL_TYPE}" = "diffusion" ]; then
+    # Image generation via chat completions API
+    RESPONSE=$(curl -sf http://localhost:${PORT}/v1/chat/completions \
+      -H "Content-Type: application/json" \
+      -d '{
+        "messages": [{"role": "user", "content": "a red apple on a white table"}],
+        "extra_body": {
+          "height": 512, "width": 512, "num_inference_steps": 4, "guidance_scale": 3.5, "seed": 42
+        }
+      }')
+    echo "${RESPONSE}" | python3 -c "
+import sys, json
+data = json.load(sys.stdin)
+assert 'choices' in data, 'No choices in response'
+content = data['choices'][0]['message']['content']
+print(f'Image generation response received, content type: {type(content)}')
+print('Diffusion serving test PASSED')
+"
+else
+    # Reject unknown types instead of reporting PASSED without any inference check.
+    echo "ERROR: Unknown model type: ${MODEL_TYPE}"
+    exit 1
+fi
+
+echo "=== vLLM-Omni SageMaker ${MODEL_TYPE} test PASSED ==="

From 9ab46fc701bc8ae20c57639837f05a854618f751 Mon Sep 17 00:00:00 2001
From: Yadan Wei <yadanwei@amazon.com>
Date: Thu, 2 Apr 2026 08:24:21 -0700
Subject: [PATCH 02/58] fix: use AL2023-compatible packages for omni system
 deps

- espeak (not espeak-ng) available in AL2023 repos
- sox available in AL2023 repos
- ffmpeg installed from static build (not in AL2023 repos)
- Removed libsox-fmt-all (not available on AL2023)

Signed-off-by: Yadan Wei <yadanwei@amazon.com>
---
 docker/vllm/Dockerfile.amzn2023 | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/docker/vllm/Dockerfile.amzn2023 b/docker/vllm/Dockerfile.amzn2023
index 67751836984f..b40fcc8dd0bc 100644
--- a/docker/vllm/Dockerfile.amzn2023
+++ b/docker/vllm/Dockerfile.amzn2023
@@ -349,8 +349,11 @@ FROM runtime AS omni-deps
 ARG VLLM_OMNI_VERSION=0.18.0
 
 # System deps for omni-modality (TTS, audio, image)
+# AL2023 has espeak/sox but not espeak-ng/ffmpeg — install ffmpeg from static build
 RUN dnf install -y --setopt=install_weak_deps=False \
-  espeak-ng ffmpeg sox libsox-fmt-all \
+  espeak sox \
+  && curl -sL https://johnvansickle.com/ffmpeg/releases/ffmpeg-release-amd64-static.tar.xz \
+  | tar -xJ --strip-components=1 -C /usr/local/bin/ --wildcards '*/ffmpeg' '*/ffprobe' \
   && dnf clean all && rm -rf /var/cache/dnf
 
 # Install vllm-omni (pure Python, no compilation)

From b8de9c13afb3cce8b597005d0f22a8cd1ec9fe39 Mon Sep 17 00:00:00 2001
From: Yadan Wei <yadanwei@amazon.com>
Date: Thu, 2 Apr 2026 08:33:12 -0700
Subject: [PATCH 03/58] fix: only install ffmpeg static binary for omni deps

- espeak/sox not available in AL2023 minimal CUDA runtime image
- sox binary only needed for Qwen3-TTS 25Hz tokenizer (not 12Hz)
- ffmpeg needed by pydub/imageio-ffmpeg for audio/video I/O
- Removed dnf install for unavailable packages

Signed-off-by: Yadan Wei <yadanwei@amazon.com>
---
 docker/vllm/Dockerfile.amzn2023 | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/docker/vllm/Dockerfile.amzn2023 b/docker/vllm/Dockerfile.amzn2023
index b40fcc8dd0bc..8f4f23bf3122 100644
--- a/docker/vllm/Dockerfile.amzn2023
+++ b/docker/vllm/Dockerfile.amzn2023
@@ -348,13 +348,11 @@ FROM runtime AS omni-deps
 
 ARG VLLM_OMNI_VERSION=0.18.0
 
-# System deps for omni-modality (TTS, audio, image)
-# AL2023 has espeak/sox but not espeak-ng/ffmpeg — install ffmpeg from static build
-RUN dnf install -y --setopt=install_weak_deps=False \
-  espeak sox \
-  && curl -sL https://johnvansickle.com/ffmpeg/releases/ffmpeg-release-amd64-static.tar.xz \
-  | tar -xJ --strip-components=1 -C /usr/local/bin/ --wildcards '*/ffmpeg' '*/ffprobe' \
-  && dnf clean all && rm -rf /var/cache/dnf
+# System deps for omni-modality (audio/video processing)
+# ffmpeg: required by pydub and imageio-ffmpeg for audio/video I/O
+# AL2023 minimal CUDA image lacks these — install ffmpeg from static build
+RUN curl -sL https://johnvansickle.com/ffmpeg/releases/ffmpeg-release-amd64-static.tar.xz \
+  | tar -xJ --strip-components=1 -C /usr/local/bin/ --wildcards '*/ffmpeg' '*/ffprobe'
 
 # Install vllm-omni (pure Python, no compilation)
 RUN --mount=type=cache,target=/root/.cache/uv uv pip install vllm-omni==${VLLM_OMNI_VERSION}

From 4567aa22812cc0fdfe7fdbadd081e5cad53cc337 Mon Sep 17 00:00:00 2001
From: Yadan Wei <yadanwei@amazon.com>
Date: Thu, 2 Apr 2026 08:51:08 -0700
Subject: [PATCH 04/58] fix: use SPAL repo for espeak-ng, sox, ffmpeg on AL2023

- Upgrade system-release to latest to enable SPAL (requires 2023.9+)
- Install espeak-ng, sox, ffmpeg-free from SPAL (Supplementary Packages for Amazon Linux)
- Replaces static binary approach with official AL2023 package repo

Signed-off-by: Yadan Wei <yadanwei@amazon.com>
---
 docker/vllm/Dockerfile.amzn2023 | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/docker/vllm/Dockerfile.amzn2023 b/docker/vllm/Dockerfile.amzn2023
index 8f4f23bf3122..8050a56e463f 100644
--- a/docker/vllm/Dockerfile.amzn2023
+++ b/docker/vllm/Dockerfile.amzn2023
@@ -348,11 +348,12 @@ FROM runtime AS omni-deps
 
 ARG VLLM_OMNI_VERSION=0.18.0
 
-# System deps for omni-modality (audio/video processing)
-# ffmpeg: required by pydub and imageio-ffmpeg for audio/video I/O
-# AL2023 minimal CUDA image lacks these — install ffmpeg from static build
-RUN curl -sL https://johnvansickle.com/ffmpeg/releases/ffmpeg-release-amd64-static.tar.xz \
-  | tar -xJ --strip-components=1 -C /usr/local/bin/ --wildcards '*/ffmpeg' '*/ffprobe'
+# System deps for omni-modality (TTS, audio, image/video)
+# Enable SPAL (Supplementary Packages for Amazon Linux) for espeak-ng, sox, ffmpeg
+RUN dnf upgrade -y --releasever=latest --setopt=install_weak_deps=False system-release \
+  && dnf install -y spal-release \
+  && dnf install -y --setopt=install_weak_deps=False espeak-ng sox ffmpeg-free \
+  && dnf clean all && rm -rf /var/cache/dnf
 
 # Install vllm-omni (pure Python, no compilation)
 RUN --mount=type=cache,target=/root/.cache/uv uv pip install vllm-omni==${VLLM_OMNI_VERSION}

From 5e7b23ed1f16c24f9387aaa76898db8d72a31873 Mon Sep 17 00:00:00 2001
From: Yadan Wei <yadanwei@amazon.com>
Date: Thu, 2 Apr 2026 08:57:36 -0700
Subject: [PATCH 05/58] fix: use --region instead of --aws-region for pytest

Signed-off-by: Yadan Wei <yadanwei@amazon.com>
---
 .github/workflows/pr-vllm-omni-sagemaker-amzn2023.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/pr-vllm-omni-sagemaker-amzn2023.yml b/.github/workflows/pr-vllm-omni-sagemaker-amzn2023.yml
index 2e4b6f23f809..1d13885896b2 100644
--- a/.github/workflows/pr-vllm-omni-sagemaker-amzn2023.yml
+++ b/.github/workflows/pr-vllm-omni-sagemaker-amzn2023.yml
@@ -253,4 +253,4 @@ jobs:
           source .venv/bin/activate
           PYTHONPATH=$(pwd)/test:$PYTHONPATH pytest test/vllm-omni/sagemaker/test_sm_omni_endpoint.py -v \
             --image-uri ${{ needs.build-image.outputs.ci-image }} \
-            --aws-region ${{ vars.AWS_REGION }}
+            --region ${{ vars.AWS_REGION }}

From ab2ac24e3272464585db0972a4d2dd5bfa331ea2 Mon Sep 17 00:00:00 2001
From: Yadan Wei <yadanwei@amazon.com>
Date: Thu, 2 Apr 2026 09:02:46 -0700
Subject: [PATCH 06/58] fix: add sagemaker SDK dep and match existing test
 pattern

- Add test/vllm-omni/sagemaker/requirements.txt with sagemaker>=2,<3
- Install test deps via uv pip matching reusable-vllm-sagemaker-tests pattern
- Run pytest from test/ directory with relative path

Signed-off-by: Yadan Wei <yadanwei@amazon.com>
---
 .github/workflows/pr-vllm-omni-sagemaker-amzn2023.yml | 8 ++++----
 test/vllm-omni/sagemaker/requirements.txt             | 1 +
 2 files changed, 5 insertions(+), 4 deletions(-)
 create mode 100644 test/vllm-omni/sagemaker/requirements.txt

diff --git a/.github/workflows/pr-vllm-omni-sagemaker-amzn2023.yml b/.github/workflows/pr-vllm-omni-sagemaker-amzn2023.yml
index 1d13885896b2..e9b39e3f7b2b 100644
--- a/.github/workflows/pr-vllm-omni-sagemaker-amzn2023.yml
+++ b/.github/workflows/pr-vllm-omni-sagemaker-amzn2023.yml
@@ -242,15 +242,15 @@ jobs:
       - name: Checkout code
         uses: actions/checkout@v5
 
-      - name: Setup Python
+      - name: Install test dependencies
         run: |
           uv venv --python 3.12
           source .venv/bin/activate
           uv pip install -r test/requirements.txt
+          uv pip install -r test/vllm-omni/sagemaker/requirements.txt
 
       - name: Run SageMaker endpoint test
         run: |
           source .venv/bin/activate
-          PYTHONPATH=$(pwd)/test:$PYTHONPATH pytest test/vllm-omni/sagemaker/test_sm_omni_endpoint.py -v \
-            --image-uri ${{ needs.build-image.outputs.ci-image }} \
-            --region ${{ vars.AWS_REGION }}
+          cd test/
+          python3 -m pytest -vs -rA --image-uri ${{ needs.build-image.outputs.ci-image }} vllm-omni/sagemaker
diff --git a/test/vllm-omni/sagemaker/requirements.txt b/test/vllm-omni/sagemaker/requirements.txt
new file mode 100644
index 000000000000..d371ab0d94a9
--- /dev/null
+++ b/test/vllm-omni/sagemaker/requirements.txt
@@ -0,0 +1 @@
+sagemaker>=2,<3

From 0de9f9787c6261c643e069791588cc0b00684f20 Mon Sep 17 00:00:00 2001
From: Yadan Wei <yadanwei@amazon.com>
Date: Thu, 2 Apr 2026 09:20:58 -0700
Subject: [PATCH 07/58] fix: increase stage init timeout for omni model tests

- Add --stage-init-timeout 600 to server start (TTS models need multi-stage init)
- Add stage_init_timeout=600 to offline Omni() calls
- Increase server wait loop from 120s to 300s

Signed-off-by: Yadan Wei <yadanwei@amazon.com>
---
 test/vllm-omni/scripts/vllm_omni_ec2_smoke_test.sh       | 4 ++--
 test/vllm-omni/scripts/vllm_omni_sagemaker_smoke_test.sh | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/test/vllm-omni/scripts/vllm_omni_ec2_smoke_test.sh b/test/vllm-omni/scripts/vllm_omni_ec2_smoke_test.sh
index 45dfb2913c83..c3c7f8363ed3 100755
--- a/test/vllm-omni/scripts/vllm_omni_ec2_smoke_test.sh
+++ b/test/vllm-omni/scripts/vllm_omni_ec2_smoke_test.sh
@@ -17,7 +17,7 @@ import os
 os.environ['VLLM_WORKER_MULTIPROC_METHOD'] = 'spawn'
 from vllm_omni.entrypoints.omni import Omni
 
-omni = Omni(model='${MODEL_PATH}')
+omni = Omni(model='${MODEL_PATH}', stage_init_timeout=600)
 additional_information = {
     'task_type': ['CustomVoice'],
     'text': ['Hello, this is a test of the text to speech system.'],
@@ -46,7 +46,7 @@ import os
 os.environ['VLLM_WORKER_MULTIPROC_METHOD'] = 'spawn'
 from vllm_omni.entrypoints.omni import Omni
 
-omni = Omni(model='${MODEL_PATH}')
+omni = Omni(model='${MODEL_PATH}', stage_init_timeout=600)
 prompt = 'a red apple on a white table'
 outputs = omni.generate(prompt)
 images = outputs[0].request_output.images
diff --git a/test/vllm-omni/scripts/vllm_omni_sagemaker_smoke_test.sh b/test/vllm-omni/scripts/vllm_omni_sagemaker_smoke_test.sh
index 0d395fd9285e..943d9d54a093 100755
--- a/test/vllm-omni/scripts/vllm_omni_sagemaker_smoke_test.sh
+++ b/test/vllm-omni/scripts/vllm_omni_sagemaker_smoke_test.sh
@@ -12,7 +12,7 @@ PORT=8091
 echo "=== Testing vLLM-Omni SageMaker: ${MODEL_TYPE} at ${MODEL_PATH} ==="
 
 # Start server in background
-vllm serve --omni --model "${MODEL_PATH}" --port ${PORT} --enforce-eager &
+vllm serve --omni --model "${MODEL_PATH}" --port ${PORT} --enforce-eager --stage-init-timeout 600 &
 SERVER_PID=$!
 
 cleanup() {
@@ -24,7 +24,7 @@ trap cleanup EXIT
 
 # Wait for server to be ready
 echo "Waiting for server to start..."
-for i in $(seq 1 120); do
+for i in $(seq 1 300); do
     if curl -s http://localhost:${PORT}/health >/dev/null 2>&1; then
         echo "Server ready after ${i}s"
         break

From ce54d9774d702e6de8770afb3717fa2322cbe639 Mon Sep 17 00:00:00 2001
From: Yadan Wei <yadanwei@amazon.com>
Date: Thu, 2 Apr 2026 09:26:53 -0700
Subject: [PATCH 08/58] fix: use download-model action for model downloads

- Use existing download-model GitHub action with caching, locking, eviction
- Downloads to /dlc-models/ (root fs) instead of /tmp
- Proper cleanup of lock PIDs and docker images

Signed-off-by: Yadan Wei <yadanwei@amazon.com>
---
 .../reusable-vllm-omni-model-tests.yml        | 22 +++++++++++--------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/.github/workflows/reusable-vllm-omni-model-tests.yml b/.github/workflows/reusable-vllm-omni-model-tests.yml
index 3ab8bdd18236..2fc322f17d25 100644
--- a/.github/workflows/reusable-vllm-omni-model-tests.yml
+++ b/.github/workflows/reusable-vllm-omni-model-tests.yml
@@ -65,13 +65,11 @@ jobs:
           image-uri: ${{ inputs.image-uri }}
 
       - name: Download model from S3
-        run: |
-          mkdir -p /models/${{ matrix.model.name }}
-          aws s3 cp ${{ matrix.model.s3_path }} /tmp/${{ matrix.model.s3_model }}
-          tar xf /tmp/${{ matrix.model.s3_model }} -C /models/${{ matrix.model.name }}
-          rm /tmp/${{ matrix.model.s3_model }}
-          echo "Model extracted to /models/${{ matrix.model.name }}"
-          ls /models/${{ matrix.model.name }}/ | head -10
+        id: download-model
+        uses: ./.github/actions/download-model
+        with:
+          s3-path: ${{ matrix.model.s3_path }}
+          model-name: ${{ matrix.model.name }}
 
       - name: Pull image
         run: docker pull ${{ inputs.image-uri }}
@@ -81,7 +79,7 @@ jobs:
           IMAGE="${{ inputs.image-uri }}"
           CONTAINER_ID=$(docker run -d --rm --gpus all \
             --shm-size=4g \
-            -v /models/${{ matrix.model.name }}:/models/${{ matrix.model.name }} \
+            -v /dlc-models:/models \
             -v $(pwd)/test/vllm-omni/scripts:/workspace/test \
             --entrypoint /bin/bash \
             ${IMAGE} -c 'sleep infinity')
@@ -94,10 +92,16 @@ jobs:
           IMAGE="${{ inputs.image-uri }}"
           CONTAINER_ID=$(docker run -d --rm --gpus all \
             --shm-size=4g \
-            -v /models/${{ matrix.model.name }}:/models/${{ matrix.model.name }} \
+            -v /dlc-models:/models \
             -v $(pwd)/test/vllm-omni/scripts:/workspace/test \
             --entrypoint /bin/bash \
             ${IMAGE} -c 'sleep infinity')
           docker exec ${CONTAINER_ID} bash /workspace/test/vllm_omni_sagemaker_smoke_test.sh \
             /models/${{ matrix.model.name }} ${{ matrix.model.type }}
           docker kill ${CONTAINER_ID} 2>/dev/null || true
+
+      - name: Cleanup
+        if: always()
+        run: |
+          kill ${{ steps.download-model.outputs.lock-pid }} 2>/dev/null || true
+          docker rmi ${{ inputs.image-uri }} 2>/dev/null || true

From 6075e81626d8f099b2cb842aedbc433241c9d8b5 Mon Sep 17 00:00:00 2001
From: Yadan Wei <yadanwei@amazon.com>
Date: Thu, 2 Apr 2026 09:30:57 -0700
Subject: [PATCH 09/58] fix: patch CVE-2026-28414 gradio path traversal in omni
 image

- Pin gradio>=6.7.0 in omni-base CVE patch layer

Signed-off-by: Yadan Wei <yadanwei@amazon.com>
---
 docker/vllm/Dockerfile.amzn2023 | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/docker/vllm/Dockerfile.amzn2023 b/docker/vllm/Dockerfile.amzn2023
index 8050a56e463f..f2775ae6c164 100644
--- a/docker/vllm/Dockerfile.amzn2023
+++ b/docker/vllm/Dockerfile.amzn2023
@@ -400,7 +400,8 @@ RUN uv pip install --no-cache-dir \
   "pillow>=12.1.1" \
   "xgrammar>=0.1.32" \
   "PyJWT>=2.12.0" \
-  "cbor2>=5.9.0"
+  "cbor2>=5.9.0" \
+  "gradio>=6.7.0"
 
 COPY ./scripts/telemetry/deep_learning_container.py /usr/local/bin/deep_learning_container.py
 COPY ./scripts/telemetry/bash_telemetry.sh.template /tmp/bash_telemetry.sh.template

From 26db368ec1665b33388b2e2f6a2079da5e4a42e3 Mon Sep 17 00:00:00 2001
From: Yadan Wei <yadanwei@amazon.com>
Date: Thu, 2 Apr 2026 09:49:51 -0700
Subject: [PATCH 10/58] fix: use .tar.gz model tarballs for download-model
 action compatibility

Signed-off-by: Yadan Wei <yadanwei@amazon.com>
---
 .github/config/vllm-omni-model-tests.yml | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/.github/config/vllm-omni-model-tests.yml b/.github/config/vllm-omni-model-tests.yml
index fe4c2744d9fd..aa4ad98498c2 100644
--- a/.github/config/vllm-omni-model-tests.yml
+++ b/.github/config/vllm-omni-model-tests.yml
@@ -1,18 +1,18 @@
 # vLLM-Omni Model Test Configuration
 # Tests for omni-modality models (TTS, image generation)
 
-s3_prefix: "s3://dlc-cicd-models/llm-models"
+s3_prefix: "s3://dlc-cicd-models/omni-models"
 
 smoke-test:
   codebuild-fleet:
     - name: "qwen3-tts-1.7b-customvoice"
-      s3_model: "qwen3-tts-1.7b-customvoice.tar"
+      s3_model: "qwen3-tts-1.7b-customvoice.tar.gz"
       type: tts
       fleet: "x86-g6xl-runner"
       extra_args: "--enforce-eager --gpu-memory-utilization 0.8"
 
     - name: "flux2-klein-4b"
-      s3_model: "flux2-klein-4b.tar"
+      s3_model: "flux2-klein-4b.tar.gz"
       type: diffusion
       fleet: "x86-g6xl-runner"
       extra_args: ""

From a85d6412b59f638d177cbfbc05034a6a9cea1a14 Mon Sep 17 00:00:00 2001
From: Yadan Wei <yadanwei@amazon.com>
Date: Thu, 2 Apr 2026 11:27:43 -0700
Subject: [PATCH 11/58] fix: use /v1/audio/speech API for TTS smoke test

- TTS models use OpenAI-compatible speech endpoint, not chat completions
- Validate output WAV file size instead of JSON response

Signed-off-by: Yadan Wei <yadanwei@amazon.com>
---
 .../scripts/vllm_omni_sagemaker_smoke_test.sh | 25 ++++++++-----------
 1 file changed, 10 insertions(+), 15 deletions(-)

diff --git a/test/vllm-omni/scripts/vllm_omni_sagemaker_smoke_test.sh b/test/vllm-omni/scripts/vllm_omni_sagemaker_smoke_test.sh
index 943d9d54a093..3a4fb82133ee 100755
--- a/test/vllm-omni/scripts/vllm_omni_sagemaker_smoke_test.sh
+++ b/test/vllm-omni/scripts/vllm_omni_sagemaker_smoke_test.sh
@@ -48,23 +48,18 @@ print(f'Model loaded: {data[\"data\"][0][\"id\"]}')
 "
 
 if [ "${MODEL_TYPE}" = "tts" ]; then
-    # TTS via chat completions API
-    RESPONSE=$(curl -sf http://localhost:${PORT}/v1/chat/completions \
+    # TTS via /v1/audio/speech API (OpenAI-compatible speech endpoint)
+    curl -sf -X POST http://localhost:${PORT}/v1/audio/speech \
       -H "Content-Type: application/json" \
       -d '{
-        "messages": [{"role": "user", "content": "Hello world"}],
-        "extra_body": {
-          "task_type": "CustomVoice",
-          "language": "English",
-          "speaker": "Ryan"
-        }
-      }')
-    echo "${RESPONSE}" | python3 -c "
-import sys, json
-data = json.load(sys.stdin)
-assert 'choices' in data, 'No choices in response'
-print('TTS serving test PASSED')
-"
+        "input": "Hello, how are you?",
+        "voice": "vivian",
+        "language": "English"
+      }' --output /tmp/tts_output.wav
+    FILE_SIZE=$(stat -c%s /tmp/tts_output.wav 2>/dev/null || stat -f%z /tmp/tts_output.wav)
+    echo "TTS output file size: ${FILE_SIZE} bytes"
+    [ "${FILE_SIZE}" -gt 1000 ] || { echo "FAIL: TTS output too small"; exit 1; }
+    echo "TTS serving test PASSED"
 
 elif [ "${MODEL_TYPE}" = "diffusion" ]; then
     # Image generation via chat completions API

From 58309b7ee3f23c1ee27f85c945c47ce8f1115010 Mon Sep 17 00:00:00 2001
From: Yadan Wei <yadanwei@amazon.com>
Date: Thu, 2 Apr 2026 11:34:28 -0700
Subject: [PATCH 12/58] fix: use HuggingFace model IDs directly instead of S3
 tarballs

- Both models are public (Apache 2.0, no gating)
- Eliminates S3 download/extract issues (corrupted tarballs, disk space)
- Models downloaded from HF at runtime inside container
- Removed s3_prefix and s3_model from config

Signed-off-by: Yadan Wei <yadanwei@amazon.com>
---
 .github/config/vllm-omni-model-tests.yml      |  7 +++----
 .../reusable-vllm-omni-model-tests.yml        | 20 +++----------------
 2 files changed, 6 insertions(+), 21 deletions(-)

diff --git a/.github/config/vllm-omni-model-tests.yml b/.github/config/vllm-omni-model-tests.yml
index aa4ad98498c2..0eb8b08d78cc 100644
--- a/.github/config/vllm-omni-model-tests.yml
+++ b/.github/config/vllm-omni-model-tests.yml
@@ -1,18 +1,17 @@
 # vLLM-Omni Model Test Configuration
 # Tests for omni-modality models (TTS, image generation)
-
-s3_prefix: "s3://dlc-cicd-models/omni-models"
+# Models are downloaded directly from HuggingFace (public, no gating)
 
 smoke-test:
   codebuild-fleet:
     - name: "qwen3-tts-1.7b-customvoice"
-      s3_model: "qwen3-tts-1.7b-customvoice.tar.gz"
+      model: "Qwen/Qwen3-TTS-12Hz-1.7B-CustomVoice"
       type: tts
       fleet: "x86-g6xl-runner"
       extra_args: "--enforce-eager --gpu-memory-utilization 0.8"
 
     - name: "flux2-klein-4b"
-      s3_model: "flux2-klein-4b.tar.gz"
+      model: "black-forest-labs/FLUX.2-klein-4B"
       type: diffusion
       fleet: "x86-g6xl-runner"
       extra_args: ""
diff --git a/.github/workflows/reusable-vllm-omni-model-tests.yml b/.github/workflows/reusable-vllm-omni-model-tests.yml
index 2fc322f17d25..d6441a6cf1c9 100644
--- a/.github/workflows/reusable-vllm-omni-model-tests.yml
+++ b/.github/workflows/reusable-vllm-omni-model-tests.yml
@@ -35,10 +35,7 @@ jobs:
           import yaml, json
           with open('.github/config/vllm-omni-model-tests.yml') as f:
               cfg = yaml.safe_load(f)
-          prefix = cfg.get('s3_prefix', '')
           models = cfg.get('smoke-test', {}).get('codebuild-fleet', [])
-          for m in models:
-              m['s3_path'] = f\"{prefix}/{m['s3_model']}\"
           print(f'matrix={json.dumps(models)}')
           " >> "$GITHUB_OUTPUT"
 
@@ -64,13 +61,6 @@ jobs:
           aws-region: ${{ inputs.aws-region }}
           image-uri: ${{ inputs.image-uri }}
 
-      - name: Download model from S3
-        id: download-model
-        uses: ./.github/actions/download-model
-        with:
-          s3-path: ${{ matrix.model.s3_path }}
-          model-name: ${{ matrix.model.name }}
-
       - name: Pull image
         run: docker pull ${{ inputs.image-uri }}
 
@@ -79,12 +69,11 @@ jobs:
           IMAGE="${{ inputs.image-uri }}"
           CONTAINER_ID=$(docker run -d --rm --gpus all \
             --shm-size=4g \
-            -v /dlc-models:/models \
             -v $(pwd)/test/vllm-omni/scripts:/workspace/test \
             --entrypoint /bin/bash \
             ${IMAGE} -c 'sleep infinity')
           docker exec ${CONTAINER_ID} bash /workspace/test/vllm_omni_ec2_smoke_test.sh \
-            /models/${{ matrix.model.name }} ${{ matrix.model.type }}
+            "${{ matrix.model.model }}" ${{ matrix.model.type }}
           docker kill ${CONTAINER_ID} 2>/dev/null || true
 
       - name: Run SageMaker smoke test
@@ -92,16 +81,13 @@ jobs:
           IMAGE="${{ inputs.image-uri }}"
           CONTAINER_ID=$(docker run -d --rm --gpus all \
             --shm-size=4g \
-            -v /dlc-models:/models \
             -v $(pwd)/test/vllm-omni/scripts:/workspace/test \
             --entrypoint /bin/bash \
             ${IMAGE} -c 'sleep infinity')
           docker exec ${CONTAINER_ID} bash /workspace/test/vllm_omni_sagemaker_smoke_test.sh \
-            /models/${{ matrix.model.name }} ${{ matrix.model.type }}
+            "${{ matrix.model.model }}" ${{ matrix.model.type }}
           docker kill ${CONTAINER_ID} 2>/dev/null || true
 
       - name: Cleanup
         if: always()
-        run: |
-          kill ${{ steps.download-model.outputs.lock-pid }} 2>/dev/null || true
-          docker rmi ${{ inputs.image-uri }} 2>/dev/null || true
+        run: docker rmi ${{ inputs.image-uri }} 2>/dev/null || true

From 325d917f4292d6d9fa7ade098d2ea6ce60d1a8f3 Mon Sep 17 00:00:00 2001
From: Yadan Wei <yadanwei@amazon.com>
Date: Thu, 2 Apr 2026 12:04:23 -0700
Subject: [PATCH 13/58] fix: validate diffusion response without printing full
 base64 image

- Parse response JSON, extract and decode base64 image
- Print only image size instead of full base64 payload
- Validate decoded image is non-trivial (>1000 bytes)

Signed-off-by: Yadan Wei <yadanwei@amazon.com>
---
 .../scripts/vllm_omni_sagemaker_smoke_test.sh    | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/test/vllm-omni/scripts/vllm_omni_sagemaker_smoke_test.sh b/test/vllm-omni/scripts/vllm_omni_sagemaker_smoke_test.sh
index 3a4fb82133ee..edf9d88b959c 100755
--- a/test/vllm-omni/scripts/vllm_omni_sagemaker_smoke_test.sh
+++ b/test/vllm-omni/scripts/vllm_omni_sagemaker_smoke_test.sh
@@ -76,11 +76,21 @@ elif [ "${MODEL_TYPE}" = "diffusion" ]; then
         }
       }')
     echo "${RESPONSE}" | python3 -c "
-import sys, json
+import sys, json, base64
 data = json.load(sys.stdin)
-assert 'choices' in data, 'No choices in response'
+assert 'choices' in data, f'No choices in response: {str(data)[:200]}'
 content = data['choices'][0]['message']['content']
-print(f'Image generation response received, content type: {type(content)}')
+# Extract image and validate
+if isinstance(content, list):
+    img_item = next(c for c in content if c.get('type') == 'image_url')
+    url = img_item['image_url']['url']
+else:
+    url = str(content)
+assert 'base64,' in url, 'No base64 image in response'
+img_b64 = url.split('base64,')[1]
+img_bytes = base64.b64decode(img_b64)
+print(f'Image generated: {len(img_bytes)} bytes')
+assert len(img_bytes) > 1000, f'Image too small: {len(img_bytes)} bytes'
 print('Diffusion serving test PASSED')
 "
 fi

From aa40386bc9193e0097c980c779e69ee46737238e Mon Sep 17 00:00:00 2001
From: Yadan Wei <yadanwei@amazon.com>
Date: Thu, 2 Apr 2026 13:14:10 -0700
Subject: [PATCH 14/58] fix: use ml.g4dn.xlarge for TTS endpoint test (cheaper,
 1.7B fits in 16GB T4)

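- Add dlc_minor_version="0" label in docker/vllm/Dockerfile.amzn2023
- Accept "vllm-omni" as a --framework choice in deep_learning_container.py

Signed-off-by: Yadan Wei <yadanwei@amazon.com>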
---
 docker/vllm/Dockerfile.amzn2023                   |  1 +
 scripts/telemetry/deep_learning_container.py      | 12 +++++++++++-
 test/vllm-omni/sagemaker/test_sm_omni_endpoint.py |  2 +-
 3 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/docker/vllm/Dockerfile.amzn2023 b/docker/vllm/Dockerfile.amzn2023
index f2775ae6c164..2457be4ab8f6 100644
--- a/docker/vllm/Dockerfile.amzn2023
+++ b/docker/vllm/Dockerfile.amzn2023
@@ -380,6 +380,7 @@ ARG CUDA_VERSION
 
 LABEL maintainer="Amazon AI"
 LABEL dlc_major_version="1"
+LABEL dlc_minor_version="0"
 
 ENV LANG=C.UTF-8 \
   LC_ALL=C.UTF-8 \
diff --git a/scripts/telemetry/deep_learning_container.py b/scripts/telemetry/deep_learning_container.py
index a9122e2bce64..910a2c19dca6 100755
--- a/scripts/telemetry/deep_learning_container.py
+++ b/scripts/telemetry/deep_learning_container.py
@@ -228,7 +228,17 @@ def parse_args():
     parser = argparse.ArgumentParser()
     parser.add_argument(
         "--framework",
-        choices=["tensorflow", "mxnet", "pytorch", "base", "vllm", "sglang", "lambda", "ray"],
+        choices=[
+            "tensorflow",
+            "mxnet",
+            "pytorch",
+            "base",
+            "vllm",
+            "sglang",
+            "lambda",
+            "ray",
+            "vllm-omni",
+        ],
         help="framework of container image.",
         required=True,
     )
diff --git a/test/vllm-omni/sagemaker/test_sm_omni_endpoint.py b/test/vllm-omni/sagemaker/test_sm_omni_endpoint.py
index a8825af8b0dc..1778e0c608af 100644
--- a/test/vllm-omni/sagemaker/test_sm_omni_endpoint.py
+++ b/test/vllm-omni/sagemaker/test_sm_omni_endpoint.py
@@ -95,7 +95,7 @@ def model_endpoint(aws_session, model_package, instance_type):
         sagemaker_client.delete_endpoint_config(EndpointConfigName=endpoint_name)
 
 
-@pytest.mark.parametrize("instance_type", ["ml.g6.xlarge"], indirect=True)
+@pytest.mark.parametrize("instance_type", ["ml.g4dn.xlarge"], indirect=True)
 @pytest.mark.parametrize("model_id", ["Qwen/Qwen3-TTS-12Hz-1.7B-CustomVoice"], indirect=True)
 def test_vllm_omni_tts_endpoint(model_endpoint):
     predictor = model_endpoint

From da26690dee4531ef939671484df087e96c32b502 Mon Sep 17 00:00:00 2001
From: Yadan Wei <yadanwei@amazon.com>
Date: Thu, 2 Apr 2026 13:16:52 -0700
Subject: [PATCH 15/58] fix: remove redundant --enforce-eager (vllm-omni
 enforces it internally)

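- Drop SM_VLLM_ENFORCE_EAGER from the SageMaker endpoint test env
- Also drop --gpu-memory-utilization 0.8 from the Qwen3-TTS extra_args

Signed-off-by: Yadan Wei <yadanwei@amazon.com>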
---
 .github/config/vllm-omni-model-tests.yml                 | 2 +-
 test/vllm-omni/sagemaker/test_sm_omni_endpoint.py        | 1 -
 test/vllm-omni/scripts/vllm_omni_sagemaker_smoke_test.sh | 2 +-
 3 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/.github/config/vllm-omni-model-tests.yml b/.github/config/vllm-omni-model-tests.yml
index 0eb8b08d78cc..a6a7c3dfa10d 100644
--- a/.github/config/vllm-omni-model-tests.yml
+++ b/.github/config/vllm-omni-model-tests.yml
@@ -8,7 +8,7 @@ smoke-test:
       model: "Qwen/Qwen3-TTS-12Hz-1.7B-CustomVoice"
       type: tts
       fleet: "x86-g6xl-runner"
-      extra_args: "--enforce-eager --gpu-memory-utilization 0.8"
+      extra_args: ""
 
     - name: "flux2-klein-4b"
       model: "black-forest-labs/FLUX.2-klein-4B"
diff --git a/test/vllm-omni/sagemaker/test_sm_omni_endpoint.py b/test/vllm-omni/sagemaker/test_sm_omni_endpoint.py
index 1778e0c608af..a7ff3e117a5e 100644
--- a/test/vllm-omni/sagemaker/test_sm_omni_endpoint.py
+++ b/test/vllm-omni/sagemaker/test_sm_omni_endpoint.py
@@ -51,7 +51,6 @@ def model_package(aws_session, image_uri, model_id):
             predictor_cls=Predictor,
             env={
                 "SM_VLLM_MODEL": model_id,
-                "SM_VLLM_ENFORCE_EAGER": "true",
                 "HF_TOKEN": hf_token,
             },
         )
diff --git a/test/vllm-omni/scripts/vllm_omni_sagemaker_smoke_test.sh b/test/vllm-omni/scripts/vllm_omni_sagemaker_smoke_test.sh
index edf9d88b959c..839347a98da5 100755
--- a/test/vllm-omni/scripts/vllm_omni_sagemaker_smoke_test.sh
+++ b/test/vllm-omni/scripts/vllm_omni_sagemaker_smoke_test.sh
@@ -12,7 +12,7 @@ PORT=8091
 echo "=== Testing vLLM-Omni SageMaker: ${MODEL_TYPE} at ${MODEL_PATH} ==="
 
 # Start server in background
-vllm serve --omni --model "${MODEL_PATH}" --port ${PORT} --enforce-eager --stage-init-timeout 600 &
+vllm serve --omni --model "${MODEL_PATH}" --port ${PORT} --stage-init-timeout 600 &
 SERVER_PID=$!
 
 cleanup() {

From 9c18b3aaf15e29061dc0a74a8be8341dcc9440b4 Mon Sep 17 00:00:00 2001
From: Yadan Wei <yadanwei@amazon.com>
Date: Thu, 2 Apr 2026 13:57:04 -0700
Subject: [PATCH 16/58] fix: use customer-type from config to select smoke test
 script

- Reusable workflow uses customer-type input (ec2 or sagemaker)
- Maps to vllm_omni_{customer-type}_smoke_test.sh
- No extra test-type parameter needed

Signed-off-by: Yadan Wei <yadanwei@amazon.com>
---
 .../workflows/pr-vllm-omni-ec2-amzn2023.yml   |  1 +
 .../pr-vllm-omni-sagemaker-amzn2023.yml       |  1 +
 .../reusable-vllm-omni-model-tests.yml        | 42 +++++++++----------
 3 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/.github/workflows/pr-vllm-omni-ec2-amzn2023.yml b/.github/workflows/pr-vllm-omni-ec2-amzn2023.yml
index 3f6a627232e2..44952eaf095b 100644
--- a/.github/workflows/pr-vllm-omni-ec2-amzn2023.yml
+++ b/.github/workflows/pr-vllm-omni-ec2-amzn2023.yml
@@ -226,4 +226,5 @@ jobs:
       image-uri: ${{ needs.build-image.outputs.ci-image }}
       aws-account-id: ${{ vars.CI_AWS_ACCOUNT_ID }}
       aws-region: ${{ vars.AWS_REGION }}
+      customer-type: ${{ needs.load-config.outputs.customer-type }}
     secrets: inherit
diff --git a/.github/workflows/pr-vllm-omni-sagemaker-amzn2023.yml b/.github/workflows/pr-vllm-omni-sagemaker-amzn2023.yml
index e9b39e3f7b2b..33468508b85e 100644
--- a/.github/workflows/pr-vllm-omni-sagemaker-amzn2023.yml
+++ b/.github/workflows/pr-vllm-omni-sagemaker-amzn2023.yml
@@ -226,6 +226,7 @@ jobs:
       image-uri: ${{ needs.build-image.outputs.ci-image }}
       aws-account-id: ${{ vars.CI_AWS_ACCOUNT_ID }}
       aws-region: ${{ vars.AWS_REGION }}
+      customer-type: ${{ needs.load-config.outputs.customer-type }}
     secrets: inherit
 
   sagemaker-endpoint-test:
diff --git a/.github/workflows/reusable-vllm-omni-model-tests.yml b/.github/workflows/reusable-vllm-omni-model-tests.yml
index d6441a6cf1c9..154f1f87da93 100644
--- a/.github/workflows/reusable-vllm-omni-model-tests.yml
+++ b/.github/workflows/reusable-vllm-omni-model-tests.yml
@@ -18,6 +18,10 @@ on:
         description: "AWS region for ECR authentication"
         required: true
         type: string
+      customer-type:
+        description: "Customer type: ec2 or sagemaker"
+        required: true
+        type: string
 
 jobs:
   load-models:
@@ -40,6 +44,7 @@ jobs:
           " >> "$GITHUB_OUTPUT"
 
   smoke-test:
+    name: smoke-test (${{ matrix.model.name }})
     needs: load-models
     if: needs.load-models.outputs.matrix != '[]'
     strategy:
@@ -61,33 +66,26 @@ jobs:
           aws-region: ${{ inputs.aws-region }}
           image-uri: ${{ inputs.image-uri }}
 
-      - name: Pull image
-        run: docker pull ${{ inputs.image-uri }}
-
-      - name: Run EC2 smoke test
+      - name: Start container
         run: |
-          IMAGE="${{ inputs.image-uri }}"
-          CONTAINER_ID=$(docker run -d --rm --gpus all \
-            --shm-size=4g \
-            -v $(pwd)/test/vllm-omni/scripts:/workspace/test \
+          docker pull ${{ inputs.image-uri }}
+          CONTAINER_ID=$(docker run -d -it --gpus all --shm-size=4g \
             --entrypoint /bin/bash \
-            ${IMAGE} -c 'sleep infinity')
-          docker exec ${CONTAINER_ID} bash /workspace/test/vllm_omni_ec2_smoke_test.sh \
-            "${{ matrix.model.model }}" ${{ matrix.model.type }}
-          docker kill ${CONTAINER_ID} 2>/dev/null || true
+            ${{ inputs.image-uri }})
+          echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV
 
-      - name: Run SageMaker smoke test
+      - name: Copy test scripts into container
         run: |
-          IMAGE="${{ inputs.image-uri }}"
-          CONTAINER_ID=$(docker run -d --rm --gpus all \
-            --shm-size=4g \
-            -v $(pwd)/test/vllm-omni/scripts:/workspace/test \
-            --entrypoint /bin/bash \
-            ${IMAGE} -c 'sleep infinity')
-          docker exec ${CONTAINER_ID} bash /workspace/test/vllm_omni_sagemaker_smoke_test.sh \
+          docker cp test/vllm-omni/scripts/. ${CONTAINER_ID}:/workspace/test/
+
+      - name: Run smoke test
+        run: |
+          docker exec ${CONTAINER_ID} bash /workspace/test/vllm_omni_${{ inputs.customer-type }}_smoke_test.sh \
             "${{ matrix.model.model }}" ${{ matrix.model.type }}
-          docker kill ${CONTAINER_ID} 2>/dev/null || true
 
       - name: Cleanup
         if: always()
-        run: docker rmi ${{ inputs.image-uri }} 2>/dev/null || true
+        run: |
+          docker stop ${CONTAINER_ID} 2>/dev/null || true
+          docker rm -f ${CONTAINER_ID} 2>/dev/null || true
+          docker rmi ${{ inputs.image-uri }} 2>/dev/null || true

From 7322dceace5401b97e3c2eedd416bb6e96552d7b Mon Sep 17 00:00:00 2001
From: sheng moua <127175097+smouaa@users.noreply.github.com>
Date: Thu, 2 Apr 2026 10:49:48 -0700
Subject: [PATCH 17/58] fix lmiv22 yml and add lmiv23 (#5869)

---
 docs/src/data/djl-inference/0.36-lmi22.0.0-gpu.yml | 2 +-
 docs/src/data/djl-inference/0.36-lmi23.0.0-gpu.yml | 9 +++++++++
 2 files changed, 10 insertions(+), 1 deletion(-)
 create mode 100644 docs/src/data/djl-inference/0.36-lmi23.0.0-gpu.yml

diff --git a/docs/src/data/djl-inference/0.36-lmi22.0.0-gpu.yml b/docs/src/data/djl-inference/0.36-lmi22.0.0-gpu.yml
index e27dc1062d44..c6fea7bae048 100644
--- a/docs/src/data/djl-inference/0.36-lmi22.0.0-gpu.yml
+++ b/docs/src/data/djl-inference/0.36-lmi22.0.0-gpu.yml
@@ -1,4 +1,4 @@
-framework: DJLServing 0.36
+framework: DJLServing
 version: "0.36"
 accelerator: gpu
 cuda: cu129
diff --git a/docs/src/data/djl-inference/0.36-lmi23.0.0-gpu.yml b/docs/src/data/djl-inference/0.36-lmi23.0.0-gpu.yml
new file mode 100644
index 000000000000..f2e09258b662
--- /dev/null
+++ b/docs/src/data/djl-inference/0.36-lmi23.0.0-gpu.yml
@@ -0,0 +1,9 @@
+framework: DJLServing
+version: "0.36"
+accelerator: gpu
+cuda: cu129
+engine: "LMI 23.0.0, vLLM 0.18.0"
+platform: sagemaker
+
+tags:
+  - "0.36.0-lmi23.0.0-cu129"

From c848b677f442404366342cc6eff131e00b0dc124 Mon Sep 17 00:00:00 2001
From: Sirut Buasai <73297481+sirutBuasai@users.noreply.github.com>
Date: Thu, 2 Apr 2026 12:52:27 -0700
Subject: [PATCH 18/58] fix telemetry ingress rules (#5871)

* fix telemetry ingress rules

Signed-off-by: sirutBuasai <sirutbuasai27@outlook.com>

* add test

Signed-off-by: sirutBuasai <sirutbuasai27@outlook.com>

* temp test

Signed-off-by: sirutBuasai <sirutbuasai27@outlook.com>

* revert workflow

Signed-off-by: sirutBuasai <sirutbuasai27@outlook.com>

---------

Signed-off-by: sirutBuasai <sirutbuasai27@outlook.com>
---
 test/test_utils/aws.py | 26 ++++++++++++++++++++++++--
 1 file changed, 24 insertions(+), 2 deletions(-)

diff --git a/test/test_utils/aws.py b/test/test_utils/aws.py
index cf41b6d16545..b049b87d6ae4 100644
--- a/test/test_utils/aws.py
+++ b/test/test_utils/aws.py
@@ -1,9 +1,12 @@
 """AWS Session Manager for all AWS boto3 API resources"""
 
+import ipaddress
 import logging
 import os
 import stat
 import tempfile
+import time
+import urllib.request
 from datetime import datetime
 
 import boto3
@@ -156,12 +159,26 @@ def get_instance_tags(self, instance_id):
         )
         return {tag["Key"]: tag["Value"] for tag in response["Tags"]}
 
+    def get_codebuild_runner_public_ip(self):
+        """Get this machine's public IPv4 via checkip.amazonaws.com (3 tries, then RuntimeError)."""
+        url = "https://checkip.amazonaws.com"
+        for attempt in range(3):
+            try:
+                with urllib.request.urlopen(url, timeout=5) as resp:
+                    ip = resp.read().decode().strip()
+                return str(ipaddress.IPv4Address(ip))  # ValueError if body is not a valid IPv4
+            except Exception as exc:
+                last_error = exc  # kept so the final error can name its cause
+                if attempt < 2:
+                    time.sleep(2**attempt)  # back off 1s, then 2s, before retrying
+        raise RuntimeError(f"Failed to get public IP from {url} after 3 attempts") from last_error
+
     # ===========================================
     # ===== Security Groups =====================
     # ===========================================
 
     def create_ssh_security_group(self, group_name=None):
-        """Create a security group allowing SSH from anywhere. Returns group ID."""
+        """Create a security group allowing SSH from the current machine's public IP. Returns group ID."""
         if not group_name:
             group_name = random_suffix_name("dlc-ssh", 36)
         vpc_id = self.ec2.describe_vpcs(Filters=[{"Name": "is-default", "Values": ["true"]}])[
@@ -180,7 +197,12 @@ def create_ssh_security_group(self, group_name=None):
                     "IpProtocol": "tcp",
                     "FromPort": 22,
                     "ToPort": 22,
-                    "IpRanges": [{"CidrIp": "0.0.0.0/0"}],
+                    "IpRanges": [
+                        {
+                            "CidrIp": f"{self.get_codebuild_runner_public_ip()}/32",
+                            "Description": "CodeBuild runner SSH access",
+                        }
+                    ],
                 },
             ],
         )

From 871877f916b38fe96aa6a865eba17e748c46dc80 Mon Sep 17 00:00:00 2001
From: Jyothirmai Kottu <jyothirmaikottu@gmail.com>
Date: Thu, 2 Apr 2026 14:40:11 -0700
Subject: [PATCH 19/58] Migrate Xgboost Container Tests to DLC repo (#5860)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* AI changes made during Kiro-cli session
---
X-AI-Tool: Kiro-cli
X-AI-Handle-Time-Seconds: 254
X-AI-Prompt: I have uploaded test resources training/ and inference/ in s3://dlc-cicd-models/xgboost/container_test_resources/, I need you to create container_tests/ and add the following tests in xgboost test dir - The tests need a helper that replaces ai_algorithms_container_tests using docker-py directly:

test/xgboost/container/
├── conftest.py              # pytest fixtures: --image flag, S3 download, docker client
├── container_helper.py      # replaces ai_algorithms_container_tests
├── test_training.py         # rewritten training tests
├── test_scoring.py          # rewritten inference tests
└── test_batch_transform.py  # rewritten batch transform tests

The container_helper.py needs to:
- Download test resources from S3 to a temp dir (once per session)
- Create /opt/ml/ directory structure in temp dirs
- Write config JSON files (hyperparameters, inputdataconfig, resourceconfig)
- Mount volumes and run the container via docker-py
- For training: wait for exit, return exit code + logs + model files
- For inference: start container, wait for health check, send HTTP requests, you can refer to https://code.amazon.com/packages/SMFrameworksXGBoost3_0-5Tests/trees/mainline/--/src/container_tests

* AI changes made during Kiro-cli session
---
X-AI-Tool: Kiro-cli
X-AI-Handle-Time-Seconds: 135
X-AI-Prompt: Add this in release workflow, comment benchmark tests for now, add on push trigger, create parallel test execution for each test case in wf and prepare cr

* AI changes made during Kiro-cli session
---
X-AI-Tool: Kiro-cli
X-AI-Handle-Time-Seconds: 143
X-AI-Prompt: create a new workflow for xgboost benchmarking, container and integration tests and use that workflow in release wrkflow

* AI changes made during Kiro-cli session
---
X-AI-Tool: Kiro-cli
X-AI-Handle-Time-Seconds: 101
X-AI-Prompt: change the name to - sagemaker-xgboost-integ-tests.yml and remove the integ tests steps it is a todo, comment benchmark tests as i need to test container tests now.

* AI changes made during Kiro-cli session
---
X-AI-Tool: Kiro-cli
X-AI-Handle-Time-Seconds: 25
X-AI-Prompt: change on push current branch

* AI changes made during Kiro-cli session
---
X-AI-Tool: Kiro-cli
X-AI-Handle-Time-Seconds: 13
X-AI-Prompt: remove main this wf will never be pr triggered it is manually triggered

* AI changes made during Kiro-cli session
---
X-AI-Tool: Kiro-cli
X-AI-Handle-Time-Seconds: 41
X-AI-Prompt: yeah lets do with option b

* AI changes made during Kiro-cli session
---
X-AI-Tool: Kiro-cli
X-AI-Handle-Time-Seconds: 22
X-AI-Prompt: E
E         Invoking script with the following command:
E
E         /miniconda3/bin/python3 -m sagemaker_xgboost_container.training:main --alpha 0.0 --base_score 0.5 --booster gbtree --colsample_bylevel 1 --colsample_bytree 1.0 --csv_weights 1 --dsplit row --early_stopping_rounds 5 --eta 0.3 --eval_metric error --gamma 0.0 --grow_policy depthwise --lambda 1.0 --lambda_bias 0.0 --max_bin 256 --max_delta_step 0 --max_depth 6 --max_leaves 0 --min_child_weight 1.0 --normalize_type tree --nthread 8 --num_round 10 --objective binary:logistic --one_drop 0 --predictor cpu_predictor --process_type default --rate_drop 0.0 --refresh_leaf 1 --sample_type uniform --scale_pos_weight 1.0 --silent 0 --sketch_eps 0.03 --skip_drop 0.0 --subsample 1.0 --tree_method auto --tweedie_variance_power 1.5 --updater grow_colmaker,prune
E
E
E         /miniconda3/bin/python3: No module named sagemaker_xgboost_container.training:main
E         [2026-03-31:21:26:07:ERROR] ExecuteUserScriptError:
E         Command "/miniconda3/bin/python3 -m sagemaker_xgboost_container.training:main --alpha 0.0 --base_score 0.5 --booster gbtree --colsample_bylevel 1 --colsample_bytree 1.0 --csv_weights 1 --dsplit row --early_stopping_rounds 5 --eta 0.3 --eval_metric error --gamma 0.0 --grow_policy depthwise --lambda 1.0 --lambda_bias 0.0 --max_bin 256 --max_delta_step 0 --max_depth 6 --max_leaves 0 --min_child_weight 1.0 --normalize_type tree --nthread 8 --num_round 10 --objective binary:logistic --one_drop 0 --predictor cpu_predictor --process_type default --rate_drop 0.0 --refresh_leaf 1 --sample_type uniform --scale_pos_weight 1.0 --silent 0 --sketch_eps 0.03 --skip_drop 0.0 --subsample 1.0 --tree_method auto --tweedie_variance_power 1.5 --updater grow_colmaker,prune"
E
E       assert 1 == 0

* AI changes made during Kiro-cli session
---
X-AI-Tool: Kiro-cli
X-AI-Handle-Time-Seconds: 50
X-AI-Prompt: scan for red flags

* AI changes made during Kiro-cli session
---
X-AI-Tool: Kiro-cli
X-AI-Handle-Time-Seconds: 47
X-AI-Prompt: can we regrenate the model durng test time and upload back to s3?

* AI changes made during Kiro-cli session
---
X-AI-Tool: Kiro-cli
X-AI-Handle-Time-Seconds: 38
X-AI-Prompt: RuntimeError: Model /opt/ml/model/mnist-pkl-model cannot be loaded:
Pickle load error=[21:37:57] /workspace/src/learner.cc:1185: Check failed: header == serialisation_header_: If you are loading a serialized model (like pickle in Python, RDS in R) or
configuration generated by an older version of XGBoost, please export the model by calling
`Booster.save_model` from that version first, then load it back in current version. See:

* AI changes made during Kiro-cli session
---
X-AI-Tool: Kiro-cli
X-AI-Handle-Time-Seconds: 30
X-AI-Prompt: During handling of the above exception, another exception occurred:

Traceback (most recent call last):
File "/miniconda3/lib/python3.10/site-packages/sagemaker_containers/_trainer.py", line 84, in train
entrypoint()
File "/miniconda3/lib/python3.10/site-packages/sagemaker_xgboost_container/training.py", line 102, in main
train(framework.training_env())
File "/miniconda3/lib/python3.10/site-packages/sagemaker_xgboost_container/training.py", line 98, in train
run_algorithm_mode()
File "/miniconda3/lib/python3.10/site-packages/sagemaker_xgboost_container/training.py", line 64, in run_algorithm_mode
sagemaker_train(
File "/miniconda3/lib/python3.10/site-packages/sagemaker_xgboost_container/algorithm_mode/train.py", line 144, in sagemaker_train
validated_train_config = hyperparameters.validate(train_config)
File "/miniconda3/lib/python3.10/site-packages/sagemaker_algorithm_toolkit/hyperparameter_validation.py", line 278, in validate
raise exc.UserError("Extraneous hyperparameter found: {}".format(hp))
sagemaker_algorithm_toolkit.exceptions.UserError: Extraneous hyperparameter found: silent

Extraneous hyperparameter found: silent

assert 1 == 0
FAILED xgboost/container/test_training.py::TestValidTraining::test_checkpoint_and_reload - assert 1 == 0

* AI changes made during Kiro-cli session
---
X-AI-Tool: Kiro-cli
X-AI-Handle-Time-Seconds: 21
X-AI-Prompt: The fix is a one-liner in ServingContainer.__enter__. The XGBoost serving entrypoint (sagemaker_xgboost_container.serving) reads
/opt/ml/input/config/resourceconfig.json on startup. Without it, the Python app fails to initialize, gunicorn workers exit with code 3, and you
get the HaltServer 'Worker failed to boot.' error.

* AI changes made during Kiro-cli session
---
X-AI-Tool: Kiro-cli
X-AI-Handle-Time-Seconds: 14
X-AI-Prompt: ### 2. container_helper.py — tmpdir not cleaned up in __exit__

Both run_training and ServingContainer create temp dirs but never clean them up. The training function at least returns paths so the caller
could clean up, but ServingContainer stores self._opt_ml and never removes it.

Fix: Add cleanup in __exit__:

* AI changes made during Kiro-cli session
---
X-AI-Tool: Kiro-cli
X-AI-Handle-Time-Seconds: 19
X-AI-Prompt: test_training.py — test_checkpoint_and_reload has inline import json

* AI changes made during Kiro-cli session
---
X-AI-Tool: Kiro-cli
X-AI-Handle-Time-Seconds: 31
X-AI-Prompt: test_training.py — test_checkpoint_and_reload phase 2 container not cleaned up on timeout

* AI changes made during Kiro-cli session
---
X-AI-Tool: Kiro-cli
X-AI-Handle-Time-Seconds: 33
X-AI-Prompt: container-test-training installs docker pytest boto3 but not requests. The training tests import run_training from container_helper, which
imports requests at module level. This will fail at import time.

* Human changes made during kiro-cli session after prompt completion.
---
X-AI-Tool: Human
X-AI-Prompt: tests are still failing with same reason

* AI changes made during Kiro-cli session
---
X-AI-Tool: Kiro-cli
X-AI-Handle-Time-Seconds: 35
X-AI-Prompt: scan for red flags

* AI changes made during Kiro-cli session
---
X-AI-Tool: Kiro-cli
X-AI-Handle-Time-Seconds: 84
X-AI-Prompt: RuntimeError: Model /opt/ml/model/mnist-pkl-model cannot be loaded:
Pickle load error=[23:48:50] /workspace/src/learner.cc:1185: Check failed: header == serialisation_header_: If you are loading a serialized model (like pickle in Python, RDS in R) or
configuration generated by an older version of XGBoost, please export the model by calling
`Booster.save_model` from that version first, then load it back in current version. See:

* Human changes made during kiro-cli session after prompt completion.
---
X-AI-Tool: Human
X-AI-Prompt: RuntimeError: Model /opt/ml/model/mnist-pkl-model cannot be loaded:
Pickle load error=[23:48:50] /workspace/src/learner.cc:1185: Check failed: header == serialisation_header_: If you are loading a serialized model (like pickle in Python, RDS in R) or
configuration generated by an older version of XGBoost, please export the model by calling
`Booster.save_model` from that version first, then load it back in current version. See:

* AI changes made during Kiro-cli session
---
X-AI-Tool: Kiro-cli
X-AI-Handle-Time-Seconds: 32
X-AI-Prompt:
============================= test session starts ==============================
platform linux -- Python 3.12.12, pytest-9.0.2, pluggy-1.6.0 -- /tmp/codebuild-b0ba6d93-4eb5-444e-b8c3-bebc7c5b99fa/output/src3763/src/eeeffba7_95a5_4ce7_9fdc_ed0e3f9ffdaa/actions-runner/_work/deep-learning-containers/deep-learning-containers/.venv/bin/python3
cachedir: .pytest_cache
rootdir: /tmp/codebuild-b0ba6d93-4eb5-444e-b8c3-bebc7c5b99fa/output/src3763/src/eeeffba7_95a5_4ce7_9fdc_ed0e3f9ffdaa/actions-runner/_work/deep-learning-containers/deep-learning-containers
configfile: pyproject.toml
collecting ... collected 3 items
xgboost/container/test_batch_transform.py::TestBatchTransform::test_libsvm_batch FAILED
xgboost/container/test_batch_transform.py::TestBatchTransform::test_recordio_protobuf_batch PASSED
xgboost/container/test_batch_transform.py::TestBatchTransform::test_csv_batch PASSED
=================================== FAILURES ===================================
_____________________ TestBatchTransform.test_libsvm_batch _____________________
self = <container.test_batch_transform.TestBatchTransform object at 0x7fd663720d40>
docker_client = <docker.client.DockerClient object at 0x7fd6638eec60>
image_uri = '404426647817.dkr.ecr.us-west-2.amazonaws.com/ci:xgboost-3.0.5-cpu-py310-cu126-ubuntu20.04-sagemaker-23864956268'
inference_resources = '/tmp/xgb-container-test-o7vvveha/inference'
def test_libsvm_batch(self, docker_client, image_uri, inference_resources):
responses = _send_batch_requests(
docker_client, image_uri, inference_resources, "mnist-xgb-model", "text/x-libsvm",
["mnist-1.libsvm", "mnist-less-dim-1.libsvm",
"mnist-plus-onedim-1.libsvm", "mnist-700.libsvm"],
)
_validate_batch_response(responses[0], 1)
_validate_batch_response(responses[1], 1)
>       _validate_batch_response(responses[2], 1)
xgboost/container/test_batch_transform.py:72:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
resp = <Response [400]>, expected_length = 1
def _validate_batch_response(resp, expected_length):
"""Batch responses are newline-delimited; trailing newline adds +1."""
>       assert resp.status_code == httplib.OK, resp.text
E       AssertionError: Unable to evaluate payload provided: [18:45:55] /workspace/src/learner.cc:1483: Check failed: learner_model_param_.num_feature >= p_fmat->Info().num_col_ (785 vs. 786) : Number of columns does not match number of features in booster.
E         Stack trace:
E           [bt] (0) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x2a6e7c) [0x7fc72964de7c]
E           [bt] (1) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x6777a9) [0x7fc729a1e7a9]
E           [bt] (2) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x68d962) [0x7fc729a34962]
E           [bt] (3) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(XGBoosterPredictFromDMatrix+0x2de) [0x7fc72956196e]
E           [bt] (4) /miniconda3/lib/python3.10/lib-dynload/../../libffi.so.8(+0x702a) [0x7fc74a42302a]
E           [bt] (5) /miniconda3/lib/python3.10/lib-dynload/../../libffi.so.8(+0x64a9) [0x7fc74a4224a9]
E           [bt] (6) /miniconda3/lib/python3.10/lib-dynload/../../libffi.so.8(ffi_call+0xdd) [0x7fc74a422bbd]
E           [bt] (7) /miniconda3/lib/python3.10/lib-dynload/_ctypes.cpython-310-x86_64-linux-gnu.so(+0x8c7b) [0x7fc74a430c7b]
E           [bt] (8) /miniconda3/lib/python3.10/lib-dynload/_ctypes.cpython-310-x86_64-linux-gnu.so(+0x8565) [0x7fc74a430565]
E
E
E       assert 400 == <HTTPStatus.OK: 200>
E        +  where 400 = <Response [400]>.status_code
E        +  and   <HTTPStatus.OK: 200> = httplib.OK
xgboost/container/test_batch_transform.py:53: AssertionError
==================================== PASSES ====================================
=========================== short test summary info ============================
PASSED xgboost/container/test_batch_transform.py::TestBatchTransform::test_recordio_protobuf_batch
PASSED xgboost/container/test_batch_transform.py::TestBatchTransform::test_csv_batch
FAILED xgboost/container/test_batch_transform.py::TestBatchTransform::test_libsvm_batch - AssertionError: Unable to evaluate payload provided: [18:45:55] /workspace/src/learner.cc:1483: Check failed: learner_model_param_.num_feature >= p_fmat->Info().num_col_ (785 vs. 786) : Number of columns does not match number of features in booster.
Stack trace:
[bt] (0) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x2a6e7c) [0x7fc72964de7c]
[bt] (1) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x6777a9) [0x7fc729a1e7a9]
[bt] (2) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x68d962) [0x7fc729a34962]
[bt] (3) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(XGBoosterPredictFromDMatrix+0x2de) [0x7fc72956196e]
[bt] (4) /miniconda3/lib/python3.10/lib-dynload/../../libffi.so.8(+0x702a) [0x7fc74a42302a]
[bt] (5) /miniconda3/lib/python3.10/lib-dynload/../../libffi.so.8(+0x64a9) [0x7fc74a4224a9]
[bt] (6) /miniconda3/lib/python3.10/lib-dynload/../../libffi.so.8(ffi_call+0xdd) [0x7fc74a422bbd]
[bt] (7) /miniconda3/lib/python3.10/lib-dynload/_ctypes.cpython-310-x86_64-linux-gnu.so(+0x8c7b) [0x7fc74a430c7b]
[bt] (8) /miniconda3/lib/python3.10/lib-dynload/_ctypes.cpython-310-x86_64-linux-gnu.so(+0x8565) [0x7fc74a430565]

assert 400 == <HTTPStatus.OK: 200>
+  where 400 = <Response [400]>.status_code
+  and   <HTTPStatus.OK: 200> = httplib.OK
========================= 1 failed, 2 passed in 37.90s =========================
Error: Process completed with exit code 1.
how is the test passing? we must need to know what the logs are?

* AI changes made during Kiro-cli session
---
X-AI-Tool: Kiro-cli
X-AI-Handle-Time-Seconds: 29
X-AI-Prompt: same here, xgboost/container/test_training.py::TestValidTraining::test_single_file_libsvm PASSED
xgboost/container/test_training.py::TestValidTraining::test_single_file_libsvm_weights PASSED
xgboost/container/test_training.py::TestValidTraining::test_single_file_libsvm_hpo_param PASSED
xgboost/container/test_training.py::TestValidTraining::test_single_file_libsvm_multiclass_hpo PASSED
xgboost/container/test_training.py::TestValidTraining::test_single_file_libsvm_iterate_objectives FAILED
xgboost/container/test_training.py::TestValidTraining::test_single_file_libsvm_threshold_eval_metric PASSED
xgboost/container/test_training.py::TestValidTraining::test_single_file_libsvm_verbosity PASSED
xgboost/container/test_training.py::TestValidTraining::test_multi_files_libsvm PASSED
xgboost/container/test_training.py::TestValidTraining::test_single_file_csv PASSED
xgboost/container/test_training.py::TestValidTraining::test_single_file_csv_weights PASSED
xgboost/container/test_training.py::TestValidTraining::test_multi_file_csv PASSED
xgboost/container/test_training.py::TestValidTraining::test_single_file_csv_space_separated PASSED
xgboost/container/test_training.py::TestValidTraining::test_single_file_csv_sci_notation PASSED
xgboost/container/test_training.py::TestValidTraining::test_single_file_csv_empty_cells PASSED
xgboost/container/test_training.py::TestValidTraining::test_checkpoint_and_reload FAILED
xgboost/container/test_training.py::TestInvalidTraining::test_no_training_data PASSED
xgboost/container/test_training.py::TestInvalidTraining::test_no_validation_data PASSED
xgboost/container/test_training.py::TestInvalidTraining::test_invalid_data_csv_content_type PASSED
xgboost/container/test_training.py::TestInvalidTraining::test_csv_alpha_with_csv_content_type PASSED
xgboost/container/test_training.py::TestInvalidTraining::test_csv_data_with_libsvm_content_type PASSED
xgboost/container/test_training.py::TestInvalidTraining::test_invalid_data_with_libsvm_content_type PASSED

* AI changes made during Kiro-cli session
---
X-AI-Tool: Kiro-cli
X-AI-Handle-Time-Seconds: 106
X-AI-Prompt: Run source .venv/bin/activate
============================= test session starts ==============================
platform linux -- Python 3.12.12, pytest-9.0.2, pluggy-1.6.0 -- /tmp/codebuild-8acc520a-64b1-45e6-8ddc-2078a24507b5/output/src787/src/b09928cc_a4a3_4b96_9bee_901575f815e0/actions-runner/_work/deep-learning-containers/deep-learning-containers/.venv/bin/python3
cachedir: .pytest_cache
rootdir: /tmp/codebuild-8acc520a-64b1-45e6-8ddc-2078a24507b5/output/src787/src/b09928cc_a4a3_4b96_9bee_901575f815e0/actions-runner/_work/deep-learning-containers/deep-learning-containers
configfile: pyproject.toml
collecting ... collected 45 items

xgboost/container/test_training.py::TestValidTraining::test_single_file_libsvm PASSED
xgboost/container/test_training.py::TestValidTraining::test_single_file_libsvm_weights PASSED
xgboost/container/test_training.py::TestValidTraining::test_single_file_libsvm_hpo_param PASSED
xgboost/container/test_training.py::TestValidTraining::test_single_file_libsvm_multiclass_hpo PASSED
xgboost/container/test_training.py::TestValidTraining::test_single_file_libsvm_iterate_objectives FAILED
xgboost/container/test_training.py::TestValidTraining::test_single_file_libsvm_threshold_eval_metric PASSED
xgboost/container/test_training.py::TestValidTraining::test_single_file_libsvm_verbosity PASSED
xgboost/container/test_training.py::TestValidTraining::test_multi_files_libsvm PASSED
xgboost/container/test_training.py::TestValidTraining::test_single_file_csv PASSED
xgboost/container/test_training.py::TestValidTraining::test_single_file_csv_weights PASSED
xgboost/container/test_training.py::TestValidTraining::test_multi_file_csv PASSED
xgboost/container/test_training.py::TestValidTraining::test_single_file_csv_space_separated PASSED
xgboost/container/test_training.py::TestValidTraining::test_single_file_csv_sci_notation PASSED
xgboost/container/test_training.py::TestValidTraining::test_single_file_csv_empty_cells PASSED
xgboost/container/test_training.py::TestValidTraining::test_checkpoint_and_reload FAILED
xgboost/container/test_training.py::TestInvalidTraining::test_no_training_data PASSED
xgboost/container/test_training.py::TestInvalidTraining::test_no_validation_data PASSED
xgboost/container/test_training.py::TestInvalidTraining::test_invalid_data_csv_content_type PASSED
xgboost/container/test_training.py::TestInvalidTraining::test_csv_alpha_with_csv_content_type PASSED
xgboost/container/test_training.py::TestInvalidTraining::test_csv_data_with_libsvm_content_type PASSED
xgboost/container/test_training.py::TestInvalidTraining::test_invalid_data_with_libsvm_content_type PASSED
xgboost/container/test_training.py::TestInvalidTraining::test_invalid_hyperparameter[eta-values0] PASSED
xgboost/container/test_training.py::TestInvalidTraining::test_invalid_hyperparameter[gamma-values1] PASSED
xgboost/container/test_training.py::TestInvalidTraining::test_invalid_hyperparameter[max_depth-values2] PASSED
xgboost/container/test_training.py::TestInvalidTraining::test_invalid_hyperparameter[min_child_weight-values3] PASSED
xgboost/container/test_training.py::TestInvalidTraining::test_invalid_hyperparameter[max_delta_step-values4] PASSED
xgboost/container/test_training.py::TestInvalidTraining::test_invalid_hyperparameter[colsample_bytree-values5] PASSED
xgboost/container/test_training.py::TestInvalidTraining::test_invalid_hyperparameter[colsample_bylevel-values6] PASSED
xgboost/container/test_training.py::TestInvalidTraining::test_invalid_hyperparameter[tree_method-values7] FAILED
xgboost/container/test_training.py::TestInvalidTraining::test_invalid_hyperparameter[sketch_eps-values8] PASSED
xgboost/container/test_training.py::TestInvalidTraining::test_invalid_hyperparameter[refresh_leaf-values9] PASSED
xgboost/container/test_training.py::TestInvalidTraining::test_invalid_hyperparameter[process_type-values10] PASSED
xgboost/container/test_training.py::TestInvalidTraining::test_invalid_hyperparameter[grow_policy-values11] PASSED
xgboost/container/test_training.py::TestInvalidTraining::test_invalid_hyperparameter[sample_type-values12] PASSED
xgboost/container/test_training.py::TestInvalidTraining::test_invalid_hyperparameter[normalize_type-values13] PASSED
xgboost/container/test_training.py::TestInvalidTraining::test_invalid_hyperparameter[rate_drop-values14] PASSED
xgboost/container/test_training.py::TestInvalidTraining::test_invalid_hyperparameter[one_drop-values15] PASSED
xgboost/container/test_training.py::TestInvalidTraining::test_invalid_hyperparameter[skip_drop-values16] PASSED
xgboost/container/test_training.py::TestInvalidTraining::test_invalid_hyperparameter[tweedie_variance_power-values17] PASSED
xgboost/container/test_training.py::TestInvalidTraining::test_invalid_hyperparameter[eval_metric-values18] PASSED
xgboost/container/test_training.py::TestInvalidTraining::test_invalid_hyperparameter[booster-values19] PASSED
xgboost/container/test_training.py::TestInvalidTraining::test_invalid_hyperparameter[verbosity-values20] PASSED
xgboost/container/test_training.py::TestInvalidTraining::test_missing_num_round PASSED
xgboost/container/test_training.py::TestInvalidTraining::test_multiclass_without_num_class PASSED
xgboost/container/test_training.py::TestInvalidTraining::test_pipe_mode_rejected PASSED

=================================== FAILURES ===================================
_________ TestValidTraining.test_single_file_libsvm_iterate_objectives _________

self = <container.test_training.TestValidTraining object at 0x7f11f6c34ce0>
docker_client = <docker.client.DockerClient object at 0x7f11f6f7d0d0>
image_uri = '404426647817.dkr.ecr.us-west-2.amazonaws.com/ci:xgboost-3.0.5-cpu-py310-cu126-ubuntu20.04-sagemaker-23865911659'
training_resources = '/tmp/xgb-container-test-ptswvydm/training'

def test_single_file_libsvm_iterate_objectives(self, docker_client, image_uri, training_resources):
hp = copy.deepcopy(STD_HP)
d = _libsvm_dir(training_resources)
for obj in ["reg:squarederror", "binary:logistic", "count:poisson",
"reg:gamma", "reg:tweedie"]:
hp["objective"] = obj
result = _run(docker_client, image_uri, training_resources, hp, STD_IDC, STD_RC,
[os.path.join(d, "agaricus.libsvm.train")],
[os.path.join(d, "agaricus.libsvm.test")])
>           _assert_success(result)

xgboost/container/test_training.py:170:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

result = (1, '/miniconda3/lib/python3.10/site-packages/sagemaker_containers/_server.py:22: UserWarning: pkg_resources is deprec... '/tmp/xgb-train-bkhw5xxo/input/data/train', 'input_validation': '/tmp/xgb-train-bkhw5xxo/input/data/validation', ...})
regex = None

def _assert_success(result, regex=None):
exit_code, logs, model_files, _ = result
>       assert exit_code == 0, f"Training failed:\n{logs}"
E       AssertionError: Training failed:
E         /miniconda3/lib/python3.10/site-packages/sagemaker_containers/_server.py:22: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.
E           import pkg_resources
E         [2026-04-01:19:09:22:INFO] Imported framework sagemaker_xgboost_container.training
E         [2026-04-01:19:09:22:INFO] Failed to parse hyperparameter eval_metric value error to Json.
E         Returning the value itself
E         [2026-04-01:19:09:22:INFO] Failed to parse hyperparameter predictor value cpu_predictor to Json.
E         Returning the value itself
E         [2026-04-01:19:09:22:INFO] Failed to parse hyperparameter tree_method value auto to Json.
E         Returning the value itself
E         [2026-04-01:19:09:22:INFO] Failed to parse hyperparameter normalize_type value tree to Json.
E         Returning the value itself
E         [2026-04-01:19:09:22:INFO] Failed to parse hyperparameter sample_type value uniform to Json.
E         Returning the value itself
E         [2026-04-01:19:09:22:INFO] Failed to parse hyperparameter booster value gbtree to Json.
E         Returning the value itself
E         [2026-04-01:19:09:22:INFO] Failed to parse hyperparameter objective value reg:gamma to Json.
E         Returning the value itself
E         [2026-04-01:19:09:22:INFO] Failed to parse hyperparameter updater value grow_colmaker,prune to Json.
E         Returning the value itself
E         [2026-04-01:19:09:22:INFO] Failed to parse hyperparameter process_type value default to Json.
E         Returning the value itself
E         [2026-04-01:19:09:22:INFO] Failed to parse hyperparameter dsplit value row to Json.
E         Returning the value itself
E         [2026-04-01:19:09:22:INFO] Failed to parse hyperparameter grow_policy value depthwise to Json.
E         Returning the value itself
E         [2026-04-01:19:09:22:INFO] No GPUs detected (normal if no gpus installed)
E         [2026-04-01:19:09:22:INFO] Running XGBoost Sagemaker in algorithm mode
E         [2026-04-01:19:09:22:INFO] Determined 0 GPU(s) available on the instance.
E         [2026-04-01:19:09:22:INFO] File path /opt/ml/input/data/train of input files
E         [2026-04-01:19:09:22:INFO] Making smlinks from folder /opt/ml/input/data/train to folder /tmp/sagemaker_xgboost_input_data
E         [2026-04-01:19:09:22:INFO] creating symlink between Path /opt/ml/input/data/train/agaricus.libsvm.train and destination /tmp/sagemaker_xgboost_input_data/agaricus.libsvm.train1664359970552213804
E         [2026-04-01:19:09:22:INFO] files path: /tmp/sagemaker_xgboost_input_data
E         [2026-04-01:19:09:22:INFO] File path /opt/ml/input/data/validation of input files
E         [2026-04-01:19:09:22:INFO] Making smlinks from folder /opt/ml/input/data/validation to folder /tmp/sagemaker_xgboost_input_data
E         [2026-04-01:19:09:22:INFO] creating symlink between Path /opt/ml/input/data/validation/agaricus.libsvm.test and destination /tmp/sagemaker_xgboost_input_data/agaricus.libsvm.test1757920320072049626
E         [2026-04-01:19:09:22:INFO] files path: /tmp/sagemaker_xgboost_input_data
E         [2026-04-01:19:09:22:INFO] Single node training.
E         [2026-04-01:19:09:22:INFO] TRAIN_JOB_DEBUG: Received is_master=True
E         TRAIN_JOB_DEBUG: Received is_master=True
E         [2026-04-01:19:09:22:INFO] Train matrix has 6513 rows and 127 columns
E         [2026-04-01:19:09:22:INFO] Validation matrix has 1611 rows
E         [2026-04-01:19:09:22:INFO] CALLBACK_SETUP_DEBUG: save_model_on_termination=false, is_master=True
E         [2026-04-01:19:09:22:INFO] CALLBACK_SKIPPING save_model_on_termination=false, is_master=True)
E         /miniconda3/lib/python3.10/site-packages/xgboost/callback.py:386: UserWarning: [19:09:22] WARNING: /workspace/src/common/error_msg.cc:33: You have manually specified the `updater` parameter. The `tree_method` parameter will be ignored. Incorrect sequence of updaters will produce undefined behavior. For common uses, we recommend using `tree_method` parameter instead.
E           self.starting_round = model.num_boosted_rounds()
E         /miniconda3/lib/python3.10/site-packages/xgboost/callback.py:386: UserWarning: [19:09:22] WARNING: /workspace/src/learner.cc:738:
E         Parameters: { "dsplit", "lambda_bias", "normalize_type", "one_drop", "predictor", "rate_drop", "sample_type", "sketch_eps", "skip_drop", "tweedie_variance_power" } are not used.
E
E           self.starting_round = model.num_boosted_rounds()
E         [2026-04-01:19:09:22:ERROR] Reporting training FAILURE
E         [2026-04-01:19:09:22:ERROR] framework error:
E         Traceback (most recent call last):
E           File "/miniconda3/lib/python3.10/site-packages/sagemaker_xgboost_container/algorithm_mode/train.py", line 367, in train_job
E             bst = xgb.train(
E           File "/miniconda3/lib/python3.10/site-packages/xgboost/core.py", line 729, in inner_f
E             return func(**kwargs)
E           File "/miniconda3/lib/python3.10/site-packages/xgboost/training.py", line 183, in train
E             bst.update(dtrain, iteration=i, fobj=obj)
E           File "/miniconda3/lib/python3.10/site-packages/xgboost/core.py", line 2246, in update
E             _check_call(
E           File "/miniconda3/lib/python3.10/site-packages/xgboost/core.py", line 310, in _check_call
E             raise XGBoostError(py_str(_LIB.XGBGetLastError()))
E         xgboost.core.XGBoostError: [19:09:22] /workspace/src/objective/regression_obj.cu:88: label must be positive for gamma regression.
E         Stack trace:
E           [bt] (0) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x2a6e7c) [0x7fb583957e7c]
E           [bt] (1) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0xf02dcb) [0x7fb5845b3dcb]
E           [bt] (2) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0xf03333) [0x7fb5845b4333]
E           [bt] (3) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x68d2a2) [0x7fb583d3e2a2]
E           [bt] (4) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(XGBoosterUpdateOneIter+0x77) [0x7fb583867f57]
E           [bt] (5) /miniconda3/lib/python3.10/lib-dynload/../../libffi.so.8(+0x702a) [0x7fb5b767602a]
E           [bt] (6) /miniconda3/lib/python3.10/lib-dynload/../../libffi.so.8(+0x64a9) [0x7fb5b76754a9]
E           [bt] (7) /miniconda3/lib/python3.10/lib-dynload/../../libffi.so.8(ffi_call+0xdd) [0x7fb5b7675bbd]
E           [bt] (8) /miniconda3/lib/python3.10/lib-dynload/_ctypes.cpython-310-x86_64-linux-gnu.so(+0x8c7b) [0x7fb5b7683c7b]
E
E
E
E         During handling of the above exception, another exception occurred:
E
E         Traceback (most recent call last):
E           File "/miniconda3/lib/python3.10/site-packages/sagemaker_containers/_trainer.py", line 84, in train
E             entrypoint()
E           File "/miniconda3/lib/python3.10/site-packages/sagemaker_xgboost_container/training.py", line 102, in main
E             train(framework.training_env())
E           File "/miniconda3/lib/python3.10/site-packages/sagemaker_xgboost_container/training.py", line 98, in train
E             run_algorithm_mode()
E           File "/miniconda3/lib/python3.10/site-packages/sagemaker_xgboost_container/training.py", line 64, in run_algorithm_mode
E             sagemaker_train(
E           File "/miniconda3/lib/python3.10/site-packages/sagemaker_xgboost_container/algorithm_mode/train.py", line 278, in sagemaker_train
E             train_job(**train_args)
E           File "/miniconda3/lib/python3.10/site-packages/sagemaker_xgboost_container/algorithm_mode/train.py", line 467, in train_job
E             raise exc.AlgorithmError(f"{exception_prefix}:\n {str(e)}")
E         sagemaker_algorithm_toolkit.exceptions.AlgorithmError: XGB train call failed with exception:
E          [19:09:22] /workspace/src/objective/regression_obj.cu:88: label must be positive for gamma regression.
E         Stack trace:
E           [bt] (0) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x2a6e7c) [0x7fb583957e7c]
E           [bt] (1) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0xf02dcb) [0x7fb5845b3dcb]
E           [bt] (2) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0xf03333) [0x7fb5845b4333]
E           [bt] (3) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x68d2a2) [0x7fb583d3e2a2]
E           [bt] (4) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(XGBoosterUpdateOneIter+0x77) [0x7fb583867f57]
E           [bt] (5) /miniconda3/lib/python3.10/lib-dynload/../../libffi.so.8(+0x702a) [0x7fb5b767602a]
E           [bt] (6) /miniconda3/lib/python3.10/lib-dynload/../../libffi.so.8(+0x64a9) [0x7fb5b76754a9]
E           [bt] (7) /miniconda3/lib/python3.10/lib-dynload/../../libffi.so.8(ffi_call+0xdd) [0x7fb5b7675bbd]
E           [bt] (8) /miniconda3/lib/python3.10/lib-dynload/_ctypes.cpython-310-x86_64-linux-gnu.so(+0x8c7b) [0x7fb5b7683c7b]
E
E
E
E         XGB train call failed with exception:
E          [19:09:22] /workspace/src/objective/regression_obj.cu:88: label must be positive for gamma regression.
E         Stack trace:
E           [bt] (0) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x2a6e7c) [0x7fb583957e7c]
E           [bt] (1) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0xf02dcb) [0x7fb5845b3dcb]
E           [bt] (2) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0xf03333) [0x7fb5845b4333]
E           [bt] (3) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x68d2a2) [0x7fb583d3e2a2]
E           [bt] (4) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(XGBoosterUpdateOneIter+0x77) [0x7fb583867f57]
E           [bt] (5) /miniconda3/lib/python3.10/lib-dynload/../../libffi.so.8(+0x702a) [0x7fb5b767602a]
E           [bt] (6) /miniconda3/lib/python3.10/lib-dynload/../../libffi.so.8(+0x64a9) [0x7fb5b76754a9]
E           [bt] (7) /miniconda3/lib/python3.10/lib-dynload/../../libffi.so.8(ffi_call+0xdd) [0x7fb5b7675bbd]
E           [bt] (8) /miniconda3/lib/python3.10/lib-dynload/_ctypes.cpython-310-x86_64-linux-gnu.so(+0x8c7b) [0x7fb5b7683c7b]
E
E
E
E       assert 1 == 0

xgboost/container/test_training.py:104: AssertionError
_________________ TestValidTraining.test_checkpoint_and_reload _________________

self = <container.test_training.TestValidTraining object at 0x7f11f6c37380>
docker_client = <docker.client.DockerClient object at 0x7f11f6f7d0d0>
image_uri = '404426647817.dkr.ecr.us-west-2.amazonaws.com/ci:xgboost-3.0.5-cpu-py310-cu126-ubuntu20.04-sagemaker-23865911659'
training_resources = '/tmp/xgb-container-test-ptswvydm/training'

def test_checkpoint_and_reload(self, docker_client, image_uri, training_resources):
"""Train 10 rounds, verify checkpoints, then resume to 20 rounds."""
hp1 = copy.deepcopy(STD_HP)
hp1["num_round"] = 10
hp1["eval_metric"] = "error"
hp1.pop("early_stopping_rounds", None)

idc = copy.deepcopy(STD_IDC)
idc["train"]["ContentType"] = "text/libsvm"
idc.pop("validation", None)

d = _libsvm_dir(training_resources)
train_files = [os.path.join(d, "agaricus.libsvm.train")]

# Phase 1: train 10 rounds
exit_code, logs, model_files, paths = run_training(
docker_client, image_uri, hp1, idc, STD_RC,
training_files=train_files, checkpointconfig=STD_CPC,
)
assert exit_code == 0
assert len(model_files) == 1

ckpt_files = os.listdir(paths["checkpoints"])
assert all(f.startswith("xgboost-checkpoint") for f in ckpt_files)
regex = r"\[\d+\].*(?=.*train-error:.*)"
assert len(re.findall(regex, logs)) == 10
>       assert len(ckpt_files) == 5
E       AssertionError: assert 1 == 5
E        +  where 1 = len(['xgboost-checkpoint_0.ubj'])

xgboost/container/test_training.py:283: AssertionError
_____ TestInvalidTraining.test_invalid_hyperparameter[tree_method-values7] _____

self = <container.test_training.TestInvalidTraining object at 0x7f11f6c37f20>
docker_client = <docker.client.DockerClient object at 0x7f11f6f7d0d0>
image_uri = '404426647817.dkr.ecr.us-west-2.amazonaws.com/ci:xgboost-3.0.5-cpu-py310-cu126-ubuntu20.04-sagemaker-23865911659'
training_resources = '/tmp/xgb-container-test-ptswvydm/training'
param = 'tree_method', values = ['invalid_method', 'gpu_exact', 'gpu_hist']

@pytest.mark.parametrize("param,values", [
("eta", ["-0.1", "1.01", "invalid_string"]),
("gamma", ["-0.1", "invalid_string"]),
("max_depth", ["-0.1", "invalid_string"]),
("min_child_weight", ["-0.1", "invalid_string"]),
("max_delta_step", ["-0.1", "invalid_string"]),
("colsample_bytree", ["-0.1", "0", "invalid_string"]),
("colsample_bylevel", ["-0.1", "0", "invalid_string"]),
("tree_method", ["invalid_method", "gpu_exact", "gpu_hist"]),
("sketch_eps", ["0", "1", "invalid_string"]),
("refresh_leaf", ["invalid", "2"]),
("process_type", ["invalid", "0.01"]),
("grow_policy", ["invalid", "0.01"]),
("sample_type", ["invalid", "0.01"]),
("normalize_type", ["invalid", "0.01"]),
("rate_drop", ["invalid", "-0.01", "1.01"]),
("one_drop", ["invalid", "-0.01", "1.01"]),
("skip_drop", ["invalid", "-0.01", "1.01"]),
("tweedie_variance_power", ["invalid", "1", "2"]),
("eval_metric", ["invalid", "1", "rmse,invalid", "error@nonfloat"]),
("booster", ["invalid", "1"]),
("verbosity", ["invalid", "-1", "4", "0.5"]),
])
def test_invalid_hyperparameter(self, docker_client, image_uri, training_resources,
param, values):
train, val = self._get_libsvm_data(training_resources)
hp = copy.deepcopy(STD_HP)
for v in values:
hp[param] = v
result = _run(docker_client, image_uri, training_resources, hp, STD_IDC, STD_RC,
train, val)
>           _assert_failed(result)

xgboost/container/test_training.py:405:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

result = (0, '/miniconda3/lib/python3.10/site-packages/sagemaker_containers/_server.py:22: UserWarning: pkg_resources is deprec... '/tmp/xgb-train-4tccj7i0/input/data/train', 'input_validation': '/tmp/xgb-train-4tccj7i0/input/data/validation', ...})
regex = 'UserError:'

def _assert_failed(result, regex="UserError:"):
exit_code, logs, _, _ = result
>       assert re.search(regex, logs), f"Pattern {regex!r} not found in logs"
E       AssertionError: Pattern 'UserError:' not found in logs
E       assert None
E        +  where None = <function search at 0x7f11f9e60680>('UserError:', '/miniconda3/lib/python3.10/site-packages/sagemaker_containers/_server.py:22: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.\n  import pkg_resources\n[2026-04-01:19:11:48:INFO] Imported framework sagemaker_xgboost_container.training\n[2026-04-01:19:11:48:INFO] Failed to parse hyperparameter eval_metric value error to Json.\nReturning the value itself\n[2026-04-01:19:11:48:INFO] Failed to parse hyperparameter predictor value cpu_predictor to Json.\nReturning the value itself\n[2026-04-01:19:11:48:INFO] Failed to parse hyperparameter tree_method value gpu_hist to Json.\nReturning the value itself\n[2026-04-01:19:11:48:INFO] Failed to parse hyperparameter normalize_type value tree to Json.\nReturning the value itself\n[2026-04-01:19:11:48:INFO] Failed to parse hyperparameter sample_type value uniform to Json.\nReturning the value itself\n[2026-04-01:19:11:48:INFO] Failed to parse hyperparameter booster value gbtree to Json.\nReturning the value itself\n[2026-04-01:19:11:48:INFO] Failed to pa...61\tvalidation-error:0.00000\n[4]\ttrain-error:0.00000\tvalidation-error:0.00000\n/miniconda3/lib/python3.10/site-packages/xgboost/callback.py:503: UserWarning: [19:11:48] WARNING: /workspace/src/gbm/gbtree.cc:359: \n  Loading from a raw memory buffer (like pickle in Python, RDS in R) on a CPU-only\n  machine. Consider using `save_model/load_model` instead. See:\n\n    https://xgboost.readthedocs.io/en/latest/tutorials/saving_model.html\n\n  for more details about differences between saving model and serializing.  
Changing `tree_method` to `hist`.\n  model = model[: best_iteration + 1]\n[2026-04-01:19:11:48:INFO] FINAL_MODEL_DEBUG: is_master=True, model_dir=/opt/ml/model\nFINAL_MODEL_DEBUG: is_master=True, model_dir=/opt/ml/model\n[2026-04-01:19:11:48:INFO] FINAL_MODEL_SAVE: Saving final model as master\nFINAL_MODEL_SAVE: Saving final model as master\n/miniconda3/lib/python3.10/site-packages/sagemaker_xgboost_container/algorithm_mode/train.py:480: UserWarning: [19:11:48] WARNING: /workspace/src/c_api/c_api.cc:1427: Saving model in the UBJSON format as default.  You can use file extension: `json`, `ubj` or `deprecated` to choose between formats.\n  bst.save_model(model_location)\n')
E        +    where <function search at 0x7f11f9e60680> = re.search

xgboost/container/test_training.py:112: AssertionError
==================================== PASSES ====================================
=========================== short test summary info ============================
PASSED xgboost/container/test_training.py::TestValidTraining::test_single_file_libsvm
PASSED xgboost/container/test_training.py::TestValidTraining::test_single_file_libsvm_weights
PASSED xgboost/container/test_training.py::TestValidTraining::test_single_file_libsvm_hpo_param
PASSED xgboost/container/test_training.py::TestValidTraining::test_single_file_libsvm_multiclass_hpo
PASSED xgboost/container/test_training.py::TestValidTraining::test_single_file_libsvm_threshold_eval_metric
PASSED xgboost/container/test_training.py::TestValidTraining::test_single_file_libsvm_verbosity
PASSED xgboost/container/test_training.py::TestValidTraining::test_multi_files_libsvm
PASSED xgboost/container/test_training.py::TestValidTraining::test_single_file_csv
PASSED xgboost/container/test_training.py::TestValidTraining::test_single_file_csv_weights
PASSED xgboost/container/test_training.py::TestValidTraining::test_multi_file_csv
PASSED xgboost/container/test_training.py::TestValidTraining::test_single_file_csv_space_separated
PASSED xgboost/container/test_training.py::TestValidTraining::test_single_file_csv_sci_notation
PASSED xgboost/container/test_training.py::TestValidTraining::test_single_file_csv_empty_cells
PASSED xgboost/container/test_training.py::TestInvalidTraining::test_no_training_data
PASSED xgboost/container/test_training.py::TestInvalidTraining::test_no_validation_data
PASSED xgboost/container/test_training.py::TestInvalidTraining::test_invalid_data_csv_content_type
PASSED xgboost/container/test_training.py::TestInvalidTraining::test_csv_alpha_with_csv_content_type
PASSED xgboost/container/test_training.py::TestInvalidTraining::test_csv_data_with_libsvm_content_type
PASSED xgboost/container/test_training.py::TestInvalidTraining::test_invalid_data_with_libsvm_content_type
PASSED xgboost/container/test_training.py::TestInvalidTraining::test_invalid_hyperparameter[eta-values0]
PASSED xgboost/container/test_training.py::TestInvalidTraining::test_invalid_hyperparameter[gamma-values1]
PASSED xgboost/container/test_training.py::TestInvalidTraining::test_invalid_hyperparameter[max_depth-values2]
PASSED xgboost/container/test_training.py::TestInvalidTraining::test_invalid_hyperparameter[min_child_weight-values3]
PASSED xgboost/container/test_training.py::TestInvalidTraining::test_invalid_hyperparameter[max_delta_step-values4]
PASSED xgboost/container/test_training.py::TestInvalidTraining::test_invalid_hyperparameter[colsample_bytree-values5]
PASSED xgboost/container/test_training.py::TestInvalidTraining::test_invalid_hyperparameter[colsample_bylevel-values6]
PASSED xgboost/container/test_training.py::TestInvalidTraining::test_invalid_hyperparameter[sketch_eps-values8]
PASSED xgboost/container/test_training.py::TestInvalidTraining::test_invalid_hyperparameter[refresh_leaf-values9]
PASSED xgboost/container/test_training.py::TestInvalidTraining::test_invalid_hyperparameter[process_type-values10]
PASSED xgboost/container/test_training.py::TestInvalidTraining::test_invalid_hyperparameter[grow_policy-values11]
PASSED xgboost/container/test_training.py::TestInvalidTraining::test_invalid_hyperparameter[sample_type-values12]
PASSED xgboost/container/test_training.py::TestInvalidTraining::test_invalid_hyperparameter[normalize_type-values13]
PASSED xgboost/container/test_training.py::TestInvalidTraining::test_invalid_hyperparameter[rate_drop-values14]
PASSED xgboost/container/test_training.py::TestInvalidTraining::test_invalid_hyperparameter[one_drop-values15]
PASSED xgboost/container/test_training.py::TestInvalidTraining::test_invalid_hyperparameter[skip_drop-values16]
PASSED xgboost/container/test_training.py::TestInvalidTraining::test_invalid_hyperparameter[tweedie_variance_power-values17]
PASSED xgboost/container/test_training.py::TestInvalidTraining::test_invalid_hyperparameter[eval_metric-values18]
PASSED xgboost/container/test_training.py::TestInvalidTraining::test_invalid_hyperparameter[booster-values19]
PASSED xgboost/container/test_training.py::TestInvalidTraining::test_invalid_hyperparameter[verbosity-values20]
PASSED xgboost/container/test_training.py::TestInvalidTraining::test_missing_num_round
PASSED xgboost/container/test_training.py::TestInvalidTraining::test_multiclass_without_num_class
PASSED xgboost/container/test_training.py::TestInvalidTraining::test_pipe_mode_rejected
FAILED xgboost/container/test_training.py::TestValidTraining::test_single_file_libsvm_iterate_objectives - AssertionError: Training failed:
/miniconda3/lib/python3.10/site-packages/sagemaker_containers/_server.py:22: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.
import pkg_resources
[2026-04-01:19:09:22:INFO] Imported framework sagemaker_xgboost_container.training
[2026-04-01:19:09:22:INFO] Failed to parse hyperparameter eval_metric value error to Json.
Returning the value itself
[2026-04-01:19:09:22:INFO] Failed to parse hyperparameter predictor value cpu_predictor to Json.
Returning the value itself
[2026-04-01:19:09:22:INFO] Failed to parse hyperparameter tree_method value auto to Json.
Returning the value itself
[2026-04-01:19:09:22:INFO] Failed to parse hyperparameter normalize_type value tree to Json.
Returning the value itself
[2026-04-01:19:09:22:INFO] Failed to parse hyperparameter sample_type value uniform to Json.
Returning the value itself
[2026-04-01:19:09:22:INFO] Failed to parse hyperparameter booster value gbtree to Json.
Returning the value itself
[2026-04-01:19:09:22:INFO] Failed to parse hyperparameter objective value reg:gamma to Json.
Returning the value itself
[2026-04-01:19:09:22:INFO] Failed to parse hyperparameter updater value grow_colmaker,prune to Json.
Returning the value itself
[2026-04-01:19:09:22:INFO] Failed to parse hyperparameter process_type value default to Json.
Returning the value itself
[2026-04-01:19:09:22:INFO] Failed to parse hyperparameter dsplit value row to Json.
Returning the value itself
[2026-04-01:19:09:22:INFO] Failed to parse hyperparameter grow_policy value depthwise to Json.
Returning the value itself
[2026-04-01:19:09:22:INFO] No GPUs detected (normal if no gpus installed)
[2026-04-01:19:09:22:INFO] Running XGBoost Sagemaker in algorithm mode
[2026-04-01:19:09:22:INFO] Determined 0 GPU(s) available on the instance.
[2026-04-01:19:09:22:INFO] File path /opt/ml/input/data/train of input files
[2026-04-01:19:09:22:INFO] Making smlinks from folder /opt/ml/input/data/train to folder /tmp/sagemaker_xgboost_input_data
[2026-04-01:19:09:22:INFO] creating symlink between Path /opt/ml/input/data/train/agaricus.libsvm.train and destination /tmp/sagemaker_xgboost_input_data/agaricus.libsvm.train1664359970552213804
[2026-04-01:19:09:22:INFO] files path: /tmp/sagemaker_xgboost_input_data
[2026-04-01:19:09:22:INFO] File path /opt/ml/input/data/validation of input files
[2026-04-01:19:09:22:INFO] Making smlinks from folder /opt/ml/input/data/validation to folder /tmp/sagemaker_xgboost_input_data
[2026-04-01:19:09:22:INFO] creating symlink between Path /opt/ml/input/data/validation/agaricus.libsvm.test and destination /tmp/sagemaker_xgboost_input_data/agaricus.libsvm.test1757920320072049626
[2026-04-01:19:09:22:INFO] files path: /tmp/sagemaker_xgboost_input_data
[2026-04-01:19:09:22:INFO] Single node training.
[2026-04-01:19:09:22:INFO] TRAIN_JOB_DEBUG: Received is_master=True
TRAIN_JOB_DEBUG: Received is_master=True
[2026-04-01:19:09:22:INFO] Train matrix has 6513 rows and 127 columns
[2026-04-01:19:09:22:INFO] Validation matrix has 1611 rows
[2026-04-01:19:09:22:INFO] CALLBACK_SETUP_DEBUG: save_model_on_termination=false, is_master=True
[2026-04-01:19:09:22:INFO] CALLBACK_SKIPPING save_model_on_termination=false, is_master=True)
/miniconda3/lib/python3.10/site-packages/xgboost/callback.py:386: UserWarning: [19:09:22] WARNING: /workspace/src/common/error_msg.cc:33: You have manually specified the `updater` parameter. The `tree_method` parameter will be ignored. Incorrect sequence of updaters will produce undefined behavior. For common uses, we recommend using `tree_method` parameter instead.
self.starting_round = model.num_boosted_rounds()
/miniconda3/lib/python3.10/site-packages/xgboost/callback.py:386: UserWarning: [19:09:22] WARNING: /workspace/src/learner.cc:738:
Parameters: { "dsplit", "lambda_bias", "normalize_type", "one_drop", "predictor", "rate_drop", "sample_type", "sketch_eps", "skip_drop", "tweedie_variance_power" } are not used.

self.starting_round = model.num_boosted_rounds()
[2026-04-01:19:09:22:ERROR] Reporting training FAILURE
[2026-04-01:19:09:22:ERROR] framework error:
Traceback (most recent call last):
File "/miniconda3/lib/python3.10/site-packages/sagemaker_xgboost_container/algorithm_mode/train.py", line 367, in train_job
bst = xgb.train(
File "/miniconda3/lib/python3.10/site-packages/xgboost/core.py", line 729, in inner_f
return func(**kwargs)
File "/miniconda3/lib/python3.10/site-packages/xgboost/training.py", line 183, in train
bst.update(dtrain, iteration=i, fobj=obj)
File "/miniconda3/lib/python3.10/site-packages/xgboost/core.py", line 2246, in update
_check_call(
File "/miniconda3/lib/python3.10/site-packages/xgboost/core.py", line 310, in _check_call
raise XGBoostError(py_str(_LIB.XGBGetLastError()))
xgboost.core.XGBoostError: [19:09:22] /workspace/src/objective/regression_obj.cu:88: label must be positive for gamma regression.
Stack trace:
[bt] (0) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x2a6e7c) [0x7fb583957e7c]
[bt] (1) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0xf02dcb) [0x7fb5845b3dcb]
[bt] (2) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0xf03333) [0x7fb5845b4333]
[bt] (3) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x68d2a2) [0x7fb583d3e2a2]
[bt] (4) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(XGBoosterUpdateOneIter+0x77) [0x7fb583867f57]
[bt] (5) /miniconda3/lib/python3.10/lib-dynload/../../libffi.so.8(+0x702a) [0x7fb5b767602a]
[bt] (6) /miniconda3/lib/python3.10/lib-dynload/../../libffi.so.8(+0x64a9) [0x7fb5b76754a9]
[bt] (7) /miniconda3/lib/python3.10/lib-dynload/../../libffi.so.8(ffi_call+0xdd) [0x7fb5b7675bbd]
[bt] (8) /miniconda3/lib/python3.10/lib-dynload/_ctypes.cpython-310-x86_64-linux-gnu.so(+0x8c7b) [0x7fb5b7683c7b]

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
File "/miniconda3/lib/python3.10/site-packages/sagemaker_containers/_trainer.py", line 84, in train
entrypoint()
File "/miniconda3/lib/python3.10/site-packages/sagemaker_xgboost_container/training.py", line 102, in main
train(framework.training_env())
File "/miniconda3/lib/python3.10/site-packages/sagemaker_xgboost_container/training.py", line 98, in train
run_algorithm_mode()
File "/miniconda3/lib/python3.10/site-packages/sagemaker_xgboost_container/training.py", line 64, in run_algorithm_mode
sagemaker_train(
File "/miniconda3/lib/python3.10/site-packages/sagemaker_xgboost_container/algorithm_mode/train.py", line 278, in sagemaker_train
train_job(**train_args)
File "/miniconda3/lib/python3.10/site-packages/sagemaker_xgboost_container/algorithm_mode/train.py", line 467, in train_job
raise exc.AlgorithmError(f"{exception_prefix}:\n {str(e)}")
sagemaker_algorithm_toolkit.exceptions.AlgorithmError: XGB train call failed with exception:
[19:09:22] /workspace/src/objective/regression_obj.cu:88: label must be positive for gamma regression.
Stack trace:
[bt] (0) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x2a6e7c) [0x7fb583957e7c]
[bt] (1) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0xf02dcb) [0x7fb5845b3dcb]
[bt] (2) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0xf03333) [0x7fb5845b4333]
[bt] (3) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x68d2a2) [0x7fb583d3e2a2]
[bt] (4) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(XGBoosterUpdateOneIter+0x77) [0x7fb583867f57]
[bt] (5) /miniconda3/lib/python3.10/lib-dynload/../../libffi.so.8(+0x702a) [0x7fb5b767602a]
[bt] (6) /miniconda3/lib/python3.10/lib-dynload/../../libffi.so.8(+0x64a9) [0x7fb5b76754a9]
[bt] (7) /miniconda3/lib/python3.10/lib-dynload/../../libffi.so.8(ffi_call+0xdd) [0x7fb5b7675bbd]
[bt] (8) /miniconda3/lib/python3.10/lib-dynload/_ctypes.cpython-310-x86_64-linux-gnu.so(+0x8c7b) [0x7fb5b7683c7b]

XGB train call failed with exception:
[19:09:22] /workspace/src/objective/regression_obj.cu:88: label must be positive for gamma regression.
Stack trace:
[bt] (0) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x2a6e7c) [0x7fb583957e7c]
[bt] (1) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0xf02dcb) [0x7fb5845b3dcb]
[bt] (2) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0xf03333) [0x7fb5845b4333]
[bt] (3) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x68d2a2) [0x7fb583d3e2a2]
[bt] (4) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(XGBoosterUpdateOneIter+0x77) [0x7fb583867f57]
[bt] (5) /miniconda3/lib/python3.10/lib-dynload/../../libffi.so.8(+0x702a) [0x7fb5b767602a]
[bt] (6) /miniconda3/lib/python3.10/lib-dynload/../../libffi.so.8(+0x64a9) [0x7fb5b76754a9]
[bt] (7) /miniconda3/lib/python3.10/lib-dynload/../../libffi.so.8(ffi_call+0xdd) [0x7fb5b7675bbd]
[bt] (8) /miniconda3/lib/python3.10/lib-dynload/_ctypes.cpython-310-x86_64-linux-gnu.so(+0x8c7b) [0x7fb5b7683c7b]

assert 1 == 0
FAILED xgboost/container/test_training.py::TestValidTraining::test_checkpoint_and_reload - AssertionError: assert 1 == 5
+  where 1 = len(['xgboost-checkpoint_0.ubj'])
FAILED xgboost/container/test_training.py::TestInvalidTraining::test_invalid_hyperparameter[tree_method-values7] - AssertionError: Pattern 'UserError:' not found in logs
assert None
+  where None = <function search at 0x7f11f9e60680>('UserError:', '/miniconda3/lib/python3.10/site-packages/sagemaker_containers/_server.py:22: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.\n  import pkg_resources\n[2026-04-01:19:11:48:INFO] Imported framework sagemaker_xgboost_container.training\n[2026-04-01:19:11:48:INFO] Failed to parse hyperparameter eval_metric value error to Json.\nReturning the value itself\n[2026-04-01:19:11:48:INFO] Failed to parse hyperparameter predictor value cpu_predictor to Json.\nReturning the value itself\n[2026-04-01:19:11:48:INFO] Failed to parse hyperparameter tree_method value gpu_hist to Json.\nReturning the value itself\n[2026-04-01:19:11:48:INFO] Failed to parse hyperparameter normalize_type value tree to Json.\nReturning the value itself\n[2026-04-01:19:11:48:INFO] Failed to parse hyperparameter sample_type value uniform to Json.\nReturning the value itself\n[2026-04-01:19:11:48:INFO] Failed to parse hyperparameter booster value gbtree to Json.\nReturning the value itself\n[2026-04-01:19:11:48:INFO] Failed to pa...61\tvalidation-error:0.00000\n[4]\ttrain-error:0.00000\tvalidation-error:0.00000\n/miniconda3/lib/python3.10/site-packages/xgboost/callback.py:503: UserWarning: [19:11:48] WARNING: /workspace/src/gbm/gbtree.cc:359: \n  Loading from a raw memory buffer (like pickle in Python, RDS in R) on a CPU-only\n  machine. Consider using `save_model/load_model` instead. See:\n\n    https://xgboost.readthedocs.io/en/latest/tutorials/saving_model.html\n\n  for more details about differences between saving model and serializing.  
Changing `tree_method` to `hist`.\n  model = model[: best_iteration + 1]\n[2026-04-01:19:11:48:INFO] FINAL_MODEL_DEBUG: is_master=True, model_dir=/opt/ml/model\nFINAL_MODEL_DEBUG: is_master=True, model_dir=/opt/ml/model\n[2026-04-01:19:11:48:INFO] FINAL_MODEL_SAVE: Saving final model as master\nFINAL_MODEL_SAVE: Saving final model as master\n/miniconda3/lib/python3.10/site-packages/sagemaker_xgboost_container/algorithm_mode/train.py:480: UserWarning: [19:11:48] WARNING: /workspace/src/c_api/c_api.cc:1427: Saving model in the UBJSON format as default.  You can use file extension: `json`, `ubj` or `deprecated` to choose between formats.\n  bst.save_model(model_location)\n')
+    where <function search at 0x7f11f9e60680> = re.search
=================== 3 failed, 42 passed in 357.53s (0:05:57) ===================__________________ TestValidScoring.test_execution_parameters __________________

self = <container.test_scoring.TestValidScoring object at 0x7f92d4618500>
docker_client = <docker.client.DockerClient object at 0x7f92d38fd0d0>
image_uri = '404426647817.dkr.ecr.us-west-2.amazonaws.com/ci:xgboost-3.0.5-cpu-py310-cu126-ubuntu20.04-sagemaker-23865911659'
inference_resources = '/tmp/xgb-container-test-n8qucxal/inference'

def test_execution_parameters(self, docker_client, image_uri, inference_resources):
model_dir = _model_path(inference_resources, "mnist-xgb-model")
env = {"MAX_CONTENT_LENGTH": str(21 * 1024 ** 2)}
with ServingContainer(docker_client, image_uri, model_dir, env) as ctx:
resp = ctx.execution_parameters()
params = json.loads(resp.text)
assert params["BatchStrategy"] == "MULTI_RECORD"
assert params["MaxConcurrentTransforms"] == multiprocessing.cpu_count()
>       assert params["MaxPayloadInMB"] == 20
E       assert 21 == 20

xgboost/container/test_scoring.py:74: AssertionError
_____________________ TestValidScoring.test_csv_inference ______________________

self = <container.test_scoring.TestValidScoring object at 0x7f92d3553e30>
docker_client = <docker.client.DockerClient object at 0x7f92d38fd0d0>
image_uri = '404426647817.dkr.ecr.us-west-2.amazonaws.com/ci:xgboost-3.0.5-cpu-py310-cu126-ubuntu20.04-sagemaker-23865911659'
inference_resources = '/tmp/xgb-container-test-n8qucxal/inference'

def test_csv_inference(self, docker_client, image_uri, inference_resources):
# mnist xgb model
responses = _send_requests(
docker_client, image_uri, inference_resources, "mnist-xgb-model", "text/csv",
["mnist-1.csv", "mnist-empty-cell.csv", "mnist-equal-dim.csv", "mnist-700.csv"],
)
_validate_response(responses[0], 1)
_validate_response(responses[1], 1)
_validate_response(responses[2], 1)
>       _validate_response(responses[3], 700)

xgboost/container/test_scoring.py:85:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

resp = <Response [200]>, expected_length = 700

def _validate_response(resp, expected_length):
assert resp.status_code == httplib.OK, resp.text
predicted = resp.text.split(",")
>       assert len(predicted) == expected_length
E       AssertionError: assert 1 == 700
E        +  where 1 = len(['3.0\n8.0\n6.0\n9.0\n6.0\n4.0\n5.0\n3.0\n8.0\n4.0\n5.0\n2.0\n3.0\n8.0\n4.0\n8.0\n1.0\n5.0\n0.0\n5.0\n9.0\n7.0\n4.0\n1.0\n3.0\n3.0\n0.0\n6.0\n2.0\n9.0\n9.0\n4.0\n1.0\n3.0\n6.0\n8.0\n0.0\n7.0\n7.0\n6.0\n8.0\n9.0\n0.0\n3.0\n8.0\n3.0\n7.0\n7.0\n5.0\n1.0\n4.0\n2.0\n2.0\n9.0\n8.0\n1.0\n1.0\n0.0\n6.0\n6.0\n5.0\n0.0\n1.0\n1.0\n7.0\n2.0\n7.0\n3.0\n1.0\n4.0\n0.0\n5.0\n0.0\n6.0\n8.0\n7.0\n6.0\n8.0\n2.0\n9.0\n4.0\n0.0\n6.0\n1.0\n9.0\n2.0\n6.0\n3.0\n8.0\n4.0\n1.0\n5.0\n6.0\n6.0\n1.0\n7.0\n2.0\n8.0\n6.0\n9.0\n7.0\n0.0\n9.0\n8.0\n6.0\n2.0\n8.0\n3.0\n6.0\n4.0\n9.0\n2.0\n8.0\n6.0\n8.0\n7.0\n8.0\n8.0\n6.0\n9.0\n7.0\n7.0\n6.0\n0.0\n3.0\n6.0\n7.0\n0.0\n9.0\n7.0\n1.0\n3.0\n6.0\n8.0\n9.0\n6.0\n1.0\n7.0\n5.0\n1.0\n3.0\n3.0\n5.0\n7.0\n9.0\n9.0\n6.0\n7.0\n3.0\n6.0\n1.0\n0.0\n4.0\n2.0\n4.0\n5.0\n0.0\n0.0\n1.0\n6.0\n6.0\n4.0\n7.0\n9.0\n4.0\n6.0\n5.0\n2.0\n6.0\n9.0\n8.0\n8.0\n8.0\n5.0\n9.0\n3.0\n8.0\n9.0\n1.0\n8.0\n8.0\n3.0\n4.0\n4.0\n3.0\n0.0\n1.0\n5.0\n4.0\n4.0\n1.0\n8.0\n0.0\n6.0\n1.0\n3.0\n1.0\n0.0\n5.0\n6.0\n0.0\n3.0\n5.0\n4.0\n9.0\n0.0\n3.0\n1.0\n0.0\n9.0\n3.0\n2.0\n8.0\n3.0\n3.0\n7.0\n4.0\n9.0\n2.0\n1.0\n6.0\n2.0\n1.0\n8.0\n1.0\n1.0\n9.0\n7.0\n9.0\n2.0\n2.0\n8.0\n1.0\n7.0\n7.0\n0.0\n1.0\n1.0\n8.0\n2...\n2.0\n7.0\n0.0\n7.0\n1.0\n4.0\n9.0\n7.0\n6.0\n5.0\n4.0\n1.0\n9.0\n2.0\n2.0\n0.0\n1.0\n2.0\n2.0\n0.0\n3.0\n1.0\n7.0\n5.0\n0.0\n4.0\n2.0\n7.0\n1.0\n9.0\n3.0\n0.0\n1.0\n6.0\n2.0\n2.0\n5.0\n1.0\n8.0\n3.0\n1.0\n4.0\n6.0\n2.0\n4.0\n8.0\n5.0\n2.0\n6.0\n4.0\n0.0\n8.0\n5.0\n3.0\n9.0\n3.0\n4.0\n0.0\n9.0\n7.0\n2.0\n8.0\n0.0\n8.0\n5.0\n0.0\n2.0\n9.0\n3.0\n8.0\n4.0\n8.0\n5.0\n0.0\n8.0\n7.0\n9.0\n2.0\n0.0\n5.0\n1.0\n0.0\n2.0\n9.0\n3.0\n2.0\n4.0\n8.0\n5.0\n1.0\n6.0\n8.0\n7.0\n3.0\n8.0\n4.0\n7.0\n9.0\n0.0\n3.0\n1.0\n7.0\n2.0\n4.0\n3.0\n0.0\n4.0\n2.0\n5.0\n5.0\n8.0\n2.0\n5.0\n8.0\n2.0\n4.0\n1.0\n9.0\n7.0\n6.0\n2.0\n1.0\n4.0\n6.0\n1.0\n0.0\n4.0\n6.0\n1.0\n6.0\n4.0\n5.0\n9.0\n8.0\n6.0\n8.0\n8.0\n6.0\n4.0\n1.0\n5.0\n5.0\n3.0\n8.0\n7.0\n4.0\n8.0\n6.0\n4.0\n6.0\n3.0\n6.0\n3.0\n9.0\n5
.0\n4.0\n0.0\n0.0\n6.0\n7.0\n1.0\n6.0\n6.0\n9.0\n8.0\n3.0\n7.0\n0.0\n3.0\n0.0\n1.0\n2.0\n5.0\n8.0\n6.0\n4.0\n0.0\n0.0\n8.0\n2.0\n5.0\n5.0\n0.0\n6.0\n6.0\n1.0\n1.0\n8.0\n5.0\n5.0\n8.0\n1.0\n4.0\n0.0\n7.0\n4.0\n6.0\n3.0\n9.0\n3.0\n1.0\n5.0\n9.0\n7.0\n7.0\n6.0\n1.0\n7.0\n2.0\n6.0\n3.0\n3.0\n4.0\n2.0\n5.0\n2.0\n5.0\n1.0\n3.0\n3.0\n7.0\n1.0\n3.0\n0.0\n1.0\n1.0\n8.0\n3.0\n2.0\n5.0\n2.0\n3.0\n3.0\n4.0\n2.0\n6.0\n7.0\n2.0\n4.0\n'])

xgboost/container/test_scoring.py:57: AssertionError
____________________ TestValidScoring.test_libsvm_inference ____________________

self = <con

* AI changes made during Kiro-cli session
---
X-AI-Tool: Kiro-cli
X-AI-Handle-Time-Seconds: 20
X-AI-Prompt: you can change the runner to use gpu fleet for container tests

* AI changes made during Kiro-cli session
---
X-AI-Tool: Kiro-cli
X-AI-Handle-Time-Seconds: 19
X-AI-Prompt: _________________ TestValidScoring.test_binary_classification __________________

self = <container.test_scoring.TestValidScoring object at 0x7f92d3553380>
docker_client = <docker.client.DockerClient object at 0x7f92d38fd0d0>
image_uri = '404426647817.dkr.ecr.us-west-2.amazonaws.com/ci:xgboost-3.0.5-cpu-py310-cu126-ubuntu20.04-sagemaker-23865911659'
inference_resources = '/tmp/xgb-container-test-n8qucxal/inference'

def test_binary_classification(self, docker_client, image_uri, inference_resources):
>       responses = _send_requests(
docker_client, image_uri, inference_resources,
"diabetes-binary-xgb-model", "text/csv",
["diabetes_inference.csv"],
)

xgboost/container/test_scoring.py:124:
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _
xgboost/container/test_scoring.py:43: in _send_requests
with ServingContainer(docker_client, image_uri, model_dir, environment) as ctx:
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
xgboost/container/container_helper.py:152: in __enter__
self._wait_healthy()
_ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _ _

self = <container.container_helper.ServingContainer object at 0x7f92d297c0e0>

def _wait_healthy(self):
deadline = time.time() + SERVE_STARTUP_TIMEOUT
while time.time() < deadline:
self._container.reload()
if self._container.status != "running":
>               raise RuntimeError(
f"Container exited: {self._container.logs().decode()}"
)

* AI changes made during Kiro-cli session
---
X-AI-Tool: Kiro-cli
X-AI-Handle-Time-Seconds: 79
X-AI-Prompt: show 1-2 lines of output from each test for validation. Also run the generate models script once for every test.

* AI changes made during Kiro-cli session
---
X-AI-Tool: Kiro-cli
X-AI-Handle-Time-Seconds: 54
X-AI-Prompt: XGBoost version: 3.0.5
Downloading training data...
Traceback (most recent call last):
File "/work/test/xgboost/container/generate_models.py", line 85, in <module>
main()
File "/work/test/xgboost/container/generate_models.py", line 48, in main
download_s3_dir(s3, S3_BUCKET, S3_TRAINING_PREFIX, data_dir)
File "/work/test/xgboost/container/generate_models.py", line 30, in download_s3_dir
for page in paginator.paginate(Bucket=bucket, Prefix=prefix):
File "/miniconda3/lib/python3.10/site-packages/botocore/paginate.py", line 255, in __iter__
response = self._make_request(current_kwargs)
File "/miniconda3/lib/python3.10/site-packages/botocore/paginate.py", line 332, in _make_request
return self._method(**current_kwargs)
File "/miniconda3/lib/python3.10/site-packages/botocore/client.py", line 357, in _api_call
return self._make_api_call(operation_name, kwargs)
File "/miniconda3/lib/python3.10/site-packages/botocore/client.py", line 662, in _make_api_call
http, parsed_response = self._make_request(
File "/miniconda3/lib/python3.10/site-packages/botocore/client.py", line 682, in _make_request
return self._endpoint.make_request(operation_model, request_dict)
File "/miniconda3/lib/python3.10/site-packages/botocore/endpoint.py", line 102, in make_request
return self._send_request(request_dict, operation_model)
File "/miniconda3/lib/python3.10/site-packages/botocore/endpoint.py", line 132, in _send_request
request = self.create_request(request_dict, operation_model)
File "/miniconda3/lib/python3.10/site-packages/botocore/endpoint.py", line 115, in create_request
self._event_emitter.emit(event_name, request=request,
File "/miniconda3/lib/python3.10/site-packages/botocore/hooks.py", line 356, in emit
return self._emitter.emit(aliased_event_name, **kwargs)
File "/miniconda3/lib/python3.10/site-packages/botocore/hooks.py", line 228, in emit
return self._emit(event_name, kwargs)
File "/miniconda3/lib/python3.10/site-packages/botocore/hooks.py", line 211, in _emit
response = handler(**kwargs)
File "/miniconda3/lib/python3.10/site-packages/botocore/signers.py", line 90, in handler
return self.sign(operation_name, request)
File "/miniconda3/lib/python3.10/site-packages/botocore/signers.py", line 162, in sign
auth.add_auth(request)
File "/miniconda3/lib/python3.10/site-packages/botocore/auth.py", line 373, in add_auth
raise NoCredentialsError()
botocore.exceptions.NoCredentialsError: Unable to locate credentials

* AI changes made during Kiro-cli session
---
X-AI-Tool: Kiro-cli
X-AI-Handle-Time-Seconds: 27
X-AI-Prompt: raise XGBoostError(py_str(_LIB.XGBGetLastError()))
xgboost.core.XGBoostError: [19:48:10] /workspace/src/data/file_iterator.cc:27: Check failed: name_args.size() == 2 (1 vs. 2) : URI parameter `format` is required for loading text data: filename?format=csv
Stack trace:
[bt] (0) /tmp/codebuild-bb176bc9-d23a-41ff-afce-afb44cb732b9/output/src2228/src/be0852b1_3252_46e7_ab08_9ba08201f035/actions-runner/_work/deep-learning-containers/deep-learning-containers/.venv/lib64/python3.12/site-packages/xgboost/lib/libxgboost.so(+0x2a6e7c) [0x7f7716ca5e7c]
[bt] (1) /tmp/codebuild-bb176bc9-d23a-41ff-afce-afb44cb732b9/output/src2228/src/be0852b1_3252_46e7_ab08_9ba08201f035/actions-runner/_wo

* AI changes made during Kiro-cli session
---
X-AI-Tool: Kiro-cli
X-AI-Handle-Time-Seconds: 22
X-AI-Prompt: FAILED xgboost/container/test_training.py::TestValidTraining::test_checkpoint_and_reload - AssertionError: assert 20 == 10
+  where 20 = len(['[0]\t', '[1]\t', '[2]\t', '[3]\t', '[4]\t', '[5]\t', ...])
+    where ['[0]\t', '[1]\t', '[2]\t', '[3]\t', '[4]\t', '[5]\t', ...] = <function findall at 0x7f5301860900>('\\[\\d+\\].*(?=.*train-error:.*)', '/miniconda3/lib/python3.10/site-packages/sagemaker_containers/_server.py:22: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.\n  import pkg_resources\n[2026-04-01:19:54:21:INFO] Imported framework sagemaker_xgboost_container.training\n[2026-04-01:19:54:21:INFO] Failed to parse hyperparameter eval_metric value error to Json.\nReturning the value itself\n[2026-04-01:19:54:21:INFO] Failed to parse hyperparameter predictor value cpu_predictor to Json.\nReturning the value itself\n[2026-04-01:19:54:21:INFO] Failed to parse hyperparameter tree_method value auto to Json.\nReturning the value itself\n[2026-04-01:19:54:21:INFO] Failed to parse hyperparameter normalize_type value tree
+      where <function findall at 0x7f5301860900> = re.findall

* AI changes made during Kiro-cli session
---
X-AI-Tool: Kiro-cli
X-AI-Handle-Time-Seconds: 48
X-AI-Prompt: ==================================== PASSES ====================================
=========================== short test summary info ============================
PASSED xgboost/container/test_scoring.py::TestValidScoring::test_execution_parameters
PASSED xgboost/container/test_scoring.py::TestValidScoring::test_binary_classification
PASSED xgboost/container/test_scoring.py::TestInvalidScoring::test_unsupported_content_type
PASSED xgboost/container/test_scoring.py::TestInvalidScoring::test_empty_payload
PASSED xgboost/container/test_scoring.py::TestInvalidScoring::test_invalid_feature_dimension
PASSED xgboost/container/test_scoring.py::TestInvalidScoring::test_libsvm_payload_with_csv_content_type
PASSED xgboost/container/test_scoring.py::TestInvalidScoring::test_invalid_payload_with_csv_content_type
PASSED xgboost/container/test_scoring.py::TestInvalidScoring::test_csv_payload_with_libsvm_content_type
PASSED xgboost/container/test_scoring.py::TestInvalidScoring::test_invalid_payload_with_libsvm_content_type
FAILED xgboost/container/test_scoring.py::TestValidScoring::test_csv_inference - AssertionError: Unable to evaluate payload provided: [19:54:06] /workspace/src/learner.cc:1483: Check failed: learner_model_param_.num_feature >= p_fmat->Info().num_col_ (127 vs. 784) : Number of columns does not match number of features in booster.
Stack trace:
[bt] (0) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x2a6e7c) [0x7f6d3331de7c]
[bt] (1) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x6777a9) [0x7f6d336ee7a9]
[bt] (2) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x68d962) [0x7f6d33704962]
[bt] (3) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(XGBoosterPredictFromDMatrix+0x2de) [0x7f6d3323196e]
[bt] (4) /miniconda3/lib/python3.10/lib-dynload/../../libffi.so.8(+0x702a) [0x7f6d540f002a]
[bt] (5) /miniconda3/lib/python3.10/lib-dynload/../../libffi.so.8(+0x64a9) [0x7f6d540ef4a9]
[bt] (6) /miniconda3/lib/python3.10/lib-dynload/../../libffi.so.8(ffi_call+0xdd) [0x7f6d540efbbd]
[bt] (7) /miniconda3/lib/python3.10/lib-dynload/_ctypes.cpython-310-x86_64-linux-gnu.so(+0x8c7b) [0x7f6d540fdc7b]
[bt] (8) /miniconda3/lib/python3.10/lib-dynload/_ctypes.cpython-310-x86_64-linux-gnu.so(+0x8565) [0x7f6d540fd565]

assert 400 == <HTTPStatus.OK: 200>
+  where 400 = <Response [400]>.status_code
+  and   <HTTPStatus.OK: 200> = httplib.OK
FAILED xgboost/container/test_scoring.py::TestValidScoring::test_libsvm_inference - AssertionError: Unable to evaluate payload provided: [19:54:10] /workspace/src/learner.cc:1483: Check failed: learner_model_param_.num_feature >= p_fmat->Info().num_col_ (127 vs. 785) : Number of columns does not match number of features in booster.
Stack trace:
[bt] (0) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x2a6e7c) [0x7f30268f4e7c]
[bt] (1) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x6777a9) [0x7f3026cc57a9]
[bt] (2) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x68d962) [0x7f3026cdb962]
[bt] (3) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(XGBoosterPredictFromDMatrix+0x2de) [0x7f302680896e]
[bt] (4) /miniconda3/lib/python3.10/lib-dynload/../../libffi.so.8(+0x702a) [0x7f30476c802a]
[bt] (5) /miniconda3/lib/python3.10/lib-dynload/../../libffi.so.8(+0x64a9) [0x7f30476c74a9]
[bt] (6) /miniconda3/lib/python3.10/lib-dynload/../../libffi.so.8(ffi_call+0xdd) [0x7f30476c7bbd]
[bt] (7) /miniconda3/lib/python3.10/lib-dynload/_ctypes.cpython-310-x86_64-linux-gnu.so(+0x8c7b) [0x7f30476d5c7b]
[bt] (8) /miniconda3/lib/python3.10/lib-dynload/_ctypes.cpython-310-x86_64-linux-gnu.so(+0x8565) [0x7f30476d5565]

assert 400 == <HTTPStatus.OK: 200>
+  where 400 = <Response [400]>.status_code
+  and   <HTTPStatus.OK: 200> = httplib.OK
FAILED xgboost/container/test_scoring.py::TestValidScoring::test_recordio_protobuf_inference - AssertionError: Unable to evaluate payload provided: [19:54:14] /workspace/src/learner.cc:1483: Check failed: learner_model_param_.num_feature >= p_fmat->Info().num_col_ (127 vs. 784) : Number of columns does not match number of features in booster.
Stack trace:
[bt] (0) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x2a6e7c) [0x7fc7f0cdce7c]
[bt] (1) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x6777a9) [0x7fc7f10ad7a9]
[bt] (2) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x68d962) [0x7fc7f10c3962]
[bt] (3) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(XGBoosterPredictFromDMatrix+0x2de) [0x7fc7f0bf096e]
[bt] (4) /miniconda3/lib/python3.10/lib-dynload/../../libffi.so.8(+0x702a) [0x7fc811ab202a]
[bt] (5) /miniconda3/lib/python3.10/lib-dynload/../../libffi.so.8(+0x64a9) [0x7fc811ab14a9]
[bt] (6) /miniconda3/lib/python3.10/lib-dynload/../../libffi.so.8(ffi_call+0xdd) [0x7fc811ab1bbd]
[bt] (7) /miniconda3/lib/python3.10/lib-dynload/_ctypes.cpython-310-x86_64-linux-gnu.so(+0x8c7b) [0x7fc811abfc7b]
[bt] (8) /miniconda3/lib/python3.10/lib-dynload/_ctypes.cpython-310-x86_64-linux-gnu.so(+0x8565) [0x7fc811abf565]

assert 400 == <HTTPStatus.OK: 200>
+  where 400 = <Response [400]>.status_code
+  and   <HTTPStatus.OK: 200> = httplib.OK
FAILED xgboost/container/test_scoring.py::TestValidScoring::test_csv_20mb_payload - AssertionError: Unable to evaluate payload provided: [19:54:24] /workspace/src/learner.cc:1483: Check failed: learner_model_param_.num_feature >= p_fmat->Info().num_col_ (127 vs. 784) : Number of columns does not match number of features in booster.
Stack trace:
[bt] (0) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x2a6e7c) [0x7fe550f0ae7c]
[bt] (1) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x6777a9) [0x7fe5512db7a9]
[bt] (2) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x68d962) [0x7fe5512f1962]
[bt] (3) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(XGBoosterPredictFromDMatrix+0x2de) [0x7fe550e1e96e]
[bt] (4) /miniconda3/lib/python3.10/lib-dynload/../../libffi.so.8(+0x702a) [0x7fe571cde02a]
[bt] (5) /miniconda3/lib/python3.10/lib-dynload/../../libffi.so.8(+0x64a9) [0x7fe571cdd4a9]
[bt] (6) /miniconda3/lib/python3.10/lib-dynload/../../libffi.so.8(ffi_call+0xdd) [0x7fe571cddbbd]
[bt] (7) /miniconda3/lib/python3.10/lib-dynload/_ctypes.cpython-310-x86_64-linux-gnu.so(+0x8c7b) [0x7fe571cebc7b]
[bt] (8) /miniconda3/lib/python3.10/lib-dynload/_ctypes.cpython-310-x86_64-linux-gnu.so(+0x8565) [0x7fe571ceb565]

assert 400 == <HTTPStatus.OK: 200>
+  where 400 = <Response [400]>.status_code
+  and   <HTTPStatus.OK: 200> = httplib.OK
FAILED xgboost/container/test_scoring.py::TestInvalidScoring::test_invalid_accept_selectable_inference - assert 400 == <HTTPStatus.NOT_ACCEPTABLE: 406>
+  where 400 = <Response [400]>.status_code
+  and   <HTTPStatus.NOT_ACCEPTABLE: 406> = httplib.NOT_ACCEPTABLE
==================== 5 failed, 9 passed in 68.12s (0:01:08) ====================
Error: Process completed with exit code 1.

* AI changes made during Kiro-cli session
---
X-AI-Tool: Kiro-cli
X-AI-Handle-Time-Seconds: 24
X-AI-Prompt: invalid-data/
Folder
-
-
-
multi-csv/
Folder
-
-
-
multi-libsvm/
Folder
-
-
-
single-csv/
Folder
-
-
-
single-libsvm/

* AI changes made during Kiro-cli session
---
X-AI-Tool: Kiro-cli
X-AI-Handle-Time-Seconds: 76
X-AI-Prompt: platform linux -- Python 3.12.12, pytest-9.0.2, pluggy-1.6.0 -- /tmp/codebuild-6fc874b1-6fd4-4a35-8ed0-785a8475b80c/output/src2838/src/3af37bd7_ab4b_4a23_b564_f9f48b0dc964/actions-runner/_work/deep-learning-containers/deep-learning-containers/.venv/bin/python3
cachedir: .pytest_cache
rootdir: /tmp/codebuild-6fc874b1-6fd4-4a35-8ed0-785a8475b80c/output/src2838/src/3af37bd7_ab4b_4a23_b564_f9f48b0dc964/actions-runner/_work/deep-learning-containers/deep-learning-containers
configfile: pyproject.toml
collecting ... collected 14 items

xgboost/container/test_scoring.py::TestValidScoring::test_execution_parameters PASSED [  7%]
xgboost/container/test_scoring.py::TestValidScoring::test_csv_inference FAILED [ 14%]
xgboost/container/test_scoring.py::TestValidScoring::test_libsvm_inference FAILED [ 21%]
xgboost/container/test_scoring.py::TestValidScoring::test_recordio_protobuf_inference FAILED [ 28%]
xgboost/container/test_scoring.py::TestValidScoring::test_binary_classification PASSED [ 35%]
xgboost/container/test_scoring.py::TestValidScoring::test_csv_20mb_payload PASSED [ 42%]
xgboost/container/test_scoring.py::TestInvalidScoring::test_unsupported_content_type PASSED [ 50%]
xgboost/container/test_scoring.py::TestInvalidScoring::test_empty_payload PASSED [ 57%]
xgboost/container/test_scoring.py::TestInvalidScoring::test_invalid_feature_dimension FAILED [ 64%]
xgboost/container/test_scoring.py::TestInvalidScoring::test_libsvm_payload_with_csv_content_type PASSED [ 71%]
xgboost/container/test_scoring.py::TestInvalidScoring::test_invalid_payload_with_csv_content_type PASSED [ 78%]
xgboost/container/test_scoring.py::TestInvalidScoring::test_csv_payload_with_libsvm_content_type PASSED [ 85%]
xgboost/container/test_scoring.py::TestInvalidScoring::test_invalid_payload_with_libsvm_content_type PASSED [ 92%]
xgboost/container/test_scoring.py::TestInvalidScoring::test_invalid_accept_selectable_inference PASSED [100%]

=================================== FAILURES ===================================
_____________________ TestValidScoring.test_csv_inference ______________________
xgboost/container/test_scoring.py:85: in test_csv_inference
_validate_response(responses[2], 1)
xgboost/container/test_scoring.py:52: in _validate_response
assert resp.status_code == httplib.OK, resp.text
E   AssertionError: Unable to evaluate payload provided: [20:33:22] /workspace/src/learner.cc:1483: Check failed: learner_model_param_.num_feature >= p_fmat->Info().num_col_ (784 vs. 785) : Number of columns does not match number of features in booster.
E     Stack trace:
E       [bt] (0) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x2a6e7c) [0x7fa4c11cae7c]
E       [bt] (1) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x6777a9) [0x7fa4c159b7a9]
E       [bt] (2) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x68d962) [0x7fa4c15b1962]
E       [bt] (3) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(XGBoosterPredictFromDMatrix+0x2de) [0x7fa4c10de96e]
E       [bt] (4) /miniconda3/lib/python3.10/lib-dynload/../../libffi.so.8(+0x702a) [0x7fa4e1f9d02a]
E       [bt] (5) /miniconda3/lib/python3.10/lib-dynload/../../libffi.so.8(+0x64a9) [0x7fa4e1f9c4a9]
E       [bt] (6) /miniconda3/lib/python3.10/lib-dynload/../../libffi.so.8(ffi_call+0xdd) [0x7fa4e1f9cbbd]
E       [bt] (7) /miniconda3/lib/python3.10/lib-dynload/_ctypes.cpython-310-x86_64-linux-gnu.so(+0x8c7b) [0x7fa4e1faac7b]
E       [bt] (8) /miniconda3/lib/python3.10/lib-dynload/_ctypes.cpython-310-x86_64-linux-gnu.so(+0x8565) [0x7fa4e1faa565]
E
E
E   assert 400 == <HTTPStatus.OK: 200>
E    +  where 400 = <Response [400]>.status_code
E    +  and   <HTTPStatus.OK: 200> = httplib.OK
____________________ TestValidScoring.test_libsvm_inference ____________________
xgboost/container/test_scoring.py:102: in test_libsvm_inference
_validate_response(responses[0], 1)
xgboost/container/test_scoring.py:52: in _validate_response
assert resp.status_code == httplib.OK, resp.text
E   AssertionError: Unable to evaluate payload provided: [20:33:26] /workspace/src/learner.cc:1483: Check failed: learner_model_param_.num_feature >= p_fmat->Info().num_col_ (784 vs. 785) : Number of columns does not match number of features in booster.
E     Stack trace:
E       [bt] (0) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x2a6e7c) [0x7f4e724dce7c]
E       [bt] (1) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x6777a9) [0x7f4e728ad7a9]
E       [bt] (2) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x68d962) [0x7f4e728c3962]
E       [bt] (3) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(XGBoosterPredictFromDMatrix+0x2de) [0x7f4e723f096e]
E       [bt] (4) /miniconda3/lib/python3.10/lib-dynload/../../libffi.so.8(+0x702a) [0x7f4e932b002a]
E       [bt] (5) /miniconda3/lib/python3.10/lib-dynload/../../libffi.so.8(+0x64a9) [0x7f4e932af4a9]
E       [bt] (6) /miniconda3/lib/python3.10/lib-dynload/../../libffi.so.8(ffi_call+0xdd) [0x7f4e932afbbd]
E       [bt] (7) /miniconda3/lib/python3.10/lib-dynload/_ctypes.cpython-310-x86_64-linux-gnu.so(+0x8c7b) [0x7f4e932bdc7b]
E       [bt] (8) /miniconda3/lib/python3.10/lib-dynload/_ctypes.cpython-310-x86_64-linux-gnu.so(+0x8565) [0x7f4e932bd565]
E
E
E   assert 400 == <HTTPStatus.OK: 200>
E    +  where 400 = <Response [400]>.status_code
E    +  and   <HTTPStatus.OK: 200> = httplib.OK
______________ TestValidScoring.test_recordio_protobuf_inference _______________
xgboost/container/test_scoring.py:121: in test_recordio_protobuf_inference
_validate_response(responses[1], 1)
xgboost/container/test_scoring.py:52: in _validate_response
assert resp.status_code == httplib.OK, resp.text
E   AssertionError: Unable to evaluate payload provided: [20:33:30] /workspace/src/learner.cc:1483: Check failed: learner_model_param_.num_feature >= p_fmat->Info().num_col_ (784 vs. 785) : Number of columns does not match number of features in booster.
E     Stack trace:
E       [bt] (0) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x2a6e7c) [0x7f3ababace7c]
E       [bt] (1) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x6777a9) [0x7f3abaf7d7a9]
E       [bt] (2) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x68d962) [0x7f3abaf93962]
E       [bt] (3) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(XGBoosterPredictFromDMatrix+0x2de) [0x7f3abaac096e]
E       [bt] (4) /miniconda3/lib/python3.10/lib-dynload/../../libffi.so.8(+0x702a) [0x7f3adb98102a]
E       [bt] (5) /miniconda3/lib/python3.10/lib-dynload/../../libffi.so.8(+0x64a9) [0x7f3adb9804a9]
E       [bt] (6) /miniconda3/lib/python3.10/lib-dynload/../../libffi.so.8(ffi_call+0xdd) [0x7f3adb980bbd]
E       [bt] (7) /miniconda3/lib/python3.10/lib-dynload/_ctypes.cpython-310-x86_64-linux-gnu.so(+0x8c7b) [0x7f3adb98ec7b]
E       [bt] (8) /miniconda3/lib/python3.10/lib-dynload/_ctypes.cpython-310-x86_64-linux-gnu.so(+0x8565) [0x7f3adb98e565]
E
E
E   assert 400 == <HTTPStatus.OK: 200>
E    +  where 400 = <Response [400]>.status_code
E    +  and   <HTTPStatus.OK: 200> = httplib.OK
______________ TestInvalidScoring.test_invalid_feature_dimension _______________
xgboost/container/test_scoring.py:190: in test_invalid_feature_dimension
assert responses[0].status_code == httplib.BAD_REQUEST
E   assert 200 == <HTTPStatus.BAD_REQUEST: 400>
E    +  where 200 = <Response [200]>.status_code
E    +  and   <HTTPStatus.BAD_REQUEST: 400> = httplib.BAD_REQUEST
==================================== PASSES ====================================
=========================== short test summary info ============================
PASSED xgboost/container/test_scoring.py::TestValidScoring::test_execution_parameters
PASSED xgboost/container/test_scoring.py::TestValidScoring::test_binary_classification
PASSED xgboost/container/test_scoring.py::TestValidScoring::test_csv_20mb_payload
PASSED xgboost/container/test_scoring.py::TestInvalidScoring::test_unsupported_content_type
PASSED xgboost/container/test_scoring.py::TestInvalidScoring::test_empty_payload
PASSED xgboost/container/test_scoring.py::TestInvalidScoring::test_libsvm_payload_with_csv_content_type
PASSED xgboost/container/test_scoring.py::TestInvalidScoring::test_invalid_payload_with_csv_content_type
PASSED xgboost/container/test_scoring.py::TestInvalidScoring::test_csv_payload_with_libsvm_content_type
PASSED xgboost/container/test_scoring.py::TestInvalidScoring::test_invalid_payload_with_libsvm_content_type
PASSED xgboost/container/test_scoring.py::TestInvalidScoring::test_invalid_accept_selectable_inference
FAILED xgboost/container/test_scoring.py::TestValidScoring::test_csv_inference - AssertionError: Unable to evaluate payload provided: [20:33:22] /workspace/src/learner.cc:1483: Check failed: learner_model_param_.num_feature >= p_fmat->Info().num_col_ (784 vs. 785) : Number of columns does not match number of features in booster.
Stack trace:
[bt] (0) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x2a6e7c) [0x7fa4c11cae7c]
[bt] (1) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x6777a9) [0x7fa4c159b7a9]
[bt] (2) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x68d962) [0x7fa4c15b1962]
[bt] (3) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(XGBoosterPredictFromDMatrix+0x2de) [0x7fa4c10de96e]
[bt] (4) /miniconda3/lib/python3.10/lib-dynload/../../libffi.so.8(+0x702a) [0x7fa4e1f9d02a]
[bt] (5) /miniconda3/lib/python3.10/lib-dynload/../../libffi.so.8(+0x64a9) [0x7fa4e1f9c4a9]
[bt] (6) /miniconda3/lib/python3.10/lib-dynload/../../libffi.so.8(ffi_call+0xdd) [0x7fa4e1f9cbbd]
[bt] (7) /miniconda3/lib/python3.10/lib-dynload/_ctypes.cpython-310-x86_64-linux-gnu.so(+0x8c7b) [0x7fa4e1faac7b]
[bt] (8) /miniconda3/lib/python3.10/lib-dynload/_ctypes.cpython-310-x86_64-linux-gnu.so(+0x8565) [0x7fa4e1faa565]

assert 400 == <HTTPStatus.OK: 200>
+  where 400 = <Response [400]>.status_code
+  and   <HTTPStatus.OK: 200> = httplib.OK
FAILED xgboost/container/test_scoring.py::TestValidScoring::test_libsvm_inference - AssertionError: Unable to evaluate payload provided: [20:33:26] /workspace/src/learner.cc:1483: Check failed: learner_model_param_.num_feature >= p_fmat->Info().num_col_ (784 vs. 785) : Number of columns does not match number of features in booster.
Stack trace:
[bt] (0) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x2a6e7c) [0x7f4e724dce7c]
[bt] (1) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x6777a9) [0x7f4e728ad7a9]
[bt] (2) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x68d962) [0x7f4e728c3962]
[bt] (3) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(XGBoosterPredictFromDMatrix+0x2de) [0x7f4e723f096e]
[bt] (4) /miniconda3/lib/python3.10/lib-dynload/../../libffi.so.8(+0x702a) [0x7f4e932b002a]
[bt] (5) /miniconda3/lib/python3.10/lib-dynload/../../libffi.so.8(+0x64a9) [0x7f4e932af4a9]
[bt] (6) /miniconda3/lib/python3.10/lib-dynload/../../libffi.so.8(ffi_call+0xdd) [0x7f4e932afbbd]
[bt] (7) /miniconda3/lib/python3.10/lib-dynload/_ctypes.cpython-310-x86_64-linux-gnu.so(+0x8c7b) [0x7f4e932bdc7b]
[bt] (8) /miniconda3/lib/python3.10/lib-dynload/_ctypes.cpython-310-x86_64-linux-gnu.so(+0x8565) [0x7f4e932bd565]

assert 400 == <HTTPStatus.OK: 200>
+  where 400 = <Response [400]>.status_code
+  and   <HTTPStatus.OK: 200> = httplib.OK
FAILED xgboost/container/test_scoring.py::TestValidScoring::test_recordio_protobuf_inference - AssertionError: Unable to evaluate payload provided: [20:33:30] /workspace/src/learner.cc:1483: Check failed: learner_model_param_.num_feature >= p_fmat->Info().num_col_ (784 vs. 785) : Number of columns does not match number of features in booster.
Stack trace:
[bt] (0) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x2a6e7c) [0x7f3ababace7c]
[bt] (1) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x6777a9) [0x7f3abaf7d7a9]
[bt] (2) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x68d962) [0x7f3abaf93962]
[bt] (3) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(XGBoosterPredictFromDMatrix+0x2de) [0x7f3abaac096e]
[bt] (4) /miniconda3/lib/python3.10/lib-dynload/../../libffi.so.8(+0x702a) [0x7f3adb98102a]
[bt] (5) /miniconda3/lib/python3.10/lib-dynload/../../libffi.so.8(+0x64a9) [0x7f3adb9804a9]
[bt] (6) /miniconda3/lib/python3.10/lib-dynload/../../libffi.so.8(ffi_call+0xdd) [0x7f3adb980bbd]
[bt] (7) /miniconda3/lib/python3.10/lib-dynload/_ctypes.cpython-310-x86_64-linux-gnu.so(+0x8c7b) [0x7f3adb98ec7b]
[bt] (8) /miniconda3/lib/python3.10/lib-dynload/_ctypes.cpython-310-x86_64-linux-gnu.so(+0x8565) [0x7f3adb98e565]

assert 400 == <HTTPStatus.OK: 200>
+  where 400 = <Response [400]>.status_code
+  and   <HTTPStatus.OK: 200> = httplib.OK
FAILED xgboost/container/test_scoring.py::TestInvalidScoring::test_invalid_feature_dimension - assert 200 == <HTTPStatus.BAD_REQUEST: 400>
+  where 200 = <Response [200]>.status_code
+  and   <HTTPStatus.BAD_REQUEST: 400> = httplib.BAD_REQUEST
=================== 4 failed, 10 passed in 65.56s (0:01:05) ========

* AI changes made during Kiro-cli session
---
X-AI-Tool: Kiro-cli
X-AI-Handle-Time-Seconds: 84
X-AI-Prompt: can you try to fix this?

* AI changes made during Kiro-cli session
---
X-AI-Tool: Kiro-cli
X-AI-Handle-Time-Seconds: 131
X-AI-Prompt: complete the TODOs. We are training the models, right? Can't we use the pkl files?

* AI changes made during Kiro-cli session
---
X-AI-Tool: Kiro-cli
X-AI-Handle-Time-Seconds: 92
X-AI-Prompt: <none>

* AI changes made during Kiro-cli session
---
X-AI-Tool: Kiro-cli
X-AI-Handle-Time-Seconds: 32
X-AI-Prompt: FAILED xgboost/container/test_training.py::TestValidTraining::test_two_container_with_libsvm_data - AssertionError: No model files in master node model dir
assert 0 >= 1
+  where 0 = len([])
FAILED xgboost/container/test_training.py::TestValidTraining::test_two_container_with_libsvm_data_shardedbykey - AssertionError: No model files in master node model dir
assert 0 >= 1
+  where 0 = len([])
=================== 2 failed, 52 passed in 257.35s (0:04:17) ===================

* AI changes made during Kiro-cli session
---
X-AI-Tool: Kiro-cli
X-AI-Handle-Time-Seconds: 52
X-AI-Prompt: debug it

* AI changes made during Kiro-cli session
---
X-AI-Tool: Kiro-cli
X-AI-Handle-Time-Seconds: 70
X-AI-Prompt: can you check the test logic again and compare with git farm repo

* AI changes made during Kiro-cli session
---
X-AI-Tool: Kiro-cli
X-AI-Handle-Time-Seconds: 29
X-AI-Prompt: FAILED xgboost/container/test_training.py::TestValidTraining::test_two_container_with_libsvm_data - AssertionError: No model files in master node model dir
assert 0 >= 1
+  where 0 = len([])
FAILED xgboost/container/test_training.py::TestValidTraining::test_two_container_with_libsvm_data_shardedbykey - AssertionError: No model files in master node model dir
assert 0 >= 1
+  where 0 = len([])

* AI changes made during Kiro-cli session
---
X-AI-Tool: Kiro-cli
X-AI-Handle-Time-Seconds: 76
X-AI-Prompt: E     [2026-04-01:22:13:37:WARNING] Host algo-1 does not have validation data in the validation channel : {'ContentType': 'text/libsvm', 'S3DistributionType': 'FullyReplicated', 'TrainingInputMode': 'File'}. Will broadcast to cluster and this host algo-1 will not be used in distributed training. Please divide the validation data across instances properly. See https://docs.aws.amazon.com/sagemaker/latest/dg/xgboost.html#Instance-XGBoost-distributed-training-divide-data.

* AI changes made during Kiro-cli session
---
X-AI-Tool: Kiro-cli
X-AI-Handle-Time-Seconds: 25
X-AI-Prompt: for each test, can we show the output of the training/inference?

* AI changes made during Kiro-cli session
---
X-AI-Tool: Kiro-cli
X-AI-Handle-Time-Seconds: 73
X-AI-Prompt: [2026-04-02:18:59:16:INFO] Distributed node training with 2 hosts: ['algo-1', 'algo-2']
[2026-04-02:18:59:16:INFO] RabitTracker started, worker_args: {'dmlc_tracker_port': 9099, 'dmlc_tracker_uri': '10.5.5.2'}
[18:59:16] WARNING: /workspace/src/collective/tracker.cc:301: Failed to initialize worker proxy.
- [protocol.h:57|18:59:16]: Failed to verify.
[18:59:16] WARNING: /workspace/src/collective/tracker.cc:301: Failed to initialize worker proxy.
- [socket.h:79|18:59:16]: recv system error:Connection reset by peer
[18:59:16] Task 0 got rank 0
[2026-04-02:18:59:16:INFO] RabitTracker started, worker_args: {'dmlc_tracker_port': 9100, 'dmlc_tracker_uri': '10.5.5.2'}
[18:59:16] WARNING: /workspace/src/collective/tracker.cc:301: Failed to initialize worker proxy.
- [protocol.h:57|18:59:16]: Failed to verify.
[18:59:26] WARNING: /workspace/src/collective/tracker.cc:301: Failed to initialize worker proxy.
- [protocol.h:57|18:59:26]: Failed to verify.
[18:59:26] Task 0 got rank 0
TRAIN_JOB_DEBUG: Received is_master=True
[2026-04-02:18:59:26:INFO] TRAIN_JOB_DEBUG: Received is_master=True
[2026-04-02:18:59:26:INFO] Train matrix has 6513 rows and 127 columns
[2026-04-02:18:59:26:INFO] Validation matrix has 1611 rows
[2026-04-02:18:59:26:INFO] CALLBACK_SETUP_DEBUG: save_model_on_termination=false, is_master=True
[2026-04-02:18:59:26:INFO] CALLBACK_SKIPPING save_model_on_termination=false, is_master=True)
/miniconda3/lib/python3.10/site-packages/xgboost/callback.py:386: UserWarning: [18:59:26] WARNING: /workspace/src/common/error_msg.cc:33: You have manually specified the `updater` parameter. The `tree_method` parameter will be ignored. Incorrect sequence of updaters will produce undefined behavior. For common uses, we recommend using `tree_method` parameter instead.
self.starting_round = model.num_boosted_rounds()
/miniconda3/lib/python3.10/site-packages/xgboost/callback.py:386: UserWarning: [18:59:26] WARNING: /workspace/src/learner.cc:738:
Parameters: { "dsplit", "lambda_bias", "normalize_type", "one_drop", "predictor", "rate_drop", "sample_type", "sketch_eps", "skip_drop", "tweedie_variance_power" } are not used.

self.starting_round = model.num_boosted_rounds()
[2026-04-02:18:59:26:ERROR] Reporting training FAILURE
[2026-04-02:18:59:26:ERROR] framework error:
Traceback (most recent call last):
File "/miniconda3/lib/python3.10/site-packages/sagemaker_xgboost_container/algorithm_mode/train.py", line 367, in train_job
bst = xgb.train(
File "/miniconda3/lib/python3.10/site-packages/xgboost/core.py", line 729, in inner_f
return func(**kwargs)
File "/miniconda3/lib/python3.10/site-packages/xgboost/training.py", line 183, in train
bst.update(dtrain, iteration=i, fobj=obj)
File "/miniconda3/lib/python3.10/site-packages/xgboost/core.py", line 2246, in update
_check_call(
File "/miniconda3/lib/python3.10/site-packages/xgboost/core.py", line 310, in _check_call
raise XGBoostError(py_str(_LIB.XGBGetLastError()))
xgboost.core.XGBoostError: [18:59:26] /workspace/src/tree/updater_colmaker.cc:100: Updater `grow_colmaker` or `exact` tree method doesn't support distributed training.
Stack trace:
[bt] (0) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x2a6e7c) [0x7f691ac64e7c]
[bt] (1) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x95045c) [0x7f691b30e45c]
[bt] (2) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x63b573) [0x7f691aff9573]
[bt] (3) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x63ca0d) [0x7f691affaa0d]
[bt] (4) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x68d33e) [0x7f691b04b33e]
[bt] (5) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(XGBoosterUpdateOneIter+0x77) [0x7f691ab74f57]
[bt] (6) /miniconda3/lib/python3.10/lib-dynload/../../libffi.so.8(+0x702a) [0x7f694418802a]
[bt] (7) /miniconda3/lib/python3.10/lib-dynload/../../libffi.so.8(+0x64a9) [0x7f69441874a9]
[bt] (8) /miniconda3/lib/python3.10/lib-dynload/../../libffi.so.8(ffi_call+0xdd) [0x7f6944187bbd]

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
File "/miniconda3/lib/python3.10/site-packages/sagemaker_containers/_trainer.py", line 84, in train
entrypoint()
File "/miniconda3/lib/python3.10/site-packages/sagemaker_xgboost_container/training.py", line 102, in main
train(framework.training_env())
File "/miniconda3/lib/python3.10/site-packages/sagemaker_xgboost_container/training.py", line 98, in train
run_algorithm_mode()
File "/miniconda3/lib/python3.10/site-packages/sagemaker_xgboost_container/training.py", line 64, in run_algorithm_mode
sagemaker_train(
File "/miniconda3/lib/python3.10/site-packages/sagemaker_xgboost_container/algorithm_mode/train.py", line 262, in sagemaker_train
distributed.rabit_run(
File "/miniconda3/lib/python3.10/site-packages/sagemaker_xgboost_container/distributed.py", line 100, in rabit_run
exec_fun(**args)
File "/miniconda3/lib/python3.10/site-packages/sagemaker_xgboost_container/algorithm_mode/train.py", line 467, in train_job
raise exc.AlgorithmError(f"{exception_prefix}:\n {str(e)}")
sagemaker_algorithm_toolkit.exceptions.AlgorithmError: XGB train call failed with exception:
[18:59:26] /workspace/src/tree/updater_colmaker.cc:100: Updater `grow_colmaker` or `exact` tree method doesn't support distributed training.
Stack trace:
[bt] (0) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x2a6e7c) [0x7f691ac64e7c]
[bt] (1) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x95045c) [0x7f691b30e45c]
[bt] (2) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x63b573) [0x7f691aff9573]
[bt] (3) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x63ca0d) [0x7f691affaa0d]
[bt] (4) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x68d33e) [0x7f691b04b33e]
[bt] (5) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(XGBoosterUpdateOneIter+0x77) [0x7f691ab74f57]
[bt] (6) /miniconda3/lib/python3.10/lib-dynload/../../libffi.so.8(+0x702a) [0x7f694418802a]
[bt] (7) /miniconda3/lib/python3.10/lib-dynload/../../libffi.so.8(+0x64a9) [0x7f69441874a9]
[bt] (8) /miniconda3/lib/python3.10/lib-dynload/../../libffi.so.8(ffi_call+0xdd) [0x7f6944187bbd]

XGB train call failed with exception:
[18:59:26] /workspace/src/tree/updater_colmaker.cc:100: Updater `grow_colmaker` or `exact` tree method doesn't support distributed training.
Stack trace:
[bt] (0) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x2a6e7c) [0x7f691ac64e7c]
[bt] (1) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x95045c) [0x7f691b30e45c]
[bt] (2) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x63b573) [0x7f691aff9573]
[bt] (3) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x63ca0d) [0x7f691affaa0d]
[bt] (4) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x68d33e) [0x7f691b04b33e]
[bt] (5) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(XGBoosterUpdateOneIter+0x77) [0x7f691ab74f57]
[bt] (6) /miniconda3/lib/python3.10/lib-dynload/../../libffi.so.8(+0x702a) [0x7f694418802a]
[bt] (7) /miniconda3/lib/python3.10/lib-dynload/../../libffi.so.8(+0x64a9) [0x7f69441874a9]
[bt] (8) /miniconda3/lib/python3.10/lib-dynload/../../libffi.so.8(ffi_call+0xdd) [0x7f6944187bbd]

assert 1 == 0
FAILED xgboost/container/test_training.py::TestValidTraining::test_two_container_with_libsvm_data_shardedbykey - AssertionError: Container 1 failed:
/miniconda3/lib/python3.10/site-packages/sagemaker_containers/_server.py:22: UserWarning: pkg_resources is deprecated as an API. See https://setuptools.pypa.io/en/latest/pkg_resources.html. The pkg_resources package is slated for removal as early as 2025-11-30. Refrain from using this package or pin to Setuptools<81.
import pkg_resources
[2026-04-02:18:59:30:INFO] Imported framework sagemaker_xgboost_container.training
[2026-04-02:18:59:30:INFO] Failed to parse hyperparameter eval_metric value error to Json.
Returning the value itself
[2026-04-02:18:59:30:INFO] Failed to parse hyperparameter predictor value cpu_predictor to Json.
Returning the value itself
[2026-04-02:18:59:30:INFO] Failed to parse hyperparameter tree_method value auto to Json.
Returning the value itself
[2026-04-02:18:59:30:INFO] Failed to parse hyperparameter normalize_type value tree to Json.
Returning the value itself
[2026-04-02:18:59:30:INFO] Failed to parse hyperparameter sample_type value uniform to Json.
Returning the value itself
[2026-04-02:18:59:30:INFO] Failed to parse hyperparameter booster value gbtree to Json.
Returning the value itself
[2026-04-02:18:59:30:INFO] Failed to parse hyperparameter objective value binary:logistic to Json.
Returning the value itself
[2026-04-02:18:59:30:INFO] Failed to parse hyperparameter updater value grow_colmaker,prune to Json.
Returning the value itself
[2026-04-02:18:59:30:INFO] Failed to parse hyperparameter process_type value default to Json.
Returning the value itself
[2026-04-02:18:59:30:INFO] Failed to parse hyperparameter dsplit value row to Json.
Returning the value itself
[2026-04-02:18:59:30:INFO] Failed to parse hyperparameter grow_policy value depthwise to Json.
Returning the value itself
[2026-04-02:18:59:30:INFO] No GPUs detected (normal if no gpus installed)
[2026-04-02:18:59:30:INFO] Running XGBoost Sagemaker in algorithm mode
[2026-04-02:18:59:30:INFO] Determined 0 GPU(s) available on the instance.
[2026-04-02:18:59:30:INFO] File path /opt/ml/input/data/train of input files
[2026-04-02:18:59:30:INFO] Making smlinks from folder /opt/ml/input/data/train to folder /tmp/sagemaker_xgboost_input_data
[2026-04-02:18:59:30:INFO] creating symlink between Path /opt/ml/input/data/train/agaricus.libsvm.train and destination /tmp/sagemaker_xgboost_input_data/agaricus.libsvm.train-2144044128268352997
[2026-04-02:18:59:30:INFO] files path: /tmp/sagemaker_xgboost_input_data
[2026-04-02:18:59:30:INFO] File path /opt/ml/input/data/validation of input files
[2026-04-02:18:59:30:INFO] Making smlinks from folder /opt/ml/input/data/validation to folder /tmp/sagemaker_xgboost_input_data
[2026-04-02:18:59:30:INFO] creating symlink between Path /opt/ml/input/data/validation/agaricus.libsvm.test and destination /tmp/sagemaker_xgboost_input_data/agaricus.libsvm.test4869311951774638332
[2026-04-02:18:59:30:INFO] files path: /tmp/sagemaker_xgboost_input_data
[2026-04-02:18:59:30:INFO] Distributed node training with 2 hosts: ['algo-1', 'algo-2']
[2026-04-02:18:59:30:INFO] RabitTracker started, worker_args: {'dmlc_tracker_port': 9099, 'dmlc_tracker_uri': '10.5.5.2'}
[18:59:30] WARNING: /workspace/src/collective/tracker.cc:301: Failed to initialize worker proxy.
- [protocol.h:57|18:59:30]: Failed to verify.
[18:59:30] WARNING: /workspace/src/collective/tracker.cc:301: Failed to initialize worker proxy.
- [socket.h:79|18:59:30]: recv system error:Connection reset by peer
[18:59:30] Task 0 got rank 0
[2026-04-02:18:59:31:INFO] RabitTracker started, worker_args: {'dmlc_tracker_port': 9100, 'dmlc_tracker_uri': '10.5.5.2'}
[18:59:31] WARNING: /workspace/src/collective/tracker.cc:301: Failed to initialize worker proxy.
- [protocol.h:57|18:59:31]: Failed to verify.
[18:59:40] WARNING: /workspace/src/collective/tracker.cc:301: Failed to initialize worker proxy.
- [protocol.h:57|18:59:40]: Failed to verify.
[18:59:40] Task 0 got rank 0
TRAIN_JOB_DEBUG: Received is_master=True
[2026-04-02:18:59:40:INFO] TRAIN_JOB_DEBUG: Received is_master=True
[2026-04-02:18:59:40:INFO] Train matrix has 6513 rows and 127 columns
[2026-04-02:18:59:40:INFO] Validation matrix has 1611 rows
[2026-04-02:18:59:40:INFO] CALLBACK_SETUP_DEBUG: save_model_on_termination=false, is_master=True
[2026-04-02:18:59:40:INFO] CALLBACK_SKIPPING save_model_on_termination=false, is_master=True)
/miniconda3/lib/python3.10/site-packages/xgboost/callback.py:386: UserWarning: [18:59:40] WARNING: /workspace/src/common/error_msg.cc:33: You have manually specified the `updater` parameter. The `tree_method` parameter will be ignored. Incorrect sequence of updaters will produce undefined behavior. For common uses, we recommend using `tree_method` parameter instead.
self.starting_round = model.num_boosted_rounds()
/miniconda3/lib/python3.10/site-packages/xgboost/callback.py:386: UserWarning: [18:59:40] WARNING: /workspace/src/learner.cc:738:
Parameters: { "dsplit", "lambda_bias", "normalize_type", "one_drop", "predictor", "rate_drop", "sample_type", "sketch_eps", "skip_drop", "tweedie_variance_power" } are not used.

self.starting_round = model.num_boosted_rounds()
[2026-04-02:18:59:41:ERROR] Reporting training FAILURE
[2026-04-02:18:59:41:ERROR] framework error:
Traceback (most recent call last):
File "/miniconda3/lib/python3.10/site-packages/sagemaker_xgboost_container/algorithm_mode/train.py", line 367, in train_job
bst = xgb.train(
File "/miniconda3/lib/python3.10/site-packages/xgboost/core.py", line 729, in inner_f
return func(**kwargs)
File "/miniconda3/lib/python3.10/site-packages/xgboost/training.py", line 183, in train
bst.update(dtrain, iteration=i, fobj=obj)
File "/miniconda3/lib/python3.10/site-packages/xgboost/core.py", line 2246, in update
_check_call(
File "/miniconda3/lib/python3.10/site-packages/xgboost/core.py", line 310, in _check_call
raise XGBoostError(py_str(_LIB.XGBGetLastError()))
xgboost.core.XGBoostError: [18:59:40] /workspace/src/tree/updater_colmaker.cc:100: Updater `grow_colmaker` or `exact` tree method doesn't support distributed training.
Stack trace:
[bt] (0) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x2a6e7c) [0x7f3201955e7c]
[bt] (1) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x95045c) [0x7f3201fff45c]
[bt] (2) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x63b573) [0x7f3201cea573]
[bt] (3) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x63ca0d) [0x7f3201ceba0d]
[bt] (4) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x68d33e) [0x7f3201d3c33e]
[bt] (5) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(XGBoosterUpdateOneIter+0x77) [0x7f3201865f57]
[bt] (6) /miniconda3/lib/python3.10/lib-dynload/../../libffi.so.8(+0x702a) [0x7f322ae6e02a]
[bt] (7) /miniconda3/lib/python3.10/lib-dynload/../../libffi.so.8(+0x64a9) [0x7f322ae6d4a9]
[bt] (8) /miniconda3/lib/python3.10/lib-dynload/../../libffi.so.8(ffi_call+0xdd) [0x7f322ae6dbbd]

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
File "/miniconda3/lib/python3.10/site-packages/sagemaker_containers/_trainer.py", line 84, in train
entrypoint()
File "/miniconda3/lib/python3.10/site-packages/sagemaker_xgboost_container/training.py", line 102, in main
train(framework.training_env())
File "/miniconda3/lib/python3.10/site-packages/sagemaker_xgboost_container/training.py", line 98, in train
run_algorithm_mode()
File "/miniconda3/lib/python3.10/site-packages/sagemaker_xgboost_container/training.py", line 64, in run_algorithm_mode
sagemaker_train(
File "/miniconda3/lib/python3.10/site-packages/sagemaker_xgboost_container/algorithm_mode/train.py", line 262, in sagemaker_train
distributed.rabit_run(
File "/miniconda3/lib/python3.10/site-packages/sagemaker_xgboost_container/distributed.py", line 100, in rabit_run
exec_fun(**args)
File "/miniconda3/lib/python3.10/site-packages/sagemaker_xgboost_container/algorithm_mode/train.py", line 467, in train_job
raise exc.AlgorithmError(f"{exception_prefix}:\n {str(e)}")
sagemaker_algorithm_toolkit.exceptions.AlgorithmError: XGB train call failed with exception:
[18:59:40] /workspace/src/tree/updater_colmaker.cc:100: Updater `grow_colmaker` or `exact` tree method doesn't support distributed training.
Stack trace:
[bt] (0) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x2a6e7c) [0x7f3201955e7c]
[bt] (1) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x95045c) [0x7f3201fff45c]
[bt] (2) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x63b573) [0x7f3201cea573]
[bt] (3) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x63ca0d) [0x7f3201ceba0d]
[bt] (4) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x68d33e) [0x7f3201d3c33e]
[bt] (5) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(XGBoosterUpdateOneIter+0x77) [0x7f3201865f57]
[bt] (6) /miniconda3/lib/python3.10/lib-dynload/../../libffi.so.8(+0x702a) [0x7f322ae6e02a]
[bt] (7) /miniconda3/lib/python3.10/lib-dynload/../../libffi.so.8(+0x64a9) [0x7f322ae6d4a9]
[bt] (8) /miniconda3/lib/python3.10/lib-dynload/../../libffi.so.8(ffi_call+0xdd) [0x7f322ae6dbbd]

XGB train call failed with exception:
[18:59:40] /workspace/src/tree/updater_colmaker.cc:100: Updater `grow_colmaker` or `exact` tree method doesn't support distributed training.
Stack trace:
[bt] (0) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x2a6e7c) [0x7f3201955e7c]
[bt] (1) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x95045c) [0x7f3201fff45c]
[bt] (2) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x63b573) [0x7f3201cea573]
[bt] (3) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x63ca0d) [0x7f3201ceba0d]
[bt] (4) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(+0x68d33e) [0x7f3201d3c33e]
[bt] (5) /miniconda3/lib/python3.10/site-packages/xgboost/lib/libxgboost.so(XGBoosterUpdateOneIter+0x77) [0x7f3201865f57]
[bt] (6) /miniconda3/lib/python3.10/lib-dynload/../../libffi.so.8(+0x702a) [0x7f322ae6e02a]
[bt] (7) /miniconda3/lib/python3.10/lib-dynload/../../libffi.so.8(+0x64a9) [0x7f322ae6d4a9]
[bt] (8) /miniconda3/lib/python3.10/lib-dynload/../../libffi.so.8(ffi_call+0xdd) [0x7f322ae6dbbd]

assert 1 == 0

* revert on push
---
 .../workflows/release-sagemaker-xgboost.yml   |  46 +-
 .../sagemaker-xgboost-integ-tests.yml         | 202 +++++++
 test/xgboost/container/conftest.py            |  65 ++
 test/xgboost/container/container_helper.py    | 300 ++++++++++
 test/xgboost/container/generate_models.py     | 110 ++++
 .../xgboost/container/test_batch_transform.py | 129 ++++
 test/xgboost/container/test_scoring.py        | 248 ++++++++
 test/xgboost/container/test_training.py       | 562 ++++++++++++++++++
 8 files changed, 1622 insertions(+), 40 deletions(-)
 create mode 100644 .github/workflows/sagemaker-xgboost-integ-tests.yml
 create mode 100644 test/xgboost/container/conftest.py
 create mode 100644 test/xgboost/container/container_helper.py
 create mode 100644 test/xgboost/container/generate_models.py
 create mode 100644 test/xgboost/container/test_batch_transform.py
 create mode 100644 test/xgboost/container/test_scoring.py
 create mode 100644 test/xgboost/container/test_training.py

diff --git a/.github/workflows/release-sagemaker-xgboost.yml b/.github/workflows/release-sagemaker-xgboost.yml
index 0acfb719df6a..c82294dd0d44 100644
--- a/.github/workflows/release-sagemaker-xgboost.yml
+++ b/.github/workflows/release-sagemaker-xgboost.yml
@@ -148,47 +148,13 @@ jobs:
       framework: ${{ needs.load-config.outputs.framework }}
       framework-version: ${{ needs.load-config.outputs.framework-version }}
 
-  benchmark-test:
+  xgboost-tests:
     needs: [build-image, load-config]
     if: success()
-    timeout-minutes: 150
-    strategy:
-      fail-fast: false
-      matrix:
-        test-module:
-          - test_training_objective
-          - test_training_tree_method
-          - test_training_max_depth
-          - test_training_num_round
-          - test_training_data_size
-          - test_training_instance_type
-          - test_training_content_type
-    runs-on:
-      - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
-        fleet:default-runner
-        buildspec-override:true
-    concurrency:
-      group: ${{ github.workflow }}-benchmark-${{ matrix.test-module }}-${{ github.run_id }}
-      cancel-in-progress: true
-    steps:
-      - name: Checkout DLC source
-        uses: actions/checkout@v5
-
-      - name: Install test dependencies
-        run: |
-          uv venv --python 3.12
-          source .venv/bin/activate
-          uv pip install -r test/requirements.txt
-          uv pip install -r test/xgboost/requirements.txt
+    uses: ./.github/workflows/sagemaker-xgboost-integ-tests.yml
+    with:
+      image-uri: ${{ needs.build-image.outputs.ci-image }}
+      aws-account-id: ${{ vars.CI_AWS_ACCOUNT_ID }}
+      aws-region: ${{ vars.AWS_REGION }}
 
-      - name: Run ${{ matrix.test-module }}
-        run: |
-          source .venv/bin/activate
-          cd test/
-          python3 -m pytest -vs -rA \
-            --image-uri ${{ needs.build-image.outputs.ci-image }} \
-            xgboost/benchmarks/${{ matrix.test-module }}.py
-
-  # TODO: Add integration-test job once integ tests are implemented
-  # TODO: Add container-test job once container tests are implemented
   # TODO: Add generate-release-spec and release-image jobs when release is ready
diff --git a/.github/workflows/sagemaker-xgboost-integ-tests.yml b/.github/workflows/sagemaker-xgboost-integ-tests.yml
new file mode 100644
index 000000000000..76bb4ff4b430
--- /dev/null
+++ b/.github/workflows/sagemaker-xgboost-integ-tests.yml
@@ -0,0 +1,202 @@
+name: Reusable XGBoost SageMaker Integration Tests
+
+permissions:
+  contents: read
+
+on:
+  workflow_call:
+    inputs:
+      image-uri:
+        description: 'Image URI to test'
+        required: true
+        type: string
+      aws-account-id:
+        description: 'AWS account ID for ECR authentication'
+        required: true
+        type: string
+      aws-region:
+        description: 'AWS region for ECR authentication'
+        required: true
+        type: string
+
+env:
+  FORCE_COLOR: "1"
+
+jobs:
+  # ===========================================================================
+  # Generate inference models on the runner with a pinned xgboost (ensures version compat)
+  # ===========================================================================
+  generate-models:
+    timeout-minutes: 15
+    runs-on:
+      - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
+        fleet:default-runner
+        buildspec-override:true
+    steps:
+      - name: Checkout DLC source
+        uses: actions/checkout@v5
+
+      - name: Install dependencies
+        run: |
+          uv venv --python 3.12
+          source .venv/bin/activate
+          uv pip install xgboost==3.0.5 boto3 numpy
+
+      - name: Generate and upload models
+        run: |
+          source .venv/bin/activate
+          python3 test/xgboost/container/generate_models.py
+
+  # ===========================================================================
+  # Container tests — training (no model dependency)
+  # ===========================================================================
+  container-test-training:
+    timeout-minutes: 90
+    runs-on:
+      - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
+        fleet:x86-g6xl-runner
+        buildspec-override:true
+    steps:
+      - name: Checkout DLC source
+        uses: actions/checkout@v5
+
+      - name: ECR login
+        uses: ./.github/actions/ecr-authenticate
+        with:
+          aws-account-id: ${{ inputs.aws-account-id }}
+          aws-region: ${{ inputs.aws-region }}
+          image-uri: ${{ inputs.image-uri }}
+
+      - name: Pull image
+        run: docker pull ${{ inputs.image-uri }}
+
+      - name: Install test dependencies
+        run: |
+          uv venv --python 3.12
+          source .venv/bin/activate
+          uv pip install -r test/requirements.txt docker pytest boto3 requests
+
+      - name: Run training container tests
+        run: |
+          source .venv/bin/activate
+          cd test/
+          python3 -m pytest -v --tb=short -rA --log-cli-level=INFO \
+            --image ${{ inputs.image-uri }} \
+            xgboost/container/test_training.py
+
+  # ===========================================================================
+  # Container tests — scoring (depends on generate-models)
+  # ===========================================================================
+  container-test-scoring:
+    needs: [generate-models]
+    timeout-minutes: 60
+    runs-on:
+      - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
+        fleet:x86-g6xl-runner
+        buildspec-override:true
+    steps:
+      - name: Checkout DLC source
+        uses: actions/checkout@v5
+
+      - name: ECR login
+        uses: ./.github/actions/ecr-authenticate
+        with:
+          aws-account-id: ${{ inputs.aws-account-id }}
+          aws-region: ${{ inputs.aws-region }}
+          image-uri: ${{ inputs.image-uri }}
+
+      - name: Pull image
+        run: docker pull ${{ inputs.image-uri }}
+
+      - name: Install test dependencies
+        run: |
+          uv venv --python 3.12
+          source .venv/bin/activate
+          uv pip install -r test/requirements.txt docker pytest boto3 requests
+
+      - name: Run scoring container tests
+        run: |
+          source .venv/bin/activate
+          cd test/
+          python3 -m pytest -v --tb=short -rA --log-cli-level=INFO \
+            --image ${{ inputs.image-uri }} \
+            xgboost/container/test_scoring.py
+
+  # ===========================================================================
+  # Container tests — batch transform (depends on generate-models)
+  # ===========================================================================
+  container-test-batch-transform:
+    needs: [generate-models]
+    timeout-minutes: 60
+    runs-on:
+      - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
+        fleet:x86-g6xl-runner
+        buildspec-override:true
+    steps:
+      - name: Checkout DLC source
+        uses: actions/checkout@v5
+
+      - name: ECR login
+        uses: ./.github/actions/ecr-authenticate
+        with:
+          aws-account-id: ${{ inputs.aws-account-id }}
+          aws-region: ${{ inputs.aws-region }}
+          image-uri: ${{ inputs.image-uri }}
+
+      - name: Pull image
+        run: docker pull ${{ inputs.image-uri }}
+
+      - name: Install test dependencies
+        run: |
+          uv venv --python 3.12
+          source .venv/bin/activate
+          uv pip install -r test/requirements.txt docker pytest boto3 requests
+
+      - name: Run batch transform container tests
+        run: |
+          source .venv/bin/activate
+          cd test/
+          python3 -m pytest -v --tb=short -rA --log-cli-level=INFO \
+            --image ${{ inputs.image-uri }} \
+            xgboost/container/test_batch_transform.py
+
+  # TODO: Add integration-test job (upstream sagemaker-xgboost-container local mode tests)
+
+  # ===========================================================================
+  # Benchmark tests (SageMaker training jobs) — commented out pending validation
+  # ===========================================================================
+  # benchmark-test:
+  #   timeout-minutes: 150
+  #   strategy:
+  #     fail-fast: false
+  #     matrix:
+  #       test-module:
+  #         - test_training_objective
+  #         - test_training_tree_method
+  #         - test_training_max_depth
+  #         - test_training_num_round
+  #         - test_training_data_size
+  #         - test_training_instance_type
+  #         - test_training_content_type
+  #   runs-on:
+  #     - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
+  #       fleet:x86-g6xl-runner
+  #       buildspec-override:true
+  #   steps:
+  #     - name: Checkout DLC source
+  #       uses: actions/checkout@v5
+  #
+  #     - name: Install test dependencies
+  #       run: |
+  #         uv venv --python 3.12
+  #         source .venv/bin/activate
+  #         uv pip install -r test/requirements.txt
+  #         uv pip install -r test/xgboost/requirements.txt
+  #
+  #     - name: Run ${{ matrix.test-module }}
+  #       run: |
+  #         source .venv/bin/activate
+  #         cd test/
+  #         python3 -m pytest -v --tb=short -rA --log-cli-level=INFO \
+  #           --image-uri ${{ inputs.image-uri }} \
+  #           xgboost/benchmarks/${{ matrix.test-module }}.py
diff --git a/test/xgboost/container/conftest.py b/test/xgboost/container/conftest.py
new file mode 100644
index 000000000000..5ab580730f8e
--- /dev/null
+++ b/test/xgboost/container/conftest.py
@@ -0,0 +1,65 @@
+"""Pytest fixtures for XGBoost container tests.
+
+Provides:
+- --image flag for the container image URI
+- Session-scoped S3 resource download
+- Docker client fixture
+"""
+
+import logging
+import os
+import tempfile
+
+import boto3
+import docker
+import pytest
+
+LOGGER = logging.getLogger(__name__)
+
+S3_BUCKET = "dlc-cicd-models"
+S3_PREFIX = "xgboost/container_test_resources"
+
+
+def pytest_addoption(parser):
+    parser.addoption("--image", required=True, help="Docker image URI to test")
+
+
+@pytest.fixture(scope="session")
+def image_uri(request):
+    return request.config.getoption("--image")
+
+
+@pytest.fixture(scope="session")
+def docker_client():
+    return docker.from_env()
+
+
+@pytest.fixture(scope="session")
+def test_resources():
+    """Mirror s3://S3_BUCKET/S3_PREFIX (training/, inference/) into a temp dir once per session; return its path."""
+    tmpdir = tempfile.mkdtemp(prefix="xgb-container-test-")
+    s3 = boto3.client("s3")
+    paginator = s3.get_paginator("list_objects_v2")
+
+    for page in paginator.paginate(Bucket=S3_BUCKET, Prefix=S3_PREFIX):
+        for obj in page.get("Contents", []):  # "Contents" is absent on an empty page
+            key = obj["Key"]
+            rel = os.path.relpath(key, S3_PREFIX)
+            if rel == "." or key.endswith("/"):  # skip "folder" marker keys; saved as files they block makedirs
+                continue
+            dest = os.path.join(tmpdir, rel)
+            os.makedirs(os.path.dirname(dest), exist_ok=True)
+            LOGGER.info("Downloading s3://%s/%s -> %s", S3_BUCKET, key, dest)
+            s3.download_file(S3_BUCKET, key, dest)
+
+    return tmpdir
+
+
+@pytest.fixture(scope="session")
+def training_resources(test_resources):
+    return os.path.join(test_resources, "training")
+
+
+@pytest.fixture(scope="session")
+def inference_resources(test_resources):
+    return os.path.join(test_resources, "inference")
diff --git a/test/xgboost/container/container_helper.py b/test/xgboost/container/container_helper.py
new file mode 100644
index 000000000000..d06efa7557d5
--- /dev/null
+++ b/test/xgboost/container/container_helper.py
@@ -0,0 +1,300 @@
+"""Container helper — replaces ai_algorithms_container_tests.
+
+Creates /opt/ml/ directory structure in temp dirs, writes config JSON files,
+mounts volumes, and runs the container via docker-py.
+
+Training mode: run container to completion, return exit code + logs + model files.
+Serving mode:  start container, poll health check, send HTTP requests.
+"""
+
+import json
+import logging
+import os
+import shutil
+import tempfile
+import time
+
+import requests
+
+import docker.types
+
+LOGGER = logging.getLogger(__name__)
+
+TRAIN_TIMEOUT = 300  # seconds; default limit when waiting for a training container to exit
+SERVE_STARTUP_TIMEOUT = 120  # seconds allowed for /ping to start returning 200
+HEALTH_CHECK_INTERVAL = 2  # seconds slept between /ping attempts
+SERVE_PORT = 8080  # container port that is published to the host
+
+
+# ---------------------------------------------------------------------------
+# /opt/ml layout helpers
+# ---------------------------------------------------------------------------
+
+def _create_opt_ml(tmpdir):
+    """Build the /opt/ml directory layout under *tmpdir*; return a name -> path dict."""
+    layout = {
+        "input_config": ("input", "config"),
+        "input_train": ("input", "data", "train"),
+        "input_validation": ("input", "data", "validation"),
+        "model": ("model",), "output": ("output",), "checkpoints": ("checkpoints",),
+    }
+    paths = {}
+    for name, parts in layout.items():
+        paths[name] = os.path.join(tmpdir, *parts)
+        os.makedirs(paths[name], exist_ok=True)
+    return paths
+
+
+def _write_configs(config_dir, hyperparameters, inputdataconfig, resourceconfig,
+                   checkpointconfig=None):
+    """Dump each config dict as JSON into *config_dir*; checkpoint config is optional."""
+    payloads = [("hyperparameters.json", hyperparameters),
+                ("inputdataconfig.json", inputdataconfig),
+                ("resourceconfig.json", resourceconfig)]
+    if checkpointconfig is not None:
+        payloads.append(("checkpointconfig.json", checkpointconfig))
+    for filename, payload in payloads:
+        with open(os.path.join(config_dir, filename), "w") as handle:
+            json.dump(payload, handle)
+
+
+def _copy_files(src_files, dest_dir):
+    """Copy every entry of *src_files* into *dest_dir*; a directory entry
+    contributes its immediate children (no recursion)."""
+    for src in src_files:
+        is_dir = os.path.isdir(src)
+        sources = [os.path.join(src, name) for name in os.listdir(src)] if is_dir else [src]
+        for path in sources:
+            shutil.copy2(path, dest_dir)
+
+
+# ---------------------------------------------------------------------------
+# Training
+# ---------------------------------------------------------------------------
+
+def run_training(docker_client, image_uri, hyperparameters, inputdataconfig,
+                 resourceconfig, training_files, validation_files=None,
+                 checkpointconfig=None, environment=None, timeout=TRAIN_TIMEOUT):
+    """Run a training container and return (exit_code, logs, model_files, paths).
+
+    *paths* is the dict returned by ``_create_opt_ml`` so callers can inspect
+    checkpoints, model dir, etc.  The temp dir behind it is intentionally not
+    deleted here.  *exit_code* is -1 when waiting fails or exceeds *timeout*
+    seconds; the container is always force-removed before returning.
+    """
+    tmpdir = tempfile.mkdtemp(prefix="xgb-train-")
+    paths = _create_opt_ml(tmpdir)
+
+    _write_configs(paths["input_config"], hyperparameters, inputdataconfig,
+                   resourceconfig, checkpointconfig)
+    _copy_files(training_files, paths["input_train"])
+    if validation_files:
+        _copy_files(validation_files, paths["input_validation"])
+
+    container = docker_client.containers.run(
+        image_uri,
+        command="train",
+        volumes={tmpdir: {"bind": "/opt/ml", "mode": "rw"}},
+        environment=dict(environment or {}),
+        detach=True,
+    )
+    try:
+        try:
+            exit_code = container.wait(timeout=timeout).get("StatusCode", -1)
+        except Exception:
+            # Keep the traceback: a timeout and a Docker API failure look alike otherwise.
+            LOGGER.warning("Training did not finish within %ss", timeout, exc_info=True)
+            exit_code = -1
+        logs = container.logs().decode("utf-8", errors="replace")
+        LOGGER.info("Container logs:\n%s", logs)
+    finally:
+        # Runs even if fetching logs fails, so no container is left behind.
+        container.remove(force=True)
+
+    model_files = [f for f in os.listdir(paths["model"]) if "model" in f]
+    return exit_code, logs, model_files, paths
+
+
+def run_distributed_training(docker_client, image_uri, hyperparameters, inputdataconfig,
+                             resourceconfigs, training_files, validation_files=None,
+                             timeout=TRAIN_TIMEOUT):
+    """Run multi-container distributed training. Returns list of (exit_code, logs, paths).
+
+    Starts one container per *resourceconfigs* entry; exit_code is -1 if waiting on it fails.
+    """
+    hosts = [rc["current_host"] for rc in resourceconfigs]
+    network_name = "xgb-test-network"
+    subnet = "10.5.5.0/24"
+    base_ip = 2
+
+    # Create docker network (a same-named leftover is removed first, best effort)
+    try:
+        network = docker_client.networks.get(network_name)
+        network.remove()
+    except Exception:
+        pass
+    ipam_pool = docker.types.IPAMPool(subnet=subnet)
+    ipam_config = docker.types.IPAMConfig(pool_configs=[ipam_pool])
+    network = docker_client.networks.create(network_name, driver="bridge", ipam=ipam_config)
+
+    containers = []
+    all_paths = []
+    try:
+        host_ips = {h: f"10.5.5.{base_ip + i}" for i, h in enumerate(hosts)}
+
+        for i, rc in enumerate(resourceconfigs):
+            tmpdir = tempfile.mkdtemp(prefix=f"xgb-dist-{i}-")
+            paths = _create_opt_ml(tmpdir)
+            _write_configs(paths["input_config"], hyperparameters, inputdataconfig, rc)
+            _copy_files(training_files, paths["input_train"])
+            if validation_files:
+                _copy_files(validation_files, paths["input_validation"])
+            all_paths.append(paths)
+
+            cur_host = rc["current_host"]
+            # Each container only needs extra_hosts for the OTHER hosts
+            other_hosts = {h: ip for h, ip in host_ips.items() if h != cur_host}
+            env = {"CURRENT_HOST": cur_host, "HOSTS": ",".join(hosts)}
+
+            # Use low-level API to assign specific IP on the network
+            networking_config = docker_client.api.create_networking_config({
+                network_name: docker_client.api.create_endpoint_config(
+                    ipv4_address=host_ips[cur_host],
+                )
+            })
+            host_config = docker_client.api.create_host_config(
+                binds={tmpdir: {"bind": "/opt/ml", "mode": "rw"}},
+                extra_hosts=other_hosts,
+            )
+            cid = docker_client.api.create_container(
+                image_uri,
+                command="train",
+                hostname=cur_host,
+                environment=[f"{k}={v}" for k, v in env.items()],
+                host_config=host_config,
+                networking_config=networking_config,
+            )
+            # Track before start() so the cleanup below also covers a failed start.
+            containers.append(docker_client.containers.get(cid["Id"]))
+            docker_client.api.start(cid)
+
+        # Wait for all containers
+        results = []
+        for container in containers:
+            try:
+                result = container.wait(timeout=timeout)
+                exit_code = result.get("StatusCode", -1)
+            except Exception:
+                LOGGER.warning("Container did not finish within %ss", timeout, exc_info=True)
+                exit_code = -1
+            logs = container.logs().decode("utf-8", errors="replace")
+            results.append((exit_code, logs))
+    finally:
+        for c in containers:
+            try:
+                c.remove(force=True)
+            except Exception:
+                pass
+        try:
+            network.remove()
+        except Exception:
+            pass
+
+    return [(r[0], r[1], all_paths[i]) for i, r in enumerate(results)]
+
+
+# ---------------------------------------------------------------------------
+# Serving (inference / batch transform)
+# ---------------------------------------------------------------------------
+
+class ServingContainer:
+    """Context manager that starts a serving container and exposes HTTP helpers.
+
+    A failed ``__enter__`` tears down the container and temp dir before re-raising.
+    """
+
+    def __init__(self, docker_client, image_uri, model_dir, environment=None):
+        self._client = docker_client
+        self._image = image_uri
+        self._model_dir = model_dir
+        self._env = environment or {}
+        self._container = None
+        self._opt_ml = None
+        self._host_port = None
+
+    # -- lifecycle -----------------------------------------------------------
+    def __enter__(self):
+        self._opt_ml = tempfile.mkdtemp(prefix="xgb-serve-")
+        try:
+            paths = _create_opt_ml(self._opt_ml)
+            _copy_files([self._model_dir], paths["model"])
+            _write_configs(paths["input_config"], {}, {},
+                           {"current_host": "algo-1", "hosts": ["algo-1"]})
+            self._container = self._client.containers.run(
+                self._image,
+                command="serve",
+                volumes={self._opt_ml: {"bind": "/opt/ml", "mode": "rw"}},
+                environment=dict(self._env),
+                ports={f"{SERVE_PORT}/tcp": None},  # None: Docker assigns a free host port
+                detach=True,
+            )
+            self._wait_healthy()
+        except BaseException:  # __exit__ is not called when __enter__ raises
+            self.__exit__(None, None, None)
+            raise
+        return self
+
+    def __exit__(self, *exc):
+        try:
+            if self._container:
+                try:
+                    LOGGER.info("Serving container logs:\n%s", self.get_logs())
+                finally:
+                    self._container.remove(force=True)
+        finally:
+            if self._opt_ml:
+                shutil.rmtree(self._opt_ml, ignore_errors=True)
+
+    # -- health check --------------------------------------------------------
+    def _wait_healthy(self):
+        """Poll /ping until it returns 200; raise if the container stops or time runs out."""
+        deadline = time.time() + SERVE_STARTUP_TIMEOUT
+        while time.time() < deadline:
+            self._container.reload()
+            if self._container.status != "running":
+                raise RuntimeError(f"Container exited: {self.get_logs()}")
+            try:
+                if requests.get(self._url("/ping"), timeout=2).status_code == 200:
+                    LOGGER.info("Serving container healthy")
+                    return
+            except (requests.RequestException, RuntimeError):
+                pass  # refused, timed out, or port not published yet -> retry
+            time.sleep(HEALTH_CHECK_INTERVAL)
+        raise TimeoutError("Serving container did not become healthy")
+
+    # -- HTTP helpers --------------------------------------------------------
+    def _url(self, path):
+        """Return the host-side URL for *path*, re-reading the published port each call."""
+        self._container.reload()
+        port_map = self._container.ports.get(f"{SERVE_PORT}/tcp")
+        if not port_map:
+            raise RuntimeError("No port mapping found")
+        self._host_port = int(port_map[0]["HostPort"])
+        return f"http://localhost:{self._host_port}{path}"
+
+    def ping(self):
+        return requests.get(self._url("/ping"), timeout=5)
+
+    def invocations(self, data, content_type, accept=None):
+        headers = {"Content-Type": content_type}
+        if accept:
+            headers["Accept"] = accept
+        return requests.post(self._url("/invocations"), data=data, headers=headers, timeout=60)
+
+    def execution_parameters(self):
+        return requests.get(self._url("/execution-parameters"), timeout=5)
+
+    def get_logs(self):
+        if not self._container:
+            return ""
+        return self._container.logs().decode("utf-8", errors="replace")
diff --git a/test/xgboost/container/generate_models.py b/test/xgboost/container/generate_models.py
new file mode 100644
index 000000000000..2630192ba1c2
--- /dev/null
+++ b/test/xgboost/container/generate_models.py
@@ -0,0 +1,110 @@
+#!/usr/bin/env python3
+"""Generate XGBoost 3.0.5-compatible inference models and upload to S3.
+
+Uses inference input data to create models with matching feature dimensions.
+This is valid for container tests — we're testing the container's ability to
+load models and serve predictions, not model accuracy.
+
+Run on CI host with: pip install xgboost==3.0.5 boto3 numpy
+"""
+
+import os
+import pickle
+import tempfile
+
+import boto3
+import numpy as np
+import xgboost as xgb
+
+S3_BUCKET = "dlc-cicd-models"
+S3_PREFIX = "xgboost/container_test_resources/inference/models"
+S3_INPUT_PREFIX = "xgboost/container_test_resources/inference/input"
+S3_TRAINING_PREFIX = "xgboost/container_test_resources/training/data"
+
+
+def download_s3_dir(s3, bucket, prefix, local_dir):
+    """Mirror every object under s3://bucket/prefix into *local_dir*."""
+    for page in s3.get_paginator("list_objects_v2").paginate(Bucket=bucket, Prefix=prefix):
+        for obj in page.get("Contents", []):
+            key = obj["Key"]
+            rel = os.path.relpath(key, prefix)
+            if rel == "." or key.endswith("/"):  # a "dir/" marker would be saved as a file
+                continue
+            dest = os.path.join(local_dir, rel)
+            os.makedirs(os.path.dirname(dest), exist_ok=True)
+            s3.download_file(bucket, key, dest)
+
+
+def main():
+    """Train the fixture models locally and upload them to s3://S3_BUCKET/S3_PREFIX/."""
+    out_dir = tempfile.mkdtemp(prefix="xgb-models-")
+    input_dir = tempfile.mkdtemp(prefix="xgb-input-")
+    train_dir = tempfile.mkdtemp(prefix="xgb-train-")
+    s3 = boto3.client("s3")
+
+    print(f"XGBoost version: {xgb.__version__}")
+    print("Downloading inference input data...")
+    download_s3_dir(s3, S3_BUCKET, S3_INPUT_PREFIX, input_dir)
+    print("Downloading training data...")
+    download_s3_dir(s3, S3_BUCKET, S3_TRAINING_PREFIX, train_dir)
+
+    # --- mnist-xgb-model ---
+    # mnist-700.csv: first column is label, remaining are features.  The libsvm
+    # inputs are 1-based with max index 785, so features are zero-padded to 785.
+    print("Generating mnist-xgb-model...")
+    mnist_data = np.genfromtxt(os.path.join(input_dir, "mnist-700.csv"), delimiter=",")
+    labels, features = mnist_data[:, 0], mnist_data[:, 1:]
+    n_features = 785  # max feature index in libsvm files
+    if features.shape[1] < n_features:
+        pad = np.zeros((features.shape[0], n_features - features.shape[1]))
+        features = np.concatenate([features, pad], axis=1)
+    dtrain = xgb.DMatrix(features, label=labels)
+    bst = xgb.train({"objective": "multi:softmax", "num_class": 10, "max_depth": 6}, dtrain, 10)
+    bst.save_model(os.path.join(out_dir, "mnist-xgb-model"))
+    with open(os.path.join(out_dir, "mnist-pkl-model"), "wb") as fh:
+        pickle.dump(bst, fh)
+    print(f"  {features.shape[0]} rows x {features.shape[1]} features")
+
+    # --- diabetes-binary-xgb-model ---
+    print("Generating diabetes-binary-xgb-model...")
+    diabetes_data = np.genfromtxt(os.path.join(input_dir, "diabetes_inference.csv"), delimiter=",")
+    np.random.seed(42)  # labels are synthetic; seed so reruns draw the same labels
+    labels_d = np.random.randint(0, 2, size=diabetes_data.shape[0]).astype(float)
+    dtrain_d = xgb.DMatrix(diabetes_data, label=labels_d)
+    bst_d = xgb.train({"objective": "binary:hinge", "max_depth": 6}, dtrain_d, 10)
+    bst_d.save_model(os.path.join(out_dir, "diabetes-binary-xgb-model"))
+    print(f"  {diabetes_data.shape[0]} rows x {diabetes_data.shape[1]} cols")
+
+    # --- insurance-xgb-model (from actual training CSV) ---
+    print("Generating insurance-xgb-model...")
+    csv_train = np.genfromtxt(os.path.join(train_dir, "single-csv", "train.csv"), delimiter=",")
+    dtrain_ins = xgb.DMatrix(csv_train[:, 1:], label=csv_train[:, 0])
+    bst_ins = xgb.train({"objective": "reg:squarederror", "max_depth": 6}, dtrain_ins, 10)
+    bst_ins.save_model(os.path.join(out_dir, "insurance-xgb-model"))
+    with open(os.path.join(out_dir, "insurance-pkl-model"), "wb") as fh:
+        pickle.dump(bst_ins, fh)
+    print(f"  {csv_train.shape[0]} rows x {csv_train.shape[1] - 1} cols")
+
+    # --- salary-pkl-model (single feature, from salary-30.csv dims) ---
+    print("Generating salary-pkl-model...")
+    np.random.seed(42)
+    X_sal = np.random.rand(100, 1)
+    y_sal = X_sal[:, 0] * 50000 + np.random.randn(100) * 5000
+    dtrain_sal = xgb.DMatrix(X_sal, label=y_sal)
+    bst_sal = xgb.train({"objective": "reg:squarederror", "max_depth": 3}, dtrain_sal, 10)
+    with open(os.path.join(out_dir, "salary-pkl-model"), "wb") as fh:
+        pickle.dump(bst_sal, fh)
+    print("  100 rows x 1 feature")
+
+    # --- Upload to S3 ---
+    print(f"\nUploading to s3://{S3_BUCKET}/{S3_PREFIX}/")
+    for fname in sorted(os.listdir(out_dir)):
+        local = os.path.join(out_dir, fname)
+        s3.upload_file(local, S3_BUCKET, f"{S3_PREFIX}/{fname}")
+        print(f"  {fname} ({os.path.getsize(local)} bytes)")
+
+    print(f"\nDone — models generated with XGBoost {xgb.__version__}")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/test/xgboost/container/test_batch_transform.py b/test/xgboost/container/test_batch_transform.py
new file mode 100644
index 000000000000..df7fe645447f
--- /dev/null
+++ b/test/xgboost/container/test_batch_transform.py
@@ -0,0 +1,129 @@
+"""Batch transform container tests — rewritten from SMFrameworksXGBoost3_0-5Tests.
+
+Covers batch inference with SAGEMAKER_BATCH=True for:
+- libsvm (pkl + xgb mnist models, plus the text/libsvm content type variant)
+- recordio-protobuf (pkl + xgb mnist models)
+- csv (mnist pkl, insurance pkl + xgb, salary pkl)
+
+Batch responses are newline-delimited, so expected_length is +1 for trailing newline.
+
+Note: pickle serialization is incompatible across XGBoost major versions, so the
+pkl-model fixtures used here are the ones produced by generate_models.py.
+"""
+
+import http.client as httplib
+import logging
+import os
+
+from .container_helper import ServingContainer
+
+LOGGER = logging.getLogger(__name__)
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def _input_path(resources, filename):
+    return os.path.join(resources, "input", filename)  # <resources>/input/<filename>
+
+
+def _model_path(resources, model_name):
+    return os.path.join(resources, "models", model_name)  # <resources>/models/<model_name>
+
+
+def _send_batch_requests(docker_client, image_uri, resources, model_name,
+                         content_type, input_files):
+    """POST each input file to one SAGEMAKER_BATCH=True container; return the responses."""
+    collected = []
+    batch_env = {"SAGEMAKER_BATCH": "True"}
+    model_dir = _model_path(resources, model_name)
+    with ServingContainer(docker_client, image_uri, model_dir, batch_env) as server:
+        for name in input_files:
+            with open(_input_path(resources, name), "rb") as handle:
+                body = handle.read()
+            reply = server.invocations(data=body, content_type=content_type)
+            LOGGER.info("Batch response %s: status=%s", name, reply.status_code)
+            collected.append(reply)
+    return collected
+
+
+def _validate_batch_response(resp, expected_length):
+    """Assert HTTP 200 and a body that splits on newlines into expected_length + 1 parts."""
+    assert resp.status_code == httplib.OK, resp.text
+    # N newline characters split the text into N + 1 pieces, so count them directly.
+    assert resp.text.count("\n") == expected_length
+
+
+# ===========================================================================
+# Tests
+# ===========================================================================
+
+class TestBatchTransform:
+
+    def test_libsvm_batch(self, docker_client, image_uri, inference_resources):
+        """libsvm payloads against both mnist models, then the text/libsvm alias."""
+        files = ["mnist-1.libsvm", "mnist-less-dim-1.libsvm", "mnist-700.libsvm"]
+        for model in ["mnist-pkl-model", "mnist-xgb-model"]:
+            responses = _send_batch_requests(
+                docker_client, image_uri, inference_resources, model, "text/x-libsvm", files,
+            )
+            for resp, rows in zip(responses, (1, 1, 700)):
+                _validate_batch_response(resp, rows)
+
+        # text/libsvm variant
+        responses = _send_batch_requests(
+            docker_client, image_uri, inference_resources, "mnist-xgb-model", "text/libsvm",
+            ["mnist-1.libsvm", "mnist-700.libsvm"],
+        )
+        for resp, rows in zip(responses, (1, 700)):
+            _validate_batch_response(resp, rows)
+
+    def test_recordio_protobuf_batch(self, docker_client, image_uri, inference_resources):
+        """recordio-protobuf payloads against both mnist models."""
+        files = ["mnist-1.pbr", "mnist-equal-dim.pbr", "mnist-700.pbr"]
+        for model in ["mnist-pkl-model", "mnist-xgb-model"]:
+            responses = _send_batch_requests(
+                docker_client, image_uri, inference_resources, model,
+                "application/x-recordio-protobuf", files,
+            )
+            for resp, rows in zip(responses, (1, 1, 700)):
+                _validate_batch_response(resp, rows)
+
+    def test_csv_batch(self, docker_client, image_uri, inference_resources):
+        """CSV batch scoring: each model is served once and sent its files in order."""
+        # model name -> [(input file, expected record count), ...]
+        cases = {
+            # mnist pkl
+            "mnist-pkl-model": [
+                ("mnist-1.csv", 1),
+                ("mnist-empty-cell.csv", 1),
+                ("mnist-equal-dim.csv", 1),
+                ("mnist-700.csv", 700),
+            ],
+            # insurance pkl
+            "insurance-pkl-model": [
+                ("insurance-1.csv", 1),
+                ("insurance-2000.csv", 2000),
+                ("insurance-empty-cell.csv", 2000),
+                ("insurance-nan-values.csv", 2000),
+            ],
+            # insurance xgb
+            "insurance-xgb-model": [
+                ("insurance-1.csv", 1),
+                ("insurance-2000.csv", 2000),
+                ("insurance-empty-cell.csv", 2000),
+            ],
+            # salary pkl (single column)
+            "salary-pkl-model": [
+                ("salary-30.csv", 30),
+            ],
+        }
+
+        for model, expectations in cases.items():
+            files = [name for name, _ in expectations]
+            responses = _send_batch_requests(
+                docker_client, image_uri, inference_resources, model, "text/csv", files,
+            )
+            for resp, (_, rows) in zip(responses, expectations):
+                _validate_batch_response(resp, rows)
diff --git a/test/xgboost/container/test_scoring.py b/test/xgboost/container/test_scoring.py
new file mode 100644
index 000000000000..02560cc37b26
--- /dev/null
+++ b/test/xgboost/container/test_scoring.py
@@ -0,0 +1,248 @@
+"""Scoring (inference) container tests — rewritten from SMFrameworksXGBoost3_0-5Tests.
+
+Covers:
+- Valid: CSV, libsvm, recordio-protobuf inference with pkl and xgb model formats,
+  binary classification, execution parameters, 20MB payload
+- Invalid: unsupported content type, empty payload,
+  mismatched payload/content-type, invalid accept header
+
+Note: pickle serialization is incompatible across XGBoost major versions, so the
+pkl-model fixtures used here are the ones produced by generate_models.py.
+"""
+
+import http.client as httplib
+import json
+import logging
+import os
+
+from .container_helper import ServingContainer
+
+LOGGER = logging.getLogger(__name__)
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def _input_path(resources, filename):
+    return os.path.join(resources, "input", filename)  # <resources>/input/<filename>
+
+
+def _model_path(resources, model_name):
+    return os.path.join(resources, "models", model_name)  # <resources>/models/<model_name>
+
+
+def _send_requests(docker_client, image_uri, resources, model_name, content_type,
+                   input_files, environment=None):
+    """Serve *model_name* once, POST every input file to it and return the responses."""
+    collected = []
+    model_dir = _model_path(resources, model_name)
+    # One container instance answers all files of this call.
+    with ServingContainer(docker_client, image_uri, model_dir, environment) as server:
+        for name in input_files:
+            with open(_input_path(resources, name), "rb") as handle:
+                body = handle.read()
+            reply = server.invocations(data=body, content_type=content_type)
+            LOGGER.info("Response %s: status=%s len=%s", name, reply.status_code, len(reply.text))
+            collected.append(reply)
+    return collected
+
+
+def _validate_response(resp, expected_length):
+    """Assert HTTP 200 and *expected_length* predictions in the body.
+
+    Predictions are split on commas when the stripped body contains one,
+    otherwise on newlines.
+    """
+    assert resp.status_code == httplib.OK, resp.text
+    body = resp.text.strip()
+    assert len(body.split("," if "," in body else "\n")) == expected_length
+
+
+# ===========================================================================
+# Valid scoring tests
+# ===========================================================================
+
+class TestValidScoring:
+
+    def test_execution_parameters(self, docker_client, image_uri, inference_resources):
+        """GET /execution-parameters reports the batch strategy and minimum limits."""
+        model_dir = _model_path(inference_resources, "mnist-xgb-model")
+        with ServingContainer(docker_client, image_uri, model_dir) as server:
+            params = json.loads(server.execution_parameters().text)
+        assert params["BatchStrategy"] == "MULTI_RECORD"
+        assert params["MaxConcurrentTransforms"] >= 1
+        assert params["MaxPayloadInMB"] >= 6
+
+    def test_csv_inference(self, docker_client, image_uri, inference_resources):
+        """CSV scoring across all fixture models.
+
+        Each model is served once and receives its files in the listed order.
+        """
+        # model name -> [(input file, expected prediction count), ...]
+        cases = {
+            # mnist xgb model
+            "mnist-xgb-model": [
+                ("mnist-1.csv", 1),
+                ("mnist-empty-cell.csv", 1),
+                ("mnist-equal-dim.csv", 1),
+                ("mnist-700.csv", 700),
+            ],
+            # mnist pkl model
+            "mnist-pkl-model": [
+                ("mnist-1.csv", 1),
+                ("mnist-700.csv", 700),
+            ],
+            # insurance xgb model
+            "insurance-xgb-model": [
+                ("insurance-1.csv", 1),
+                ("insurance-2000.csv", 2000),
+                ("insurance-empty-cell.csv", 2000),
+            ],
+            # insurance pkl model
+            "insurance-pkl-model": [
+                ("insurance-1.csv", 1),
+                ("insurance-2000.csv", 2000),
+                ("insurance-empty-cell.csv", 2000),
+                ("insurance-nan-values.csv", 2000),
+            ],
+            # salary pkl model (single column)
+            "salary-pkl-model": [
+                ("salary-30.csv", 30),
+            ],
+        }
+
+        for model, expectations in cases.items():
+            files = [name for name, _ in expectations]
+            responses = _send_requests(
+                docker_client, image_uri, inference_resources, model, "text/csv", files,
+            )
+            for resp, (_, count) in zip(responses, expectations):
+                _validate_response(resp, count)
+
+    def test_libsvm_inference(self, docker_client, image_uri, inference_resources):
+        """libsvm payloads against both mnist models, then the text/libsvm alias."""
+        files = ["mnist-1.libsvm", "mnist-less-dim-1.libsvm", "mnist-700.libsvm"]
+        for model in ["mnist-pkl-model", "mnist-xgb-model"]:
+            responses = _send_requests(
+                docker_client, image_uri, inference_resources, model, "text/x-libsvm", files,
+            )
+            for resp, count in zip(responses, (1, 1, 700)):
+                _validate_response(resp, count)
+
+        # text/libsvm content type variant
+        responses = _send_requests(
+            docker_client, image_uri, inference_resources, "mnist-xgb-model", "text/libsvm",
+            ["mnist-1.libsvm", "mnist-700.libsvm"],
+        )
+        for resp, count in zip(responses, (1, 700)):
+            _validate_response(resp, count)
+
+    def test_recordio_protobuf_inference(self, docker_client, image_uri, inference_resources):
+        """recordio-protobuf payloads against both mnist models."""
+        files = ["mnist-1.pbr", "mnist-equal-dim.pbr", "mnist-700.pbr"]
+        for model in ["mnist-pkl-model", "mnist-xgb-model"]:
+            responses = _send_requests(
+                docker_client, image_uri, inference_resources, model,
+                "application/x-recordio-protobuf", files,
+            )
+            for resp, count in zip(responses, (1, 1, 700)):
+                _validate_response(resp, count)
+
+    def test_binary_classification(self, docker_client, image_uri, inference_resources):
+        """The diabetes model must answer with ten values, each exactly 0.0 or 1.0."""
+        responses = _send_requests(
+            docker_client, image_uri, inference_resources,
+            "diabetes-binary-xgb-model", "text/csv", ["diabetes_inference.csv"],
+        )
+        assert responses[0].status_code == httplib.OK
+        tokens = responses[0].text.strip().replace(",", "\n").split("\n")
+        predictions = [float(token) for token in tokens]
+        assert len(predictions) == 10
+        assert all(p in (0.0, 1.0) for p in predictions)
+
+    def test_csv_20mb_payload(self, docker_client, image_uri, inference_resources):
+        """A payload of repeated mnist-1.csv records sized to a 20 MiB limit is scored."""
+        limit = 20 * 1024 ** 2
+        with open(_input_path(inference_resources, "mnist-1.csv"), "rb") as handle:
+            record = handle.read()
+        # Dividing by len + 1 keeps the repeated payload strictly under the limit.
+        copies = limit // (len(record) + 1)
+        model_dir = _model_path(inference_resources, "mnist-xgb-model")
+        server_env = {"MAX_CONTENT_LENGTH": str(limit)}
+        with ServingContainer(docker_client, image_uri, model_dir, server_env) as server:
+            resp = server.invocations(data=record * copies, content_type="text/csv")
+        _validate_response(resp, copies)
+
+
+# ===========================================================================
+# Invalid scoring tests
+# ===========================================================================
+
+class TestInvalidScoring:
+
+    def test_unsupported_content_type(self, docker_client, image_uri, inference_resources):
+        """image/png and application/x-parquet requests are answered with 415."""
+        probes = [
+            (b"PNG" + b"0" * 400, "image/png"),
+            (json.dumps({"foo": "bar"}).encode(), "application/x-parquet"),
+        ]
+        model_dir = _model_path(inference_resources, "mnist-xgb-model")
+        with ServingContainer(docker_client, image_uri, model_dir) as server:
+            replies = [server.invocations(data=d, content_type=ct) for d, ct in probes]
+        assert all(r.status_code == httplib.UNSUPPORTED_MEDIA_TYPE for r in replies)
+
+    def test_empty_payload(self, docker_client, image_uri, inference_resources):
+        """An empty body yields 204 for libsvm, CSV and recordio-protobuf alike."""
+        content_types = ["text/x-libsvm", "text/csv", "application/x-recordio-protobuf"]
+        model_dir = _model_path(inference_resources, "mnist-xgb-model")
+        # One container answers all three probes.
+        with ServingContainer(docker_client, image_uri, model_dir) as server:
+            replies = [server.invocations(data=b"", content_type=ct) for ct in content_types]
+        for reply in replies:
+            assert reply.status_code == httplib.NO_CONTENT
+
+    # NOTE: test_invalid_feature_dimension removed — XGBoost 3.0.5 is lenient
+    # with dimension mismatches (pads sparse features, accepts extra dims)
+
+    def test_libsvm_payload_with_csv_content_type(self, docker_client, image_uri, inference_resources):
+        """A libsvm file sent as text/csv is rejected with 415 and a csv load error."""
+        responses = _send_requests(docker_client, image_uri, inference_resources,
+                                   "mnist-xgb-model", "text/csv", ["mnist-1.libsvm"])
+        resp = responses[0]
+        assert resp.status_code == httplib.UNSUPPORTED_MEDIA_TYPE
+        assert "Loading csv data failed" in resp.text
+
+    def test_invalid_payload_with_csv_content_type(self, docker_client, image_uri, inference_resources):
+        """data.rec sent as text/csv is rejected with 415 and a csv load error."""
+        responses = _send_requests(docker_client, image_uri, inference_resources,
+                                   "mnist-xgb-model", "text/csv", ["data.rec"])
+        resp = responses[0]
+        assert resp.status_code == httplib.UNSUPPORTED_MEDIA_TYPE
+        assert "Loading csv data failed" in resp.text
+
+    def test_csv_payload_with_libsvm_content_type(self, docker_client, image_uri, inference_resources):
+        """A CSV file sent as text/libsvm is rejected with 415 and a libsvm load error."""
+        responses = _send_requests(docker_client, image_uri, inference_resources,
+                                   "mnist-xgb-model", "text/libsvm", ["mnist-1.csv"])
+        resp = responses[0]
+        assert resp.status_code == httplib.UNSUPPORTED_MEDIA_TYPE
+        assert "Loading libsvm data failed" in resp.text
+
+    def test_invalid_payload_with_libsvm_content_type(self, docker_client, image_uri, inference_resources):
+        """data.rec sent as text/libsvm is rejected with 415 and a libsvm load error."""
+        responses = _send_requests(docker_client, image_uri, inference_resources,
+                                   "mnist-xgb-model", "text/libsvm", ["data.rec"])
+        resp = responses[0]
+        assert resp.status_code == httplib.UNSUPPORTED_MEDIA_TYPE
+        assert "Loading libsvm data failed" in resp.text
+
+    def test_invalid_accept_selectable_inference(self, docker_client, image_uri, inference_resources):
+        """With SAGEMAKER_INFERENCE_OUTPUT set, an image/png Accept header gets 406."""
+        with open(_input_path(inference_resources, "mnist-1.csv"), "rb") as handle:
+            payload = handle.read()
+        model_dir = _model_path(inference_resources, "mnist-xgb-model")
+        env = {"SAGEMAKER_INFERENCE_OUTPUT": "predicted_label"}
+        with ServingContainer(docker_client, image_uri, model_dir, env) as server:
+            resp = server.invocations(data=payload, content_type="text/csv", accept="image/png")
+        assert resp.status_code == httplib.NOT_ACCEPTABLE
diff --git a/test/xgboost/container/test_training.py b/test/xgboost/container/test_training.py
new file mode 100644
index 000000000000..6d869351827f
--- /dev/null
+++ b/test/xgboost/container/test_training.py
@@ -0,0 +1,562 @@
+"""Training container tests — rewritten from SMFrameworksXGBoost3_0-5Tests.
+
+Covers:
+- Valid training: libsvm, csv, single/multi file, weights, HPO metrics, objectives,
+  verbosity, checkpoint/reload for spot instances
+- Invalid training: missing data, wrong content types, invalid hyperparameters,
+  pipe mode
+"""
+
+import copy
+import json
+import os
+import re
+
+import pytest
+
+from .container_helper import run_training, run_distributed_training
+
+# ---------------------------------------------------------------------------
+# Standard configs (mirrors configs.py from reference tests)
+# ---------------------------------------------------------------------------
+
+STD_HP = {  # baseline hyperparameters (all string-valued); tests deep-copy this before changing a key
+    "eval_metric": "error",
+    "predictor": "cpu_predictor",
+    "nthread": "8",
+    "sketch_eps": "0.03",
+    "base_score": "0.5",
+    "scale_pos_weight": "1.0",
+    "tree_method": "auto",
+    "normalize_type": "tree",
+    "max_depth": "6",
+    "sample_type": "uniform",
+    "booster": "gbtree",
+    "objective": "binary:logistic",
+    "rate_drop": "0.0",
+    "updater": "grow_colmaker,prune",  # popped by the two-container tests, which set tree_method to "hist"
+    "lambda": "1.0",
+    "eta": "0.3",
+    "alpha": "0.0",
+    "process_type": "default",
+    "dsplit": "row",
+    "max_delta_step": "0",
+    "min_child_weight": "1.0",
+    "colsample_bytree": "1.0",
+    "max_leaves": "0",
+    "lambda_bias": "0.0",
+    "grow_policy": "depthwise",
+    "tweedie_variance_power": "1.5",
+    "max_bin": "256",
+    "refresh_leaf": "1",
+    "num_round": "10",
+    "early_stopping_rounds": "5",  # popped by test_checkpoint_and_reload
+    "colsample_bylevel": "1",
+    "one_drop": "0",
+    "subsample": "1.0",
+    "skip_drop": "0.0",
+    "gamma": "0.0",
+}
+
+STD_IDC = {  # input data config: "train" and "validation" channels, both libsvm / FullyReplicated / File
+    "train": {
+        "ContentType": "libsvm",
+        "S3DistributionType": "FullyReplicated",
+        "TrainingInputMode": "File",
+    },
+    "validation": {
+        "ContentType": "libsvm",
+        "S3DistributionType": "FullyReplicated",
+        "TrainingInputMode": "File",
+    },
+}
+
+STD_RC = {"current_host": "algo-1", "hosts": ["algo-1"]}  # single-host resource config
+
+STD_CPC = {"LocalPath": "/opt/ml/checkpoints"}  # handed to run_training as `checkpointconfig`
+
+
+# ---------------------------------------------------------------------------
+# Helpers
+# ---------------------------------------------------------------------------
+
+def _libsvm_dir(resources):  # -> <resources>/data/single-libsvm
+    return os.path.join(resources, "data", "single-libsvm")
+
+
+def _csv_dir(resources):  # -> <resources>/data/single-csv
+    return os.path.join(resources, "data", "single-csv")
+
+
+def _run(docker_client, image_uri, resources, hp, idc, rc, train_files,
+         val_files=None, cpc=None, env=None):
+    """Call ``run_training`` using this module's short argument names.
+
+    ``resources`` is accepted so call sites read uniformly; it is not used here.
+    """
+    return run_training(docker_client, image_uri, hp, idc, rc,
+                        training_files=train_files, validation_files=val_files,
+                        checkpointconfig=cpc, environment=env)
+
+
+def _assert_success(result, regex=None):
+    """Require exit code 0, exactly one model file and, when given, ``regex`` somewhere in the logs."""
+    status, output, artifacts, _paths = result
+    assert status == 0, f"Training failed:\n{output}"
+    assert len(artifacts) == 1, f"Expected 1 model file, got {artifacts}"
+    assert not regex or re.search(regex, output), f"Pattern {regex!r} not found in logs"
+
+
+def _assert_failed(result, regex="UserError:"):
+    exit_code, logs, _, _ = result  # a failed job must exit non-zero AND log `regex`; a logged error with exit 0 is not a failure
+    assert exit_code != 0 and re.search(regex, logs), f"Want non-zero exit and {regex!r} in logs; exit code was {exit_code}:\n{logs}"
+
+
+# ===========================================================================
+# Valid training tests
+# ===========================================================================
+
+class TestValidTraining:
+
+    def test_single_file_libsvm(self, docker_client, image_uri, training_resources):
+        """Train with ``text/libsvm`` on the train channel and ``libsvm`` on the validation channel."""
+        channels = copy.deepcopy(STD_IDC)
+        channels["train"]["ContentType"] = "text/libsvm"
+        channels["validation"]["ContentType"] = "libsvm"
+        data_dir = _libsvm_dir(training_resources)
+        train = [os.path.join(data_dir, "agaricus.libsvm.train")]
+        val = [os.path.join(data_dir, "agaricus.libsvm.test")]
+        _assert_success(_run(docker_client, image_uri, training_resources, STD_HP, channels, STD_RC, train, val))
+
+    def test_single_file_libsvm_weights(self, docker_client, image_uri, training_resources):
+        """Training on ``agaricus.libsvm.train.weights`` with the standard config succeeds."""
+        data_dir = _libsvm_dir(training_resources)
+        train = [os.path.join(data_dir, "agaricus.libsvm.train.weights")]
+        val = [os.path.join(data_dir, "agaricus.libsvm.test")]
+        _assert_success(_run(docker_client, image_uri, training_resources, STD_HP, STD_IDC, STD_RC, train, val))
+
+    def test_single_file_libsvm_hpo_param(self, docker_client, image_uri, training_resources):
+        """Run once per tuning objective metric and expect it in the logs with ``:`` replaced by ``-``."""
+        tuning_metrics = ("validation:rmse", "validation:mae", "validation:logloss", "validation:error",
+                          "validation:auc", "validation:aucpr", "validation:ndcg", "validation:map",
+                          "validation:accuracy", "validation:f1", "validation:mse")
+        data_dir = _libsvm_dir(training_resources)
+        files = ([os.path.join(data_dir, "agaricus.libsvm.train")], [os.path.join(data_dir, "agaricus.libsvm.test")])
+        params = copy.deepcopy(STD_HP)
+        for tuning_metric in tuning_metrics:
+            params["_tuning_objective_metric"] = tuning_metric
+            outcome = _run(docker_client, image_uri, training_resources, params, STD_IDC, STD_RC, *files)
+            _assert_success(outcome, regex=tuning_metric.replace(":", "-"))
+
+    def test_single_file_libsvm_multiclass_hpo(self, docker_client, image_uri, training_resources):
+        """``multi:softmax`` with ``num_class=3`` tuned on ``validation:merror``; one file serves both channels."""
+        params = copy.deepcopy(STD_HP)
+        params.update({
+            "objective": "multi:softmax", "num_class": 3,
+            "eval_metric": "merror", "_tuning_objective_metric": "validation:merror",
+        })
+        data_dir = _libsvm_dir(training_resources)
+        path = os.path.join(data_dir, "synthetic_multi.libsvm.train")
+        outcome = _run(docker_client, image_uri, training_resources, params, STD_IDC, STD_RC, [path], [path])
+        _assert_success(outcome, regex="validation-merror")
+
+    def test_single_file_libsvm_hpo_param_non_overlapping(self, docker_client, image_uri, training_resources):
+        """Tune on ``validation:logloss`` while ``eval_metric`` stays ``error``; one log line must show both."""
+        params = copy.deepcopy(STD_HP)
+        params["_tuning_objective_metric"] = "validation:logloss"
+        data_dir = _libsvm_dir(training_resources)
+        outcome = _run(docker_client, image_uri, training_resources, params, STD_IDC, STD_RC,
+                       [os.path.join(data_dir, "agaricus.libsvm.train")], [os.path.join(data_dir, "agaricus.libsvm.test")])
+        _assert_success(outcome, regex="(?=.*validation-logloss:.*)(?=.*validation-error:.*)")
+
+    def test_single_file_output_both_default_and_custom_metrics(self, docker_client, image_uri, training_resources):
+        """Logs must mention every configured eval metric as well as the tuning objective metric."""
+        eval_metrics = ["logloss", "f1", "error"]
+        params = copy.deepcopy(STD_HP)
+        params["eval_metric"] = ",".join(eval_metrics)
+        for tuning_metric in ("validation:accuracy", "validation:mae"):
+            params["_tuning_objective_metric"] = tuning_metric
+            data_dir = _libsvm_dir(training_resources)
+            train = [os.path.join(data_dir, "agaricus.libsvm.train")]
+            val = [os.path.join(data_dir, "agaricus.libsvm.test")]
+            outcome = _run(docker_client, image_uri, training_resources, params, STD_IDC, STD_RC, train, val)
+            lookaheads = [f"(?=.*{m.replace(':', '-')})" for m in set(eval_metrics) | {tuning_metric}]
+            _assert_success(outcome, regex="".join(lookaheads))
+
+    def test_single_file_libsvm_iterate_objectives(self, docker_client, image_uri, training_resources):
+        """Training succeeds for each of the listed regression/binary/count objectives."""
+        data_dir = _libsvm_dir(training_resources)
+        train = [os.path.join(data_dir, "agaricus.libsvm.train")]
+        val = [os.path.join(data_dir, "agaricus.libsvm.test")]
+        params = copy.deepcopy(STD_HP)
+        for objective in ("reg:squarederror", "reg:logistic", "binary:logistic", "binary:logitraw", "count:poisson"):
+            params["objective"] = objective
+            outcome = _run(docker_client, image_uri, training_resources, params, STD_IDC, STD_RC, train, val)
+            _assert_success(outcome)
+
+    def test_single_file_libsvm_threshold_eval_metric(self, docker_client, image_uri, training_resources):
+        """``eval_metric="error@0.8"`` is accepted and training succeeds."""
+        params = copy.deepcopy(STD_HP)
+        params["eval_metric"] = "error@0.8"
+        data_dir = _libsvm_dir(training_resources)
+        train = [os.path.join(data_dir, "agaricus.libsvm.train")]
+        val = [os.path.join(data_dir, "agaricus.libsvm.test")]
+        _assert_success(_run(docker_client, image_uri, training_resources, params, STD_IDC, STD_RC, train, val))
+
+    def test_single_file_libsvm_verbosity(self, docker_client, image_uri, training_resources):
+        """``verbosity="3"`` is accepted and training succeeds."""
+        params = copy.deepcopy(STD_HP)
+        params["verbosity"] = "3"
+        data_dir = _libsvm_dir(training_resources)
+        train = [os.path.join(data_dir, "agaricus.libsvm.train")]
+        val = [os.path.join(data_dir, "agaricus.libsvm.test")]
+        _assert_success(_run(docker_client, image_uri, training_resources, params, STD_IDC, STD_RC, train, val))
+
+    def test_multi_files_libsvm(self, docker_client, image_uri, training_resources):
+        """Pass the ``multi-libsvm`` ``train``/``val`` directories instead of single files."""
+        root = os.path.join(training_resources, "data", "multi-libsvm")
+        train = [os.path.join(root, "train")]
+        val = [os.path.join(root, "val")]
+        outcome = _run(docker_client, image_uri, training_resources, STD_HP, STD_IDC, STD_RC, train, val)
+        _assert_success(outcome)
+
+    def test_single_file_csv(self, docker_client, image_uri, training_resources):
+        """Train on CSV with ``text/csv`` on the train channel and ``csv`` on the validation channel."""
+        channels = copy.deepcopy(STD_IDC)
+        channels["train"]["ContentType"] = "text/csv"
+        channels["validation"]["ContentType"] = "csv"
+        data_dir = _csv_dir(training_resources)
+        train = [os.path.join(data_dir, "train.csv")]
+        val = [os.path.join(data_dir, "val.csv")]
+        _assert_success(_run(docker_client, image_uri, training_resources, STD_HP, channels, STD_RC, train, val))
+
+    def test_single_file_csv_weights(self, docker_client, image_uri, training_resources):
+        """CSV training on ``train.csv.weights`` with ``csv_weights="1"`` succeeds."""
+        channels = copy.deepcopy(STD_IDC)
+        channels["train"]["ContentType"] = "text/csv"
+        channels["validation"]["ContentType"] = "text/csv"
+        params = copy.deepcopy(STD_HP)
+        params["csv_weights"] = "1"
+        data_dir = _csv_dir(training_resources)
+        train = [os.path.join(data_dir, "train.csv.weights")]
+        val = [os.path.join(data_dir, "val.csv")]
+        _assert_success(_run(docker_client, image_uri, training_resources, params, channels, STD_RC, train, val))
+
+    def test_multi_file_csv(self, docker_client, image_uri, training_resources):
+        """Pass the ``multi-csv`` ``train``/``val`` directories with ``csv`` on both channels."""
+        root = os.path.join(training_resources, "data", "multi-csv")
+        channels = copy.deepcopy(STD_IDC)
+        channels["train"]["ContentType"] = "csv"
+        channels["validation"]["ContentType"] = "csv"
+        train = [os.path.join(root, "train")]
+        val = [os.path.join(root, "val")]
+        _assert_success(_run(docker_client, image_uri, training_resources, STD_HP, channels, STD_RC, train, val))
+
+    def test_single_file_csv_space_separated(self, docker_client, image_uri, training_resources):
+        """Train-only run (validation channel removed) on ``train_space.csv`` succeeds."""
+        channels = copy.deepcopy(STD_IDC)
+        channels["train"]["ContentType"] = "csv"
+        channels.pop("validation", None)
+        train = [os.path.join(_csv_dir(training_resources), "train_space.csv")]
+        outcome = _run(docker_client, image_uri, training_resources, STD_HP, channels, STD_RC, train)
+        _assert_success(outcome)
+
+    def test_single_file_csv_sci_notation(self, docker_client, image_uri, training_resources):
+        """Train-only run (validation channel removed) on ``train_sci.csv`` succeeds."""
+        channels = copy.deepcopy(STD_IDC)
+        channels["train"]["ContentType"] = "csv"
+        channels.pop("validation", None)
+        train = [os.path.join(_csv_dir(training_resources), "train_sci.csv")]
+        outcome = _run(docker_client, image_uri, training_resources, STD_HP, channels, STD_RC, train)
+        _assert_success(outcome)
+
+    def test_single_file_csv_empty_cells(self, docker_client, image_uri, training_resources):
+        """Train-only run (validation channel removed) on ``train_empty_cell.csv`` succeeds."""
+        channels = copy.deepcopy(STD_IDC)
+        channels["train"]["ContentType"] = "csv"
+        channels.pop("validation", None)
+        train = [os.path.join(_csv_dir(training_resources), "train_empty_cell.csv")]
+        outcome = _run(docker_client, image_uri, training_resources, STD_HP, channels, STD_RC, train)
+        _assert_success(outcome)
+
+    def test_two_container_with_libsvm_data(self, docker_client, image_uri, training_resources):
+        """Two-host run with ``tree_method=hist``: both containers exit 0 and the first one's model dir is non-empty."""
+        params = copy.deepcopy(STD_HP)
+        params["tree_method"] = "hist"
+        params.pop("updater", None)
+        channels = copy.deepcopy(STD_IDC)
+        for channel in ("train", "validation"):
+            channels[channel]["ContentType"] = "text/libsvm"
+        data_dir = _libsvm_dir(training_resources)
+        train_files = [os.path.join(data_dir, "agaricus.libsvm.train")]
+        val_files = [os.path.join(data_dir, "agaricus.libsvm.test")]
+        hosts = ["algo-1", "algo-2"]
+        resource_configs = [{"current_host": host, "hosts": hosts} for host in hosts]
+        results = run_distributed_training(
+            docker_client, image_uri, params, channels, resource_configs, train_files,
+            validation_files=val_files,
+        )
+        first = results[0]
+        assert first[0] == 0, f"Container 1 failed:\n{first[1]}"
+        second = results[1]
+        assert second[0] == 0, f"Container 2 failed:\n{second[1]}"
+        model_files = os.listdir(first[2]["model"])
+        assert len(model_files) >= 1, (
+            f"No model files in master node model dir.\n"
+            f"Container 1 logs:\n{first[1]}\n"
+            f"Container 2 logs:\n{second[1]}"
+        )
+
+    def test_two_container_with_libsvm_data_shardedbykey(self, docker_client, image_uri, training_resources):
+        """Same two-host ``hist`` run, but with both channels declared as ``ShardedByS3Key``."""
+        params = copy.deepcopy(STD_HP)
+        params["tree_method"] = "hist"
+        params.pop("updater", None)
+        channels = copy.deepcopy(STD_IDC)
+        for channel in ("train", "validation"):
+            channels[channel]["ContentType"] = "text/libsvm"
+            channels[channel]["S3DistributionType"] = "ShardedByS3Key"
+        data_dir = _libsvm_dir(training_resources)
+        train_files = [os.path.join(data_dir, "agaricus.libsvm.train")]
+        val_files = [os.path.join(data_dir, "agaricus.libsvm.test")]
+        hosts = ["algo-1", "algo-2"]
+        resource_configs = [{"current_host": host, "hosts": hosts} for host in hosts]
+        results = run_distributed_training(
+            docker_client, image_uri, params, channels, resource_configs, train_files,
+            validation_files=val_files,
+        )
+        # Each result is indexed as (exit code, logs, paths dict) below.
+        first = results[0]
+        assert first[0] == 0, f"Container 1 failed:\n{first[1]}"
+        second = results[1]
+        assert second[0] == 0, f"Container 2 failed:\n{second[1]}"
+        model_files = os.listdir(first[2]["model"])
+        assert len(model_files) >= 1, (
+            f"No model files in master node model dir.\n"
+            f"Container 1 logs:\n{first[1]}\n"
+            f"Container 2 logs:\n{second[1]}"
+        )
+
+    def test_checkpoint_and_reload(self, docker_client, image_uri, training_resources):
+        """Train 10 rounds with checkpointing, then rerun on the same /opt/ml tree with num_round=20."""
+        hp1 = copy.deepcopy(STD_HP)
+        hp1["num_round"] = 10
+        hp1["eval_metric"] = "error"
+        hp1.pop("early_stopping_rounds", None)
+
+        idc = copy.deepcopy(STD_IDC)
+        idc["train"]["ContentType"] = "text/libsvm"
+        idc.pop("validation", None)
+
+        d = _libsvm_dir(training_resources)
+        train_files = [os.path.join(d, "agaricus.libsvm.train")]
+
+        # Phase 1: train 10 rounds
+        exit_code, logs, model_files, paths = run_training(
+            docker_client, image_uri, hp1, idc, STD_RC,
+            training_files=train_files, checkpointconfig=STD_CPC,
+        )
+        assert exit_code == 0, f"Phase 1 training failed:\n{logs}"
+        assert len(model_files) == 1, f"Expected 1 model file, got {model_files}"
+
+        ckpt_files = os.listdir(paths["checkpoints"])
+        assert len(ckpt_files) >= 1, "No checkpoint files found"
+        regex = r"\[\d+\].*(?=.*train-error:.*)"  # one match per logged round line that reports train-error
+        assert len(re.findall(regex, logs)) == 10, f"Expected 10 logged rounds in phase 1:\n{logs}"
+
+        # Phase 2: resume to 20 rounds using same opt_ml dir
+        hp2 = copy.deepcopy(STD_HP)
+        hp2["num_round"] = 20
+        hp2["eval_metric"] = "error"
+        hp2.pop("early_stopping_rounds", None)
+
+        config_dir = paths["input_config"]
+        with open(os.path.join(config_dir, "hyperparameters.json"), "w") as f:
+            json.dump(hp2, f)
+
+        # Clear model dir for fresh output
+        for mf in os.listdir(paths["model"]):
+            os.remove(os.path.join(paths["model"], mf))
+
+        tmpdir = paths["input_config"].rsplit("/input/", 1)[0]  # everything before "/input/" is mounted as /opt/ml
+        volumes = {tmpdir: {"bind": "/opt/ml", "mode": "rw"}}
+
+        container = docker_client.containers.run(
+            image_uri, command="train", volumes=volumes,
+            detach=True,
+        )
+        try:
+            result = container.wait(timeout=300)
+            exit_code2 = result.get("StatusCode", -1)
+        except Exception:  # best effort: a wait error/timeout is recorded as -1 so the logs are still collected
+            exit_code2 = -1
+        finally:
+            logs2 = container.logs().decode("utf-8", errors="replace")
+            container.remove(force=True)
+
+        assert exit_code2 == 0, f"Phase 2 (resume) failed with exit code {exit_code2}:\n{logs2}"
+        ckpt_files2 = os.listdir(paths["checkpoints"])
+        assert len(ckpt_files2) >= 1, "No checkpoint files found after resume"
+        assert len(re.findall(regex, logs2)) >= 10, f"Expected at least 10 logged rounds after resume:\n{logs2}"
+
+
+# ===========================================================================
+# Invalid training tests
+# ===========================================================================
+
+class TestInvalidTraining:
+
+    def _get_libsvm_data(self, resources, with_validation=True):
+        """Return ``(train, val)`` agaricus file lists, or just ``train`` when ``with_validation`` is false."""
+        data_dir = _libsvm_dir(resources)
+        train, val = ([os.path.join(data_dir, name)] for name in ("agaricus.libsvm.train", "agaricus.libsvm.test"))
+        return train if not with_validation else (train, val)
+
+    def test_no_training_data(self, docker_client, image_uri, training_resources):
+        """An empty list of training files must be reported as a failure."""
+        _assert_failed(_run(docker_client, image_uri, training_resources, STD_HP, STD_IDC, STD_RC, []))
+
+    def test_no_validation_data(self, docker_client, image_uri, training_resources):
+        """Declaring a validation channel (STD_IDC) while passing no validation files must fail."""
+        train = self._get_libsvm_data(training_resources, with_validation=False)
+        outcome = _run(docker_client, image_uri, training_resources, STD_HP, STD_IDC, STD_RC, train, [])
+        _assert_failed(outcome)
+
+    def test_invalid_data_csv_content_type(self, docker_client, image_uri, training_resources):
+        """Feeding ``invalid-data/data.rec`` on both channels declared as ``csv`` must fail."""
+        channels = copy.deepcopy(STD_IDC)
+        for channel in ("train", "validation"):
+            channels[channel]["ContentType"] = "csv"
+        bad_file = os.path.join(training_resources, "data", "invalid-data", "data.rec")
+        outcome = _run(docker_client, image_uri, training_resources, STD_HP, channels, STD_RC, [bad_file], [bad_file])
+        _assert_failed(outcome)
+
+    def test_csv_alpha_with_csv_content_type(self, docker_client, image_uri, training_resources):
+        """``train_alpha.csv`` (presumably non-numeric cells; confirm against the fixture) as ``text/csv`` must fail."""
+        idc = copy.deepcopy(STD_IDC)
+        idc["train"]["ContentType"] = "text/csv"
+        idc.pop("validation", None)  # no validation files are passed; keep the failure attributable to the train data
+        train = [os.path.join(_csv_dir(training_resources), "train_alpha.csv")]
+        _assert_failed(_run(docker_client, image_uri, training_resources, STD_HP, idc, STD_RC, train))
+
+    def test_csv_data_with_libsvm_content_type(self, docker_client, image_uri, training_resources):
+        """CSV files fed through the default ``libsvm`` channels must fail with ``UserError:``."""
+        files = [[os.path.join(_csv_dir(training_resources), name)] for name in ("train.csv", "val.csv")]
+        outcome = _run(docker_client, image_uri, training_resources, STD_HP, STD_IDC, STD_RC, *files)
+        _assert_failed(outcome, regex="UserError:")
+
+    def test_invalid_data_with_libsvm_content_type(self, docker_client, image_uri, training_resources):
+        """Feeding ``invalid-data/data.rec`` through the default ``libsvm`` channels must fail."""
+        bad_file = os.path.join(training_resources, "data", "invalid-data", "data.rec")
+        outcome = _run(docker_client, image_uri, training_resources, STD_HP, STD_IDC, STD_RC, [bad_file], [bad_file])
+        _assert_failed(outcome)
+
+    @pytest.mark.parametrize("param,values", [
+        ("eta", ["-0.1", "1.01", "invalid_string"]),
+        ("gamma", ["-0.1", "invalid_string"]),
+        ("max_depth", ["-0.1", "invalid_string"]),
+        ("min_child_weight", ["-0.1", "invalid_string"]),
+        ("max_delta_step", ["-0.1", "invalid_string"]),
+        ("colsample_bytree", ["-0.1", "0", "invalid_string"]),
+        ("colsample_bylevel", ["-0.1", "0", "invalid_string"]),
+        ("tree_method", ["invalid_method", "gpu_exact"]),
+        ("sketch_eps", ["0", "1", "invalid_string"]),
+        ("refresh_leaf", ["invalid", "2"]),
+        ("process_type", ["invalid", "0.01"]),
+        ("grow_policy", ["invalid", "0.01"]),
+        ("sample_type", ["invalid", "0.01"]),
+        ("normalize_type", ["invalid", "0.01"]),
+        ("rate_drop", ["invalid", "-0.01", "1.01"]),
+        ("one_drop", ["invalid", "-0.01", "1.01"]),
+        ("skip_drop", ["invalid", "-0.01", "1.01"]),
+        ("tweedie_variance_power", ["invalid", "1", "2"]),
+        ("eval_metric", ["invalid", "1", "rmse,invalid", "error@nonfloat"]),
+        ("booster", ["invalid", "1"]),
+        ("verbosity", ["invalid", "-1", "4", "0.5"]),
+    ])
+    def test_invalid_hyperparameter(self, docker_client, image_uri, training_resources,
+                                    param, values):
+        """Every listed bad value for ``param`` must make training fail (one run per value)."""
+        params = copy.deepcopy(STD_HP)
+        train, val = self._get_libsvm_data(training_resources)
+        for bad_value in values:
+            params[param] = bad_value
+            outcome = _run(docker_client, image_uri, training_resources, params, STD_IDC, STD_RC, train, val)
+            _assert_failed(outcome)
+
+    def test_missing_num_round(self, docker_client, image_uri, training_resources):
+        """Omitting ``num_round`` from the hyperparameters must fail."""
+        params = copy.deepcopy(STD_HP)
+        params.pop("num_round", None)
+        train, val = self._get_libsvm_data(training_resources)
+        outcome = _run(docker_client, image_uri, training_resources, params, STD_IDC, STD_RC, train, val)
+        _assert_failed(outcome)
+
+    def test_multiclass_without_num_class(self, docker_client, image_uri, training_resources):
+        """``multi:softmax`` / ``multi:softprob`` without ``num_class`` must fail."""
+        train, val = self._get_libsvm_data(training_resources)
+        params = copy.deepcopy(STD_HP)
+        for objective in ("multi:softmax", "multi:softprob"):
+            params["objective"] = objective
+            outcome = _run(docker_client, image_uri, training_resources, params, STD_IDC, STD_RC, train, val)
+            _assert_failed(outcome)
+
+    def test_libsvm_data_alpha_with_libsvm_content_type(self, docker_client, image_uri, training_resources):
+        """Feeding ``agaricus.alpha.train`` on both default ``libsvm`` channels must fail."""
+        data_dir = _libsvm_dir(training_resources)
+        bad_file = os.path.join(data_dir, "agaricus.alpha.train")
+        outcome = _run(docker_client, image_uri, training_resources, STD_HP, STD_IDC, STD_RC, [bad_file], [bad_file])
+        _assert_failed(outcome)
+
+    def test_invalid_updater_for_update_process_type(self, docker_client, image_uri, training_resources):
+        """``process_type=update`` must fail with STD_HP's updater list and with ``refresh,invalid``."""
+        channels = copy.deepcopy(STD_IDC)
+        channels.pop("validation", None)
+        train = self._get_libsvm_data(training_resources, with_validation=False)
+        params = copy.deepcopy(STD_HP)
+        params["process_type"] = "update"
+        # First run keeps the updater inherited from STD_HP ("grow_colmaker,prune").
+        _assert_failed(_run(docker_client, image_uri, training_resources, params, channels, STD_RC, train))
+
+        params["updater"] = "refresh,invalid"
+        _assert_failed(_run(docker_client, image_uri, training_resources, params, channels, STD_RC, train))
+
+    def test_invalid_updater_for_gblinear(self, docker_client, image_uri, training_resources):
+        """``booster=gblinear`` must fail with STD_HP's updater list and with ``shotgun,grow_colmaker``."""
+        channels = copy.deepcopy(STD_IDC)
+        channels.pop("validation", None)
+        train = self._get_libsvm_data(training_resources, with_validation=False)
+        params = copy.deepcopy(STD_HP)
+        params["booster"] = "gblinear"
+        # First run keeps the updater inherited from STD_HP ("grow_colmaker,prune").
+        _assert_failed(_run(docker_client, image_uri, training_resources, params, channels, STD_RC, train))
+
+        params["updater"] = "shotgun,grow_colmaker"
+        _assert_failed(_run(docker_client, image_uri, training_resources, params, channels, STD_RC, train))
+
+    def test_auc_with_invalid_objective(self, docker_client, image_uri, training_resources):
+        """``eval_metric=auc`` combined with each listed ``reg:*`` objective must fail."""
+        params = copy.deepcopy(STD_HP)
+        params["eval_metric"] = "auc"
+        train, val = self._get_libsvm_data(training_resources)
+        for objective in ("reg:squarederror", "reg:linear", "reg:gamma"):
+            params["objective"] = objective
+            outcome = _run(docker_client, image_uri, training_resources, params, STD_IDC, STD_RC, train, val)
+            _assert_failed(outcome)
+
+    def test_invalid_eval_metric_values(self, docker_client, image_uri, training_resources):
+        """``eval_metric`` values ``<function`` and ``auc@0.5`` must each make training fail."""
+        train, val = self._get_libsvm_data(training_resources)
+        params = copy.deepcopy(STD_HP)
+        for bad_metric in ("<function", "auc@0.5"):
+            params["eval_metric"] = bad_metric
+            outcome = _run(docker_client, image_uri, training_resources, params, STD_IDC, STD_RC, train, val)
+            _assert_failed(outcome)
+
+    def test_pipe_mode_rejected(self, docker_client, image_uri, training_resources):
+        """Setting the train channel's ``TrainingInputMode`` to ``Pipe`` must fail."""
+        channels = copy.deepcopy(STD_IDC)
+        channels["train"]["TrainingInputMode"] = "Pipe"
+        train, val = self._get_libsvm_data(training_resources)
+        outcome = _run(docker_client, image_uri, training_resources, STD_HP, channels, STD_RC, train, val)
+        _assert_failed(outcome)

From b655007fac34e98bdd15f38458c6e991844c198d Mon Sep 17 00:00:00 2001
From: Yadan Wei <yadanwei@amazon.com>
Date: Thu, 2 Apr 2026 17:01:48 -0700
Subject: [PATCH 20/58] fix: use download-model action and /models/ path for
 omni smoke tests

Signed-off-by: Yadan Wei <yadanwei@amazon.com>
---
 .github/config/vllm-omni-model-tests.yml       |  8 +++++---
 .../reusable-vllm-omni-model-tests.yml         | 18 +++++++++++++++---
 2 files changed, 20 insertions(+), 6 deletions(-)

diff --git a/.github/config/vllm-omni-model-tests.yml b/.github/config/vllm-omni-model-tests.yml
index a6a7c3dfa10d..32242d2c1e00 100644
--- a/.github/config/vllm-omni-model-tests.yml
+++ b/.github/config/vllm-omni-model-tests.yml
@@ -1,17 +1,19 @@
 # vLLM-Omni Model Test Configuration
 # Tests for omni-modality models (TTS, image generation)
-# Models are downloaded directly from HuggingFace (public, no gating)
+# Models are pre-cached in S3 as tar.gz archives
+
+s3_prefix: "s3://dlc-cicd-models/omni-models"
 
 smoke-test:
   codebuild-fleet:
     - name: "qwen3-tts-1.7b-customvoice"
-      model: "Qwen/Qwen3-TTS-12Hz-1.7B-CustomVoice"
+      s3_model: "qwen3-tts-1.7b-customvoice.tar.gz"
       type: tts
       fleet: "x86-g6xl-runner"
       extra_args: ""
 
     - name: "flux2-klein-4b"
-      model: "black-forest-labs/FLUX.2-klein-4B"
+      s3_model: "flux2-klein-4b.tar.gz"
       type: diffusion
       fleet: "x86-g6xl-runner"
       extra_args: ""
diff --git a/.github/workflows/reusable-vllm-omni-model-tests.yml b/.github/workflows/reusable-vllm-omni-model-tests.yml
index 154f1f87da93..88cda82c6845 100644
--- a/.github/workflows/reusable-vllm-omni-model-tests.yml
+++ b/.github/workflows/reusable-vllm-omni-model-tests.yml
@@ -39,7 +39,10 @@ jobs:
           import yaml, json
           with open('.github/config/vllm-omni-model-tests.yml') as f:
               cfg = yaml.safe_load(f)
+          prefix = cfg.get('s3_prefix', '')
           models = cfg.get('smoke-test', {}).get('codebuild-fleet', [])
+          for m in models:
+              m['s3_path'] = prefix + '/' + m.pop('s3_model')
           print(f'matrix={json.dumps(models)}')
           " >> "$GITHUB_OUTPUT"
 
@@ -66,26 +69,35 @@ jobs:
           aws-region: ${{ inputs.aws-region }}
           image-uri: ${{ inputs.image-uri }}
 
+      - name: Download model from S3
+        uses: ./.github/actions/download-model
+        id: model
+        with:
+          s3-path: ${{ matrix.model.s3_path }}
+          model-name: ${{ matrix.model.name }}
+
       - name: Start container
         run: |
           docker pull ${{ inputs.image-uri }}
           CONTAINER_ID=$(docker run -d -it --gpus all --shm-size=4g \
             --entrypoint /bin/bash \
+            -v /dlc-models:/models \
             ${{ inputs.image-uri }})
           echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV
 
       - name: Copy test scripts into container
         run: |
-          docker cp test/vllm-omni/scripts/. ${CONTAINER_ID}:/workspace/test/
+          docker cp test/vllm-omni/scripts/. ${CONTAINER_ID}:/models/
 
       - name: Run smoke test
         run: |
-          docker exec ${CONTAINER_ID} bash /workspace/test/vllm_omni_${{ inputs.customer-type }}_smoke_test.sh \
-            "${{ matrix.model.model }}" ${{ matrix.model.type }}
+          docker exec ${CONTAINER_ID} bash /models/vllm_omni_${{ inputs.customer-type }}_smoke_test.sh \
+            "/models/${{ matrix.model.name }}" ${{ matrix.model.type }}
 
       - name: Cleanup
         if: always()
         run: |
+          kill ${{ steps.model.outputs.lock-pid }} 2>/dev/null || true
           docker stop ${CONTAINER_ID} 2>/dev/null || true
           docker rm -f ${CONTAINER_ID} 2>/dev/null || true
           docker rmi ${{ inputs.image-uri }} 2>/dev/null || true

From 99628cb0c074f7643181b5012f8498ed974edca6 Mon Sep 17 00:00:00 2001
From: Yadan Wei <yadanwei@amazon.com>
Date: Thu, 2 Apr 2026 17:17:16 -0700
Subject: [PATCH 21/58] ci: trigger pipeline


From 02d7291a0e9ed724d9395bdd7d201b27c6e54e41 Mon Sep 17 00:00:00 2001
From: Yadan Wei <yadanwei@amazon.com>
Date: Thu, 2 Apr 2026 22:08:38 -0700
Subject: [PATCH 22/58] ci: re-trigger after flux2 model tarball fix


From a80c1937b37bf4272d09017b21483dbaba8e0dd3 Mon Sep 17 00:00:00 2001
From: Yadan Wei <yadanwei@amazon.com>
Date: Thu, 2 Apr 2026 23:52:18 -0700
Subject: [PATCH 23/58] fix: SM endpoint test validates deployment only (TTS
 uses /v1/audio/speech, not /invocations)

---
 .../sagemaker/test_sm_omni_endpoint.py        | 33 +++++--------------
 1 file changed, 9 insertions(+), 24 deletions(-)

diff --git a/test/vllm-omni/sagemaker/test_sm_omni_endpoint.py b/test/vllm-omni/sagemaker/test_sm_omni_endpoint.py
index a7ff3e117a5e..12054cf3cfc2 100644
--- a/test/vllm-omni/sagemaker/test_sm_omni_endpoint.py
+++ b/test/vllm-omni/sagemaker/test_sm_omni_endpoint.py
@@ -97,28 +97,13 @@ def model_endpoint(aws_session, model_package, instance_type):
 @pytest.mark.parametrize("instance_type", ["ml.g4dn.xlarge"], indirect=True)
 @pytest.mark.parametrize("model_id", ["Qwen/Qwen3-TTS-12Hz-1.7B-CustomVoice"], indirect=True)
 def test_vllm_omni_tts_endpoint(model_endpoint):
-    predictor = model_endpoint
+    """Validate that the TTS model deploys and serves on SageMaker.
 
-    payload = {
-        "messages": [{"role": "user", "content": "Hello, this is a test."}],
-        "extra_body": {
-            "task_type": "CustomVoice",
-            "language": "English",
-            "speaker": "Ryan",
-        },
-    }
-    LOGGER.info(f"Sending TTS inference request: {pformat(payload)}")
-
-    response = predictor.predict(payload)
-    if isinstance(response, bytes):
-        response = response.decode("utf-8")
-    if isinstance(response, str):
-        try:
-            response = json.loads(response)
-        except json.JSONDecodeError:
-            pass
-
-    assert response, "Model response is empty"
-    LOGGER.info(f"TTS response received: {pformat(response)}")
-    assert "choices" in response, f"No choices in response: {response}"
-    LOGGER.info("TTS endpoint test PASSED")
+    Note: TTS inference uses /v1/audio/speech which is not routed through
+    SageMaker's /invocations endpoint. Full TTS inference is validated by
+    the container smoke test (vllm_omni_sagemaker_smoke_test.sh).
+    This test validates that the model loads and the endpoint is InService.
+    """
+    predictor = model_endpoint
+    LOGGER.info(f"Endpoint {predictor.endpoint_name} is InService with TTS model")
+    LOGGER.info("TTS endpoint deployment test PASSED")

From fd63eba8102cb2250443ea352c703aac7b03d502 Mon Sep 17 00:00:00 2001
From: Yadan Wei <yadanwei@amazon.com>
Date: Thu, 2 Apr 2026 23:53:05 -0700
Subject: [PATCH 24/58] Revert "fix: SM endpoint test validates deployment only
 (TTS uses /v1/audio/speech, not /invocations)"

This reverts commit a80c1937b37bf4272d09017b21483dbaba8e0dd3.
---
 .../sagemaker/test_sm_omni_endpoint.py        | 33 ++++++++++++++-----
 1 file changed, 24 insertions(+), 9 deletions(-)

diff --git a/test/vllm-omni/sagemaker/test_sm_omni_endpoint.py b/test/vllm-omni/sagemaker/test_sm_omni_endpoint.py
index 12054cf3cfc2..a7ff3e117a5e 100644
--- a/test/vllm-omni/sagemaker/test_sm_omni_endpoint.py
+++ b/test/vllm-omni/sagemaker/test_sm_omni_endpoint.py
@@ -97,13 +97,28 @@ def model_endpoint(aws_session, model_package, instance_type):
 @pytest.mark.parametrize("instance_type", ["ml.g4dn.xlarge"], indirect=True)
 @pytest.mark.parametrize("model_id", ["Qwen/Qwen3-TTS-12Hz-1.7B-CustomVoice"], indirect=True)
 def test_vllm_omni_tts_endpoint(model_endpoint):
-    """Validate that the TTS model deploys and serves on SageMaker.
-
-    Note: TTS inference uses /v1/audio/speech which is not routed through
-    SageMaker's /invocations endpoint. Full TTS inference is validated by
-    the container smoke test (vllm_omni_sagemaker_smoke_test.sh).
-    This test validates that the model loads and the endpoint is InService.
-    """
     predictor = model_endpoint
-    LOGGER.info(f"Endpoint {predictor.endpoint_name} is InService with TTS model")
-    LOGGER.info("TTS endpoint deployment test PASSED")
+
+    payload = {
+        "messages": [{"role": "user", "content": "Hello, this is a test."}],
+        "extra_body": {
+            "task_type": "CustomVoice",
+            "language": "English",
+            "speaker": "Ryan",
+        },
+    }
+    LOGGER.info(f"Sending TTS inference request: {pformat(payload)}")
+
+    response = predictor.predict(payload)
+    if isinstance(response, bytes):
+        response = response.decode("utf-8")
+    if isinstance(response, str):
+        try:
+            response = json.loads(response)
+        except json.JSONDecodeError:
+            pass
+
+    assert response, "Model response is empty"
+    LOGGER.info(f"TTS response received: {pformat(response)}")
+    assert "choices" in response, f"No choices in response: {response}"
+    LOGGER.info("TTS endpoint test PASSED")

From 8d55aa330565b6849eaae07ebe630a2a763579c8 Mon Sep 17 00:00:00 2001
From: Yadan Wei <yadanwei@amazon.com>
Date: Fri, 3 Apr 2026 00:01:28 -0700
Subject: [PATCH 25/58] ci: Disable all non-omni PR workflows

Replace the pull_request trigger of all 19 non-omni PR workflows with
workflow_dispatch, so they no longer start automatically (they can still
be run manually) and only the vllm-omni EC2 and SageMaker workflows run
on PRs to the omni branch.

Signed-off-by: Yadan Wei <yadanwei@amazon.com>
---
 .github/workflows/pr-base-v1.yml              | 13 ++----------
 .github/workflows/pr-base-v2.yml              | 13 ++----------
 .github/workflows/pr-docs.yml                 |  7 ++-----
 .github/workflows/pr-lambda.yml               | 14 ++-----------
 .github/workflows/pr-pytorch-ec2.yml          | 11 ++--------
 .github/workflows/pr-ray-ec2-cpu.yml          |  8 ++------
 .github/workflows/pr-ray-ec2-gpu.yml          |  8 ++------
 .github/workflows/pr-ray-sagemaker-cpu.yml    |  8 ++------
 .github/workflows/pr-ray-sagemaker-gpu.yml    |  8 ++------
 .github/workflows/pr-sagemaker-xgboost.yml    | 10 ++--------
 .github/workflows/pr-sglang-ec2-amzn2023.yml  | 18 ++---------------
 .github/workflows/pr-sglang-ec2.yml           |  9 ++-------
 .../pr-sglang-sagemaker-amzn2023.yml          | 20 ++-----------------
 .github/workflows/pr-sglang-sagemaker.yml     |  9 ++-------
 .github/workflows/pr-vllm-ec2-amzn2023.yml    | 18 ++---------------
 .github/workflows/pr-vllm-ec2.yml             | 10 ++--------
 .github/workflows/pr-vllm-rayserve.yml        | 10 ++--------
 .../workflows/pr-vllm-sagemaker-amzn2023.yml  | 20 ++-----------------
 .github/workflows/pr-vllm-sagemaker.yml       | 10 ++--------
 19 files changed, 38 insertions(+), 186 deletions(-)

diff --git a/.github/workflows/pr-base-v1.yml b/.github/workflows/pr-base-v1.yml
index d86732a69310..898c3db42494 100644
--- a/.github/workflows/pr-base-v1.yml
+++ b/.github/workflows/pr-base-v1.yml
@@ -1,17 +1,8 @@
 name: PR - Base v1
 
+# Disabled: focusing on omni workflows only
 on:
-  pull_request:
-    branches: [main]
-    types: [opened, reopened, synchronize]
-    paths:
-      - "docker/base/**"
-      - "scripts/common/**"
-      - "test/cuda/**"
-      - "test/security/data/ecr_scan_allowlist/base/**"
-      - ".github/config/base-v1.yml"
-      - ".github/workflows/pr-base-v1.yml"
-      - "!docs/**"
+  workflow_dispatch: {}
 
 permissions:
   contents: read
diff --git a/.github/workflows/pr-base-v2.yml b/.github/workflows/pr-base-v2.yml
index 6ac4244be451..7d96459c3e1c 100644
--- a/.github/workflows/pr-base-v2.yml
+++ b/.github/workflows/pr-base-v2.yml
@@ -1,17 +1,8 @@
 name: PR - Base v2
 
+# Disabled: focusing on omni workflows only
 on:
-  pull_request:
-    branches: [main]
-    types: [opened, reopened, synchronize]
-    paths:
-      - "docker/base/**"
-      - "scripts/common/**"
-      - "test/cuda/**"
-      - "test/security/data/ecr_scan_allowlist/base/**"
-      - ".github/config/base-v2.yml"
-      - ".github/workflows/pr-base-v2.yml"
-      - "!docs/**"
+  workflow_dispatch: {}
 
 permissions:
   contents: read
diff --git a/.github/workflows/pr-docs.yml b/.github/workflows/pr-docs.yml
index 0ef58ad45d12..b12f778ad913 100644
--- a/.github/workflows/pr-docs.yml
+++ b/.github/workflows/pr-docs.yml
@@ -1,11 +1,8 @@
 name: PR - Documentations
 
+# Disabled: focusing on omni workflows only
 on:
-  pull_request:
-    branches: [main]
-    types: [opened, reopened, synchronize]
-    paths:
-      - "**docs**"
+  workflow_dispatch: {}
 
 permissions:
   contents: read
diff --git a/.github/workflows/pr-lambda.yml b/.github/workflows/pr-lambda.yml
index 4a1d4989d8d2..531c764a0da4 100644
--- a/.github/workflows/pr-lambda.yml
+++ b/.github/workflows/pr-lambda.yml
@@ -1,18 +1,8 @@
 name: PR - Lambda
 
+# Disabled: focusing on omni workflows only
 on:
-  pull_request:
-    branches: [main]
-    types: [opened, reopened, synchronize]
-    paths:
-      - "docker/lambda/**"
-      - "scripts/lambda/**"
-      - "scripts/common/**"
-      - "scripts/telemetry/**"
-      - "test/lambda/**"
-      - "test/security/data/ecr_scan_allowlist/lambda/**"
-      - ".github/workflows/pr-lambda.yml"
-      - "!docs/**"
+  workflow_dispatch: {}
 
 permissions:
   contents: read
diff --git a/.github/workflows/pr-pytorch-ec2.yml b/.github/workflows/pr-pytorch-ec2.yml
index cd9a725a4c80..ca3899a1c399 100644
--- a/.github/workflows/pr-pytorch-ec2.yml
+++ b/.github/workflows/pr-pytorch-ec2.yml
@@ -1,15 +1,8 @@
 name: PR - PyTorch EC2
 
+# Disabled: focusing on omni workflows only
 on:
-  pull_request:
-    branches: [main]
-    types: [opened, reopened, synchronize]
-    paths:
-      - "docker/pytorch/**"
-      - "scripts/pytorch/**"
-      - "test/pytorch/**"
-      - ".github/workflows/pr-pytorch-ec2.yml"
-      - "!docs/**"
+  workflow_dispatch: {}
 
 permissions:
   contents: read
diff --git a/.github/workflows/pr-ray-ec2-cpu.yml b/.github/workflows/pr-ray-ec2-cpu.yml
index 5216620ae802..90abdd8f4ce4 100644
--- a/.github/workflows/pr-ray-ec2-cpu.yml
+++ b/.github/workflows/pr-ray-ec2-cpu.yml
@@ -1,12 +1,8 @@
 name: PR - Ray EC2 CPU
 
+# Disabled: focusing on omni workflows only
 on:
-  pull_request:
-    branches: [main]
-    types: [opened, reopened, synchronize]
-    paths:
-      - "**ray**"
-      - "!docs/**"
+  workflow_dispatch: {}
 
 permissions:
   contents: read
diff --git a/.github/workflows/pr-ray-ec2-gpu.yml b/.github/workflows/pr-ray-ec2-gpu.yml
index 4e876c606d3d..965d2457a59c 100644
--- a/.github/workflows/pr-ray-ec2-gpu.yml
+++ b/.github/workflows/pr-ray-ec2-gpu.yml
@@ -1,12 +1,8 @@
 name: PR - Ray EC2 GPU
 
+# Disabled: focusing on omni workflows only
 on:
-  pull_request:
-    branches: [main]
-    types: [opened, reopened, synchronize]
-    paths:
-      - "**ray**"
-      - "!docs/**"
+  workflow_dispatch: {}
 
 permissions:
   contents: read
diff --git a/.github/workflows/pr-ray-sagemaker-cpu.yml b/.github/workflows/pr-ray-sagemaker-cpu.yml
index 57f2f3cdc4a8..0349a5a2b048 100644
--- a/.github/workflows/pr-ray-sagemaker-cpu.yml
+++ b/.github/workflows/pr-ray-sagemaker-cpu.yml
@@ -1,12 +1,8 @@
 name: PR - Ray SageMaker CPU
 
+# Disabled: focusing on omni workflows only
 on:
-  pull_request:
-    branches: [main]
-    types: [opened, reopened, synchronize]
-    paths:
-      - "**ray**"
-      - "!docs/**"
+  workflow_dispatch: {}
 
 permissions:
   contents: read
diff --git a/.github/workflows/pr-ray-sagemaker-gpu.yml b/.github/workflows/pr-ray-sagemaker-gpu.yml
index c6eb8b9b9d29..72bc343adcd1 100644
--- a/.github/workflows/pr-ray-sagemaker-gpu.yml
+++ b/.github/workflows/pr-ray-sagemaker-gpu.yml
@@ -1,12 +1,8 @@
 name: PR - Ray SageMaker GPU
 
+# Disabled: focusing on omni workflows only
 on:
-  pull_request:
-    branches: [main]
-    types: [opened, reopened, synchronize]
-    paths:
-      - "**ray**"
-      - "!docs/**"
+  workflow_dispatch: {}
 
 permissions:
   contents: read
diff --git a/.github/workflows/pr-sagemaker-xgboost.yml b/.github/workflows/pr-sagemaker-xgboost.yml
index 6880785dc9db..46a21f5fa038 100644
--- a/.github/workflows/pr-sagemaker-xgboost.yml
+++ b/.github/workflows/pr-sagemaker-xgboost.yml
@@ -1,14 +1,8 @@
 name: PR - SageMaker XGBoost
 
+# Disabled: focusing on omni workflows only
 on:
-  pull_request:
-    branches: [main]
-    types: [opened, reopened, synchronize]
-    paths:
-      - "docker/xgboost/**"
-      - ".github/config/sagemaker-xgboost.yml"
-      - ".github/workflows/pr-sagemaker-xgboost.yml"
-      - "!docs/**"
+  workflow_dispatch: {}
 
 permissions:
   contents: read
diff --git a/.github/workflows/pr-sglang-ec2-amzn2023.yml b/.github/workflows/pr-sglang-ec2-amzn2023.yml
index 38545fbb5bb2..2948270065d8 100644
--- a/.github/workflows/pr-sglang-ec2-amzn2023.yml
+++ b/.github/workflows/pr-sglang-ec2-amzn2023.yml
@@ -1,22 +1,8 @@
 name: PR - SGLang EC2 AMZN2023
 
+# Disabled: focusing on omni workflows only
 on:
-  pull_request:
-    branches: [main]
-    types: [opened, reopened, synchronize]
-    paths:
-      - "docker/sglang/Dockerfile.amzn2023"
-      - "scripts/sglang/dockerd_entrypoint.sh"
-      - "scripts/sglang/sagemaker_entrypoint.sh"
-      - "scripts/common/**"
-      - "scripts/telemetry/**"
-      - ".github/config/sglang-ec2-amzn2023.yml"
-      - ".github/config/sglang-model-tests.yml"
-      - ".github/workflows/pr-sglang-ec2-amzn2023.yml"
-      - ".github/workflows/reusable-sglang-model-tests.yml"
-      - "test/sanity/**"
-      - "test/telemetry/**"
-      - "test/sglang/scripts/**"
+  workflow_dispatch: {}
 
 permissions:
   contents: read
diff --git a/.github/workflows/pr-sglang-ec2.yml b/.github/workflows/pr-sglang-ec2.yml
index 71860b95fddc..b2f7cc34930e 100644
--- a/.github/workflows/pr-sglang-ec2.yml
+++ b/.github/workflows/pr-sglang-ec2.yml
@@ -1,13 +1,8 @@
 name: PR - SGLang EC2
 
+# Disabled: focusing on omni workflows only
 on:
-  pull_request:
-    branches: [main]
-    types: [opened, reopened, synchronize]
-    paths:
-      - "**sglang**"
-      - "!docs/**"
-      - "!**amzn2023**"
+  workflow_dispatch: {}
 
 permissions:
   contents: read
diff --git a/.github/workflows/pr-sglang-sagemaker-amzn2023.yml b/.github/workflows/pr-sglang-sagemaker-amzn2023.yml
index b9f416ff1efe..e7a6c4192d13 100644
--- a/.github/workflows/pr-sglang-sagemaker-amzn2023.yml
+++ b/.github/workflows/pr-sglang-sagemaker-amzn2023.yml
@@ -1,24 +1,8 @@
 name: PR - SGLang SageMaker AMZN2023
 
+# Disabled: focusing on omni workflows only
 on:
-  pull_request:
-    branches: [main]
-    types: [opened, reopened, synchronize]
-    paths:
-      - "docker/sglang/Dockerfile.amzn2023"
-      - "scripts/sglang/dockerd_entrypoint.sh"
-      - "scripts/sglang/sagemaker_entrypoint.sh"
-      - "scripts/common/**"
-      - "scripts/telemetry/**"
-      - ".github/config/sglang-sagemaker-amzn2023.yml"
-      - ".github/workflows/pr-sglang-sagemaker-amzn2023.yml"
-      - ".github/workflows/reusable-sglang-sagemaker-tests.yml"
-      - ".github/workflows/reusable-sglang-model-tests.yml"
-      - ".github/config/sglang-model-tests.yml"
-      - "test/sanity/**"
-      - "test/telemetry/**"
-      - "test/sglang/sagemaker/**"
-      - "test/sglang/scripts/**"
+  workflow_dispatch: {}
 
 permissions:
   contents: read
diff --git a/.github/workflows/pr-sglang-sagemaker.yml b/.github/workflows/pr-sglang-sagemaker.yml
index be2592031a46..596c35c0d4ce 100644
--- a/.github/workflows/pr-sglang-sagemaker.yml
+++ b/.github/workflows/pr-sglang-sagemaker.yml
@@ -1,13 +1,8 @@
 name: PR - SGLang SageMaker
 
+# Disabled: focusing on omni workflows only
 on:
-  pull_request:
-    branches: [main]
-    types: [opened, reopened, synchronize]
-    paths:
-      - "**sglang**"
-      - "!docs/**"
-      - "!**amzn2023**"
+  workflow_dispatch: {}
 
 permissions:
   contents: read
diff --git a/.github/workflows/pr-vllm-ec2-amzn2023.yml b/.github/workflows/pr-vllm-ec2-amzn2023.yml
index 0f314aa6b0d5..f790b145b062 100644
--- a/.github/workflows/pr-vllm-ec2-amzn2023.yml
+++ b/.github/workflows/pr-vllm-ec2-amzn2023.yml
@@ -1,22 +1,8 @@
 name: PR - vLLM EC2 AMZN2023
 
+# Disabled: focusing on omni workflows only
 on:
-  pull_request:
-    branches: [main]
-    types: [opened, reopened, synchronize]
-    paths:
-      - "docker/vllm/Dockerfile.amzn2023"
-      - "scripts/vllm/amzn2023/**"
-      - "scripts/vllm/dockerd_entrypoint.sh"
-      - "scripts/vllm/sagemaker_entrypoint.sh"
-      - "scripts/common/**"
-      - "scripts/telemetry/**"
-      - ".github/config/vllm-ec2-amzn2023.yml"
-      # - ".github/workflows/pr-vllm-ec2-amzn2023.yml"
-      - ".github/workflows/reusable-vllm-upstream-tests.yml"
-      - ".github/workflows/reusable-vllm-model-tests.yml"
-      # - "test/sanity/**"
-      - "test/telemetry/**"
+  workflow_dispatch: {}
 
 permissions:
   contents: read
diff --git a/.github/workflows/pr-vllm-ec2.yml b/.github/workflows/pr-vllm-ec2.yml
index 1bd1a230deb2..23cfaa6b15e6 100644
--- a/.github/workflows/pr-vllm-ec2.yml
+++ b/.github/workflows/pr-vllm-ec2.yml
@@ -1,14 +1,8 @@
 name: PR - vLLM EC2
 
+# Disabled: focusing on omni workflows only
 on:
-  # Direct execution on pull requests
-  pull_request:
-    branches: [main]
-    types: [opened, reopened, synchronize]
-    paths:
-      - "**vllm**"
-      - "!docs/**"
-      - "!**amzn2023**"
+  workflow_dispatch: {}
 
 permissions:
   contents: read
diff --git a/.github/workflows/pr-vllm-rayserve.yml b/.github/workflows/pr-vllm-rayserve.yml
index df61aa89cc06..3acae56e1294 100644
--- a/.github/workflows/pr-vllm-rayserve.yml
+++ b/.github/workflows/pr-vllm-rayserve.yml
@@ -1,14 +1,8 @@
 name: PR - vLLM RayServe
 
+# Disabled: focusing on omni workflows only
 on:
-  # Direct execution on pull requests
-  pull_request:
-    branches: [main]
-    types: [opened, reopened, synchronize]
-    paths:
-      - "**vllm**"
-      - "!docs/**"
-      - "!**amzn2023**"
+  workflow_dispatch: {}
 
 permissions:
   contents: read
diff --git a/.github/workflows/pr-vllm-sagemaker-amzn2023.yml b/.github/workflows/pr-vllm-sagemaker-amzn2023.yml
index 5ba3c3a3d73b..a615a23a4700 100644
--- a/.github/workflows/pr-vllm-sagemaker-amzn2023.yml
+++ b/.github/workflows/pr-vllm-sagemaker-amzn2023.yml
@@ -1,24 +1,8 @@
 name: PR - vLLM SageMaker AMZN2023
 
+# Disabled: focusing on omni workflows only
 on:
-  pull_request:
-    branches: [main]
-    types: [opened, reopened, synchronize]
-    paths:
-      - "docker/vllm/Dockerfile.amzn2023"
-      - "scripts/vllm/amzn2023/**"
-      - "!scripts/vllm/amzn2023/vllm_model_smoke_test.sh"
-      - "scripts/vllm/dockerd_entrypoint.sh"
-      - "scripts/vllm/sagemaker_entrypoint.sh"
-      - "scripts/common/**"
-      - "scripts/telemetry/**"
-      - ".github/config/vllm-sagemaker-amzn2023.yml"
-      # - ".github/workflows/pr-vllm-sagemaker-amzn2023.yml"
-      - ".github/workflows/reusable-vllm-upstream-tests.yml"
-      - ".github/workflows/reusable-vllm-sagemaker-tests.yml"
-      # - "test/sanity/**"
-      - "test/telemetry/**"
-      - "test/vllm/sagemaker/**"
+  workflow_dispatch: {}
 
 permissions:
   contents: read
diff --git a/.github/workflows/pr-vllm-sagemaker.yml b/.github/workflows/pr-vllm-sagemaker.yml
index 467f3986751f..54d05f11b052 100644
--- a/.github/workflows/pr-vllm-sagemaker.yml
+++ b/.github/workflows/pr-vllm-sagemaker.yml
@@ -1,14 +1,8 @@
 name: PR - vLLM SageMaker
 
+# Disabled: focusing on omni workflows only
 on:
-  # Direct execution on pull requests
-  pull_request:
-    branches: [main]
-    types: [opened, reopened, synchronize]
-    paths:
-      - "**vllm**"
-      - "!docs/**"
-      - "!**amzn2023**"
+  workflow_dispatch: {}
 
 permissions:
   contents: read

From 3dcc0e927966626220aa4992e529795334e446b6 Mon Sep 17 00:00:00 2001
From: Yadan Wei <yadanwei@amazon.com>
Date: Fri, 3 Apr 2026 10:48:40 -0700
Subject: [PATCH 26/58] feat: add SageMaker serve proxy to route /invocations
 to correct vllm-omni endpoint

- omni_sagemaker_serve.py: FastAPI proxy on port 8080, routes to vllm-omni on 8081
- Supports explicit route via CustomAttributes header (route=/v1/audio/speech)
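- Falls back to payload inspection when no route is given: "input" without
  "messages" -> /v1/audio/speech, "messages" -> /v1/chat/completions,
  "prompt" -> /v1/completions (default: /v1/chat/completions)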
- Entrypoint starts vllm-omni in background, proxy in foreground
- Endpoint test uses explicit route for TTS
---
 docker/vllm/Dockerfile.amzn2023               |  1 +
 scripts/vllm/omni_sagemaker_entrypoint.sh     | 10 +-
 scripts/vllm/omni_sagemaker_serve.py          | 91 +++++++++++++++++++
 .../sagemaker/test_sm_omni_endpoint.py        | 43 ++++-----
 4 files changed, 120 insertions(+), 25 deletions(-)
 create mode 100644 scripts/vllm/omni_sagemaker_serve.py

diff --git a/docker/vllm/Dockerfile.amzn2023 b/docker/vllm/Dockerfile.amzn2023
index 2457be4ab8f6..96d7208358f9 100644
--- a/docker/vllm/Dockerfile.amzn2023
+++ b/docker/vllm/Dockerfile.amzn2023
@@ -468,6 +468,7 @@ RUN dnf upgrade -y --security --releasever latest --setopt=install_weak_deps=Fal
   && ln -sf /opt/venv/bin/python3 /usr/bin/python3
 
 COPY ./scripts/vllm/omni_sagemaker_entrypoint.sh /usr/local/bin/sagemaker_entrypoint.sh
+COPY ./scripts/vllm/omni_sagemaker_serve.py /usr/local/bin/omni_sagemaker_serve.py
 RUN chmod +x /usr/local/bin/sagemaker_entrypoint.sh
 
 ENTRYPOINT ["/usr/local/bin/sagemaker_entrypoint.sh"]
\ No newline at end of file
diff --git a/scripts/vllm/omni_sagemaker_entrypoint.sh b/scripts/vllm/omni_sagemaker_entrypoint.sh
index 0d8e8b3cd691..2c22d9838622 100755
--- a/scripts/vllm/omni_sagemaker_entrypoint.sh
+++ b/scripts/vllm/omni_sagemaker_entrypoint.sh
@@ -6,7 +6,8 @@ bash /usr/local/bin/bash_telemetry.sh >/dev/null 2>&1 || true
 PREFIX="SM_VLLM_"
 ARG_PREFIX="--"
 
-ARGS=(--port 8080)
+# vllm-omni listens on 8081; the serve proxy on 8080 (SageMaker's port)
+ARGS=(--port 8081)
 
 # Auto-detect model if SM_VLLM_MODEL is not set
 if [ -z "${SM_VLLM_MODEL}" ]; then
@@ -38,4 +39,9 @@ while IFS='=' read -r key value; do
     fi
 done < <(env | grep "^${PREFIX}")
 
-exec vllm serve --omni "${ARGS[@]}"
+# Start vllm-omni on port 8081 in background
+vllm serve --omni "${ARGS[@]}" &
+VLLM_PID=$!
+
+# Start the SageMaker serve proxy on port 8080 (foreground)
+exec python3 /usr/local/bin/omni_sagemaker_serve.py
diff --git a/scripts/vllm/omni_sagemaker_serve.py b/scripts/vllm/omni_sagemaker_serve.py
new file mode 100644
index 000000000000..92d68fb2e535
--- /dev/null
+++ b/scripts/vllm/omni_sagemaker_serve.py
@@ -0,0 +1,91 @@
+"""SageMaker serving proxy for vLLM-Omni.
+
+Sits on port 8080 (SageMaker's expected port), proxies to vllm-omni on
+port 8081. Routes /invocations to the correct vllm-omni endpoint using:
+
+  1. X-Amzn-SageMaker-Custom-Attributes header with route=<path>
+  2. Payload inspection as fallback:
+     - has "input", no "messages" -> /v1/audio/speech
+     - has "messages"             -> /v1/chat/completions
+     - has "prompt"               -> /v1/completions
+"""
+
+import json
+import logging
+import re
+
+import httpx
+from fastapi import FastAPI, Request, Response
+
+logger = logging.getLogger("omni_serve")
+logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
+
+BACKEND = "http://127.0.0.1:8081"
+app = FastAPI()
+
+
+def _parse_route(custom_attrs: str | None) -> str | None:
+    """Extract route=<path> from SageMaker custom attributes header."""
+    if not custom_attrs:
+        return None
+    m = re.search(r"route=(/\S+)", custom_attrs)
+    return m.group(1) if m else None
+
+
+def _infer_route(data: dict) -> str:
+    """Infer the target endpoint from payload content."""
+    if "input" in data and "messages" not in data:
+        return "/v1/audio/speech"
+    if "messages" in data:
+        return "/v1/chat/completions"
+    if "prompt" in data:
+        return "/v1/completions"
+    return "/v1/chat/completions"
+
+
+@app.get("/ping")
+async def ping():
+    """SageMaker health check — proxy to vllm-omni /health."""
+    async with httpx.AsyncClient() as client:
+        try:
+            r = await client.get(f"{BACKEND}/health", timeout=5)
+            return Response(status_code=r.status_code)
+        except httpx.ConnectError:
+            return Response(status_code=503)
+
+
+@app.post("/invocations")
+async def invocations(request: Request):
+    """Route /invocations to the correct vllm-omni endpoint."""
+    body = await request.body()
+
+    # 1. Explicit route from custom attributes header
+    custom_attrs = request.headers.get("X-Amzn-SageMaker-Custom-Attributes")
+    path = _parse_route(custom_attrs)
+
+    # 2. Fallback: infer from payload
+    if not path:
+        try:
+            data = json.loads(body)
+        except json.JSONDecodeError:
+            return Response(content='{"error": "invalid JSON"}', status_code=400,
+                            media_type="application/json")
+        path = _infer_route(data)
+
+    logger.info("Routing /invocations -> %s", path)
+
+    async with httpx.AsyncClient() as client:
+        r = await client.post(
+            f"{BACKEND}{path}",
+            content=body,
+            headers={"Content-Type": "application/json"},
+            timeout=300,
+        )
+
+    return Response(content=r.content, status_code=r.status_code,
+                    media_type=r.headers.get("content-type", "application/json"))
+
+
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=8080, log_level="info")
diff --git a/test/vllm-omni/sagemaker/test_sm_omni_endpoint.py b/test/vllm-omni/sagemaker/test_sm_omni_endpoint.py
index a7ff3e117a5e..7dcb1ef1fd78 100644
--- a/test/vllm-omni/sagemaker/test_sm_omni_endpoint.py
+++ b/test/vllm-omni/sagemaker/test_sm_omni_endpoint.py
@@ -97,28 +97,25 @@ def model_endpoint(aws_session, model_package, instance_type):
 @pytest.mark.parametrize("instance_type", ["ml.g4dn.xlarge"], indirect=True)
 @pytest.mark.parametrize("model_id", ["Qwen/Qwen3-TTS-12Hz-1.7B-CustomVoice"], indirect=True)
 def test_vllm_omni_tts_endpoint(model_endpoint):
+    """TTS via /invocations routed to /v1/audio/speech by the serve proxy."""
     predictor = model_endpoint
-
-    payload = {
-        "messages": [{"role": "user", "content": "Hello, this is a test."}],
-        "extra_body": {
-            "task_type": "CustomVoice",
-            "language": "English",
-            "speaker": "Ryan",
-        },
-    }
-    LOGGER.info(f"Sending TTS inference request: {pformat(payload)}")
-
-    response = predictor.predict(payload)
-    if isinstance(response, bytes):
-        response = response.decode("utf-8")
-    if isinstance(response, str):
-        try:
-            response = json.loads(response)
-        except json.JSONDecodeError:
-            pass
-
-    assert response, "Model response is empty"
-    LOGGER.info(f"TTS response received: {pformat(response)}")
-    assert "choices" in response, f"No choices in response: {response}"
+    sm_runtime = predictor.sagemaker_session.sagemaker_runtime_client
+
+    payload = json.dumps({
+        "input": "Hello, this is a test of the text to speech system.",
+        "voice": "vivian",
+        "language": "English",
+    })
+
+    LOGGER.info("Sending TTS request via /invocations with route=/v1/audio/speech")
+    response = sm_runtime.invoke_endpoint(
+        EndpointName=predictor.endpoint_name,
+        ContentType="application/json",
+        Body=payload,
+        CustomAttributes="route=/v1/audio/speech",
+    )
+
+    audio_bytes = response["Body"].read()
+    LOGGER.info(f"TTS audio response: {len(audio_bytes)} bytes")
+    assert len(audio_bytes) > 1000, f"TTS output too small: {len(audio_bytes)} bytes"
     LOGGER.info("TTS endpoint test PASSED")

From 2f891a88432362046638862f859a54c6d0f89536 Mon Sep 17 00:00:00 2001
From: Yadan Wei <yadanwei@amazon.com>
Date: Fri, 3 Apr 2026 12:12:57 -0700
Subject: [PATCH 27/58] feat: SageMaker routing middleware, real entrypoint
 smoke tests, unit tests

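- omni_sagemaker_serve.py: replace the FastAPI proxy with an ASGI middleware that
  routes /invocations via the route=<path> CustomAttributes header; requests
  without a route fall through to vLLM's built-in /invocations handler
- Entrypoint passes --middleware omni_sagemaker_serve.SageMakerRouteMiddleware to a
  single `vllm serve --omni` process on port 8080, reusing vLLM's /invocations and /ping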
- Both EC2 and SageMaker smoke tests use real entrypoint (no override)
- EC2 tests via /v1/audio/speech and /v1/chat/completions directly
- SageMaker tests via /invocations with route header
- 10 unit tests for middleware routing logic
- Unit test job added to SageMaker PR workflow
---
 .../pr-vllm-omni-sagemaker-amzn2023.yml       |  19 +++
 .../reusable-vllm-omni-model-tests.yml        |  32 +++--
 docker/vllm/Dockerfile.amzn2023               |   1 +
 scripts/vllm/omni_sagemaker_entrypoint.sh     |  12 +-
 scripts/vllm/omni_sagemaker_serve.py          | 107 +++++-----------
 scripts/vllm/test_sagemaker_middleware.py     | 111 +++++++++++++++++
 .../sagemaker/test_sm_omni_endpoint.py        |  13 +-
 .../scripts/vllm_omni_ec2_smoke_test.sh       | 115 ++++++++++--------
 .../scripts/vllm_omni_sagemaker_smoke_test.sh |  47 +++----
 9 files changed, 277 insertions(+), 180 deletions(-)
 create mode 100644 scripts/vllm/test_sagemaker_middleware.py

diff --git a/.github/workflows/pr-vllm-omni-sagemaker-amzn2023.yml b/.github/workflows/pr-vllm-omni-sagemaker-amzn2023.yml
index 33468508b85e..4a4a029c1ebd 100644
--- a/.github/workflows/pr-vllm-omni-sagemaker-amzn2023.yml
+++ b/.github/workflows/pr-vllm-omni-sagemaker-amzn2023.yml
@@ -40,6 +40,25 @@ jobs:
       - name: Run permission gate (from base)
         uses: ./.github/actions/pr-permission-gate
 
+  unit-test:
+    needs: [gatekeeper]
+    if: success()
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v5
+
+      - name: Setup python
+        uses: actions/setup-python@v6
+        with:
+          python-version: "3.12"
+
+      - name: Install dependencies
+        run: pip install starlette pytest
+
+      - name: Run middleware unit tests
+        run: PYTHONPATH=scripts/vllm pytest scripts/vllm/test_sagemaker_middleware.py -v
+
   load-config:
     needs: [gatekeeper]
     if: success()
diff --git a/.github/workflows/reusable-vllm-omni-model-tests.yml b/.github/workflows/reusable-vllm-omni-model-tests.yml
index 88cda82c6845..e9eed2defe6b 100644
--- a/.github/workflows/reusable-vllm-omni-model-tests.yml
+++ b/.github/workflows/reusable-vllm-omni-model-tests.yml
@@ -76,23 +76,41 @@ jobs:
           s3-path: ${{ matrix.model.s3_path }}
           model-name: ${{ matrix.model.name }}
 
-      - name: Start container
+      # EC2: entrypoint accepts CLI args directly
+      - name: Start container (EC2)
+        if: inputs.customer-type == 'ec2'
         run: |
           docker pull ${{ inputs.image-uri }}
-          CONTAINER_ID=$(docker run -d -it --gpus all --shm-size=4g \
-            --entrypoint /bin/bash \
+          CONTAINER_ID=$(docker run -d --gpus all --shm-size=4g \
             -v /dlc-models:/models \
+            -p 8080:8080 \
+            ${{ inputs.image-uri }} \
+            --model /models/${{ matrix.model.name }} \
+            --stage-init-timeout 600)
+          echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV
+
+      # SageMaker: entrypoint reads SM_VLLM_* env vars
+      - name: Start container (SageMaker)
+        if: inputs.customer-type == 'sagemaker'
+        run: |
+          docker pull ${{ inputs.image-uri }}
+          CONTAINER_ID=$(docker run -d --gpus all --shm-size=4g \
+            -v /dlc-models:/models \
+            -e SM_VLLM_MODEL=/models/${{ matrix.model.name }} \
+            -e SM_VLLM_STAGE_INIT_TIMEOUT=600 \
+            -p 8080:8080 \
             ${{ inputs.image-uri }})
           echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV
 
-      - name: Copy test scripts into container
+      - name: Copy test scripts
         run: |
-          docker cp test/vllm-omni/scripts/. ${CONTAINER_ID}:/models/
+          docker cp test/vllm-omni/scripts/vllm_omni_${{ inputs.customer-type }}_smoke_test.sh \
+            ${CONTAINER_ID}:/tmp/smoke_test.sh
 
       - name: Run smoke test
         run: |
-          docker exec ${CONTAINER_ID} bash /models/vllm_omni_${{ inputs.customer-type }}_smoke_test.sh \
-            "/models/${{ matrix.model.name }}" ${{ matrix.model.type }}
+          docker exec ${CONTAINER_ID} bash /tmp/smoke_test.sh \
+            "${{ matrix.model.type }}"
 
       - name: Cleanup
         if: always()
diff --git a/docker/vllm/Dockerfile.amzn2023 b/docker/vllm/Dockerfile.amzn2023
index 96d7208358f9..e44313ae72b7 100644
--- a/docker/vllm/Dockerfile.amzn2023
+++ b/docker/vllm/Dockerfile.amzn2023
@@ -469,6 +469,7 @@ RUN dnf upgrade -y --security --releasever latest --setopt=install_weak_deps=Fal
 
 COPY ./scripts/vllm/omni_sagemaker_entrypoint.sh /usr/local/bin/sagemaker_entrypoint.sh
 COPY ./scripts/vllm/omni_sagemaker_serve.py /usr/local/bin/omni_sagemaker_serve.py
+ENV PYTHONPATH="/usr/local/bin:${PYTHONPATH}"
 RUN chmod +x /usr/local/bin/sagemaker_entrypoint.sh
 
 ENTRYPOINT ["/usr/local/bin/sagemaker_entrypoint.sh"]
\ No newline at end of file
diff --git a/scripts/vllm/omni_sagemaker_entrypoint.sh b/scripts/vllm/omni_sagemaker_entrypoint.sh
index 2c22d9838622..94b15f0a4091 100755
--- a/scripts/vllm/omni_sagemaker_entrypoint.sh
+++ b/scripts/vllm/omni_sagemaker_entrypoint.sh
@@ -6,8 +6,7 @@ bash /usr/local/bin/bash_telemetry.sh >/dev/null 2>&1 || true
 PREFIX="SM_VLLM_"
 ARG_PREFIX="--"
 
-# vllm-omni listens on 8081; the serve proxy on 8080 (SageMaker's port)
-ARGS=(--port 8081)
+ARGS=(--port 8080)
 
 # Auto-detect model if SM_VLLM_MODEL is not set
 if [ -z "${SM_VLLM_MODEL}" ]; then
@@ -39,9 +38,8 @@ while IFS='=' read -r key value; do
     fi
 done < <(env | grep "^${PREFIX}")
 
-# Start vllm-omni on port 8081 in background
-vllm serve --omni "${ARGS[@]}" &
-VLLM_PID=$!
+# Add SageMaker routing middleware to dispatch /invocations to the correct
+# vllm-omni endpoint (e.g. /v1/audio/speech for TTS)
+ARGS+=(--middleware omni_sagemaker_serve.SageMakerRouteMiddleware)
 
-# Start the SageMaker serve proxy on port 8080 (foreground)
-exec python3 /usr/local/bin/omni_sagemaker_serve.py
+exec vllm serve --omni "${ARGS[@]}"
diff --git a/scripts/vllm/omni_sagemaker_serve.py b/scripts/vllm/omni_sagemaker_serve.py
index 92d68fb2e535..7db1bb80aeaf 100644
--- a/scripts/vllm/omni_sagemaker_serve.py
+++ b/scripts/vllm/omni_sagemaker_serve.py
@@ -1,91 +1,50 @@
-"""SageMaker serving proxy for vLLM-Omni.
+"""SageMaker routing middleware for vLLM-Omni.
 
-Sits on port 8080 (SageMaker's expected port), proxies to vllm-omni on
-port 8081. Routes /invocations to the correct vllm-omni endpoint using:
+Routes /invocations requests based on the X-Amzn-SageMaker-Custom-Attributes
+header. Clients specify the target endpoint via route=<path>, e.g.:
 
-  1. X-Amzn-SageMaker-Custom-Attributes header with route=<path>
-  2. Payload inspection as fallback:
-     - has "input", no "messages" -> /v1/audio/speech
-     - has "messages"             -> /v1/chat/completions
-     - has "prompt"               -> /v1/completions
+  CustomAttributes="route=/v1/audio/speech"
+
+If no route is specified, falls through to vLLM's built-in /invocations
+handler (chat/completion/embed).
+
+Usage: vllm serve --omni --middleware omni_sagemaker_serve.SageMakerRouteMiddleware
 """
 
-import json
 import logging
 import re
 
-import httpx
-from fastapi import FastAPI, Request, Response
-
-logger = logging.getLogger("omni_serve")
-logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
+from starlette.types import ASGIApp, Receive, Scope, Send
 
-BACKEND = "http://127.0.0.1:8081"
-app = FastAPI()
+logger = logging.getLogger("omni_sagemaker")
 
 
-def _parse_route(custom_attrs: str | None) -> str | None:
+def _parse_route(headers: list[tuple[bytes, bytes]]) -> str | None:
     """Extract route=<path> from SageMaker custom attributes header."""
-    if not custom_attrs:
-        return None
-    m = re.search(r"route=(/\S+)", custom_attrs)
-    return m.group(1) if m else None
-
-
-def _infer_route(data: dict) -> str:
-    """Infer the target endpoint from payload content."""
-    if "input" in data and "messages" not in data:
-        return "/v1/audio/speech"
-    if "messages" in data:
-        return "/v1/chat/completions"
-    if "prompt" in data:
-        return "/v1/completions"
-    return "/v1/chat/completions"
-
-
-@app.get("/ping")
-async def ping():
-    """SageMaker health check — proxy to vllm-omni /health."""
-    async with httpx.AsyncClient() as client:
-        try:
-            r = await client.get(f"{BACKEND}/health", timeout=5)
-            return Response(status_code=r.status_code)
-        except httpx.ConnectError:
-            return Response(status_code=503)
-
-
-@app.post("/invocations")
-async def invocations(request: Request):
-    """Route /invocations to the correct vllm-omni endpoint."""
-    body = await request.body()
-
-    # 1. Explicit route from custom attributes header
-    custom_attrs = request.headers.get("X-Amzn-SageMaker-Custom-Attributes")
-    path = _parse_route(custom_attrs)
+    for key, value in headers:
+        if key.lower() == b"x-amzn-sagemaker-custom-attributes":
+            m = re.search(r"route=(/[^\s,]+)", value.decode())
+            return m.group(1) if m else None
+    return None
 
-    # 2. Fallback: infer from payload
-    if not path:
-        try:
-            data = json.loads(body)
-        except json.JSONDecodeError:
-            return Response(content='{"error": "invalid JSON"}', status_code=400,
-                            media_type="application/json")
-        path = _infer_route(data)
 
-    logger.info("Routing /invocations -> %s", path)
+class SageMakerRouteMiddleware:
+    """ASGI middleware that reroutes /invocations based on CustomAttributes.
 
-    async with httpx.AsyncClient() as client:
-        r = await client.post(
-            f"{BACKEND}{path}",
-            content=body,
-            headers={"Content-Type": "application/json"},
-            timeout=300,
-        )
+    Explicit route via header -> rewrites path to that endpoint.
+    No route specified -> falls through to vLLM's built-in /invocations handler.
+    """
 
-    return Response(content=r.content, status_code=r.status_code,
-                    media_type=r.headers.get("content-type", "application/json"))
+    def __init__(self, app: ASGIApp) -> None:
+        self.app = app
 
+    async def __call__(self, scope: Scope, receive: Receive, send: Send) -> None:
+        if scope["type"] == "http" and scope["path"] == "/invocations":
+            route = _parse_route(scope.get("headers", []))
+            if route:
+                logger.info("Rerouting /invocations -> %s", route)
+                scope = dict(scope)
+                scope["path"] = route
+                scope["raw_path"] = route.encode()
 
-if __name__ == "__main__":
-    import uvicorn
-    uvicorn.run(app, host="0.0.0.0", port=8080, log_level="info")
+        await self.app(scope, receive, send)
diff --git a/scripts/vllm/test_sagemaker_middleware.py b/scripts/vllm/test_sagemaker_middleware.py
new file mode 100644
index 000000000000..d2c8eb931cc5
--- /dev/null
+++ b/scripts/vllm/test_sagemaker_middleware.py
@@ -0,0 +1,111 @@
+"""Unit tests for SageMaker routing middleware."""
+
+import asyncio
+
+import pytest
+from omni_sagemaker_serve import SageMakerRouteMiddleware, _parse_route
+
+
+class TestParseRoute:
+    def test_extracts_route(self):
+        headers = [(b"x-amzn-sagemaker-custom-attributes", b"route=/v1/audio/speech")]
+        assert _parse_route(headers) == "/v1/audio/speech"
+
+    def test_extracts_route_with_extra_attrs(self):
+        headers = [(b"x-amzn-sagemaker-custom-attributes", b"foo=bar,route=/v1/audio/speech,baz=1")]
+        assert _parse_route(headers) == "/v1/audio/speech"
+
+    def test_no_route(self):
+        headers = [(b"x-amzn-sagemaker-custom-attributes", b"foo=bar")]
+        assert _parse_route(headers) is None
+
+    def test_no_header(self):
+        assert _parse_route([]) is None
+
+    def test_case_insensitive_header(self):
+        headers = [(b"X-Amzn-SageMaker-Custom-Attributes", b"route=/v1/chat/completions")]
+        assert _parse_route(headers) == "/v1/chat/completions"
+
+
+class TestMiddleware:
+    @pytest.fixture
+    def captured(self):
+        return {}
+
+    @pytest.fixture
+    def app(self, captured):
+        async def inner(scope, receive, send):
+            captured["path"] = scope["path"]
+
+        return inner
+
+    @pytest.fixture
+    def middleware(self, app):
+        return SageMakerRouteMiddleware(app)
+
+    def _make_scope(self, path="/invocations", headers=None):
+        return {
+            "type": "http",
+            "path": path,
+            "raw_path": path.encode(),
+            "headers": headers or [],
+        }
+
+    def _run(self, coro):
+        return asyncio.get_event_loop().run_until_complete(coro)
+
+    def test_rewrites_with_route_header(self, middleware, captured):
+        scope = self._make_scope(
+            headers=[
+                (b"x-amzn-sagemaker-custom-attributes", b"route=/v1/audio/speech"),
+            ]
+        )
+        self._run(middleware(scope, None, None))
+        assert captured["path"] == "/v1/audio/speech"
+
+    def test_falls_through_without_route(self, middleware, captured):
+        scope = self._make_scope()
+        self._run(middleware(scope, None, None))
+        assert captured["path"] == "/invocations"
+
+    def test_ignores_non_invocations(self, middleware, captured):
+        scope = self._make_scope(path="/health")
+        self._run(middleware(scope, None, None))
+        assert captured["path"] == "/health"
+
+    def test_ignores_non_http(self, middleware, captured):
+        scope = {"type": "websocket", "path": "/invocations"}
+        self._run(middleware(scope, None, None))
+        assert captured["path"] == "/invocations"
+
+    def test_rewrites_raw_path(self, middleware, captured):
+        scope = self._make_scope(
+            headers=[
+                (b"x-amzn-sagemaker-custom-attributes", b"route=/v1/completions"),
+            ]
+        )
+        self._run(middleware(scope, None, None))
+        assert captured["path"] == "/v1/completions"
+
+    def test_adapter_attrs_without_route_falls_through(self, middleware, captured):
+        """Adapter attributes (no route=) should fall through to /invocations."""
+        scope = self._make_scope(
+            headers=[
+                (b"x-amzn-sagemaker-custom-attributes", b"adapter=my-lora-adapter"),
+            ]
+        )
+        self._run(middleware(scope, None, None))
+        assert captured["path"] == "/invocations"
+
+    def test_adapter_attrs_with_route_rewrites(self, middleware, captured):
+        """Both adapter and route attrs — route takes effect, adapter preserved in headers."""
+        scope = self._make_scope(
+            headers=[
+                (
+                    b"x-amzn-sagemaker-custom-attributes",
+                    b"adapter=my-lora,route=/v1/audio/speech",
+                ),
+            ]
+        )
+        self._run(middleware(scope, None, None))
+        assert captured["path"] == "/v1/audio/speech"
diff --git a/test/vllm-omni/sagemaker/test_sm_omni_endpoint.py b/test/vllm-omni/sagemaker/test_sm_omni_endpoint.py
index 7dcb1ef1fd78..b8737db13500 100644
--- a/test/vllm-omni/sagemaker/test_sm_omni_endpoint.py
+++ b/test/vllm-omni/sagemaker/test_sm_omni_endpoint.py
@@ -2,7 +2,6 @@
 
 import json
 import logging
-from pprint import pformat
 
 import pytest
 from sagemaker.model import Model
@@ -101,11 +100,13 @@ def test_vllm_omni_tts_endpoint(model_endpoint):
     predictor = model_endpoint
     sm_runtime = predictor.sagemaker_session.sagemaker_runtime_client
 
-    payload = json.dumps({
-        "input": "Hello, this is a test of the text to speech system.",
-        "voice": "vivian",
-        "language": "English",
-    })
+    payload = json.dumps(
+        {
+            "input": "Hello, this is a test of the text to speech system.",
+            "voice": "vivian",
+            "language": "English",
+        }
+    )
 
     LOGGER.info("Sending TTS request via /invocations with route=/v1/audio/speech")
     response = sm_runtime.invoke_endpoint(
diff --git a/test/vllm-omni/scripts/vllm_omni_ec2_smoke_test.sh b/test/vllm-omni/scripts/vllm_omni_ec2_smoke_test.sh
index c3c7f8363ed3..929f5fac3ba4 100755
--- a/test/vllm-omni/scripts/vllm_omni_ec2_smoke_test.sh
+++ b/test/vllm-omni/scripts/vllm_omni_ec2_smoke_test.sh
@@ -1,67 +1,76 @@
 #!/bin/bash
 # Smoke test for vLLM-Omni EC2 images
-# Validates that omni models can load and produce output
+# The container is started with the real EC2 entrypoint.
+# This script waits for readiness and tests inference via the OpenAI-compatible API.
 set -eux
 
-nvidia-smi
+MODEL_TYPE="${1:?Usage: $0 <model-type>}"
+PORT=8080
 
-MODEL_PATH="${1:?Usage: $0 <model-path> <model-type>}"
-MODEL_TYPE="${2:?Usage: $0 <model-path> <model-type>}"
+echo "=== Testing vLLM-Omni EC2: ${MODEL_TYPE} ==="
 
-echo "=== Testing vLLM-Omni: ${MODEL_TYPE} model at ${MODEL_PATH} ==="
+# Wait for server (entrypoint starts it)
+echo "Waiting for server..."
+for i in $(seq 1 300); do
+    if curl -s http://localhost:${PORT}/health >/dev/null 2>&1; then
+        echo "Server ready after ${i}s"
+        break
+    fi
+    sleep 1
+done
 
-if [ "${MODEL_TYPE}" = "tts" ]; then
-    # Qwen3-TTS offline inference test
-    python3 -c "
-import os
-os.environ['VLLM_WORKER_MULTIPROC_METHOD'] = 'spawn'
-from vllm_omni.entrypoints.omni import Omni
+curl -sf http://localhost:${PORT}/health || { echo "Health check failed"; exit 1; }
 
-omni = Omni(model='${MODEL_PATH}', stage_init_timeout=600)
-additional_information = {
-    'task_type': ['CustomVoice'],
-    'text': ['Hello, this is a test of the text to speech system.'],
-    'language': ['English'],
-    'speaker': ['Ryan'],
-    'instruct': [''],
-    'max_new_tokens': [2048],
-}
-inputs = {
-    'prompt_token_ids': [0] * 512,
-    'additional_information': additional_information,
-}
-outputs = omni.generate([inputs])
-for out in outputs:
-    mm = out.request_output.outputs[0].multimodal_output
-    assert 'audio' in mm, 'No audio in output'
-    assert mm['sr'], 'No sample rate in output'
-    print(f'Audio generated: sr={mm[\"sr\"]}, chunks={len(mm[\"audio\"])}')
-print('TTS smoke test PASSED')
+curl -sf http://localhost:${PORT}/v1/models | python3 -c "
+import sys, json
+data = json.load(sys.stdin)
+assert len(data['data']) > 0, 'No models listed'
+print(f'Model loaded: {data[\"data\"][0][\"id\"]}')
 "
 
-elif [ "${MODEL_TYPE}" = "diffusion" ]; then
-    # FLUX.2-klein image generation test
-    python3 -c "
-import os
-os.environ['VLLM_WORKER_MULTIPROC_METHOD'] = 'spawn'
-from vllm_omni.entrypoints.omni import Omni
+if [ "${MODEL_TYPE}" = "tts" ]; then
+    curl -sf -X POST http://localhost:${PORT}/v1/audio/speech \
+      -H "Content-Type: application/json" \
+      -d '{
+        "input": "Hello, how are you?",
+        "voice": "vivian",
+        "language": "English"
+      }' --output /tmp/tts_output.wav
+    FILE_SIZE=$(stat -c%s /tmp/tts_output.wav 2>/dev/null || stat -f%z /tmp/tts_output.wav)
+    echo "TTS output file size: ${FILE_SIZE} bytes"
+    [ "${FILE_SIZE}" -gt 1000 ] || { echo "FAIL: TTS output too small"; exit 1; }
+    echo "TTS serving test PASSED"
 
-omni = Omni(model='${MODEL_PATH}', stage_init_timeout=600)
-prompt = 'a red apple on a white table'
-outputs = omni.generate(prompt)
-images = outputs[0].request_output.images
-assert len(images) > 0, 'No images generated'
-images[0].save('/tmp/omni_test_output.png')
-assert os.path.exists('/tmp/omni_test_output.png'), 'Output image not saved'
-size = os.path.getsize('/tmp/omni_test_output.png')
-assert size > 1000, f'Output image too small: {size} bytes'
-print(f'Image generated: {images[0].size}, file size: {size} bytes')
-print('Diffusion smoke test PASSED')
+elif [ "${MODEL_TYPE}" = "diffusion" ]; then
+    RESPONSE=$(curl -sf http://localhost:${PORT}/v1/chat/completions \
+      -H "Content-Type: application/json" \
+      -d '{
+        "messages": [{"role": "user", "content": "a red apple on a white table"}],
+        "extra_body": {
+          "height": 512,
+          "width": 512,
+          "num_inference_steps": 4,
+          "guidance_scale": 3.5,
+          "seed": 42
+        }
+      }')
+    echo "${RESPONSE}" | python3 -c "
+import sys, json, base64
+data = json.load(sys.stdin)
+assert 'choices' in data, f'No choices in response: {str(data)[:200]}'
+content = data['choices'][0]['message']['content']
+if isinstance(content, list):
+    img_item = next(c for c in content if c.get('type') == 'image_url')
+    url = img_item['image_url']['url']
+else:
+    url = str(content)
+assert 'base64,' in url, 'No base64 image in response'
+img_b64 = url.split('base64,')[1]
+img_bytes = base64.b64decode(img_b64)
+print(f'Image generated: {len(img_bytes)} bytes')
+assert len(img_bytes) > 1000, f'Image too small: {len(img_bytes)} bytes'
+print('Diffusion serving test PASSED')
 "
-
-else
-    echo "ERROR: Unknown model type: ${MODEL_TYPE}"
-    exit 1
 fi
 
-echo "=== vLLM-Omni ${MODEL_TYPE} test PASSED ==="
+echo "=== vLLM-Omni EC2 ${MODEL_TYPE} test PASSED ==="
diff --git a/test/vllm-omni/scripts/vllm_omni_sagemaker_smoke_test.sh b/test/vllm-omni/scripts/vllm_omni_sagemaker_smoke_test.sh
index 839347a98da5..c9c2997cd311 100755
--- a/test/vllm-omni/scripts/vllm_omni_sagemaker_smoke_test.sh
+++ b/test/vllm-omni/scripts/vllm_omni_sagemaker_smoke_test.sh
@@ -1,45 +1,28 @@
 #!/bin/bash
 # Smoke test for vLLM-Omni SageMaker images
-# Validates the server starts with --omni and responds to requests
+# The container is started with the real SageMaker entrypoint (including
+# the routing middleware). This script only waits for readiness and tests
+# inference via /invocations and /ping — the same path SageMaker uses.
 set -eux
 
-nvidia-smi
+MODEL_TYPE="${1:?Usage: $0 <model-type>}"
+PORT=8080
 
-MODEL_PATH="${1:?Usage: $0 <model-path> <model-type>}"
-MODEL_TYPE="${2:?Usage: $0 <model-path> <model-type>}"
-PORT=8091
+echo "=== Testing vLLM-Omni SageMaker: ${MODEL_TYPE} ==="
 
-echo "=== Testing vLLM-Omni SageMaker: ${MODEL_TYPE} at ${MODEL_PATH} ==="
-
-# Start server in background
-vllm serve --omni --model "${MODEL_PATH}" --port ${PORT} --stage-init-timeout 600 &
-SERVER_PID=$!
-
-cleanup() {
-    echo "Stopping server (PID ${SERVER_PID})..."
-    kill ${SERVER_PID} 2>/dev/null || true
-    wait ${SERVER_PID} 2>/dev/null || true
-}
-trap cleanup EXIT
-
-# Wait for server to be ready
-echo "Waiting for server to start..."
+# Wait for server (entrypoint starts it)
+echo "Waiting for server..."
 for i in $(seq 1 300); do
-    if curl -s http://localhost:${PORT}/health >/dev/null 2>&1; then
+    if curl -s http://localhost:${PORT}/ping >/dev/null 2>&1; then
         echo "Server ready after ${i}s"
         break
     fi
-    if ! kill -0 ${SERVER_PID} 2>/dev/null; then
-        echo "ERROR: Server process died"
-        exit 1
-    fi
     sleep 1
 done
 
-# Verify health endpoint
-curl -sf http://localhost:${PORT}/health || { echo "Health check failed"; exit 1; }
+curl -sf http://localhost:${PORT}/ping || { echo "Ping failed"; exit 1; }
+curl -sf http://localhost:${PORT}/health || { echo "Health failed"; exit 1; }
 
-# Verify models endpoint
 curl -sf http://localhost:${PORT}/v1/models | python3 -c "
 import sys, json
 data = json.load(sys.stdin)
@@ -48,9 +31,9 @@ print(f'Model loaded: {data[\"data\"][0][\"id\"]}')
 "
 
 if [ "${MODEL_TYPE}" = "tts" ]; then
-    # TTS via /v1/audio/speech API (OpenAI-compatible speech endpoint)
-    curl -sf -X POST http://localhost:${PORT}/v1/audio/speech \
+    curl -sf -X POST http://localhost:${PORT}/invocations \
       -H "Content-Type: application/json" \
+      -H "X-Amzn-SageMaker-Custom-Attributes: route=/v1/audio/speech" \
       -d '{
         "input": "Hello, how are you?",
         "voice": "vivian",
@@ -62,8 +45,7 @@ if [ "${MODEL_TYPE}" = "tts" ]; then
     echo "TTS serving test PASSED"
 
 elif [ "${MODEL_TYPE}" = "diffusion" ]; then
-    # Image generation via chat completions API
-    RESPONSE=$(curl -sf http://localhost:${PORT}/v1/chat/completions \
+    RESPONSE=$(curl -sf http://localhost:${PORT}/invocations \
       -H "Content-Type: application/json" \
       -d '{
         "messages": [{"role": "user", "content": "a red apple on a white table"}],
@@ -80,7 +62,6 @@ import sys, json, base64
 data = json.load(sys.stdin)
 assert 'choices' in data, f'No choices in response: {str(data)[:200]}'
 content = data['choices'][0]['message']['content']
-# Extract image and validate
 if isinstance(content, list):
     img_item = next(c for c in content if c.get('type') == 'image_url')
     url = img_item['image_url']['url']

From 6f6421ffda9d993564b1b0347a4f147990a71564 Mon Sep 17 00:00:00 2001
From: Yadan Wei <yadanwei@amazon.com>
Date: Fri, 3 Apr 2026 15:56:08 -0700
Subject: [PATCH 28/58] feat: pre-built runtime base to skip vLLM compile in PR
 builds

- Dockerfile: runtime-build stage + conditional FROM via RUNTIME_BASE arg
- build-runtime job checks ECR, builds only if missing, shared by EC2+SM workflows
- build-image action passes RUNTIME_BASE to skip compile stages
- Runtime base pushed to ECR: vllm-runtime-v0.18.0-cu12.9.1-py3.12 (~20min build -> ~2min pull)
---
 .github/actions/build-image/action.yml        |  5 ++
 .github/scripts/build_image.sh                |  8 ++++
 .../workflows/pr-vllm-omni-ec2-amzn2023.yml   | 46 ++++++++++++++++++-
 .../pr-vllm-omni-sagemaker-amzn2023.yml       | 45 +++++++++++++++++-
 docker/vllm/Dockerfile.amzn2023               | 12 ++++-
 5 files changed, 113 insertions(+), 3 deletions(-)

diff --git a/.github/actions/build-image/action.yml b/.github/actions/build-image/action.yml
index 62e3374be9bd..027177f5e485 100644
--- a/.github/actions/build-image/action.yml
+++ b/.github/actions/build-image/action.yml
@@ -69,6 +69,10 @@ inputs:
     description: 'Transformers library version (e.g., 4.28.1)'
     required: false
     default: ''
+  runtime-base:
+    description: 'Pre-built runtime base image URI. When set, skips compile stages.'
+    required: false
+    default: ''
 
 outputs:
   image-uri:
@@ -120,3 +124,4 @@ runs:
         INFERENCE_TOOLKIT_VERSION: ${{ inputs.inference-toolkit-version }}
         TORCHSERVE_VERSION: ${{ inputs.torchserve-version }}
         TRANSFORMERS_VERSION: ${{ inputs.transformers-version }}
+        RUNTIME_BASE: ${{ inputs.runtime-base }}
diff --git a/.github/scripts/build_image.sh b/.github/scripts/build_image.sh
index 224712f97e7e..4aca4dfc3dbd 100755
--- a/.github/scripts/build_image.sh
+++ b/.github/scripts/build_image.sh
@@ -26,6 +26,7 @@ CUSTOMER_TYPE="${CUSTOMER_TYPE:-}"
 INFERENCE_TOOLKIT_VERSION="${INFERENCE_TOOLKIT_VERSION:-}"
 TORCHSERVE_VERSION="${TORCHSERVE_VERSION:-}"
 TRANSFORMERS_VERSION="${TRANSFORMERS_VERSION:-}"
+RUNTIME_BASE="${RUNTIME_BASE:-}"
 
 # Resolve image URI
 CI_IMAGE_URI="${AWS_ACCOUNT_ID}.dkr.ecr.${AWS_REGION}.amazonaws.com/ci:${TAG_PR}"
@@ -67,6 +68,13 @@ BUILD_CMD="docker buildx build --progress plain \
   --build-arg FRAMEWORK=\"${FRAMEWORK}\" \
   --build-arg FRAMEWORK_VERSION=\"${FRAMEWORK_VERSION}\""
 
+# Use pre-built runtime base if available (skips compile stages)
+if [[ -n "${RUNTIME_BASE}" ]]; then
+  echo "Using pre-built runtime base: ${RUNTIME_BASE}"
+  BUILD_CMD="${BUILD_CMD} \
+  --build-arg RUNTIME_BASE=\"${RUNTIME_BASE}\""
+fi
+
 # Add SageMaker labels if customer-type is 'sagemaker'
 if [[ "${CUSTOMER_TYPE}" == "sagemaker" ]]; then
   BUILD_CMD="${BUILD_CMD} \
diff --git a/.github/workflows/pr-vllm-omni-ec2-amzn2023.yml b/.github/workflows/pr-vllm-omni-ec2-amzn2023.yml
index 44952eaf095b..924ddf62fe80 100644
--- a/.github/workflows/pr-vllm-omni-ec2-amzn2023.yml
+++ b/.github/workflows/pr-vllm-omni-ec2-amzn2023.yml
@@ -122,7 +122,7 @@ jobs:
             telemetry-test-change:
               - "test/telemetry/**"
 
-  build-image:
+  build-runtime:
     needs: [check-changes, load-config]
     if: needs.check-changes.outputs.build-change == 'true'
     runs-on:
@@ -130,6 +130,49 @@ jobs:
         fleet:x86-vllm-build-runner
         buildspec-override:true
     timeout-minutes: 720
+    outputs:
+      runtime-base: ${{ steps.check.outputs.image }}
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v5
+
+      - name: Setup buildkitd
+        run: .github/scripts/buildkitd.sh
+
+      - name: ECR login
+        uses: ./.github/actions/ecr-authenticate
+        with:
+          aws-account-id: ${{ vars.CI_AWS_ACCOUNT_ID }}
+          aws-region: ${{ vars.AWS_REGION }}
+
+      - name: Check or build runtime base
+        id: check
+        run: |
+          TAG="vllm-runtime-v${{ needs.load-config.outputs.framework-version }}-${{ needs.load-config.outputs.cuda-version }}-${{ needs.load-config.outputs.python-version }}"
+          IMAGE="${{ vars.CI_AWS_ACCOUNT_ID }}.dkr.ecr.${{ vars.AWS_REGION }}.amazonaws.com/ci:${TAG}"
+          echo "image=${IMAGE}" >> $GITHUB_OUTPUT
+
+          # Skip build if image already exists
+          if docker manifest inspect "${IMAGE}" >/dev/null 2>&1; then
+            echo "Runtime base exists: ${IMAGE}"
+            exit 0
+          fi
+
+          echo "Building runtime base: ${IMAGE}"
+          docker buildx build --progress plain \
+            --target runtime-build \
+            --tag "${IMAGE}" \
+            --push \
+            -f docker/vllm/Dockerfile.amzn2023 .
+
+  build-image:
+    needs: [check-changes, load-config, build-runtime]
+    if: needs.check-changes.outputs.build-change == 'true'
+    runs-on:
+      - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
+        fleet:x86-vllm-build-runner
+        buildspec-override:true
+    timeout-minutes: 720
     concurrency:
       group: ${{ github.workflow }}-build-image-${{ github.event.pull_request.number }}
       cancel-in-progress: true
@@ -159,6 +202,7 @@ jobs:
           os-version: ${{ needs.load-config.outputs.os-version }}
           contributor: ${{ needs.load-config.outputs.contributor }}
           customer-type: ${{ needs.load-config.outputs.customer-type }}
+          runtime-base: ${{ needs.build-runtime.outputs.runtime-base }}
 
   sanity-test:
     needs: [check-changes, build-image, load-config]
diff --git a/.github/workflows/pr-vllm-omni-sagemaker-amzn2023.yml b/.github/workflows/pr-vllm-omni-sagemaker-amzn2023.yml
index 4a4a029c1ebd..8bd87418ec88 100644
--- a/.github/workflows/pr-vllm-omni-sagemaker-amzn2023.yml
+++ b/.github/workflows/pr-vllm-omni-sagemaker-amzn2023.yml
@@ -141,7 +141,7 @@ jobs:
             telemetry-test-change:
               - "test/telemetry/**"
 
-  build-image:
+  build-runtime:
     needs: [check-changes, load-config]
     if: needs.check-changes.outputs.build-change == 'true'
     runs-on:
@@ -149,6 +149,48 @@ jobs:
         fleet:x86-vllm-build-runner
         buildspec-override:true
     timeout-minutes: 720
+    outputs:
+      runtime-base: ${{ steps.check.outputs.image }}
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v5
+
+      - name: Setup buildkitd
+        run: .github/scripts/buildkitd.sh
+
+      - name: ECR login
+        uses: ./.github/actions/ecr-authenticate
+        with:
+          aws-account-id: ${{ vars.CI_AWS_ACCOUNT_ID }}
+          aws-region: ${{ vars.AWS_REGION }}
+
+      - name: Check or build runtime base
+        id: check
+        run: |
+          TAG="vllm-runtime-v${{ needs.load-config.outputs.framework-version }}-${{ needs.load-config.outputs.cuda-version }}-${{ needs.load-config.outputs.python-version }}"
+          IMAGE="${{ vars.CI_AWS_ACCOUNT_ID }}.dkr.ecr.${{ vars.AWS_REGION }}.amazonaws.com/ci:${TAG}"
+          echo "image=${IMAGE}" >> $GITHUB_OUTPUT
+
+          if docker manifest inspect "${IMAGE}" >/dev/null 2>&1; then
+            echo "Runtime base exists: ${IMAGE}"
+            exit 0
+          fi
+
+          echo "Building runtime base: ${IMAGE}"
+          docker buildx build --progress plain \
+            --target runtime-build \
+            --tag "${IMAGE}" \
+            --push \
+            -f docker/vllm/Dockerfile.amzn2023 .
+
+  build-image:
+    needs: [check-changes, load-config, build-runtime]
+    if: needs.check-changes.outputs.build-change == 'true'
+    runs-on:
+      - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
+        fleet:x86-vllm-build-runner
+        buildspec-override:true
+    timeout-minutes: 720
     concurrency:
       group: ${{ github.workflow }}-build-image-${{ github.event.pull_request.number }}
       cancel-in-progress: true
@@ -178,6 +220,7 @@ jobs:
           os-version: ${{ needs.load-config.outputs.os-version }}
           contributor: ${{ needs.load-config.outputs.contributor }}
           customer-type: ${{ needs.load-config.outputs.customer-type }}
+          runtime-base: ${{ needs.build-runtime.outputs.runtime-base }}
 
   sanity-test:
     needs: [check-changes, build-image, load-config]
diff --git a/docker/vllm/Dockerfile.amzn2023 b/docker/vllm/Dockerfile.amzn2023
index e44313ae72b7..410e721df89d 100644
--- a/docker/vllm/Dockerfile.amzn2023
+++ b/docker/vllm/Dockerfile.amzn2023
@@ -1,6 +1,11 @@
 ARG CUDA_VERSION=12.9.1
 ARG PYTHON_VERSION=3.12
 
+# Pre-built runtime image. When set, skips the compile stages (source/build/deps)
+# and uses this image directly as the runtime base. Build it with:
+#   docker buildx build --target runtime-build --tag <ecr>/ci:vllm-runtime-<tag> --push ...
+ARG RUNTIME_BASE=""
+
 # =============================================================================
 # STAGE 0: source — clone vLLM and apply patches
 # =============================================================================
@@ -201,8 +206,9 @@ RUN PATH="/opt/venv/bin:${PATH}" bash /tmp/setup_oss_compliance.sh python${PYTHO
 
 # =============================================================================
 # STAGE 3: runtime — minimal image with clean venv
+# Built from scratch (compile path) or pulled from pre-built RUNTIME_BASE.
 # =============================================================================
-FROM nvidia/cuda:${CUDA_VERSION}-runtime-amzn2023 AS runtime
+FROM nvidia/cuda:${CUDA_VERSION}-runtime-amzn2023 AS runtime-build
 
 ARG CUDA_VERSION
 ARG PYTHON_VERSION=3.12
@@ -238,6 +244,10 @@ ENV LD_LIBRARY_PATH=/usr/local/nvidia/lib64:/usr/local/cuda/lib64:${LD_LIBRARY_P
 ENV VLLM_USAGE_SOURCE=production-docker-image
 ENV HF_HUB_ENABLE_HF_TRANSFER=1
 
+# Pre-built runtime (fast path) — used when RUNTIME_BASE is set
+ARG RUNTIME_BASE
+FROM ${RUNTIME_BASE:-runtime-build} AS runtime
+
 # =============================================================================
 # STAGE 4: DLC overlay — Amazon DLC customizations on top of vLLM runtime
 # =============================================================================

From eb8e6b741cbf9aa6e7910727b006d3c494d88212 Mon Sep 17 00:00:00 2001
From: Yadan Wei <yadanwei@amazon.com>
Date: Fri, 3 Apr 2026 16:46:28 -0700
Subject: [PATCH 29/58] feat: per-model test config with
 route/request/validate, g5 for endpoint test

- Model config defines route, test_request, and validate per model
- Smoke tests are generic: send request to route, validate response
- EC2 uses direct API, SageMaker uses /invocations with route header
- Endpoint test uses ml.g5.xlarge (A10G) instead of g4dn (T4)
- Diffusion uses /v1/images/generations endpoint
---
 .github/config/vllm-omni-model-tests.yml      | 11 ++-
 .../reusable-vllm-omni-model-tests.yml        |  4 +-
 .../sagemaker/test_sm_omni_endpoint.py        |  2 +-
 .../scripts/vllm_omni_ec2_smoke_test.sh       | 92 +++++++-----------
 .../scripts/vllm_omni_sagemaker_smoke_test.sh | 97 ++++++++-----------
 5 files changed, 87 insertions(+), 119 deletions(-)

diff --git a/.github/config/vllm-omni-model-tests.yml b/.github/config/vllm-omni-model-tests.yml
index 32242d2c1e00..9dc5b13d97d6 100644
--- a/.github/config/vllm-omni-model-tests.yml
+++ b/.github/config/vllm-omni-model-tests.yml
@@ -1,6 +1,9 @@
 # vLLM-Omni Model Test Configuration
 # Tests for omni-modality models (TTS, image generation)
 # Models are pre-cached in S3 as tar.gz archives
+#
+# Each model defines a route, a test_request, and a validate rule. EC2 tests send
+# test_request to the route directly; SageMaker tests use /invocations + route header.
 
 s3_prefix: "s3://dlc-cicd-models/omni-models"
 
@@ -8,12 +11,16 @@ smoke-test:
   codebuild-fleet:
     - name: "qwen3-tts-1.7b-customvoice"
       s3_model: "qwen3-tts-1.7b-customvoice.tar.gz"
-      type: tts
       fleet: "x86-g6xl-runner"
       extra_args: ""
+      route: "/v1/audio/speech"
+      test_request: '{"input": "Hello, how are you?", "voice": "vivian", "language": "English"}'
+      validate: "binary_size_gt:1000"
 
     - name: "flux2-klein-4b"
       s3_model: "flux2-klein-4b.tar.gz"
-      type: diffusion
       fleet: "x86-g6xl-runner"
       extra_args: ""
+      route: "/v1/images/generations"
+      test_request: '{"prompt": "a red apple on a white table", "size": "512x512", "n": 1}'
+      validate: "json_field:data[0].b64_json"
diff --git a/.github/workflows/reusable-vllm-omni-model-tests.yml b/.github/workflows/reusable-vllm-omni-model-tests.yml
index e9eed2defe6b..6c30807c978b 100644
--- a/.github/workflows/reusable-vllm-omni-model-tests.yml
+++ b/.github/workflows/reusable-vllm-omni-model-tests.yml
@@ -110,7 +110,9 @@ jobs:
       - name: Run smoke test
         run: |
           docker exec ${CONTAINER_ID} bash /tmp/smoke_test.sh \
-            "${{ matrix.model.type }}"
+            "${{ matrix.model.route }}" \
+            '${{ matrix.model.test_request }}' \
+            "${{ matrix.model.validate }}"
 
       - name: Cleanup
         if: always()
diff --git a/test/vllm-omni/sagemaker/test_sm_omni_endpoint.py b/test/vllm-omni/sagemaker/test_sm_omni_endpoint.py
index b8737db13500..535e44f5b7a1 100644
--- a/test/vllm-omni/sagemaker/test_sm_omni_endpoint.py
+++ b/test/vllm-omni/sagemaker/test_sm_omni_endpoint.py
@@ -93,7 +93,7 @@ def model_endpoint(aws_session, model_package, instance_type):
         sagemaker_client.delete_endpoint_config(EndpointConfigName=endpoint_name)
 
 
-@pytest.mark.parametrize("instance_type", ["ml.g4dn.xlarge"], indirect=True)
+@pytest.mark.parametrize("instance_type", ["ml.g5.xlarge"], indirect=True)
 @pytest.mark.parametrize("model_id", ["Qwen/Qwen3-TTS-12Hz-1.7B-CustomVoice"], indirect=True)
 def test_vllm_omni_tts_endpoint(model_endpoint):
     """TTS via /invocations routed to /v1/audio/speech by the serve proxy."""
diff --git a/test/vllm-omni/scripts/vllm_omni_ec2_smoke_test.sh b/test/vllm-omni/scripts/vllm_omni_ec2_smoke_test.sh
index 929f5fac3ba4..3860b3595a99 100755
--- a/test/vllm-omni/scripts/vllm_omni_ec2_smoke_test.sh
+++ b/test/vllm-omni/scripts/vllm_omni_ec2_smoke_test.sh
@@ -1,16 +1,19 @@
 #!/bin/bash
 # Smoke test for vLLM-Omni EC2 images
-# The container is started with the real EC2 entrypoint.
-# This script waits for readiness and tests inference via the OpenAI-compatible API.
+# Uses the OpenAI-compatible API directly (no /invocations middleware).
+# Request payload and validation are passed as arguments from the model config.
 set -eux
 
-MODEL_TYPE="${1:?Usage: $0 <model-type>}"
+ROUTE="${1:?Usage: $0 <route> <test_request_json> <validate>}"
+REQUEST="${2:?Usage: $0 <route> <test_request_json> <validate>}"
+VALIDATE="${3:?Usage: $0 <route> <test_request_json> <validate>}"
 PORT=8080
 
-echo "=== Testing vLLM-Omni EC2: ${MODEL_TYPE} ==="
+echo "=== vLLM-Omni EC2 smoke test ==="
+echo "Route: ${ROUTE}"
+echo "Validate: ${VALIDATE}"
 
-# Wait for server (entrypoint starts it)
-echo "Waiting for server..."
+# Wait for server
 for i in $(seq 1 300); do
     if curl -s http://localhost:${PORT}/health >/dev/null 2>&1; then
         echo "Server ready after ${i}s"
@@ -21,56 +24,40 @@ done
 
 curl -sf http://localhost:${PORT}/health || { echo "Health check failed"; exit 1; }
 
-curl -sf http://localhost:${PORT}/v1/models | python3 -c "
-import sys, json
-data = json.load(sys.stdin)
-assert len(data['data']) > 0, 'No models listed'
-print(f'Model loaded: {data[\"data\"][0][\"id\"]}')
-"
-
-if [ "${MODEL_TYPE}" = "tts" ]; then
-    curl -sf -X POST http://localhost:${PORT}/v1/audio/speech \
-      -H "Content-Type: application/json" \
-      -d '{
-        "input": "Hello, how are you?",
-        "voice": "vivian",
-        "language": "English"
-      }' --output /tmp/tts_output.wav
-    FILE_SIZE=$(stat -c%s /tmp/tts_output.wav 2>/dev/null || stat -f%z /tmp/tts_output.wav)
-    echo "TTS output file size: ${FILE_SIZE} bytes"
-    [ "${FILE_SIZE}" -gt 1000 ] || { echo "FAIL: TTS output too small"; exit 1; }
-    echo "TTS serving test PASSED"
-
-elif [ "${MODEL_TYPE}" = "diffusion" ]; then
-    RESPONSE=$(curl -sf http://localhost:${PORT}/v1/chat/completions \
-      -H "Content-Type: application/json" \
-      -d '{
-        "messages": [{"role": "user", "content": "a red apple on a white table"}],
-        "extra_body": {
-          "height": 512,
-          "width": 512,
-          "num_inference_steps": 4,
-          "guidance_scale": 3.5,
-          "seed": 42
-        }
-      }')
-    echo "${RESPONSE}" | python3 -c "
-import sys, json, base64
-data = json.load(sys.stdin)
-assert 'choices' in data, f'No choices in response: {str(data)[:200]}'
-content = data['choices'][0]['message']['content']
-if isinstance(content, list):
-    img_item = next(c for c in content if c.get('type') == 'image_url')
-    url = img_item['image_url']['url']
-else:
-    url = str(content)
-assert 'base64,' in url, 'No base64 image in response'
-img_b64 = url.split('base64,')[1]
-img_bytes = base64.b64decode(img_b64)
-print(f'Image generated: {len(img_bytes)} bytes')
-assert len(img_bytes) > 1000, f'Image too small: {len(img_bytes)} bytes'
-print('Diffusion serving test PASSED')
+# Send request directly to the API endpoint
+curl -sf -X POST "http://localhost:${PORT}${ROUTE}" \
+  -H "Content-Type: application/json" \
+  -d "${REQUEST}" \
+  --output /tmp/omni_response --max-time 300
+
+# Validate response
+if [[ "${VALIDATE}" == binary_size_gt:* ]]; then
+    MIN_SIZE="${VALIDATE#binary_size_gt:}"
+    FILE_SIZE=$(stat -c%s /tmp/omni_response 2>/dev/null || stat -f%z /tmp/omni_response)
+    echo "Response size: ${FILE_SIZE} bytes (min: ${MIN_SIZE})"
+    [ "${FILE_SIZE}" -gt "${MIN_SIZE}" ] || { echo "FAIL: response too small"; exit 1; }
+
+elif [[ "${VALIDATE}" == json_field:* ]]; then
+    FIELD="${VALIDATE#json_field:}"
+    # Hand the path to Python via the environment so it is never spliced into source
+    FIELD="${FIELD}" python3 -c "
+import json, os
+field = os.environ['FIELD']
+with open('/tmp/omni_response') as fh:
+    obj = json.load(fh)
+# Walk a path such as data[0].b64_json: digit parts index lists, others index dicts
+for part in field.replace(']','').replace('[','.').split('.'):
+    if part.isdigit():
+        obj = obj[int(part)]
+    else:
+        obj = obj[part]
+assert obj, f'Field {field} is empty'
+print(f'Validated: {field} present ({type(obj).__name__})')
 "
+else
+    # Fail loudly: an unrecognized rule would otherwise skip validation and pass
+    echo "FAIL: unknown validate spec: ${VALIDATE}"
+    exit 1
 fi
 
-echo "=== vLLM-Omni EC2 ${MODEL_TYPE} test PASSED ==="
+echo "=== vLLM-Omni EC2 smoke test PASSED ==="
diff --git a/test/vllm-omni/scripts/vllm_omni_sagemaker_smoke_test.sh b/test/vllm-omni/scripts/vllm_omni_sagemaker_smoke_test.sh
index c9c2997cd311..97130cf9e592 100755
--- a/test/vllm-omni/scripts/vllm_omni_sagemaker_smoke_test.sh
+++ b/test/vllm-omni/scripts/vllm_omni_sagemaker_smoke_test.sh
@@ -1,17 +1,19 @@
 #!/bin/bash
 # Smoke test for vLLM-Omni SageMaker images
-# The container is started with the real SageMaker entrypoint (including
-# the routing middleware). This script only waits for readiness and tests
-# inference via /invocations and /ping — the same path SageMaker uses.
+# Uses /invocations with the routing middleware (CustomAttributes: route=<path>)
+# Request payload and validation are passed as arguments from the model config.
 set -eux
 
-MODEL_TYPE="${1:?Usage: $0 <model-type>}"
+ROUTE="${1:?Usage: $0 <route> <test_request_json> <validate>}"
+REQUEST="${2:?Usage: $0 <route> <test_request_json> <validate>}"
+VALIDATE="${3:?Usage: $0 <route> <test_request_json> <validate>}"
 PORT=8080
 
-echo "=== Testing vLLM-Omni SageMaker: ${MODEL_TYPE} ==="
+echo "=== vLLM-Omni SageMaker smoke test ==="
+echo "Route: ${ROUTE}"
+echo "Validate: ${VALIDATE}"
 
-# Wait for server (entrypoint starts it)
-echo "Waiting for server..."
+# Wait for server
 for i in $(seq 1 300); do
     if curl -s http://localhost:${PORT}/ping >/dev/null 2>&1; then
         echo "Server ready after ${i}s"
@@ -21,59 +23,42 @@ for i in $(seq 1 300); do
 done
 
 curl -sf http://localhost:${PORT}/ping || { echo "Ping failed"; exit 1; }
-curl -sf http://localhost:${PORT}/health || { echo "Health failed"; exit 1; }
 
-curl -sf http://localhost:${PORT}/v1/models | python3 -c "
-import sys, json
-data = json.load(sys.stdin)
-assert len(data['data']) > 0, 'No models listed'
-print(f'Model loaded: {data[\"data\"][0][\"id\"]}')
-"
-
-if [ "${MODEL_TYPE}" = "tts" ]; then
-    curl -sf -X POST http://localhost:${PORT}/invocations \
-      -H "Content-Type: application/json" \
-      -H "X-Amzn-SageMaker-Custom-Attributes: route=/v1/audio/speech" \
-      -d '{
-        "input": "Hello, how are you?",
-        "voice": "vivian",
-        "language": "English"
-      }' --output /tmp/tts_output.wav
-    FILE_SIZE=$(stat -c%s /tmp/tts_output.wav 2>/dev/null || stat -f%z /tmp/tts_output.wav)
-    echo "TTS output file size: ${FILE_SIZE} bytes"
-    [ "${FILE_SIZE}" -gt 1000 ] || { echo "FAIL: TTS output too small"; exit 1; }
-    echo "TTS serving test PASSED"
-
-elif [ "${MODEL_TYPE}" = "diffusion" ]; then
-    RESPONSE=$(curl -sf http://localhost:${PORT}/invocations \
-      -H "Content-Type: application/json" \
-      -d '{
-        "messages": [{"role": "user", "content": "a red apple on a white table"}],
-        "extra_body": {
-          "height": 512,
-          "width": 512,
-          "num_inference_steps": 4,
-          "guidance_scale": 3.5,
-          "seed": 42
-        }
-      }')
-    echo "${RESPONSE}" | python3 -c "
-import sys, json, base64
-data = json.load(sys.stdin)
-assert 'choices' in data, f'No choices in response: {str(data)[:200]}'
-content = data['choices'][0]['message']['content']
-if isinstance(content, list):
-    img_item = next(c for c in content if c.get('type') == 'image_url')
-    url = img_item['image_url']['url']
-else:
-    url = str(content)
-assert 'base64,' in url, 'No base64 image in response'
-img_b64 = url.split('base64,')[1]
-img_bytes = base64.b64decode(img_b64)
-print(f'Image generated: {len(img_bytes)} bytes')
-assert len(img_bytes) > 1000, f'Image too small: {len(img_bytes)} bytes'
-print('Diffusion serving test PASSED')
+# Send request via /invocations with route header
+curl -sf -X POST http://localhost:${PORT}/invocations \
+  -H "Content-Type: application/json" \
+  -H "X-Amzn-SageMaker-Custom-Attributes: route=${ROUTE}" \
+  -d "${REQUEST}" \
+  --output /tmp/omni_response --max-time 300
+
+# Validate response
+if [[ "${VALIDATE}" == binary_size_gt:* ]]; then
+    MIN_SIZE="${VALIDATE#binary_size_gt:}"
+    FILE_SIZE=$(stat -c%s /tmp/omni_response 2>/dev/null || stat -f%z /tmp/omni_response)
+    echo "Response size: ${FILE_SIZE} bytes (min: ${MIN_SIZE})"
+    [ "${FILE_SIZE}" -gt "${MIN_SIZE}" ] || { echo "FAIL: response too small"; exit 1; }
+
+elif [[ "${VALIDATE}" == json_field:* ]]; then
+    FIELD="${VALIDATE#json_field:}"
+    # Hand the path to Python via the environment so it is never spliced into source
+    FIELD="${FIELD}" python3 -c "
+import json, os
+field = os.environ['FIELD']
+with open('/tmp/omni_response') as fh:
+    obj = json.load(fh)
+# Navigate nested field like data[0].b64_json
+for part in field.replace(']','').replace('[','.').split('.'):
+    if part.isdigit():
+        obj = obj[int(part)]
+    else:
+        obj = obj[part]
+assert obj, f'Field {field} is empty'
+print(f'Validated: {field} present ({type(obj).__name__})')
 "
+else
+    # Fail loudly: an unrecognized rule would otherwise skip validation and pass
+    echo "FAIL: unknown validate spec: ${VALIDATE}"
+    exit 1
 fi
 
-echo "=== vLLM-Omni SageMaker ${MODEL_TYPE} test PASSED ==="
+echo "=== vLLM-Omni SageMaker smoke test PASSED ==="

From 0fc2d3b4bd90dbf1573aacfa9c2e4d4fcc21e3e4 Mon Sep 17 00:00:00 2001
From: Yadan Wei <yadanwei@amazon.com>
Date: Fri, 3 Apr 2026 17:07:09 -0700
Subject: [PATCH 30/58] fix: increase SageMaker invoke timeout to 300s for TTS
 cold start

---
 test/vllm-omni/sagemaker/test_sm_omni_endpoint.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/test/vllm-omni/sagemaker/test_sm_omni_endpoint.py b/test/vllm-omni/sagemaker/test_sm_omni_endpoint.py
index 535e44f5b7a1..73f213035563 100644
--- a/test/vllm-omni/sagemaker/test_sm_omni_endpoint.py
+++ b/test/vllm-omni/sagemaker/test_sm_omni_endpoint.py
@@ -114,6 +114,7 @@ def test_vllm_omni_tts_endpoint(model_endpoint):
         ContentType="application/json",
         Body=payload,
         CustomAttributes="route=/v1/audio/speech",
+        InvocationTimeoutSeconds=300,
     )
 
     audio_bytes = response["Body"].read()

From b48b7a7b6eb4adb8c3812579c77bea28ec230aae Mon Sep 17 00:00:00 2001
From: Yadan Wei <yadanwei@amazon.com>
Date: Fri, 3 Apr 2026 17:29:14 -0700
Subject: [PATCH 31/58] fix: retry invoke on timeout instead of unsupported
 InvocationTimeoutSeconds

---
 .../sagemaker/test_sm_omni_endpoint.py        | 21 ++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/test/vllm-omni/sagemaker/test_sm_omni_endpoint.py b/test/vllm-omni/sagemaker/test_sm_omni_endpoint.py
index 73f213035563..089060bb652c 100644
--- a/test/vllm-omni/sagemaker/test_sm_omni_endpoint.py
+++ b/test/vllm-omni/sagemaker/test_sm_omni_endpoint.py
@@ -109,13 +109,20 @@ def test_vllm_omni_tts_endpoint(model_endpoint):
     )
 
     LOGGER.info("Sending TTS request via /invocations with route=/v1/audio/speech")
-    response = sm_runtime.invoke_endpoint(
-        EndpointName=predictor.endpoint_name,
-        ContentType="application/json",
-        Body=payload,
-        CustomAttributes="route=/v1/audio/speech",
-        InvocationTimeoutSeconds=300,
-    )
+    # First request may be slow due to model warmup; retry any failure, up to 3 attempts
+    for attempt in range(3):
+        try:
+            response = sm_runtime.invoke_endpoint(
+                EndpointName=predictor.endpoint_name,
+                ContentType="application/json",
+                Body=payload,
+                CustomAttributes="route=/v1/audio/speech",
+            )
+            break
+        except Exception as e:
+            LOGGER.warning(f"Attempt {attempt + 1} failed: {e}")
+            if attempt == 2:
+                raise
 
     audio_bytes = response["Body"].read()
     LOGGER.info(f"TTS audio response: {len(audio_bytes)} bytes")

From 85772d6e2f4cf4234e0a70ec5f494800519d909d Mon Sep 17 00:00:00 2001
From: Yadan Wei <yadanwei@amazon.com>
Date: Fri, 3 Apr 2026 17:35:21 -0700
Subject: [PATCH 32/58] fix: add --port 8080 to EC2 container start (vllm
 defaults to 8000)

---
 .github/workflows/reusable-vllm-omni-model-tests.yml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/.github/workflows/reusable-vllm-omni-model-tests.yml b/.github/workflows/reusable-vllm-omni-model-tests.yml
index 6c30807c978b..a027a20acb47 100644
--- a/.github/workflows/reusable-vllm-omni-model-tests.yml
+++ b/.github/workflows/reusable-vllm-omni-model-tests.yml
@@ -86,6 +86,7 @@ jobs:
             -p 8080:8080 \
             ${{ inputs.image-uri }} \
             --model /models/${{ matrix.model.name }} \
+            --port 8080 \
             --stage-init-timeout 600)
           echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV
 

From 793b823021e7d3755a861b363092a14e8fbe11a1 Mon Sep 17 00:00:00 2001
From: Yadan Wei <yadanwei@amazon.com>
Date: Fri, 3 Apr 2026 17:39:22 -0700
Subject: [PATCH 33/58] ci: re-trigger after pre-commit fix


From 7d8e128c21453cab04ea1d3508d4caa09df77ae3 Mon Sep 17 00:00:00 2001
From: Yadan Wei <yadanwei@amazon.com>
Date: Fri, 3 Apr 2026 17:46:57 -0700
Subject: [PATCH 34/58] style: apply formatter to xgboost tests, README and workflow

Signed-off-by: Yadan Wei <yadanwei@amazon.com>
---
 .../sagemaker-xgboost-integ-tests.yml         |  32 +++---
 test/xgboost/README.md                        |  67 +++++------
 test/xgboost/e2e/conftest.py                  |  20 +++-
 test/xgboost/e2e/test_e2e.py                  |  48 +++++---
 test/xgboost/e2e/test_e2e_selectable.py       |  47 +++++---
 test/xgboost/e2e/test_hpo.py                  | 104 +++++++++++++-----
 test/xgboost/e2e/test_inference.py            |  34 ++++--
 test/xgboost/e2e/test_inference_mme.py        |  17 ++-
 test/xgboost/e2e/test_network_isolation.py    |  20 +++-
 test/xgboost/e2e/test_script_mode_e2e.py      |  23 ++--
 test/xgboost/e2e/test_training_csv.py         |  84 +++++++++-----
 test/xgboost/e2e/test_training_libsvm.py      |  62 +++++++----
 test/xgboost/e2e/test_training_pb.py          |  53 ++++++---
 test/xgboost/e2e/test_training_pq.py          |  66 +++++++----
 test/xgboost/e2e/test_transform.py            |  17 ++-
 15 files changed, 470 insertions(+), 224 deletions(-)

diff --git a/.github/workflows/sagemaker-xgboost-integ-tests.yml b/.github/workflows/sagemaker-xgboost-integ-tests.yml
index 0202437930f7..cd074ac25869 100644
--- a/.github/workflows/sagemaker-xgboost-integ-tests.yml
+++ b/.github/workflows/sagemaker-xgboost-integ-tests.yml
@@ -35,13 +35,13 @@ jobs:
     steps:
       - name: Checkout DLC source
         uses: actions/checkout@v5
-  
+
       - name: Install dependencies
         run: |
           uv venv --python 3.12
           source .venv/bin/activate
           uv pip install xgboost==3.0.5 boto3 numpy
-  
+
       - name: Generate and upload models
         run: |
           source .venv/bin/activate
@@ -56,23 +56,23 @@ jobs:
     steps:
       - name: Checkout DLC source
         uses: actions/checkout@v5
-  
+
       - name: ECR login
         uses: ./.github/actions/ecr-authenticate
         with:
           aws-account-id: ${{ inputs.aws-account-id }}
           aws-region: ${{ inputs.aws-region }}
           image-uri: ${{ inputs.image-uri }}
-  
+
       - name: Pull image
         run: docker pull ${{ inputs.image-uri }}
-  
+
       - name: Install test dependencies
         run: |
           uv venv --python 3.12
           source .venv/bin/activate
           uv pip install -r test/requirements.txt docker pytest boto3 requests
-  
+
       - name: Run training container tests
         run: |
           source .venv/bin/activate
@@ -91,23 +91,23 @@ jobs:
     steps:
       - name: Checkout DLC source
         uses: actions/checkout@v5
-  
+
       - name: ECR login
         uses: ./.github/actions/ecr-authenticate
         with:
           aws-account-id: ${{ inputs.aws-account-id }}
           aws-region: ${{ inputs.aws-region }}
           image-uri: ${{ inputs.image-uri }}
-  
+
       - name: Pull image
         run: docker pull ${{ inputs.image-uri }}
-  
+
       - name: Install test dependencies
         run: |
           uv venv --python 3.12
           source .venv/bin/activate
           uv pip install -r test/requirements.txt docker pytest boto3 requests
-  
+
       - name: Run scoring container tests
         run: |
           source .venv/bin/activate
@@ -126,23 +126,23 @@ jobs:
     steps:
       - name: Checkout DLC source
         uses: actions/checkout@v5
-  
+
       - name: ECR login
         uses: ./.github/actions/ecr-authenticate
         with:
           aws-account-id: ${{ inputs.aws-account-id }}
           aws-region: ${{ inputs.aws-region }}
           image-uri: ${{ inputs.image-uri }}
-  
+
       - name: Pull image
         run: docker pull ${{ inputs.image-uri }}
-  
+
       - name: Install test dependencies
         run: |
           uv venv --python 3.12
           source .venv/bin/activate
           uv pip install -r test/requirements.txt docker pytest boto3 requests
-  
+
       - name: Run batch transform container tests
         run: |
           source .venv/bin/activate
@@ -219,14 +219,14 @@ jobs:
     steps:
       - name: Checkout DLC source
         uses: actions/checkout@v5
-  
+
       - name: Install test dependencies
         run: |
           uv venv --python 3.12
           source .venv/bin/activate
           uv pip install -r test/requirements.txt
           uv pip install -r test/xgboost/requirements.txt
-  
+
       - name: Run ${{ matrix.test-module }}
         run: |
           source .venv/bin/activate
diff --git a/test/xgboost/README.md b/test/xgboost/README.md
index e3a256ec9172..51e7f1fc7b04 100644
--- a/test/xgboost/README.md
+++ b/test/xgboost/README.md
@@ -17,13 +17,14 @@ test/xgboost/
 Runs the XGBoost container locally via docker-py. The container is mounted with
 `/opt/ml/` directory structures and exercised directly — no SageMaker jobs are created.
 
-| File | What it tests |
-|------|---------------|
-| `test_training.py` | Algorithm-mode training: libsvm/csv, single/multi-file, weights, HPO metrics, objectives, verbosity, checkpoint/reload, distributed, invalid hyperparameters |
-| `test_scoring.py` | Inference: csv/libsvm/protobuf payloads, execution parameters, 20 MB payload, content type validation |
-| `test_batch_transform.py` | Batch transform with `SAGEMAKER_BATCH=True` |
+| File                      | What it tests                                                                                                                                                |
+| ------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------ |
+| `test_training.py`        | Algorithm-mode training: libsvm/csv, single/multi-file, weights, HPO metrics, objectives, verbosity, checkpoint/reload, distributed, invalid hyperparameters |
+| `test_scoring.py`         | Inference: csv/libsvm/protobuf payloads, execution parameters, 20 MB payload, content type validation                                                        |
+| `test_batch_transform.py` | Batch transform with `SAGEMAKER_BATCH=True`                                                                                                                  |
 
 Supporting files:
+
 - `container_helper.py` — `run_training()` and `ServingContainer` context manager
 - `generate_models.py` — generates XGBoost 3.0.5-compatible inference models
 
@@ -32,42 +33,42 @@ Supporting files:
 Launches real SageMaker training jobs, endpoints, and batch transform jobs using the
 SageMaker Python SDK. Validates the container works end-to-end on SageMaker infrastructure.
 
-| File | What it tests |
-|------|---------------|
-| `test_training_libsvm.py` | Single/distributed/checkpoint/GPU training with libsvm data |
-| `test_training_csv.py` | Single/distributed/pipe-mode/Dask GPU training with CSV data |
-| `test_training_pb.py` | Single/distributed/pipe-mode/sparse training with protobuf data |
-| `test_training_pq.py` | Single/distributed/pipe-mode/Dask GPU training with parquet data |
-| `test_e2e.py` | Train → deploy → invoke (CPU + GPU), Dask GPU training |
-| `test_e2e_selectable.py` | Multiclass train → inference with CSV/JSON/JSONLINES accept types |
-| `test_inference.py` | Train a model → deploy → invoke with libsvm/csv |
-| `test_inference_mme.py` | Multi-model endpoint inference |
-| `test_transform.py` | Train a model → batch transform with libsvm input |
-| `test_hpo.py` | Hyperparameter tuning: rmse, aucpr, GPU |
-| `test_script_mode_e2e.py` | Script-mode train → deploy → invoke |
-| `test_network_isolation.py` | Algo-mode training with network isolation |
+| File                        | What it tests                                                     |
+| --------------------------- | ----------------------------------------------------------------- |
+| `test_training_libsvm.py`   | Single/distributed/checkpoint/GPU training with libsvm data       |
+| `test_training_csv.py`      | Single/distributed/pipe-mode/Dask GPU training with CSV data      |
+| `test_training_pb.py`       | Single/distributed/pipe-mode/sparse training with protobuf data   |
+| `test_training_pq.py`       | Single/distributed/pipe-mode/Dask GPU training with parquet data  |
+| `test_e2e.py`               | Train → deploy → invoke (CPU + GPU), Dask GPU training            |
+| `test_e2e_selectable.py`    | Multiclass train → inference with CSV/JSON/JSONLINES accept types |
+| `test_inference.py`         | Train a model → deploy → invoke with libsvm/csv                   |
+| `test_inference_mme.py`     | Multi-model endpoint inference                                    |
+| `test_transform.py`         | Train a model → batch transform with libsvm input                 |
+| `test_hpo.py`               | Hyperparameter tuning: rmse, aucpr, GPU                           |
+| `test_script_mode_e2e.py`   | Script-mode train → deploy → invoke                               |
+| `test_network_isolation.py` | Algo-mode training with network isolation                         |
 
 ### Tier 3: Benchmark Tests (`benchmarks/`)
 
 SageMaker training jobs that measure performance across different configurations.
 
-| File | What it tests |
-|------|---------------|
-| `test_training_objective.py` | reg:squarederror, binary:logistic, multi:softmax |
-| `test_training_tree_method.py` | exact, approx, hist, gpu_hist |
-| `test_training_max_depth.py` | Depth 2/5/8/12 |
-| `test_training_num_round.py` | 10/50/100/200 rounds |
-| `test_training_data_size.py` | 10k/100k/500k rows |
-| `test_training_instance_type.py` | m5.large/xlarge/2xlarge, g4dn.xlarge |
-| `test_training_content_type.py` | libsvm, csv, protobuf |
+| File                             | What it tests                                    |
+| -------------------------------- | ------------------------------------------------ |
+| `test_training_objective.py`     | reg:squarederror, binary:logistic, multi:softmax |
+| `test_training_tree_method.py`   | exact, approx, hist, gpu_hist                    |
+| `test_training_max_depth.py`     | Depth 2/5/8/12                                   |
+| `test_training_num_round.py`     | 10/50/100/200 rounds                             |
+| `test_training_data_size.py`     | 10k/100k/500k rows                               |
+| `test_training_instance_type.py` | m5.large/xlarge/2xlarge, g4dn.xlarge             |
+| `test_training_content_type.py`  | libsvm, csv, protobuf                            |
 
 ## CI Workflows
 
-| Workflow | Trigger | What runs |
-|----------|---------|-----------|
-| `pr-sagemaker-xgboost.yml` | PR to `main` touching `docker/xgboost/**` | Build → unit tests → security → upstream integration |
-| `release-sagemaker-xgboost.yml` | `workflow_dispatch` / push | Build → unit tests → security → `sagemaker-xgboost-integ-tests.yml` |
-| `sagemaker-xgboost-integ-tests.yml` | Called by release workflow | Container tests → E2E tests → benchmarks |
+| Workflow                            | Trigger                                   | What runs                                                           |
+| ----------------------------------- | ----------------------------------------- | ------------------------------------------------------------------- |
+| `pr-sagemaker-xgboost.yml`          | PR to `main` touching `docker/xgboost/**` | Build → unit tests → security → upstream integration                |
+| `release-sagemaker-xgboost.yml`     | `workflow_dispatch` / push                | Build → unit tests → security → `sagemaker-xgboost-integ-tests.yml` |
+| `sagemaker-xgboost-integ-tests.yml` | Called by release workflow                | Container tests → E2E tests → benchmarks                            |
 
 ### Release build flow
 
diff --git a/test/xgboost/e2e/conftest.py b/test/xgboost/e2e/conftest.py
index 2dea7126ae12..57589d8e8c88 100644
--- a/test/xgboost/e2e/conftest.py
+++ b/test/xgboost/e2e/conftest.py
@@ -128,13 +128,18 @@ def run_training_job(
     duration = time.time() - start
 
     desc = sm.describe_training_job(TrainingJobName=job_name)
-    LOGGER.info(f"Job {job_name} completed in {duration:.0f}s — status: {desc['TrainingJobStatus']}")
+    LOGGER.info(
+        f"Job {job_name} completed in {duration:.0f}s — status: {desc['TrainingJobStatus']}"
+    )
     return job_name, duration, desc
 
 
-def deploy_endpoint(image_uri, role, model_data, test_name="ep", instance_type="ml.m5.xlarge", env=None):
+def deploy_endpoint(
+    image_uri, role, model_data, test_name="ep", instance_type="ml.m5.xlarge", env=None
+):
     """Deploy a real-time endpoint and return (predictor, endpoint_name, model_name)."""
     from sagemaker.predictor import Predictor
+
     endpoint_name = random_suffix_name(f"xgb-{test_name}", 32)
     model = Model(
         image_uri=image_uri,
@@ -189,8 +194,15 @@ def delete_endpoint(endpoint_name):
 
 
 def run_batch_transform(
-    image_uri, role, model_data, input_s3_uri, content_type,
-    test_name="bt", instance_type="ml.m5.xlarge", split_type="Line", accept="text/csv",
+    image_uri,
+    role,
+    model_data,
+    input_s3_uri,
+    content_type,
+    test_name="bt",
+    instance_type="ml.m5.xlarge",
+    split_type="Line",
+    accept="text/csv",
     env=None,
 ):
     """Run a batch transform job and return the job description."""
diff --git a/test/xgboost/e2e/test_e2e.py b/test/xgboost/e2e/test_e2e.py
index d5b0ac7742df..522c9359824d 100644
--- a/test/xgboost/e2e/test_e2e.py
+++ b/test/xgboost/e2e/test_e2e.py
@@ -26,9 +26,13 @@
 def trained_model(image_uri, role):
     """Train a CPU model once for all e2e tests in this module."""
     _, _, desc = run_training_job(
-        image_uri=image_uri, role=role, hyperparameters=E2E_HP,
-        train_s3_key="train", validation_s3_key="test",
-        content_type="text/libsvm", test_name="e2e-train",
+        image_uri=image_uri,
+        role=role,
+        hyperparameters=E2E_HP,
+        train_s3_key="train",
+        validation_s3_key="test",
+        content_type="text/libsvm",
+        test_name="e2e-train",
     )
     assert desc["TrainingJobStatus"] == "Completed"
     return desc["ModelArtifacts"]["S3ModelArtifacts"]
@@ -39,9 +43,13 @@ def gpu_trained_model(image_uri, role):
     """Train a GPU model once for GPU e2e tests."""
     hp = {**E2E_HP, "tree_method": "gpu_hist"}
     _, _, desc = run_training_job(
-        image_uri=image_uri, role=role, hyperparameters=hp,
-        train_s3_key="train", validation_s3_key="test",
-        content_type="text/libsvm", test_name="e2e-gpu-train",
+        image_uri=image_uri,
+        role=role,
+        hyperparameters=hp,
+        train_s3_key="train",
+        validation_s3_key="test",
+        content_type="text/libsvm",
+        test_name="e2e-gpu-train",
         instance_type="ml.g4dn.2xlarge",
     )
     assert desc["TrainingJobStatus"] == "Completed"
@@ -53,8 +61,10 @@ def test_train_and_deploy(self, image_uri, role, trained_model):
         endpoint_name = None
         try:
             predictor, endpoint_name = deploy_endpoint(
-                image_uri=image_uri, role=role,
-                model_data=trained_model, test_name="e2e-infer",
+                image_uri=image_uri,
+                role=role,
+                model_data=trained_model,
+                test_name="e2e-infer",
             )
             predictor.content_type = "text/libsvm"
             predictor.accept = "text/csv"
@@ -69,8 +79,10 @@ def test_gpu_train_and_deploy(self, image_uri, role, gpu_trained_model):
         endpoint_name = None
         try:
             predictor, endpoint_name = deploy_endpoint(
-                image_uri=image_uri, role=role,
-                model_data=gpu_trained_model, test_name="e2e-gpu-inf",
+                image_uri=image_uri,
+                role=role,
+                model_data=gpu_trained_model,
+                test_name="e2e-gpu-inf",
                 instance_type="ml.g4dn.2xlarge",
             )
             predictor.content_type = "text/libsvm"
@@ -88,9 +100,13 @@ def test_dask_gpu_train(self, image_uri, role):
             "use_dask_gpu_training": "true",
         }
         _, _, desc = run_training_job(
-            image_uri=image_uri, role=role, hyperparameters=hp,
-            train_s3_key="parquet/train", validation_s3_key="parquet/test",
-            content_type="application/x-parquet", test_name="e2e-dask",
+            image_uri=image_uri,
+            role=role,
+            hyperparameters=hp,
+            train_s3_key="parquet/train",
+            validation_s3_key="parquet/test",
+            content_type="application/x-parquet",
+            test_name="e2e-dask",
             instance_type="ml.g4dn.2xlarge",
             train_distribution="FullyReplicated",
         )
@@ -100,8 +116,10 @@ def test_multi_model_inference(self, image_uri, role, trained_model):
         endpoint_name = None
         try:
             predictor, endpoint_name = deploy_endpoint(
-                image_uri=image_uri, role=role,
-                model_data=trained_model, test_name="e2e-mme",
+                image_uri=image_uri,
+                role=role,
+                model_data=trained_model,
+                test_name="e2e-mme",
             )
             predictor.content_type = "text/libsvm"
             predictor.accept = "text/csv"
diff --git a/test/xgboost/e2e/test_e2e_selectable.py b/test/xgboost/e2e/test_e2e_selectable.py
index 79d55a6da189..4c036ed86558 100644
--- a/test/xgboost/e2e/test_e2e_selectable.py
+++ b/test/xgboost/e2e/test_e2e_selectable.py
@@ -4,6 +4,7 @@
 """
 
 import json
+
 import pytest
 
 from .conftest import delete_endpoint, deploy_endpoint, run_training_job
@@ -26,9 +27,13 @@
 def selectable_model(image_uri, role):
     """Train a multiclass model on iris dataset."""
     _, _, desc = run_training_job(
-        image_uri=image_uri, role=role, hyperparameters=SELECTABLE_HP,
-        train_s3_key="iris/train", validation_s3_key="iris/test",
-        content_type="text/csv", test_name="select-train",
+        image_uri=image_uri,
+        role=role,
+        hyperparameters=SELECTABLE_HP,
+        train_s3_key="iris/train",
+        validation_s3_key="iris/test",
+        content_type="text/csv",
+        test_name="select-train",
     )
     assert desc["TrainingJobStatus"] == "Completed"
     return desc["ModelArtifacts"]["S3ModelArtifacts"]
@@ -39,8 +44,10 @@ def test_csv_accept(self, image_uri, role, selectable_model):
         endpoint_name = None
         try:
             predictor, endpoint_name = deploy_endpoint(
-                image_uri=image_uri, role=role,
-                model_data=selectable_model, test_name="select-csv",
+                image_uri=image_uri,
+                role=role,
+                model_data=selectable_model,
+                test_name="select-csv",
                 env={"SAGEMAKER_INFERENCE_OUTPUT": "predicted_label,labels"},
             )
             predictor.content_type = "text/csv"
@@ -55,8 +62,10 @@ def test_json_accept(self, image_uri, role, selectable_model):
         endpoint_name = None
         try:
             predictor, endpoint_name = deploy_endpoint(
-                image_uri=image_uri, role=role,
-                model_data=selectable_model, test_name="select-json",
+                image_uri=image_uri,
+                role=role,
+                model_data=selectable_model,
+                test_name="select-json",
                 env={"SAGEMAKER_INFERENCE_OUTPUT": "labels,probabilities"},
             )
             predictor.content_type = "text/csv"
@@ -73,14 +82,20 @@ def test_jsonlines_accept(self, image_uri, role, selectable_model):
         endpoint_name = None
         try:
             predictor, endpoint_name = deploy_endpoint(
-                image_uri=image_uri, role=role,
-                model_data=selectable_model, test_name="select-jl",
+                image_uri=image_uri,
+                role=role,
+                model_data=selectable_model,
+                test_name="select-jl",
                 env={"SAGEMAKER_INFERENCE_OUTPUT": "predicted_label,probability"},
             )
             predictor.content_type = "text/csv"
             predictor.accept = "application/jsonlines"
             response = predictor.predict(INFERENCE_PAYLOAD)
-            lines = response.decode().strip().splitlines() if isinstance(response, bytes) else response.strip().splitlines()
+            lines = (
+                response.decode().strip().splitlines()
+                if isinstance(response, bytes)
+                else response.strip().splitlines()
+            )
             assert len(lines) == 3
             for line in lines:
                 parsed = json.loads(line)
@@ -93,9 +108,13 @@ def test_csv_nans_misconfigured_keys(self, image_uri, role, selectable_model):
         endpoint_name = None
         try:
             predictor, endpoint_name = deploy_endpoint(
-                image_uri=image_uri, role=role,
-                model_data=selectable_model, test_name="select-nan",
-                env={"SAGEMAKER_INFERENCE_OUTPUT": "foo,predicted_label,predicted_score,porbabilitise"},
+                image_uri=image_uri,
+                role=role,
+                model_data=selectable_model,
+                test_name="select-nan",
+                env={
+                    "SAGEMAKER_INFERENCE_OUTPUT": "foo,predicted_label,predicted_score,porbabilitise"
+                },
             )
             predictor.content_type = "text/csv"
             predictor.accept = "text/csv"
@@ -104,5 +123,3 @@ def test_csv_nans_misconfigured_keys(self, image_uri, role, selectable_model):
         finally:
             if endpoint_name:
                 delete_endpoint(endpoint_name)
-
-
diff --git a/test/xgboost/e2e/test_hpo.py b/test/xgboost/e2e/test_hpo.py
index 91da69fc337c..5c7aaa414f77 100644
--- a/test/xgboost/e2e/test_hpo.py
+++ b/test/xgboost/e2e/test_hpo.py
@@ -4,28 +4,42 @@
 """
 
 import boto3
-from sagemaker.tuner import HyperparameterTuner, ContinuousParameter, IntegerParameter
 from sagemaker.estimator import Estimator
 from sagemaker.inputs import TrainingInput
+from sagemaker.tuner import ContinuousParameter, HyperparameterTuner, IntegerParameter
 from test_utils import random_suffix_name
 
-from .conftest import data_uri, E2E_TEST_BUCKET, s3_uri
+from .conftest import E2E_TEST_BUCKET, data_uri, s3_uri
 
 RMSE_METRIC = [{"Name": "validation:rmse", "Regex": r"\[.*\].*#011validation-rmse:([\d.]+)"}]
 AUCPR_METRIC = [{"Name": "validation:aucpr", "Regex": r"\[.*\].*#011validation-aucpr:([\d.]+)"}]
 
 
-def _run_hpo(image_uri, role, hp, train_key, val_key, content_type,
-             objective_name, objective_type, metric_defs, test_name,
-             instance_type="ml.m5.xlarge"):
+def _run_hpo(
+    image_uri,
+    role,
+    hp,
+    train_key,
+    val_key,
+    content_type,
+    objective_name,
+    objective_type,
+    metric_defs,
+    test_name,
+    instance_type="ml.m5.xlarge",
+):
     job_name = random_suffix_name(f"xgb-{test_name}", 32)
     output_path = s3_uri(E2E_TEST_BUCKET, f"e2e-output/{job_name}")
 
     estimator = Estimator(
-        image_uri=image_uri, role=role,
-        instance_count=1, instance_type=instance_type,
-        output_path=output_path, hyperparameters=hp,
-        volume_size=10, max_run=2700,
+        image_uri=image_uri,
+        role=role,
+        instance_count=1,
+        instance_type=instance_type,
+        output_path=output_path,
+        hyperparameters=hp,
+        volume_size=10,
+        max_run=2700,
         metric_definitions=metric_defs,
     )
 
@@ -37,14 +51,16 @@ def _run_hpo(image_uri, role, hp, train_key, val_key, content_type,
             "num_round": IntegerParameter(5, 20),
             "eta": ContinuousParameter(0.1, 0.5),
         },
-        max_jobs=4, max_parallel_jobs=2,
+        max_jobs=4,
+        max_parallel_jobs=2,
         metric_definitions=metric_defs,
     )
 
     channels = {
         "train": TrainingInput(s3_data=data_uri(train_key), content_type=content_type),
-        "validation": TrainingInput(s3_data=data_uri(val_key), content_type=content_type,
-                                    distribution="FullyReplicated"),
+        "validation": TrainingInput(
+            s3_data=data_uri(val_key), content_type=content_type, distribution="FullyReplicated"
+        ),
     }
 
     tuner.fit(channels, job_name=job_name)
@@ -67,26 +83,62 @@ def _run_hpo(image_uri, role, hp, train_key, val_key, content_type,
 
 class TestHPO:
     def test_tuning_rmse(self, image_uri, role):
-        _run_hpo(image_uri, role, BASE_HP,
-                 "train", "test", "text/libsvm",
-                 "validation:rmse", "Minimize", RMSE_METRIC, "hpo-rmse")
+        _run_hpo(
+            image_uri,
+            role,
+            BASE_HP,
+            "train",
+            "test",
+            "text/libsvm",
+            "validation:rmse",
+            "Minimize",
+            RMSE_METRIC,
+            "hpo-rmse",
+        )
 
     def test_tuning_aucpr(self, image_uri, role):
         hp = {**BASE_HP, "objective": "binary:hinge"}
-        _run_hpo(image_uri, role, hp,
-                 "csv/binary_train", "csv/binary_train", "text/csv",
-                 "validation:aucpr", "Maximize", AUCPR_METRIC, "hpo-aucpr")
+        _run_hpo(
+            image_uri,
+            role,
+            hp,
+            "csv/binary_train",
+            "csv/binary_train",
+            "text/csv",
+            "validation:aucpr",
+            "Maximize",
+            AUCPR_METRIC,
+            "hpo-aucpr",
+        )
 
     def test_gpu_tuning_rmse(self, image_uri, role):
         hp = {**BASE_HP, "tree_method": "gpu_hist"}
-        _run_hpo(image_uri, role, hp,
-                 "train", "test", "text/libsvm",
-                 "validation:rmse", "Minimize", RMSE_METRIC, "hpo-gpu",
-                 instance_type="ml.g4dn.2xlarge")
+        _run_hpo(
+            image_uri,
+            role,
+            hp,
+            "train",
+            "test",
+            "text/libsvm",
+            "validation:rmse",
+            "Minimize",
+            RMSE_METRIC,
+            "hpo-gpu",
+            instance_type="ml.g4dn.2xlarge",
+        )
 
     def test_gpu_tuning_aucpr(self, image_uri, role):
         hp = {**BASE_HP, "objective": "binary:hinge", "tree_method": "gpu_hist"}
-        _run_hpo(image_uri, role, hp,
-                 "csv/binary_train", "csv/binary_train", "text/csv",
-                 "validation:aucpr", "Maximize", AUCPR_METRIC, "hpo-gpu-auc",
-                 instance_type="ml.g4dn.2xlarge")
+        _run_hpo(
+            image_uri,
+            role,
+            hp,
+            "csv/binary_train",
+            "csv/binary_train",
+            "text/csv",
+            "validation:aucpr",
+            "Maximize",
+            AUCPR_METRIC,
+            "hpo-gpu-auc",
+            instance_type="ml.g4dn.2xlarge",
+        )
diff --git a/test/xgboost/e2e/test_inference.py b/test/xgboost/e2e/test_inference.py
index 593c412da99a..27d57f69c03e 100644
--- a/test/xgboost/e2e/test_inference.py
+++ b/test/xgboost/e2e/test_inference.py
@@ -23,9 +23,13 @@
 def model_data(image_uri, role):
     """Train a model once for all inference tests."""
     _, _, desc = run_training_job(
-        image_uri=image_uri, role=role, hyperparameters=TRAIN_HP,
-        train_s3_key="train", validation_s3_key="test",
-        content_type="text/libsvm", test_name="infer-model",
+        image_uri=image_uri,
+        role=role,
+        hyperparameters=TRAIN_HP,
+        train_s3_key="train",
+        validation_s3_key="test",
+        content_type="text/libsvm",
+        test_name="infer-model",
     )
     assert desc["TrainingJobStatus"] == "Completed"
     return desc["ModelArtifacts"]["S3ModelArtifacts"]
@@ -36,8 +40,10 @@ def test_libsvm_inference(self, image_uri, role, model_data):
         endpoint_name = None
         try:
             predictor, endpoint_name = deploy_endpoint(
-                image_uri=image_uri, role=role,
-                model_data=model_data, test_name="infer-libsvm",
+                image_uri=image_uri,
+                role=role,
+                model_data=model_data,
+                test_name="infer-libsvm",
             )
             predictor.content_type = "text/libsvm"
             predictor.accept = "text/csv"
@@ -51,8 +57,10 @@ def test_csv_inference(self, image_uri, role, model_data):
         endpoint_name = None
         try:
             predictor, endpoint_name = deploy_endpoint(
-                image_uri=image_uri, role=role,
-                model_data=model_data, test_name="infer-csv",
+                image_uri=image_uri,
+                role=role,
+                model_data=model_data,
+                test_name="infer-csv",
             )
             predictor.content_type = "text/csv"
             predictor.accept = "text/csv"
@@ -67,8 +75,10 @@ def test_protobuf_inference(self, image_uri, role, model_data):
         endpoint_name = None
         try:
             predictor, endpoint_name = deploy_endpoint(
-                image_uri=image_uri, role=role,
-                model_data=model_data, test_name="infer-pb",
+                image_uri=image_uri,
+                role=role,
+                model_data=model_data,
+                test_name="infer-pb",
             )
             predictor.content_type = "application/x-recordio-protobuf"
             predictor.accept = "text/csv"
@@ -86,8 +96,10 @@ def test_libsvm_multimodel(self, image_uri, role, model_data):
         endpoint_name = None
         try:
             predictor, endpoint_name = deploy_endpoint(
-                image_uri=image_uri, role=role,
-                model_data=model_data, test_name="infer-mme-lib",
+                image_uri=image_uri,
+                role=role,
+                model_data=model_data,
+                test_name="infer-mme-lib",
             )
             predictor.content_type = "text/libsvm"
             predictor.accept = "text/csv"
diff --git a/test/xgboost/e2e/test_inference_mme.py b/test/xgboost/e2e/test_inference_mme.py
index b376912ccb29..ad5a492d7887 100644
--- a/test/xgboost/e2e/test_inference_mme.py
+++ b/test/xgboost/e2e/test_inference_mme.py
@@ -7,7 +7,6 @@
 
 from .conftest import delete_endpoint, deploy_endpoint, run_training_job
 
-
 TRAIN_HP = {
     "max_depth": "3",
     "num_round": "50",
@@ -24,9 +23,13 @@
 def mme_model(image_uri, role):
     """Train an iris model for MME tests."""
     _, _, desc = run_training_job(
-        image_uri=image_uri, role=role, hyperparameters=TRAIN_HP,
-        train_s3_key="iris/train", validation_s3_key="iris/test",
-        content_type="text/csv", test_name="mme-train",
+        image_uri=image_uri,
+        role=role,
+        hyperparameters=TRAIN_HP,
+        train_s3_key="iris/train",
+        validation_s3_key="iris/test",
+        content_type="text/csv",
+        test_name="mme-train",
     )
     assert desc["TrainingJobStatus"] == "Completed"
     return desc["ModelArtifacts"]["S3ModelArtifacts"]
@@ -37,8 +40,10 @@ def test_csv_multimodel(self, image_uri, role, mme_model):
         endpoint_name = None
         try:
             predictor, endpoint_name = deploy_endpoint(
-                image_uri=image_uri, role=role,
-                model_data=mme_model, test_name="mme-csv",
+                image_uri=image_uri,
+                role=role,
+                model_data=mme_model,
+                test_name="mme-csv",
             )
             predictor.content_type = "text/csv"
             predictor.accept = "text/csv"
diff --git a/test/xgboost/e2e/test_network_isolation.py b/test/xgboost/e2e/test_network_isolation.py
index fbf57b19e322..be389a2c489f 100644
--- a/test/xgboost/e2e/test_network_isolation.py
+++ b/test/xgboost/e2e/test_network_isolation.py
@@ -20,9 +20,13 @@
 class TestNetworkIsolation:
     def test_algo_mode(self, image_uri, role):
         _, duration, desc = run_training_job(
-            image_uri=image_uri, role=role, hyperparameters=BASE_HP,
-            train_s3_key="train", validation_s3_key="test",
-            content_type="text/libsvm", test_name="netiso-algo",
+            image_uri=image_uri,
+            role=role,
+            hyperparameters=BASE_HP,
+            train_s3_key="train",
+            validation_s3_key="test",
+            content_type="text/libsvm",
+            test_name="netiso-algo",
             enable_network_isolation=True,
         )
         assert desc["TrainingJobStatus"] == "Completed"
@@ -34,11 +38,15 @@ def test_script_mode(self, image_uri, role):
             "sagemaker_submit_directory": "/opt/ml/input/data/code/abalone.1.2-1.tar.gz",
         }
         _, duration, desc = run_training_job(
-            image_uri=image_uri, role=role, hyperparameters=hp,
+            image_uri=image_uri,
+            role=role,
+            hyperparameters=hp,
             train_s3_key="script_mode/data/train",
             validation_s3_key="script_mode/data/validation",
-            content_type="text/libsvm", test_name="netiso-script",
-            instance_count=2, enable_network_isolation=True,
+            content_type="text/libsvm",
+            test_name="netiso-script",
+            instance_count=2,
+            enable_network_isolation=True,
             extra_channels={
                 "code": data_uri("script_mode/code/abalone.1.2-1.tar.gz"),
             },
diff --git a/test/xgboost/e2e/test_script_mode_e2e.py b/test/xgboost/e2e/test_script_mode_e2e.py
index 9db1ff81ec5c..fe7c17ede196 100644
--- a/test/xgboost/e2e/test_script_mode_e2e.py
+++ b/test/xgboost/e2e/test_script_mode_e2e.py
@@ -27,11 +27,16 @@
 def script_mode_model(image_uri, role):
     """Train a script-mode model once for all tests in this module."""
     _, _, desc = run_training_job(
-        image_uri=image_uri, role=role, hyperparameters=SCRIPT_HP,
+        image_uri=image_uri,
+        role=role,
+        hyperparameters=SCRIPT_HP,
         train_s3_key="script_mode/data/train",
         validation_s3_key="script_mode/data/validation",
-        content_type="text/libsvm", test_name="script-train",
-        instance_count=2, volume_size=20, max_run=3600,
+        content_type="text/libsvm",
+        test_name="script-train",
+        instance_count=2,
+        volume_size=20,
+        max_run=3600,
     )
     assert desc["TrainingJobStatus"] == "Completed"
     return desc["ModelArtifacts"]["S3ModelArtifacts"]
@@ -42,8 +47,10 @@ def test_inference_single_model(self, image_uri, role, script_mode_model):
         endpoint_name = None
         try:
             predictor, endpoint_name = deploy_endpoint(
-                image_uri=image_uri, role=role,
-                model_data=script_mode_model, test_name="script-infer",
+                image_uri=image_uri,
+                role=role,
+                model_data=script_mode_model,
+                test_name="script-infer",
                 env={
                     "SAGEMAKER_PROGRAM": "abalone.py",
                     "SAGEMAKER_SUBMIT_DIRECTORY": SCRIPT_CODE_S3,
@@ -63,8 +70,10 @@ def test_inference_multi_model(self, image_uri, role, script_mode_model):
         endpoint_name = None
         try:
             predictor, endpoint_name = deploy_endpoint(
-                image_uri=image_uri, role=role,
-                model_data=script_mode_model, test_name="script-mme",
+                image_uri=image_uri,
+                role=role,
+                model_data=script_mode_model,
+                test_name="script-mme",
                 env={
                     "SAGEMAKER_PROGRAM": "abalone.py",
                     "SAGEMAKER_SUBMIT_DIRECTORY": SCRIPT_CODE_S3,
diff --git a/test/xgboost/e2e/test_training_csv.py b/test/xgboost/e2e/test_training_csv.py
index 368439bb031e..bacf92c418a8 100644
--- a/test/xgboost/e2e/test_training_csv.py
+++ b/test/xgboost/e2e/test_training_csv.py
@@ -20,9 +20,13 @@
 class TestTrainingCsv:
     def test_single_instance(self, image_uri, role):
         _, duration, desc = run_training_job(
-            image_uri=image_uri, role=role, hyperparameters=BASE_HP,
-            train_s3_key="csv/train", validation_s3_key="csv/test",
-            content_type="text/csv", test_name="csv-single",
+            image_uri=image_uri,
+            role=role,
+            hyperparameters=BASE_HP,
+            train_s3_key="csv/train",
+            validation_s3_key="csv/test",
+            content_type="text/csv",
+            test_name="csv-single",
         )
         assert desc["TrainingJobStatus"] == "Completed"
         assert 1 <= duration <= 1800
@@ -30,18 +34,26 @@ def test_single_instance(self, image_uri, role):
     def test_distributed(self, image_uri, role):
         hp = {**BASE_HP, "tree_method": "hist"}
         _, _, desc = run_training_job(
-            image_uri=image_uri, role=role, hyperparameters=hp,
-            train_s3_key="csv/train", validation_s3_key="csv/test",
-            content_type="text/csv", test_name="csv-dist",
+            image_uri=image_uri,
+            role=role,
+            hyperparameters=hp,
+            train_s3_key="csv/train",
+            validation_s3_key="csv/test",
+            content_type="text/csv",
+            test_name="csv-dist",
             instance_count=2,
         )
         assert desc["TrainingJobStatus"] == "Completed"
 
     def test_pipe_mode_single_instance(self, image_uri, role):
         _, _, desc = run_training_job(
-            image_uri=image_uri, role=role, hyperparameters=BASE_HP,
-            train_s3_key="csv/train", validation_s3_key="csv/test",
-            content_type="text/csv", test_name="csv-pipe",
+            image_uri=image_uri,
+            role=role,
+            hyperparameters=BASE_HP,
+            train_s3_key="csv/train",
+            validation_s3_key="csv/test",
+            content_type="text/csv",
+            test_name="csv-pipe",
             input_mode="Pipe",
         )
         assert desc["TrainingJobStatus"] == "Completed"
@@ -49,19 +61,28 @@ def test_pipe_mode_single_instance(self, image_uri, role):
     def test_pipe_mode_distributed(self, image_uri, role):
         hp = {**BASE_HP, "tree_method": "hist"}
         _, _, desc = run_training_job(
-            image_uri=image_uri, role=role, hyperparameters=hp,
-            train_s3_key="csv/train", validation_s3_key="csv/test",
-            content_type="text/csv", test_name="csv-pipe-dist",
-            input_mode="Pipe", instance_count=2,
+            image_uri=image_uri,
+            role=role,
+            hyperparameters=hp,
+            train_s3_key="csv/train",
+            validation_s3_key="csv/test",
+            content_type="text/csv",
+            test_name="csv-pipe-dist",
+            input_mode="Pipe",
+            instance_count=2,
         )
         assert desc["TrainingJobStatus"] == "Completed"
 
     def test_dask_gpu_single(self, image_uri, role):
         hp = {**BASE_HP, "tree_method": "gpu_hist", "use_dask_gpu_training": "true"}
         _, _, desc = run_training_job(
-            image_uri=image_uri, role=role, hyperparameters=hp,
-            train_s3_key="csv/train", validation_s3_key="csv/test",
-            content_type="text/csv", test_name="csv-dask-gpu",
+            image_uri=image_uri,
+            role=role,
+            hyperparameters=hp,
+            train_s3_key="csv/train",
+            validation_s3_key="csv/test",
+            content_type="text/csv",
+            test_name="csv-dask-gpu",
             instance_type="ml.g4dn.2xlarge",
             train_distribution="FullyReplicated",
         )
@@ -70,21 +91,34 @@ def test_dask_gpu_single(self, image_uri, role):
     def test_dask_gpu_multi_instance(self, image_uri, role):
         hp = {**BASE_HP, "tree_method": "gpu_hist", "use_dask_gpu_training": "true"}
         _, _, desc = run_training_job(
-            image_uri=image_uri, role=role, hyperparameters=hp,
-            train_s3_key="csv/train", validation_s3_key="csv/test",
-            content_type="text/csv", test_name="csv-dask-2x",
-            instance_type="ml.g4dn.2xlarge", instance_count=2,
+            image_uri=image_uri,
+            role=role,
+            hyperparameters=hp,
+            train_s3_key="csv/train",
+            validation_s3_key="csv/test",
+            content_type="text/csv",
+            test_name="csv-dask-2x",
+            instance_type="ml.g4dn.2xlarge",
+            instance_count=2,
             train_distribution="FullyReplicated",
         )
         assert desc["TrainingJobStatus"] == "Completed"
 
     def test_dask_gpu_binary_class(self, image_uri, role):
-        hp = {**BASE_HP, "tree_method": "gpu_hist", "use_dask_gpu_training": "true",
-              "objective": "binary:logistic"}
+        hp = {
+            **BASE_HP,
+            "tree_method": "gpu_hist",
+            "use_dask_gpu_training": "true",
+            "objective": "binary:logistic",
+        }
         _, _, desc = run_training_job(
-            image_uri=image_uri, role=role, hyperparameters=hp,
-            train_s3_key="csv/binary/train", validation_s3_key="csv/binary/test",
-            content_type="text/csv", test_name="csv-dask-bin",
+            image_uri=image_uri,
+            role=role,
+            hyperparameters=hp,
+            train_s3_key="csv/binary/train",
+            validation_s3_key="csv/binary/test",
+            content_type="text/csv",
+            test_name="csv-dask-bin",
             instance_type="ml.g4dn.2xlarge",
             train_distribution="FullyReplicated",
         )
diff --git a/test/xgboost/e2e/test_training_libsvm.py b/test/xgboost/e2e/test_training_libsvm.py
index 05ef18c300e1..3f311194cfc4 100644
--- a/test/xgboost/e2e/test_training_libsvm.py
+++ b/test/xgboost/e2e/test_training_libsvm.py
@@ -3,7 +3,7 @@
 Migrated from SMFrameworksXGBoost3_0-5Tests/src/integration_tests/test_training_libsvm.py
 """
 
-from .conftest import run_training_job, E2E_TEST_BUCKET, s3_uri
+from .conftest import E2E_TEST_BUCKET, run_training_job, s3_uri
 
 BASE_HP = {
     "max_depth": "5",
@@ -24,9 +24,13 @@ def _checkpoint_uri(name):
 class TestTrainingLibsvm:
     def test_single_instance(self, image_uri, role):
         _, duration, desc = run_training_job(
-            image_uri=image_uri, role=role, hyperparameters=BASE_HP,
-            train_s3_key="train", validation_s3_key="test",
-            content_type="text/libsvm", test_name="libsvm-single",
+            image_uri=image_uri,
+            role=role,
+            hyperparameters=BASE_HP,
+            train_s3_key="train",
+            validation_s3_key="test",
+            content_type="text/libsvm",
+            test_name="libsvm-single",
         )
         assert desc["TrainingJobStatus"] == "Completed"
         assert 1 <= duration <= 1800
@@ -34,18 +38,26 @@ def test_single_instance(self, image_uri, role):
     def test_distributed(self, image_uri, role):
         hp = {**BASE_HP, "tree_method": "hist"}
         _, _, desc = run_training_job(
-            image_uri=image_uri, role=role, hyperparameters=hp,
-            train_s3_key="train", validation_s3_key="test",
-            content_type="text/libsvm", test_name="libsvm-dist",
+            image_uri=image_uri,
+            role=role,
+            hyperparameters=hp,
+            train_s3_key="train",
+            validation_s3_key="test",
+            content_type="text/libsvm",
+            test_name="libsvm-dist",
             instance_count=2,
         )
         assert desc["TrainingJobStatus"] == "Completed"
 
     def test_checkpoint_single_instance(self, image_uri, role):
         _, _, desc = run_training_job(
-            image_uri=image_uri, role=role, hyperparameters=BASE_HP,
-            train_s3_key="train", validation_s3_key="test",
-            content_type="text/libsvm", test_name="libsvm-ckpt",
+            image_uri=image_uri,
+            role=role,
+            hyperparameters=BASE_HP,
+            train_s3_key="train",
+            validation_s3_key="test",
+            content_type="text/libsvm",
+            test_name="libsvm-ckpt",
             checkpoint_s3_uri=_checkpoint_uri("libsvm-ckpt"),
         )
         assert desc["TrainingJobStatus"] == "Completed"
@@ -53,9 +65,13 @@ def test_checkpoint_single_instance(self, image_uri, role):
     def test_checkpoint_distributed(self, image_uri, role):
         hp = {**BASE_HP, "tree_method": "hist"}
         _, _, desc = run_training_job(
-            image_uri=image_uri, role=role, hyperparameters=hp,
-            train_s3_key="train", validation_s3_key="test",
-            content_type="text/libsvm", test_name="libsvm-ckpt-d",
+            image_uri=image_uri,
+            role=role,
+            hyperparameters=hp,
+            train_s3_key="train",
+            validation_s3_key="test",
+            content_type="text/libsvm",
+            test_name="libsvm-ckpt-d",
             instance_count=2,
             checkpoint_s3_uri=_checkpoint_uri("libsvm-ckpt-dist"),
         )
@@ -64,9 +80,13 @@ def test_checkpoint_distributed(self, image_uri, role):
     def test_gpu_single_instance(self, image_uri, role):
         hp = {**BASE_HP, "tree_method": "gpu_hist"}
         _, _, desc = run_training_job(
-            image_uri=image_uri, role=role, hyperparameters=hp,
-            train_s3_key="train", validation_s3_key="test",
-            content_type="text/libsvm", test_name="libsvm-gpu",
+            image_uri=image_uri,
+            role=role,
+            hyperparameters=hp,
+            train_s3_key="train",
+            validation_s3_key="test",
+            content_type="text/libsvm",
+            test_name="libsvm-gpu",
             instance_type="ml.g4dn.2xlarge",
         )
         assert desc["TrainingJobStatus"] == "Completed"
@@ -74,9 +94,13 @@ def test_gpu_single_instance(self, image_uri, role):
     def test_gpu_checkpoint(self, image_uri, role):
         hp = {**BASE_HP, "tree_method": "gpu_hist"}
         _, _, desc = run_training_job(
-            image_uri=image_uri, role=role, hyperparameters=hp,
-            train_s3_key="train", validation_s3_key="test",
-            content_type="text/libsvm", test_name="libsvm-gpuck",
+            image_uri=image_uri,
+            role=role,
+            hyperparameters=hp,
+            train_s3_key="train",
+            validation_s3_key="test",
+            content_type="text/libsvm",
+            test_name="libsvm-gpuck",
             instance_type="ml.g4dn.2xlarge",
             checkpoint_s3_uri=_checkpoint_uri("libsvm-gpu-ckpt"),
         )
diff --git a/test/xgboost/e2e/test_training_pb.py b/test/xgboost/e2e/test_training_pb.py
index 91beb28e69f8..f70a55015c8f 100644
--- a/test/xgboost/e2e/test_training_pb.py
+++ b/test/xgboost/e2e/test_training_pb.py
@@ -20,9 +20,13 @@
 class TestTrainingProtobuf:
     def test_single_instance(self, image_uri, role):
         _, duration, desc = run_training_job(
-            image_uri=image_uri, role=role, hyperparameters=BASE_HP,
-            train_s3_key="recordio-protobuf/train", validation_s3_key="recordio-protobuf/test",
-            content_type="application/x-recordio-protobuf", test_name="pb-single",
+            image_uri=image_uri,
+            role=role,
+            hyperparameters=BASE_HP,
+            train_s3_key="recordio-protobuf/train",
+            validation_s3_key="recordio-protobuf/test",
+            content_type="application/x-recordio-protobuf",
+            test_name="pb-single",
         )
         assert desc["TrainingJobStatus"] == "Completed"
         assert 1 <= duration <= 1800
@@ -30,18 +34,26 @@ def test_single_instance(self, image_uri, role):
     def test_distributed(self, image_uri, role):
         hp = {**BASE_HP, "tree_method": "hist"}
         _, _, desc = run_training_job(
-            image_uri=image_uri, role=role, hyperparameters=hp,
-            train_s3_key="recordio-protobuf/train", validation_s3_key="recordio-protobuf/test",
-            content_type="application/x-recordio-protobuf", test_name="pb-dist",
+            image_uri=image_uri,
+            role=role,
+            hyperparameters=hp,
+            train_s3_key="recordio-protobuf/train",
+            validation_s3_key="recordio-protobuf/test",
+            content_type="application/x-recordio-protobuf",
+            test_name="pb-dist",
             instance_count=2,
         )
         assert desc["TrainingJobStatus"] == "Completed"
 
     def test_pipe_mode_single_instance(self, image_uri, role):
         _, _, desc = run_training_job(
-            image_uri=image_uri, role=role, hyperparameters=BASE_HP,
-            train_s3_key="recordio-protobuf/train", validation_s3_key="recordio-protobuf/test",
-            content_type="application/x-recordio-protobuf", test_name="pb-pipe",
+            image_uri=image_uri,
+            role=role,
+            hyperparameters=BASE_HP,
+            train_s3_key="recordio-protobuf/train",
+            validation_s3_key="recordio-protobuf/test",
+            content_type="application/x-recordio-protobuf",
+            test_name="pb-pipe",
             input_mode="Pipe",
         )
         assert desc["TrainingJobStatus"] == "Completed"
@@ -49,17 +61,26 @@ def test_pipe_mode_single_instance(self, image_uri, role):
     def test_pipe_mode_distributed(self, image_uri, role):
         hp = {**BASE_HP, "tree_method": "hist"}
         _, _, desc = run_training_job(
-            image_uri=image_uri, role=role, hyperparameters=hp,
-            train_s3_key="recordio-protobuf/train", validation_s3_key="recordio-protobuf/test",
-            content_type="application/x-recordio-protobuf", test_name="pb-pipe-dist",
-            input_mode="Pipe", instance_count=2,
+            image_uri=image_uri,
+            role=role,
+            hyperparameters=hp,
+            train_s3_key="recordio-protobuf/train",
+            validation_s3_key="recordio-protobuf/test",
+            content_type="application/x-recordio-protobuf",
+            test_name="pb-pipe-dist",
+            input_mode="Pipe",
+            instance_count=2,
         )
         assert desc["TrainingJobStatus"] == "Completed"
 
     def test_sparse_single_instance(self, image_uri, role):
         _, _, desc = run_training_job(
-            image_uri=image_uri, role=role, hyperparameters=BASE_HP,
-            train_s3_key="recordio-protobuf/sparse/train", validation_s3_key="recordio-protobuf/sparse/test",
-            content_type="application/x-recordio-protobuf", test_name="pb-sparse",
+            image_uri=image_uri,
+            role=role,
+            hyperparameters=BASE_HP,
+            train_s3_key="recordio-protobuf/sparse/train",
+            validation_s3_key="recordio-protobuf/sparse/test",
+            content_type="application/x-recordio-protobuf",
+            test_name="pb-sparse",
         )
         assert desc["TrainingJobStatus"] == "Completed"
diff --git a/test/xgboost/e2e/test_training_pq.py b/test/xgboost/e2e/test_training_pq.py
index b7c0033f7f2e..be0da037145c 100644
--- a/test/xgboost/e2e/test_training_pq.py
+++ b/test/xgboost/e2e/test_training_pq.py
@@ -20,9 +20,13 @@
 class TestTrainingParquet:
     def test_single_instance(self, image_uri, role):
         _, duration, desc = run_training_job(
-            image_uri=image_uri, role=role, hyperparameters=BASE_HP,
-            train_s3_key="parquet/train", validation_s3_key="parquet/test",
-            content_type="application/x-parquet", test_name="pq-single",
+            image_uri=image_uri,
+            role=role,
+            hyperparameters=BASE_HP,
+            train_s3_key="parquet/train",
+            validation_s3_key="parquet/test",
+            content_type="application/x-parquet",
+            test_name="pq-single",
             instance_type="ml.m5.2xlarge",
         )
         assert desc["TrainingJobStatus"] == "Completed"
@@ -31,18 +35,26 @@ def test_single_instance(self, image_uri, role):
     def test_distributed(self, image_uri, role):
         hp = {**BASE_HP, "tree_method": "hist"}
         _, _, desc = run_training_job(
-            image_uri=image_uri, role=role, hyperparameters=hp,
-            train_s3_key="parquet/train", validation_s3_key="parquet/test",
-            content_type="application/x-parquet", test_name="pq-dist",
+            image_uri=image_uri,
+            role=role,
+            hyperparameters=hp,
+            train_s3_key="parquet/train",
+            validation_s3_key="parquet/test",
+            content_type="application/x-parquet",
+            test_name="pq-dist",
             instance_count=2,
         )
         assert desc["TrainingJobStatus"] == "Completed"
 
     def test_pipe_mode_single_instance(self, image_uri, role):
         _, _, desc = run_training_job(
-            image_uri=image_uri, role=role, hyperparameters=BASE_HP,
-            train_s3_key="parquet/train", validation_s3_key="parquet/test",
-            content_type="application/x-parquet", test_name="pq-pipe",
+            image_uri=image_uri,
+            role=role,
+            hyperparameters=BASE_HP,
+            train_s3_key="parquet/train",
+            validation_s3_key="parquet/test",
+            content_type="application/x-parquet",
+            test_name="pq-pipe",
             input_mode="Pipe",
         )
         assert desc["TrainingJobStatus"] == "Completed"
@@ -50,19 +62,28 @@ def test_pipe_mode_single_instance(self, image_uri, role):
     def test_pipe_mode_distributed(self, image_uri, role):
         hp = {**BASE_HP, "tree_method": "hist"}
         _, _, desc = run_training_job(
-            image_uri=image_uri, role=role, hyperparameters=hp,
-            train_s3_key="parquet/train", validation_s3_key="parquet/test",
-            content_type="application/x-parquet", test_name="pq-pipe-dist",
-            input_mode="Pipe", instance_count=2,
+            image_uri=image_uri,
+            role=role,
+            hyperparameters=hp,
+            train_s3_key="parquet/train",
+            validation_s3_key="parquet/test",
+            content_type="application/x-parquet",
+            test_name="pq-pipe-dist",
+            input_mode="Pipe",
+            instance_count=2,
         )
         assert desc["TrainingJobStatus"] == "Completed"
 
     def test_dask_gpu_single(self, image_uri, role):
         hp = {**BASE_HP, "tree_method": "gpu_hist", "use_dask_gpu_training": "true"}
         _, _, desc = run_training_job(
-            image_uri=image_uri, role=role, hyperparameters=hp,
-            train_s3_key="parquet/train", validation_s3_key="parquet/test",
-            content_type="application/x-parquet", test_name="pq-dask-gpu",
+            image_uri=image_uri,
+            role=role,
+            hyperparameters=hp,
+            train_s3_key="parquet/train",
+            validation_s3_key="parquet/test",
+            content_type="application/x-parquet",
+            test_name="pq-dask-gpu",
             instance_type="ml.g4dn.2xlarge",
             train_distribution="FullyReplicated",
         )
@@ -71,10 +92,15 @@ def test_dask_gpu_single(self, image_uri, role):
     def test_dask_gpu_multi_instance(self, image_uri, role):
         hp = {**BASE_HP, "tree_method": "gpu_hist", "use_dask_gpu_training": "true"}
         _, _, desc = run_training_job(
-            image_uri=image_uri, role=role, hyperparameters=hp,
-            train_s3_key="parquet/train", validation_s3_key="parquet/test",
-            content_type="application/x-parquet", test_name="pq-dask-2x",
-            instance_type="ml.g4dn.2xlarge", instance_count=2,
+            image_uri=image_uri,
+            role=role,
+            hyperparameters=hp,
+            train_s3_key="parquet/train",
+            validation_s3_key="parquet/test",
+            content_type="application/x-parquet",
+            test_name="pq-dask-2x",
+            instance_type="ml.g4dn.2xlarge",
+            instance_count=2,
             train_distribution="FullyReplicated",
         )
         assert desc["TrainingJobStatus"] == "Completed"
diff --git a/test/xgboost/e2e/test_transform.py b/test/xgboost/e2e/test_transform.py
index 1ef039a6afcc..b5d271cb9fc7 100644
--- a/test/xgboost/e2e/test_transform.py
+++ b/test/xgboost/e2e/test_transform.py
@@ -23,9 +23,13 @@
 def model_data(image_uri, role):
     """Train a model once for transform tests."""
     _, _, desc = run_training_job(
-        image_uri=image_uri, role=role, hyperparameters=TRAIN_HP,
-        train_s3_key="train", validation_s3_key="test",
-        content_type="text/libsvm", test_name="bt-model",
+        image_uri=image_uri,
+        role=role,
+        hyperparameters=TRAIN_HP,
+        train_s3_key="train",
+        validation_s3_key="test",
+        content_type="text/libsvm",
+        test_name="bt-model",
     )
     assert desc["TrainingJobStatus"] == "Completed"
     return desc["ModelArtifacts"]["S3ModelArtifacts"]
@@ -34,8 +38,11 @@ def model_data(image_uri, role):
 class TestTransform:
     def test_batch_inference_libsvm(self, image_uri, role, model_data):
         desc = run_batch_transform(
-            image_uri=image_uri, role=role, model_data=model_data,
+            image_uri=image_uri,
+            role=role,
+            model_data=model_data,
             input_s3_uri=data_uri("test/abalone.test"),
-            content_type="text/libsvm", test_name="bt-libsvm",
+            content_type="text/libsvm",
+            test_name="bt-libsvm",
         )
         assert desc["TransformJobStatus"] == "Completed"

From 2cd3eb4d3b17ee6470993e93c88fccb16f07f42d Mon Sep 17 00:00:00 2001
From: Yadan Wei <yadanwei@amazon.com>
Date: Fri, 3 Apr 2026 17:50:46 -0700
Subject: [PATCH 35/58] fix: add 30s sleep between retries for torch.compile
 warmup

---
 test/vllm-omni/sagemaker/test_sm_omni_endpoint.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/test/vllm-omni/sagemaker/test_sm_omni_endpoint.py b/test/vllm-omni/sagemaker/test_sm_omni_endpoint.py
index 089060bb652c..32f4f4b21365 100644
--- a/test/vllm-omni/sagemaker/test_sm_omni_endpoint.py
+++ b/test/vllm-omni/sagemaker/test_sm_omni_endpoint.py
@@ -109,7 +109,10 @@ def test_vllm_omni_tts_endpoint(model_endpoint):
     )
 
     LOGGER.info("Sending TTS request via /invocations with route=/v1/audio/speech")
-    # First request may be slow due to model warmup; retry on timeout
+    # First request triggers torch.compile + CUDA graph capture (~67s),
+    # which exceeds SageMaker's 60s invoke timeout. Retry after warmup completes.
+    import time
+
     for attempt in range(3):
         try:
             response = sm_runtime.invoke_endpoint(
@@ -120,9 +123,10 @@ def test_vllm_omni_tts_endpoint(model_endpoint):
             )
             break
         except Exception as e:
-            LOGGER.warning(f"Attempt {attempt + 1} failed: {e}")
+            LOGGER.warning(f"Attempt {attempt + 1}/3 failed: {e}")
             if attempt == 2:
                 raise
+            time.sleep(30)
 
     audio_bytes = response["Body"].read()
     LOGGER.info(f"TTS audio response: {len(audio_bytes)} bytes")

From 7fd7e01f854a47b492821e4c9646dfebc2bd8aec Mon Sep 17 00:00:00 2001
From: Yadan Wei <yadanwei@amazon.com>
Date: Sun, 5 Apr 2026 22:35:45 -0700
Subject: [PATCH 36/58] feat: move unit test to test/vllm-omni/sagemaker/, add
 async endpoint test

- Unit test moved to test/vllm-omni/sagemaker/test_sagemaker_middleware.py
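- build-image now depends on unit-test, so a failing unit test stops the
  pipeline before the image build (fail fast)
- Async endpoint test: uses AsyncInferenceConfig to bypass the 60s invoke timeout
- The async test polls the S3 output location for the result; no container
  changes needed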
---
 .../pr-vllm-omni-sagemaker-amzn2023.yml       |   7 +-
 .../sagemaker}/test_sagemaker_middleware.py   |   5 +
 .../sagemaker/test_sm_omni_endpoint.py        | 100 ++++++++++++++++++
 3 files changed, 107 insertions(+), 5 deletions(-)
 rename {scripts/vllm => test/vllm-omni/sagemaker}/test_sagemaker_middleware.py (95%)

diff --git a/.github/workflows/pr-vllm-omni-sagemaker-amzn2023.yml b/.github/workflows/pr-vllm-omni-sagemaker-amzn2023.yml
index 8bd87418ec88..1afbefa0d9df 100644
--- a/.github/workflows/pr-vllm-omni-sagemaker-amzn2023.yml
+++ b/.github/workflows/pr-vllm-omni-sagemaker-amzn2023.yml
@@ -53,11 +53,8 @@ jobs:
         with:
           python-version: "3.12"
 
-      - name: Install dependencies
-        run: pip install starlette pytest
-
       - name: Run middleware unit tests
-        run: PYTHONPATH=scripts/vllm pytest scripts/vllm/test_sagemaker_middleware.py -v
+        run: pip install starlette pytest && pytest test/vllm-omni/sagemaker/test_sagemaker_middleware.py -v --noconftest
 
   load-config:
     needs: [gatekeeper]
@@ -184,7 +181,7 @@ jobs:
             -f docker/vllm/Dockerfile.amzn2023 .
 
   build-image:
-    needs: [check-changes, load-config, build-runtime]
+    needs: [check-changes, load-config, build-runtime, unit-test]
     if: needs.check-changes.outputs.build-change == 'true'
     runs-on:
       - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
diff --git a/scripts/vllm/test_sagemaker_middleware.py b/test/vllm-omni/sagemaker/test_sagemaker_middleware.py
similarity index 95%
rename from scripts/vllm/test_sagemaker_middleware.py
rename to test/vllm-omni/sagemaker/test_sagemaker_middleware.py
index d2c8eb931cc5..fa7ce616a6e3 100644
--- a/scripts/vllm/test_sagemaker_middleware.py
+++ b/test/vllm-omni/sagemaker/test_sagemaker_middleware.py
@@ -1,6 +1,11 @@
 """Unit tests for SageMaker routing middleware."""
 
 import asyncio
+import os
+import sys
+
+# Allow importing omni_sagemaker_serve from scripts/vllm/
+sys.path.insert(0, os.path.join(os.path.dirname(__file__), "..", "..", "..", "scripts", "vllm"))
 
 import pytest
 from omni_sagemaker_serve import SageMakerRouteMiddleware, _parse_route
diff --git a/test/vllm-omni/sagemaker/test_sm_omni_endpoint.py b/test/vllm-omni/sagemaker/test_sm_omni_endpoint.py
index 32f4f4b21365..9d00c4953f10 100644
--- a/test/vllm-omni/sagemaker/test_sm_omni_endpoint.py
+++ b/test/vllm-omni/sagemaker/test_sm_omni_endpoint.py
@@ -2,8 +2,10 @@
 
 import json
 import logging
+import time
 
 import pytest
+from sagemaker.async_inference import AsyncInferenceConfig
 from sagemaker.model import Model
 from sagemaker.predictor import Predictor
 from sagemaker.serializers import JSONSerializer
@@ -113,6 +115,7 @@ def test_vllm_omni_tts_endpoint(model_endpoint):
     # which exceeds SageMaker's 60s invoke timeout. Retry after warmup completes.
     import time
 
+    # https://github.com/aws/sagemaker-python-sdk/issues/1119
     for attempt in range(3):
         try:
             response = sm_runtime.invoke_endpoint(
@@ -132,3 +135,100 @@ def test_vllm_omni_tts_endpoint(model_endpoint):
     LOGGER.info(f"TTS audio response: {len(audio_bytes)} bytes")
     assert len(audio_bytes) > 1000, f"TTS output too small: {len(audio_bytes)} bytes"
     LOGGER.info("TTS endpoint test PASSED")
+
+
+@pytest.fixture(scope="function")
+def async_endpoint(aws_session, model_package, instance_type):
+    """Deploy an async inference endpoint (no 60s timeout limit)."""
+    sagemaker_client = aws_session.sagemaker
+    model = model_package
+    cleaned_instance = clean_string(instance_type, "_./")
+    endpoint_name = random_suffix_name(f"vllm-omni-async-{cleaned_instance}", 50)
+    s3_output = f"s3://{aws_session.default_bucket()}/vllm-omni-async-output/"
+
+    try:
+        LOGGER.info(f"Deploying async endpoint: {endpoint_name}")
+        predictor = model.deploy(
+            instance_type=instance_type,
+            initial_instance_count=1,
+            endpoint_name=endpoint_name,
+            inference_ami_version=INFERENCE_AMI_VERSION,
+            serializer=JSONSerializer(),
+            async_inference_config=AsyncInferenceConfig(
+                output_path=s3_output,
+                max_concurrent_invocations_per_instance=1,
+            ),
+            wait=True,
+        )
+
+        LOGGER.info(f"Waiting for endpoint {ENDPOINT_INSERVICE} status...")
+        assert wait_for_status(
+            ENDPOINT_INSERVICE,
+            ENDPOINT_WAIT_PERIOD,
+            ENDPOINT_WAIT_LENGTH,
+            get_endpoint_status,
+            sagemaker_client,
+            endpoint_name,
+        )
+        yield predictor, s3_output
+    finally:
+        LOGGER.info(f"Deleting async endpoint: {endpoint_name}")
+        sagemaker_client.delete_endpoint(EndpointName=endpoint_name)
+        sagemaker_client.delete_endpoint_config(EndpointConfigName=endpoint_name)
+
+
+@pytest.mark.parametrize("instance_type", ["ml.g5.xlarge"], indirect=True)
+@pytest.mark.parametrize("model_id", ["Qwen/Qwen3-TTS-12Hz-1.7B-CustomVoice"], indirect=True)
+def test_vllm_omni_tts_async_endpoint(async_endpoint):
+    """TTS via async inference — no 60s timeout, up to 1 hour."""
+    predictor, s3_output = async_endpoint
+    sm_runtime = predictor.sagemaker_session.sagemaker_runtime_client
+    s3_client = predictor.sagemaker_session.boto_session.client("s3")
+
+    payload = json.dumps(
+        {
+            "input": "Hello, this is a test of async text to speech.",
+            "voice": "vivian",
+            "language": "English",
+        }
+    )
+
+    LOGGER.info("Sending async TTS request")
+    response = sm_runtime.invoke_endpoint_async(
+        EndpointName=predictor.endpoint_name,
+        ContentType="application/json",
+        InputLocation=_upload_payload_to_s3(s3_client, payload, s3_output, predictor.endpoint_name),
+        CustomAttributes="route=/v1/audio/speech",
+    )
+
+    output_location = response["OutputLocation"]
+    LOGGER.info(f"Async output location: {output_location}")
+
+    # Poll for result (up to 5 minutes)
+    bucket, key = _parse_s3_uri(output_location)
+    for i in range(60):
+        try:
+            obj = s3_client.get_object(Bucket=bucket, Key=key)
+            audio_bytes = obj["Body"].read()
+            LOGGER.info(f"Async TTS response: {len(audio_bytes)} bytes (after {i * 5}s)")
+            assert len(audio_bytes) > 1000, f"TTS output too small: {len(audio_bytes)} bytes"
+            LOGGER.info("Async TTS endpoint test PASSED")
+            return
+        except s3_client.exceptions.NoSuchKey:
+            time.sleep(5)
+
+    pytest.fail("Async inference timed out after 300s")
+
+
+def _upload_payload_to_s3(s3_client, payload, s3_output, endpoint_name):
+    """Upload request payload to S3 for async inference."""
+    bucket, prefix = _parse_s3_uri(s3_output)
+    key = f"{prefix}{endpoint_name}-input.json"
+    s3_client.put_object(Bucket=bucket, Key=key, Body=payload, ContentType="application/json")
+    return f"s3://{bucket}/{key}"
+
+
+def _parse_s3_uri(uri):
+    """Parse s3://bucket/key into (bucket, key)."""
+    parts = uri.replace("s3://", "").split("/", 1)
+    return parts[0], parts[1] if len(parts) > 1 else ""

From 4f0e25405a5efb671d3cab6d6e19fcf0475900fe Mon Sep 17 00:00:00 2001
From: Yadan Wei <yadanwei@amazon.com>
Date: Sun, 5 Apr 2026 22:37:03 -0700
Subject: [PATCH 37/58] fix: run unit test from sagemaker dir to avoid
 test/__init__.py import

---
 .github/workflows/pr-vllm-omni-sagemaker-amzn2023.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/pr-vllm-omni-sagemaker-amzn2023.yml b/.github/workflows/pr-vllm-omni-sagemaker-amzn2023.yml
index 1afbefa0d9df..e20837804ce1 100644
--- a/.github/workflows/pr-vllm-omni-sagemaker-amzn2023.yml
+++ b/.github/workflows/pr-vllm-omni-sagemaker-amzn2023.yml
@@ -54,7 +54,7 @@ jobs:
           python-version: "3.12"
 
       - name: Run middleware unit tests
-        run: pip install starlette pytest && pytest test/vllm-omni/sagemaker/test_sagemaker_middleware.py -v --noconftest
+        run: pip install starlette pytest && cd test/vllm-omni/sagemaker && pytest test_sagemaker_middleware.py -v
 
   load-config:
     needs: [gatekeeper]

From 1e459cde90431de85baa9f429a834e61e9bbf55b Mon Sep 17 00:00:00 2001
From: Yadan Wei <yadanwei@amazon.com>
Date: Sun, 5 Apr 2026 22:41:46 -0700
Subject: [PATCH 38/58] fix: use default-runner for unit test (has test_utils
 and starlette)

---
 .../workflows/pr-vllm-omni-sagemaker-amzn2023.yml    | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/.github/workflows/pr-vllm-omni-sagemaker-amzn2023.yml b/.github/workflows/pr-vllm-omni-sagemaker-amzn2023.yml
index e20837804ce1..3a609b6eb7a7 100644
--- a/.github/workflows/pr-vllm-omni-sagemaker-amzn2023.yml
+++ b/.github/workflows/pr-vllm-omni-sagemaker-amzn2023.yml
@@ -43,18 +43,16 @@ jobs:
   unit-test:
     needs: [gatekeeper]
     if: success()
-    runs-on: ubuntu-latest
+    runs-on:
+      - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
+        fleet:default-runner
+        buildspec-override:true
     steps:
       - name: Checkout code
         uses: actions/checkout@v5
 
-      - name: Setup python
-        uses: actions/setup-python@v6
-        with:
-          python-version: "3.12"
-
       - name: Run middleware unit tests
-        run: pip install starlette pytest && cd test/vllm-omni/sagemaker && pytest test_sagemaker_middleware.py -v
+        run: pytest test/vllm-omni/sagemaker/test_sagemaker_middleware.py -v
 
   load-config:
     needs: [gatekeeper]

From a02f2cabb3f2ad6c2154960d6a985dfe84afbbdd Mon Sep 17 00:00:00 2001
From: Yadan Wei <yadanwei@amazon.com>
Date: Sun, 5 Apr 2026 22:43:24 -0700
Subject: [PATCH 39/58] fix: install test deps and set PYTHONPATH for unit test
 (matches sanity pattern)

---
 .github/workflows/pr-vllm-omni-sagemaker-amzn2023.yml | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/pr-vllm-omni-sagemaker-amzn2023.yml b/.github/workflows/pr-vllm-omni-sagemaker-amzn2023.yml
index 3a609b6eb7a7..74048b87dabd 100644
--- a/.github/workflows/pr-vllm-omni-sagemaker-amzn2023.yml
+++ b/.github/workflows/pr-vllm-omni-sagemaker-amzn2023.yml
@@ -51,8 +51,16 @@ jobs:
       - name: Checkout code
         uses: actions/checkout@v5
 
+      - name: Setup test dependencies
+        run: |
+          uv venv --python 3.12
+          source .venv/bin/activate
+          uv pip install -r test/requirements.txt
+
       - name: Run middleware unit tests
-        run: pytest test/vllm-omni/sagemaker/test_sagemaker_middleware.py -v
+        run: |
+          source .venv/bin/activate
+          PYTHONPATH=$(pwd)/test:$(pwd)/scripts/vllm:$PYTHONPATH pytest test/vllm-omni/sagemaker/test_sagemaker_middleware.py -v
 
   load-config:
     needs: [gatekeeper]

From 9589e12f168181cf7f3b51bb1cd990f426b7126b Mon Sep 17 00:00:00 2001
From: Yadan Wei <yadanwei@amazon.com>
Date: Sun, 5 Apr 2026 22:47:14 -0700
Subject: [PATCH 40/58] fix: add starlette to unit test deps (not in
 test/requirements.txt)

---
 .github/workflows/pr-vllm-omni-sagemaker-amzn2023.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/pr-vllm-omni-sagemaker-amzn2023.yml b/.github/workflows/pr-vllm-omni-sagemaker-amzn2023.yml
index 74048b87dabd..916d60f4b58e 100644
--- a/.github/workflows/pr-vllm-omni-sagemaker-amzn2023.yml
+++ b/.github/workflows/pr-vllm-omni-sagemaker-amzn2023.yml
@@ -55,7 +55,7 @@ jobs:
         run: |
           uv venv --python 3.12
           source .venv/bin/activate
-          uv pip install -r test/requirements.txt
+          uv pip install -r test/requirements.txt starlette
 
       - name: Run middleware unit tests
         run: |

From 38252ef8525ab9fe9c5882527048c4fbdcd6aa3e Mon Sep 17 00:00:00 2001
From: Yadan Wei <yadanwei@amazon.com>
Date: Sun, 5 Apr 2026 23:12:21 -0700
Subject: [PATCH 41/58] feat: add 4 new models (CosyVoice3, Qwen2.5-Omni,
 BAGEL, Wan2.1), HF model support, consolidate tests

- Model config: CosyVoice3-0.5B, Qwen2.5-Omni-3B, BAGEL-7B-MoT, Wan2.1-T2V-1.3B
- Covers all routes: /v1/audio/speech, /v1/chat/completions, /v1/images/generations, /v1/videos
- Workflow handles both S3 and HF model sources (HF_TOKEN for downloads)
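- Removed the separate unit-test job; the middleware unit test now runs in
  sagemaker-endpoint-test
- Fixed async endpoint test: build the S3 output bucket name from the account
  ID returned by aws_session.sts instead of calling aws_session.default_bucket()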
- Added starlette to sagemaker test requirements
---
 .github/config/vllm-omni-model-tests.yml      | 42 ++++++++++++++++++-
 .../pr-vllm-omni-sagemaker-amzn2023.yml       | 24 +----------
 .../reusable-vllm-omni-model-tests.yml        | 32 +++++++++++---
 test/vllm-omni/sagemaker/requirements.txt     |  1 +
 .../sagemaker/test_sm_omni_endpoint.py        |  3 +-
 5 files changed, 71 insertions(+), 31 deletions(-)

diff --git a/.github/config/vllm-omni-model-tests.yml b/.github/config/vllm-omni-model-tests.yml
index 9dc5b13d97d6..d8c442e5447a 100644
--- a/.github/config/vllm-omni-model-tests.yml
+++ b/.github/config/vllm-omni-model-tests.yml
@@ -1,14 +1,17 @@
 # vLLM-Omni Model Test Configuration
-# Tests for omni-modality models (TTS, image generation)
-# Models are pre-cached in S3 as tar.gz archives
+# Tests for omni-modality models (TTS, image generation, video, omni-chat)
 #
 # Each model defines its test_request (sent to /invocations via middleware)
 # and the route for the SageMaker routing middleware.
+#
+# Models with s3_model are pre-cached in S3. Models with hf_model download
+# from HuggingFace at runtime (requires HF_TOKEN env var).
 
 s3_prefix: "s3://dlc-cicd-models/omni-models"
 
 smoke-test:
   codebuild-fleet:
+    # --- TTS models (route: /v1/audio/speech) ---
     - name: "qwen3-tts-1.7b-customvoice"
       s3_model: "qwen3-tts-1.7b-customvoice.tar.gz"
       fleet: "x86-g6xl-runner"
@@ -17,6 +20,15 @@ smoke-test:
       test_request: '{"input": "Hello, how are you?", "voice": "vivian", "language": "English"}'
       validate: "binary_size_gt:1000"
 
+    - name: "cosyvoice3-0.5b"
+      hf_model: "FunAudioLLM/Fun-CosyVoice3-0.5B-2512"
+      fleet: "x86-g6xl-runner"
+      extra_args: ""
+      route: "/v1/audio/speech"
+      test_request: '{"input": "Hello, this is a test.", "voice": "default"}'
+      validate: "binary_size_gt:1000"
+
+    # --- Image generation models (route: /v1/images/generations) ---
     - name: "flux2-klein-4b"
       s3_model: "flux2-klein-4b.tar.gz"
       fleet: "x86-g6xl-runner"
@@ -24,3 +36,29 @@ smoke-test:
       route: "/v1/images/generations"
       test_request: '{"prompt": "a red apple on a white table", "size": "512x512", "n": 1}'
       validate: "json_field:data[0].b64_json"
+
+    # --- Omni chat models (route: /v1/chat/completions, fallthrough) ---
+    - name: "qwen2.5-omni-3b"
+      hf_model: "Qwen/Qwen2.5-Omni-3B"
+      fleet: "x86-g6xl-runner"
+      extra_args: ""
+      route: "/v1/chat/completions"
+      test_request: '{"messages": [{"role": "user", "content": "Say hello in one sentence."}], "max_tokens": 64}'
+      validate: "json_field:choices[0].message.content"
+
+    - name: "bagel-7b-mot"
+      hf_model: "ByteDance-Seed/BAGEL-7B-MoT"
+      fleet: "x86-g6exl-runner"
+      extra_args: ""
+      route: "/v1/images/generations"
+      test_request: '{"prompt": "a cute cat sitting on a windowsill", "size": "512x512", "n": 1}'
+      validate: "json_field:data[0].b64_json"
+
+    # --- Video generation models (route: /v1/videos) ---
+    - name: "wan2.1-t2v-1.3b"
+      hf_model: "Wan-AI/Wan2.1-T2V-1.3B-Diffusers"
+      fleet: "x86-g6xl-runner"
+      extra_args: ""
+      route: "/v1/videos"
+      test_request: '{"prompt": "a dog running on a beach", "n": 1}'
+      validate: "json_field:id"
diff --git a/.github/workflows/pr-vllm-omni-sagemaker-amzn2023.yml b/.github/workflows/pr-vllm-omni-sagemaker-amzn2023.yml
index 916d60f4b58e..295b4e5ea92b 100644
--- a/.github/workflows/pr-vllm-omni-sagemaker-amzn2023.yml
+++ b/.github/workflows/pr-vllm-omni-sagemaker-amzn2023.yml
@@ -40,28 +40,6 @@ jobs:
       - name: Run permission gate (from base)
         uses: ./.github/actions/pr-permission-gate
 
-  unit-test:
-    needs: [gatekeeper]
-    if: success()
-    runs-on:
-      - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
-        fleet:default-runner
-        buildspec-override:true
-    steps:
-      - name: Checkout code
-        uses: actions/checkout@v5
-
-      - name: Setup test dependencies
-        run: |
-          uv venv --python 3.12
-          source .venv/bin/activate
-          uv pip install -r test/requirements.txt starlette
-
-      - name: Run middleware unit tests
-        run: |
-          source .venv/bin/activate
-          PYTHONPATH=$(pwd)/test:$(pwd)/scripts/vllm:$PYTHONPATH pytest test/vllm-omni/sagemaker/test_sagemaker_middleware.py -v
-
   load-config:
     needs: [gatekeeper]
     if: success()
@@ -187,7 +165,7 @@ jobs:
             -f docker/vllm/Dockerfile.amzn2023 .
 
   build-image:
-    needs: [check-changes, load-config, build-runtime, unit-test]
+    needs: [check-changes, load-config, build-runtime]
     if: needs.check-changes.outputs.build-change == 'true'
     runs-on:
       - codebuild-runner-${{ github.run_id }}-${{ github.run_attempt }}
diff --git a/.github/workflows/reusable-vllm-omni-model-tests.yml b/.github/workflows/reusable-vllm-omni-model-tests.yml
index a027a20acb47..b68ac23872bd 100644
--- a/.github/workflows/reusable-vllm-omni-model-tests.yml
+++ b/.github/workflows/reusable-vllm-omni-model-tests.yml
@@ -22,6 +22,10 @@ on:
         description: "Customer type: ec2 or sagemaker"
         required: true
         type: string
+    secrets:
+      HF_TOKEN:
+        description: "HuggingFace token for downloading models"
+        required: false
 
 jobs:
   load-models:
@@ -42,7 +46,11 @@ jobs:
           prefix = cfg.get('s3_prefix', '')
           models = cfg.get('smoke-test', {}).get('codebuild-fleet', [])
           for m in models:
-              m['s3_path'] = prefix + '/' + m.pop('s3_model')
+              if 's3_model' in m:
+                  m['s3_path'] = prefix + '/' + m.pop('s3_model')
+                  m['model_source'] = 's3'
+              elif 'hf_model' in m:
+                  m['model_source'] = 'hf'
           print(f'matrix={json.dumps(models)}')
           " >> "$GITHUB_OUTPUT"
 
@@ -70,22 +78,35 @@ jobs:
           image-uri: ${{ inputs.image-uri }}
 
       - name: Download model from S3
+        if: matrix.model.model_source == 's3'
         uses: ./.github/actions/download-model
         id: model
         with:
           s3-path: ${{ matrix.model.s3_path }}
           model-name: ${{ matrix.model.name }}
 
+      - name: Resolve model path
+        id: resolve
+        run: |
+          if [ "${{ matrix.model.model_source }}" = "s3" ]; then
+            echo "model_path=/models/${{ matrix.model.name }}" >> $GITHUB_OUTPUT
+            echo "volume=-v /dlc-models:/models" >> $GITHUB_OUTPUT
+          else
+            echo "model_path=${{ matrix.model.hf_model }}" >> $GITHUB_OUTPUT
+            echo "volume=" >> $GITHUB_OUTPUT
+          fi
+
       # EC2: entrypoint accepts CLI args directly
       - name: Start container (EC2)
         if: inputs.customer-type == 'ec2'
         run: |
           docker pull ${{ inputs.image-uri }}
           CONTAINER_ID=$(docker run -d --gpus all --shm-size=4g \
-            -v /dlc-models:/models \
+            ${{ steps.resolve.outputs.volume }} \
+            -e HF_TOKEN=${{ secrets.HF_TOKEN }} \
             -p 8080:8080 \
             ${{ inputs.image-uri }} \
-            --model /models/${{ matrix.model.name }} \
+            --model ${{ steps.resolve.outputs.model_path }} \
             --port 8080 \
             --stage-init-timeout 600)
           echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV
@@ -96,9 +117,10 @@ jobs:
         run: |
           docker pull ${{ inputs.image-uri }}
           CONTAINER_ID=$(docker run -d --gpus all --shm-size=4g \
-            -v /dlc-models:/models \
-            -e SM_VLLM_MODEL=/models/${{ matrix.model.name }} \
+            ${{ steps.resolve.outputs.volume }} \
+            -e SM_VLLM_MODEL=${{ steps.resolve.outputs.model_path }} \
             -e SM_VLLM_STAGE_INIT_TIMEOUT=600 \
+            -e HF_TOKEN=${{ secrets.HF_TOKEN }} \
             -p 8080:8080 \
             ${{ inputs.image-uri }})
           echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV
diff --git a/test/vllm-omni/sagemaker/requirements.txt b/test/vllm-omni/sagemaker/requirements.txt
index d371ab0d94a9..6a4743d65577 100644
--- a/test/vllm-omni/sagemaker/requirements.txt
+++ b/test/vllm-omni/sagemaker/requirements.txt
@@ -1 +1,2 @@
 sagemaker>=2,<3
+starlette
diff --git a/test/vllm-omni/sagemaker/test_sm_omni_endpoint.py b/test/vllm-omni/sagemaker/test_sm_omni_endpoint.py
index 9d00c4953f10..6920c78a09c8 100644
--- a/test/vllm-omni/sagemaker/test_sm_omni_endpoint.py
+++ b/test/vllm-omni/sagemaker/test_sm_omni_endpoint.py
@@ -144,7 +144,8 @@ def async_endpoint(aws_session, model_package, instance_type):
     model = model_package
     cleaned_instance = clean_string(instance_type, "_./")
     endpoint_name = random_suffix_name(f"vllm-omni-async-{cleaned_instance}", 50)
-    s3_output = f"s3://{aws_session.default_bucket()}/vllm-omni-async-output/"
+    account_id = aws_session.sts.get_caller_identity()["Account"]
+    s3_output = f"s3://sagemaker-{aws_session.region}-{account_id}/vllm-omni-async-output/"
 
     try:
         LOGGER.info(f"Deploying async endpoint: {endpoint_name}")

From 68e8c6e75b61b1a76160985dee4e00d8807434e6 Mon Sep 17 00:00:00 2001
From: Yadan Wei <yadanwei@amazon.com>
Date: Mon, 6 Apr 2026 10:24:03 -0700
Subject: [PATCH 42/58] fix: revert to S3-cached models only, new HF models
 need validation first

The new models (CosyVoice3, Qwen2.5-Omni, BAGEL, Wan2.1) hit OOM during HF download.
They need S3 tarballs and per-model validation before being added to CI.
---
 .github/config/vllm-omni-model-tests.yml | 38 ++++--------------------
 1 file changed, 5 insertions(+), 33 deletions(-)

diff --git a/.github/config/vllm-omni-model-tests.yml b/.github/config/vllm-omni-model-tests.yml
index d8c442e5447a..ceb0f7b30216 100644
--- a/.github/config/vllm-omni-model-tests.yml
+++ b/.github/config/vllm-omni-model-tests.yml
@@ -20,14 +20,6 @@ smoke-test:
       test_request: '{"input": "Hello, how are you?", "voice": "vivian", "language": "English"}'
       validate: "binary_size_gt:1000"
 
-    - name: "cosyvoice3-0.5b"
-      hf_model: "FunAudioLLM/Fun-CosyVoice3-0.5B-2512"
-      fleet: "x86-g6xl-runner"
-      extra_args: ""
-      route: "/v1/audio/speech"
-      test_request: '{"input": "Hello, this is a test.", "voice": "default"}'
-      validate: "binary_size_gt:1000"
-
     # --- Image generation models (route: /v1/images/generations) ---
     - name: "flux2-klein-4b"
       s3_model: "flux2-klein-4b.tar.gz"
@@ -37,28 +29,8 @@ smoke-test:
       test_request: '{"prompt": "a red apple on a white table", "size": "512x512", "n": 1}'
       validate: "json_field:data[0].b64_json"
 
-    # --- Omni chat models (route: /v1/chat/completions, fallthrough) ---
-    - name: "qwen2.5-omni-3b"
-      hf_model: "Qwen/Qwen2.5-Omni-3B"
-      fleet: "x86-g6xl-runner"
-      extra_args: ""
-      route: "/v1/chat/completions"
-      test_request: '{"messages": [{"role": "user", "content": "Say hello in one sentence."}], "max_tokens": 64}'
-      validate: "json_field:choices[0].message.content"
-
-    - name: "bagel-7b-mot"
-      hf_model: "ByteDance-Seed/BAGEL-7B-MoT"
-      fleet: "x86-g6exl-runner"
-      extra_args: ""
-      route: "/v1/images/generations"
-      test_request: '{"prompt": "a cute cat sitting on a windowsill", "size": "512x512", "n": 1}'
-      validate: "json_field:data[0].b64_json"
-
-    # --- Video generation models (route: /v1/videos) ---
-    - name: "wan2.1-t2v-1.3b"
-      hf_model: "Wan-AI/Wan2.1-T2V-1.3B-Diffusers"
-      fleet: "x86-g6xl-runner"
-      extra_args: ""
-      route: "/v1/videos"
-      test_request: '{"prompt": "a dog running on a beach", "n": 1}'
-      validate: "json_field:id"
+    # TODO: Add after validation on DLC image with S3 tarballs:
+    # - cosyvoice3-0.5b (TTS, /v1/audio/speech)
+    # - qwen2.5-omni-3b (omni chat, /v1/chat/completions)
+    # - bagel-7b-mot (multimodal, /v1/images/generations)
+    # - wan2.1-t2v-1.3b (video, /v1/videos)

From 1162afddf2c9f42770c33a2db4b793e9645d2c38 Mon Sep 17 00:00:00 2001
From: Yadan Wei <yadanwei@amazon.com>
Date: Mon, 6 Apr 2026 11:52:45 -0700
Subject: [PATCH 43/58] feat: add CosyVoice3-0.5B and Qwen2.5-Omni-3B smoke
 tests (S3 cached)

- CosyVoice3: /v1/audio/speech (different TTS arch)
- Qwen2.5-Omni-3B: /v1/chat/completions (tests fallthrough, no middleware)
- BAGEL and Wan2.1 pending S3 upload
---
 .github/config/vllm-omni-model-tests.yml | 26 ++++++++++++++++++------
 1 file changed, 20 insertions(+), 6 deletions(-)

diff --git a/.github/config/vllm-omni-model-tests.yml b/.github/config/vllm-omni-model-tests.yml
index ceb0f7b30216..95bbf145de7f 100644
--- a/.github/config/vllm-omni-model-tests.yml
+++ b/.github/config/vllm-omni-model-tests.yml
@@ -4,8 +4,7 @@
 # Each model defines its test_request (sent to /invocations via middleware)
 # and the route for the SageMaker routing middleware.
 #
-# Models with s3_model are pre-cached in S3. Models with hf_model download
-# from HuggingFace at runtime (requires HF_TOKEN env var).
+# Models use s3_model (pre-cached in S3) downloaded by the download-model action.
 
 s3_prefix: "s3://dlc-cicd-models/omni-models"
 
@@ -20,6 +19,14 @@ smoke-test:
       test_request: '{"input": "Hello, how are you?", "voice": "vivian", "language": "English"}'
       validate: "binary_size_gt:1000"
 
+    - name: "cosyvoice3-0.5b"
+      s3_model: "cosyvoice3-0.5b.tar.gz"
+      fleet: "x86-g6xl-runner"
+      extra_args: ""
+      route: "/v1/audio/speech"
+      test_request: '{"input": "Hello, this is a test.", "voice": "default"}'
+      validate: "binary_size_gt:1000"
+
     # --- Image generation models (route: /v1/images/generations) ---
     - name: "flux2-klein-4b"
       s3_model: "flux2-klein-4b.tar.gz"
@@ -29,8 +36,15 @@ smoke-test:
       test_request: '{"prompt": "a red apple on a white table", "size": "512x512", "n": 1}'
       validate: "json_field:data[0].b64_json"
 
-    # TODO: Add after validation on DLC image with S3 tarballs:
-    # - cosyvoice3-0.5b (TTS, /v1/audio/speech)
-    # - qwen2.5-omni-3b (omni chat, /v1/chat/completions)
-    # - bagel-7b-mot (multimodal, /v1/images/generations)
+    # --- Omni chat models (route: /v1/chat/completions, fallthrough) ---
+    - name: "qwen2.5-omni-3b"
+      s3_model: "qwen2.5-omni-3b.tar.gz"
+      fleet: "x86-g6xl-runner"
+      extra_args: ""
+      route: "/v1/chat/completions"
+      test_request: '{"messages": [{"role": "user", "content": "Say hello in one sentence."}], "max_tokens": 64}'
+      validate: "json_field:choices[0].message.content"
+
+    # TODO: Add after S3 upload completes:
+    # - bagel-7b-mot (multimodal, /v1/images/generations, x86-g6exl-runner)
     # - wan2.1-t2v-1.3b (video, /v1/videos)

From ee5c415fcfb9b9e87d869b7b049b30abd3481ed7 Mon Sep 17 00:00:00 2001
From: Yadan Wei <yadanwei@amazon.com>
Date: Mon, 6 Apr 2026 12:45:38 -0700
Subject: [PATCH 44/58] fix: bump new models to g6exl (more RAM), add container
 log dump on failure

---
 .github/config/vllm-omni-model-tests.yml             | 4 ++--
 .github/workflows/reusable-vllm-omni-model-tests.yml | 5 +++++
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/.github/config/vllm-omni-model-tests.yml b/.github/config/vllm-omni-model-tests.yml
index 95bbf145de7f..10167358c62f 100644
--- a/.github/config/vllm-omni-model-tests.yml
+++ b/.github/config/vllm-omni-model-tests.yml
@@ -21,7 +21,7 @@ smoke-test:
 
     - name: "cosyvoice3-0.5b"
       s3_model: "cosyvoice3-0.5b.tar.gz"
-      fleet: "x86-g6xl-runner"
+      fleet: "x86-g6exl-runner"
       extra_args: ""
       route: "/v1/audio/speech"
       test_request: '{"input": "Hello, this is a test.", "voice": "default"}'
@@ -39,7 +39,7 @@ smoke-test:
     # --- Omni chat models (route: /v1/chat/completions, fallthrough) ---
     - name: "qwen2.5-omni-3b"
       s3_model: "qwen2.5-omni-3b.tar.gz"
-      fleet: "x86-g6xl-runner"
+      fleet: "x86-g6exl-runner"
       extra_args: ""
       route: "/v1/chat/completions"
       test_request: '{"messages": [{"role": "user", "content": "Say hello in one sentence."}], "max_tokens": 64}'
diff --git a/.github/workflows/reusable-vllm-omni-model-tests.yml b/.github/workflows/reusable-vllm-omni-model-tests.yml
index b68ac23872bd..f26c10540f19 100644
--- a/.github/workflows/reusable-vllm-omni-model-tests.yml
+++ b/.github/workflows/reusable-vllm-omni-model-tests.yml
@@ -137,6 +137,11 @@ jobs:
             '${{ matrix.model.test_request }}' \
             "${{ matrix.model.validate }}"
 
+      - name: Dump container logs
+        if: always()
+        run: |
+          docker logs ${CONTAINER_ID} 2>&1 | tail -100 || true
+
       - name: Cleanup
         if: always()
         run: |

From 862688ddfcd609ca2d0dea607eacd1a1a90e5031 Mon Sep 17 00:00:00 2001
From: Yadan Wei <yadanwei@amazon.com>
Date: Mon, 6 Apr 2026 13:25:23 -0700
Subject: [PATCH 45/58] fix: revert to Qwen3-TTS and FLUX.2 only

Tested models that don't work in CI:
- CosyVoice3: no model_type in config.json, unrecognized by transformers
- Qwen2.5-Omni-3B: OOMs on g6e.xlarge (multi-stage needs >48GB)
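- BAGEL/Wan2.1: need --stage-configs-path, untested

Also forward each model's extra_args to the EC2 container command line and
convert them to SM_VLLM_* environment variables for the SageMaker container.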
---
 .github/config/vllm-omni-model-tests.yml      | 27 ++++-------------
 .../reusable-vllm-omni-model-tests.yml        | 30 ++++++++++++++++++-
 2 files changed, 35 insertions(+), 22 deletions(-)

diff --git a/.github/config/vllm-omni-model-tests.yml b/.github/config/vllm-omni-model-tests.yml
index 10167358c62f..67d8b5a7166f 100644
--- a/.github/config/vllm-omni-model-tests.yml
+++ b/.github/config/vllm-omni-model-tests.yml
@@ -1,5 +1,5 @@
 # vLLM-Omni Model Test Configuration
-# Tests for omni-modality models (TTS, image generation, video, omni-chat)
+# Tests for omni-modality models (TTS, image generation)
 #
 # Each model defines its test_request (sent to /invocations via middleware)
 # and the route for the SageMaker routing middleware.
@@ -19,14 +19,6 @@ smoke-test:
       test_request: '{"input": "Hello, how are you?", "voice": "vivian", "language": "English"}'
       validate: "binary_size_gt:1000"
 
-    - name: "cosyvoice3-0.5b"
-      s3_model: "cosyvoice3-0.5b.tar.gz"
-      fleet: "x86-g6exl-runner"
-      extra_args: ""
-      route: "/v1/audio/speech"
-      test_request: '{"input": "Hello, this is a test.", "voice": "default"}'
-      validate: "binary_size_gt:1000"
-
     # --- Image generation models (route: /v1/images/generations) ---
     - name: "flux2-klein-4b"
       s3_model: "flux2-klein-4b.tar.gz"
@@ -36,15 +28,8 @@ smoke-test:
       test_request: '{"prompt": "a red apple on a white table", "size": "512x512", "n": 1}'
       validate: "json_field:data[0].b64_json"
 
-    # --- Omni chat models (route: /v1/chat/completions, fallthrough) ---
-    - name: "qwen2.5-omni-3b"
-      s3_model: "qwen2.5-omni-3b.tar.gz"
-      fleet: "x86-g6exl-runner"
-      extra_args: ""
-      route: "/v1/chat/completions"
-      test_request: '{"messages": [{"role": "user", "content": "Say hello in one sentence."}], "max_tokens": 64}'
-      validate: "json_field:choices[0].message.content"
-
-    # TODO: Add after S3 upload completes:
-    # - bagel-7b-mot (multimodal, /v1/images/generations, x86-g6exl-runner)
-    # - wan2.1-t2v-1.3b (video, /v1/videos)
+    # Models tested and not yet compatible with CI smoke tests:
+    # - cosyvoice3-0.5b: no model_type in config.json, needs custom registration
+    # - qwen2.5-omni-3b: multi-stage model OOMs on g6e.xlarge (48GB)
+    # - bagel-7b-mot: needs --stage-configs-path, untested
+    # - wan2.1-t2v-1.3b: needs --stage-configs-path, untested
diff --git a/.github/workflows/reusable-vllm-omni-model-tests.yml b/.github/workflows/reusable-vllm-omni-model-tests.yml
index f26c10540f19..08dca3c3ece3 100644
--- a/.github/workflows/reusable-vllm-omni-model-tests.yml
+++ b/.github/workflows/reusable-vllm-omni-model-tests.yml
@@ -108,9 +108,36 @@ jobs:
             ${{ inputs.image-uri }} \
             --model ${{ steps.resolve.outputs.model_path }} \
             --port 8080 \
-            --stage-init-timeout 600)
+            --stage-init-timeout 600 \
+            ${{ matrix.model.extra_args }})
           echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV
 
+      - name: Convert extra_args to SM env vars
+        if: inputs.customer-type == 'sagemaker'
+        id: sm-env
+        run: |
+          # Convert --key value pairs to SM_VLLM_KEY=value env vars
+          EXTRA_ENV=""
+          ARGS="${{ matrix.model.extra_args }}"
+          while [[ -n "$ARGS" ]]; do
+            if [[ "$ARGS" =~ ^--([a-z][a-z0-9-]*)[[:space:]]*(.*) ]]; then
+              KEY=$(echo "${BASH_REMATCH[1]}" | tr '-' '_' | tr '[:lower:]' '[:upper:]')
+              REST="${BASH_REMATCH[2]}"
+              if [[ "$REST" =~ ^--[a-z] ]] || [[ -z "$REST" ]]; then
+                EXTRA_ENV="$EXTRA_ENV -e SM_VLLM_${KEY}=true"
+                ARGS="$REST"
+              else
+                VALUE="${REST%% --*}"
+                EXTRA_ENV="$EXTRA_ENV -e SM_VLLM_${KEY}=${VALUE}"
+                ARGS="${REST#"$VALUE"}"
+              fi
+              ARGS="${ARGS# }"
+            else
+              break
+            fi
+          done
+          echo "env_flags=$EXTRA_ENV" >> $GITHUB_OUTPUT
+
       # SageMaker: entrypoint reads SM_VLLM_* env vars
       - name: Start container (SageMaker)
         if: inputs.customer-type == 'sagemaker'
@@ -121,6 +148,7 @@ jobs:
             -e SM_VLLM_MODEL=${{ steps.resolve.outputs.model_path }} \
             -e SM_VLLM_STAGE_INIT_TIMEOUT=600 \
             -e HF_TOKEN=${{ secrets.HF_TOKEN }} \
+            ${{ steps.sm-env.outputs.env_flags }} \
             -p 8080:8080 \
             ${{ inputs.image-uri }})
           echo "CONTAINER_ID=$CONTAINER_ID" >> $GITHUB_ENV

From 4f4528243299a035e5a5da7ad27a7ca04ed582c4 Mon Sep 17 00:00:00 2001
From: Yadan Wei <yadanwei@amazon.com>
Date: Mon, 6 Apr 2026 14:03:09 -0700
Subject: [PATCH 46/58] feat: add CosyVoice3, Wan2.1, BAGEL, Qwen2.5-Omni smoke
 tests

- CosyVoice3-0.5B: /v1/audio/speech (g6e.4xl, config.json added to tarball)
- Wan2.1-T2V-1.3B: /v1/videos (g6e.4xl, diffusers auto-detect)
- BAGEL-7B-MoT: /v1/chat/completions (g6e.4xl, multimodal image gen)
- Qwen2.5-Omni-3B: /v1/chat/completions (g6e.12xl, text+audio omni)
- 6 models covering 4 routes: speech, images, videos, chat
---
 .github/config/vllm-omni-model-tests.yml | 40 ++++++++++++++++++++----
 1 file changed, 34 insertions(+), 6 deletions(-)

diff --git a/.github/config/vllm-omni-model-tests.yml b/.github/config/vllm-omni-model-tests.yml
index 67d8b5a7166f..c42fce6d6669 100644
--- a/.github/config/vllm-omni-model-tests.yml
+++ b/.github/config/vllm-omni-model-tests.yml
@@ -1,5 +1,5 @@
 # vLLM-Omni Model Test Configuration
-# Tests for omni-modality models (TTS, image generation)
+# Tests for omni-modality models (TTS, image generation, video, omni-chat)
 #
 # Each model defines its test_request (sent to /invocations via middleware)
 # and the route for the SageMaker routing middleware.
@@ -19,6 +19,14 @@ smoke-test:
       test_request: '{"input": "Hello, how are you?", "voice": "vivian", "language": "English"}'
       validate: "binary_size_gt:1000"
 
+    - name: "cosyvoice3-0.5b"
+      s3_model: "cosyvoice3-0.5b.tar.gz"
+      fleet: "x86-g6e4xl-runner"
+      extra_args: "--stage-configs-path vllm_omni/model_executor/stage_configs/cosyvoice3.yaml --trust-remote-code --enforce-eager"
+      route: "/v1/audio/speech"
+      test_request: '{"input": "Hello, this is a test.", "voice": "default"}'
+      validate: "binary_size_gt:1000"
+
     # --- Image generation models (route: /v1/images/generations) ---
     - name: "flux2-klein-4b"
       s3_model: "flux2-klein-4b.tar.gz"
@@ -28,8 +36,28 @@ smoke-test:
       test_request: '{"prompt": "a red apple on a white table", "size": "512x512", "n": 1}'
       validate: "json_field:data[0].b64_json"
 
-    # Models tested and not yet compatible with CI smoke tests:
-    # - cosyvoice3-0.5b: no model_type in config.json, needs custom registration
-    # - qwen2.5-omni-3b: multi-stage model OOMs on g6e.xlarge (48GB)
-    # - bagel-7b-mot: needs --stage-configs-path, untested
-    # - wan2.1-t2v-1.3b: needs --stage-configs-path, untested
+    # --- Video generation models (route: /v1/videos) ---
+    - name: "wan2.1-t2v-1.3b"
+      s3_model: "wan2.1-t2v-1.3b.tar.gz"
+      fleet: "x86-g6e4xl-runner"
+      extra_args: ""
+      route: "/v1/videos"
+      test_request: '{"prompt": "a dog running on a beach", "n": 1}'
+      validate: "json_field:id"
+
+    # --- Omni chat models (route: /v1/chat/completions, fallthrough) ---
+    - name: "bagel-7b-mot"
+      s3_model: "bagel-7b-mot.tar.gz"
+      fleet: "x86-g6e4xl-runner"
+      extra_args: ""
+      route: "/v1/chat/completions"
+      test_request: '{"messages": [{"role": "user", "content": [{"type": "text", "text": "<|im_start|>A cute cat<|im_end|>"}]}], "modalities": ["image"], "height": 512, "width": 512, "num_inference_steps": 4, "seed": 42}'
+      validate: "json_field:choices[0].message.content"
+
+    - name: "qwen2.5-omni-3b"
+      s3_model: "qwen2.5-omni-3b.tar.gz"
+      fleet: "x86-g6e12xl-runner"
+      extra_args: ""
+      route: "/v1/chat/completions"
+      test_request: '{"messages": [{"role": "user", "content": "Say hello in one sentence."}], "max_tokens": 64}'
+      validate: "json_field:choices[0].message.content"

From 9605ed9121872c4f5ea0b591a8e56d52666692d7 Mon Sep 17 00:00:00 2001
From: Yadan Wei <yadanwei@amazon.com>
Date: Mon, 6 Apr 2026 14:15:40 -0700
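Subject: [PATCH 47/58] fix: change smoke-test instance types

Move CosyVoice3 and Wan2.1 from the g6e4xl fleet to g6exl, and comment out
the Qwen3-TTS, FLUX.2 and BAGEL entries.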

Signed-off-by: Yadan Wei <yadanwei@amazon.com>
---
 .github/config/vllm-omni-model-tests.yml | 46 ++++++++++++------------
 1 file changed, 23 insertions(+), 23 deletions(-)

diff --git a/.github/config/vllm-omni-model-tests.yml b/.github/config/vllm-omni-model-tests.yml
index c42fce6d6669..b05fb1d6732d 100644
--- a/.github/config/vllm-omni-model-tests.yml
+++ b/.github/config/vllm-omni-model-tests.yml
@@ -11,48 +11,48 @@ s3_prefix: "s3://dlc-cicd-models/omni-models"
 smoke-test:
   codebuild-fleet:
     # --- TTS models (route: /v1/audio/speech) ---
-    - name: "qwen3-tts-1.7b-customvoice"
-      s3_model: "qwen3-tts-1.7b-customvoice.tar.gz"
-      fleet: "x86-g6xl-runner"
-      extra_args: ""
-      route: "/v1/audio/speech"
-      test_request: '{"input": "Hello, how are you?", "voice": "vivian", "language": "English"}'
-      validate: "binary_size_gt:1000"
+    # - name: "qwen3-tts-1.7b-customvoice"
+    #   s3_model: "qwen3-tts-1.7b-customvoice.tar.gz"
+    #   fleet: "x86-g6xl-runner"
+    #   extra_args: ""
+    #   route: "/v1/audio/speech"
+    #   test_request: '{"input": "Hello, how are you?", "voice": "vivian", "language": "English"}'
+    #   validate: "binary_size_gt:1000"
 
     - name: "cosyvoice3-0.5b"
       s3_model: "cosyvoice3-0.5b.tar.gz"
-      fleet: "x86-g6e4xl-runner"
+      fleet: "x86-g6exl-runner"
       extra_args: "--stage-configs-path vllm_omni/model_executor/stage_configs/cosyvoice3.yaml --trust-remote-code --enforce-eager"
       route: "/v1/audio/speech"
       test_request: '{"input": "Hello, this is a test.", "voice": "default"}'
       validate: "binary_size_gt:1000"
 
     # --- Image generation models (route: /v1/images/generations) ---
-    - name: "flux2-klein-4b"
-      s3_model: "flux2-klein-4b.tar.gz"
-      fleet: "x86-g6xl-runner"
-      extra_args: ""
-      route: "/v1/images/generations"
-      test_request: '{"prompt": "a red apple on a white table", "size": "512x512", "n": 1}'
-      validate: "json_field:data[0].b64_json"
+    # - name: "flux2-klein-4b"
+    #   s3_model: "flux2-klein-4b.tar.gz"
+    #   fleet: "x86-g6xl-runner"
+    #   extra_args: ""
+    #   route: "/v1/images/generations"
+    #   test_request: '{"prompt": "a red apple on a white table", "size": "512x512", "n": 1}'
+    #   validate: "json_field:data[0].b64_json"
 
     # --- Video generation models (route: /v1/videos) ---
     - name: "wan2.1-t2v-1.3b"
       s3_model: "wan2.1-t2v-1.3b.tar.gz"
-      fleet: "x86-g6e4xl-runner"
+      fleet: "x86-g6exl-runner"
       extra_args: ""
       route: "/v1/videos"
       test_request: '{"prompt": "a dog running on a beach", "n": 1}'
       validate: "json_field:id"
 
     # --- Omni chat models (route: /v1/chat/completions, fallthrough) ---
-    - name: "bagel-7b-mot"
-      s3_model: "bagel-7b-mot.tar.gz"
-      fleet: "x86-g6e4xl-runner"
-      extra_args: ""
-      route: "/v1/chat/completions"
-      test_request: '{"messages": [{"role": "user", "content": [{"type": "text", "text": "<|im_start|>A cute cat<|im_end|>"}]}], "modalities": ["image"], "height": 512, "width": 512, "num_inference_steps": 4, "seed": 42}'
-      validate: "json_field:choices[0].message.content"
+    # - name: "bagel-7b-mot"
+    #   s3_model: "bagel-7b-mot.tar.gz"
+    #   fleet: "x86-g6exl-runner"
+    #   extra_args: ""
+    #   route: "/v1/chat/completions"
+    #   test_request: '{"messages": [{"role": "user", "content": [{"type": "text", "text": "<|im_start|>A cute cat<|im_end|>"}]}], "modalities": ["image"], "height": 512, "width": 512, "num_inference_steps": 4, "seed": 42}'
+    #   validate: "json_field:choices[0].message.content"
 
     - name: "qwen2.5-omni-3b"
       s3_model: "qwen2.5-omni-3b.tar.gz"

From e36fc3af37f77d3a207af58e10c88bfb6b331e4c Mon Sep 17 00:00:00 2001
From: Yadan Wei <yadanwei@amazon.com>
Date: Mon, 6 Apr 2026 14:24:57 -0700
Subject: [PATCH 48/58] fix: use absolute path for cosyvoice3 stage config in
 DLC container

---
 .github/config/vllm-omni-model-tests.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/config/vllm-omni-model-tests.yml b/.github/config/vllm-omni-model-tests.yml
index b05fb1d6732d..6a738f20f26e 100644
--- a/.github/config/vllm-omni-model-tests.yml
+++ b/.github/config/vllm-omni-model-tests.yml
@@ -22,7 +22,7 @@ smoke-test:
     - name: "cosyvoice3-0.5b"
       s3_model: "cosyvoice3-0.5b.tar.gz"
       fleet: "x86-g6exl-runner"
-      extra_args: "--stage-configs-path vllm_omni/model_executor/stage_configs/cosyvoice3.yaml --trust-remote-code --enforce-eager"
+      extra_args: "--stage-configs-path /opt/venv/lib64/python3.12/site-packages/vllm_omni/model_executor/stage_configs/cosyvoice3.yaml --trust-remote-code --enforce-eager"
       route: "/v1/audio/speech"
       test_request: '{"input": "Hello, this is a test.", "voice": "default"}'
       validate: "binary_size_gt:1000"

From f3a716b65593fc46ab4d33d35632bfdef9aa4aad Mon Sep 17 00:00:00 2001
From: Yadan Wei <yadanwei@amazon.com>
Date: Mon, 6 Apr 2026 14:28:29 -0700
Subject: [PATCH 49/58] fix: drop cosyvoice3 --stage-configs-path, disable
 qwen2.5-omni-3b test

Signed-off-by: Yadan Wei <yadanwei@amazon.com>
---
 .github/config/vllm-omni-model-tests.yml | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/.github/config/vllm-omni-model-tests.yml b/.github/config/vllm-omni-model-tests.yml
index 6a738f20f26e..7947c3570cc6 100644
--- a/.github/config/vllm-omni-model-tests.yml
+++ b/.github/config/vllm-omni-model-tests.yml
@@ -22,7 +22,7 @@ smoke-test:
     - name: "cosyvoice3-0.5b"
       s3_model: "cosyvoice3-0.5b.tar.gz"
       fleet: "x86-g6exl-runner"
-      extra_args: "--stage-configs-path /opt/venv/lib64/python3.12/site-packages/vllm_omni/model_executor/stage_configs/cosyvoice3.yaml --trust-remote-code --enforce-eager"
+      extra_args: "--trust-remote-code --enforce-eager"
       route: "/v1/audio/speech"
       test_request: '{"input": "Hello, this is a test.", "voice": "default"}'
       validate: "binary_size_gt:1000"
@@ -54,10 +54,10 @@ smoke-test:
     #   test_request: '{"messages": [{"role": "user", "content": [{"type": "text", "text": "<|im_start|>A cute cat<|im_end|>"}]}], "modalities": ["image"], "height": 512, "width": 512, "num_inference_steps": 4, "seed": 42}'
     #   validate: "json_field:choices[0].message.content"
 
-    - name: "qwen2.5-omni-3b"
-      s3_model: "qwen2.5-omni-3b.tar.gz"
-      fleet: "x86-g6e12xl-runner"
-      extra_args: ""
-      route: "/v1/chat/completions"
-      test_request: '{"messages": [{"role": "user", "content": "Say hello in one sentence."}], "max_tokens": 64}'
-      validate: "json_field:choices[0].message.content"
+    # - name: "qwen2.5-omni-3b"
+    #   s3_model: "qwen2.5-omni-3b.tar.gz"
+    #   fleet: "x86-g6e12xl-runner"
+    #   extra_args: ""
+    #   route: "/v1/chat/completions"
+    #   test_request: '{"messages": [{"role": "user", "content": "Say hello in one sentence."}], "max_tokens": 64}'
+    #   validate: "json_field:choices[0].message.content"

From c7a8a1cd1bfde69a819a06be2306b2146831e422 Mon Sep 17 00:00:00 2001
From: Yadan Wei <yadanwei@amazon.com>
Date: Mon, 6 Apr 2026 14:53:33 -0700
Subject: [PATCH 50/58] feat: add 4 new models, form data support, endpoint
 cleanup, more logs

- CosyVoice3 on g6e12xl, Wan2.1 on g6e4xl (active); BAGEL (g6e4xl) and
  Qwen2.5-Omni (g6e12xl) remain commented out
- Wan2.1 uses /v1/videos/sync with multipart/form-data
- Smoke tests support content_type param for form vs JSON
- Orphaned endpoint cleanup step (if: always)
- Container log dump increased to 500 lines
---
 .github/config/vllm-omni-model-tests.yml      | 13 ++++----
 .../pr-vllm-omni-sagemaker-amzn2023.yml       | 14 +++++++++
 .../reusable-vllm-omni-model-tests.yml        |  5 ++--
 .../scripts/vllm_omni_ec2_smoke_test.sh       | 26 +++++++++++-----
 .../scripts/vllm_omni_sagemaker_smoke_test.sh | 30 +++++++++++++------
 5 files changed, 64 insertions(+), 24 deletions(-)

diff --git a/.github/config/vllm-omni-model-tests.yml b/.github/config/vllm-omni-model-tests.yml
index 7947c3570cc6..8f2271834a86 100644
--- a/.github/config/vllm-omni-model-tests.yml
+++ b/.github/config/vllm-omni-model-tests.yml
@@ -21,7 +21,7 @@ smoke-test:
 
     - name: "cosyvoice3-0.5b"
       s3_model: "cosyvoice3-0.5b.tar.gz"
-      fleet: "x86-g6exl-runner"
+      fleet: "x86-g6e12xl-runner"
       extra_args: "--trust-remote-code --enforce-eager"
       route: "/v1/audio/speech"
       test_request: '{"input": "Hello, this is a test.", "voice": "default"}'
@@ -39,16 +39,17 @@ smoke-test:
     # --- Video generation models (route: /v1/videos) ---
     - name: "wan2.1-t2v-1.3b"
       s3_model: "wan2.1-t2v-1.3b.tar.gz"
-      fleet: "x86-g6exl-runner"
+      fleet: "x86-g6e4xl-runner"
       extra_args: ""
-      route: "/v1/videos"
-      test_request: '{"prompt": "a dog running on a beach", "n": 1}'
-      validate: "json_field:id"
+      route: "/v1/videos/sync"
+      content_type: "multipart/form-data"
+      test_request: 'prompt=a dog running on a beach&num_frames=17&num_inference_steps=4&size=480x320&seed=42'
+      validate: "binary_size_gt:1000"
 
     # --- Omni chat models (route: /v1/chat/completions, fallthrough) ---
     # - name: "bagel-7b-mot"
     #   s3_model: "bagel-7b-mot.tar.gz"
-    #   fleet: "x86-g6exl-runner"
+    #   fleet: "x86-g6e4xl-runner"
     #   extra_args: ""
     #   route: "/v1/chat/completions"
     #   test_request: '{"messages": [{"role": "user", "content": [{"type": "text", "text": "<|im_start|>A cute cat<|im_end|>"}]}], "modalities": ["image"], "height": 512, "width": 512, "num_inference_steps": 4, "seed": 42}'
diff --git a/.github/workflows/pr-vllm-omni-sagemaker-amzn2023.yml b/.github/workflows/pr-vllm-omni-sagemaker-amzn2023.yml
index 295b4e5ea92b..6eaec90c0a40 100644
--- a/.github/workflows/pr-vllm-omni-sagemaker-amzn2023.yml
+++ b/.github/workflows/pr-vllm-omni-sagemaker-amzn2023.yml
@@ -298,3 +298,17 @@ jobs:
           source .venv/bin/activate
           cd test/
           python3 -m pytest -vs -rA --image-uri ${{ needs.build-image.outputs.ci-image }} vllm-omni/sagemaker
+
+      - name: Cleanup orphaned endpoints
+        if: always()
+        run: |
+          source .venv/bin/activate
+          python3 -c "
+          import boto3
+          sm = boto3.client('sagemaker')
+          for ep in sm.list_endpoints(NameContains='vllm-omni', StatusEquals='InService').get('Endpoints', []):
+              name = ep['EndpointName']
+              print(f'Deleting orphaned endpoint: {name}')
+              sm.delete_endpoint(EndpointName=name)
+              sm.delete_endpoint_config(EndpointConfigName=name)
+          "
diff --git a/.github/workflows/reusable-vllm-omni-model-tests.yml b/.github/workflows/reusable-vllm-omni-model-tests.yml
index 08dca3c3ece3..dad843e1c853 100644
--- a/.github/workflows/reusable-vllm-omni-model-tests.yml
+++ b/.github/workflows/reusable-vllm-omni-model-tests.yml
@@ -163,12 +163,13 @@ jobs:
           docker exec ${CONTAINER_ID} bash /tmp/smoke_test.sh \
             "${{ matrix.model.route }}" \
             '${{ matrix.model.test_request }}' \
-            "${{ matrix.model.validate }}"
+            "${{ matrix.model.validate }}" \
+            "${{ matrix.model.content_type || 'application/json' }}"
 
       - name: Dump container logs
         if: always()
         run: |
-          docker logs ${CONTAINER_ID} 2>&1 | tail -100 || true
+          docker logs ${CONTAINER_ID} 2>&1 | tail -500 || true
 
       - name: Cleanup
         if: always()
diff --git a/test/vllm-omni/scripts/vllm_omni_ec2_smoke_test.sh b/test/vllm-omni/scripts/vllm_omni_ec2_smoke_test.sh
index 3860b3595a99..6e05785c8aad 100755
--- a/test/vllm-omni/scripts/vllm_omni_ec2_smoke_test.sh
+++ b/test/vllm-omni/scripts/vllm_omni_ec2_smoke_test.sh
@@ -4,13 +4,15 @@
 # Request payload and validation are passed as arguments from the model config.
 set -eux
 
-ROUTE="${1:?Usage: $0 <route> <test_request_json> <validate>}"
-REQUEST="${2:?Usage: $0 <route> <test_request_json> <validate>}"
-VALIDATE="${3:?Usage: $0 <route> <test_request_json> <validate>}"
+ROUTE="${1:?Usage: $0 <route> <test_request> <validate> [content_type]}"
+REQUEST="${2:?Usage: $0 <route> <test_request> <validate> [content_type]}"
+VALIDATE="${3:?Usage: $0 <route> <test_request> <validate> [content_type]}"
+CONTENT_TYPE="${4:-application/json}"
 PORT=8080
 
 echo "=== vLLM-Omni EC2 smoke test ==="
 echo "Route: ${ROUTE}"
+echo "Content-Type: ${CONTENT_TYPE}"
 echo "Validate: ${VALIDATE}"
 
 # Wait for server
@@ -25,10 +27,20 @@ done
 curl -sf http://localhost:${PORT}/health || { echo "Health check failed"; exit 1; }
 
 # Send request directly to the API endpoint
-curl -sf -X POST "http://localhost:${PORT}${ROUTE}" \
-  -H "Content-Type: application/json" \
-  -d "${REQUEST}" \
-  --output /tmp/omni_response --max-time 300
+if [ "${CONTENT_TYPE}" = "multipart/form-data" ]; then
+    # Convert key=value&key2=value2 to -F flags
+    CURL_ARGS=""
+    IFS='&' read -ra PAIRS <<< "${REQUEST}"
+    for pair in "${PAIRS[@]}"; do
+        CURL_ARGS="${CURL_ARGS} -F ${pair}"
+    done
+    eval curl -sf -X POST "http://localhost:${PORT}${ROUTE}" ${CURL_ARGS} --output /tmp/omni_response --max-time 300
+else
+    curl -sf -X POST "http://localhost:${PORT}${ROUTE}" \
+      -H "Content-Type: application/json" \
+      -d "${REQUEST}" \
+      --output /tmp/omni_response --max-time 300
+fi
 
 # Validate response
 if [[ "${VALIDATE}" == binary_size_gt:* ]]; then
diff --git a/test/vllm-omni/scripts/vllm_omni_sagemaker_smoke_test.sh b/test/vllm-omni/scripts/vllm_omni_sagemaker_smoke_test.sh
index 97130cf9e592..c75b0b34072d 100755
--- a/test/vllm-omni/scripts/vllm_omni_sagemaker_smoke_test.sh
+++ b/test/vllm-omni/scripts/vllm_omni_sagemaker_smoke_test.sh
@@ -4,13 +4,15 @@
 # Request payload and validation are passed as arguments from the model config.
 set -eux
 
-ROUTE="${1:?Usage: $0 <route> <test_request_json> <validate>}"
-REQUEST="${2:?Usage: $0 <route> <test_request_json> <validate>}"
-VALIDATE="${3:?Usage: $0 <route> <test_request_json> <validate>}"
+ROUTE="${1:?Usage: $0 <route> <test_request> <validate> [content_type]}"
+REQUEST="${2:?Usage: $0 <route> <test_request> <validate> [content_type]}"
+VALIDATE="${3:?Usage: $0 <route> <test_request> <validate> [content_type]}"
+CONTENT_TYPE="${4:-application/json}"
 PORT=8080
 
 echo "=== vLLM-Omni SageMaker smoke test ==="
 echo "Route: ${ROUTE}"
+echo "Content-Type: ${CONTENT_TYPE}"
 echo "Validate: ${VALIDATE}"
 
 # Wait for server
@@ -25,11 +27,22 @@ done
 curl -sf http://localhost:${PORT}/ping || { echo "Ping failed"; exit 1; }
 
 # Send request via /invocations with route header
-curl -sf -X POST http://localhost:${PORT}/invocations \
-  -H "Content-Type: application/json" \
-  -H "X-Amzn-SageMaker-Custom-Attributes: route=${ROUTE}" \
-  -d "${REQUEST}" \
-  --output /tmp/omni_response --max-time 300
+if [ "${CONTENT_TYPE}" = "multipart/form-data" ]; then
+    CURL_ARGS=""
+    IFS='&' read -ra PAIRS <<< "${REQUEST}"
+    for pair in "${PAIRS[@]}"; do
+        CURL_ARGS="${CURL_ARGS} -F ${pair}"
+    done
+    eval curl -sf -X POST "http://localhost:${PORT}/invocations" \
+      -H "X-Amzn-SageMaker-Custom-Attributes: route=${ROUTE}" \
+      ${CURL_ARGS} --output /tmp/omni_response --max-time 300
+else
+    curl -sf -X POST http://localhost:${PORT}/invocations \
+      -H "Content-Type: application/json" \
+      -H "X-Amzn-SageMaker-Custom-Attributes: route=${ROUTE}" \
+      -d "${REQUEST}" \
+      --output /tmp/omni_response --max-time 300
+fi
 
 # Validate response
 if [[ "${VALIDATE}" == binary_size_gt:* ]]; then
@@ -43,7 +56,6 @@ elif [[ "${VALIDATE}" == json_field:* ]]; then
     python3 -c "
 import json, sys
 data = json.load(open('/tmp/omni_response'))
-# Navigate nested field like data[0].b64_json
 obj = data
 for part in '${FIELD}'.replace(']','').replace('[','.').split('.'):
     if part.isdigit():

From 1b03a34455909862d036b41ad8b77b17cc175472 Mon Sep 17 00:00:00 2001
From: Yadan Wei <yadanwei@amazon.com>
Date: Mon, 6 Apr 2026 15:03:22 -0700
Subject: [PATCH 51/58] fix: remove CosyVoice3 - transformers doesn't recognize
 cosyvoice3 model_type

EngineCore subprocess fails at AutoTokenizer.from_pretrained because
AutoConfig can't resolve cosyvoice3. The model uses ONNX tokenizers,
not HuggingFace tokenizers. Only works with offline Omni() API.
---
 .github/config/vllm-omni-model-tests.yml | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/.github/config/vllm-omni-model-tests.yml b/.github/config/vllm-omni-model-tests.yml
index 8f2271834a86..a7086a5fff70 100644
--- a/.github/config/vllm-omni-model-tests.yml
+++ b/.github/config/vllm-omni-model-tests.yml
@@ -19,14 +19,6 @@ smoke-test:
     #   test_request: '{"input": "Hello, how are you?", "voice": "vivian", "language": "English"}'
     #   validate: "binary_size_gt:1000"
 
-    - name: "cosyvoice3-0.5b"
-      s3_model: "cosyvoice3-0.5b.tar.gz"
-      fleet: "x86-g6e12xl-runner"
-      extra_args: "--trust-remote-code --enforce-eager"
-      route: "/v1/audio/speech"
-      test_request: '{"input": "Hello, this is a test.", "voice": "default"}'
-      validate: "binary_size_gt:1000"
-
     # --- Image generation models (route: /v1/images/generations) ---
     # - name: "flux2-klein-4b"
     #   s3_model: "flux2-klein-4b.tar.gz"
@@ -39,7 +31,7 @@ smoke-test:
     # --- Video generation models (route: /v1/videos) ---
     - name: "wan2.1-t2v-1.3b"
       s3_model: "wan2.1-t2v-1.3b.tar.gz"
-      fleet: "x86-g6e4xl-runner"
+      fleet: "x86-g6exl-runner"
       extra_args: ""
       route: "/v1/videos/sync"
       content_type: "multipart/form-data"

From 5109c99e4324e46439418e8432ff01f0067dcb06 Mon Sep 17 00:00:00 2001
From: Yadan Wei <yadanwei@amazon.com>
Date: Mon, 6 Apr 2026 15:13:28 -0700
Subject: [PATCH 52/58] fix: use bash array for curl form data to preserve
 header quoting

---
 test/vllm-omni/scripts/vllm_omni_ec2_smoke_test.sh     |  8 ++++----
 .../scripts/vllm_omni_sagemaker_smoke_test.sh          | 10 +++++-----
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/test/vllm-omni/scripts/vllm_omni_ec2_smoke_test.sh b/test/vllm-omni/scripts/vllm_omni_ec2_smoke_test.sh
index 6e05785c8aad..d97fa684f908 100755
--- a/test/vllm-omni/scripts/vllm_omni_ec2_smoke_test.sh
+++ b/test/vllm-omni/scripts/vllm_omni_ec2_smoke_test.sh
@@ -28,13 +28,13 @@ curl -sf http://localhost:${PORT}/health || { echo "Health check failed"; exit 1
 
 # Send request directly to the API endpoint
 if [ "${CONTENT_TYPE}" = "multipart/form-data" ]; then
-    # Convert key=value&key2=value2 to -F flags
-    CURL_ARGS=""
+    CURL_CMD=(curl -sf -X POST "http://localhost:${PORT}${ROUTE}")
     IFS='&' read -ra PAIRS <<< "${REQUEST}"
     for pair in "${PAIRS[@]}"; do
-        CURL_ARGS="${CURL_ARGS} -F ${pair}"
+        CURL_CMD+=(-F "${pair}")
     done
-    eval curl -sf -X POST "http://localhost:${PORT}${ROUTE}" ${CURL_ARGS} --output /tmp/omni_response --max-time 300
+    CURL_CMD+=(--output /tmp/omni_response --max-time 300)
+    "${CURL_CMD[@]}"
 else
     curl -sf -X POST "http://localhost:${PORT}${ROUTE}" \
       -H "Content-Type: application/json" \
diff --git a/test/vllm-omni/scripts/vllm_omni_sagemaker_smoke_test.sh b/test/vllm-omni/scripts/vllm_omni_sagemaker_smoke_test.sh
index c75b0b34072d..c7e63d5f8f91 100755
--- a/test/vllm-omni/scripts/vllm_omni_sagemaker_smoke_test.sh
+++ b/test/vllm-omni/scripts/vllm_omni_sagemaker_smoke_test.sh
@@ -28,14 +28,14 @@ curl -sf http://localhost:${PORT}/ping || { echo "Ping failed"; exit 1; }
 
 # Send request via /invocations with route header
 if [ "${CONTENT_TYPE}" = "multipart/form-data" ]; then
-    CURL_ARGS=""
+    CURL_CMD=(curl -sf -X POST "http://localhost:${PORT}/invocations"
+      -H "X-Amzn-SageMaker-Custom-Attributes: route=${ROUTE}")
     IFS='&' read -ra PAIRS <<< "${REQUEST}"
     for pair in "${PAIRS[@]}"; do
-        CURL_ARGS="${CURL_ARGS} -F ${pair}"
+        CURL_CMD+=(-F "${pair}")
     done
-    eval curl -sf -X POST "http://localhost:${PORT}/invocations" \
-      -H "X-Amzn-SageMaker-Custom-Attributes: route=${ROUTE}" \
-      ${CURL_ARGS} --output /tmp/omni_response --max-time 300
+    CURL_CMD+=(--output /tmp/omni_response --max-time 300)
+    "${CURL_CMD[@]}"
 else
     curl -sf -X POST http://localhost:${PORT}/invocations \
       -H "Content-Type: application/json" \

From b15417569249838c1d1d726a24d73aa7703c8d37 Mon Sep 17 00:00:00 2001
From: Yadan Wei <yadanwei@amazon.com>
Date: Mon, 6 Apr 2026 16:47:24 -0700
Subject: [PATCH 53/58] fix: Wan2.1 use /v1/videos (async), /v1/videos/sync not
 in v0.18.0

Verified on L40S with SM image:
- Model loads and serves on g6e.xlarge (L40S 48GB)
- /v1/videos returns queued job with id
- Middleware routes /invocations -> /v1/videos with form data
---
 .github/config/vllm-omni-model-tests.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/config/vllm-omni-model-tests.yml b/.github/config/vllm-omni-model-tests.yml
index a7086a5fff70..022c1aa5d5e3 100644
--- a/.github/config/vllm-omni-model-tests.yml
+++ b/.github/config/vllm-omni-model-tests.yml
@@ -33,7 +33,7 @@ smoke-test:
       s3_model: "wan2.1-t2v-1.3b.tar.gz"
       fleet: "x86-g6exl-runner"
       extra_args: ""
-      route: "/v1/videos/sync"
+      route: "/v1/videos"
       content_type: "multipart/form-data"
       test_request: 'prompt=a dog running on a beach&num_frames=17&num_inference_steps=4&size=480x320&seed=42'
       validate: "binary_size_gt:1000"

From cd465021600402f352a09bdc60e64b7c72c7972e Mon Sep 17 00:00:00 2001
From: Yadan Wei <yadanwei@amazon.com>
Date: Mon, 6 Apr 2026 17:04:42 -0700
Subject: [PATCH 54/58] fix: Wan2.1 validate json_field:id (async API returns
 JSON, not binary)

---
 .github/config/vllm-omni-model-tests.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/config/vllm-omni-model-tests.yml b/.github/config/vllm-omni-model-tests.yml
index 022c1aa5d5e3..bbe013caf024 100644
--- a/.github/config/vllm-omni-model-tests.yml
+++ b/.github/config/vllm-omni-model-tests.yml
@@ -36,7 +36,7 @@ smoke-test:
       route: "/v1/videos"
       content_type: "multipart/form-data"
       test_request: 'prompt=a dog running on a beach&num_frames=17&num_inference_steps=4&size=480x320&seed=42'
-      validate: "binary_size_gt:1000"
+      validate: "json_field:id"
 
     # --- Omni chat models (route: /v1/chat/completions, fallthrough) ---
     # - name: "bagel-7b-mot"

From b1d1eac91484a4b3727dba121a2aeeff9651fcbc Mon Sep 17 00:00:00 2001
From: Yadan Wei <yadanwei@amazon.com>
Date: Mon, 6 Apr 2026 17:16:17 -0700
Subject: [PATCH 55/58] enable all models except bagel-7b-mot

Signed-off-by: Yadan Wei <yadanwei@amazon.com>
---
 .github/config/vllm-omni-model-tests.yml | 43 ++++++++++++------------
 1 file changed, 22 insertions(+), 21 deletions(-)

diff --git a/.github/config/vllm-omni-model-tests.yml b/.github/config/vllm-omni-model-tests.yml
index bbe013caf024..f093bf77c2d8 100644
--- a/.github/config/vllm-omni-model-tests.yml
+++ b/.github/config/vllm-omni-model-tests.yml
@@ -11,22 +11,22 @@ s3_prefix: "s3://dlc-cicd-models/omni-models"
 smoke-test:
   codebuild-fleet:
     # --- TTS models (route: /v1/audio/speech) ---
-    # - name: "qwen3-tts-1.7b-customvoice"
-    #   s3_model: "qwen3-tts-1.7b-customvoice.tar.gz"
-    #   fleet: "x86-g6xl-runner"
-    #   extra_args: ""
-    #   route: "/v1/audio/speech"
-    #   test_request: '{"input": "Hello, how are you?", "voice": "vivian", "language": "English"}'
-    #   validate: "binary_size_gt:1000"
+    - name: "qwen3-tts-1.7b-customvoice"
+      s3_model: "qwen3-tts-1.7b-customvoice.tar.gz"
+      fleet: "x86-g6xl-runner"
+      extra_args: ""
+      route: "/v1/audio/speech"
+      test_request: '{"input": "Hello, how are you?", "voice": "vivian", "language": "English"}'
+      validate: "binary_size_gt:1000"
 
     # --- Image generation models (route: /v1/images/generations) ---
-    # - name: "flux2-klein-4b"
-    #   s3_model: "flux2-klein-4b.tar.gz"
-    #   fleet: "x86-g6xl-runner"
-    #   extra_args: ""
-    #   route: "/v1/images/generations"
-    #   test_request: '{"prompt": "a red apple on a white table", "size": "512x512", "n": 1}'
-    #   validate: "json_field:data[0].b64_json"
+    - name: "flux2-klein-4b"
+      s3_model: "flux2-klein-4b.tar.gz"
+      fleet: "x86-g6xl-runner"
+      extra_args: ""
+      route: "/v1/images/generations"
+      test_request: '{"prompt": "a red apple on a white table", "size": "512x512", "n": 1}'
+      validate: "json_field:data[0].b64_json"
 
     # --- Video generation models (route: /v1/videos) ---
     - name: "wan2.1-t2v-1.3b"
@@ -39,6 +39,7 @@ smoke-test:
       validate: "json_field:id"
 
     # --- Omni chat models (route: /v1/chat/completions, fallthrough) ---
+    # bagel-7b-mot is a large model; its smoke test stays disabled for now
     # - name: "bagel-7b-mot"
     #   s3_model: "bagel-7b-mot.tar.gz"
     #   fleet: "x86-g6e4xl-runner"
@@ -47,10 +48,10 @@ smoke-test:
     #   test_request: '{"messages": [{"role": "user", "content": [{"type": "text", "text": "<|im_start|>A cute cat<|im_end|>"}]}], "modalities": ["image"], "height": 512, "width": 512, "num_inference_steps": 4, "seed": 42}'
     #   validate: "json_field:choices[0].message.content"
 
-    # - name: "qwen2.5-omni-3b"
-    #   s3_model: "qwen2.5-omni-3b.tar.gz"
-    #   fleet: "x86-g6e12xl-runner"
-    #   extra_args: ""
-    #   route: "/v1/chat/completions"
-    #   test_request: '{"messages": [{"role": "user", "content": "Say hello in one sentence."}], "max_tokens": 64}'
-    #   validate: "json_field:choices[0].message.content"
+    - name: "qwen2.5-omni-3b"
+      s3_model: "qwen2.5-omni-3b.tar.gz"
+      fleet: "x86-g6e12xl-runner"
+      extra_args: ""
+      route: "/v1/chat/completions"
+      test_request: '{"messages": [{"role": "user", "content": "Say hello in one sentence."}], "max_tokens": 64}'
+      validate: "json_field:choices[0].message.content"

From 98f5f93a562580465b0599c7f92ee999aa2ea277 Mon Sep 17 00:00:00 2001
From: Yadan Wei <yadanwei@amazon.com>
Date: Mon, 6 Apr 2026 18:29:31 -0700
Subject: [PATCH 56/58] Revert "ci: Disable all non-omni PR workflows"

This reverts commit 8d55aa330565b6849eaae07ebe630a2a763579c8.
---
 .github/workflows/pr-base-v1.yml              | 13 ++++++++++--
 .github/workflows/pr-base-v2.yml              | 13 ++++++++++--
 .github/workflows/pr-docs.yml                 |  7 +++++--
 .github/workflows/pr-lambda.yml               | 14 +++++++++++--
 .github/workflows/pr-pytorch-ec2.yml          | 11 ++++++++--
 .github/workflows/pr-ray-ec2-cpu.yml          |  8 ++++++--
 .github/workflows/pr-ray-ec2-gpu.yml          |  8 ++++++--
 .github/workflows/pr-ray-sagemaker-cpu.yml    |  8 ++++++--
 .github/workflows/pr-ray-sagemaker-gpu.yml    |  8 ++++++--
 .github/workflows/pr-sagemaker-xgboost.yml    | 10 ++++++++--
 .github/workflows/pr-sglang-ec2-amzn2023.yml  | 18 +++++++++++++++--
 .github/workflows/pr-sglang-ec2.yml           |  9 +++++++--
 .../pr-sglang-sagemaker-amzn2023.yml          | 20 +++++++++++++++++--
 .github/workflows/pr-sglang-sagemaker.yml     |  9 +++++++--
 .github/workflows/pr-vllm-ec2-amzn2023.yml    | 18 +++++++++++++++--
 .github/workflows/pr-vllm-ec2.yml             | 10 ++++++++--
 .github/workflows/pr-vllm-rayserve.yml        | 10 ++++++++--
 .../workflows/pr-vllm-sagemaker-amzn2023.yml  | 20 +++++++++++++++++--
 .github/workflows/pr-vllm-sagemaker.yml       | 10 ++++++++--
 19 files changed, 186 insertions(+), 38 deletions(-)

diff --git a/.github/workflows/pr-base-v1.yml b/.github/workflows/pr-base-v1.yml
index 898c3db42494..d86732a69310 100644
--- a/.github/workflows/pr-base-v1.yml
+++ b/.github/workflows/pr-base-v1.yml
@@ -1,8 +1,17 @@
 name: PR - Base v1
 
-# Disabled: focusing on omni workflows only
 on:
-  workflow_dispatch: {}
+  pull_request:
+    branches: [main]
+    types: [opened, reopened, synchronize]
+    paths:
+      - "docker/base/**"
+      - "scripts/common/**"
+      - "test/cuda/**"
+      - "test/security/data/ecr_scan_allowlist/base/**"
+      - ".github/config/base-v1.yml"
+      - ".github/workflows/pr-base-v1.yml"
+      - "!docs/**"
 
 permissions:
   contents: read
diff --git a/.github/workflows/pr-base-v2.yml b/.github/workflows/pr-base-v2.yml
index 7d96459c3e1c..6ac4244be451 100644
--- a/.github/workflows/pr-base-v2.yml
+++ b/.github/workflows/pr-base-v2.yml
@@ -1,8 +1,17 @@
 name: PR - Base v2
 
-# Disabled: focusing on omni workflows only
 on:
-  workflow_dispatch: {}
+  pull_request:
+    branches: [main]
+    types: [opened, reopened, synchronize]
+    paths:
+      - "docker/base/**"
+      - "scripts/common/**"
+      - "test/cuda/**"
+      - "test/security/data/ecr_scan_allowlist/base/**"
+      - ".github/config/base-v2.yml"
+      - ".github/workflows/pr-base-v2.yml"
+      - "!docs/**"
 
 permissions:
   contents: read
diff --git a/.github/workflows/pr-docs.yml b/.github/workflows/pr-docs.yml
index b12f778ad913..0ef58ad45d12 100644
--- a/.github/workflows/pr-docs.yml
+++ b/.github/workflows/pr-docs.yml
@@ -1,8 +1,11 @@
 name: PR - Documentations
 
-# Disabled: focusing on omni workflows only
 on:
-  workflow_dispatch: {}
+  pull_request:
+    branches: [main]
+    types: [opened, reopened, synchronize]
+    paths:
+      - "**docs**"
 
 permissions:
   contents: read
diff --git a/.github/workflows/pr-lambda.yml b/.github/workflows/pr-lambda.yml
index 531c764a0da4..4a1d4989d8d2 100644
--- a/.github/workflows/pr-lambda.yml
+++ b/.github/workflows/pr-lambda.yml
@@ -1,8 +1,18 @@
 name: PR - Lambda
 
-# Disabled: focusing on omni workflows only
 on:
-  workflow_dispatch: {}
+  pull_request:
+    branches: [main]
+    types: [opened, reopened, synchronize]
+    paths:
+      - "docker/lambda/**"
+      - "scripts/lambda/**"
+      - "scripts/common/**"
+      - "scripts/telemetry/**"
+      - "test/lambda/**"
+      - "test/security/data/ecr_scan_allowlist/lambda/**"
+      - ".github/workflows/pr-lambda.yml"
+      - "!docs/**"
 
 permissions:
   contents: read
diff --git a/.github/workflows/pr-pytorch-ec2.yml b/.github/workflows/pr-pytorch-ec2.yml
index ca3899a1c399..cd9a725a4c80 100644
--- a/.github/workflows/pr-pytorch-ec2.yml
+++ b/.github/workflows/pr-pytorch-ec2.yml
@@ -1,8 +1,15 @@
 name: PR - PyTorch EC2
 
-# Disabled: focusing on omni workflows only
 on:
-  workflow_dispatch: {}
+  pull_request:
+    branches: [main]
+    types: [opened, reopened, synchronize]
+    paths:
+      - "docker/pytorch/**"
+      - "scripts/pytorch/**"
+      - "test/pytorch/**"
+      - ".github/workflows/pr-pytorch-ec2.yml"
+      - "!docs/**"
 
 permissions:
   contents: read
diff --git a/.github/workflows/pr-ray-ec2-cpu.yml b/.github/workflows/pr-ray-ec2-cpu.yml
index 90abdd8f4ce4..5216620ae802 100644
--- a/.github/workflows/pr-ray-ec2-cpu.yml
+++ b/.github/workflows/pr-ray-ec2-cpu.yml
@@ -1,8 +1,12 @@
 name: PR - Ray EC2 CPU
 
-# Disabled: focusing on omni workflows only
 on:
-  workflow_dispatch: {}
+  pull_request:
+    branches: [main]
+    types: [opened, reopened, synchronize]
+    paths:
+      - "**ray**"
+      - "!docs/**"
 
 permissions:
   contents: read
diff --git a/.github/workflows/pr-ray-ec2-gpu.yml b/.github/workflows/pr-ray-ec2-gpu.yml
index 965d2457a59c..4e876c606d3d 100644
--- a/.github/workflows/pr-ray-ec2-gpu.yml
+++ b/.github/workflows/pr-ray-ec2-gpu.yml
@@ -1,8 +1,12 @@
 name: PR - Ray EC2 GPU
 
-# Disabled: focusing on omni workflows only
 on:
-  workflow_dispatch: {}
+  pull_request:
+    branches: [main]
+    types: [opened, reopened, synchronize]
+    paths:
+      - "**ray**"
+      - "!docs/**"
 
 permissions:
   contents: read
diff --git a/.github/workflows/pr-ray-sagemaker-cpu.yml b/.github/workflows/pr-ray-sagemaker-cpu.yml
index 0349a5a2b048..57f2f3cdc4a8 100644
--- a/.github/workflows/pr-ray-sagemaker-cpu.yml
+++ b/.github/workflows/pr-ray-sagemaker-cpu.yml
@@ -1,8 +1,12 @@
 name: PR - Ray SageMaker CPU
 
-# Disabled: focusing on omni workflows only
 on:
-  workflow_dispatch: {}
+  pull_request:
+    branches: [main]
+    types: [opened, reopened, synchronize]
+    paths:
+      - "**ray**"
+      - "!docs/**"
 
 permissions:
   contents: read
diff --git a/.github/workflows/pr-ray-sagemaker-gpu.yml b/.github/workflows/pr-ray-sagemaker-gpu.yml
index 72bc343adcd1..c6eb8b9b9d29 100644
--- a/.github/workflows/pr-ray-sagemaker-gpu.yml
+++ b/.github/workflows/pr-ray-sagemaker-gpu.yml
@@ -1,8 +1,12 @@
 name: PR - Ray SageMaker GPU
 
-# Disabled: focusing on omni workflows only
 on:
-  workflow_dispatch: {}
+  pull_request:
+    branches: [main]
+    types: [opened, reopened, synchronize]
+    paths:
+      - "**ray**"
+      - "!docs/**"
 
 permissions:
   contents: read
diff --git a/.github/workflows/pr-sagemaker-xgboost.yml b/.github/workflows/pr-sagemaker-xgboost.yml
index 46a21f5fa038..6880785dc9db 100644
--- a/.github/workflows/pr-sagemaker-xgboost.yml
+++ b/.github/workflows/pr-sagemaker-xgboost.yml
@@ -1,8 +1,14 @@
 name: PR - SageMaker XGBoost
 
-# Disabled: focusing on omni workflows only
 on:
-  workflow_dispatch: {}
+  pull_request:
+    branches: [main]
+    types: [opened, reopened, synchronize]
+    paths:
+      - "docker/xgboost/**"
+      - ".github/config/sagemaker-xgboost.yml"
+      - ".github/workflows/pr-sagemaker-xgboost.yml"
+      - "!docs/**"
 
 permissions:
   contents: read
diff --git a/.github/workflows/pr-sglang-ec2-amzn2023.yml b/.github/workflows/pr-sglang-ec2-amzn2023.yml
index 2948270065d8..38545fbb5bb2 100644
--- a/.github/workflows/pr-sglang-ec2-amzn2023.yml
+++ b/.github/workflows/pr-sglang-ec2-amzn2023.yml
@@ -1,8 +1,22 @@
 name: PR - SGLang EC2 AMZN2023
 
-# Disabled: focusing on omni workflows only
 on:
-  workflow_dispatch: {}
+  pull_request:
+    branches: [main]
+    types: [opened, reopened, synchronize]
+    paths:
+      - "docker/sglang/Dockerfile.amzn2023"
+      - "scripts/sglang/dockerd_entrypoint.sh"
+      - "scripts/sglang/sagemaker_entrypoint.sh"
+      - "scripts/common/**"
+      - "scripts/telemetry/**"
+      - ".github/config/sglang-ec2-amzn2023.yml"
+      - ".github/config/sglang-model-tests.yml"
+      - ".github/workflows/pr-sglang-ec2-amzn2023.yml"
+      - ".github/workflows/reusable-sglang-model-tests.yml"
+      - "test/sanity/**"
+      - "test/telemetry/**"
+      - "test/sglang/scripts/**"
 
 permissions:
   contents: read
diff --git a/.github/workflows/pr-sglang-ec2.yml b/.github/workflows/pr-sglang-ec2.yml
index f47a1ebc8825..10edece27b60 100644
--- a/.github/workflows/pr-sglang-ec2.yml
+++ b/.github/workflows/pr-sglang-ec2.yml
@@ -1,8 +1,13 @@
 name: PR - SGLang EC2
 
-# Disabled: focusing on omni workflows only
 on:
-  workflow_dispatch: {}
+  pull_request:
+    branches: [main]
+    types: [opened, reopened, synchronize]
+    paths:
+      - "**sglang**"
+      - "!docs/**"
+      - "!**amzn2023**"
 
 permissions:
   contents: read
diff --git a/.github/workflows/pr-sglang-sagemaker-amzn2023.yml b/.github/workflows/pr-sglang-sagemaker-amzn2023.yml
index e7a6c4192d13..b9f416ff1efe 100644
--- a/.github/workflows/pr-sglang-sagemaker-amzn2023.yml
+++ b/.github/workflows/pr-sglang-sagemaker-amzn2023.yml
@@ -1,8 +1,24 @@
 name: PR - SGLang SageMaker AMZN2023
 
-# Disabled: focusing on omni workflows only
 on:
-  workflow_dispatch: {}
+  pull_request:
+    branches: [main]
+    types: [opened, reopened, synchronize]
+    paths:
+      - "docker/sglang/Dockerfile.amzn2023"
+      - "scripts/sglang/dockerd_entrypoint.sh"
+      - "scripts/sglang/sagemaker_entrypoint.sh"
+      - "scripts/common/**"
+      - "scripts/telemetry/**"
+      - ".github/config/sglang-sagemaker-amzn2023.yml"
+      - ".github/workflows/pr-sglang-sagemaker-amzn2023.yml"
+      - ".github/workflows/reusable-sglang-sagemaker-tests.yml"
+      - ".github/workflows/reusable-sglang-model-tests.yml"
+      - ".github/config/sglang-model-tests.yml"
+      - "test/sanity/**"
+      - "test/telemetry/**"
+      - "test/sglang/sagemaker/**"
+      - "test/sglang/scripts/**"
 
 permissions:
   contents: read
diff --git a/.github/workflows/pr-sglang-sagemaker.yml b/.github/workflows/pr-sglang-sagemaker.yml
index b7da342e04d8..444f2ea6efa0 100644
--- a/.github/workflows/pr-sglang-sagemaker.yml
+++ b/.github/workflows/pr-sglang-sagemaker.yml
@@ -1,8 +1,13 @@
 name: PR - SGLang SageMaker
 
-# Disabled: focusing on omni workflows only
 on:
-  workflow_dispatch: {}
+  pull_request:
+    branches: [main]
+    types: [opened, reopened, synchronize]
+    paths:
+      - "**sglang**"
+      - "!docs/**"
+      - "!**amzn2023**"
 
 permissions:
   contents: read
diff --git a/.github/workflows/pr-vllm-ec2-amzn2023.yml b/.github/workflows/pr-vllm-ec2-amzn2023.yml
index f790b145b062..0f314aa6b0d5 100644
--- a/.github/workflows/pr-vllm-ec2-amzn2023.yml
+++ b/.github/workflows/pr-vllm-ec2-amzn2023.yml
@@ -1,8 +1,22 @@
 name: PR - vLLM EC2 AMZN2023
 
-# Disabled: focusing on omni workflows only
 on:
-  workflow_dispatch: {}
+  pull_request:
+    branches: [main]
+    types: [opened, reopened, synchronize]
+    paths:
+      - "docker/vllm/Dockerfile.amzn2023"
+      - "scripts/vllm/amzn2023/**"
+      - "scripts/vllm/dockerd_entrypoint.sh"
+      - "scripts/vllm/sagemaker_entrypoint.sh"
+      - "scripts/common/**"
+      - "scripts/telemetry/**"
+      - ".github/config/vllm-ec2-amzn2023.yml"
+      # - ".github/workflows/pr-vllm-ec2-amzn2023.yml"
+      - ".github/workflows/reusable-vllm-upstream-tests.yml"
+      - ".github/workflows/reusable-vllm-model-tests.yml"
+      # - "test/sanity/**"
+      - "test/telemetry/**"
 
 permissions:
   contents: read
diff --git a/.github/workflows/pr-vllm-ec2.yml b/.github/workflows/pr-vllm-ec2.yml
index 23cfaa6b15e6..1bd1a230deb2 100644
--- a/.github/workflows/pr-vllm-ec2.yml
+++ b/.github/workflows/pr-vllm-ec2.yml
@@ -1,8 +1,14 @@
 name: PR - vLLM EC2
 
-# Disabled: focusing on omni workflows only
 on:
-  workflow_dispatch: {}
+  # Direct execution on pull requests
+  pull_request:
+    branches: [main]
+    types: [opened, reopened, synchronize]
+    paths:
+      - "**vllm**"
+      - "!docs/**"
+      - "!**amzn2023**"
 
 permissions:
   contents: read
diff --git a/.github/workflows/pr-vllm-rayserve.yml b/.github/workflows/pr-vllm-rayserve.yml
index 3acae56e1294..df61aa89cc06 100644
--- a/.github/workflows/pr-vllm-rayserve.yml
+++ b/.github/workflows/pr-vllm-rayserve.yml
@@ -1,8 +1,14 @@
 name: PR - vLLM RayServe
 
-# Disabled: focusing on omni workflows only
 on:
-  workflow_dispatch: {}
+  # Direct execution on pull requests
+  pull_request:
+    branches: [main]
+    types: [opened, reopened, synchronize]
+    paths:
+      - "**vllm**"
+      - "!docs/**"
+      - "!**amzn2023**"
 
 permissions:
   contents: read
diff --git a/.github/workflows/pr-vllm-sagemaker-amzn2023.yml b/.github/workflows/pr-vllm-sagemaker-amzn2023.yml
index a615a23a4700..5ba3c3a3d73b 100644
--- a/.github/workflows/pr-vllm-sagemaker-amzn2023.yml
+++ b/.github/workflows/pr-vllm-sagemaker-amzn2023.yml
@@ -1,8 +1,24 @@
 name: PR - vLLM SageMaker AMZN2023
 
-# Disabled: focusing on omni workflows only
 on:
-  workflow_dispatch: {}
+  pull_request:
+    branches: [main]
+    types: [opened, reopened, synchronize]
+    paths:
+      - "docker/vllm/Dockerfile.amzn2023"
+      - "scripts/vllm/amzn2023/**"
+      - "!scripts/vllm/amzn2023/vllm_model_smoke_test.sh"
+      - "scripts/vllm/dockerd_entrypoint.sh"
+      - "scripts/vllm/sagemaker_entrypoint.sh"
+      - "scripts/common/**"
+      - "scripts/telemetry/**"
+      - ".github/config/vllm-sagemaker-amzn2023.yml"
+      # - ".github/workflows/pr-vllm-sagemaker-amzn2023.yml"
+      - ".github/workflows/reusable-vllm-upstream-tests.yml"
+      - ".github/workflows/reusable-vllm-sagemaker-tests.yml"
+      # - "test/sanity/**"
+      - "test/telemetry/**"
+      - "test/vllm/sagemaker/**"
 
 permissions:
   contents: read
diff --git a/.github/workflows/pr-vllm-sagemaker.yml b/.github/workflows/pr-vllm-sagemaker.yml
index 54d05f11b052..467f3986751f 100644
--- a/.github/workflows/pr-vllm-sagemaker.yml
+++ b/.github/workflows/pr-vllm-sagemaker.yml
@@ -1,8 +1,14 @@
 name: PR - vLLM SageMaker
 
-# Disabled: focusing on omni workflows only
 on:
-  workflow_dispatch: {}
+  # Direct execution on pull requests
+  pull_request:
+    branches: [main]
+    types: [opened, reopened, synchronize]
+    paths:
+      - "**vllm**"
+      - "!docs/**"
+      - "!**amzn2023**"
 
 permissions:
   contents: read

From e0e54daaffc66cbe9a16c166cf0658c3614c6081 Mon Sep 17 00:00:00 2001
From: Yadan Wei <yadanwei@amazon.com>
Date: Mon, 6 Apr 2026 21:01:53 -0700
Subject: [PATCH 57/58] fix: remove CVE-2026-33055 allowlist entry (fixed in uv
 tar crate 0.4.45)

---
 .../data/ecr_scan_allowlist/vllm/framework_allowlist.json  | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/test/security/data/ecr_scan_allowlist/vllm/framework_allowlist.json b/test/security/data/ecr_scan_allowlist/vllm/framework_allowlist.json
index 95591f599e4a..1dae8903a160 100644
--- a/test/security/data/ecr_scan_allowlist/vllm/framework_allowlist.json
+++ b/test/security/data/ecr_scan_allowlist/vllm/framework_allowlist.json
@@ -103,11 +103,6 @@
         "vulnerability_id": "CVE-2026-31812",
         "reason": "Coming in as a dependency from the latest uv 0.10.9"
     },
-    {
-        "vulnerability_id": "CVE-2026-33055",
-        "reason": "Rust tar crate 0.4.44 bundled in uv binary, fix requires uv upstream update to tar>=0.4.45",
-        "review_by": "2026-04-06"
-    },
     {
         "vulnerability_id": "CVE-2026-27893",
         "reason": "vllm 0.10.2 RayServe image - trust_remote_code=True hardcoded, fixed in vllm>=0.18.0. RayServe image not updated in this PR."
@@ -124,4 +119,4 @@
         "vulnerability_id": "CVE-2026-34520",
         "reason": "aiohttp 3.12.15 vendored inside ray/_private/runtime_env/agent/thirdparty_files/, unpatchable without Ray upgrade"
     }
-]
\ No newline at end of file
+]

From 00bb4061e012c9dbec37598af42bd843120dcbb5 Mon Sep 17 00:00:00 2001
From: Yadan Wei <yadanwei@amazon.com>
Date: Mon, 6 Apr 2026 21:14:37 -0700
Subject: [PATCH 58/58] fix: patch aiohttp CVEs in sglang Dockerfile

- sglang: add aiohttp>=3.13.4 to CVE patch block
- vllm: expired CVE-2026-33055 allowlist entry (fixed in uv tar 0.4.45)
  was already removed in the previous commit; no vllm files change here

Fixes: CVE-2026-34520, CVE-2026-34516, CVE-2026-22815
---
 docker/sglang/Dockerfile.amzn2023 | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/docker/sglang/Dockerfile.amzn2023 b/docker/sglang/Dockerfile.amzn2023
index 121ca2d22d2f..901ca7f8d0b4 100644
--- a/docker/sglang/Dockerfile.amzn2023
+++ b/docker/sglang/Dockerfile.amzn2023
@@ -241,7 +241,8 @@ RUN uv pip install --system --no-cache \
   "pillow>=12.1.1" \
   "python_multipart>=0.0.22" \
   "xgrammar>=0.1.32" \
-  "setuptools>=78.1.1"
+  "setuptools>=78.1.1" \
+  "aiohttp>=3.13.4"
 
 # Re-pin NCCL/cuDNN/cuSparseLt after CVE patches (transitive deps may downgrade or remove them)
 # cuSparseLt installed without --no-deps in case it wasn't present from builder