aws
diff --git a/‎dlc_developer_config.toml‎
Lines changed: 4 additions & 1 deletion b/‎dlc_developer_config.toml‎
Lines changed: 4 additions & 1 deletion
diff --git a/‎huggingface/vllm-omni/buildspec.yml‎
Lines changed: 47 additions & 0 deletions b/‎huggingface/vllm-omni/buildspec.yml‎
Lines changed: 47 additions & 0 deletions
diff --git a/‎huggingface/vllm-omni/docker/0.20/cu130/Dockerfile‎
Lines changed: 41 additions & 0 deletions b/‎huggingface/vllm-omni/docker/0.20/cu130/Dockerfile‎
Lines changed: 41 additions & 0 deletions
diff --git a/‎huggingface/vllm-omni/telemetry.sh‎
Lines changed: 12 additions & 0 deletions b/‎huggingface/vllm-omni/telemetry.sh‎
Lines changed: 12 additions & 0 deletions
diff --git a/‎huggingface/vllm/buildspec.yml‎
Lines changed: 1 addition & 0 deletions b/‎huggingface/vllm/buildspec.yml‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎release_images_general.yml‎
Lines changed: 14 additions & 0 deletions b/‎release_images_general.yml‎
Lines changed: 14 additions & 0 deletions
diff --git a/‎src/constants.py‎
Lines changed: 1 addition & 0 deletions b/‎src/constants.py‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎src/deep_learning_container.py‎
Lines changed: 11 additions & 1 deletion b/‎src/deep_learning_container.py‎
Lines changed: 11 additions & 1 deletion
diff --git a/‎test/dlc_tests/sanity/test_pre_release.py‎
Lines changed: 25 additions & 6 deletions b/‎test/dlc_tests/sanity/test_pre_release.py‎
Lines changed: 25 additions & 6 deletions
diff --git a/‎test/sagemaker_tests/huggingface/vllm_omni/__init__.py‎
Lines changed: 13 additions & 0 deletions b/‎test/sagemaker_tests/huggingface/vllm_omni/__init__.py‎
Lines changed: 13 additions & 0 deletions
@@ -36,7 +36,7 @@ deep_canary_mode = false
 
 [build]
 # Add in frameworks you would like to build. By default, builds are disabled unless you specify building an image.
-# available frameworks - ["base", "vllm", "sglang", "autogluon", "huggingface_vllm", "huggingface_sglang", "huggingface_tensorflow", "huggingface_pytorch", "huggingface_tensorflow_trcomp", "huggingface_pytorch_trcomp", "pytorch_trcomp", "tensorflow", "pytorch", "stabilityai_pytorch"]
+# available frameworks - ["base", "vllm", "sglang", "autogluon", "huggingface_vllm", "huggingface_vllm_omni", "huggingface_sglang", "huggingface_tensorflow", "huggingface_pytorch", "huggingface_tensorflow_trcomp", "huggingface_pytorch_trcomp", "pytorch_trcomp", "tensorflow", "pytorch", "stabilityai_pytorch"]
 build_frameworks = []
 
 
@@ -189,6 +189,9 @@ dlc-pr-vllm = ""
 # HuggingFace vLLM
 dlc-pr-huggingface-vllm = ""
 
+# HuggingFace vLLM Omni
+dlc-pr-huggingface-vllm-omni = ""
+
 # HuggingFace SGLang
 dlc-pr-huggingface-sglang = ""
 
 
@@ -0,0 +1,47 @@
+account_id: &ACCOUNT_ID <set-$ACCOUNT_ID-in-environment>
+prod_account_id: &PROD_ACCOUNT_ID 763104351884
+region: &REGION <set-$REGION-in-environment>
+base_framework: &BASE_FRAMEWORK vllm-omni
+framework: &FRAMEWORK huggingface_vllm_omni
+version: &VERSION "0.20.0"
+short_version: &SHORT_VERSION "0.20"
+arch_type: &ARCH_TYPE x86
+autopatch_build: "False"
+
+repository_info:
+  build_repository: &BUILD_REPOSITORY
+    image_type: &IMAGE_TYPE inference
+    root: huggingface/vllm-omni
+    repository_name: &REPOSITORY_NAME !join [ "pr", "-", "huggingface", "-", *BASE_FRAMEWORK ]
+    repository: &REPOSITORY !join [ *ACCOUNT_ID, .dkr.ecr., *REGION, .amazonaws.com/, *REPOSITORY_NAME ]
+    release_repository_name: &RELEASE_REPOSITORY_NAME !join [ "huggingface", "-", *BASE_FRAMEWORK ]
+    release_repository: &RELEASE_REPOSITORY !join [ *PROD_ACCOUNT_ID, .dkr.ecr., *REGION, .amazonaws.com/, *RELEASE_REPOSITORY_NAME ]
+
+context:
+  build_context: &BUILD_CONTEXT
+    deep_learning_container:
+      source: ../../src/deep_learning_container.py
+      target: deep_learning_container.py
+
+images:
+  BuildHuggingFaceVllmOmniGpuPy312Cu130DockerImage:
+    <<: *BUILD_REPOSITORY
+    context:
+      <<: *BUILD_CONTEXT
+    image_size_baseline: 26000
+    device_type: &DEVICE_TYPE gpu
+    cuda_version: &CUDA_VERSION cu130
+    python_version: &DOCKER_PYTHON_VERSION py3
+    tag_python_version: &TAG_PYTHON_VERSION py312
+    os_version: &OS_VERSION amzn2023
+    transformers_version: &TRANSFORMERS_VERSION 5.8.1
+    tag: !join [ *VERSION, '-', 'transformers', *TRANSFORMERS_VERSION, '-', *DEVICE_TYPE, '-', *TAG_PYTHON_VERSION, '-', *CUDA_VERSION, '-', *OS_VERSION ]
+    docker_file: !join [ docker/, *SHORT_VERSION, /, *CUDA_VERSION, /Dockerfile ]
+    target: sagemaker
+    build: true
+    enable_common_stage_build: false
+    test_configs:
+      test_platforms:
+        - sanity
+        - security
+        - sagemaker
@@ -0,0 +1,41 @@
+ARG FINAL_BASE_IMAGE=763104351884.dkr.ecr.us-west-2.amazonaws.com/vllm:omni-sagemaker-cuda-v1.1
+FROM ${FINAL_BASE_IMAGE} AS vllm-base
+
+LABEL maintainer="Amazon AI"
+LABEL dlc_major_version="1"
+
+ARG TRANSFORMERS_VERSION=5.8.1
+ARG DIFFUSERS_VERSION=0.38.0
+ARG HUGGINGFACE_HUB_VERSION=1.15.0
+ARG HF_XET_VERSION=1.5.0
+
+RUN uv pip install --no-cache-dir --prerelease=allow\
+    transformers==${TRANSFORMERS_VERSION} \
+    diffusers==${DIFFUSERS_VERSION} \
+    huggingface-hub==${HUGGINGFACE_HUB_VERSION} \
+    hf-xet==${HF_XET_VERSION} \
+    grpcio
+
+FROM vllm-base AS sagemaker
+
+ENV HF_HUB_USER_AGENT_ORIGIN="aws:sagemaker:gpu-cuda:inference:hf-vllm-omni"
+
+RUN HOME_DIR=/root \
+    && uv pip install --system --upgrade pip requests PTable \
+    && curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip \
+    && unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/ \
+    && cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance \
+    && chmod +x /usr/local/bin/testOSSCompliance \
+    && chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh \
+    && ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} python3 \
+    && rm -rf ${HOME_DIR}/oss_compliance*
+
+RUN uv pip install --no-cache-dir prettytable
+
+# Mooncake Go gRPC CVEs
+# NOTE: The fix (upgrading to mooncake-transfer-engine-cuda13) cannot be applied yet: that package requires GLIBC 2.35, but the base image ships GLIBC 2.34.
+# RUN uv pip install --no-cache-dir mooncake-transfer-engine-cuda13==v0.3.10.post2
+# In the meantime, uninstall the Mooncake transfer engine to avoid dependency conflicts.
+RUN uv pip uninstall mooncake-transfer-engine
+
+ENTRYPOINT ["/usr/local/bin/sagemaker_entrypoint.sh"]
@@ -0,0 +1,12 @@
+#!/bin/bash
+# telemetry.sh
+if [ -f /usr/local/bin/deep_learning_container.py ] && [[ -z "${OPT_OUT_TRACKING}" || "${OPT_OUT_TRACKING,,}" != "true" ]]; then
+    (
+        python /usr/local/bin/deep_learning_container.py \
+            --framework "huggingface_vllm_omni" \
+            --framework-version "0.20.0" \
+            --container-type "general" \
+            &>/dev/null &
+    )
+fi
+
@@ -4,6 +4,7 @@ region: &REGION <set-$REGION-in-environment>
 base_framework: &BASE_FRAMEWORK vllm
 framework: &FRAMEWORK !join [ "huggingface_", *BASE_FRAMEWORK]
 version: &VERSION "0.21.0"
+contributor: huggingface
 short_version: &SHORT_VERSION "0.21"
 arch_type: &ARCH_TYPE x86_64
 autopatch_build: "False"
 
@@ -204,3 +204,17 @@ release_images:
       example: False
       disable_sm_tag: True
       force_release: False
+  15:
+    framework: "huggingface_vllm_omni"
+    version: "0.18.0"
+    arch_type: "x86"
+    hf_transformers: "4.57.6"
+    vllm_omni_version: "0.18.0"
+    general:
+      device_types: [ "gpu" ]
+      python_versions: [ "py312" ]
+      os_version: "amazonlinux2023"
+      cuda_version: "cu129"
+      example: False
+      disable_sm_tag: True
+      force_release: False
@@ -28,6 +28,7 @@
     "vllm",
     "sglang",
     "huggingface_vllm",
+    "huggingface_vllm_omni",
     "huggingface_sglang",
 }
 DEVICE_TYPES = {"cpu", "gpu", "hpu", "eia", "inf", "neuron", "neuronx"}
 
@@ -239,7 +239,17 @@ def parse_args():
     parser = argparse.ArgumentParser()
     parser.add_argument(
         "--framework",
-        choices=["tensorflow", "mxnet", "pytorch", "base", "vllm", "sglang"],
+        choices=[
+            "tensorflow",
+            "mxnet",
+            "pytorch",
+            "base",
+            "vllm",
+            "sglang",
+            "huggingface_vllm",
+            "huggingface_vllm_omni",
+            "huggingface_sglang",
+        ],
         help="framework of container image.",
         required=True,
     )
 
@@ -189,24 +189,43 @@ def test_python_version(image):
 @pytest.mark.model("N/A")
 def test_ubuntu_version(image):
     """
-    Check that the ubuntu version in the image tag is the same as the one on a running container.
+    Check that the OS version in the image tag is the same as the one on a running container.
 
     :param image: ECR image URI
     """
     ctx = Context()
     container_name = get_container_name("ubuntu-version", image)
 
-    ubuntu_version = ""
+    expected_os = ""
+    expected_os_version = ""
     for tag_split in image.split("-"):
         if tag_split.startswith("ubuntu"):
-            ubuntu_version = tag_split.split("ubuntu")[-1]
+            expected_os = "ubuntu"
+            expected_os_version = tag_split.split("ubuntu")[-1]
+        elif tag_split.startswith("amzn"):
+            expected_os = "amzn"
+            expected_os_version = tag_split.split("amzn")[-1]
 
     start_container(container_name, image, ctx)
     output = run_cmd_on_container(container_name, ctx, "cat /etc/os-release")
-    container_ubuntu_version = output.stdout
+    container_os_release = output.stdout
 
-    assert "Ubuntu" in container_ubuntu_version
-    assert ubuntu_version in container_ubuntu_version
+    is_amazon_linux = (
+        "Amazon Linux" in container_os_release
+        or 'ID="amzn"' in container_os_release
+        or "ID=amzn" in container_os_release
+    )
+    if is_amazon_linux:
+        if expected_os == "amzn":
+            assert expected_os_version in container_os_release
+        return
+
+    if expected_os == "amzn":
+        assert 'ID="amzn"' in container_os_release or "ID=amzn" in container_os_release
+        assert expected_os_version in container_os_release
+    else:
+        assert "Ubuntu" in container_os_release
+        assert expected_os_version in container_os_release
 
 
 @pytest.mark.usefixtures("sagemaker", "functionality_sanity")
 
@@ -0,0 +1,13 @@
+# Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"). You
+# may not use this file except in compliance with the License. A copy of
+# the License is located at
+#
+#     http://aws.amazon.com/apache2.0/
+#
+# or in the "license" file accompanying this file. This file is
+# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
+# ANY KIND, either express or implied. See the License for the specific
+# language governing permissions and limitations under the License.
+from __future__ import absolute_import
Original file line number	Diff line number	Diff line change
`@@ -28,6 +28,7 @@`
`28`	`28`	`"vllm",`
`29`	`29`	`"sglang",`
`30`	`30`	`"huggingface_vllm",`
	`31`	`+ "huggingface_vllm_omni",`
`31`	`32`	`"huggingface_sglang",`
`32`	`33`	`}`
`33`	`34`	`DEVICE_TYPES = {"cpu", "gpu", "hpu", "eia", "inf", "neuron", "neuronx"}`