Skip to content

Commit 5b8001d

Browse files
Add HF vLLM Omni 0.20 DLC (#6055)
* Add vLLM Omni 0.16.0 artifacts including entrypoint, proxy, and Dockerfile * Improve vLLM Omni proxy routing * [WIP] Add tests for vLLM Omni * Add HF vLLM-Omni DLC based on SM version * Update vLLM-Omni to version 0.18.0 * Update dlc_developer_config.toml * Update release_images_general.yml * Update vLLM-Omni to version 0.20.0 with CUDA 13.0 - Additionally, fix some CVEs related to Diffusers and Mooncake packages * Enhance OS version checks in pre-release tests and update buildspec with contributor info * Update buildspec to change OS version from Ubuntu 24.04 to Amazon Linux 2023 * Update buildspec to change architecture type from x86_64 to x86 and adjust OS version from amazonlinux2023 to amzn2023 * Fix imports in test files to use relative paths * Fix black format for vllm-omni tests * Fix vLLM-Omni integration tests * Update vLLM-Omni integration tests to use FLUX.2-klein-4B model and adjust environment variables * Update vLLM-Omni integration tests to use Qwen3-TTS-12Hz-1.7B-CustomVoice model * Refactor vLLM-Omni integration tests to test speech generation API * Update model data file extension and enhance print statements in vLLM-Omni integration tests * Fix black formatting * Replace huggingface-vllm-omni local test instance to `g5.xlarge` * Revert dlc_developer_config.toml --------- Co-authored-by: Sally Seok <49303563+sallyseok@users.noreply.github.com>
1 parent f07e3c6 commit 5b8001d

24 files changed

Lines changed: 1671 additions & 12 deletions

dlc_developer_config.toml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ deep_canary_mode = false
3636

3737
[build]
3838
# Add in frameworks you would like to build. By default, builds are disabled unless you specify building an image.
39-
# available frameworks - ["base", "vllm", "sglang", "autogluon", "huggingface_vllm", "huggingface_sglang", "huggingface_tensorflow", "huggingface_pytorch", "huggingface_tensorflow_trcomp", "huggingface_pytorch_trcomp", "pytorch_trcomp", "tensorflow", "pytorch", "stabilityai_pytorch"]
39+
# available frameworks - ["base", "vllm", "sglang", "autogluon", "huggingface_vllm", "huggingface_vllm_omni", "huggingface_sglang", "huggingface_tensorflow", "huggingface_pytorch", "huggingface_tensorflow_trcomp", "huggingface_pytorch_trcomp", "pytorch_trcomp", "tensorflow", "pytorch", "stabilityai_pytorch"]
4040
build_frameworks = []
4141

4242

@@ -189,6 +189,9 @@ dlc-pr-vllm = ""
189189
# HuggingFace vLLM
190190
dlc-pr-huggingface-vllm = ""
191191

192+
# HuggingFace vLLM Omni
193+
dlc-pr-huggingface-vllm-omni = ""
194+
192195
# HuggingFace SGLang
193196
dlc-pr-huggingface-sglang = ""
194197

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
account_id: &ACCOUNT_ID <set-$ACCOUNT_ID-in-environment>
2+
prod_account_id: &PROD_ACCOUNT_ID 763104351884
3+
region: &REGION <set-$REGION-in-environment>
4+
base_framework: &BASE_FRAMEWORK vllm-omni
5+
framework: &FRAMEWORK huggingface_vllm_omni
6+
version: &VERSION "0.20.0"
7+
short_version: &SHORT_VERSION "0.20"
8+
arch_type: &ARCH_TYPE x86
9+
autopatch_build: "False"
10+
11+
repository_info:
12+
build_repository: &BUILD_REPOSITORY
13+
image_type: &IMAGE_TYPE inference
14+
root: huggingface/vllm-omni
15+
repository_name: &REPOSITORY_NAME !join [ "pr", "-", "huggingface", "-", *BASE_FRAMEWORK ]
16+
repository: &REPOSITORY !join [ *ACCOUNT_ID, .dkr.ecr., *REGION, .amazonaws.com/, *REPOSITORY_NAME ]
17+
release_repository_name: &RELEASE_REPOSITORY_NAME !join [ "huggingface", "-", *BASE_FRAMEWORK ]
18+
release_repository: &RELEASE_REPOSITORY !join [ *PROD_ACCOUNT_ID, .dkr.ecr., *REGION, .amazonaws.com/, *RELEASE_REPOSITORY_NAME ]
19+
20+
context:
21+
build_context: &BUILD_CONTEXT
22+
deep_learning_container:
23+
source: ../../src/deep_learning_container.py
24+
target: deep_learning_container.py
25+
26+
images:
27+
BuildHuggingFaceVllmOmniGpuPy312Cu130DockerImage:
28+
<<: *BUILD_REPOSITORY
29+
context:
30+
<<: *BUILD_CONTEXT
31+
image_size_baseline: 26000
32+
device_type: &DEVICE_TYPE gpu
33+
cuda_version: &CUDA_VERSION cu130
34+
python_version: &DOCKER_PYTHON_VERSION py3
35+
tag_python_version: &TAG_PYTHON_VERSION py312
36+
os_version: &OS_VERSION amzn2023
37+
transformers_version: &TRANSFORMERS_VERSION 5.8.1
38+
tag: !join [ *VERSION, '-', 'transformers', *TRANSFORMERS_VERSION, '-', *DEVICE_TYPE, '-', *TAG_PYTHON_VERSION, '-', *CUDA_VERSION, '-', *OS_VERSION ]
39+
docker_file: !join [ docker/, *SHORT_VERSION, /, *CUDA_VERSION, /Dockerfile ]
40+
target: sagemaker
41+
build: true
42+
enable_common_stage_build: false
43+
test_configs:
44+
test_platforms:
45+
- sanity
46+
- security
47+
- sagemaker
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
ARG FINAL_BASE_IMAGE=763104351884.dkr.ecr.us-west-2.amazonaws.com/vllm:omni-sagemaker-cuda-v1.1
2+
FROM ${FINAL_BASE_IMAGE} AS vllm-base
3+
4+
LABEL maintainer="Amazon AI"
5+
LABEL dlc_major_version="1"
6+
7+
ARG TRANSFORMERS_VERSION=5.8.1
8+
ARG DIFFUSERS_VERSION=0.38.0
9+
ARG HUGGINGFACE_HUB_VERSION=1.15.0
10+
ARG HF_XET_VERSION=1.5.0
11+
12+
RUN uv pip install --no-cache-dir --prerelease=allow \
13+
transformers==${TRANSFORMERS_VERSION} \
14+
diffusers==${DIFFUSERS_VERSION} \
15+
huggingface-hub==${HUGGINGFACE_HUB_VERSION} \
16+
hf-xet==${HF_XET_VERSION} \
17+
grpcio
18+
19+
FROM vllm-base AS sagemaker
20+
21+
ENV HF_HUB_USER_AGENT_ORIGIN="aws:sagemaker:gpu-cuda:inference:hf-vllm-omni"
22+
23+
RUN HOME_DIR=/root \
24+
&& uv pip install --system --upgrade pip requests PTable \
25+
&& curl -o ${HOME_DIR}/oss_compliance.zip https://aws-dlinfra-utilities.s3.amazonaws.com/oss_compliance.zip \
26+
&& unzip ${HOME_DIR}/oss_compliance.zip -d ${HOME_DIR}/ \
27+
&& cp ${HOME_DIR}/oss_compliance/test/testOSSCompliance /usr/local/bin/testOSSCompliance \
28+
&& chmod +x /usr/local/bin/testOSSCompliance \
29+
&& chmod +x ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh \
30+
&& ${HOME_DIR}/oss_compliance/generate_oss_compliance.sh ${HOME_DIR} python3 \
31+
&& rm -rf ${HOME_DIR}/oss_compliance*
32+
33+
RUN uv pip install --no-cache-dir prettytable
34+
35+
# Fix Mooncake Go gRPC CVEs
36+
# NOTE: This won't work, as mooncake-transfer-engine-cuda13 requires GLIBC 2.35, and the base image comes with GLIBC 2.34
37+
# RUN uv pip install --no-cache-dir mooncake-transfer-engine-cuda13==v0.3.10.post2
38+
# In the meantime, uninstall the Mooncake transfer engine to avoid dependency conflicts
39+
RUN uv pip uninstall mooncake-transfer-engine
40+
41+
ENTRYPOINT ["/usr/local/bin/sagemaker_entrypoint.sh"]

huggingface/vllm-omni/telemetry.sh

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
#!/bin/bash
2+
# telemetry.sh
3+
if [ -f /usr/local/bin/deep_learning_container.py ] && [[ -z "${OPT_OUT_TRACKING}" || "${OPT_OUT_TRACKING,,}" != "true" ]]; then
4+
(
5+
python /usr/local/bin/deep_learning_container.py \
6+
--framework "huggingface_vllm_omni" \
7+
--framework-version "0.20.0" \
8+
--container-type "general" \
9+
&>/dev/null &
10+
)
11+
fi
12+

huggingface/vllm/buildspec.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ region: &REGION <set-$REGION-in-environment>
44
base_framework: &BASE_FRAMEWORK vllm
55
framework: &FRAMEWORK !join [ "huggingface_", *BASE_FRAMEWORK]
66
version: &VERSION "0.21.0"
7+
contributor: huggingface
78
short_version: &SHORT_VERSION "0.21"
89
arch_type: &ARCH_TYPE x86_64
910
autopatch_build: "False"

release_images_general.yml

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -204,3 +204,17 @@ release_images:
204204
example: False
205205
disable_sm_tag: True
206206
force_release: False
207+
15:
208+
framework: "huggingface_vllm_omni"
209+
version: "0.18.0"
210+
arch_type: "x86"
211+
hf_transformers: "4.57.6"
212+
vllm_omni_version: "0.18.0"
213+
general:
214+
device_types: [ "gpu" ]
215+
python_versions: [ "py312" ]
216+
os_version: "amazonlinux2023"
217+
cuda_version: "cu129"
218+
example: False
219+
disable_sm_tag: True
220+
force_release: False

src/constants.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
"vllm",
2929
"sglang",
3030
"huggingface_vllm",
31+
"huggingface_vllm_omni",
3132
"huggingface_sglang",
3233
}
3334
DEVICE_TYPES = {"cpu", "gpu", "hpu", "eia", "inf", "neuron", "neuronx"}

src/deep_learning_container.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -239,7 +239,17 @@ def parse_args():
239239
parser = argparse.ArgumentParser()
240240
parser.add_argument(
241241
"--framework",
242-
choices=["tensorflow", "mxnet", "pytorch", "base", "vllm", "sglang"],
242+
choices=[
243+
"tensorflow",
244+
"mxnet",
245+
"pytorch",
246+
"base",
247+
"vllm",
248+
"sglang",
249+
"huggingface_vllm",
250+
"huggingface_vllm_omni",
251+
"huggingface_sglang",
252+
],
243253
help="framework of container image.",
244254
required=True,
245255
)

test/dlc_tests/sanity/test_pre_release.py

Lines changed: 25 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -189,24 +189,43 @@ def test_python_version(image):
189189
@pytest.mark.model("N/A")
190190
def test_ubuntu_version(image):
191191
"""
192-
Check that the ubuntu version in the image tag is the same as the one on a running container.
192+
Check that the OS version in the image tag is the same as the one on a running container.
193193
194194
:param image: ECR image URI
195195
"""
196196
ctx = Context()
197197
container_name = get_container_name("ubuntu-version", image)
198198

199-
ubuntu_version = ""
199+
expected_os = ""
200+
expected_os_version = ""
200201
for tag_split in image.split("-"):
201202
if tag_split.startswith("ubuntu"):
202-
ubuntu_version = tag_split.split("ubuntu")[-1]
203+
expected_os = "ubuntu"
204+
expected_os_version = tag_split.split("ubuntu")[-1]
205+
elif tag_split.startswith("amzn"):
206+
expected_os = "amzn"
207+
expected_os_version = tag_split.split("amzn")[-1]
203208

204209
start_container(container_name, image, ctx)
205210
output = run_cmd_on_container(container_name, ctx, "cat /etc/os-release")
206-
container_ubuntu_version = output.stdout
211+
container_os_release = output.stdout
207212

208-
assert "Ubuntu" in container_ubuntu_version
209-
assert ubuntu_version in container_ubuntu_version
213+
is_amazon_linux = (
214+
"Amazon Linux" in container_os_release
215+
or 'ID="amzn"' in container_os_release
216+
or "ID=amzn" in container_os_release
217+
)
218+
if is_amazon_linux:
219+
if expected_os == "amzn":
220+
assert expected_os_version in container_os_release
221+
return
222+
223+
if expected_os == "amzn":
224+
assert 'ID="amzn"' in container_os_release or "ID=amzn" in container_os_release
225+
assert expected_os_version in container_os_release
226+
else:
227+
assert "Ubuntu" in container_os_release
228+
assert expected_os_version in container_os_release
210229

211230

212231
@pytest.mark.usefixtures("sagemaker", "functionality_sanity")
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
# Copyright 2019-2020 Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License"). You
4+
# may not use this file except in compliance with the License. A copy of
5+
# the License is located at
6+
#
7+
# http://aws.amazon.com/apache2.0/
8+
#
9+
# or in the "license" file accompanying this file. This file is
10+
# distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF
11+
# ANY KIND, either express or implied. See the License for the specific
12+
# language governing permissions and limitations under the License.
13+
from __future__ import absolute_import

0 commit comments

Comments
 (0)