Skip to content

Commit 426ae19

Browse files
authored
chore: bump cuda to 13.0 (#1631)
Signed-off-by: AlpinDale <alpindale@gmail.com>
1 parent f6af616 commit 426ae19

5 files changed

Lines changed: 10 additions & 9 deletions

File tree

CMakeLists.txt

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -87,12 +87,15 @@ find_package(Torch REQUIRED)
8787
# This check must happen after find_package(Torch) because that's when CMAKE_CUDA_COMPILER_VERSION gets defined
8888
if(DEFINED CMAKE_CUDA_COMPILER_VERSION AND
8989
CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 13.0)
90+
# Starting with CUDA 12.9 and Blackwell (10.0), we use family-specific targets (10.0f, 12.0f, etc.)
91+
# to support an entire GPU generation without listing every sub-architecture individually
92+
# see: https://developer.nvidia.com/blog/nvidia-blackwell-and-nvidia-cuda-12-9-introduce-family-specific-architecture-features/
9093
set(CUDA_SUPPORTED_ARCHS "7.5;8.0;8.6;8.7;8.9;9.0;10.0;11.0;12.0")
9194
elseif(DEFINED CMAKE_CUDA_COMPILER_VERSION AND
9295
CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 12.8)
93-
set(CUDA_SUPPORTED_ARCHS "6.0;6.1;7.0;7.2;7.5;8.0;8.6;8.7;8.9;9.0;10.0;10.1;12.0")
96+
set(CUDA_SUPPORTED_ARCHS "7.5;8.0;8.6;8.7;8.9;9.0;10.0;10.1;10.3;12.0;12.1")
9497
else()
95-
set(CUDA_SUPPORTED_ARCHS "6.0;6.1;7.0;7.2;7.5;8.0;8.6;8.7;8.9;9.0")
98+
set(CUDA_SUPPORTED_ARCHS "7.0;7.5;8.0;8.6;8.7;8.9;9.0")
9699
endif()
97100

98101
#

aphrodite/envs.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@
8181
APHRODITE_MEDIA_CONNECTOR: str = "http"
8282
APHRODITE_MM_HASHER_ALGORITHM: str = "blake3"
8383
APHRODITE_TARGET_DEVICE: str = "cuda"
84-
APHRODITE_MAIN_CUDA_VERSION: str = "12.9"
84+
APHRODITE_MAIN_CUDA_VERSION: str = "13.0"
8585
APHRODITE_FLOAT32_MATMUL_PRECISION: Literal["highest", "high", "medium"] = "highest"
8686
APHRODITE_BATCH_INVARIANT: bool = False
8787
MAX_JOBS: str | None = None
@@ -483,7 +483,7 @@ def _get_or_set_default() -> str:
483483
# rocm, cpu]
484484
"APHRODITE_TARGET_DEVICE": lambda: os.getenv("APHRODITE_TARGET_DEVICE", "cuda").lower(),
485485
# Main CUDA version of Aphrodite. This follows PyTorch but can be overridden.
486-
"APHRODITE_MAIN_CUDA_VERSION": lambda: (os.getenv("APHRODITE_MAIN_CUDA_VERSION", "").lower() or "12.9"),
486+
"APHRODITE_MAIN_CUDA_VERSION": lambda: (os.getenv("APHRODITE_MAIN_CUDA_VERSION", "").lower() or "13.0"),
487487
# Controls PyTorch float32 matmul precision mode within Aphrodite workers.
488488
# Valid options mirror torch.set_float32_matmul_precision
489489
"APHRODITE_FLOAT32_MATMUL_PRECISION": env_with_choices(

docker/Dockerfile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -188,7 +188,7 @@ RUN --mount=type=cache,target=/root/.cache/uv \
188188
# Explicitly set the list to avoid issues with torch 2.2
189189
# See https://github.com/pytorch/pytorch/pull/123243
190190
# From versions.json: .torch.cuda_arch_list
191-
ARG torch_cuda_arch_list='7.0 7.5 8.0 8.9 9.0 10.0 12.0'
191+
ARG torch_cuda_arch_list='7.5 8.0 8.6 8.9 9.0 10.0 12.0+PTX'
192192
ENV TORCH_CUDA_ARCH_LIST=${torch_cuda_arch_list}
193193
#################### BUILD BASE IMAGE ####################
194194

@@ -615,7 +615,7 @@ ARG PIP_EXTRA_INDEX_URL UV_EXTRA_INDEX_URL
615615
ENV UV_HTTP_TIMEOUT=500
616616

617617
# Install kv_connectors if requested
618-
ARG torch_cuda_arch_list='7.0 7.5 8.0 8.9 9.0 10.0 12.0'
618+
ARG torch_cuda_arch_list='7.5 8.0 8.6 8.9 9.0 10.0 12.0+PTX'
619619
ENV TORCH_CUDA_ARCH_LIST=${torch_cuda_arch_list}
620620
RUN --mount=type=cache,target=/root/.cache/uv \
621621
--mount=type=bind,source=requirements/kv_connectors.txt,target=/tmp/kv_connectors.txt,ro \

docker/docker-bake.hcl

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -88,7 +88,6 @@ target "test-ubuntu2404" {
8888
args = {
8989
UBUNTU_VERSION = "24.04"
9090
GDRCOPY_OS_VERSION = "Ubuntu24_04"
91-
FLASHINFER_AOT_COMPILE = "true"
9291
}
9392
output = ["type=docker"]
9493
}
@@ -100,7 +99,6 @@ target "openai-ubuntu2404" {
10099
args = {
101100
UBUNTU_VERSION = "24.04"
102101
GDRCOPY_OS_VERSION = "Ubuntu24_04"
103-
FLASHINFER_AOT_COMPILE = "true"
104102
}
105103
output = ["type=docker"]
106104
}

docker/versions.json

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@
3232
"default": "false"
3333
},
3434
"TORCH_CUDA_ARCH_LIST": {
35-
"default": "7.0 7.5 8.0 8.9 9.0 10.0 12.0"
35+
"default": "7.5 8.0 8.6 8.9 9.0 10.0 12.0+PTX"
3636
},
3737
"DEEPEP_COMMIT_HASH": {
3838
"default": "73b6ea4"

0 commit comments

Comments
 (0)