Skip to content
This repository was archived by the owner on Apr 6, 2026. It is now read-only.

Commit e6ffbc3

Browse files
authored
Fix handling of the 9.0a and 12.0a capabilities (#202)
The code that computes the intersection between a kernel's capabilities and the supported capabilities failed because, for example, the base capability of 9.0a (9.0) did not match 9.0+PTX. The supported capabilities should therefore be kept as regular version numbers, without the +PTX suffix. Instead, define a list of default kernel capabilities and, if no capabilities are specified for a kernel, intersect that list with the supported capabilities.
1 parent b4accba commit e6ffbc3

4 files changed

Lines changed: 11 additions & 16 deletions

File tree

Lines changed: 1 addition & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1 @@
1-
[
2-
"7.0",
3-
"7.2",
4-
"7.5",
5-
"8.0",
6-
"8.6",
7-
"8.7",
8-
"8.9",
9-
"9.0+PTX",
10-
"10.0",
11-
"10.1",
12-
"12.0+PTX"
13-
]
1+
["7.0", "7.2", "7.5", "8.0", "8.6", "8.7", "8.9", "9.0", "10.0", "10.1", "12.0"]

build2cmake/src/templates/cuda/kernel.cmake

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,7 @@ if(GPU_LANG STREQUAL "CUDA")
1818
{% if cuda_capabilities %}
1919
cuda_archs_loose_intersection({{kernel_name}}_ARCHS "{{ cuda_capabilities|join(";") }}" "${CUDA_ARCHS}")
2020
{% else %}
21-
cuda_archs_loose_intersection({{kernel_name}}_ARCHS "${CUDA_SUPPORTED_ARCHS}" "${CUDA_ARCHS}")
21+
cuda_archs_loose_intersection({{kernel_name}}_ARCHS "${CUDA_DEFAULT_KERNEL_ARCHS}" "${CUDA_ARCHS}")
2222
{% endif %}
2323
message(STATUS "Capabilities for kernel {{kernel_name}}: {{ '${' + kernel_name + '_ARCHS}'}}")
2424
set_gencode_flags_for_srcs(SRCS {{'"${' + kernel_name + '_SRC}"'}} CUDA_ARCHS "{{ '${' + kernel_name + '_ARCHS}'}}")

build2cmake/src/templates/cuda/preamble.cmake

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,13 @@ if (NOT TARGET_DEVICE STREQUAL "cuda" AND
3434
return()
3535
endif()
3636

37+
if(DEFINED CMAKE_CUDA_COMPILER_VERSION AND
38+
CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 12.8)
39+
set(CUDA_DEFAULT_KERNEL_ARCHS "7.0;7.2;7.5;8.0;8.6;8.7;8.9;9.0;10.0;10.1;12.0+PTX")
40+
else()
41+
set(CUDA_DEFAULT_KERNEL_ARCHS "7.0;7.2;7.5;8.0;8.6;8.7;8.9;9.0+PTX")
42+
endif()
43+
3744
if (NOT HIP_FOUND AND CUDA_FOUND)
3845
set(GPU_LANG "CUDA")
3946

lib/torch-extension/default.nix

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -132,9 +132,9 @@ stdenv.mkDerivation (prevAttrs: {
132132
CUDAToolkit_ROOT = "${lib.getDev cudaPackages.cuda_nvcc}";
133133
TORCH_CUDA_ARCH_LIST =
134134
if cudaPackages.cudaOlder "12.8" then
135-
"7.0;7.5;8.0;8.6;8.9;9.0+PTX"
135+
"7.0;7.5;8.0;8.6;8.9;9.0"
136136
else
137-
"7.0;7.5;8.0;8.6;8.9;9.0;10.0;10.1;12.0+PTX";
137+
"7.0;7.5;8.0;8.6;8.9;9.0;10.0;10.1;12.0";
138138
}
139139
// lib.optionalAttrs rocmSupport {
140140
PYTORCH_ROCM_ARCH = lib.concatStringsSep ";" torch.rocmArchs;

0 commit comments

Comments
 (0)