Skip to content

Commit b2d628c

Browse files
authored
CMake: merge two condition blocks (#273)
1 parent 256c96c commit b2d628c

1 file changed

Lines changed: 34 additions & 42 deletions

File tree

build2cmake/src/templates/preamble.cmake

Lines changed: 34 additions & 42 deletions
Original file line number | Diff line number | Diff line change
@@ -78,7 +78,6 @@ else()
7878
set(CUDA_DEFAULT_KERNEL_ARCHS "7.0;7.2;7.5;8.0;8.6;8.7;8.9;9.0+PTX")
7979
endif()
8080

81-
8281
# Basic checks for each GPU language.
8382
if(GPU_LANG STREQUAL "CUDA")
8483
if(NOT CUDA_FOUND)
@@ -99,6 +98,31 @@ if(GPU_LANG STREQUAL "CUDA")
9998
endif()
10099
{% endif %}
101100

101+
# This clears out -gencode arguments from `CMAKE_CUDA_FLAGS`, which we need
102+
# to set our own set of capabilities.
103+
clear_gencode_flags()
104+
105+
# Get the capabilities without +PTX suffixes, so that we can use them as
106+
# the target archs in the loose intersection with a kernel's capabilities.
107+
cuda_remove_ptx_suffixes(CUDA_ARCHS "${CUDA_DEFAULT_KERNEL_ARCHS}")
108+
message(STATUS "CUDA supported base architectures: ${CUDA_ARCHS}")
109+
110+
if(BUILD_ALL_SUPPORTED_ARCHS)
111+
set(CUDA_KERNEL_ARCHS "${CUDA_DEFAULT_KERNEL_ARCHS}")
112+
else()
113+
try_run_python(CUDA_KERNEL_ARCHS SUCCESS "import torch; cc=torch.cuda.get_device_capability(); print(f\"{cc[0]}.{cc[1]}\")" "Failed to get CUDA capability")
114+
if(NOT SUCCESS)
115+
message(WARNING "Failed to detect CUDA capability, using default capabilities.")
116+
set(CUDA_KERNEL_ARCHS "${CUDA_DEFAULT_KERNEL_ARCHS}")
117+
endif()
118+
endif()
119+
120+
message(STATUS "CUDA supported kernel architectures: ${CUDA_KERNEL_ARCHS}")
121+
122+
if(NVCC_THREADS AND GPU_LANG STREQUAL "CUDA")
123+
list(APPEND GPU_FLAGS "--threads=${NVCC_THREADS}")
124+
endif()
125+
102126
# TODO: deprecate one of these settings.
103127
add_compile_definitions(USE_CUDA=1)
104128
add_compile_definitions(CUDA_KERNEL)
@@ -112,6 +136,10 @@ elseif(GPU_LANG STREQUAL "HIP")
112136
# .hip extension automatically, HIP must be enabled explicitly.
113137
enable_language(HIP)
114138

139+
override_gpu_arches(GPU_ARCHES HIP ${HIP_SUPPORTED_ARCHS})
140+
set(ROCM_ARCHS ${GPU_ARCHES})
141+
message(STATUS "ROCM supported target architectures: ${ROCM_ARCHS}")
142+
115143
# TODO: deprecate one of these settings.
116144
add_compile_definitions(USE_ROCM=1)
117145
add_compile_definitions(ROCM_KERNEL)
@@ -132,44 +160,6 @@ elseif(GPU_LANG STREQUAL "SYCL")
132160
message(FATAL_ERROR "Intel SYCL C++ compiler (icpx) and/or C compiler (icx) not found. Please install Intel oneAPI toolkit.")
133161
endif()
134162

135-
add_compile_definitions(XPU_KERNEL)
136-
add_compile_definitions(USE_XPU)
137-
else()
138-
message(FATAL_ERROR "Unsupported GPU language: ${GPU_LANG}")
139-
endif()
140-
141-
# CUDA build options.
142-
if(GPU_LANG STREQUAL "CUDA")
143-
# This clears out -gencode arguments from `CMAKE_CUDA_FLAGS`, which we need
144-
# to set our own set of capabilities.
145-
clear_gencode_flags()
146-
147-
# Get the capabilities without +PTX suffixes, so that we can use them as
148-
# the target archs in the loose intersection with a kernel's capabilities.
149-
cuda_remove_ptx_suffixes(CUDA_ARCHS "${CUDA_DEFAULT_KERNEL_ARCHS}")
150-
message(STATUS "CUDA supported base architectures: ${CUDA_ARCHS}")
151-
152-
if(BUILD_ALL_SUPPORTED_ARCHS)
153-
set(CUDA_KERNEL_ARCHS "${CUDA_DEFAULT_KERNEL_ARCHS}")
154-
else()
155-
try_run_python(CUDA_KERNEL_ARCHS SUCCESS "import torch; cc=torch.cuda.get_device_capability(); print(f\"{cc[0]}.{cc[1]}\")" "Failed to get CUDA capability")
156-
if(NOT SUCCESS)
157-
message(WARNING "Failed to detect CUDA capability, using default capabilities.")
158-
set(CUDA_KERNEL_ARCHS "${CUDA_DEFAULT_KERNEL_ARCHS}")
159-
endif()
160-
endif()
161-
162-
message(STATUS "CUDA supported kernel architectures: ${CUDA_KERNEL_ARCHS}")
163-
164-
if(NVCC_THREADS AND GPU_LANG STREQUAL "CUDA")
165-
list(APPEND GPU_FLAGS "--threads=${NVCC_THREADS}")
166-
endif()
167-
168-
elseif(GPU_LANG STREQUAL "HIP")
169-
override_gpu_arches(GPU_ARCHES HIP ${HIP_SUPPORTED_ARCHS})
170-
set(ROCM_ARCHS ${GPU_ARCHES})
171-
message(STATUS "ROCM supported target architectures: ${ROCM_ARCHS}")
172-
elseif(GPU_LANG STREQUAL "SYCL")
173163
execute_process(
174164
COMMAND ${ICPX_COMPILER} --version
175165
OUTPUT_VARIABLE ICPX_VERSION_OUTPUT
@@ -191,10 +181,12 @@ elseif(GPU_LANG STREQUAL "SYCL")
191181
set(sycl_flags "-fsycl;-fhonor-nans;-fhonor-infinities;-fno-associative-math;-fno-approx-func;-fno-sycl-instrument-device-code;--offload-compress;-fsycl-targets=spir64_gen,spir64;")
192182
set(GPU_FLAGS "${sycl_flags}")
193183
set(GPU_ARCHES "")
184+
185+
186+
add_compile_definitions(XPU_KERNEL)
187+
add_compile_definitions(USE_XPU)
194188
else()
195-
override_gpu_arches(GPU_ARCHES
196-
${GPU_LANG}
197-
"${${GPU_LANG}_SUPPORTED_ARCHS}")
189+
message(FATAL_ERROR "Unsupported GPU language: ${GPU_LANG}")
198190
endif()
199191

200192
# Initialize SRC list for kernel and binding sources

0 commit comments

Comments (0)