7878 set (CUDA_DEFAULT_KERNEL_ARCHS "7.0;7.2;7.5;8.0;8.6;8.7;8.9;9.0+PTX" )
7979endif ()
8080
81-
8281# Basic checks for each GPU language.
8382if (GPU_LANG STREQUAL "CUDA" )
8483 if (NOT CUDA_FOUND)
@@ -99,6 +98,31 @@ if(GPU_LANG STREQUAL "CUDA")
9998 endif ()
10099 {% endif %}
101100
101+ # This clears out -gencode arguments from `CMAKE_CUDA_FLAGS`, which we need
102+ # to set our own set of capabilities.
103+ clear_gencode_flags ()
104+
105+ # Get the capabilities without +PTX suffixes, so that we can use them as
106+ # the target archs in the loose intersection with a kernel's capabilities.
107+ cuda_remove_ptx_suffixes (CUDA_ARCHS "${CUDA_DEFAULT_KERNEL_ARCHS} " )
108+ message (STATUS "CUDA supported base architectures: ${CUDA_ARCHS} " )
109+
110+ if (BUILD_ALL_SUPPORTED_ARCHS)
111+ set (CUDA_KERNEL_ARCHS "${CUDA_DEFAULT_KERNEL_ARCHS} " )
112+ else ()
113+ try_run_python (CUDA_KERNEL_ARCHS SUCCESS "import torch; cc=torch.cuda.get_device_capability(); print(f\" {cc[0]}.{cc[1]}\" )" "Failed to get CUDA capability" )
114+ if (NOT SUCCESS)
115+ message (WARNING "Failed to detect CUDA capability, using default capabilities." )
116+ set (CUDA_KERNEL_ARCHS "${CUDA_DEFAULT_KERNEL_ARCHS} " )
117+ endif ()
118+ endif ()
119+
120+ message (STATUS "CUDA supported kernel architectures: ${CUDA_KERNEL_ARCHS} " )
121+
122+ if (NVCC_THREADS AND GPU_LANG STREQUAL "CUDA" )
123+ list (APPEND GPU_FLAGS "--threads=${NVCC_THREADS} " )
124+ endif ()
125+
102126 # TODO: deprecate one of these settings.
103127 add_compile_definitions (USE_CUDA=1 )
104128 add_compile_definitions (CUDA_KERNEL )
@@ -112,6 +136,10 @@ elseif(GPU_LANG STREQUAL "HIP")
112136 # .hip extension automatically, HIP must be enabled explicitly.
113137 enable_language (HIP )
114138
139+ override_gpu_arches (GPU_ARCHES HIP ${HIP_SUPPORTED_ARCHS} )
140+ set (ROCM_ARCHS ${GPU_ARCHES} )
141+ message (STATUS "ROCM supported target architectures: ${ROCM_ARCHS} " )
142+
115143 # TODO: deprecate one of these settings.
116144 add_compile_definitions (USE_ROCM=1 )
117145 add_compile_definitions (ROCM_KERNEL )
@@ -132,44 +160,6 @@ elseif(GPU_LANG STREQUAL "SYCL")
132160 message (FATAL_ERROR "Intel SYCL C++ compiler (icpx) and/or C compiler (icx) not found. Please install Intel oneAPI toolkit." )
133161 endif ()
134162
135- add_compile_definitions (XPU_KERNEL )
136- add_compile_definitions (USE_XPU )
137- else ()
138- message (FATAL_ERROR "Unsupported GPU language: ${GPU_LANG} " )
139- endif ()
140-
141- # CUDA build options.
142- if (GPU_LANG STREQUAL "CUDA" )
143- # This clears out -gencode arguments from `CMAKE_CUDA_FLAGS`, which we need
144- # to set our own set of capabilities.
145- clear_gencode_flags ()
146-
147- # Get the capabilities without +PTX suffixes, so that we can use them as
148- # the target archs in the loose intersection with a kernel's capabilities.
149- cuda_remove_ptx_suffixes (CUDA_ARCHS "${CUDA_DEFAULT_KERNEL_ARCHS} " )
150- message (STATUS "CUDA supported base architectures: ${CUDA_ARCHS} " )
151-
152- if (BUILD_ALL_SUPPORTED_ARCHS)
153- set (CUDA_KERNEL_ARCHS "${CUDA_DEFAULT_KERNEL_ARCHS} " )
154- else ()
155- try_run_python (CUDA_KERNEL_ARCHS SUCCESS "import torch; cc=torch.cuda.get_device_capability(); print(f\" {cc[0]}.{cc[1]}\" )" "Failed to get CUDA capability" )
156- if (NOT SUCCESS)
157- message (WARNING "Failed to detect CUDA capability, using default capabilities." )
158- set (CUDA_KERNEL_ARCHS "${CUDA_DEFAULT_KERNEL_ARCHS} " )
159- endif ()
160- endif ()
161-
162- message (STATUS "CUDA supported kernel architectures: ${CUDA_KERNEL_ARCHS} " )
163-
164- if (NVCC_THREADS AND GPU_LANG STREQUAL "CUDA" )
165- list (APPEND GPU_FLAGS "--threads=${NVCC_THREADS} " )
166- endif ()
167-
168- elseif (GPU_LANG STREQUAL "HIP" )
169- override_gpu_arches (GPU_ARCHES HIP ${HIP_SUPPORTED_ARCHS} )
170- set (ROCM_ARCHS ${GPU_ARCHES} )
171- message (STATUS "ROCM supported target architectures: ${ROCM_ARCHS} " )
172- elseif (GPU_LANG STREQUAL "SYCL" )
173163 execute_process (
174164 COMMAND ${ICPX_COMPILER} --version
175165 OUTPUT_VARIABLE ICPX_VERSION_OUTPUT
@@ -191,10 +181,12 @@ elseif(GPU_LANG STREQUAL "SYCL")
191181 set (sycl_flags "-fsycl;-fhonor-nans;-fhonor-infinities;-fno-associative-math;-fno-approx-func;-fno-sycl-instrument-device-code;--offload-compress;-fsycl-targets=spir64_gen,spir64;" )
192182 set (GPU_FLAGS "${sycl_flags} " )
193183 set (GPU_ARCHES "" )
184+
185+
186+ add_compile_definitions (XPU_KERNEL )
187+ add_compile_definitions (USE_XPU )
194188else ()
195- override_gpu_arches (GPU_ARCHES
196- ${GPU_LANG}
197- "${${GPU_LANG} _SUPPORTED_ARCHS}" )
189+ message (FATAL_ERROR "Unsupported GPU language: ${GPU_LANG} " )
198190endif ()
199191
200192# Initialize SRC list for kernel and binding sources
0 commit comments