Skip to content

Commit de99059

Browse files
authored
Fix build pipeline for CUDA 13.0 (microsoft#26983)
This PR fixes the build pipeline failures observed with CUDA 13.0. ### Changes: 1. **Compiler Flags**: Fixed the check_nvcc_compiler_flag macro in adjust_global_compile_flags.cmake to correctly pass flags to the host compiler using -Xcompiler, preventing 'nvcc fatal' errors. Added cmake/empty.c for this check. 2. **Node.js Bindings**: Fixed warn_unused_result warnings in session_options_helper.cc by wrapping CreateCUDAProviderOptions calls with Ort::ThrowOnError. 3. **CCCL Header Location**: Updated `onnxruntime_providers_cuda.cmake` to handle the relocation of CCCL headers in CUDA 13.0. It now adds the `cccl` subdirectory to include paths when detected, ensuring headers like `<cuda/std/utility>` are correctly found. 4. Updated the nuget pipeline to pass the cudnn home to the build command line for CUDA 13. See the "cudnn not found" issue below. 5. Disabled the step 'Test C API application for GPU package' in the nuget pipeline, since the "Onnxruntime-Linux-GPU-A10" pool has an old CUDA driver which cannot run CUDA 13. This is a temporary workaround to unblock the pipeline until we have a pool with a newer CUDA driver. #### Issue: cuda/std/utility not found See microsoft#26362. I tried two ways to fix it: one is to change CMAKE_CUDA_COMPILER_FRONTEND_VARIANT, the other is to add the cccl include path for CUDA 13. Both can fix the error in the build pipeline. Since the second approach is more straightforward, it is the one kept. #### Issue: cudnn not found The Windows packaging builds ("Windows_Packaging_CUDA" and "Windows_Packaging_TensorRT") were failing for CUDA 13.0 because the --cudnn_home argument was missing from the build.py command line. Note that the CUDA 12.8 build has no such problem, since we put cudnn and cuda together. For CUDA 13, we separate cuda and cudnn into two directories, so we need to add --cudnn_home to the build commands for CUDA 13. Fix: * Defined win_cudnn_home in c-api-noopenmp-packaging-pipelines.yml and cuda-packaging-pipeline.yml (set to $(Agent.TempDirectory)\9.14.0.64_cuda13 for CUDA 13.0). 
* Propagated this variable through nuget-combine-cuda-stage.yml to nuget-win-cuda-packaging-stage.yml * Updated nuget-win-cuda-packaging-stage.yml to conditionally add --cudnn_home=${{ parameters.win_cudnn_home }} to the build parameters when the variable is set.
1 parent 081633e commit de99059

9 files changed

Lines changed: 45 additions & 8 deletions

cmake/adjust_global_compile_flags.cmake

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -208,10 +208,8 @@ endif()
208208

209209

210210
macro(check_nvcc_compiler_flag _FLAG _RESULT)
211-
execute_process(COMMAND ${CUDAToolkit_BIN_DIR}/nvcc "${_FLAG}" RESULT_VARIABLE NVCC_OUT ERROR_VARIABLE NVCC_ERROR)
212-
message("NVCC_ERROR = ${NVCC_ERROR}")
213-
message("NVCC_OUT = ${NVCC_OUT}")
214-
if ("${NVCC_OUT}" MATCHES "0")
211+
execute_process(COMMAND ${CMAKE_CUDA_COMPILER} --compiler-options "${_FLAG}" -c ${REPO_ROOT}/cmake/empty.c -o ${CMAKE_CURRENT_BINARY_DIR}/empty.o RESULT_VARIABLE NVCC_OUT ERROR_QUIET OUTPUT_QUIET)
212+
if (NVCC_OUT EQUAL 0)
215213
set(${_RESULT} 1)
216214
else()
217215
set(${_RESULT} 0)

cmake/empty.c

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
// This file is used by the check_nvcc_compiler_flag macro in adjust_global_compile_flags.cmake to test nvcc compiler flags.
2+
void empty() {}

cmake/onnxruntime_providers_cuda.cmake

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -257,6 +257,17 @@
257257
target_include_directories(${target} PRIVATE ${cutlass_SOURCE_DIR}/include ${cutlass_SOURCE_DIR}/examples ${cutlass_SOURCE_DIR}/tools/util/include)
258258
target_link_libraries(${target} PRIVATE Eigen3::Eigen)
259259
target_include_directories(${target} PRIVATE ${ONNXRUNTIME_ROOT} ${CMAKE_CURRENT_BINARY_DIR} PUBLIC ${CUDAToolkit_INCLUDE_DIRS})
260+
261+
# Handle CUDA 13.0 CCCL header directory move
262+
if (CMAKE_CUDA_COMPILER_VERSION VERSION_GREATER_EQUAL 13.0)
263+
foreach(inc_dir ${CUDAToolkit_INCLUDE_DIRS})
264+
if (EXISTS "${inc_dir}/cccl")
265+
# Add the cccl subdirectory to the include path so <cuda/std/utility> can be found
266+
target_include_directories(${target} PRIVATE "${inc_dir}/cccl")
267+
endif()
268+
endforeach()
269+
endif()
270+
260271
# ${CMAKE_CURRENT_BINARY_DIR} is so that #include "onnxruntime_config.h" inside tensor_shape.h is found
261272
set_target_properties(${target} PROPERTIES LINKER_LANGUAGE CUDA)
262273
set_target_properties(${target} PROPERTIES FOLDER "ONNXRuntime")

js/node/src/session_options_helper.cc

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -143,15 +143,15 @@ void ParseExecutionProviders(const Napi::Array epList, Ort::SessionOptions& sess
143143
#ifdef USE_CUDA
144144
} else if (name == "cuda") {
145145
OrtCUDAProviderOptionsV2* options;
146-
Ort::GetApi().CreateCUDAProviderOptions(&options);
146+
Ort::ThrowOnError(Ort::GetApi().CreateCUDAProviderOptions(&options));
147147
options->device_id = deviceId;
148148
sessionOptions.AppendExecutionProvider_CUDA_V2(*options);
149149
Ort::GetApi().ReleaseCUDAProviderOptions(options);
150150
#endif
151151
#ifdef USE_TENSORRT
152152
} else if (name == "tensorrt") {
153153
OrtTensorRTProviderOptionsV2* options;
154-
Ort::GetApi().CreateTensorRTProviderOptions(&options);
154+
Ort::ThrowOnError(Ort::GetApi().CreateTensorRTProviderOptions(&options));
155155
options->device_id = deviceId;
156156
sessionOptions.AppendExecutionProvider_TensorRT_V2(*options);
157157
Ort::GetApi().ReleaseTensorRTProviderOptions(options);

tools/ci_build/github/azure-pipelines/c-api-noopenmp-packaging-pipelines.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,11 @@ variables:
8888
value: $(Agent.TempDirectory)\v12.8
8989
${{ if eq(parameters.CudaVersion, '13.0') }}:
9090
value: $(Agent.TempDirectory)\v13.0
91+
- name: win_cudnn_home
92+
${{ if eq(parameters.CudaVersion, '12.8') }}:
93+
value: ''
94+
${{ if eq(parameters.CudaVersion, '13.0') }}:
95+
value: $(Agent.TempDirectory)\9.14.0.64_cuda13
9196
- name: CudaArchs
9297
${{ if eq(parameters.CudaVersion, '12.8') }}:
9398
value: '75-real;86-real;89-real;90-virtual'
@@ -171,6 +176,7 @@ extends:
171176
PreReleaseVersionSuffixString: ${{ parameters.PreReleaseVersionSuffixString }}
172177
PreReleaseVersionSuffixNumber: ${{ parameters.PreReleaseVersionSuffixNumber }}
173178
CudaArchs: ${{ variables.CudaArchs }}
179+
win_cudnn_home: ${{ variables.win_cudnn_home }}
174180

175181
- template: stages/nodejs-win-packaging-stage.yml
176182
parameters:

tools/ci_build/github/azure-pipelines/cuda-packaging-pipeline.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,6 +67,11 @@ variables:
6767
value: $(Agent.TempDirectory)\v12.8
6868
${{ if eq(parameters.CudaVersion, '13.0') }}:
6969
value: $(Agent.TempDirectory)\v13.0
70+
- name: win_cudnn_home
71+
${{ if eq(parameters.CudaVersion, '12.8') }}:
72+
value: ''
73+
${{ if eq(parameters.CudaVersion, '13.0') }}:
74+
value: $(Agent.TempDirectory)\9.14.0.64_cuda13
7075
# CMAKE_CUDA_ARCHITECTURES for Windows nuget packaging
7176
- name: CudaArchs
7277
${{ if eq(parameters.CudaVersion, '12.8') }}:
@@ -133,6 +138,7 @@ extends:
133138
PreReleaseVersionSuffixString: ${{ parameters.PreReleaseVersionSuffixString }}
134139
PreReleaseVersionSuffixNumber: ${{ parameters.PreReleaseVersionSuffixNumber }}
135140
CudaArchs: ${{ variables.CudaArchs }}
141+
win_cudnn_home: ${{ variables.win_cudnn_home }}
136142

137143
- template: stages/download-java-tools-stage.yml
138144

tools/ci_build/github/azure-pipelines/stages/nuget-combine-cuda-stage.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,9 @@ parameters:
4747

4848
- name: CudaArchs
4949
type: string
50+
- name: win_cudnn_home
51+
type: string
52+
default: ''
5053

5154
stages:
5255
- template: nuget-linux-cuda-packaging-stage.yml
@@ -71,6 +74,7 @@ stages:
7174
PreReleaseVersionSuffixString: ${{ parameters.PreReleaseVersionSuffixString }}
7275
PreReleaseVersionSuffixNumber: ${{ parameters.PreReleaseVersionSuffixNumber }}
7376
CudaArchs: ${{ parameters.CudaArchs }}
77+
win_cudnn_home: ${{ parameters.win_cudnn_home }}
7478

7579
- template: nuget-cuda-packaging-stage.yml
7680
parameters:

tools/ci_build/github/azure-pipelines/stages/nuget-linux-cuda-packaging-stage.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -207,6 +207,7 @@ stages:
207207

208208
- task: CmdLine@2
209209
displayName: 'Test C API application for GPU package'
210+
condition: and(succeeded(), ne(variables['CUDA_VERSION_MAJOR'], '13'))
210211
inputs:
211212
script: |
212213
docker run -e SYSTEM_COLLECTIONURI --gpus all -e CFLAGS="-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -fcf-protection -O3 -Wl,--strip-all" -e CXXFLAGS="-Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -fstack-clash-protection -fcf-protection -O3 -Wl,--strip-all" -e NVIDIA_VISIBLE_DEVICES=all --rm --volume /data/models:/data/models --volume $(Build.SourcesDirectory):/src_dir \

tools/ci_build/github/azure-pipelines/stages/nuget-win-cuda-packaging-stage.yml

Lines changed: 11 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,9 @@ parameters:
5252

5353
- name: CudaArchs
5454
type: string
55+
- name: win_cudnn_home
56+
type: string
57+
default: ''
5558

5659
stages:
5760
# Windows CUDA without TensorRT Packaging
@@ -66,7 +69,10 @@ stages:
6669
msbuildPlatform: x64
6770
packageName: x64-cuda
6871
CudaVersion: ${{ parameters.CudaVersion }}
69-
buildparameter: --use_cuda --cuda_home=${{ parameters.win_cuda_home }} --enable_onnx_tests --enable_wcos --parallel 4 --nvcc_threads 1 --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=${{ parameters.CudaArchs }}"
72+
${{ if ne(parameters.win_cudnn_home, '') }}:
73+
buildparameter: --use_cuda --cuda_home=${{ parameters.win_cuda_home }} --cudnn_home=${{ parameters.win_cudnn_home }} --enable_onnx_tests --enable_wcos --parallel 4 --nvcc_threads 1 --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=${{ parameters.CudaArchs }}"
74+
${{ else }}:
75+
buildparameter: --use_cuda --cuda_home=${{ parameters.win_cuda_home }} --enable_onnx_tests --enable_wcos --parallel 4 --nvcc_threads 1 --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=${{ parameters.CudaArchs }}"
7076
runTests: ${{ parameters.RunOnnxRuntimeTests }}
7177
buildJava: ${{ parameters.buildJava }}
7278
java_artifact_id: onnxruntime_gpu
@@ -86,7 +92,10 @@ stages:
8692
msbuildPlatform: x64
8793
CudaVersion: ${{ parameters.CudaVersion }}
8894
packageName: x64-tensorrt
89-
buildparameter: --use_tensorrt --tensorrt_home=${{ parameters.win_trt_home }} --cuda_home=${{ parameters.win_cuda_home }} --enable_onnx_tests --enable_wcos --parallel 4 --nvcc_threads 1 --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=${{ parameters.CudaArchs }}"
95+
${{ if ne(parameters.win_cudnn_home, '') }}:
96+
buildparameter: --use_tensorrt --tensorrt_home=${{ parameters.win_trt_home }} --cuda_home=${{ parameters.win_cuda_home }} --cudnn_home=${{ parameters.win_cudnn_home }} --enable_onnx_tests --enable_wcos --parallel 4 --nvcc_threads 1 --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=${{ parameters.CudaArchs }}"
97+
${{ else }}:
98+
buildparameter: --use_tensorrt --tensorrt_home=${{ parameters.win_trt_home }} --cuda_home=${{ parameters.win_cuda_home }} --enable_onnx_tests --enable_wcos --parallel 4 --nvcc_threads 1 --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=${{ parameters.CudaArchs }}"
9099
runTests: ${{ parameters.RunOnnxRuntimeTests }}
91100
buildJava: ${{ parameters.buildJava }}
92101
java_artifact_id: onnxruntime_gpu

0 commit comments

Comments
 (0)