Add CUDA 13.2 support to CI and release workflows (#5610)

gchalump · meta-codesync[bot] · commit 1acffdbaef5e · 2026-04-16T08:04:07.000-07:00
Summary: Pull Request resolved: #5610; X-link: https://github.com/facebookresearch/FBGEMM/pull/2563 -- Add CUDA 13.2.0 to the FBGEMM GPU CI and release build matrices; the default publish version remains 13.0.2. Changes: (1) generate_ci_matrix.py: add 13.2.0 to cuda_versions() for the default and genai targets; (2) fbgemm_gpu_release_cuda.yml: add 13.2.0 to the release options; (3) fbgemm_gpu_release_genai.yml: add 13.2.0 to the genai release options and to the build/test matrices. Differential Revision: D100229467
diff --git a/.github/scripts/fbgemm_gpu_build.bash b/.github/scripts/fbgemm_gpu_build.bash
@@ -315,7 +315,7 @@ __configure_fbgemm_gpu_build_cuda () {
         local arch_list="9.0a"
       fi
 
-    elif  [[ $cuda_version_nvcc == *"V13.0"* ]] ||
+    elif  [[ $cuda_version_nvcc == *"V13"* ]] ||
           [[ $cuda_version_nvcc == *"V12.9"* ]] ||
           [[ $cuda_version_nvcc == *"V12.8"* ]]; then
       # NOTE: If we reach this point, then we are building the package for
diff --git a/.github/scripts/fbgemm_gpu_integration.bash b/.github/scripts/fbgemm_gpu_integration.bash
@@ -284,12 +284,14 @@ integration_fbgemm_gpu_install_matrix_run () {
       12.8.1
       12.9.1
       13.0.2
+      13.2.0
     )
   elif [ "$variant_type" == "genai" ]; then
     local variant_versions=(
       12.6.3
       12.8.1
       13.0.2
+      13.2.0
     )
   elif [ "$variant_type" == "rocm" ]; then
     local variant_versions=(
diff --git a/.github/scripts/generate_ci_matrix.py b/.github/scripts/generate_ci_matrix.py
@@ -304,10 +304,10 @@ def cuda_versions(self) -> List[str]:
             # FBGEMM HSTU is expensive, so conserve CI resources
             return ["12.8.1"]
         elif self.target == TARGET_GENAI:
-            return ["12.6.3", "12.8.1", "12.9.1", "13.0.2"]
+            return ["12.6.3", "12.8.1", "12.9.1", "13.0.2", "13.2.0"]
         else:
             # GenAI is unable to support 11.8.0 anymore as of https://github.com/pytorch/FBGEMM/pull/4138
-            return ["12.6.3", "12.8.1", "12.9.1", "13.0.2"]
+            return ["12.6.3", "12.8.1", "12.9.1", "13.0.2", "13.2.0"]
 
     def rocm_versions(self) -> List[str]:
         if GitRepo.ref() == REFS_MAIN and GitRepo.event_name() == EVENT_NAME_PUSH:
diff --git a/.github/scripts/nova_dir.bash b/.github/scripts/nova_dir.bash
@@ -22,7 +22,8 @@ fi
 ## Overwrite existing ENV VAR in Nova
 if [[ "$CONDA_ENV" != "" ]]; then export CONDA_RUN="conda run --no-capture-output -p ${CONDA_ENV}" && echo "$CONDA_RUN"; fi
 
-if [[ "$CU_VERSION" == "cu130" ]] ||
+if [[ "$CU_VERSION" == "cu132" ]] ||
+     [[ "$CU_VERSION" == "cu130" ]] ||
      [[ "$CU_VERSION" == "cu129" ]] ||
      [[ "$CU_VERSION" == "cu128" ]]; then
     export TORCH_CUDA_ARCH_LIST="8.0;9.0a;10.0a;12.0a"
diff --git a/.github/scripts/utils_cuda.bash b/.github/scripts/utils_cuda.bash
@@ -35,9 +35,21 @@ __set_cuda_symlinks_envvars () {
 
     echo "[INSTALL] Copying nvtx3 headers ..."
     # shellcheck disable=SC2086
-    print_exec cp -r ${conda_prefix}/nsight-compute*/host/*/nvtx/include/nvtx3/* ${conda_prefix}/include/
-    # shellcheck disable=SC2086
-    print_exec cp -r ${conda_prefix}/nsight-compute*/host/*/nvtx/include/nvtx3/* ${new_cuda_home}/include/
+    if compgen -G "${conda_prefix}/nsight-compute*/host/*/nvtx/include/nvtx3/*" > /dev/null 2>&1; then
+      # Copy nvtx3 headers from nsight-compute if available
+      # shellcheck disable=SC2086
+      print_exec cp -r ${conda_prefix}/nsight-compute*/host/*/nvtx/include/nvtx3/* ${conda_prefix}/include/
+      # shellcheck disable=SC2086
+      print_exec cp -r ${conda_prefix}/nsight-compute*/host/*/nvtx/include/nvtx3/* ${new_cuda_home}/include/
+    elif [ -d "${conda_prefix}/include/nvtx3" ]; then
+      # nvtx3 headers already available from cuda-nvtx package
+      echo "[INSTALL] nvtx3 headers already present in ${conda_prefix}/include/nvtx3 (from cuda-nvtx)"
+      if [ ! -d "${new_cuda_home}/include/nvtx3" ]; then
+        print_exec cp -r "${conda_prefix}/include/nvtx3" "${new_cuda_home}/include/"
+      fi
+    else
+      echo "[INSTALL] WARNING: nvtx3 headers not found in nsight-compute or cuda-nvtx"
+    fi
   fi
 
   echo "[INSTALL] Appending libcuda.so path to LD_LIBRARY_PATH ..."
@@ -220,8 +232,17 @@ install_cuda () {
       cuda-nvrtc-dev \
       cuda-cupti-dev \
       cuda-profiler-api \
-      cuda-opencl-dev \
-      nsight-compute) || return 1
+      cuda-opencl-dev) || return 1
+
+    # NOTE: nsight-compute is installed separately as best-effort because for
+    # newer CUDA versions (e.g. 13.2+), it may have unresolvable dependency
+    # conflicts on conda-forge (libxkbcommon -> libxml2-16 vs clangxx ->
+    # libllvm16 -> libxml2 <2.14).  The nvtx3 headers it provides are handled
+    # in __set_cuda_symlinks_envvars with a fallback to cuda-nvtx.
+    # shellcheck disable=SC2086
+    (exec_with_retries 3 conda install ${env_prefix} -c conda-forge --override-channels -y \
+      "cuda-version=${cuda_version%.*}" \
+      nsight-compute) || echo "[INSTALL] WARNING: nsight-compute could not be installed, skipping (nvtx3 headers will be sourced from cuda-nvtx)"
   fi
 
   # Set the symlinks and environment variables not covered by conda install
diff --git a/.github/workflows/fbgemm_gpu_release_cuda.yml b/.github/workflows/fbgemm_gpu_release_cuda.yml
@@ -34,7 +34,7 @@ on:
         description: CUDA Version to Use for Building Artifact
         type: choice
         required: false
-        options: [ "12.6.3", "12.8.1", "12.9.1", "13.0.2" ]
+        options: [ "12.6.3", "12.8.1", "12.9.1", "13.0.2", "13.2.0" ]
         default: "13.0.2"
       publish-to-pypi:
         description: Publish Artifact to PyPI
diff --git a/.github/workflows/fbgemm_gpu_release_genai.yml b/.github/workflows/fbgemm_gpu_release_genai.yml
@@ -34,7 +34,7 @@ on:
         description: CUDA Version to Use for Building Artifact
         type: choice
         required: false
-        options: [ "12.6.3", "12.8.1", "12.9.1", "13.0.2" ]
+        options: [ "12.6.3", "12.8.1", "12.9.1", "13.0.2", "13.2.0" ]
         default: "13.0.2"
       publish-to-pypi:
         description: Publish Artifact to PyPI
@@ -72,7 +72,7 @@ jobs:
           { arch: x86, instance: "linux.12xlarge.memory" },
         ]
         python-version: [ "3.10", "3.11", "3.12", "3.13", "3.14" ]
-        cuda-version: [ "12.6.3", "12.8.1", "13.0.2" ]
+        cuda-version: [ "12.6.3", "12.8.1", "13.0.2", "13.2.0" ]
 
     steps:
     - name: Setup Build Container
@@ -146,7 +146,7 @@ jobs:
           { arch: x86, instance: "linux.g5.4xlarge.nvidia.gpu" },
         ]
         python-version: [ "3.10", "3.11", "3.12", "3.13", "3.14" ]
-        cuda-version: [ "12.6.3", "12.8.1", "13.0.2" ]
+        cuda-version: [ "12.6.3", "12.8.1", "13.0.2", "13.2.0" ]
     needs: build_artifact
 
     steps: