Skip to content

Commit 1acffdb

Browse files
gchalump and meta-codesync[bot]
authored and committed
Add CUDA 13.2 support to CI and release workflows (#5610)
Summary:
Pull Request resolved: #5610

X-link: https://github.com/facebookresearch/FBGEMM/pull/2563

Add CUDA 13.2.0 to the FBGEMM GPU CI and release build matrices. Default publish version remains 13.0.2.

Changes:
- generate_ci_matrix.py: Add 13.2.0 to cuda_versions() for default and genai targets
- fbgemm_gpu_release_cuda.yml: Add 13.2.0 to release options
- fbgemm_gpu_release_genai.yml: Add 13.2.0 to genai release options and build/test matrices

Differential Revision: D100229467
1 parent 434db14 commit 1acffdb

7 files changed

Lines changed: 37 additions & 13 deletions

File tree

.github/scripts/fbgemm_gpu_build.bash

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -315,7 +315,7 @@ __configure_fbgemm_gpu_build_cuda () {
315315
local arch_list="9.0a"
316316
fi
317317

318-
elif [[ $cuda_version_nvcc == *"V13.0"* ]] ||
318+
elif [[ $cuda_version_nvcc == *"V13"* ]] ||
319319
[[ $cuda_version_nvcc == *"V12.9"* ]] ||
320320
[[ $cuda_version_nvcc == *"V12.8"* ]]; then
321321
# NOTE: If we reach this point, then we are building the package for

.github/scripts/fbgemm_gpu_integration.bash

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -284,12 +284,14 @@ integration_fbgemm_gpu_install_matrix_run () {
284284
12.8.1
285285
12.9.1
286286
13.0.2
287+
13.2.0
287288
)
288289
elif [ "$variant_type" == "genai" ]; then
289290
local variant_versions=(
290291
12.6.3
291292
12.8.1
292293
13.0.2
294+
13.2.0
293295
)
294296
elif [ "$variant_type" == "rocm" ]; then
295297
local variant_versions=(

.github/scripts/generate_ci_matrix.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -304,10 +304,10 @@ def cuda_versions(self) -> List[str]:
304304
# FBGEMM HSTU is expensive, so conserve CI resources
305305
return ["12.8.1"]
306306
elif self.target == TARGET_GENAI:
307-
return ["12.6.3", "12.8.1", "12.9.1", "13.0.2"]
307+
return ["12.6.3", "12.8.1", "12.9.1", "13.0.2", "13.2.0"]
308308
else:
309309
# GenAI is unable to support 11.8.0 anymore as of https://github.com/pytorch/FBGEMM/pull/4138
310-
return ["12.6.3", "12.8.1", "12.9.1", "13.0.2"]
310+
return ["12.6.3", "12.8.1", "12.9.1", "13.0.2", "13.2.0"]
311311

312312
def rocm_versions(self) -> List[str]:
313313
if GitRepo.ref() == REFS_MAIN and GitRepo.event_name() == EVENT_NAME_PUSH:

.github/scripts/nova_dir.bash

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -22,7 +22,8 @@ fi
2222
## Overwrite existing ENV VAR in Nova
2323
if [[ "$CONDA_ENV" != "" ]]; then export CONDA_RUN="conda run --no-capture-output -p ${CONDA_ENV}" && echo "$CONDA_RUN"; fi
2424

25-
if [[ "$CU_VERSION" == "cu130" ]] ||
25+
if [[ "$CU_VERSION" == "cu132" ]] ||
26+
[[ "$CU_VERSION" == "cu130" ]] ||
2627
[[ "$CU_VERSION" == "cu129" ]] ||
2728
[[ "$CU_VERSION" == "cu128" ]]; then
2829
export TORCH_CUDA_ARCH_LIST="8.0;9.0a;10.0a;12.0a"

.github/scripts/utils_cuda.bash

Lines changed: 26 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -35,9 +35,21 @@ __set_cuda_symlinks_envvars () {
3535

3636
echo "[INSTALL] Copying nvtx3 headers ..."
3737
# shellcheck disable=SC2086
38-
print_exec cp -r ${conda_prefix}/nsight-compute*/host/*/nvtx/include/nvtx3/* ${conda_prefix}/include/
39-
# shellcheck disable=SC2086
40-
print_exec cp -r ${conda_prefix}/nsight-compute*/host/*/nvtx/include/nvtx3/* ${new_cuda_home}/include/
38+
if compgen -G "${conda_prefix}/nsight-compute*/host/*/nvtx/include/nvtx3/*" > /dev/null 2>&1; then
39+
# Copy nvtx3 headers from nsight-compute if available
40+
# shellcheck disable=SC2086
41+
print_exec cp -r ${conda_prefix}/nsight-compute*/host/*/nvtx/include/nvtx3/* ${conda_prefix}/include/
42+
# shellcheck disable=SC2086
43+
print_exec cp -r ${conda_prefix}/nsight-compute*/host/*/nvtx/include/nvtx3/* ${new_cuda_home}/include/
44+
elif [ -d "${conda_prefix}/include/nvtx3" ]; then
45+
# nvtx3 headers already available from cuda-nvtx package
46+
echo "[INSTALL] nvtx3 headers already present in ${conda_prefix}/include/nvtx3 (from cuda-nvtx)"
47+
if [ ! -d "${new_cuda_home}/include/nvtx3" ]; then
48+
print_exec cp -r "${conda_prefix}/include/nvtx3" "${new_cuda_home}/include/"
49+
fi
50+
else
51+
echo "[INSTALL] WARNING: nvtx3 headers not found in nsight-compute or cuda-nvtx"
52+
fi
4153
fi
4254

4355
echo "[INSTALL] Appending libcuda.so path to LD_LIBRARY_PATH ..."
@@ -220,8 +232,17 @@ install_cuda () {
220232
cuda-nvrtc-dev \
221233
cuda-cupti-dev \
222234
cuda-profiler-api \
223-
cuda-opencl-dev \
224-
nsight-compute) || return 1
235+
cuda-opencl-dev) || return 1
236+
237+
# NOTE: nsight-compute is installed separately as best-effort because for
238+
# newer CUDA versions (e.g. 13.2+), it may have unresolvable dependency
239+
# conflicts on conda-forge (libxkbcommon -> libxml2-16 vs clangxx ->
240+
# libllvm16 -> libxml2 <2.14). The nvtx3 headers it provides are handled
241+
# in __set_cuda_symlinks_envvars with a fallback to cuda-nvtx.
242+
# shellcheck disable=SC2086
243+
(exec_with_retries 3 conda install ${env_prefix} -c conda-forge --override-channels -y \
244+
"cuda-version=${cuda_version%.*}" \
245+
nsight-compute) || echo "[INSTALL] WARNING: nsight-compute could not be installed, skipping (nvtx3 headers will be sourced from cuda-nvtx)"
225246
fi
226247

227248
# Set the symlinks and environment variables not covered by conda install

.github/workflows/fbgemm_gpu_release_cuda.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,7 @@ on:
3434
description: CUDA Version to Use for Building Artifact
3535
type: choice
3636
required: false
37-
options: [ "12.6.3", "12.8.1", "12.9.1", "13.0.2" ]
37+
options: [ "12.6.3", "12.8.1", "12.9.1", "13.0.2", "13.2.0" ]
3838
default: "13.0.2"
3939
publish-to-pypi:
4040
description: Publish Artifact to PyPI

.github/workflows/fbgemm_gpu_release_genai.yml

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -34,7 +34,7 @@ on:
3434
description: CUDA Version to Use for Building Artifact
3535
type: choice
3636
required: false
37-
options: [ "12.6.3", "12.8.1", "12.9.1", "13.0.2" ]
37+
options: [ "12.6.3", "12.8.1", "12.9.1", "13.0.2", "13.2.0" ]
3838
default: "13.0.2"
3939
publish-to-pypi:
4040
description: Publish Artifact to PyPI
@@ -72,7 +72,7 @@ jobs:
7272
{ arch: x86, instance: "linux.12xlarge.memory" },
7373
]
7474
python-version: [ "3.10", "3.11", "3.12", "3.13", "3.14" ]
75-
cuda-version: [ "12.6.3", "12.8.1", "13.0.2" ]
75+
cuda-version: [ "12.6.3", "12.8.1", "13.0.2", "13.2.0" ]
7676

7777
steps:
7878
- name: Setup Build Container
@@ -146,7 +146,7 @@ jobs:
146146
{ arch: x86, instance: "linux.g5.4xlarge.nvidia.gpu" },
147147
]
148148
python-version: [ "3.10", "3.11", "3.12", "3.13", "3.14" ]
149-
cuda-version: [ "12.6.3", "12.8.1", "13.0.2" ]
149+
cuda-version: [ "12.6.3", "12.8.1", "13.0.2", "13.2.0" ]
150150
needs: build_artifact
151151

152152
steps:

0 commit comments

Comments
 (0)