Skip to content

Commit 294eee4

Browse files
leofang and claude committed
Match CUDA_VER to TORCH_CUDA in nightly pytorch matrix
CUDA_VER in the test environment should match TORCH_CUDA in major.minor. BUILD_CUDA_VER (from build-ctk-ver input) is used for artifact names, so CUDA_VER can differ. - cu126 → CUDA_VER: 12.6.3 (was 12.9.1) - cu130 → CUDA_VER: 13.0.2 (was 13.2.1) For CUDA 12 entries, USE_BACKPORT_BINDINGS kicks in automatically since BUILD_CUDA_MAJOR (13) != TEST_CUDA_MAJOR (12), pulling bindings from the backport branch. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 0b7cc50 commit 294eee4

5 files changed

Lines changed: 137 additions & 21 deletions

File tree

.github/workflows/test-wheel-linux.yml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -368,6 +368,10 @@ jobs:
368368
pytest -rxXs -v --durations=0 tests/test_utils.py tests/example_tests/
369369
popd
370370
371+
- name: Patch numba-cuda (upstream bug workarounds)
372+
if: ${{ inputs.test-mode == 'nightly-numba-cuda' }}
373+
run: python ci/tools/patch-numba-cuda
374+
371375
- name: Run numba-cuda tests
372376
if: ${{ inputs.test-mode == 'nightly-numba-cuda' }}
373377
run: python -m numba.runtests numba.cuda.tests

.github/workflows/test-wheel-windows.yml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -356,6 +356,11 @@ jobs:
356356
pytest -rxXs -v --durations=0 tests/test_utils.py tests/example_tests/
357357
popd
358358
359+
- name: Patch numba-cuda (upstream bug workarounds)
360+
if: ${{ inputs.test-mode == 'nightly-numba-cuda' }}
361+
shell: bash --noprofile --norc -xeuo pipefail {0}
362+
run: python ci/tools/patch-numba-cuda
363+
359364
- name: Run numba-cuda tests
360365
if: ${{ inputs.test-mode == 'nightly-numba-cuda' }}
361366
shell: bash --noprofile --norc -xeuo pipefail {0}

ci/test-matrix.yml

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -64,10 +64,10 @@ linux:
6464
- { ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.2.1', LOCAL_CTK: '0', GPU: 'rtx4090', GPU_COUNT: '1', DRIVER: 'latest', FLAVOR: 'wsl' }
6565
nightly:
6666
# nightly-pytorch (amd64 only — PyTorch does not ship arm64 GPU wheels)
67-
- { MODE: 'nightly-pytorch', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', TORCH_VER: 'latest', TORCH_CUDA: 'cu126' }
68-
- { MODE: 'nightly-pytorch', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.2.1', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', TORCH_VER: 'latest', TORCH_CUDA: 'cu130' }
69-
- { MODE: 'nightly-pytorch', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', TORCH_VER: '2.9.1', TORCH_CUDA: 'cu126' }
70-
- { MODE: 'nightly-pytorch', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.2.1', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', TORCH_VER: '2.9.1', TORCH_CUDA: 'cu130' }
67+
- { MODE: 'nightly-pytorch', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.6.3', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', TORCH_VER: 'latest', TORCH_CUDA: 'cu126' }
68+
- { MODE: 'nightly-pytorch', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', TORCH_VER: 'latest', TORCH_CUDA: 'cu130' }
69+
- { MODE: 'nightly-pytorch', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.6.3', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', TORCH_VER: '2.9.1', TORCH_CUDA: 'cu126' }
70+
- { MODE: 'nightly-pytorch', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', TORCH_VER: '2.9.1', TORCH_CUDA: 'cu130' }
7171
# nightly-numba-cuda
7272
- { MODE: 'nightly-numba-cuda', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest' }
7373
- { MODE: 'nightly-numba-cuda', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.2.1', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest' }
@@ -97,10 +97,10 @@ windows:
9797
- { ARCH: 'amd64', PY_VER: '3.14t', CUDA_VER: '13.2.1', LOCAL_CTK: '0', GPU: 'a100', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'MCDM' }
9898
nightly:
9999
# nightly-pytorch
100-
- { MODE: 'nightly-pytorch', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC', TORCH_VER: 'latest', TORCH_CUDA: 'cu126' }
101-
- { MODE: 'nightly-pytorch', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.2.1', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC', TORCH_VER: 'latest', TORCH_CUDA: 'cu130' }
102-
- { MODE: 'nightly-pytorch', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC', TORCH_VER: '2.9.1', TORCH_CUDA: 'cu126' }
103-
- { MODE: 'nightly-pytorch', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.2.1', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC', TORCH_VER: '2.9.1', TORCH_CUDA: 'cu130' }
100+
- { MODE: 'nightly-pytorch', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.6.3', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC', TORCH_VER: 'latest', TORCH_CUDA: 'cu126' }
101+
- { MODE: 'nightly-pytorch', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC', TORCH_VER: 'latest', TORCH_CUDA: 'cu130' }
102+
- { MODE: 'nightly-pytorch', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.6.3', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC', TORCH_VER: '2.9.1', TORCH_CUDA: 'cu126' }
103+
- { MODE: 'nightly-pytorch', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.0.2', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC', TORCH_VER: '2.9.1', TORCH_CUDA: 'cu130' }
104104
# nightly-numba-cuda
105105
- { MODE: 'nightly-numba-cuda', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '12.9.1', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC' }
106106
- { MODE: 'nightly-numba-cuda', ARCH: 'amd64', PY_VER: '3.12', CUDA_VER: '13.2.1', LOCAL_CTK: '0', GPU: 'l4', GPU_COUNT: '1', DRIVER: 'latest', DRIVER_MODE: 'TCC' }

ci/tools/patch-numba-cuda

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
#!/usr/bin/env python3
2+
3+
# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
4+
#
5+
# SPDX-License-Identifier: Apache-2.0
6+
7+
"""Patch known upstream bugs in installed numba-cuda before running tests.
8+
9+
These patches are temporary workarounds; each should be removed once the
10+
corresponding upstream fix is released.
11+
"""
12+
13+
import pathlib
14+
import site
15+
16+
17+
def patch_test_linker_indent():
18+
"""Fix indentation bug in test_linker.py.
19+
20+
add_from_numba and debuggable_kernel reference test_device_functions_ltoir
21+
which is only defined inside ``if TEST_BIN_DIR:``. They must be indented
22+
under that block.
23+
24+
Upstream: https://github.com/NVIDIA/numba-cuda/blob/200c2b96/
25+
numba_cuda/numba/cuda/tests/cudadrv/test_linker.py#L120
26+
"""
27+
# Find the installed test_linker.py across all site-packages paths
28+
rel_path = pathlib.Path("numba_cuda", "numba", "cuda", "tests", "cudadrv", "test_linker.py")
29+
target = None
30+
for sp in site.getsitepackages():
31+
candidate = pathlib.Path(sp) / rel_path
32+
if candidate.exists():
33+
target = candidate
34+
break
35+
if target is None:
36+
# Fallback: locate via importlib
37+
try:
38+
import numba_cuda
39+
40+
pkg_dir = pathlib.Path(numba_cuda.__file__).parent
41+
candidate = pkg_dir / "numba" / "cuda" / "tests" / "cudadrv" / "test_linker.py"
42+
if candidate.exists():
43+
target = candidate
44+
except ImportError:
45+
pass
46+
if target is None:
47+
print("SKIP: test_linker.py not found in any site-packages")
48+
return
49+
print(f"Found: {target}")
50+
51+
src = target.read_text()
52+
53+
old = (
54+
"\nadd_from_numba = cuda.declare_device(\n"
55+
' "add_from_numba",\n'
56+
' "int32(int32, int32)",\n'
57+
" link=[test_device_functions_ltoir],\n"
58+
")\n"
59+
"\n"
60+
"\n"
61+
"def debuggable_kernel(result):\n"
62+
" i = cuda.grid(1)\n"
63+
" result[i] = add_from_numba(i, i)"
64+
)
65+
66+
new = (
67+
"\n add_from_numba = cuda.declare_device(\n"
68+
' "add_from_numba",\n'
69+
' "int32(int32, int32)",\n'
70+
" link=[test_device_functions_ltoir],\n"
71+
" )\n"
72+
"\n"
73+
" def debuggable_kernel(result):\n"
74+
" i = cuda.grid(1)\n"
75+
" result[i] = add_from_numba(i, i)"
76+
)
77+
78+
if old not in src:
79+
print(f"SKIP: indent patch target not found in {target} (already patched?)")
80+
return
81+
82+
src = src.replace(old, new)
83+
84+
# Part 2: add @unittest.skipUnless to test_debug_kernel_with_lto.
85+
# After moving debuggable_kernel inside `if TEST_BIN_DIR:`, the symbol
86+
# is undefined when the env var is not set, so the test must be skipped.
87+
old_test = " def test_debug_kernel_with_lto(self):\n"
88+
new_test = (
89+
' @unittest.skipUnless(TEST_BIN_DIR, "NUMBA_CUDA_TEST_BIN_DIR not set")\n'
90+
" def test_debug_kernel_with_lto(self):\n"
91+
)
92+
93+
if old_test not in src:
94+
print(f"SKIP: skip-decorator patch target not found in {target}")
95+
elif new_test in src:
96+
print("SKIP: skip decorator already present")
97+
else:
98+
src = src.replace(old_test, new_test, 1)
99+
100+
target.write_text(src)
101+
print(f"PATCHED: {target}")
102+
103+
104+
if __name__ == "__main__":
105+
patch_test_linker_indent()

ci/tools/run-tests

Lines changed: 15 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -99,10 +99,6 @@ elif [[ "${test_module}" == "nightly-pytorch" || "${test_module}" == "nightly-nu
9999
# Install ALL wheels (pathfinder + bindings + core) and the optional dep
100100
# in a single pip call so pip resolves version constraints in one shot
101101
# and avoids costly uninstall/reinstall cycles.
102-
#
103-
# We pushd into cuda_core/ so that --group reads test dependency groups
104-
# from cuda_core/pyproject.toml (needed for numpy, cupy, ml_dtypes, etc.).
105-
# All other wheel paths use ../ to reach the repo root.
106102

107103
TEST_CUDA_MAJOR="$(cut -d '.' -f 1 <<< ${CUDA_VER})"
108104
CUDA_VER_MINOR="$(cut -d '.' -f 1-2 <<< "${CUDA_VER}")"
@@ -112,14 +108,11 @@ elif [[ "${test_module}" == "nightly-pytorch" || "${test_module}" == "nightly-nu
112108
FREE_THREADING+="-ft"
113109
fi
114110

115-
# Resolve the pathfinder wheel path before pushd (it's relative to repo root).
116-
# CUDA_BINDINGS_ARTIFACTS_DIR and CUDA_CORE_ARTIFACTS_DIR are already absolute
117-
# (set via realpath in env-vars).
111+
# Resolve pathfinder wheel to absolute path before pushd.
112+
# CUDA_BINDINGS_ARTIFACTS_DIR and CUDA_CORE_ARTIFACTS_DIR are already
113+
# absolute (set via realpath in env-vars).
118114
PATHFINDER_WHL=($(realpath ./cuda_pathfinder/*.whl))
119115

120-
pushd ./cuda_core
121-
122-
# Build wheel specs (paths are absolute, so pushd doesn't affect them)
123116
BINDINGS_WHL=("${CUDA_BINDINGS_ARTIFACTS_DIR}"/*.whl)
124117
if [[ "${LOCAL_CTK}" != 1 ]]; then
125118
BINDINGS_WHL=("${BINDINGS_WHL[0]}[all]")
@@ -130,13 +123,16 @@ elif [[ "${test_module}" == "nightly-pytorch" || "${test_module}" == "nightly-nu
130123
CORE_WHL=("${CORE_WHL[0]}[cu${TEST_CUDA_MAJOR}]")
131124
fi
132125

133-
# All packages in one pip call: pathfinder + bindings + core + test deps + optional dep
126+
# pushd so --group reads test dependency groups from cuda_core/pyproject.toml.
127+
# The explicit cuda-toolkit[...]==X.Y.* pin overrides the group's looser ==X.*.
128+
pushd ./cuda_core
129+
134130
PIP_ARGS=(
135131
"${PATHFINDER_WHL[@]}"
136132
"${BINDINGS_WHL[@]}"
137133
"${CORE_WHL[@]}"
138134
--group "test-cu${TEST_CUDA_MAJOR}${FREE_THREADING}"
139-
"cuda-toolkit==${CUDA_VER_MINOR}.*"
135+
"cuda-toolkit[nvrtc,cudart,nvcc,nvjitlink,nvfatbin]==${CUDA_VER_MINOR}.*"
140136
)
141137

142138
if [[ "${test_module}" == "nightly-pytorch" ]]; then
@@ -150,7 +146,13 @@ elif [[ "${test_module}" == "nightly-pytorch" || "${test_module}" == "nightly-nu
150146
PIP_ARGS+=(--extra-index-url "https://download.pytorch.org/whl/${TORCH_CUDA}")
151147
elif [[ "${test_module}" == "nightly-numba-cuda" ]]; then
152148
echo "Installing pathfinder + bindings + core + test deps + numba-cuda"
153-
PIP_ARGS+=("numba-cuda[cu${TEST_CUDA_MAJOR}]")
149+
# numba-cuda's test-cuXX group deps (can't use --group for a wheel install):
150+
PIP_ARGS+=(
151+
"numba-cuda[cu${TEST_CUDA_MAJOR}]"
152+
"cupy-cuda${TEST_CUDA_MAJOR}x"
153+
"cuda-toolkit[curand,cublas]==${CUDA_VER_MINOR}.*"
154+
psutil cffi pytest-xdist pytest-benchmark filecheck ml_dtypes statistics
155+
)
154156
fi
155157

156158
pip install "${PIP_ARGS[@]}"

0 commit comments

Comments
 (0)