Skip to content

Commit e996d7b

Browse files
committed
Merge remote-tracking branch 'upstream/main' into rparolin/managed_mem_advise_prefetch
# Conflicts: # cuda_core/cuda/core/_memory/_memory_pool.pyx
2 parents a4a4ab3 + eac59b6 commit e996d7b

28 files changed

Lines changed: 1004 additions & 258 deletions

.github/actions/sccache-summary/action.yml

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,8 +6,6 @@ name: sccache summary
66
description: Parse sccache stats JSON and write a summary table to GITHUB_STEP_SUMMARY
77

88
# Inspired by NVIDIA/cccl's prepare-execution-summary.py (PR #3621).
9-
# Only counts C/C++ and CUDA language hits (excludes PTX/CUBIN which are
10-
# not included in sccache's compile_requests counter).
119

1210
inputs:
1311
json-file:
@@ -47,10 +45,11 @@ runs:
4745
with open(json_file) as f:
4846
stats = json.load(f)["stats"]
4947
50-
# compile_requests includes non-compilation calls (linker, etc).
51-
# Use cache_hits + cache_misses as the denominator to match sccache's
52-
# own "Cache hits rate" which only counts actual compilation requests.
53-
counted_languages = {"C/C++", "CUDA"}
48+
# compile_requests only counts top-level nvcc invocations, but each
49+
# invocation spawns sub-tool compilations (cudafe++, cicc, ptxas) that
50+
# sccache tracks under separate language keys. Count all of them so
51+
# the reported rate matches sccache's own "Cache hits rate".
52+
counted_languages = {"C/C++", "CUDA", "CUDA (Device code)", "PTX", "CUBIN"}
5453
hits = sum(
5554
v for k, v in stats.get("cache_hits", {}).get("counts", {}).items()
5655
if k in counted_languages

.github/workflows/build-wheel.yml

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -401,11 +401,7 @@ jobs:
401401
402402
OLD_BRANCH=$(yq '.backport_branch' ci/versions.yml)
403403
OLD_BASENAME="cuda-bindings-python${PYTHON_VERSION_FORMATTED}-cuda*-${{ inputs.host-platform }}*"
404-
LATEST_PRIOR_RUN_ID=$(gh run list -b ${OLD_BRANCH} -L 1 -w "ci.yml" -s success -R NVIDIA/cuda-python --json databaseId | jq '.[]| .databaseId')
405-
if [[ "$LATEST_PRIOR_RUN_ID" == "" ]]; then
406-
echo "LATEST_PRIOR_RUN_ID not found!"
407-
exit 1
408-
fi
404+
LATEST_PRIOR_RUN_ID=$(./ci/tools/lookup-run-id --branch "${OLD_BRANCH}" NVIDIA/cuda-python "CI")
409405
410406
gh run download $LATEST_PRIOR_RUN_ID -p ${OLD_BASENAME} -R NVIDIA/cuda-python
411407
rm -rf ${OLD_BASENAME}-tests # exclude cython test artifacts

.github/workflows/ci-nightly.yml

Lines changed: 257 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,257 @@
1+
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
#
3+
# SPDX-License-Identifier: Apache-2.0
4+
5+
# Nightly CI pipeline that tests optional dependencies (PyTorch, numba-cuda)
6+
# against the latest cuda-python wheels built on main, and runs the standard
7+
# test suite on runners reserved for nightly-only use (e.g. arm64 l4×2).
8+
#
9+
# This workflow does NOT build wheels — it downloads them from the latest
10+
# successful CI run on main and runs integration/standard tests.
11+
12+
# Display name of this workflow.
name: 'CI: Nightly optional-deps'

# Runs sharing the same workflow, ref and trigger type form one group;
# a newer run in the group cancels the one still in progress.
concurrency:
  cancel-in-progress: true
  group: ${{ github.workflow }}-${{ github.ref }}-${{ github.event_name }}

on:
  # Manual trigger; optionally pins the CI run whose artifacts are tested.
  workflow_dispatch:
    inputs:
      run-id:
        type: string
        default: ""
        description: >
          Override the CI run ID to download artifacts from.
          Leave empty to auto-detect the latest successful main run.
  schedule:
    # Daily at 02:17 UTC, after the midnight main CI build finishes.
    # Minute 0 is avoided because GitHub documents high scheduled-workflow
    # load at the start of every hour, where queued jobs may be delayed or
    # dropped.
    - cron: '17 2 * * *'
32+
33+
jobs:
34+
find-wheels:
  # Resolves three values for the downstream test jobs: the CI run whose
  # artifacts are tested, the commit that run was built from, and the
  # `.cuda.build.version` entry of ci/versions.yml at that commit.
  runs-on: ubuntu-latest
  outputs:
    RUN_ID: ${{ steps.find.outputs.run_id }}
    HEAD_SHA: ${{ steps.find.outputs.head_sha }}
    CUDA_BUILD_VER: ${{ steps.find.outputs.cuda_build_ver }}
  steps:
    - name: Checkout repository
      uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd  # v6.0.2
      with:
        fetch-depth: 1

    - name: Find latest successful CI run on main
      id: find
      env:
        GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        # Expression values are handed to the script through the environment
        # rather than expanded into the script text, so a crafted value is
        # only ever treated as data, never parsed as shell code.
        INPUT_RUN_ID: ${{ inputs.run-id }}
        REPO: ${{ github.repository }}
      run: |
        if [[ -n "$INPUT_RUN_ID" ]]; then
          # Manual override: reject anything that is not a plain number
          # before it reaches the gh CLI.
          if [[ ! "$INPUT_RUN_ID" =~ ^[0-9]+$ ]]; then
            echo "::error::The run-id input must be a numeric CI run ID"
            exit 1
          fi
          RUN_ID="$INPUT_RUN_ID"
          HEAD_SHA=$(gh run view "$RUN_ID" \
            -R "$REPO" \
            --json headSha | jq -r '.headSha')
        else
          # lookup-run-id --branch --head-sha prints two lines: run_id then head_sha
          OUTPUT=$(./ci/tools/lookup-run-id --branch main --head-sha "$REPO" "CI")
          RUN_ID=$(echo "$OUTPUT" | sed -n '1p')
          HEAD_SHA=$(echo "$OUTPUT" | sed -n '2p')
        fi

        # An empty run ID would otherwise only surface later, in the
        # downstream jobs that consume the RUN_ID output.
        if [[ -z "$RUN_ID" ]]; then
          echo "::error::Could not resolve a CI run ID"
          exit 1
        fi

        if [[ -z "$HEAD_SHA" || "$HEAD_SHA" == "null" ]]; then
          echo "::error::Could not resolve head SHA for CI run $RUN_ID"
          exit 1
        fi

        # Read ci/versions.yml as it existed at the resolved commit, not as
        # it is in the current checkout.
        CUDA_BUILD_VER=$(gh api \
          "repos/$REPO/contents/ci/versions.yml?ref=$HEAD_SHA" \
          --jq '.content' \
          | base64 -d \
          | yq '.cuda.build.version')

        if [[ -z "$CUDA_BUILD_VER" || "$CUDA_BUILD_VER" == "null" ]]; then
          echo "::error::Could not resolve CUDA build version from $HEAD_SHA"
          exit 1
        fi

        {
          echo "run_id=$RUN_ID"
          echo "head_sha=$HEAD_SHA"
          echo "cuda_build_ver=$CUDA_BUILD_VER"
        } >> "$GITHUB_OUTPUT"
82+
83+
# ── PyTorch interop tests ──
84+
85+
test-pytorch-linux:
  # Calls the Linux wheel-test workflow in nightly-pytorch mode for linux-64,
  # using the run, commit and CUDA version resolved by find-wheels.
  # Only runs when the repository owner is nvidia.
  name: 'Nightly PyTorch (linux-64)'
  needs: find-wheels
  if: github.repository_owner == 'nvidia'
  uses: ./.github/workflows/test-wheel-linux.yml
  secrets: inherit
  permissions:
    actions: read
    contents: read
  with:
    host-platform: linux-64
    build-type: nightly
    test-mode: nightly-pytorch
    matrix_filter: 'map(select(.MODE == "nightly-pytorch"))'
    run-id: ${{ needs.find-wheels.outputs.RUN_ID }}
    sha: ${{ needs.find-wheels.outputs.HEAD_SHA }}
    build-ctk-ver: ${{ needs.find-wheels.outputs.CUDA_BUILD_VER }}
102+
103+
test-pytorch-linux-aarch64:
  # Calls the Linux wheel-test workflow in nightly-pytorch mode for
  # linux-aarch64, using the run, commit and CUDA version resolved by
  # find-wheels. Only runs when the repository owner is nvidia.
  name: 'Nightly PyTorch (linux-aarch64)'
  needs: find-wheels
  if: github.repository_owner == 'nvidia'
  uses: ./.github/workflows/test-wheel-linux.yml
  secrets: inherit
  permissions:
    actions: read
    contents: read
  with:
    host-platform: linux-aarch64
    build-type: nightly
    test-mode: nightly-pytorch
    matrix_filter: 'map(select(.MODE == "nightly-pytorch"))'
    run-id: ${{ needs.find-wheels.outputs.RUN_ID }}
    sha: ${{ needs.find-wheels.outputs.HEAD_SHA }}
    build-ctk-ver: ${{ needs.find-wheels.outputs.CUDA_BUILD_VER }}
120+
121+
test-pytorch-windows:
  # Calls the Windows wheel-test workflow in nightly-pytorch mode for win-64,
  # using the run, commit and CUDA version resolved by find-wheels.
  # Only runs when the repository owner is nvidia.
  name: 'Nightly PyTorch (win-64)'
  needs: find-wheels
  if: github.repository_owner == 'nvidia'
  uses: ./.github/workflows/test-wheel-windows.yml
  secrets: inherit
  permissions:
    actions: read
    contents: read
  with:
    host-platform: win-64
    build-type: nightly
    test-mode: nightly-pytorch
    matrix_filter: 'map(select(.MODE == "nightly-pytorch"))'
    run-id: ${{ needs.find-wheels.outputs.RUN_ID }}
    sha: ${{ needs.find-wheels.outputs.HEAD_SHA }}
    build-ctk-ver: ${{ needs.find-wheels.outputs.CUDA_BUILD_VER }}
138+
139+
# ── numba-cuda tests ──
140+
141+
test-numba-cuda-linux-64:
  # Calls the Linux wheel-test workflow in nightly-numba-cuda mode for
  # linux-64, using the run, commit and CUDA version resolved by find-wheels.
  # Only runs when the repository owner is nvidia.
  name: 'Nightly numba-cuda (linux-64)'
  needs: find-wheels
  if: github.repository_owner == 'nvidia'
  uses: ./.github/workflows/test-wheel-linux.yml
  secrets: inherit
  permissions:
    actions: read
    contents: read
  with:
    host-platform: linux-64
    build-type: nightly
    test-mode: nightly-numba-cuda
    matrix_filter: 'map(select(.MODE == "nightly-numba-cuda"))'
    run-id: ${{ needs.find-wheels.outputs.RUN_ID }}
    sha: ${{ needs.find-wheels.outputs.HEAD_SHA }}
    build-ctk-ver: ${{ needs.find-wheels.outputs.CUDA_BUILD_VER }}
158+
159+
test-numba-cuda-linux-aarch64:
  # Calls the Linux wheel-test workflow in nightly-numba-cuda mode for
  # linux-aarch64, using the run, commit and CUDA version resolved by
  # find-wheels. Only runs when the repository owner is nvidia.
  name: 'Nightly numba-cuda (linux-aarch64)'
  needs: find-wheels
  if: github.repository_owner == 'nvidia'
  uses: ./.github/workflows/test-wheel-linux.yml
  secrets: inherit
  permissions:
    actions: read
    contents: read
  with:
    host-platform: linux-aarch64
    build-type: nightly
    test-mode: nightly-numba-cuda
    matrix_filter: 'map(select(.MODE == "nightly-numba-cuda"))'
    run-id: ${{ needs.find-wheels.outputs.RUN_ID }}
    sha: ${{ needs.find-wheels.outputs.HEAD_SHA }}
    build-ctk-ver: ${{ needs.find-wheels.outputs.CUDA_BUILD_VER }}
176+
177+
test-numba-cuda-windows:
  # Calls the Windows wheel-test workflow in nightly-numba-cuda mode for
  # win-64, using the run, commit and CUDA version resolved by find-wheels.
  # Only runs when the repository owner is nvidia.
  name: 'Nightly numba-cuda (win-64)'
  needs: find-wheels
  if: github.repository_owner == 'nvidia'
  uses: ./.github/workflows/test-wheel-windows.yml
  secrets: inherit
  permissions:
    actions: read
    contents: read
  with:
    host-platform: win-64
    build-type: nightly
    test-mode: nightly-numba-cuda
    matrix_filter: 'map(select(.MODE == "nightly-numba-cuda"))'
    run-id: ${{ needs.find-wheels.outputs.RUN_ID }}
    sha: ${{ needs.find-wheels.outputs.HEAD_SHA }}
    build-ctk-ver: ${{ needs.find-wheels.outputs.CUDA_BUILD_VER }}
194+
195+
# ── Standard tests on nightly-only runners ──
196+
197+
test-standard-linux-aarch64:
  # Calls the Linux wheel-test workflow in standard mode for linux-aarch64,
  # restricted to matrix entries whose MODE is "nightly-standard", using the
  # run, commit and CUDA version resolved by find-wheels.
  # Only runs when the repository owner is nvidia.
  name: 'Nightly standard (linux-aarch64)'
  needs: find-wheels
  if: github.repository_owner == 'nvidia'
  uses: ./.github/workflows/test-wheel-linux.yml
  secrets: inherit
  permissions:
    actions: read
    contents: read
  with:
    host-platform: linux-aarch64
    build-type: nightly
    test-mode: standard
    matrix_filter: 'map(select(.MODE == "nightly-standard"))'
    run-id: ${{ needs.find-wheels.outputs.RUN_ID }}
    sha: ${{ needs.find-wheels.outputs.HEAD_SHA }}
    build-ctk-ver: ${{ needs.find-wheels.outputs.CUDA_BUILD_VER }}
214+
215+
# ── Status check ──
216+
217+
checks:
  # Single aggregate status for the nightly run: fails when find-wheels did
  # not succeed or when any test job failed or was cancelled.
  name: Nightly check status
  runs-on: ubuntu-latest
  if: always()
  needs:
    - find-wheels
    - test-pytorch-linux
    - test-pytorch-linux-aarch64
    - test-pytorch-windows
    - test-numba-cuda-linux-64
    - test-numba-cuda-linux-aarch64
    - test-numba-cuda-windows
    - test-standard-linux-aarch64
  steps:
    - name: Exit
      run: |
        # A cancelled or failed dependency counts as a failure.
        #
        # See ci.yml for the full rationale on why we must use always()
        # and explicitly inspect the dependency results rather than
        # relying on the default behaviour.
        #
        # Each expression below is rendered to the literal word `true` or
        # `false` before bash runs, so the `if` executes that builtin.
        if ${{ needs.find-wheels.result != 'success' }}; then
          exit 1
        fi
        # find-wheels is known to be 'success' at this point, so scanning
        # every dependency result is the same as scanning the test jobs.
        if ${{ contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled') }}; then
          exit 1
        fi
        exit 0

0 commit comments

Comments
 (0)