Skip to content

Commit ea811c1

Browse files
authored
Merge branch 'main' into ci-nightly-optdeps
2 parents 660f408 + edb1901 commit ea811c1

49 files changed

Lines changed: 1753 additions & 268 deletions

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

.gitattributes

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
*.h binary
66
*.hpp binary
77
# Exception: headers we own
8+
benchmarks/cuda_bindings/benchmarks/cpp/*.hpp -binary text diff
89
cuda_bindings/cuda/bindings/_bindings/*.h -binary text diff
910
cuda_bindings/cuda/bindings/_lib/*.h -binary text diff
1011
cuda_core/cuda/core/_cpp/*.h -binary text diff

.github/workflows/ci.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -343,7 +343,7 @@ jobs:
343343
build-type: pull-request
344344
host-platform: ${{ matrix.host-platform }}
345345
build-ctk-ver: ${{ needs.ci-vars.outputs.CUDA_BUILD_VER }}
346-
nruns: ${{ (github.event_name == 'schedule' && 100) || 1}}
346+
nruns: ${{ (github.event_name == 'schedule' && 5) || 1}}
347347
skip-bindings-test: ${{ !fromJSON(needs.detect-changes.outputs.test_bindings) }}
348348

349349
# See test-linux-64 for why test jobs are split by platform.
@@ -368,7 +368,7 @@ jobs:
368368
build-type: pull-request
369369
host-platform: ${{ matrix.host-platform }}
370370
build-ctk-ver: ${{ needs.ci-vars.outputs.CUDA_BUILD_VER }}
371-
nruns: ${{ (github.event_name == 'schedule' && 100) || 1}}
371+
nruns: ${{ (github.event_name == 'schedule' && 5) || 1}}
372372
skip-bindings-test: ${{ !fromJSON(needs.detect-changes.outputs.test_bindings) }}
373373

374374
# See test-linux-64 for why test jobs are split by platform.
@@ -393,7 +393,7 @@ jobs:
393393
build-type: pull-request
394394
host-platform: ${{ matrix.host-platform }}
395395
build-ctk-ver: ${{ needs.ci-vars.outputs.CUDA_BUILD_VER }}
396-
nruns: ${{ (github.event_name == 'schedule' && 100) || 1}}
396+
nruns: ${{ (github.event_name == 'schedule' && 5) || 1}}
397397
skip-bindings-test: ${{ !fromJSON(needs.detect-changes.outputs.test_bindings) }}
398398

399399
doc:

.github/workflows/release.yml

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@ on:
2020
- cuda-bindings
2121
- cuda-pathfinder
2222
- cuda-python
23-
- all
2423
git-tag:
2524
description: "The release git tag"
2625
required: true
@@ -89,6 +88,30 @@ jobs:
8988
gh release create "${{ inputs.git-tag }}" --draft --repo "${{ github.repository }}" --title "Release ${{ inputs.git-tag }}" --notes "Release ${{ inputs.git-tag }}"
9089
fi
9190
91+
check-release-notes:
92+
runs-on: ubuntu-latest
93+
steps:
94+
- name: Checkout Source
95+
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
96+
with:
97+
ref: ${{ inputs.git-tag }}
98+
99+
- name: Set up Python
100+
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
101+
with:
102+
python-version: "3.12"
103+
104+
- name: Self-test release-notes checker
105+
run: |
106+
pip install pytest
107+
pytest ci/tools/tests
108+
109+
- name: Check versioned release notes exist
110+
run: |
111+
python ci/tools/check_release_notes.py \
112+
--git-tag "${{ inputs.git-tag }}" \
113+
--component "${{ inputs.component }}"
114+
92115
doc:
93116
name: Build release docs
94117
if: ${{ github.repository_owner == 'nvidia' }}
@@ -99,6 +122,7 @@ jobs:
99122
pull-requests: write
100123
needs:
101124
- check-tag
125+
- check-release-notes
102126
- determine-run-id
103127
secrets: inherit
104128
uses: ./.github/workflows/build-docs.yml
@@ -114,6 +138,7 @@ jobs:
114138
contents: write
115139
needs:
116140
- check-tag
141+
- check-release-notes
117142
- determine-run-id
118143
- doc
119144
secrets: inherit
@@ -128,11 +153,12 @@ jobs:
128153
runs-on: ubuntu-latest
129154
needs:
130155
- check-tag
156+
- check-release-notes
131157
- determine-run-id
132158
- doc
133159
environment:
134160
name: testpypi
135-
url: https://test.pypi.org/${{ inputs.component != 'all' && format('p/{0}/', inputs.component) || '' }}
161+
url: https://test.pypi.org/p/${{ inputs.component }}/
136162
permissions:
137163
id-token: write
138164
steps:
@@ -162,7 +188,7 @@ jobs:
162188
- publish-testpypi
163189
environment:
164190
name: pypi
165-
url: https://pypi.org/${{ inputs.component != 'all' && format('p/{0}/', inputs.component) || '' }}
191+
url: https://pypi.org/p/${{ inputs.component }}/
166192
permissions:
167193
id-token: write
168194
steps:

.spdx-ignore

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,5 +10,7 @@ cuda_bindings/examples/*
1010

1111
# Vendored
1212
cuda_core/cuda/core/_include/dlpack.h
13+
cuda_core/cuda/core/_include/aoti_shim.h
14+
cuda_core/cuda/core/_include/aoti_shim.def
1315

1416
qa/ctk-next.drawio.svg

benchmarks/cuda_bindings/README.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,12 +47,14 @@ To run the benchmarks combine the environment and task:
4747
```bash
4848
# Run the Python benchmarks in the wheel environment
4949
pixi run -e wheel bench
50+
pixi run -e wheel bench --min-time 0.1
5051

5152
# Run the Python benchmarks in the source environment
5253
pixi run -e source bench
5354

5455
# Run the C++ benchmarks
5556
pixi run -e wheel bench-cpp
57+
pixi run -e wheel bench-cpp --min-time 0.1
5658
```
5759

5860
Both runners automatically save results to JSON files in the benchmarks

benchmarks/cuda_bindings/benchmarks/cpp/bench_event.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,11 @@ int main(int argc, char** argv) {
4545
check_cu(cuStreamSynchronize(stream), "cuStreamSynchronize failed");
4646

4747
bench::BenchmarkSuite suite(options);
48+
// Drain the persistent stream after calibration so event_record (which
49+
// enqueues onto the stream) and event_synchronize start from a known state.
50+
suite.set_post_calibrate([&]() {
51+
check_cu(cuStreamSynchronize(stream), "post-calibrate sync failed");
52+
});
4853

4954
// --- event_create_destroy ---
5055
{

benchmarks/cuda_bindings/benchmarks/cpp/bench_launch.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -238,6 +238,12 @@ int main(int argc, char** argv) {
238238
void* struct_params[] = {&struct_2048B};
239239

240240
bench::BenchmarkSuite suite(options);
241+
// After calibration, drain the persistent stream so the first measured
242+
// sample does not start on a backlogged stream. Calibration for enqueue-
243+
// style ops (kernel launches) may queue many thousands of operations.
244+
suite.set_post_calibrate([&]() {
245+
check_cu(cuStreamSynchronize(stream), "post-calibrate sync failed");
246+
});
241247

242248
suite.run("launch.launch_empty_kernel", [&]() {
243249
check_cu(cuLaunchKernel(empty_kernel, 1, 1, 1, 1, 1, 1, 0, stream, nullptr, nullptr),

benchmarks/cuda_bindings/benchmarks/cpp/bench_memory.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,11 @@ int main(int argc, char** argv) {
5252
uint8_t host_dst[COPY_SIZE] = {};
5353

5454
bench::BenchmarkSuite suite(options);
55+
// Drain the persistent stream after calibration so async benchmarks
56+
// (mem_alloc_async_free_async) don't start measurement on a backlogged stream.
57+
suite.set_post_calibrate([&]() {
58+
check_cu(cuStreamSynchronize(stream), "post-calibrate sync failed");
59+
});
5560

5661
// --- mem_alloc_free ---
5762
{

benchmarks/cuda_bindings/benchmarks/cpp/bench_stream.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,12 @@ int main(int argc, char** argv) {
3838
check_cu(cuStreamCreate(&stream, CU_STREAM_NON_BLOCKING), "cuStreamCreate failed");
3939

4040
bench::BenchmarkSuite suite(options);
41+
// Drain the persistent stream after calibration for completeness.
42+
// stream_create_destroy uses a local stream, but stream_query/synchronize
43+
// observe the persistent one.
44+
suite.set_post_calibrate([&]() {
45+
check_cu(cuStreamSynchronize(stream), "post-calibrate sync failed");
46+
});
4147

4248
// --- stream_create_destroy ---
4349
{

0 commit comments

Comments
 (0)