Skip to content

Commit 5a3452b

Browse files
committed
feat: move to a cuda example
Assisted-by: ClaudeCode:claude-opus-4.8
Signed-off-by: Henry Schreiner <henryfs@princeton.edu>
1 parent fec5995 commit 5a3452b

19 files changed

Lines changed: 273 additions & 77 deletions

File tree

.github/workflows/cibw-cuda.yaml

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
name: CIBW CUDA
2+
3+
on:
4+
workflow_dispatch:
5+
pull_request:
6+
branches:
7+
- master
8+
9+
concurrency:
10+
group: ${{ github.workflow }}-${{ github.ref }}
11+
cancel-in-progress: true
12+
13+
jobs:
14+
build:
15+
name: CUDA ${{ matrix.cuda-version }} wheels on ${{ matrix.target.arch }}
16+
runs-on: ${{ matrix.target.runner }}
17+
strategy:
18+
fail-fast: false
19+
matrix:
20+
# The CUDA Toolkit lives inside the custom manylinux images below, so
21+
# any runner works. See the list of available images at
22+
# https://github.com/pypa/cibuildwheel/pull/2896
23+
manylinux-base: [manylinux_2_28]
24+
cuda-version: [12_9, 13_1]
25+
target:
26+
- arch: x86_64
27+
runner: ubuntu-24.04
28+
- arch: aarch64
29+
runner: ubuntu-24.04-arm
30+
31+
steps:
32+
- uses: actions/checkout@v6
33+
34+
- uses: pypa/cibuildwheel@v3.4
35+
env:
36+
CIBW_BUILD: cp312-manylinux_${{ matrix.target.arch }}
37+
CIBW_MANYLINUX_X86_64_IMAGE: >-
38+
quay.io/manylinux_cuda/${{ matrix.manylinux-base }}_x86_64_cuda${{ matrix.cuda-version }}:latest
39+
CIBW_MANYLINUX_AARCH64_IMAGE: >-
40+
quay.io/manylinux_cuda/${{ matrix.manylinux-base }}_aarch64_cuda${{ matrix.cuda-version }}:latest
41+
42+
- uses: actions/upload-artifact@v7
43+
with:
44+
name: cibw-cuda-${{ matrix.manylinux-base }}-cuda${{ matrix.cuda-version }}-${{ matrix.target.arch }}
45+
path: wheelhouse/*.whl

.github/workflows/conda.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,7 @@ jobs:
3838
run: conda build conda.recipe
3939

4040
- name: Install
41-
run: conda install -c ${CONDA_PREFIX}/conda-bld/ scikit_build_example
41+
run: conda install -c ${CONDA_PREFIX}/conda-bld/ cuda_example
4242

4343
- name: Test
4444
run: pytest tests

.pre-commit-config.yaml

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -49,15 +49,6 @@ repos:
4949
- id: remove-tabs
5050
exclude: ^(docs)
5151

52-
# CMake formatting
53-
- repo: https://github.com/cheshirekow/cmake-format-precommit
54-
rev: v0.6.13
55-
hooks:
56-
- id: cmake-format
57-
additional_dependencies: [pyyaml]
58-
types: [file]
59-
files: (\.cmake|CMakeLists.txt)(.in)?$
60-
6152
# Suggested hook if you add a .clang-format file
6253
# - repo: https://github.com/pre-commit/mirrors-clang-format
6354
# rev: v13.0.0

CMakeLists.txt

Lines changed: 29 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
1-
# Require CMake 3.15+ (matching scikit-build-core) Use new versions of all
2-
# policies up to CMake 4.0
3-
cmake_minimum_required(VERSION 3.15...4.0)
1+
# Require CMake 3.18+ for modern CUDA support (scikit-build-core needs 3.15+).
2+
# Use new versions of all policies up to CMake 4.0
3+
cmake_minimum_required(VERSION 3.18...4.0)
44

55
# Scikit-build-core sets these values for you, or you can just hard-code the
66
# name and version.
@@ -12,13 +12,37 @@ project(
1212
# Find pybind11 and Python
1313
find_package(pybind11 CONFIG REQUIRED)
1414

15+
# Enable CUDA if a compiler (nvcc) is available. This lets the example build
16+
# everywhere: with the CUDA Toolkit it compiles the real kernel, and without it
17+
# it falls back to a CPU implementation.
18+
include(CheckLanguage)
19+
check_language(CUDA)
20+
if(CMAKE_CUDA_COMPILER)
21+
enable_language(CUDA)
22+
message(STATUS "CUDA found: building the GPU implementation")
23+
set(ADD_SOURCE src/add.cu)
24+
else()
25+
message(STATUS "CUDA not found: building the CPU fallback")
26+
set(ADD_SOURCE src/add_cpu.cpp)
27+
endif()
28+
1529
# Add a library using FindPython's tooling (pybind11 also provides a helper like
1630
# this)
17-
python_add_library(_core MODULE src/main.cpp WITH_SOABI)
31+
python_add_library(_core MODULE src/main.cpp ${ADD_SOURCE} WITH_SOABI)
1832
target_link_libraries(_core PRIVATE pybind11::headers)
1933

2034
# This is passing in the version as a define just as an example
2135
target_compile_definitions(_core PRIVATE VERSION_INFO=${PROJECT_VERSION})
2236

37+
if(CMAKE_CUDA_COMPILER)
38+
# Build for all major architectures (all-major needs CMake 3.23+) so the wheel
39+
# is portable, and link the CUDA runtime statically to avoid needing libcudart.
40+
if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
41+
set_property(TARGET _core PROPERTY CUDA_ARCHITECTURES all-major)
42+
endif()
43+
set_property(TARGET _core PROPERTY CUDA_RUNTIME_LIBRARY Static)
44+
target_compile_definitions(_core PRIVATE WITH_CUDA)
45+
endif()
46+
2347
# The install directory is the output (wheel) directory
24-
install(TARGETS _core DESTINATION scikit_build_example)
48+
install(TARGETS _core DESTINATION cuda_example)

README.md

Lines changed: 57 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# scikit_build_example
1+
# cuda_example
22

33
[![Gitter][gitter-badge]][gitter-link]
44

@@ -8,42 +8,78 @@
88
| pip builds | [![Pip Actions Status][actions-pip-badge]][actions-pip-link] |
99

1010

11-
An example project built with [pybind11][] and [scikit-build-core][]. Python
12-
3.9+ (see older commits for 3.7+, or even older versions of Python using [scikit-build (classic)][]).
11+
An example project built with [pybind11][], [CUDA][], and
12+
[scikit-build-core][]. Python 3.9+.
13+
14+
The extension exposes a tiny `add` function that runs on the GPU with a CUDA
15+
kernel when a device is available, and otherwise falls back to the CPU. The
16+
build is conditional: if the CUDA Toolkit (`nvcc`) is found at configure time,
17+
the real kernel in `src/add.cu` is compiled; otherwise the CPU implementation in
18+
`src/add_cpu.cpp` is used. This keeps the example buildable everywhere (macOS,
19+
Windows, PyPy, Pyodide, ...) while still demonstrating a real CUDA build on
20+
Linux.
1321

1422

1523
[gitter-badge]: https://badges.gitter.im/pybind/Lobby.svg
1624
[gitter-link]: https://gitter.im/pybind/Lobby
17-
[actions-badge]: https://github.com/pybind/scikit_build_example/workflows/Tests/badge.svg
18-
[actions-conda-link]: https://github.com/pybind/scikit_build_example/actions?query=workflow%3AConda
19-
[actions-conda-badge]: https://github.com/pybind/scikit_build_example/workflows/Conda/badge.svg
20-
[actions-pip-link]: https://github.com/pybind/scikit_build_example/actions?query=workflow%3APip
21-
[actions-pip-badge]: https://github.com/pybind/scikit_build_example/workflows/Pip/badge.svg
22-
[actions-wheels-link]: https://github.com/pybind/scikit_build_example/actions?query=workflow%3AWheels
23-
[actions-wheels-badge]: https://github.com/pybind/scikit_build_example/workflows/Wheels/badge.svg
25+
[actions-badge]: https://github.com/pybind/cuda_example/workflows/Tests/badge.svg
26+
[actions-conda-link]: https://github.com/pybind/cuda_example/actions?query=workflow%3AConda
27+
[actions-conda-badge]: https://github.com/pybind/cuda_example/workflows/Conda/badge.svg
28+
[actions-pip-link]: https://github.com/pybind/cuda_example/actions?query=workflow%3APip
29+
[actions-pip-badge]: https://github.com/pybind/cuda_example/workflows/Pip/badge.svg
30+
[actions-wheels-link]: https://github.com/pybind/cuda_example/actions?query=workflow%3AWheels
31+
[actions-wheels-badge]: https://github.com/pybind/cuda_example/workflows/Wheels/badge.svg
2432

2533
## Installation
2634

2735
- Clone this repository
28-
- `pip install ./scikit_build_example`
36+
- `pip install ./cuda_example`
37+
38+
If the CUDA Toolkit is installed, the GPU implementation is built automatically.
2939

3040
## Test call
3141

3242
```python
33-
import scikit_build_example
43+
import cuda_example
3444

35-
scikit_build_example.add(1, 2)
45+
cuda_example.add(1, 2) # 3 (on the GPU if one is available)
46+
cuda_example.cuda_available() # True if a CUDA device is visible at runtime
47+
cuda_example.WITH_CUDA # True if the wheel was compiled with CUDA
3648
```
3749

50+
## Building CUDA wheels
51+
52+
The default `Wheels` workflow builds the CPU fallback on every platform using
53+
[cibuildwheel][]. To build CUDA-enabled Linux wheels, the
54+
`.github/workflows/cibw-cuda.yaml` workflow points cibuildwheel at the custom
55+
manylinux images that ship the CUDA Toolkit (see
56+
[pypa/cibuildwheel#2896][cibw-cuda]):
57+
58+
```yaml
59+
- uses: pypa/cibuildwheel@v3.4
60+
env:
61+
CIBW_MANYLINUX_X86_64_IMAGE: quay.io/manylinux_cuda/manylinux_2_28_x86_64_cuda13_1:latest
62+
CIBW_MANYLINUX_AARCH64_IMAGE: quay.io/manylinux_cuda/manylinux_2_28_aarch64_cuda13_1:latest
63+
```
64+
65+
The CUDA runtime is linked statically (`CUDA_RUNTIME_LIBRARY Static`), so the
66+
resulting wheels do not depend on `libcudart` and remain importable on machines
67+
without a GPU (where `add` transparently falls back to the CPU). GitHub-hosted
68+
runners have no GPU, so the wheels are compiled and imported, but the kernel
69+
itself only runs on a machine with a CUDA device.
70+
3871
## Files
3972

4073
This example has several files that are a good idea, but aren't strictly
4174
necessary. The necessary files are:
4275

4376
* `pyproject.toml`: The Python project file
44-
* `CMakeLists.txt`: The CMake configuration file
45-
* `src/main.cpp`: The source file for the C++ build
46-
* `src/scikit_build_example/__init__.py`: The Python portion of the module. The root of the module needs to be `<package_name>`, `src/<package_name>`, or `python/<package_name>` to be auto-discovered.
77+
* `CMakeLists.txt`: The CMake configuration file, which conditionally enables CUDA
78+
* `src/main.cpp`: The pybind11 bindings
79+
* `src/add.cu`: The CUDA kernel implementation (built when CUDA is available)
80+
* `src/add_cpu.cpp`: The CPU fallback (built when CUDA is not available)
81+
* `src/add.h`: The shared declarations
82+
* `src/cuda_example/__init__.py`: The Python portion of the module. The root of the module needs to be `<package_name>`, `src/<package_name>`, or `python/<package_name>` to be auto-discovered.
4783

4884
These files are also expected and highly recommended:
4985

@@ -71,9 +107,10 @@ choice.
71107

72108
### CI Examples
73109

74-
There are examples for CI in `.github/workflows`. A simple way to produces
110+
There are examples for CI in `.github/workflows`. A simple way to produce
75111
binary "wheels" for all platforms is illustrated in the "wheels.yml" file,
76-
using [cibuildwheel][].
112+
using [cibuildwheel][]. The "cibw-cuda.yaml" file shows how to build
113+
CUDA-enabled wheels on Linux.
77114

78115
## License
79116

@@ -82,11 +119,12 @@ file. By using, distributing, or contributing to this project, you agree to the
82119
terms and conditions of this license.
83120

84121
[cibuildwheel]: https://cibuildwheel.readthedocs.io
122+
[cibw-cuda]: https://github.com/pypa/cibuildwheel/pull/2896
123+
[cuda]: https://developer.nvidia.com/cuda-toolkit
85124
[scientific-python development guide]: https://learn.scientific-python.org/development
86125
[dependabot]: https://docs.github.com/en/code-security/dependabot
87126
[github actions]: https://docs.github.com/en/actions
88127
[pre-commit]: https://pre-commit.com
89128
[nox]: https://nox.thea.codes
90129
[pybind11]: https://pybind11.readthedocs.io
91130
[scikit-build-core]: https://scikit-build-core.readthedocs.io
92-
[scikit-build (classic)]: https://scikit-build.readthedocs.io

conda.recipe/meta.yaml

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
package:
2-
name: scikit_build_example
2+
name: cuda_example
33
version: 0.0.1
44

55
source:
@@ -22,15 +22,15 @@ requirements:
2222
- python
2323
- pip
2424
- scikit-build-core
25-
- pybind11 >=2.10.0
25+
- pybind11 >=3.0.0
2626

2727
run:
2828
- python
2929

3030

3131
test:
3232
imports:
33-
- scikit_build_example
33+
- cuda_example
3434
requires:
3535
- pytest
3636
source_files:
@@ -39,5 +39,5 @@ test:
3939
- pytest tests
4040

4141
about:
42-
summary: An example project built with pybind11 and scikit-build.
42+
summary: An example project built with pybind11, CUDA, and scikit-build-core.
4343
license_file: LICENSE

docs/conf.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,7 @@
4949
master_doc = 'index'
5050

5151
# General information about the project.
52-
project = 'python_example'
52+
project = 'cuda_example'
5353
copyright = '2016, Sylvain Corlay'
5454
author = 'Sylvain Corlay'
5555

docs/index.rst

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
python_example Documentation
2-
============================
1+
cuda_example Documentation
2+
==========================
33

44
Contents:
55

docs/python_example.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1 @@
1-
.. automodule:: scikit_build_example
1+
.. automodule:: cuda_example

pyproject.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,11 @@ build-backend = "scikit_build_core.build"
44

55

66
[project]
7-
name = "scikit_build_example"
7+
name = "cuda_example"
88
version = "0.0.1"
99
license = "MIT"
1010
license-files = ["LICENSE"]
11-
description="A minimal example package (with pybind11)"
11+
description="A minimal example package (with pybind11 and CUDA)"
1212
readme = "README.md"
1313
authors = [
1414
{ name = "My Name", email = "me@email.com" },

0 commit comments

Comments
 (0)