Skip to content

Commit f9e67a7

Browse files
committed
refactor: make this a pure CUDA example
Remove the CPU fallback so the package is a pure CUDA example: building now hard-requires the CUDA Toolkit (CMake declares CUDA as a required language).

- add/subtract are both CUDA kernels; cuda_available() queries the device
- drop src/add_cpu.cpp, the WITH_CUDA flag, and the conditional CMake path
- skip the GPU tests when no device is present
- remove the non-CUDA CI/config (conda, Android/iOS/Pyodide cross-compile)
- fold CUDA wheel building into wheels.yml; pip.yml is a build-and-import check
- link the CUDA runtime statically so wheels don't depend on libcudart

Assisted-by: ClaudeCode:claude-opus-4.8
1 parent 5a3452b commit f9e67a7

18 files changed

Lines changed: 167 additions & 346 deletions

.github/workflows/cibw-cc.yaml

Lines changed: 0 additions & 65 deletions
This file was deleted.

.github/workflows/cibw-cuda.yaml

Lines changed: 0 additions & 45 deletions
This file was deleted.

.github/workflows/conda.yml

Lines changed: 0 additions & 44 deletions
This file was deleted.

.github/workflows/pip.yml

Lines changed: 18 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -8,25 +8,31 @@ on:
88
- master
99

1010
jobs:
11-
build:
12-
name: Build with Pip
13-
runs-on: ${{ matrix.platform }}
11+
# Build the CUDA extension inside the manylinux images that ship the CUDA
12+
# Toolkit (see https://github.com/pypa/cibuildwheel/pull/2896). GitHub's
13+
# runners have no GPU, so this compiles the kernels and imports the module;
14+
# the GPU tests are skipped (`cuda_available()` is False).
15+
cuda:
16+
name: Build with CUDA (${{ matrix.arch }})
17+
runs-on: ${{ matrix.runner }}
18+
container: quay.io/manylinux_cuda/manylinux_2_28_${{ matrix.arch }}_cuda13_1:latest
1419
strategy:
1520
fail-fast: false
1621
matrix:
17-
platform: [windows-latest, macos-latest, ubuntu-latest]
18-
python-version: ["3.9", "3.14", "pypy-3.10"]
22+
include:
23+
- arch: x86_64
24+
runner: ubuntu-24.04
25+
- arch: aarch64
26+
runner: ubuntu-24.04-arm
1927

2028
steps:
2129
- uses: actions/checkout@v6
2230

23-
- uses: actions/setup-python@v6
24-
with:
25-
python-version: ${{ matrix.python-version }}
26-
allow-prereleases: true
27-
2831
- name: Build and install
29-
run: pip install --verbose . --group test
32+
run: |
33+
/opt/python/cp312-cp312/bin/python -m venv /tmp/venv
34+
/tmp/venv/bin/pip install --upgrade pip
35+
/tmp/venv/bin/pip install --verbose . --group test
3036
3137
- name: Test
32-
run: pytest
38+
run: /tmp/venv/bin/pytest

.github/workflows/wheels.yml

Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,6 @@ jobs:
2323
runs-on: ubuntu-latest
2424
steps:
2525
- uses: actions/checkout@v6
26-
with:
27-
submodules: true
2826

2927
- name: Build SDist
3028
run: pipx run build --sdist
@@ -39,29 +37,42 @@ jobs:
3937

4038

4139
build_wheels:
42-
name: Wheels on ${{ matrix.os }}
43-
runs-on: ${{ matrix.os }}
40+
name: Wheels on ${{ matrix.arch }}
41+
runs-on: ${{ matrix.runner }}
4442
strategy:
4543
fail-fast: false
4644
matrix:
47-
os: [ubuntu-latest, macos-15-intel, macos-latest, windows-latest, ubuntu-24.04-arm, windows-11-arm]
45+
include:
46+
- arch: x86_64
47+
runner: ubuntu-24.04
48+
- arch: aarch64
49+
runner: ubuntu-24.04-arm
50+
51+
# The CUDA Toolkit ships inside these custom manylinux images, so nvcc is
52+
# available during the build (see https://github.com/pypa/cibuildwheel/pull/2896).
53+
# Bump the CUDA version here; to publish wheels for several CUDA versions you
54+
# would also need distinct local version labels (e.g. 0.0.1+cu131).
55+
env:
56+
CUDA_VERSION: "13_1"
57+
MANYLINUX_BASE: manylinux_2_28
4858

4959
steps:
5060
- uses: actions/checkout@v6
51-
with:
52-
submodules: true
53-
54-
- uses: astral-sh/setup-uv@v8.0.0
5561

5662
- uses: pypa/cibuildwheel@v3.4
63+
env:
64+
CIBW_MANYLINUX_X86_64_IMAGE: >-
65+
quay.io/manylinux_cuda/${{ env.MANYLINUX_BASE }}_x86_64_cuda${{ env.CUDA_VERSION }}:latest
66+
CIBW_MANYLINUX_AARCH64_IMAGE: >-
67+
quay.io/manylinux_cuda/${{ env.MANYLINUX_BASE }}_aarch64_cuda${{ env.CUDA_VERSION }}:latest
5768
5869
- name: Verify clean directory
5970
run: git diff --exit-code
6071
shell: bash
6172

6273
- uses: actions/upload-artifact@v7
6374
with:
64-
name: cibw-wheels-${{ matrix.os }}
75+
name: cibw-wheels-${{ matrix.arch }}
6576
path: wheelhouse/*.whl
6677

6778

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@ parts/
2222
sdist/
2323
var/
2424
wheels/
25+
wheelhouse/
2526
share/python-wheels/
2627
*.egg-info/
2728
.installed.cfg

.pre-commit-config.yaml

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@ repos:
2626
- id: check-merge-conflict
2727
- id: check-symlinks
2828
- id: check-yaml
29-
exclude: ^conda\.recipe/meta\.yaml$
3029
- id: debug-statements
3130
- id: end-of-file-fixer
3231
- id: mixed-line-ending

CMakeLists.txt

Lines changed: 10 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -3,46 +3,30 @@
33
cmake_minimum_required(VERSION 3.18...4.0)
44

55
# Scikit-build-core sets these values for you, or you can just hard-code the
6-
# name and version.
6+
# name and version. CUDA is a required language: configuration fails without a
7+
# CUDA compiler (nvcc).
78
project(
89
${SKBUILD_PROJECT_NAME}
910
VERSION ${SKBUILD_PROJECT_VERSION}
10-
LANGUAGES CXX)
11+
LANGUAGES CXX CUDA)
1112

1213
# Find pybind11 and Python
1314
find_package(pybind11 CONFIG REQUIRED)
1415

15-
# Enable CUDA if a compiler (nvcc) is available. This lets the example build
16-
# everywhere: with the CUDA Toolkit it compiles the real kernel, and without it
17-
# it falls back to a CPU implementation.
18-
include(CheckLanguage)
19-
check_language(CUDA)
20-
if(CMAKE_CUDA_COMPILER)
21-
enable_language(CUDA)
22-
message(STATUS "CUDA found: building the GPU implementation")
23-
set(ADD_SOURCE src/add.cu)
24-
else()
25-
message(STATUS "CUDA not found: building the CPU fallback")
26-
set(ADD_SOURCE src/add_cpu.cpp)
27-
endif()
28-
2916
# Add a library using FindPython's tooling (pybind11 also provides a helper like
30-
# this)
31-
python_add_library(_core MODULE src/main.cpp ${ADD_SOURCE} WITH_SOABI)
17+
# this), combining the pybind11 bindings with the CUDA kernels.
18+
python_add_library(_core MODULE src/main.cpp src/add.cu WITH_SOABI)
3219
target_link_libraries(_core PRIVATE pybind11::headers)
3320

3421
# This is passing in the version as a define just as an example
3522
target_compile_definitions(_core PRIVATE VERSION_INFO=${PROJECT_VERSION})
3623

37-
if(CMAKE_CUDA_COMPILER)
38-
# Build for all major architectures so the wheel is portable, and link the
39-
# CUDA runtime statically so the wheel does not depend on libcudart.
40-
if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
41-
set_property(TARGET _core PROPERTY CUDA_ARCHITECTURES all-major)
42-
endif()
43-
set_property(TARGET _core PROPERTY CUDA_RUNTIME_LIBRARY Static)
44-
target_compile_definitions(_core PRIVATE WITH_CUDA)
24+
# Build for all major architectures so the wheel is portable, and link the CUDA
25+
# runtime statically so the wheel does not depend on libcudart.
26+
if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
27+
set_property(TARGET _core PROPERTY CUDA_ARCHITECTURES all-major)
4528
endif()
29+
set_property(TARGET _core PROPERTY CUDA_RUNTIME_LIBRARY Static)
4630

4731
# The install directory is the output (wheel) directory
4832
install(TARGETS _core DESTINATION cuda_example)

0 commit comments

Comments
 (0)