Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/test-wheel-linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -100,8 +100,8 @@ jobs:
uses: ./.github/actions/install_unix_deps
continue-on-error: false
with:
# for artifact fetching, graphics libs
dependencies: "jq wget libgl1 libegl1"
# for artifact fetching, graphics libs, g++ required for cffi in example
dependencies: "jq wget libgl1 libegl1 g++"
dependent_exes: "jq wget"

- name: Set environment variables
Expand Down
9 changes: 8 additions & 1 deletion cuda_core/examples/cuda_graphs.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0

Expand All @@ -10,6 +10,10 @@
#
# ################################################################################

# /// script
# dependencies = ["cuda_bindings", "cuda_core", "nvidia-cuda-nvrtc", "cupy-cuda13x"]
# ///

import sys
import time

Expand Down Expand Up @@ -121,6 +125,9 @@ def main():
end_time = time.time()

graph_execution_time = end_time - start_time
if graph_execution_time == 0.0:
print("Graph execution time is too fast to measure accurately.")
graph_execution_time = 1e-9 # Assign a small value to avoid division by zero in speedup calculation
print(f"Graph execution time: {graph_execution_time:.6f} seconds")

# Verify results
Expand Down
7 changes: 4 additions & 3 deletions cuda_core/examples/gl_interop_plasma.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,10 @@
# effect popular in the demoscene). The window title shows the current FPS.
# Close the window or press Escape to exit.
#
# Requirements
# ============
# pip install pyglet

# /// script
# dependencies = ["cuda_bindings", "cuda_core>0.6.0", "pyglet"]
# ///

import ctypes
import sys
Expand Down
6 changes: 5 additions & 1 deletion cuda_core/examples/jit_lto_fractal.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0

Expand All @@ -12,6 +12,10 @@
#
# ################################################################################

# /// script
# dependencies = ["cuda_bindings", "cuda_core", "nvidia-cuda-nvrtc", "cupy-cuda13x"]
# ///

import argparse
import sys

Expand Down
6 changes: 5 additions & 1 deletion cuda_core/examples/memory_ops.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0

Expand All @@ -10,6 +10,10 @@
#
# ################################################################################

# /// script
# dependencies = ["cuda_bindings", "cuda_core", "nvidia-cuda-nvrtc", "cupy-cuda13x"]
# ///

import sys

import cupy as cp
Expand Down
6 changes: 5 additions & 1 deletion cuda_core/examples/pytorch_example.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0

Expand All @@ -9,6 +9,10 @@
#
# ################################################################################

# /// script
# dependencies = ["cuda_bindings", "cuda_core", "torch"]
# ///

import sys

import torch
Expand Down
11 changes: 10 additions & 1 deletion cuda_core/examples/saxpy.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0

Expand All @@ -10,8 +10,17 @@
#
# ################################################################################

# /// script
# dependencies = ["cuda_bindings", "cuda_core", "nvidia-cuda-nvrtc", "cupy-cuda13x"]
# ///


import sys

from cuda import pathfinder

print(pathfinder.load_nvidia_dynamic_lib("nvrtc"))

import cupy as cp

from cuda.core import Device, LaunchConfig, Program, ProgramOptions, launch
Expand Down
6 changes: 5 additions & 1 deletion cuda_core/examples/show_device_properties.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# SPDX-FileCopyrightText: Copyright (c) 2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-FileCopyrightText: Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0

Expand All @@ -9,6 +9,10 @@
#
# ################################################################################

# /// script
# dependencies = ["cuda_bindings", "cuda_core"]
# ///

import sys

from cuda.core import Device, system
Expand Down
6 changes: 5 additions & 1 deletion cuda_core/examples/simple_multi_gpu_example.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0

Expand All @@ -9,6 +9,10 @@
#
# ################################################################################

# /// script
# dependencies = ["cuda_bindings", "cuda_core", "cupy-cuda13x"]
# ///

import sys

import cupy as cp
Expand Down
6 changes: 5 additions & 1 deletion cuda_core/examples/strided_memory_view_cpu.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0

Expand All @@ -9,6 +9,10 @@
#
# ################################################################################

# /// script
# dependencies = ["cuda_bindings", "cuda_core", "cffi", "setuptools"]
# ///

import importlib
import string
import sys
Expand Down
6 changes: 5 additions & 1 deletion cuda_core/examples/strided_memory_view_gpu.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0

Expand All @@ -9,6 +9,10 @@
#
# ################################################################################

# /// script
# dependencies = ["cuda_bindings", "cuda_core", "nvidia-cuda-nvrtc", "cupy-cuda13x"]
# ///

import string
import sys

Expand Down
6 changes: 5 additions & 1 deletion cuda_core/examples/thread_block_cluster.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0

Expand All @@ -10,6 +10,10 @@
#
# ################################################################################

# /// script
# dependencies = ["cuda_bindings", "cuda_core"]
# ///

import os
import sys

Expand Down
4 changes: 4 additions & 0 deletions cuda_core/examples/tma_tensor_map.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,10 @@
#
# ################################################################################

# /// script
# dependencies = ["cuda_bindings", "cuda_core>0.6.0", "cupy-cuda13x"]
# ///

import os
import sys

Expand Down
6 changes: 5 additions & 1 deletion cuda_core/examples/vector_add.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0

Expand All @@ -9,6 +9,10 @@
#
# ################################################################################

# /// script
# dependencies = ["cuda_bindings", "cuda_core", "nvidia-cuda-nvrtc", "cupy-cuda13x"]
# ///

import cupy as cp

from cuda.core import Device, LaunchConfig, Program, ProgramOptions, launch
Expand Down
2 changes: 1 addition & 1 deletion cuda_core/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ cu12 = ["cuda-bindings[all]==12.*"]
cu13 = ["cuda-bindings[all]==13.*"]

[dependency-groups]
test = ["cython>=3.2,<3.3", "setuptools", "pytest>=6.2.4", "pytest-benchmark", "pytest-randomly", "pytest-repeat", "pytest-rerunfailures", "cloudpickle", "psutil"]
test = ["cython>=3.2,<3.3", "setuptools", "pytest>=6.2.4", "pytest-benchmark", "pytest-randomly", "pytest-repeat", "pytest-rerunfailures", "cloudpickle", "psutil", "cffi"]
ml-dtypes = ["ml-dtypes>=0.5.4,<0.6.0"]
test-cu12 = [ {include-group = "ml-dtypes" }, {include-group = "test" }, "cupy-cuda12x; python_version < '3.14'", "cuda-toolkit[cudart]==12.*"] # runtime headers needed by CuPy
test-cu13 = [ {include-group = "ml-dtypes" }, {include-group = "test" }, "cupy-cuda13x; python_version < '3.14'", "cuda-toolkit[cudart]==13.*"] # runtime headers needed by CuPy
Expand Down
103 changes: 94 additions & 9 deletions cuda_core/tests/example_tests/test_basic_examples.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,109 @@
# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

# If we have subcategories of examples in the future, this file can be split along those lines

import glob
import importlib.metadata
import os
import platform
import re
import subprocess
import sys

import pytest

from cuda.core import Device
from cuda.core import Device, system


def has_compute_capability_9_or_higher() -> bool:
    """Tell whether the device returned by ``Device()`` is at least compute capability 9.0."""
    minimum = (9, 0)
    capability = Device().compute_capability
    return capability >= minimum


def has_multiple_devices() -> bool:
    """Tell whether ``system.get_num_devices()`` reports two or more devices."""
    device_count = system.get_num_devices()
    return device_count >= 2


def has_display() -> bool:
    """Tell whether examples that need a display may run.

    Always ``False``: we assume that no windows should be opened while the
    tests are running.
    """
    display_available = False
    return display_available


def is_not_windows() -> bool:
    """Tell whether ``sys.platform`` is anything other than ``"win32"``."""
    on_windows = sys.platform == "win32"
    return not on_windows


def is_x86_64() -> bool:
    """Tell whether ``platform.machine()`` reports exactly ``"x86_64"``."""
    machine = platform.machine()
    return machine == "x86_64"


def has_cuda_path() -> bool:
    """Tell whether ``CUDA_PATH`` or ``CUDA_HOME`` is present in the environment.

    A variable that is set to an empty string still counts as present.
    """
    candidates = ("CUDA_PATH", "CUDA_HOME")
    return any(variable in os.environ for variable in candidates)


# Specific system requirements for each of the examples.


# Maps an example's file name to a zero-argument callable that returns True
# when the current system can run that example. Examples that do not appear
# here are looked up with a default of ``lambda: True`` by the test below,
# i.e. they have no system requirement.
SYSTEM_REQUIREMENTS = {
    "gl_interop_plasma.py": has_display,
    "pytorch_example.py": lambda: (
        has_compute_capability_9_or_higher() and is_x86_64()
    ),  # PyTorch only provides CUDA support for x86_64
    "saxpy.py": has_compute_capability_9_or_higher,
    "simple_multi_gpu_example.py": has_multiple_devices,
    "strided_memory_view_cpu.py": is_not_windows,
    "thread_block_cluster.py": lambda: has_compute_capability_9_or_higher() and has_cuda_path(),
    "tma_tensor_map.py": has_cuda_path,
}

from .utils import run_example

# Directory holding the example scripts, located relative to this test file.
samples_path = os.path.join(os.path.dirname(__file__), "..", "..", "examples")
# NOTE(review): the pattern is built by plain string concatenation, so "**" is
# glued onto the directory name ("...examples**/*.py") instead of being its own
# path component -- confirm whether recursing into subdirectories is intended.
sample_files = glob.glob(samples_path + "**/*.py", recursive=True)
# Keep only the file names; the test joins each one back onto samples_path.
sample_files = [os.path.basename(x) for x in glob.glob(samples_path + "**/*.py", recursive=True)]


def has_package_requirements_or_skip(example):
    """Skip the running test unless every PEP 723 dependency of *example* is installed.

    The script at *example* is searched for a PEP 723 inline-metadata block.
    When there is no such block, or the block has no ``dependencies`` entry,
    the function returns without doing anything.

    Args:
        example: Path of the example script to inspect.

    Returns:
        None.

    Raises:
        ValueError: If a dependency string does not start with a package name.

    When at least one dependency is not installed, ``pytest.skip`` is called
    once with the names of all missing packages.
    """
    import ast  # local import: only needed to parse the dependency list

    with open(example, encoding="utf-8") as f:
        content = f.read()

    # The canonical regex as defined in PEP 723
    pep723 = re.search(r"(?m)^# /// (?P<type>[a-zA-Z0-9-]+)$\s(?P<content>(^#(| .*)$\s)+)^# ///$", content)
    if not pep723:
        # NOTE(review): a reviewer asked whether the presence of PEP 723
        # metadata could/should be enforced here instead of silently returning.
        return

    # Minimal "key = value" parsing of the metadata block; each entry is
    # expected to fit on a single line.
    metadata = {}
    for line in pep723.group("content").splitlines():
        line = line.lstrip("# ").rstrip()
        if not line:
            continue
        key, value = line.split("=", 1)
        metadata[key.strip()] = value.strip()

    if "dependencies" not in metadata:
        return

    # literal_eval only accepts Python literals, so the file content cannot
    # execute arbitrary code the way eval() would allow.
    dependencies = ast.literal_eval(metadata["dependencies"])

    missing = []
    for dependency in dependencies:
        # Keep only the leading distribution name; anything after it (such as
        # ">0.6.0") is a version specifier that must not be part of the lookup.
        # NOTE: the version specifier itself is not checked against the
        # installed version.
        match = re.match(r"[a-zA-Z0-9._-]+", dependency)
        if match is None:
            raise ValueError(f"Cannot parse package name from dependency {dependency!r} in {example}")
        name = match.group(0)
        try:
            importlib.metadata.distribution(name)
        except importlib.metadata.PackageNotFoundError:
            missing.append(name)

    # Report every missing package at once so that they can all be installed
    # in a single round trip.
    if missing:
        pytest.skip(f"Skipping {example} due to missing package requirement(s): {', '.join(missing)}")
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

To avoid multiple round trips when adding missing packages, maybe collect all the names here and move the `pytest.skip` call after the loop, so that all missing names are shown together?



@pytest.mark.parametrize("example", sample_files)
class TestExamples:
    """Test class parametrized over every entry of ``sample_files``."""

    def test_example(self, example, deinit_cuda):
        """Run *example* through ``run_example`` and make device 0 current again if needed."""
        # NOTE(review): deinit_cuda is presumably a pytest fixture defined elsewhere -- confirm.
        run_example(samples_path, example)
        # If the device returned by Device() is not device 0 after the run,
        # switch back to device 0.
        if Device().device_id != 0:
            Device(0).set_current()
def test_example(example):
    """Run one example script in a subprocess and fail if it exits with a non-zero code.

    The test is skipped when a package listed in the example's PEP 723
    metadata is not installed, or when the example's entry in
    ``SYSTEM_REQUIREMENTS`` reports that the system cannot run it.

    Args:
        example: File name of the example, relative to ``samples_path``.

    Raises:
        AssertionError: If the example process returns a non-zero exit code.
    """
    example_path = os.path.join(samples_path, example)
    has_package_requirements_or_skip(example_path)

    # Examples without an entry have no system requirement.
    system_requirement = SYSTEM_REQUIREMENTS.get(example, lambda: True)
    if not system_requirement():
        pytest.skip(f"Skipping {example} due to unmet system requirement")

    process = subprocess.run([sys.executable, example_path], capture_output=True)  # noqa: S603
    if process.returncode != 0:
        # errors="replace": undecodable bytes in the captured output must not
        # raise UnicodeDecodeError and distract from the primary failure.
        if process.stdout:
            print(process.stdout.decode(errors="replace"))
        if process.stderr:
            print(process.stderr.decode(errors="replace"), file=sys.stderr)
        raise AssertionError(f"`{example}` failed ({process.returncode})")
Loading
Loading