Skip to content

Commit 3ad8389

Browse files
authored
cuda-core: add pixi examples env and fix runnable examples (NVIDIA#1718)
* cuda-core: add pixi examples env and fix example run regressions

  Add a dedicated Pixi `examples` environment with the optional runtime dependencies needed by the example suite and CUDA include-path activation. Also fix two runnable examples so they execute correctly under the new env.

  Made-with: Cursor

* cuda-core: require pytorch-gpu in examples env

  Use the explicit `pytorch-gpu` metapackage for the examples environment so PyTorch resolves to a CUDA-enabled build by design.

  Made-with: Cursor

* cuda-core: simplify strided memory CPU example flow

  Refactor the cffi CPU example into small helpers so setup/teardown is easier to follow and avoid global state for compiled symbols. Also direct progress prints to stderr to match example logging behavior.

  Made-with: Cursor

* cuda-core: apply pre-commit simplification in CPU example

  Adopt ruff's SIM117 recommendation by collapsing nested context managers in the CPU strided memory example run path.

  Made-with: Cursor
1 parent 244738a commit 3ad8389

4 files changed

Lines changed: 3719 additions & 37 deletions

File tree

cuda_core/examples/jit_lto_fractal.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -76,7 +76,7 @@ def __init__(self):
7676

7777
# Setup the launch configuration such that each thread will be generating one pixel, and subdivide
7878
# the problem into 16x16 chunks.
79-
self.grid = (self.width / 16, self.height / 16, 1.0)
79+
self.grid = (self.width // 16, self.height // 16, 1)
8080
self.block = (16, 16, 1)
8181
self.config = LaunchConfig(grid=self.grid, block=self.block)
8282

cuda_core/examples/strided_memory_view_cpu.py

Lines changed: 25 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -15,10 +15,10 @@
1515
# ################################################################################
1616

1717
import importlib
18-
import shutil
1918
import string
2019
import sys
2120
import tempfile
21+
from contextlib import contextmanager
2222

2323
try:
2424
from cffi import FFI
@@ -61,9 +61,7 @@
6161
# We assume the 0-th argument supports either DLPack or CUDA Array Interface (both
6262
# of which are supported by StridedMemoryView).
6363
@args_viewable_as_strided_memory((0,))
64-
def my_func(arr):
65-
global cpu_func
66-
global cpu_prog
64+
def my_func(arr, cpu_prog, cpu_func):
6765
# Create a memory view over arr (assumed to be a 1D array of int32). The stream
6866
# ordering is taken care of, so that arr can be safely accessed on our work
6967
# stream (ordered after a data stream on which arr is potentially prepared).
@@ -79,8 +77,7 @@ def my_func(arr):
7977
cpu_func(cpu_prog.cast("int*", view.ptr), size)
8078

8179

82-
def run():
83-
global my_func
80+
def _create_cpu_program():
8481
# Here is a concrete (very naive!) implementation on CPU:
8582
cpu_code = string.Template(r"""
8683
extern "C"
@@ -101,31 +98,36 @@ def run():
10198
source_extension=".cpp",
10299
extra_compile_args=["-std=c++11"],
103100
)
104-
temp_dir = tempfile.mkdtemp()
101+
return cpu_prog
102+
103+
104+
@contextmanager
105+
def _compiled_cpu_func(cpu_prog, temp_dir):
105106
saved_sys_path = sys.path.copy()
106107
try:
107108
cpu_prog.compile(tmpdir=temp_dir)
108-
109109
sys.path.append(temp_dir)
110110
cpu_func = getattr(importlib.import_module("_cpu_obj.lib"), func_name)
111+
yield cpu_func
112+
finally:
113+
sys.path = saved_sys_path
114+
# Ensure cffi modules are unloadable before removing the temp build dir.
115+
sys.modules.pop("_cpu_obj.lib", None)
116+
sys.modules.pop("_cpu_obj", None)
111117

112-
# Create input array on CPU
113-
arr_cpu = np.zeros(1024, dtype=np.int32)
114-
print(f"before: {arr_cpu[:10]=}")
115118

116-
# Run the workload
117-
my_func(arr_cpu)
119+
def _run_example(cpu_prog, cpu_func):
120+
arr_cpu = np.zeros(1024, dtype=np.int32)
121+
print(f"before: {arr_cpu[:10]=}", file=sys.stderr)
122+
my_func(arr_cpu, cpu_prog, cpu_func)
123+
print(f"after: {arr_cpu[:10]=}", file=sys.stderr)
124+
assert np.allclose(arr_cpu, np.arange(1024, dtype=np.int32))
118125

119-
# Check the result
120-
print(f"after: {arr_cpu[:10]=}")
121-
assert np.allclose(arr_cpu, np.arange(1024, dtype=np.int32))
122-
finally:
123-
sys.path = saved_sys_path
124-
# to allow FFI module to unload, we delete references to
125-
# to cpu_func
126-
del cpu_func, my_func
127-
# clean up temp directory
128-
shutil.rmtree(temp_dir)
126+
127+
def run():
128+
cpu_prog = _create_cpu_program()
129+
with tempfile.TemporaryDirectory() as temp_dir, _compiled_cpu_func(cpu_prog, temp_dir) as cpu_func:
130+
_run_example(cpu_prog, cpu_func)
129131

130132

131133
if __name__ == "__main__":

0 commit comments

Comments
 (0)