# SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# SPDX-License-Identifier: Apache-2.0

# ################################################################################
#
# This example demonstrates the newer memory-pool APIs by combining
# PinnedMemoryResource, ManagedMemoryResource, and GraphMemoryResource in one
# workflow.
#
# ################################################################################

# /// script
# dependencies = ["cuda_bindings", "cuda_core", "nvidia-cuda-nvrtc", "numpy>=2.1"]
# ///

import sys

import numpy as np

from cuda.core import (
    Device,
    GraphMemoryResource,
    LaunchConfig,
    ManagedMemoryResource,
    ManagedMemoryResourceOptions,
    PinnedMemoryResource,
    PinnedMemoryResourceOptions,
    Program,
    ProgramOptions,
    launch,
)

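# Grid-stride kernel that applies data[i] = data[i] * scale + bias in place.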
code = """
extern "C" __global__ void scale_and_bias(float* data, size_t size, float scale, float bias) {
    const unsigned int tid = threadIdx.x + blockIdx.x * blockDim.x;
    const unsigned int stride = blockDim.x * gridDim.x;
    for (size_t i = tid; i < size; i += stride) {
        data[i] = data[i] * scale + bias;
    }
}
"""


def main():
    if np.lib.NumpyVersion(np.__version__) < "2.1.0":
        print("This example requires NumPy 2.1.0 or later", file=sys.stderr)
        sys.exit(1)

    device = Device()
    device.set_current()
    stream = device.create_stream()

    managed_mr = None
    pinned_mr = None
    graph_mr = None
    managed_buffer = None
    pinned_buffer = None
    graph_capture = None
    graph = None

    try:
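        # Compile the kernel with NVRTC, targeting the current device's architecture.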
        options = ProgramOptions(std="c++17", arch=f"sm_{device.arch}")
        program = Program(code, code_type="c++", options=options)
        module = program.compile("cubin")
        kernel = module.get_kernel("scale_and_bias")

        size = 256
        dtype = np.float32
        nbytes = size * dtype().itemsize
        config = LaunchConfig(grid=(size + 127) // 128, block=128)

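        # Managed (unified) memory pool whose allocations prefer to reside on this device.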
        managed_options = ManagedMemoryResourceOptions(
            preferred_location=device.device_id,
            preferred_location_type="device",
        )
        managed_mr = ManagedMemoryResource(options=managed_options)

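        # Pinned (page-locked) host memory pool; bind it to the device's host NUMA node
        # when the device reports one.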
        pinned_options = {"ipc_enabled": False}
        host_numa_id = getattr(device.properties, "host_numa_id", -1)
        if host_numa_id >= 0:
            pinned_options["numa_id"] = host_numa_id
        pinned_mr = PinnedMemoryResource(options=PinnedMemoryResourceOptions(**pinned_options))

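        # Graph memory resource, used below for an allocation made while capturing a graph.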
        graph_mr = GraphMemoryResource(device)

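        # Stream-ordered allocations from the managed and pinned pools.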
        managed_buffer = managed_mr.allocate(nbytes, stream=stream)
        pinned_buffer = pinned_mr.allocate(nbytes, stream=stream)

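        # Both buffers are host-accessible, so expose them to NumPy via DLPack.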
        managed_array = np.from_dlpack(managed_buffer).view(np.float32)
        pinned_array = np.from_dlpack(pinned_buffer).view(np.float32)

        managed_array[:] = np.arange(size, dtype=dtype)
        managed_original = managed_array.copy()
        stream.sync()

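        # Round-trip check: copy managed -> pinned and compare on the host.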
        managed_buffer.copy_to(pinned_buffer, stream=stream)
        stream.sync()
        assert np.array_equal(pinned_array, managed_original)

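        # Capture a small graph: stage the data in a graph-owned scratch buffer, run the
        # kernel on it, then copy the result back into the managed buffer.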
        graph_builder = device.create_graph_builder().begin_building("relaxed")
        scratch_buffer = graph_mr.allocate(nbytes, stream=graph_builder)
        scratch_buffer.copy_from(managed_buffer, stream=graph_builder)
        launch(graph_builder, config, kernel, scratch_buffer, np.uint64(size), np.float32(2.0), np.float32(1.0))
        managed_buffer.copy_from(scratch_buffer, stream=graph_builder)
        scratch_buffer.close()

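        # Finish capture, build the executable graph, and run it on the stream.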
        graph_capture = graph_builder.end_building()
        graph = graph_capture.complete()
        graph.upload(stream)
        graph.launch(stream)
        stream.sync()

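        # The kernel ran with scale=2 and bias=1, so both buffers should hold 2 * x + 1.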
        np.testing.assert_allclose(managed_array, managed_original * 2 + 1)
        managed_buffer.copy_to(pinned_buffer, stream=stream)
        stream.sync()
        np.testing.assert_allclose(pinned_array, managed_original * 2 + 1)

        print(f"PinnedMemoryResource numa_id: {pinned_mr.numa_id}")
        print(f"ManagedMemoryResource preferred_location: {managed_mr.preferred_location}")
        print(f"GraphMemoryResource reserved high watermark: {graph_mr.attributes.reserved_mem_high}")
    finally:
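        # Release resources explicitly: buffers before the pools they came from, then the stream.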
        if graph is not None:
            graph.close()
        if graph_capture is not None:
            graph_capture.close()
        if pinned_buffer is not None:
            pinned_buffer.close(stream)
        if managed_buffer is not None:
            managed_buffer.close(stream)
        if graph_mr is not None:
            graph_mr.close()
        if pinned_mr is not None:
            pinned_mr.close()
        if managed_mr is not None:
            managed_mr.close()
        stream.close()


if __name__ == "__main__":
    main()