Skip to content

Commit 4a7c9f0

Browse files
committed
refactor(examples): wrap logic in main() functions
1 parent 0bd28cc commit 4a7c9f0

File tree

10 files changed: +597 −586 lines changed

cuda_core/examples/jit_lto_fractal.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -294,8 +294,8 @@ def main():
294294
axs.set_title(title)
295295
axs.axis("off")
296296
plt.show()
297+
print("done!")
297298

298299

299300
if __name__ == "__main__":
300301
main()
301-
print("done!")

cuda_core/examples/memory_ops.py

Lines changed: 111 additions & 107 deletions
Original file line numberDiff line numberDiff line change
def main():
    """Demonstrate cuda.core memory operations across device and pinned memory.

    Allocates a device buffer and a (legacy) pinned host buffer, runs a small
    kernel that mutates both, copies data between the two memory types, and
    verifies every step. Exits early (status 0) when NumPy is too old.
    """
    # np.from_dlpack on pinned buffers needs NumPy >= 2.1.0. Compare with
    # NumpyVersion, which parses release components numerically — a plain
    # string comparison would misorder versions such as "10.0.0" < "2.1.0".
    if np.lib.NumpyVersion(np.__version__) < "2.1.0":
        print("This example requires NumPy 2.1.0 or later", file=sys.stderr)
        sys.exit(0)

    # Kernel for memory operations
    code = """
    extern "C"
    __global__ void memory_ops(float* device_data, float* pinned_data, size_t N) {
        const unsigned int tid = threadIdx.x + blockIdx.x * blockDim.x;
        if (tid < N) {
            // Access device memory
            device_data[tid] = device_data[tid] + 1.0f;

            // Access pinned memory (zero-copy from GPU)
            pinned_data[tid] = pinned_data[tid] * 3.0f;
        }
    }
    """

    dev = Device()
    dev.set_current()
    stream = dev.create_stream()
    # tell CuPy to use our stream as the current stream:
    cp.cuda.ExternalStream(int(stream.handle)).use()

    # Compile kernel
    program_options = ProgramOptions(std="c++17", arch=f"sm_{dev.arch}")
    prog = Program(code, code_type="c++", options=program_options)
    mod = prog.compile("cubin")
    kernel = mod.get_kernel("memory_ops")

    # Create different memory resources
    device_mr = dev.memory_resource
    pinned_mr = LegacyPinnedMemoryResource()

    # Allocate different types of memory
    size = 1024
    dtype = cp.float32
    element_size = dtype().itemsize
    total_size = size * element_size

    # 1. Device Memory (GPU-only)
    device_buffer = device_mr.allocate(total_size, stream=stream)
    device_array = cp.from_dlpack(device_buffer).view(dtype=dtype)

    # 2. Pinned Memory (CPU memory, GPU accessible)
    pinned_buffer = pinned_mr.allocate(total_size, stream=stream)
    pinned_array = np.from_dlpack(pinned_buffer).view(dtype=dtype)

    # Initialize data
    rng = cp.random.default_rng()
    device_array[:] = rng.random(size, dtype=dtype)
    pinned_array[:] = rng.random(size, dtype=dtype).get()

    # Store original values for verification
    device_original = device_array.copy()
    pinned_original = pinned_array.copy()

    # Sync before kernel launch
    stream.sync()

    # Launch kernel
    block = 256
    grid = (size + block - 1) // block
    config = LaunchConfig(grid=grid, block=block)

    launch(stream, config, kernel, device_buffer, pinned_buffer, cp.uint64(size))
    stream.sync()

    # Verify kernel operations
    assert cp.allclose(device_array, device_original + 1.0), "Device memory operation failed"
    assert cp.allclose(pinned_array, pinned_original * 3.0), "Pinned memory operation failed"

    # Copy data between different memory types
    print("\nCopying data between memory types...")

    # Copy from device to pinned memory
    device_buffer.copy_to(pinned_buffer, stream=stream)
    stream.sync()

    # Verify the copy operation
    assert cp.allclose(pinned_array, device_array), "Device to pinned copy failed"

    # Create a new device buffer and copy from pinned
    new_device_buffer = device_mr.allocate(total_size, stream=stream)
    new_device_array = cp.from_dlpack(new_device_buffer).view(dtype=dtype)

    pinned_buffer.copy_to(new_device_buffer, stream=stream)
    stream.sync()

    # Verify the copy operation
    assert cp.allclose(new_device_array, pinned_array), "Pinned to device copy failed"

    # Clean up
    device_buffer.close(stream)
    pinned_buffer.close(stream)
    new_device_buffer.close(stream)
    stream.close()
    cp.cuda.Stream.null.use()  # reset CuPy's current stream to the null stream

    # Verify buffers are properly closed
    assert device_buffer.handle == 0, "Device buffer should be closed"
    assert pinned_buffer.handle == 0, "Pinned buffer should be closed"
    assert new_device_buffer.handle == 0, "New device buffer should be closed"

    print("Memory management example completed!")


if __name__ == "__main__":
    main()

0 commit comments

Comments (0)