Skip to content

Commit 621d1d4

Browse files
authored
Merge branch 'main' into bump_cuda_core
2 parents 6b1a7bb + 75aea80 commit 621d1d4

File tree

9 files changed

+68
-26
lines changed

9 files changed

+68
-26
lines changed

cuda_core/cuda/core/experimental/_device.pyx

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ from cuda.core.experimental._context import Context, ContextOptions
1616
from cuda.core.experimental._event import Event, EventOptions
1717
from cuda.core.experimental._graph import GraphBuilder
1818
from cuda.core.experimental._memory import Buffer, DeviceMemoryResource, MemoryResource, _SynchronousMemoryResource
19-
from cuda.core.experimental._stream import IsStreamT, Stream, StreamOptions, default_stream
19+
from cuda.core.experimental._stream import IsStreamT, Stream, StreamOptions
2020
from cuda.core.experimental._utils.clear_error_support import assert_type
2121
from cuda.core.experimental._utils.cuda_utils import (
2222
ComputeCapability,
@@ -25,6 +25,7 @@ from cuda.core.experimental._utils.cuda_utils import (
2525
handle_return,
2626
runtime,
2727
)
28+
from cuda.core.experimental._stream cimport default_stream
2829

2930

3031
# TODO: I prefer to type these as "cdef object" and avoid accessing them from within Python,

cuda_core/cuda/core/experimental/_memory.pyx

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ from libc.string cimport memset, memcpy
1212
from cuda.bindings cimport cydriver
1313

1414
from cuda.core.experimental._stream cimport Stream as cyStream
15+
from cuda.core.experimental._stream cimport default_stream
1516
from cuda.core.experimental._utils.cuda_utils cimport (
1617
_check_driver_error as raise_if_driver_error,
1718
check_or_create_options,
@@ -30,7 +31,7 @@ import platform
3031
import weakref
3132

3233
from cuda.core.experimental._dlpack import DLDeviceType, make_py_capsule
33-
from cuda.core.experimental._stream import Stream, default_stream
34+
from cuda.core.experimental._stream import Stream
3435
from cuda.core.experimental._utils.cuda_utils import ( driver, Transaction, get_binding_version )
3536

3637
if platform.system() == "Linux":

cuda_core/cuda/core/experimental/_stream.pxd

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,3 +22,6 @@ cdef class Stream:
2222
cpdef close(self)
2323
cdef int _get_context(self) except?-1 nogil
2424
cdef int _get_device_and_context(self) except?-1
25+
26+
27+
cdef Stream default_stream()

cuda_core/cuda/core/experimental/_stream.pyx

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from __future__ import annotations
66

77
from libc.stdint cimport uintptr_t, INT32_MIN
8+
from libc.stdlib cimport strtol, getenv
89

910
from cuda.bindings cimport cydriver
1011

@@ -388,11 +389,16 @@ cdef class Stream:
388389
return GraphBuilder._init(stream=self, is_stream_owner=False)
389390

390391

391-
LEGACY_DEFAULT_STREAM = Stream._legacy_default()
392-
PER_THREAD_DEFAULT_STREAM = Stream._per_thread_default()
392+
# c-only python objects, not public
393+
cdef Stream C_LEGACY_DEFAULT_STREAM = Stream._legacy_default()
394+
cdef Stream C_PER_THREAD_DEFAULT_STREAM = Stream._per_thread_default()
393395

396+
# standard python objects, public
397+
LEGACY_DEFAULT_STREAM = C_LEGACY_DEFAULT_STREAM
398+
PER_THREAD_DEFAULT_STREAM = C_PER_THREAD_DEFAULT_STREAM
394399

395-
def default_stream():
400+
401+
cdef Stream default_stream():
396402
"""Return the default CUDA :obj:`~_stream.Stream`.
397403
398404
The type of default stream returned depends on if the environment
@@ -403,8 +409,14 @@ def default_stream():
403409
404410
"""
405411
# TODO: flip the default
406-
use_ptds = int(os.environ.get("CUDA_PYTHON_CUDA_PER_THREAD_DEFAULT_STREAM", 0))
412+
cdef const char* use_ptds_raw = getenv("CUDA_PYTHON_CUDA_PER_THREAD_DEFAULT_STREAM")
413+
414+
cdef int use_ptds = 0
415+
if use_ptds_raw != NULL:
416+
use_ptds = strtol(use_ptds_raw, NULL, 10)
417+
418+
# value is non-zero, including for weird stuff like 123foo
407419
if use_ptds:
408-
return PER_THREAD_DEFAULT_STREAM
420+
return C_PER_THREAD_DEFAULT_STREAM
409421
else:
410-
return LEGACY_DEFAULT_STREAM
422+
return C_LEGACY_DEFAULT_STREAM

cuda_core/tests/memory_ipc/test_memory_ipc.py

Lines changed: 24 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -38,13 +38,15 @@ def test_main(self, ipc_device, ipc_memory_resource):
3838

3939
# Verify that the buffer was modified.
4040
helper.verify_buffer(flipped=True)
41+
buffer.close()
4142

4243
def child_main(self, device, mr, queue):
4344
device.set_current()
4445
buffer = queue.get(timeout=CHILD_TIMEOUT_SEC)
4546
helper = IPCBufferTestHelper(device, buffer)
4647
helper.verify_buffer(flipped=False)
4748
helper.fill_buffer(flipped=True)
49+
buffer.close()
4850

4951

5052
class TestIPCMempoolMultiple:
@@ -78,6 +80,8 @@ def test_main(self, ipc_device, ipc_memory_resource):
7880
# Verify that the buffers were modified.
7981
IPCBufferTestHelper(device, buffer1).verify_buffer(flipped=False)
8082
IPCBufferTestHelper(device, buffer2).verify_buffer(flipped=True)
83+
buffer1.close()
84+
buffer2.close()
8185

8286
def child_main(self, device, mr, idx, queue):
8387
# Note: passing the mr registers it so that buffers can be passed
@@ -89,6 +93,8 @@ def child_main(self, device, mr, idx, queue):
8993
IPCBufferTestHelper(device, buffer1).fill_buffer(flipped=False)
9094
elif idx == 2:
9195
IPCBufferTestHelper(device, buffer2).fill_buffer(flipped=True)
96+
buffer1.close()
97+
buffer2.close()
9298

9399

94100
class TestIPCSharedAllocationHandleAndBufferDescriptors:
@@ -110,10 +116,10 @@ def test_main(self, ipc_device, ipc_memory_resource):
110116
p2.start()
111117

112118
# Allocate and share memory.
113-
buf1 = mr.allocate(NBYTES)
114-
buf2 = mr.allocate(NBYTES)
115-
q1.put(buf1.get_ipc_descriptor())
116-
q2.put(buf2.get_ipc_descriptor())
119+
buffer1 = mr.allocate(NBYTES)
120+
buffer2 = mr.allocate(NBYTES)
121+
q1.put(buffer1.get_ipc_descriptor())
122+
q2.put(buffer2.get_ipc_descriptor())
117123

118124
# Wait for children.
119125
p1.join(timeout=CHILD_TIMEOUT_SEC)
@@ -122,8 +128,10 @@ def test_main(self, ipc_device, ipc_memory_resource):
122128
assert p2.exitcode == 0
123129

124130
# Verify results.
125-
IPCBufferTestHelper(device, buf1).verify_buffer(starting_from=1)
126-
IPCBufferTestHelper(device, buf2).verify_buffer(starting_from=2)
131+
IPCBufferTestHelper(device, buffer1).verify_buffer(starting_from=1)
132+
IPCBufferTestHelper(device, buffer2).verify_buffer(starting_from=2)
133+
buffer1.close()
134+
buffer2.close()
127135

128136
def child_main(self, device, alloc_handle, idx, queue):
129137
"""Fills a shared memory buffer."""
@@ -134,6 +142,7 @@ def child_main(self, device, alloc_handle, idx, queue):
134142
buffer_descriptor = queue.get(timeout=CHILD_TIMEOUT_SEC)
135143
buffer = Buffer.from_ipc_descriptor(mr, buffer_descriptor)
136144
IPCBufferTestHelper(device, buffer).fill_buffer(starting_from=idx)
145+
buffer.close()
137146

138147

139148
class TestIPCSharedAllocationHandleAndBufferObjects:
@@ -154,10 +163,10 @@ def test_main(self, ipc_device, ipc_memory_resource):
154163
p2.start()
155164

156165
# Allocate and share memory.
157-
buf1 = mr.allocate(NBYTES)
158-
buf2 = mr.allocate(NBYTES)
159-
q1.put(buf1)
160-
q2.put(buf2)
166+
buffer1 = mr.allocate(NBYTES)
167+
buffer2 = mr.allocate(NBYTES)
168+
q1.put(buffer1)
169+
q2.put(buffer2)
161170

162171
# Wait for children.
163172
p1.join(timeout=CHILD_TIMEOUT_SEC)
@@ -166,8 +175,10 @@ def test_main(self, ipc_device, ipc_memory_resource):
166175
assert p2.exitcode == 0
167176

168177
# Verify results.
169-
IPCBufferTestHelper(device, buf1).verify_buffer(starting_from=1)
170-
IPCBufferTestHelper(device, buf2).verify_buffer(starting_from=2)
178+
IPCBufferTestHelper(device, buffer1).verify_buffer(starting_from=1)
179+
IPCBufferTestHelper(device, buffer2).verify_buffer(starting_from=2)
180+
buffer1.close()
181+
buffer2.close()
171182

172183
def child_main(self, device, alloc_handle, idx, queue):
173184
"""Fills a shared memory buffer."""
@@ -179,3 +190,4 @@ def child_main(self, device, alloc_handle, idx, queue):
179190
# Now get buffers.
180191
buffer = queue.get(timeout=CHILD_TIMEOUT_SEC)
181192
IPCBufferTestHelper(device, buffer).fill_buffer(starting_from=idx)
193+
buffer.close()

cuda_core/tests/memory_ipc/test_send_buffers.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ def test_ipc_send_buffers(ipc_device, nmrs):
4747
for buffer in buffers:
4848
helper = IPCBufferTestHelper(device, buffer)
4949
helper.verify_buffer(flipped=True)
50+
buffer.close()
5051

5152

5253
def child_main(device, buffers):
@@ -55,3 +56,4 @@ def child_main(device, buffers):
5556
helper = IPCBufferTestHelper(device, buffer)
5657
helper.verify_buffer(flipped=False)
5758
helper.fill_buffer(flipped=True)
59+
buffer.close()

cuda_core/tests/memory_ipc/test_serialize.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,8 @@ def test_main(self, ipc_device, ipc_memory_resource):
4848
# Confirm buffers were modified.
4949
IPCBufferTestHelper(device, buffer1).verify_buffer(flipped=True)
5050
IPCBufferTestHelper(device, buffer2).verify_buffer(flipped=True)
51+
buffer1.close()
52+
buffer2.close()
5153

5254
def child_main(self, conn):
5355
# Set up the device.
@@ -67,6 +69,8 @@ def child_main(self, conn):
6769
# Modify the buffers.
6870
IPCBufferTestHelper(device, buffer1).fill_buffer(flipped=True)
6971
IPCBufferTestHelper(device, buffer2).fill_buffer(flipped=True)
72+
buffer1.close()
73+
buffer2.close()
7074

7175

7276
class TestObjectSerializationWithMR:
@@ -97,6 +101,7 @@ def test_main(self, ipc_device, ipc_memory_resource):
97101

98102
# Confirm buffer was modified.
99103
IPCBufferTestHelper(device, buffer).verify_buffer(flipped=True)
104+
buffer.close()
100105

101106
def child_main(self, pipe, _):
102107
device = Device()
@@ -110,6 +115,7 @@ def child_main(self, pipe, _):
110115
buffer = pipe[0].get(timeout=CHILD_TIMEOUT_SEC)
111116
assert buffer.memory_resource.handle == mr.handle
112117
IPCBufferTestHelper(device, buffer).fill_buffer(flipped=True)
118+
buffer.close()
113119

114120

115121
def test_object_passing(ipc_device, ipc_memory_resource):
@@ -138,6 +144,7 @@ def test_object_passing(ipc_device, ipc_memory_resource):
138144
assert process.exitcode == 0
139145

140146
helper.verify_buffer(flipped=True)
147+
buffer.close()
141148

142149

143150
def child_main(alloc_handle, mr1, buffer_desc, buffer1):
@@ -178,3 +185,6 @@ def child_main(alloc_handle, mr1, buffer_desc, buffer1):
178185
helper1.verify_buffer(flipped=True)
179186
helper2.verify_buffer(flipped=True)
180187
helper3.verify_buffer(flipped=True)
188+
189+
# Close any one buffer.
190+
buffer1.close()

cuda_core/tests/memory_ipc/test_workerpool.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,11 +38,13 @@ def test_main(self, ipc_device, nmrs):
3838

3939
for buffer in buffers:
4040
IPCBufferTestHelper(device, buffer).verify_buffer(flipped=True)
41+
buffer.close()
4142

4243
def process_buffer(self, buffer):
4344
device = Device(buffer.memory_resource.device_id)
4445
device.set_current()
4546
IPCBufferTestHelper(device, buffer).fill_buffer(flipped=True)
47+
buffer.close()
4648

4749

4850
class TestIpcWorkerPoolUsingIPCDescriptors:
@@ -73,13 +75,15 @@ def test_main(self, ipc_device, nmrs):
7375

7476
for buffer in buffers:
7577
IPCBufferTestHelper(device, buffer).verify_buffer(flipped=True)
78+
buffer.close()
7679

7780
def process_buffer(self, mr_idx, buffer_desc):
7881
mr = self.mrs[mr_idx]
7982
device = Device(mr.device_id)
8083
device.set_current()
8184
buffer = Buffer.from_ipc_descriptor(mr, buffer_desc)
8285
IPCBufferTestHelper(device, buffer).fill_buffer(flipped=True)
86+
buffer.close()
8387

8488

8589
class TestIpcWorkerPoolUsingRegistry:
@@ -110,8 +114,10 @@ def test_main(self, ipc_device, nmrs):
110114

111115
for buffer in buffers:
112116
IPCBufferTestHelper(device, buffer).verify_buffer(flipped=True)
117+
buffer.close()
113118

114119
def process_buffer(self, device, buffer_s):
115120
device.set_current()
116121
buffer = pickle.loads(buffer_s) # noqa: S301
117122
IPCBufferTestHelper(device, buffer).fill_buffer(flipped=True)
123+
buffer.close()

cuda_core/tests/test_stream.py

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44
import pytest
55
from cuda.core.experimental import Device, Stream, StreamOptions
66
from cuda.core.experimental._event import Event
7-
from cuda.core.experimental._stream import LEGACY_DEFAULT_STREAM, PER_THREAD_DEFAULT_STREAM, default_stream
7+
from cuda.core.experimental._stream import LEGACY_DEFAULT_STREAM, PER_THREAD_DEFAULT_STREAM
88
from cuda.core.experimental._utils.cuda_utils import driver
99

1010

@@ -107,11 +107,6 @@ def test_per_thread_default_stream():
107107
assert isinstance(PER_THREAD_DEFAULT_STREAM, Stream)
108108

109109

110-
def test_default_stream():
111-
stream = default_stream()
112-
assert isinstance(stream, Stream)
113-
114-
115110
def test_stream_subclassing(init_cuda):
116111
class MyStream(Stream):
117112
pass

0 commit comments

Comments
 (0)