Skip to content

Commit 747bf60

Browse files
authored
fix: support zero sized allocation (#1516)
* test: add Device test for zero sized allocation
* test: handle is zero
* test: fix handle assertion
* fix: ensure that zero sized allocations work
* chore: zero out last device ptr
* fix: h_stream
* test: add graph memory resource test
* chore: handle legacy allocators
* test: add zero byte alloc for other resources
* chore: revert cpp changes
* chore: remove invalid check
* chore: create managed memory resource helper
1 parent dc30b3d commit 747bf60

4 files changed

Lines changed: 69 additions & 8 deletions

File tree

cuda_core/cuda/core/_memory/_legacy.py

Lines changed: 17 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -46,8 +46,11 @@ def allocate(self, size, stream=None) -> Buffer:
             from cuda.core._stream import default_stream

             stream = default_stream()
-        err, ptr = driver.cuMemAllocHost(size)
-        raise_if_driver_error(err)
+        if size:
+            err, ptr = driver.cuMemAllocHost(size)
+            raise_if_driver_error(err)
+        else:
+            ptr = 0
         return Buffer._init(ptr, size, self, stream)
5255

5356
def deallocate(self, ptr: DevicePointerT, size, stream):
@@ -64,8 +67,10 @@ def deallocate(self, ptr: DevicePointerT, size, stream):
         """
         if stream is not None:
             stream.sync()
-        (err,) = driver.cuMemFreeHost(ptr)
-        raise_if_driver_error(err)
+
+        if size:
+            (err,) = driver.cuMemFreeHost(ptr)
+            raise_if_driver_error(err)

     @property
     def is_device_accessible(self) -> bool:
@@ -96,15 +101,19 @@ def allocate(self, size, stream=None) -> Buffer:
             from cuda.core._stream import default_stream

             stream = default_stream()
-        err, ptr = driver.cuMemAlloc(size)
-        raise_if_driver_error(err)
+        if size:
+            err, ptr = driver.cuMemAlloc(size)
+            raise_if_driver_error(err)
+        else:
+            ptr = 0
         return Buffer._init(ptr, size, self, stream)

     def deallocate(self, ptr, size, stream):
         if stream is not None:
             stream.sync()
-        (err,) = driver.cuMemFree(ptr)
-        raise_if_driver_error(err)
+        if size:
+            (err,) = driver.cuMemFree(ptr)
+            raise_if_driver_error(err)

     @property
     def is_device_accessible(self) -> bool:

cuda_core/tests/test_device.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,16 @@ def test_device_alloc(deinit_cuda):
     assert buffer.device_id == int(device)


+def test_device_alloc_zero_bytes(deinit_cuda):
+    device = Device()
+    device.set_current()
+    buffer = device.allocate(0)
+    device.sync()
+    assert buffer.handle >= 0
+    assert buffer.size == 0
+    assert buffer.device_id == int(device)
+
+
 def test_device_id(deinit_cuda):
     for device in Device.get_all_devices():
         device.set_current()

cuda_core/tests/test_graph_mem.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,23 @@ def test_graph_alloc_with_output(mempool_device, mode):
     assert compare_buffer_to_constant(out, 6)


+@pytest.mark.parametrize("mode", ["global", "thread_local", "relaxed"])
+def test_graph_mem_alloc_zero(mempool_device, mode):
+    device = mempool_device
+    gb = device.create_graph_builder().begin_building(mode)
+    stream = device.create_stream()
+    gmr = GraphMemoryResource(device)
+    buffer = gmr.allocate(0, stream=gb)
+    graph = gb.end_building().complete()
+    graph.upload(stream)
+    graph.launch(stream)
+    stream.sync()
+
+    assert buffer.handle >= 0
+    assert buffer.size == 0
+    assert buffer.device_id == int(device)
+
+
 @pytest.mark.parametrize("mode", ["global", "thread_local", "relaxed"])
 def test_graph_mem_set_attributes(mempool_device, mode):
     device = mempool_device

cuda_core/tests/test_memory.py

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1240,3 +1240,28 @@ def test_graph_memory_resource_object(init_cuda):
     # These objects are interned.
     assert gmr1 is gmr2 is gmr3
     assert gmr1 == gmr2 == gmr3
+
+
+def test_memory_resource_alloc_zero_bytes(init_cuda, memory_resource_factory):
+    MR, MROps = memory_resource_factory
+
+    device = Device()
+    device.set_current()
+
+    if MR is DeviceMemoryResource and not device.properties.memory_pools_supported:
+        pytest.skip("Device does not support mempool operations")
+    elif MR is PinnedMemoryResource:
+        skip_if_pinned_memory_unsupported(device)
+        mr = MR()
+    elif MR is ManagedMemoryResource:
+        skip_if_managed_memory_unsupported(device)
+        mr = create_managed_memory_resource_or_skip(MROps(preferred_location=device.device_id))
+    else:
+        assert MR is DeviceMemoryResource
+        mr = MR(device)
+
+    buffer = mr.allocate(0)
+    device.sync()
+    assert buffer.handle >= 0
+    assert buffer.size == 0
+    assert buffer.device_id == mr.device_id

0 commit comments

Comments (0)