Skip to content

Commit 87b49b7

Browse files
authored
Restore v0.3.x MemoryResource behaviors + MR minor follow-ups (NVIDIA#1388)
* add back missing MR attributes to restore/mimic v0.3.x behavior
* update tests to avoid deprecation warnings
* consolidate common mempool attributes + protect multi-threading warnings
* fix linter
1 parent c82e0d8 commit 87b49b7

6 files changed

Lines changed: 91 additions & 105 deletions

File tree

cuda_core/cuda/core/experimental/_memory/_buffer.pyx

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,6 @@ from cuda.core.experimental._memory cimport _ipc
1616
from cuda.core.experimental._stream cimport Stream_accept, Stream
1717
from cuda.core.experimental._utils.cuda_utils cimport HANDLE_RETURN
1818

19-
import abc
2019
import sys
2120
from typing import TypeVar, Union
2221

@@ -492,7 +491,6 @@ cdef class MemoryResource:
492491
resource's respective property.)
493492
"""
494493

495-
@abc.abstractmethod
496494
def allocate(self, size_t size, stream: Stream | GraphBuilder | None = None) -> Buffer:
497495
"""Allocate a buffer of the requested size.
498496

@@ -511,9 +509,8 @@ cdef class MemoryResource:
511509
The allocated buffer object, which can be used for device or host operations
512510
depending on the resource's properties.
513511
"""
514-
...
512+
raise TypeError("MemoryResource.allocate must be implemented by subclasses.")
515513

516-
@abc.abstractmethod
517514
def deallocate(self, ptr: DevicePointerT, size_t size, stream: Stream | GraphBuilder | None = None):
518515
"""Deallocate a buffer previously allocated by this resource.
519516
@@ -528,4 +525,19 @@ cdef class MemoryResource:
528525
If None, it is up to each memory resource implementation to decide
529526
and document the behavior.
530527
"""
531-
...
528+
raise TypeError("MemoryResource.deallocate must be implemented by subclasses.")
529+
530+
@property
531+
def is_device_accessible(self) -> bool:
532+
"""Whether buffers allocated by this resource are device-accessible."""
533+
raise TypeError("MemoryResource.is_device_accessible must be implemented by subclasses.")
534+
535+
@property
536+
def is_host_accessible(self) -> bool:
537+
"""Whether buffers allocated by this resource are host-accessible."""
538+
raise TypeError("MemoryResource.is_host_accessible must be implemented by subclasses.")
539+
540+
@property
541+
def device_id(self) -> int:
542+
"""Device ID associated with this memory resource, or -1 if not applicable."""
543+
raise TypeError("MemoryResource.device_id must be implemented by subclasses.")

cuda_core/cuda/core/experimental/_memory/_device_memory_resource.pyx

Lines changed: 1 addition & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ from cuda.core.experimental._utils.cuda_utils cimport (
1515

1616
from dataclasses import dataclass
1717
import multiprocessing
18-
from typing import Optional, TYPE_CHECKING
18+
from typing import TYPE_CHECKING
1919
import platform # no-cython-lint
2020
import uuid
2121

@@ -228,27 +228,6 @@ cdef class DeviceMemoryResource(_MemPool):
228228
"""Return False. This memory resource does not provide host-accessible buffers."""
229229
return False
230230

231-
@property
232-
def is_ipc_enabled(self) -> bool:
233-
"""Whether this memory resource has IPC enabled."""
234-
return self._ipc_data is not None
235-
236-
@property
237-
def is_mapped(self) -> bool:
238-
"""
239-
Whether this is a mapping of an IPC-enabled memory resource from
240-
another process. If True, allocation is not permitted.
241-
"""
242-
return self._ipc_data is not None and self._ipc_data._is_mapped
243-
244-
@property
245-
def uuid(self) -> Optional[uuid.UUID]:
246-
"""
247-
A universally unique identifier for this memory resource. Meaningful
248-
only for IPC-enabled memory resources.
249-
"""
250-
return getattr(self._ipc_data, 'uuid', None)
251-
252231

253232
# Note: this is referenced in instructions to debug nvbug 5698116.
254233
cpdef DMR_mempool_get_access(DeviceMemoryResource dmr, int device_id):

cuda_core/cuda/core/experimental/_memory/_managed_memory_resource.pyx

Lines changed: 0 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -104,16 +104,3 @@ cdef class ManagedMemoryResource(_MemPool):
104104
def is_host_accessible(self) -> bool:
105105
"""Return True. This memory resource provides host-accessible buffers."""
106106
return True
107-
108-
@property
109-
def is_ipc_enabled(self) -> bool:
110-
"""Whether this memory resource has IPC enabled."""
111-
return self._ipc_data is not None
112-
113-
@property
114-
def is_mapped(self) -> bool:
115-
"""
116-
Whether this is a mapping of an IPC-enabled memory resource from
117-
another process. If True, allocation is not permitted.
118-
"""
119-
return self._ipc_data is not None and self._ipc_data._is_mapped

cuda_core/cuda/core/experimental/_memory/_memory_pool.pyx

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -278,6 +278,27 @@ cdef class _MemPool(MemoryResource):
278278

279279
self._peer_accessible_by = tuple(target_ids)
280280

281+
@property
282+
def is_ipc_enabled(self) -> bool:
283+
"""Whether this memory resource has IPC enabled."""
284+
return self._ipc_data is not None
285+
286+
@property
287+
def is_mapped(self) -> bool:
288+
"""
289+
Whether this is a mapping of an IPC-enabled memory resource from
290+
another process. If True, allocation is not permitted.
291+
"""
292+
return self._ipc_data is not None and self._ipc_data._is_mapped
293+
294+
@property
295+
def uuid(self) -> Optional[uuid.UUID]:
296+
"""
297+
A universally unique identifier for this memory resource. Meaningful
298+
only for IPC-enabled memory resources.
299+
"""
300+
return getattr(self._ipc_data, 'uuid', None)
301+
281302

282303
# _MemPool Implementation
283304
# -----------------------
@@ -406,7 +427,7 @@ cdef inline Buffer _MP_allocate(_MemPool self, size_t size, Stream stream):
406427

407428
cdef inline void _MP_deallocate(
408429
_MemPool self, uintptr_t ptr, size_t size, Stream stream
409-
) noexcept:
430+
) noexcept nogil:
410431
cdef cydriver.CUstream s = stream._handle
411432
cdef cydriver.CUdeviceptr devptr = <cydriver.CUdeviceptr>ptr
412433
cdef cydriver.CUresult r

cuda_core/cuda/core/experimental/_memory/_pinned_memory_resource.pyx

Lines changed: 45 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,11 @@ from cuda.core.experimental._utils.cuda_utils cimport (
1414
)
1515

1616
from dataclasses import dataclass
17-
from typing import Optional
1817
import multiprocessing
18+
import os
1919
import platform # no-cython-lint
2020
import subprocess
21+
import threading
2122
import uuid
2223
import warnings
2324

@@ -26,6 +27,7 @@ from cuda.core.experimental._utils.cuda_utils import check_multiprocessing_start
2627

2728
# Cache to ensure NUMA warning is only raised once per process
2829
cdef bint _numa_warning_shown = False
30+
cdef object _lock = threading.Lock()
2931

3032

3133
def _check_numa_nodes():
@@ -34,49 +36,53 @@ def _check_numa_nodes():
3436
if _numa_warning_shown:
3537
return
3638

37-
if platform.system() != "Linux":
38-
return
39+
with _lock:
40+
if _numa_warning_shown:
41+
return
42+
43+
if platform.system() != "Linux":
44+
_numa_warning_shown = True
45+
return
3946

40-
numa_count = None
41-
42-
# Try /sys filesystem first (most reliable and doesn't require external tools)
43-
try:
44-
import os
45-
node_path = "/sys/devices/system/node"
46-
if os.path.exists(node_path):
47-
# Count directories named "node[0-9]+"
48-
nodes = [d for d in os.listdir(node_path) if d.startswith("node") and d[4:].isdigit()]
49-
numa_count = len(nodes)
50-
except (OSError, PermissionError):
51-
pass
52-
53-
# Fallback to lscpu if /sys check didn't work
54-
if numa_count is None:
47+
numa_count = None
48+
49+
# Try /sys filesystem first (most reliable and doesn't require external tools)
5550
try:
56-
result = subprocess.run(
57-
["lscpu"],
58-
capture_output=True,
59-
text=True,
60-
timeout=1
61-
)
62-
for line in result.stdout.splitlines():
63-
if line.startswith("NUMA node(s):"):
64-
numa_count = int(line.split(":")[1].strip())
65-
break
66-
except (subprocess.SubprocessError, ValueError, FileNotFoundError):
51+
node_path = "/sys/devices/system/node"
52+
if os.path.exists(node_path):
53+
# Count directories named "node[0-9]+"
54+
nodes = [d for d in os.listdir(node_path) if d.startswith("node") and d[4:].isdigit()]
55+
numa_count = len(nodes)
56+
except (OSError, PermissionError):
6757
pass
6858

69-
# Warn if multiple NUMA nodes detected
70-
if numa_count is not None and numa_count > 1:
71-
warnings.warn(
72-
f"System has {numa_count} NUMA nodes. IPC-enabled pinned memory "
73-
f"uses location ID 0, which may not work correctly with multiple "
74-
f"NUMA nodes.",
75-
UserWarning,
76-
stacklevel=3
77-
)
59+
# Fallback to lscpu if /sys check didn't work
60+
if numa_count is None:
61+
try:
62+
result = subprocess.run(
63+
["lscpu"],
64+
capture_output=True,
65+
text=True,
66+
timeout=1
67+
)
68+
for line in result.stdout.splitlines():
69+
if line.startswith("NUMA node(s):"):
70+
numa_count = int(line.split(":")[1].strip())
71+
break
72+
except (subprocess.SubprocessError, ValueError, FileNotFoundError):
73+
pass
74+
75+
# Warn if multiple NUMA nodes detected
76+
if numa_count is not None and numa_count > 1:
77+
warnings.warn(
78+
f"System has {numa_count} NUMA nodes. IPC-enabled pinned memory "
79+
f"uses location ID 0, which may not work correctly with multiple "
80+
f"NUMA nodes.",
81+
UserWarning,
82+
stacklevel=3
83+
)
7884

79-
_numa_warning_shown = True
85+
_numa_warning_shown = True
8086

8187

8288
__all__ = ['PinnedMemoryResource', 'PinnedMemoryResourceOptions']
@@ -243,27 +249,6 @@ cdef class PinnedMemoryResource(_MemPool):
243249
"""Return True. This memory resource provides host-accessible buffers."""
244250
return True
245251

246-
@property
247-
def is_ipc_enabled(self) -> bool:
248-
"""Whether this memory resource has IPC enabled."""
249-
return self._ipc_data is not None
250-
251-
@property
252-
def is_mapped(self) -> bool:
253-
"""
254-
Whether this is a mapping of an IPC-enabled memory resource from
255-
another process. If True, allocation is not permitted.
256-
"""
257-
return self._ipc_data is not None and self._ipc_data._is_mapped
258-
259-
@property
260-
def uuid(self) -> Optional[uuid.UUID]:
261-
"""
262-
A universally unique identifier for this memory resource. Meaningful
263-
only for IPC-enabled memory resources.
264-
"""
265-
return getattr(self._ipc_data, 'uuid', None)
266-
267252

268253
def _deep_reduce_pinned_memory_resource(mr):
269254
check_multiprocessing_start_method()

cuda_core/tests/test_memory.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -172,6 +172,8 @@ def test_buffer_initialization():
172172
buffer_initialization(DummyHostMemoryResource())
173173
buffer_initialization(DummyUnifiedMemoryResource(device))
174174
buffer_initialization(DummyPinnedMemoryResource(device))
175+
with pytest.raises(TypeError):
176+
buffer_initialization(MemoryResource())
175177

176178

177179
def buffer_copy_to(dummy_mr: MemoryResource, device: Device, check=False):
@@ -365,7 +367,7 @@ def test_buffer_external_host():
365367

366368
@pytest.mark.parametrize("change_device", [True, False])
367369
def test_buffer_external_device(change_device):
368-
n = ccx_system.num_devices
370+
n = ccx_system.get_num_devices()
369371
if n < 1:
370372
pytest.skip("No devices found")
371373
dev_id = n - 1
@@ -389,7 +391,7 @@ def test_buffer_external_device(change_device):
389391

390392
@pytest.mark.parametrize("change_device", [True, False])
391393
def test_buffer_external_pinned_alloc(change_device):
392-
n = ccx_system.num_devices
394+
n = ccx_system.get_num_devices()
393395
if n < 1:
394396
pytest.skip("No devices found")
395397
dev_id = n - 1
@@ -414,7 +416,7 @@ def test_buffer_external_pinned_alloc(change_device):
414416

415417
@pytest.mark.parametrize("change_device", [True, False])
416418
def test_buffer_external_pinned_registered(change_device):
417-
n = ccx_system.num_devices
419+
n = ccx_system.get_num_devices()
418420
if n < 1:
419421
pytest.skip("No devices found")
420422
dev_id = n - 1
@@ -447,7 +449,7 @@ def test_buffer_external_pinned_registered(change_device):
447449

448450
@pytest.mark.parametrize("change_device", [True, False])
449451
def test_buffer_external_managed(change_device):
450-
n = ccx_system.num_devices
452+
n = ccx_system.get_num_devices()
451453
if n < 1:
452454
pytest.skip("No devices found")
453455
dev_id = n - 1

0 commit comments

Comments
 (0)