Skip to content

Commit e34aacf

Browse files
cuda.core: move peer-access internals into a single _peer_access_utils.pyx
The previous commit left DeviceMemoryResource carrying three pass-through def methods (_query_peer_access_ids, _peer_access_includes, _apply_peer_access_diff) whose only purpose was to give the pure-Python proxy in _peer_access_utils.py a way to call cdef helpers in _device_memory_resource.pyx. These methods served no public role and cluttered the class API.

Promote _peer_access_utils.py to a Cython module so the proxy and the driver-touching helpers can live together:

- Convert _peer_access_utils.py to _peer_access_utils.pyx. The new module cimports cydriver and DeviceMemoryResource from the .pxd, and uses nogil and direct CUmemAccessDesc packing identically to before.
- Move _DMR_query_peer_access_ids, _DMR_peer_access_includes, _DMR_apply_peer_access_diff, and _DMR_replace_peer_accessible_by from _device_memory_resource.pyx into the new module as cdef helpers (and a cpdef replace_peer_accessible_by entry point used by the property setter).
- Drop the three pass-through def methods from DeviceMemoryResource. The class is left with the property getter and setter only; everything else is module-level in _peer_access_utils.
- The proxy now calls the module-level cdef helpers directly instead of routing through methods on mr.

No behavior change. The public surface (PeerAccessibleBySetProxy, plan_peer_access_update, normalize_peer_access_targets, PeerAccessPlan) is preserved at the same import paths.

Co-authored-by: Cursor <cursoragent@cursor.com>
1 parent 73d2d89 commit e34aacf

2 files changed

Lines changed: 134 additions & 162 deletions

File tree

cuda_core/cuda/core/_memory/_device_memory_resource.pyx

Lines changed: 11 additions & 140 deletions
Original file line numberDiff line numberDiff line change
@@ -18,18 +18,12 @@ from cuda.core._utils.cuda_utils cimport (
1818
check_or_create_options,
1919
HANDLE_RETURN,
2020
)
21-
from cpython.mem cimport PyMem_Malloc, PyMem_Free
22-
2321
from dataclasses import dataclass
2422
import multiprocessing
2523
import platform # no-cython-lint
2624
import uuid
2725

28-
from ._peer_access_utils import (
29-
PeerAccessibleBySetProxy,
30-
_resolve_peer_device_id,
31-
plan_peer_access_update,
32-
)
26+
from ._peer_access_utils import PeerAccessibleBySetProxy, replace_peer_accessible_by
3327
from cuda.core._utils.cuda_utils import check_multiprocessing_start_method
3428

3529
__all__ = ['DeviceMemoryResource', 'DeviceMemoryResourceOptions']
@@ -215,60 +209,27 @@ cdef class DeviceMemoryResource(_MemPool):
215209
@property
216210
def peer_accessible_by(self):
217211
"""
218-
Live driver-backed set view of the devices that can access allocations
212+
Get or set the devices that can access allocations from this memory
213+
pool. Access can be modified at any time and affects all allocations
219214
from this memory pool.
220215
221-
Returns a :class:`PeerAccessibleBySetProxy` (a
222-
:class:`collections.abc.MutableSet`) whose reads call
223-
``cuMemPoolGetAccess`` and whose writes call ``cuMemPoolSetAccess``.
224-
Iteration yields :class:`Device` objects; ``add``, ``discard``, and
225-
``__contains__`` accept either a :class:`Device` or a device-ordinal
226-
``int``. There is no in-memory cache, so the view always reflects the
227-
current driver state and stays consistent across multiple wrappers
228-
around the same pool.
229-
230-
When setting, accepts an iterable of :obj:`~_device.Device` objects or
231-
device IDs. Setting replaces the full set in a single batched driver call.
232-
233-
Bulk operations (``update``, ``|=``, ``&=``, ``-=``, ``^=``, ``clear``,
234-
and the property setter) each issue exactly one ``cuMemPoolSetAccess``
235-
call so the toolkit can update existing memory mappings in parallel.
216+
Returns a set-like proxy of :obj:`~_device.Device` objects that manages
217+
peer access. Inputs are accepted as either :obj:`~_device.Device`
218+
objects or device-ordinal :class:`int` values.
236219
237220
Examples
238221
--------
239222
>>> dmr = DeviceMemoryResource(0)
240-
>>> dmr.peer_accessible_by.add(1) # grant access to device 1
241-
>>> assert dmr.peer_accessible_by == {Device(1)}
242-
>>> dmr.peer_accessible_by |= {Device(2)} # batched grant via |=
243-
>>> dmr.peer_accessible_by = [] # revoke all in one call
223+
>>> dmr.peer_accessible_by = {1} # grant access to device 1
224+
>>> assert 1 in dmr.peer_accessible_by
225+
>>> dmr.peer_accessible_by.add(2) # update access to include device 2
226+
>>> dmr.peer_accessible_by = [] # revoke peer access
244227
"""
245228
return PeerAccessibleBySetProxy(self)
246229

247230
@peer_accessible_by.setter
248231
def peer_accessible_by(self, devices):
249-
_DMR_replace_peer_accessible_by(self, devices)
250-
251-
def _query_peer_access_ids(self):
252-
"""Return the current peer device IDs as a sorted tuple of ints.
253-
254-
Always queries the driver via ``cuMemPoolGetAccess`` for every visible
255-
device. Used by :class:`PeerAccessibleBySetProxy` for ``__iter__`` and
256-
``__len__``.
257-
"""
258-
return _DMR_query_peer_access_ids(self)
259-
260-
def _peer_access_includes(self, int dev_id) -> bool:
261-
"""Return True if peer access from ``dev_id`` is currently granted."""
262-
return _DMR_peer_access_includes(self, dev_id)
263-
264-
def _apply_peer_access_diff(self, to_add, to_remove):
265-
"""Issue a single ``cuMemPoolSetAccess`` for the given add/remove deltas.
266-
267-
``to_add`` and ``to_remove`` are iterables of device-ordinal ints.
268-
Both must already be filtered (no owner, no overlap, no duplicates).
269-
Used by :class:`PeerAccessibleBySetProxy` for batched writes.
270-
"""
271-
_DMR_apply_peer_access_diff(self, tuple(to_add), tuple(to_remove))
232+
replace_peer_accessible_by(self, devices)
272233

273234
@property
274235
def is_device_accessible(self) -> bool:
@@ -281,96 +242,6 @@ cdef class DeviceMemoryResource(_MemPool):
281242
return False
282243

283244

284-
cdef inline tuple _DMR_query_peer_access_ids(DeviceMemoryResource self):
285-
"""Return the current peer device IDs as a sorted tuple of ints."""
286-
cdef int total
287-
cdef cydriver.CUmemAccess_flags flags
288-
cdef cydriver.CUmemLocation location
289-
cdef list peers = []
290-
291-
with nogil:
292-
HANDLE_RETURN(cydriver.cuDeviceGetCount(&total))
293-
294-
location.type = cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_DEVICE
295-
for dev_id in range(total):
296-
if dev_id == self._dev_id:
297-
continue
298-
location.id = dev_id
299-
with nogil:
300-
HANDLE_RETURN(cydriver.cuMemPoolGetAccess(&flags, as_cu(self._h_pool), &location))
301-
if flags == cydriver.CUmemAccess_flags.CU_MEM_ACCESS_FLAGS_PROT_READWRITE:
302-
peers.append(dev_id)
303-
304-
return tuple(sorted(peers))
305-
306-
307-
cdef inline bint _DMR_peer_access_includes(DeviceMemoryResource self, int dev_id):
308-
"""Return True if peer access from ``dev_id`` is currently granted."""
309-
cdef cydriver.CUmemAccess_flags flags
310-
cdef cydriver.CUmemLocation location
311-
312-
location.type = cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_DEVICE
313-
location.id = dev_id
314-
with nogil:
315-
HANDLE_RETURN(cydriver.cuMemPoolGetAccess(&flags, as_cu(self._h_pool), &location))
316-
return flags == cydriver.CUmemAccess_flags.CU_MEM_ACCESS_FLAGS_PROT_READWRITE
317-
318-
319-
cdef inline _DMR_apply_peer_access_diff(
320-
DeviceMemoryResource self, tuple to_add, tuple to_remove
321-
):
322-
"""Issue one ``cuMemPoolSetAccess`` for the given add/remove deltas."""
323-
cdef size_t count = len(to_add) + len(to_remove)
324-
cdef cydriver.CUmemAccessDesc* access_desc = NULL
325-
cdef size_t i = 0
326-
327-
if count == 0:
328-
return
329-
330-
access_desc = <cydriver.CUmemAccessDesc*>PyMem_Malloc(count * sizeof(cydriver.CUmemAccessDesc))
331-
if access_desc == NULL:
332-
raise MemoryError("Failed to allocate memory for access descriptors")
333-
334-
try:
335-
for dev_id in to_add:
336-
access_desc[i].flags = cydriver.CUmemAccess_flags.CU_MEM_ACCESS_FLAGS_PROT_READWRITE
337-
access_desc[i].location.type = cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_DEVICE
338-
access_desc[i].location.id = dev_id
339-
i += 1
340-
for dev_id in to_remove:
341-
access_desc[i].flags = cydriver.CUmemAccess_flags.CU_MEM_ACCESS_FLAGS_PROT_NONE
342-
access_desc[i].location.type = cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_DEVICE
343-
access_desc[i].location.id = dev_id
344-
i += 1
345-
346-
with nogil:
347-
HANDLE_RETURN(cydriver.cuMemPoolSetAccess(as_cu(self._h_pool), access_desc, count))
348-
finally:
349-
if access_desc != NULL:
350-
PyMem_Free(access_desc)
351-
352-
353-
cdef inline _DMR_replace_peer_accessible_by(DeviceMemoryResource self, devices):
354-
"""Replace the full peer-access set in a single batched driver call.
355-
356-
Backs the ``mr.peer_accessible_by = [...]`` setter. Uses the same planner
357-
as the proxy's bulk ops; the only difference is that adds and removes are
358-
derived from the symmetric difference between current driver state and the
359-
requested target set.
360-
"""
361-
from .._device import Device
362-
363-
this_dev = Device(self._dev_id)
364-
plan = plan_peer_access_update(
365-
owner_device_id=self._dev_id,
366-
current_peer_ids=_DMR_query_peer_access_ids(self),
367-
requested_devices=devices,
368-
resolve_device_id=_resolve_peer_device_id,
369-
can_access_peer=this_dev.can_access_peer,
370-
)
371-
_DMR_apply_peer_access_diff(self, plan.to_add, plan.to_remove)
372-
373-
374245
cdef inline _DMR_init(DeviceMemoryResource self, device_id, options):
375246
from .._device import Device
376247
cdef int dev_id = Device(device_id).device_id

0 commit comments

Comments
 (0)