@@ -18,18 +18,12 @@ from cuda.core._utils.cuda_utils cimport (
1818 check_or_create_options,
1919 HANDLE_RETURN,
2020)
21- from cpython.mem cimport PyMem_Malloc, PyMem_Free
22-
2321from dataclasses import dataclass
2422import multiprocessing
2523import platform # no-cython-lint
2624import uuid
2725
28- from ._peer_access_utils import (
29- PeerAccessibleBySetProxy,
30- _resolve_peer_device_id,
31- plan_peer_access_update,
32- )
26+ from ._peer_access_utils import PeerAccessibleBySetProxy, replace_peer_accessible_by
3327from cuda.core._utils.cuda_utils import check_multiprocessing_start_method
3428
3529__all__ = [' DeviceMemoryResource' , ' DeviceMemoryResourceOptions' ]
@@ -215,60 +209,27 @@ cdef class DeviceMemoryResource(_MemPool):
215209 @property
216210 def peer_accessible_by(self ):
217211 """
218- Live driver-backed set view of the devices that can access allocations
212+ Get or set the devices that can access allocations from this memory
213+ pool. Access can be modified at any time and affects all allocations
219214 from this memory pool.
220215
221- Returns a :class:`PeerAccessibleBySetProxy` (a
222- :class:`collections.abc.MutableSet`) whose reads call
223- ``cuMemPoolGetAccess`` and whose writes call ``cuMemPoolSetAccess``.
224- Iteration yields :class:`Device` objects; ``add``, ``discard``, and
225- ``__contains__`` accept either a :class:`Device` or a device-ordinal
226- ``int``. There is no in-memory cache, so the view always reflects the
227- current driver state and stays consistent across multiple wrappers
228- around the same pool.
229-
230- When setting, accepts an iterable of :obj:`~_device.Device` objects or
231- device IDs. Setting replaces the full set in a single batched driver call.
232-
233- Bulk operations (``update``, ``|=``, ``&=``, ``-=``, ``^=``, ``clear``,
234- and the property setter) each issue exactly one ``cuMemPoolSetAccess``
235- call so the toolkit can update existing memory mappings in parallel.
216+ Returns a set-like proxy of :obj:`~_device.Device` objects that manages
217+ peer access. Inputs are accepted as either :obj:`~_device.Device`
218+ objects or device-ordinal :class:`int` values.
236219
237220 Examples
238221 --------
239222 >>> dmr = DeviceMemoryResource(0)
240- >>> dmr.peer_accessible_by.add(1) # grant access to device 1
241- >>> assert dmr.peer_accessible_by == {Device(1)}
242- >>> dmr.peer_accessible_by |= {Device (2)} # batched grant via |=
243- >>> dmr.peer_accessible_by = [] # revoke all in one call
223+ >>> dmr.peer_accessible_by = {1} # grant access to device 1
224+ >>> assert 1 in dmr.peer_accessible_by
225+ >>> dmr.peer_accessible_by.add (2) # update access to include device 2
226+ >>> dmr.peer_accessible_by = [] # revoke peer access
244227 """
245228 return PeerAccessibleBySetProxy(self )
246229
247230 @peer_accessible_by.setter
248231 def peer_accessible_by (self , devices ):
249- _DMR_replace_peer_accessible_by(self , devices)
250-
251- def _query_peer_access_ids (self ):
252- """ Return the current peer device IDs as a sorted tuple of ints.
253-
254- Always queries the driver via ``cuMemPoolGetAccess`` for every visible
255- device. Used by :class:`PeerAccessibleBySetProxy` for ``__iter__`` and
256- ``__len__``.
257- """
258- return _DMR_query_peer_access_ids(self )
259-
260- def _peer_access_includes (self , int dev_id ) -> bool:
261- """Return True if peer access from ``dev_id`` is currently granted."""
262- return _DMR_peer_access_includes(self , dev_id )
263-
264- def _apply_peer_access_diff(self , to_add , to_remove ):
265- """ Issue a single ``cuMemPoolSetAccess`` for the given add/remove deltas.
266-
267- ``to_add`` and ``to_remove`` are iterables of device-ordinal ints.
268- Both must already be filtered (no owner, no overlap, no duplicates).
269- Used by :class:`PeerAccessibleBySetProxy` for batched writes.
270- """
271- _DMR_apply_peer_access_diff(self , tuple (to_add), tuple (to_remove))
232+ replace_peer_accessible_by(self , devices)
272233
273234 @property
274235 def is_device_accessible (self ) -> bool:
@@ -281,96 +242,6 @@ cdef class DeviceMemoryResource(_MemPool):
281242 return False
282243
283244
cdef inline tuple _DMR_query_peer_access_ids(DeviceMemoryResource self):
    """Return the current peer device IDs as a sorted tuple of ints.

    Probes ``cuMemPoolGetAccess`` once per visible device (the owning device
    is skipped) and collects those reporting READWRITE access.
    """
    cdef int device_count
    cdef cydriver.CUmemAccess_flags flags
    cdef cydriver.CUmemLocation location
    cdef list granted = []

    with nogil:
        HANDLE_RETURN(cydriver.cuDeviceGetCount(&device_count))

    location.type = cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_DEVICE
    for ordinal in range(device_count):
        # The pool's own device is not a peer; never report it.
        if ordinal == self._dev_id:
            continue
        location.id = ordinal
        with nogil:
            HANDLE_RETURN(cydriver.cuMemPoolGetAccess(&flags, as_cu(self._h_pool), &location))
        if flags == cydriver.CUmemAccess_flags.CU_MEM_ACCESS_FLAGS_PROT_READWRITE:
            granted.append(ordinal)

    granted.sort()
    return tuple(granted)
305-
306-
cdef inline bint _DMR_peer_access_includes(DeviceMemoryResource self, int dev_id):
    """Return True if peer access from ``dev_id`` is currently granted.

    Single ``cuMemPoolGetAccess`` query against the live driver state; no
    caching is involved.
    """
    cdef cydriver.CUmemAccess_flags flags
    cdef cydriver.CUmemLocation location

    location.type = cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_DEVICE
    location.id = dev_id
    with nogil:
        HANDLE_RETURN(cydriver.cuMemPoolGetAccess(&flags, as_cu(self._h_pool), &location))
    return flags == cydriver.CUmemAccess_flags.CU_MEM_ACCESS_FLAGS_PROT_READWRITE
317-
318-
cdef inline _DMR_apply_peer_access_diff(
    DeviceMemoryResource self, tuple to_add, tuple to_remove
):
    """Issue one ``cuMemPoolSetAccess`` for the given add/remove deltas.

    ``to_add`` and ``to_remove`` are tuples of device-ordinal ints. Both
    deltas are packed into a single descriptor array so the driver applies
    the whole update in one call.
    """
    cdef size_t n_descs = len(to_add) + len(to_remove)
    cdef cydriver.CUmemAccessDesc* descs = NULL
    cdef size_t idx = 0

    # Nothing to change: avoid a zero-length driver call entirely.
    if n_descs == 0:
        return

    descs = <cydriver.CUmemAccessDesc*>PyMem_Malloc(n_descs * sizeof(cydriver.CUmemAccessDesc))
    if descs == NULL:
        raise MemoryError("Failed to allocate memory for access descriptors")

    try:
        # Grants first, then revocations; order within the array does not
        # matter to the driver, only the flags per location.
        for ordinal in to_add:
            descs[idx].flags = cydriver.CUmemAccess_flags.CU_MEM_ACCESS_FLAGS_PROT_READWRITE
            descs[idx].location.type = cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_DEVICE
            descs[idx].location.id = ordinal
            idx += 1
        for ordinal in to_remove:
            descs[idx].flags = cydriver.CUmemAccess_flags.CU_MEM_ACCESS_FLAGS_PROT_NONE
            descs[idx].location.type = cydriver.CUmemLocationType.CU_MEM_LOCATION_TYPE_DEVICE
            descs[idx].location.id = ordinal
            idx += 1

        with nogil:
            HANDLE_RETURN(cydriver.cuMemPoolSetAccess(as_cu(self._h_pool), descs, n_descs))
    finally:
        # descs is guaranteed non-NULL here (the NULL path raised above), so
        # free unconditionally even if the driver call raised.
        PyMem_Free(descs)
351-
352-
cdef inline _DMR_replace_peer_accessible_by(DeviceMemoryResource self, devices):
    """Replace the full peer-access set in a single batched driver call.

    Backs the ``mr.peer_accessible_by = [...]`` setter. Uses the same planner
    as the proxy's bulk ops; the only difference is that adds and removes are
    derived from the symmetric difference between current driver state and the
    requested target set.
    """
    # Deferred import — presumably to break an import cycle with .._device;
    # confirm before hoisting to module level.
    from .._device import Device

    owner = Device(self._dev_id)
    plan = plan_peer_access_update(
        owner_device_id=self._dev_id,
        current_peer_ids=_DMR_query_peer_access_ids(self),
        requested_devices=devices,
        resolve_device_id=_resolve_peer_device_id,
        can_access_peer=owner.can_access_peer,
    )
    _DMR_apply_peer_access_diff(self, plan.to_add, plan.to_remove)
373-
374245cdef inline _DMR_init(DeviceMemoryResource self , device_id , options ):
375246 from .._device import Device
376247 cdef int dev_id = Device(device_id).device_id
0 commit comments