44
55from __future__ import annotations
66
7+ cimport cpython
78from libc.stdint cimport uintptr_t
9+ from libc.string cimport memcpy
810
911from cuda.bindings cimport cydriver
1012
@@ -14,12 +16,14 @@ from cuda.core.experimental._utils.cuda_utils cimport (
1416)
1517
1618from dataclasses import dataclass
19+ import multiprocessing
1720from typing import TYPE_CHECKING, Optional
1821
1922from cuda.core.experimental._context import Context
2023from cuda.core.experimental._utils.cuda_utils import (
2124 CUDAError,
2225 driver,
26+ handle_return,
2327)
2428if TYPE_CHECKING:
2529 import cuda.bindings
@@ -40,15 +44,15 @@ cdef class EventOptions:
4044 has actually been completed.
4145 Otherwise, the CPU thread will busy-wait until the event has
4246 been completed. (Default to False)
43- support_ipc : bool, optional
47+ ipc_enabled : bool, optional
4448 Event will be suitable for interprocess use.
4549 Note that enable_timing must be False. (Default to False)
4650
4751 """
4852
4953 enable_timing: Optional[bool ] = False
5054 busy_waited_sync: Optional[bool ] = False
51- support_ipc : Optional[bool ] = False
55+ ipc_enabled : Optional[bool ] = False
5256
5357
5458cdef class Event:
@@ -86,24 +90,35 @@ cdef class Event:
8690 raise RuntimeError (" Event objects cannot be instantiated directly. Please use Stream APIs (record)." )
8791
@classmethod
def _init(cls, device_id: int, ctx_handle: Context, options=None, is_free=False):
    """Create a new event configured from ``options``.

    Parameters
    ----------
    device_id : int
        Stored on the event as its device ordinal.
    ctx_handle : Context
        Stored on the event as its context handle.
    options : EventOptions, optional
        Passed to ``check_or_create_options`` to obtain an
        :obj:`EventOptions` instance.
    is_free : bool, optional
        When True, a request for an IPC-enabled event is rejected.
        (Default to False)

    Returns
    -------
    Event
        The newly created event.

    Raises
    ------
    TypeError
        If ``ipc_enabled`` is requested together with ``is_free``, or
        together with ``enable_timing``.
    """
    cdef Event self = Event.__new__(cls)
    cdef EventOptions opts = check_or_create_options(EventOptions, options, "Event options")
    cdef unsigned int flags = 0x0

    # Reject invalid option combinations up front, before any state or
    # driver flags are touched.
    if opts.ipc_enabled:
        if is_free:
            raise TypeError(
                "IPC-enabled events must be bound; use Stream.record for creation."
            )
        if opts.enable_timing:
            raise TypeError("IPC-enabled events cannot use timing.")

    self._timing_disabled = False
    self._busy_waited = False
    self._ipc_enabled = False
    self._ipc_descriptor = None

    if not opts.enable_timing:
        flags |= cydriver.CUevent_flags.CU_EVENT_DISABLE_TIMING
        self._timing_disabled = True
    if opts.busy_waited_sync:
        flags |= cydriver.CUevent_flags.CU_EVENT_BLOCKING_SYNC
        self._busy_waited = True
    if opts.ipc_enabled:
        flags |= cydriver.CUevent_flags.CU_EVENT_INTERPROCESS
        self._ipc_enabled = True

    with nogil:
        HANDLE_RETURN(cydriver.cuEventCreate(&self._handle, flags))
    self._device_id = device_id
    self._ctx_handle = ctx_handle

    if opts.ipc_enabled:
        # Export the descriptor eagerly so that it is cached on the event.
        self.get_ipc_descriptor()
    return self
108123
109124 cpdef close(self ):
@@ -151,6 +166,40 @@ cdef class Event:
151166 raise CUDAError(err)
152167 raise RuntimeError (explanation)
153168
def get_ipc_descriptor(self) -> IPCEventDescriptor:
    """Export this event as a descriptor that can be shared between processes.

    The descriptor is created on first use and cached on the event;
    later calls return the cached object.

    Raises
    ------
    RuntimeError
        If no descriptor is cached and the event is not IPC-enabled.
    """
    cdef cydriver.CUipcEventHandle ipc_handle
    cdef bytes payload

    cached = self._ipc_descriptor
    if cached is not None:
        return cached
    if not self.is_ipc_enabled:
        raise RuntimeError("Event is not IPC-enabled")

    with nogil:
        HANDLE_RETURN(cydriver.cuIpcGetEventHandle(&ipc_handle, <cydriver.CUevent>(self._handle)))

    # Copy the raw handle bytes into a Python object owned by the descriptor.
    payload = cpython.PyBytes_FromStringAndSize(<char*>(ipc_handle.reserved), sizeof(ipc_handle.reserved))
    descriptor = IPCEventDescriptor._init(payload, self._busy_waited)
    self._ipc_descriptor = descriptor
    return descriptor
181+
@classmethod
def from_ipc_descriptor(cls, ipc_descriptor: IPCEventDescriptor) -> Event:
    """Import an event that was exported from another process.

    Parameters
    ----------
    ipc_descriptor : IPCEventDescriptor
        Descriptor whose payload is copied into a ``CUipcEventHandle``.

    Returns
    -------
    Event
        An IPC-enabled event with timing disabled. Its device id is set
        to -1 and its context handle to None.

    Raises
    ------
    ValueError
        If the descriptor payload is missing or does not have exactly the
        size of the driver's IPC event handle.
    """
    cdef cydriver.CUipcEventHandle data
    cdef bytes reserved = ipc_descriptor._reserved
    cdef Py_ssize_t expected = <Py_ssize_t>sizeof(data.reserved)

    # The payload may come from unpickling, so its size is not guaranteed.
    # Check it before memcpy to avoid reading past the end of the buffer.
    if reserved is None or len(reserved) != expected:
        raise ValueError(
            f"Invalid IPC event descriptor: expected a payload of {expected} bytes"
        )
    memcpy(data.reserved, <const void*><const char*>reserved, sizeof(data.reserved))

    cdef Event self = Event.__new__(cls)
    with nogil:
        HANDLE_RETURN(cydriver.cuIpcOpenEventHandle(&self._handle, data))
    self._timing_disabled = True
    self._busy_waited = ipc_descriptor._busy_waited
    self._ipc_enabled = True
    self._ipc_descriptor = ipc_descriptor
    # The descriptor carries no device or context information, so both are
    # recorded as "unknown" for imported events.
    self._device_id = -1
    self._ctx_handle = None
    return self
197+
@property
def is_ipc_enabled(self) -> bool:
    """Whether this event can be shared across process boundaries.

    Returns the stored IPC flag: True for IPC-enabled events, False otherwise.
    """
    ipc_flag = self._ipc_enabled
    return ipc_flag
202+
154203 @property
155204 def is_timing_disabled(self ) -> bool:
156205 """Return True if the event does not record timing data , otherwise False."""
@@ -161,11 +210,6 @@ cdef class Event:
161210 """Return True if the event synchronization would keep the CPU busy-waiting , otherwise False."""
162211 return self._busy_waited
163212
164- @property
165- def is_ipc_supported(self ) -> bool:
166- """Return True if this event can be used as an interprocess event , otherwise False."""
167- raise NotImplementedError("WIP: https://github.com/NVIDIA/cuda-python/issues/103")
168-
169213 def sync(self ):
170214 """ Synchronize until the event completes.
171215
@@ -212,12 +256,43 @@ cdef class Event:
212256 context is set current after a event is created.
213257
214258 """
215-
216- from cuda.core.experimental._device import Device # avoid circular import
217-
218- return Device(self._device_id )
259+ if self._device_id >= 0:
260+ from ._device import Device # avoid circular import
261+ return Device(self._device_id )
219262
@property
def context(self) -> Optional[Context]:
    """Return the :obj:`~_context.Context` associated with this event.

    Returns None when the event has no stored context handle or its
    device id is negative.
    """
    if self._ctx_handle is None or self._device_id < 0:
        return None
    return Context._from_ctx(self._ctx_handle, self._device_id)
268+
269+
cdef class IPCEventDescriptor:
    """Serializable object describing an event that can be shared between processes.

    Instances hold the raw IPC handle bytes together with the busy-wait
    flag of the exporting event. They are created through ``_init`` and
    are picklable via ``__reduce__``.
    """

    cdef:
        bytes _reserved      # raw IPC event handle payload
        bint _busy_waited    # busy-wait flag copied from the exporting event

    def __init__(self, *arg, **kwargs):
        raise RuntimeError("IPCEventDescriptor objects cannot be instantiated directly. Please use Event APIs.")

    @classmethod
    def _init(cls, reserved: bytes, busy_waited: bint):
        """Build a descriptor from the handle bytes and the busy-wait flag."""
        cdef IPCEventDescriptor self = IPCEventDescriptor.__new__(cls)
        self._reserved = reserved
        self._busy_waited = busy_waited
        return self

    def __eq__(self, rhs):
        # Accept any object here: a typed parameter would make comparisons
        # with unrelated objects fail in the argument check, and would let
        # None reach the attribute access below.
        if not isinstance(rhs, IPCEventDescriptor):
            return NotImplemented
        # No need to check self._busy_waited.
        return self._reserved == (<IPCEventDescriptor>rhs)._reserved

    def __hash__(self):
        # Kept consistent with __eq__, which compares only the payload.
        return hash(self._reserved)

    def __reduce__(self):
        # Rebuild through _init because __init__ refuses direct construction.
        return self._init, (self._reserved, self._busy_waited)
293+
294+
def _reduce_event(event):
    """Reduce ``event`` to a ``(callable, args)`` pair for pickling.

    The pair consists of the event's ``from_ipc_descriptor`` constructor
    and a one-element tuple holding its IPC descriptor.
    """
    rebuild = event.from_ipc_descriptor
    descriptor = event.get_ipc_descriptor()
    return rebuild, (descriptor,)
297+
# Register the reducer so that multiprocessing pickles Event objects
# through _reduce_event.
multiprocessing.reduction.register(Event, _reduce_event)
0 commit comments