Skip to content

Commit d8b0a46

Browse files
committed
Merge branch 'main' into rparolin/skip_ipc_on_wsl
2 parents baac405 + 7335c05 commit d8b0a46

File tree

20 files changed

+89
-56
lines changed

20 files changed

+89
-56
lines changed

.github/workflows/ci.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,7 @@ jobs:
193193

194194
checks:
195195
name: Check job status
196+
if: always()
196197
runs-on: ubuntu-latest
197198
needs:
198199
- test-linux-64

cuda_bindings/setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -329,7 +329,7 @@ def do_cythonize(extensions):
329329
return cythonize(
330330
extensions,
331331
nthreads=nthreads,
332-
compiler_directives=dict(language_level=3, embedsignature=True, binding=True),
332+
compiler_directives=dict(language_level=3, embedsignature=True, binding=True, freethreading_compatible=True),
333333
**extra_cythonize_kwargs,
334334
)
335335

cuda_bindings/tests/cython/build_tests.bat

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,5 +5,5 @@ REM SPDX-License-Identifier: LicenseRef-NVIDIA-SOFTWARE-LICENSE
55

66
setlocal
77
set CL=%CL% /I"%CUDA_HOME%\include"
8-
cythonize -3 -i %~dp0test_*.pyx
8+
cythonize -3 -i -Xfreethreading_compatible=True %~dp0test_*.pyx
99
endlocal

cuda_bindings/tests/cython/build_tests.sh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,4 +14,4 @@ else
1414
exit 1
1515
fi
1616

17-
cythonize -3 -i ${SCRIPTPATH}/test_*.pyx
17+
cythonize -3 -i -Xfreethreading_compatible=True ${SCRIPTPATH}/test_*.pyx

cuda_core/build_hooks.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ def get_cuda_paths():
101101
verbose=True,
102102
language_level=3,
103103
nthreads=nthreads,
104-
compiler_directives={"embedsignature": True, "warn.deprecated.IF": False},
104+
compiler_directives={"embedsignature": True, "warn.deprecated.IF": False, "freethreading_compatible": True},
105105
compile_time_env=compile_time_env,
106106
)
107107

cuda_core/cuda/core/_version.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,4 @@
22
#
33
# SPDX-License-Identifier: Apache-2.0
44

5-
__version__ = "0.3.3a0"
5+
__version__ = "0.4.0"

cuda_core/cuda/core/experimental/_device.pyx

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ from cuda.core.experimental._context import Context, ContextOptions
1616
from cuda.core.experimental._event import Event, EventOptions
1717
from cuda.core.experimental._graph import GraphBuilder
1818
from cuda.core.experimental._memory import Buffer, DeviceMemoryResource, MemoryResource, _SynchronousMemoryResource
19-
from cuda.core.experimental._stream import IsStreamT, Stream, StreamOptions, default_stream
19+
from cuda.core.experimental._stream import IsStreamT, Stream, StreamOptions
2020
from cuda.core.experimental._utils.clear_error_support import assert_type
2121
from cuda.core.experimental._utils.cuda_utils import (
2222
ComputeCapability,
@@ -25,6 +25,7 @@ from cuda.core.experimental._utils.cuda_utils import (
2525
handle_return,
2626
runtime,
2727
)
28+
from cuda.core.experimental._stream cimport default_stream
2829

2930

3031
# TODO: I prefer to type these as "cdef object" and avoid accessing them from within Python,

cuda_core/cuda/core/experimental/_memory.pyx

Lines changed: 6 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ from libc.string cimport memset, memcpy
1212
from cuda.bindings cimport cydriver
1313

1414
from cuda.core.experimental._stream cimport Stream as cyStream
15+
from cuda.core.experimental._stream cimport default_stream
1516
from cuda.core.experimental._utils.cuda_utils cimport (
1617
_check_driver_error as raise_if_driver_error,
1718
check_or_create_options,
@@ -30,7 +31,7 @@ import platform
3031
import weakref
3132

3233
from cuda.core.experimental._dlpack import DLDeviceType, make_py_capsule
33-
from cuda.core.experimental._stream import Stream, default_stream
34+
from cuda.core.experimental._stream import Stream
3435
from cuda.core.experimental._utils.cuda_utils import ( driver, Transaction, get_binding_version )
3536

3637
if platform.system() == "Linux":
@@ -73,6 +74,8 @@ cdef class _cyMemoryResource:
7374

7475
class MemoryResourceAttributes(abc.ABC):
7576

77+
__slots__ = ()
78+
7679
@property
7780
@abc.abstractmethod
7881
def is_device_accessible(self) -> bool:
@@ -107,8 +110,6 @@ cdef class Buffer(_cyBuffer, MemoryResourceAttributes):
107110
108111
Support for data interchange mechanisms is provided by DLPack.
109112
"""
110-
cdef dict __dict__ # required if inheriting from both Cython/Python classes
111-
112113
def __cinit__(self):
113114
self._ptr = 0
114115
self._size = 0
@@ -369,8 +370,6 @@ cdef class MemoryResource(_cyMemoryResource, MemoryResourceAttributes, abc.ABC):
369370
hold a reference to self, the buffer properties are retrieved simply by looking up the underlying
370371
memory resource's respective property.)
371372
"""
372-
cdef dict __dict__ # required if inheriting from both Cython/Python classes
373-
374373
cdef void _deallocate(self, intptr_t ptr, size_t size, cyStream stream) noexcept:
375374
self.deallocate(ptr, size, stream)
376375

@@ -585,7 +584,7 @@ class DeviceMemoryResourceAttributes:
585584
# This enables buffer serialization, as buffers can reduce to a pair
586585
# comprising the memory resource UUID (the key into this registry)
587586
# and the serialized buffer descriptor.
588-
_ipc_registry = {}
587+
cdef object _ipc_registry = weakref.WeakValueDictionary()
589588

590589

591590
cdef class DeviceMemoryResource(MemoryResource):
@@ -675,7 +674,6 @@ cdef class DeviceMemoryResource(MemoryResource):
675674
bint _is_mapped
676675
object _uuid
677676
IPCAllocationHandle _alloc_handle
678-
dict __dict__ # required if inheriting from both Cython/Python classes
679677
object __weakref__
680678

681679
def __cinit__(self):
@@ -759,8 +757,6 @@ cdef class DeviceMemoryResource(MemoryResource):
759757
with nogil:
760758
HANDLE_RETURN(cydriver.cuMemPoolDestroy(self._mempool_handle))
761759
finally:
762-
if self.is_mapped:
763-
self.unregister()
764760
self._dev_id = cydriver.CU_DEVICE_INVALID
765761
self._mempool_handle = NULL
766762
self._attributes = None
@@ -806,13 +802,6 @@ cdef class DeviceMemoryResource(MemoryResource):
806802
self._uuid = uuid
807803
return self
808804

809-
def unregister(self):
810-
"""Unregister this mapped memory resource."""
811-
assert self.is_mapped
812-
if _ipc_registry is not None: # can occur during shutdown catastrophe
813-
with contextlib.suppress(KeyError):
814-
del _ipc_registry[self.uuid]
815-
816805
@property
817806
def uuid(self) -> Optional[uuid.UUID]:
818807
"""
@@ -1019,9 +1008,7 @@ class LegacyPinnedMemoryResource(MemoryResource):
10191008
APIs.
10201009
"""
10211010

1022-
def __init__(self):
1023-
# TODO: support flags from cuMemHostAlloc?
1024-
self._handle = None
1011+
# TODO: support creating this MR with flags that are later passed to cuMemHostAlloc?
10251012

10261013
def allocate(self, size_t size, stream: Stream = None) -> Buffer:
10271014
"""Allocate a buffer of the requested size.
@@ -1080,7 +1067,6 @@ class _SynchronousMemoryResource(MemoryResource):
10801067
__slots__ = ("_dev_id",)
10811068

10821069
def __init__(self, device_id : int | Device):
1083-
self._handle = None
10841070
self._dev_id = getattr(device_id, 'device_id', device_id)
10851071

10861072
def allocate(self, size, stream=None) -> Buffer:

cuda_core/cuda/core/experimental/_stream.pxd

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,3 +22,6 @@ cdef class Stream:
2222
cpdef close(self)
2323
cdef int _get_context(self) except?-1 nogil
2424
cdef int _get_device_and_context(self) except?-1
25+
26+
27+
cdef Stream default_stream()

cuda_core/cuda/core/experimental/_stream.pyx

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from __future__ import annotations
66

77
from libc.stdint cimport uintptr_t, INT32_MIN
8+
from libc.stdlib cimport strtol, getenv
89

910
from cuda.bindings cimport cydriver
1011

@@ -388,11 +389,16 @@ cdef class Stream:
388389
return GraphBuilder._init(stream=self, is_stream_owner=False)
389390

390391

391-
LEGACY_DEFAULT_STREAM = Stream._legacy_default()
392-
PER_THREAD_DEFAULT_STREAM = Stream._per_thread_default()
392+
# c-only python objects, not public
393+
cdef Stream C_LEGACY_DEFAULT_STREAM = Stream._legacy_default()
394+
cdef Stream C_PER_THREAD_DEFAULT_STREAM = Stream._per_thread_default()
393395

396+
# standard python objects, public
397+
LEGACY_DEFAULT_STREAM = C_LEGACY_DEFAULT_STREAM
398+
PER_THREAD_DEFAULT_STREAM = C_PER_THREAD_DEFAULT_STREAM
394399

395-
def default_stream():
400+
401+
cdef Stream default_stream():
396402
"""Return the default CUDA :obj:`~_stream.Stream`.
397403
398404
The type of default stream returned depends on if the environment
@@ -403,8 +409,14 @@ def default_stream():
403409
404410
"""
405411
# TODO: flip the default
406-
use_ptds = int(os.environ.get("CUDA_PYTHON_CUDA_PER_THREAD_DEFAULT_STREAM", 0))
412+
cdef const char* use_ptds_raw = getenv("CUDA_PYTHON_CUDA_PER_THREAD_DEFAULT_STREAM")
413+
414+
cdef int use_ptds = 0
415+
if use_ptds_raw != NULL:
416+
use_ptds = strtol(use_ptds_raw, NULL, 10)
417+
418+
# strtol parses only the leading digits, so a value such as "123foo" yields 123 (non-zero) and enables PTDS
407419
if use_ptds:
408-
return PER_THREAD_DEFAULT_STREAM
420+
return C_PER_THREAD_DEFAULT_STREAM
409421
else:
410-
return LEGACY_DEFAULT_STREAM
422+
return C_LEGACY_DEFAULT_STREAM

0 commit comments

Comments
 (0)