Address review comments: closed Buffer.handle returns 0 (not None); use ULLONG_MAX

leofang · leofang · commit 2dd91380a59f · 2025-10-06T21:31:45.000Z
diff --git a/cuda_core/cuda/core/experimental/_memory.pyx b/cuda_core/cuda/core/experimental/_memory.pyx
@@ -5,6 +5,7 @@
 from __future__ import annotations
 
 cimport cpython
+from libc.limits cimport ULLONG_MAX
 from libc.stdint cimport uintptr_t, intptr_t
 from libc.string cimport memset, memcpy
 
@@ -95,13 +96,15 @@ cdef class Buffer:
             The stream object to use for asynchronous deallocation. If None,
             the behavior depends on the underlying memory resource.
         """
+        cdef _cyMemoryResource cy_mr
         if self._ptr and self._mr is not None:
             if isinstance(self._mr, _cyMemoryResource):
                 # FIXME
                 if stream is None:
                     stream = Stream.__new__(Stream)
                     (<cyStream>(stream))._handle = <cydriver.CUstream>(0)
-                (<_cyMemoryResource>(self._mr))._deallocate(self._ptr, self._size, <cyStream>stream)
+                cy_mr = <_cyMemoryResource>(self._mr)
+                cy_mr._deallocate(self._ptr, self._size, <cyStream>stream)
             else:
                 self._mr.deallocate(self._ptr, self._size, stream)
             self._ptr = 0
@@ -123,7 +126,7 @@ cdef class Buffer:
             return self._ptr
         else:
             # contract: Buffer is closed
-            return None
+            return 0
 
     @property
     def size(self) -> int:
@@ -673,7 +676,7 @@ cdef class DeviceMemoryResource(_cyMemoryResource, MemoryResource):
             DeviceMemoryResourceOptions, options, "DeviceMemoryResource options", keep_none=True
         )
         cdef cydriver.cuuint64_t current_threshold
-        cdef cydriver.cuuint64_t max_threshold = 0xFFFFFFFFFFFFFFFF
+        cdef cydriver.cuuint64_t max_threshold = ULLONG_MAX
         cdef cydriver.CUmemPoolProps properties
 
         if opts is None:
diff --git a/cuda_core/cuda/core/experimental/_stream.pyx b/cuda_core/cuda/core/experimental/_stream.pyx
@@ -113,10 +113,10 @@ cdef class Stream:
         self._handle = <cydriver.CUstream>(NULL)
         self._owner = None
         self._builtin = False
-        self._nonblocking = -1  # delayed
-        self._priority = INT32_MIN  # delayed
-        self._device_id = cydriver.CU_DEVICE_INVALID  # delayed
-        self._ctx_handle = CU_CONTEXT_INVALID  # delayed
+        self._nonblocking = -1  # lazy init'd
+        self._priority = INT32_MIN  # lazy init'd
+        self._device_id = cydriver.CU_DEVICE_INVALID  # lazy init'd
+        self._ctx_handle = CU_CONTEXT_INVALID  # lazy init'd
 
     def __init__(self, *args, **kwargs):
         raise RuntimeError(
diff --git a/cuda_core/docs/source/release/0.X.Y-notes.rst b/cuda_core/docs/source/release/0.X.Y-notes.rst
@@ -22,7 +22,6 @@ Breaking Changes
 - **CUDA 11 support dropped**: CUDA 11 support is no longer tested and it may or may not work with cuda.bindings and CTK 11.x. Users are encouraged to migrate to CUDA 12.x or 13.x.
 - Support for ``cuda-bindings`` (and ``cuda-python``) < 12.6.2 is dropped. Internally, ``cuda.core`` now always requires the `new binding module layout <https://nvidia.github.io/cuda-python/cuda-bindings/latest/release/12.6.1-notes.html#cuda-namespace-cleanup-with-a-new-module-layout>`_. As per the ``cuda-bindings`` `support policy <https://nvidia.github.io/cuda-python/cuda-bindings/latest/support.html>`_), CUDA 12 users are encouraged to use the latest ``cuda-bindings`` 12.9.x, which is backward-compatible with all CUDA Toolkit 12.y.
 - **LaunchConfig grid parameter interpretation**: When :attr:`LaunchConfig.cluster` is specified, the :attr:`LaunchConfig.grid` parameter now correctly represents the number of clusters instead of blocks. Previously, the grid parameter was incorrectly interpreted as blocks, causing a mismatch with the expected C++ behavior. This change ensures that ``LaunchConfig(grid=4, cluster=2, block=32)`` correctly produces 4 clusters × 2 blocks/cluster = 8 total blocks, matching the C++ equivalent ``cudax::make_hierarchy(cudax::grid_dims(4), cudax::cluster_dims(2), cudax::block_dims(32))``.
-- When :class:`Buffer` is closed, :attr:`Buffer.handle` is now set to ``None``. It was previously set to ``0`` by accident.
 
 
 New features
diff --git a/cuda_core/examples/memory_ops.py b/cuda_core/examples/memory_ops.py
@@ -128,8 +128,8 @@
 cp.cuda.Stream.null.use()  # reset CuPy's current stream to the null stream
 
 # Verify buffers are properly closed
-assert device_buffer.handle is None, "Device buffer should be closed"
-assert pinned_buffer.handle is None, "Pinned buffer should be closed"
-assert new_device_buffer.handle is None, "New device buffer should be closed"
+assert device_buffer.handle == 0, "Device buffer should be closed"
+assert pinned_buffer.handle == 0, "Pinned buffer should be closed"
+assert new_device_buffer.handle == 0, "New device buffer should be closed"
 
 print("Memory management example completed!")
diff --git a/cuda_core/tests/test_launcher.py b/cuda_core/tests/test_launcher.py
@@ -370,4 +370,4 @@ def test_launch_with_buffers_allocated_by_memory_resource(init_cuda, memory_reso
     cp.cuda.Stream.null.use()  # reset CuPy's current stream to the null stream
 
     # Verify buffer is properly closed
-    assert buffer.handle is None, f"{name} buffer should be closed"
+    assert buffer.handle == 0, f"{name} buffer should be closed"
diff --git a/cuda_core/tests/test_memory.py b/cuda_core/tests/test_memory.py
@@ -223,7 +223,7 @@ def test_buffer_copy_from():
 def buffer_close(dummy_mr: MemoryResource):
     buffer = dummy_mr.allocate(size=1024)
     buffer.close()
-    assert buffer.handle is None
+    assert buffer.handle == 0
     assert buffer.memory_resource is None