55#include < Python.h>
66
77#include " resource_handles.hpp"
8- #include " resource_handles_cxx_api.hpp"
98#include < cuda.h>
109#include < cstdint>
1110#include < cstring>
@@ -172,7 +171,7 @@ struct ContextBox {
172171};
173172} // namespace
174173
175- ContextHandle create_context_handle_ref (CUcontext ctx) {
174+ ContextHandle create_context_handle_ref (CUcontext ctx) noexcept {
176175 auto box = std::make_shared<const ContextBox>(ContextBox{ctx});
177176 return ContextHandle (box, &box->resource );
178177}
@@ -235,7 +234,7 @@ struct StreamBox {
235234};
236235} // namespace
237236
238- StreamHandle create_stream_handle (ContextHandle h_ctx, unsigned int flags, int priority) {
237+ StreamHandle create_stream_handle (ContextHandle h_ctx, unsigned int flags, int priority) noexcept {
239238 GILReleaseGuard gil;
240239 CUstream stream;
241240 if (CUDA_SUCCESS != (err = p_cuStreamCreateWithPriority (&stream, flags, priority))) {
@@ -253,19 +252,28 @@ StreamHandle create_stream_handle(ContextHandle h_ctx, unsigned int flags, int p
253252 return StreamHandle (box, &box->resource );
254253}
255254
256- StreamHandle create_stream_handle_ref (CUstream stream) {
255+ StreamHandle create_stream_handle_ref (CUstream stream) noexcept {
257256 auto box = std::make_shared<const StreamBox>(StreamBox{stream});
258257 return StreamHandle (box, &box->resource );
259258}
260259
261- StreamHandle create_stream_handle_with_owner (CUstream stream, PyObject* owner) {
262- Py_XINCREF (owner);
260+ StreamHandle create_stream_handle_with_owner (CUstream stream, PyObject* owner) noexcept {
261+ if (!owner) {
262+ return create_stream_handle_ref (stream);
263+ }
264+ // GIL required when owner is provided
265+ GILAcquireGuard gil;
266+ if (!gil.acquired ()) {
267+ // Python finalizing - fall back to ref version (no owner tracking)
268+ return create_stream_handle_ref (stream);
269+ }
270+ Py_INCREF (owner);
263271 auto box = std::shared_ptr<const StreamBox>(
264272 new StreamBox{stream},
265273 [owner](const StreamBox* b) {
266274 GILAcquireGuard gil;
267275 if (gil.acquired ()) {
268- Py_XDECREF (owner);
276+ Py_DECREF (owner);
269277 }
270278 delete b;
271279 }
@@ -293,7 +301,7 @@ struct EventBox {
293301};
294302} // namespace
295303
296- EventHandle create_event_handle (ContextHandle h_ctx, unsigned int flags) {
304+ EventHandle create_event_handle (ContextHandle h_ctx, unsigned int flags) noexcept {
297305 GILReleaseGuard gil;
298306 CUevent event;
299307 if (CUDA_SUCCESS != (err = p_cuEventCreate (&event, flags))) {
@@ -311,11 +319,11 @@ EventHandle create_event_handle(ContextHandle h_ctx, unsigned int flags) {
311319 return EventHandle (box, &box->resource );
312320}
313321
314- EventHandle create_event_handle (unsigned int flags) {
322+ EventHandle create_event_handle_noctx (unsigned int flags) noexcept {
315323 return create_event_handle (ContextHandle{}, flags);
316324}
317325
318- EventHandle create_event_handle_ipc (const CUipcEventHandle& ipc_handle) {
326+ EventHandle create_event_handle_ipc (const CUipcEventHandle& ipc_handle) noexcept {
319327 GILReleaseGuard gil;
320328 CUevent event;
321329 if (CUDA_SUCCESS != (err = p_cuIpcOpenEventHandle (&event, ipc_handle))) {
@@ -373,7 +381,7 @@ static MemoryPoolHandle wrap_mempool_owned(CUmemoryPool pool) {
373381 return MemoryPoolHandle (box, &box->resource );
374382}
375383
376- MemoryPoolHandle create_mempool_handle (const CUmemPoolProps& props) {
384+ MemoryPoolHandle create_mempool_handle (const CUmemPoolProps& props) noexcept {
377385 GILReleaseGuard gil;
378386 CUmemoryPool pool;
379387 if (CUDA_SUCCESS != (err = p_cuMemPoolCreate (&pool, &props))) {
@@ -382,7 +390,7 @@ MemoryPoolHandle create_mempool_handle(const CUmemPoolProps& props) {
382390 return wrap_mempool_owned (pool);
383391}
384392
385- MemoryPoolHandle create_mempool_handle_ref (CUmemoryPool pool) {
393+ MemoryPoolHandle create_mempool_handle_ref (CUmemoryPool pool) noexcept {
386394 auto box = std::make_shared<const MemoryPoolBox>(MemoryPoolBox{pool});
387395 return MemoryPoolHandle (box, &box->resource );
388396}
@@ -396,7 +404,7 @@ MemoryPoolHandle get_device_mempool(int device_id) noexcept {
396404 return create_mempool_handle_ref (pool);
397405}
398406
399- MemoryPoolHandle create_mempool_handle_ipc (int fd, CUmemAllocationHandleType handle_type) {
407+ MemoryPoolHandle create_mempool_handle_ipc (int fd, CUmemAllocationHandleType handle_type) noexcept {
400408 GILReleaseGuard gil;
401409 CUmemoryPool pool;
402410 auto handle_ptr = reinterpret_cast <void *>(static_cast <uintptr_t >(fd));
@@ -432,15 +440,15 @@ static DevicePtrBox* get_box(const DevicePtrHandle& h) {
432440 );
433441}
434442
435- StreamHandle deallocation_stream (const DevicePtrHandle& h) {
443+ StreamHandle deallocation_stream (const DevicePtrHandle& h) noexcept {
436444 return get_box (h)->h_stream ;
437445}
438446
439- void set_deallocation_stream (const DevicePtrHandle& h, StreamHandle h_stream) {
447+ void set_deallocation_stream (const DevicePtrHandle& h, StreamHandle h_stream) noexcept {
440448 get_box (h)->h_stream = std::move (h_stream);
441449}
442450
443- DevicePtrHandle deviceptr_alloc_from_pool (size_t size, MemoryPoolHandle h_pool, StreamHandle h_stream) {
451+ DevicePtrHandle deviceptr_alloc_from_pool (size_t size, MemoryPoolHandle h_pool, StreamHandle h_stream) noexcept {
444452 GILReleaseGuard gil;
445453 CUdeviceptr ptr;
446454 if (CUDA_SUCCESS != (err = p_cuMemAllocFromPoolAsync (&ptr, size, *h_pool, as_cu (h_stream)))) {
@@ -458,7 +466,7 @@ DevicePtrHandle deviceptr_alloc_from_pool(size_t size, MemoryPoolHandle h_pool,
458466 return DevicePtrHandle (box, &box->resource );
459467}
460468
461- DevicePtrHandle deviceptr_alloc_async (size_t size, StreamHandle h_stream) {
469+ DevicePtrHandle deviceptr_alloc_async (size_t size, StreamHandle h_stream) noexcept {
462470 GILReleaseGuard gil;
463471 CUdeviceptr ptr;
464472 if (CUDA_SUCCESS != (err = p_cuMemAllocAsync (&ptr, size, as_cu (h_stream)))) {
@@ -476,7 +484,7 @@ DevicePtrHandle deviceptr_alloc_async(size_t size, StreamHandle h_stream) {
476484 return DevicePtrHandle (box, &box->resource );
477485}
478486
479- DevicePtrHandle deviceptr_alloc (size_t size) {
487+ DevicePtrHandle deviceptr_alloc (size_t size) noexcept {
480488 GILReleaseGuard gil;
481489 CUdeviceptr ptr;
482490 if (CUDA_SUCCESS != (err = p_cuMemAlloc (&ptr, size))) {
@@ -494,7 +502,7 @@ DevicePtrHandle deviceptr_alloc(size_t size) {
494502 return DevicePtrHandle (box, &box->resource );
495503}
496504
497- DevicePtrHandle deviceptr_alloc_host (size_t size) {
505+ DevicePtrHandle deviceptr_alloc_host (size_t size) noexcept {
498506 GILReleaseGuard gil;
499507 void * ptr;
500508 if (CUDA_SUCCESS != (err = p_cuMemAllocHost (&ptr, size))) {
@@ -512,15 +520,21 @@ DevicePtrHandle deviceptr_alloc_host(size_t size) {
512520 return DevicePtrHandle (box, &box->resource );
513521}
514522
515- DevicePtrHandle deviceptr_create_ref (CUdeviceptr ptr) {
523+ DevicePtrHandle deviceptr_create_ref (CUdeviceptr ptr) noexcept {
516524 auto box = std::make_shared<DevicePtrBox>(DevicePtrBox{ptr, StreamHandle{}});
517525 return DevicePtrHandle (box, &box->resource );
518526}
519527
520- DevicePtrHandle deviceptr_create_with_owner (CUdeviceptr ptr, PyObject* owner) {
528+ DevicePtrHandle deviceptr_create_with_owner (CUdeviceptr ptr, PyObject* owner) noexcept {
521529 if (!owner) {
522530 return deviceptr_create_ref (ptr);
523531 }
532+ // GIL required when owner is provided
533+ GILAcquireGuard gil;
534+ if (!gil.acquired ()) {
535+ // Python finalizing - fall back to ref version (no owner tracking)
536+ return deviceptr_create_ref (ptr);
537+ }
524538 Py_INCREF (owner);
525539 auto box = std::shared_ptr<DevicePtrBox>(
526540 new DevicePtrBox{ptr, StreamHandle{}},
@@ -593,7 +607,7 @@ struct ExportDataKeyHash {
593607static std::mutex ipc_ptr_cache_mutex;
594608static std::unordered_map<ExportDataKey, std::weak_ptr<DevicePtrBox>, ExportDataKeyHash> ipc_ptr_cache;
595609
596- DevicePtrHandle deviceptr_import_ipc (MemoryPoolHandle h_pool, const void * export_data, StreamHandle h_stream) {
610+ DevicePtrHandle deviceptr_import_ipc (MemoryPoolHandle h_pool, const void * export_data, StreamHandle h_stream) noexcept {
597611 auto data = const_cast <CUmemPoolPtrExportData*>(
598612 reinterpret_cast <const CUmemPoolPtrExportData*>(export_data));
599613
@@ -661,60 +675,4 @@ DevicePtrHandle deviceptr_import_ipc(MemoryPoolHandle h_pool, const void* export
661675 }
662676}
663677
664- // ============================================================================
665- // Capsule C++ API table
666- // ============================================================================
667-
668- const ResourceHandlesCxxApiV1* get_resource_handles_cxx_api_v1 () noexcept {
669- static const ResourceHandlesCxxApiV1 table = []() {
670- ResourceHandlesCxxApiV1 t{};
671- t.abi_version = RESOURCE_HANDLES_CXX_API_VERSION;
672- t.struct_size = static_cast <std::uint32_t >(sizeof (ResourceHandlesCxxApiV1));
673-
674- // Error handling
675- t.get_last_error = &get_last_error;
676- t.peek_last_error = &peek_last_error;
677- t.clear_last_error = &clear_last_error;
678-
679- // Context
680- t.create_context_handle_ref = &create_context_handle_ref;
681- t.get_primary_context = &get_primary_context;
682- t.get_current_context = &get_current_context;
683-
684- // Stream
685- t.create_stream_handle = &create_stream_handle;
686- t.create_stream_handle_ref = &create_stream_handle_ref;
687- t.create_stream_handle_with_owner = &create_stream_handle_with_owner;
688- t.get_legacy_stream = &get_legacy_stream;
689- t.get_per_thread_stream = &get_per_thread_stream;
690-
691- // Event (resolve overloads explicitly)
692- t.create_event_handle =
693- static_cast <EventHandle (*)(ContextHandle, unsigned int )>(&create_event_handle);
694- t.create_event_handle_noctx =
695- static_cast <EventHandle (*)(unsigned int )>(&create_event_handle);
696- t.create_event_handle_ipc = &create_event_handle_ipc;
697-
698- // Memory pool
699- t.create_mempool_handle = &create_mempool_handle;
700- t.create_mempool_handle_ref = &create_mempool_handle_ref;
701- t.get_device_mempool = &get_device_mempool;
702- t.create_mempool_handle_ipc = &create_mempool_handle_ipc;
703-
704- // Device pointer
705- t.deviceptr_alloc_from_pool = &deviceptr_alloc_from_pool;
706- t.deviceptr_alloc_async = &deviceptr_alloc_async;
707- t.deviceptr_alloc = &deviceptr_alloc;
708- t.deviceptr_alloc_host = &deviceptr_alloc_host;
709- t.deviceptr_create_ref = &deviceptr_create_ref;
710- t.deviceptr_create_with_owner = &deviceptr_create_with_owner;
711- t.deviceptr_import_ipc = &deviceptr_import_ipc;
712- t.deallocation_stream = &deallocation_stream;
713- t.set_deallocation_stream = &set_deallocation_stream;
714-
715- return t;
716- }();
717- return &table;
718- }
719-
720678} // namespace cuda_core
0 commit comments