@@ -171,15 +171,15 @@ struct ContextBox {
171171};
172172} // namespace
173173
174- ContextHandle create_context_handle_ref (CUcontext ctx) noexcept {
174+ ContextHandle create_context_handle_ref (CUcontext ctx) {
175175 auto box = std::make_shared<const ContextBox>(ContextBox{ctx});
176176 return ContextHandle (box, &box->resource );
177177}
178178
179179// Thread-local cache of primary contexts indexed by device ID
180180static thread_local std::vector<ContextHandle> primary_context_cache;
181181
182- ContextHandle get_primary_context (int device_id) noexcept {
182+ ContextHandle get_primary_context (int device_id) {
183183 // Check thread-local cache
184184 if (static_cast <size_t >(device_id) < primary_context_cache.size ()) {
185185 if (auto cached = primary_context_cache[device_id]) {
@@ -212,7 +212,7 @@ ContextHandle get_primary_context(int device_id) noexcept {
212212 return h;
213213}
214214
215- ContextHandle get_current_context () noexcept {
215+ ContextHandle get_current_context () {
216216 GILReleaseGuard gil;
217217 CUcontext ctx = nullptr ;
218218 if (CUDA_SUCCESS != (err = p_cuCtxGetCurrent (&ctx))) {
@@ -234,7 +234,7 @@ struct StreamBox {
234234};
235235} // namespace
236236
237- StreamHandle create_stream_handle (ContextHandle h_ctx, unsigned int flags, int priority) noexcept {
237+ StreamHandle create_stream_handle (ContextHandle h_ctx, unsigned int flags, int priority) {
238238 GILReleaseGuard gil;
239239 CUstream stream;
240240 if (CUDA_SUCCESS != (err = p_cuStreamCreateWithPriority (&stream, flags, priority))) {
@@ -252,12 +252,12 @@ StreamHandle create_stream_handle(ContextHandle h_ctx, unsigned int flags, int p
252252 return StreamHandle (box, &box->resource );
253253}
254254
255- StreamHandle create_stream_handle_ref (CUstream stream) noexcept {
255+ StreamHandle create_stream_handle_ref (CUstream stream) {
256256 auto box = std::make_shared<const StreamBox>(StreamBox{stream});
257257 return StreamHandle (box, &box->resource );
258258}
259259
260- StreamHandle create_stream_handle_with_owner (CUstream stream, PyObject* owner) noexcept {
260+ StreamHandle create_stream_handle_with_owner (CUstream stream, PyObject* owner) {
261261 if (!owner) {
262262 return create_stream_handle_ref (stream);
263263 }
@@ -281,12 +281,12 @@ StreamHandle create_stream_handle_with_owner(CUstream stream, PyObject* owner) n
281281 return StreamHandle (box, &box->resource );
282282}
283283
284- StreamHandle get_legacy_stream () noexcept {
284+ StreamHandle get_legacy_stream () {
285285 static StreamHandle handle = create_stream_handle_ref (CU_STREAM_LEGACY);
286286 return handle;
287287}
288288
289- StreamHandle get_per_thread_stream () noexcept {
289+ StreamHandle get_per_thread_stream () {
290290 static StreamHandle handle = create_stream_handle_ref (CU_STREAM_PER_THREAD);
291291 return handle;
292292}
@@ -301,7 +301,7 @@ struct EventBox {
301301};
302302} // namespace
303303
304- EventHandle create_event_handle (ContextHandle h_ctx, unsigned int flags) noexcept {
304+ EventHandle create_event_handle (ContextHandle h_ctx, unsigned int flags) {
305305 GILReleaseGuard gil;
306306 CUevent event;
307307 if (CUDA_SUCCESS != (err = p_cuEventCreate (&event, flags))) {
@@ -319,11 +319,11 @@ EventHandle create_event_handle(ContextHandle h_ctx, unsigned int flags) noexcep
319319 return EventHandle (box, &box->resource );
320320}
321321
322- EventHandle create_event_handle_noctx (unsigned int flags) noexcept {
322+ EventHandle create_event_handle_noctx (unsigned int flags) {
323323 return create_event_handle (ContextHandle{}, flags);
324324}
325325
326- EventHandle create_event_handle_ipc (const CUipcEventHandle& ipc_handle) noexcept {
326+ EventHandle create_event_handle_ipc (const CUipcEventHandle& ipc_handle) {
327327 GILReleaseGuard gil;
328328 CUevent event;
329329 if (CUDA_SUCCESS != (err = p_cuIpcOpenEventHandle (&event, ipc_handle))) {
@@ -353,19 +353,24 @@ struct MemoryPoolBox {
353353
354354// Helper to clear peer access before destroying a memory pool.
355355// Works around nvbug 5698116: recycled pool handles inherit peer access state.
356- static void clear_mempool_peer_access (CUmemoryPool pool) {
357- int device_count = 0 ;
358- if (p_cuDeviceGetCount (&device_count) != CUDA_SUCCESS || device_count <= 0 ) {
359- return ;
360- }
356+ // Must be noexcept since it's called from a shared_ptr deleter.
357+ static void clear_mempool_peer_access (CUmemoryPool pool) noexcept {
358+ try {
359+ int device_count = 0 ;
360+ if (p_cuDeviceGetCount (&device_count) != CUDA_SUCCESS || device_count <= 0 ) {
361+ return ;
362+ }
361363
362- std::vector<CUmemAccessDesc> clear_access (device_count);
363- for (int i = 0 ; i < device_count; ++i) {
364- clear_access[i].location .type = CU_MEM_LOCATION_TYPE_DEVICE;
365- clear_access[i].location .id = i;
366- clear_access[i].flags = CU_MEM_ACCESS_FLAGS_PROT_NONE;
364+ std::vector<CUmemAccessDesc> clear_access (device_count);
365+ for (int i = 0 ; i < device_count; ++i) {
366+ clear_access[i].location .type = CU_MEM_LOCATION_TYPE_DEVICE;
367+ clear_access[i].location .id = i;
368+ clear_access[i].flags = CU_MEM_ACCESS_FLAGS_PROT_NONE;
369+ }
370+ p_cuMemPoolSetAccess (pool, clear_access.data (), device_count); // Best effort
371+ } catch (...) {
372+ // Swallow exceptions - this is best-effort cleanup in destructor context
367373 }
368- p_cuMemPoolSetAccess (pool, clear_access.data (), device_count); // Best effort
369374}
370375
371376static MemoryPoolHandle wrap_mempool_owned (CUmemoryPool pool) {
@@ -381,7 +386,7 @@ static MemoryPoolHandle wrap_mempool_owned(CUmemoryPool pool) {
381386 return MemoryPoolHandle (box, &box->resource );
382387}
383388
384- MemoryPoolHandle create_mempool_handle (const CUmemPoolProps& props) noexcept {
389+ MemoryPoolHandle create_mempool_handle (const CUmemPoolProps& props) {
385390 GILReleaseGuard gil;
386391 CUmemoryPool pool;
387392 if (CUDA_SUCCESS != (err = p_cuMemPoolCreate (&pool, &props))) {
@@ -390,12 +395,12 @@ MemoryPoolHandle create_mempool_handle(const CUmemPoolProps& props) noexcept {
390395 return wrap_mempool_owned (pool);
391396}
392397
393- MemoryPoolHandle create_mempool_handle_ref (CUmemoryPool pool) noexcept {
398+ MemoryPoolHandle create_mempool_handle_ref (CUmemoryPool pool) {
394399 auto box = std::make_shared<const MemoryPoolBox>(MemoryPoolBox{pool});
395400 return MemoryPoolHandle (box, &box->resource );
396401}
397402
398- MemoryPoolHandle get_device_mempool (int device_id) noexcept {
403+ MemoryPoolHandle get_device_mempool (int device_id) {
399404 GILReleaseGuard gil;
400405 CUmemoryPool pool;
401406 if (CUDA_SUCCESS != (err = p_cuDeviceGetMemPool (&pool, device_id))) {
@@ -404,7 +409,7 @@ MemoryPoolHandle get_device_mempool(int device_id) noexcept {
404409 return create_mempool_handle_ref (pool);
405410}
406411
407- MemoryPoolHandle create_mempool_handle_ipc (int fd, CUmemAllocationHandleType handle_type) noexcept {
412+ MemoryPoolHandle create_mempool_handle_ipc (int fd, CUmemAllocationHandleType handle_type) {
408413 GILReleaseGuard gil;
409414 CUmemoryPool pool;
410415 auto handle_ptr = reinterpret_cast <void *>(static_cast <uintptr_t >(fd));
@@ -448,7 +453,7 @@ void set_deallocation_stream(const DevicePtrHandle& h, StreamHandle h_stream) no
448453 get_box (h)->h_stream = std::move (h_stream);
449454}
450455
451- DevicePtrHandle deviceptr_alloc_from_pool (size_t size, MemoryPoolHandle h_pool, StreamHandle h_stream) noexcept {
456+ DevicePtrHandle deviceptr_alloc_from_pool (size_t size, MemoryPoolHandle h_pool, StreamHandle h_stream) {
452457 GILReleaseGuard gil;
453458 CUdeviceptr ptr;
454459 if (CUDA_SUCCESS != (err = p_cuMemAllocFromPoolAsync (&ptr, size, *h_pool, as_cu (h_stream)))) {
@@ -466,7 +471,7 @@ DevicePtrHandle deviceptr_alloc_from_pool(size_t size, MemoryPoolHandle h_pool,
466471 return DevicePtrHandle (box, &box->resource );
467472}
468473
469- DevicePtrHandle deviceptr_alloc_async (size_t size, StreamHandle h_stream) noexcept {
474+ DevicePtrHandle deviceptr_alloc_async (size_t size, StreamHandle h_stream) {
470475 GILReleaseGuard gil;
471476 CUdeviceptr ptr;
472477 if (CUDA_SUCCESS != (err = p_cuMemAllocAsync (&ptr, size, as_cu (h_stream)))) {
@@ -484,7 +489,7 @@ DevicePtrHandle deviceptr_alloc_async(size_t size, StreamHandle h_stream) noexce
484489 return DevicePtrHandle (box, &box->resource );
485490}
486491
487- DevicePtrHandle deviceptr_alloc (size_t size) noexcept {
492+ DevicePtrHandle deviceptr_alloc (size_t size) {
488493 GILReleaseGuard gil;
489494 CUdeviceptr ptr;
490495 if (CUDA_SUCCESS != (err = p_cuMemAlloc (&ptr, size))) {
@@ -502,7 +507,7 @@ DevicePtrHandle deviceptr_alloc(size_t size) noexcept {
502507 return DevicePtrHandle (box, &box->resource );
503508}
504509
505- DevicePtrHandle deviceptr_alloc_host (size_t size) noexcept {
510+ DevicePtrHandle deviceptr_alloc_host (size_t size) {
506511 GILReleaseGuard gil;
507512 void * ptr;
508513 if (CUDA_SUCCESS != (err = p_cuMemAllocHost (&ptr, size))) {
@@ -520,12 +525,12 @@ DevicePtrHandle deviceptr_alloc_host(size_t size) noexcept {
520525 return DevicePtrHandle (box, &box->resource );
521526}
522527
523- DevicePtrHandle deviceptr_create_ref (CUdeviceptr ptr) noexcept {
528+ DevicePtrHandle deviceptr_create_ref (CUdeviceptr ptr) {
524529 auto box = std::make_shared<DevicePtrBox>(DevicePtrBox{ptr, StreamHandle{}});
525530 return DevicePtrHandle (box, &box->resource );
526531}
527532
528- DevicePtrHandle deviceptr_create_with_owner (CUdeviceptr ptr, PyObject* owner) noexcept {
533+ DevicePtrHandle deviceptr_create_with_owner (CUdeviceptr ptr, PyObject* owner) {
529534 if (!owner) {
530535 return deviceptr_create_ref (ptr);
531536 }
@@ -607,7 +612,7 @@ struct ExportDataKeyHash {
607612static std::mutex ipc_ptr_cache_mutex;
608613static std::unordered_map<ExportDataKey, std::weak_ptr<DevicePtrBox>, ExportDataKeyHash> ipc_ptr_cache;
609614
610- DevicePtrHandle deviceptr_import_ipc (MemoryPoolHandle h_pool, const void * export_data, StreamHandle h_stream) noexcept {
615+ DevicePtrHandle deviceptr_import_ipc (MemoryPoolHandle h_pool, const void * export_data, StreamHandle h_stream) {
611616 auto data = const_cast <CUmemPoolPtrExportData*>(
612617 reinterpret_cast <const CUmemPoolPtrExportData*>(export_data));
613618
@@ -639,14 +644,16 @@ DevicePtrHandle deviceptr_import_ipc(MemoryPoolHandle h_pool, const void* export
639644 new DevicePtrBox{ptr, h_stream},
640645 [h_pool, key](DevicePtrBox* b) {
641646 GILReleaseGuard gil;
642- {
647+ try {
643648 std::lock_guard<std::mutex> lock (ipc_ptr_cache_mutex);
644649 // Only erase if expired - avoids race where another thread
645650 // replaced the entry with a new import before we acquired the lock.
646651 auto it = ipc_ptr_cache.find (key);
647652 if (it != ipc_ptr_cache.end () && it->second .expired ()) {
648653 ipc_ptr_cache.erase (it);
649654 }
655+ } catch (...) {
656+ // Cache cleanup is best-effort - swallow exceptions in destructor context
650657 }
651658 p_cuMemFreeAsync (b->resource , as_cu (b->h_stream ));
652659 delete b;
0 commit comments