Skip to content

Commit b8261e5

Browse files
authored
fix: remove incorrect noexcept from resource handle functions (#1489)
Functions that allocate memory (via new, std::make_shared, or container operations) can throw std::bad_alloc. Marking them noexcept causes std::terminate on OOM instead of allowing graceful error handling.

Changes:
- Remove noexcept from 22 C++ functions that can throw
- Keep noexcept on 5 functions that truly cannot throw: get_last_error, peek_last_error, clear_last_error, deallocation_stream, set_deallocation_stream
- Update Cython .pxd/.pyx to use "nogil except+" for throwing functions
- Wrap clear_mempool_peer_access in try-catch (std::vector can throw)
- Wrap IPC cache deleter operations in try-catch (std::lock_guard can throw)

This allows C++ exceptions to be translated to Python exceptions by Cython, enabling proper error handling instead of process termination.

Based on feedback from Leo Fang, Lawrence Mitchell, and Vyas Ramasubramani.
1 parent bb3ee2c commit b8261e5

4 files changed

Lines changed: 107 additions & 100 deletions

File tree

cuda_core/cuda/core/_cpp/resource_handles.cpp

Lines changed: 41 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -171,15 +171,15 @@ struct ContextBox {
171171
};
172172
} // namespace
173173

174-
ContextHandle create_context_handle_ref(CUcontext ctx) noexcept {
174+
ContextHandle create_context_handle_ref(CUcontext ctx) {
175175
auto box = std::make_shared<const ContextBox>(ContextBox{ctx});
176176
return ContextHandle(box, &box->resource);
177177
}
178178

179179
// Thread-local cache of primary contexts indexed by device ID
180180
static thread_local std::vector<ContextHandle> primary_context_cache;
181181

182-
ContextHandle get_primary_context(int device_id) noexcept {
182+
ContextHandle get_primary_context(int device_id) {
183183
// Check thread-local cache
184184
if (static_cast<size_t>(device_id) < primary_context_cache.size()) {
185185
if (auto cached = primary_context_cache[device_id]) {
@@ -212,7 +212,7 @@ ContextHandle get_primary_context(int device_id) noexcept {
212212
return h;
213213
}
214214

215-
ContextHandle get_current_context() noexcept {
215+
ContextHandle get_current_context() {
216216
GILReleaseGuard gil;
217217
CUcontext ctx = nullptr;
218218
if (CUDA_SUCCESS != (err = p_cuCtxGetCurrent(&ctx))) {
@@ -234,7 +234,7 @@ struct StreamBox {
234234
};
235235
} // namespace
236236

237-
StreamHandle create_stream_handle(ContextHandle h_ctx, unsigned int flags, int priority) noexcept {
237+
StreamHandle create_stream_handle(ContextHandle h_ctx, unsigned int flags, int priority) {
238238
GILReleaseGuard gil;
239239
CUstream stream;
240240
if (CUDA_SUCCESS != (err = p_cuStreamCreateWithPriority(&stream, flags, priority))) {
@@ -252,12 +252,12 @@ StreamHandle create_stream_handle(ContextHandle h_ctx, unsigned int flags, int p
252252
return StreamHandle(box, &box->resource);
253253
}
254254

255-
StreamHandle create_stream_handle_ref(CUstream stream) noexcept {
255+
StreamHandle create_stream_handle_ref(CUstream stream) {
256256
auto box = std::make_shared<const StreamBox>(StreamBox{stream});
257257
return StreamHandle(box, &box->resource);
258258
}
259259

260-
StreamHandle create_stream_handle_with_owner(CUstream stream, PyObject* owner) noexcept {
260+
StreamHandle create_stream_handle_with_owner(CUstream stream, PyObject* owner) {
261261
if (!owner) {
262262
return create_stream_handle_ref(stream);
263263
}
@@ -281,12 +281,12 @@ StreamHandle create_stream_handle_with_owner(CUstream stream, PyObject* owner) n
281281
return StreamHandle(box, &box->resource);
282282
}
283283

284-
StreamHandle get_legacy_stream() noexcept {
284+
StreamHandle get_legacy_stream() {
285285
static StreamHandle handle = create_stream_handle_ref(CU_STREAM_LEGACY);
286286
return handle;
287287
}
288288

289-
StreamHandle get_per_thread_stream() noexcept {
289+
StreamHandle get_per_thread_stream() {
290290
static StreamHandle handle = create_stream_handle_ref(CU_STREAM_PER_THREAD);
291291
return handle;
292292
}
@@ -301,7 +301,7 @@ struct EventBox {
301301
};
302302
} // namespace
303303

304-
EventHandle create_event_handle(ContextHandle h_ctx, unsigned int flags) noexcept {
304+
EventHandle create_event_handle(ContextHandle h_ctx, unsigned int flags) {
305305
GILReleaseGuard gil;
306306
CUevent event;
307307
if (CUDA_SUCCESS != (err = p_cuEventCreate(&event, flags))) {
@@ -319,11 +319,11 @@ EventHandle create_event_handle(ContextHandle h_ctx, unsigned int flags) noexcep
319319
return EventHandle(box, &box->resource);
320320
}
321321

322-
EventHandle create_event_handle_noctx(unsigned int flags) noexcept {
322+
EventHandle create_event_handle_noctx(unsigned int flags) {
323323
return create_event_handle(ContextHandle{}, flags);
324324
}
325325

326-
EventHandle create_event_handle_ipc(const CUipcEventHandle& ipc_handle) noexcept {
326+
EventHandle create_event_handle_ipc(const CUipcEventHandle& ipc_handle) {
327327
GILReleaseGuard gil;
328328
CUevent event;
329329
if (CUDA_SUCCESS != (err = p_cuIpcOpenEventHandle(&event, ipc_handle))) {
@@ -353,19 +353,24 @@ struct MemoryPoolBox {
353353

354354
// Helper to clear peer access before destroying a memory pool.
355355
// Works around nvbug 5698116: recycled pool handles inherit peer access state.
356-
static void clear_mempool_peer_access(CUmemoryPool pool) {
357-
int device_count = 0;
358-
if (p_cuDeviceGetCount(&device_count) != CUDA_SUCCESS || device_count <= 0) {
359-
return;
360-
}
356+
// Must be noexcept since it's called from a shared_ptr deleter.
357+
static void clear_mempool_peer_access(CUmemoryPool pool) noexcept {
358+
try {
359+
int device_count = 0;
360+
if (p_cuDeviceGetCount(&device_count) != CUDA_SUCCESS || device_count <= 0) {
361+
return;
362+
}
361363

362-
std::vector<CUmemAccessDesc> clear_access(device_count);
363-
for (int i = 0; i < device_count; ++i) {
364-
clear_access[i].location.type = CU_MEM_LOCATION_TYPE_DEVICE;
365-
clear_access[i].location.id = i;
366-
clear_access[i].flags = CU_MEM_ACCESS_FLAGS_PROT_NONE;
364+
std::vector<CUmemAccessDesc> clear_access(device_count);
365+
for (int i = 0; i < device_count; ++i) {
366+
clear_access[i].location.type = CU_MEM_LOCATION_TYPE_DEVICE;
367+
clear_access[i].location.id = i;
368+
clear_access[i].flags = CU_MEM_ACCESS_FLAGS_PROT_NONE;
369+
}
370+
p_cuMemPoolSetAccess(pool, clear_access.data(), device_count); // Best effort
371+
} catch (...) {
372+
// Swallow exceptions - this is best-effort cleanup in destructor context
367373
}
368-
p_cuMemPoolSetAccess(pool, clear_access.data(), device_count); // Best effort
369374
}
370375

371376
static MemoryPoolHandle wrap_mempool_owned(CUmemoryPool pool) {
@@ -381,7 +386,7 @@ static MemoryPoolHandle wrap_mempool_owned(CUmemoryPool pool) {
381386
return MemoryPoolHandle(box, &box->resource);
382387
}
383388

384-
MemoryPoolHandle create_mempool_handle(const CUmemPoolProps& props) noexcept {
389+
MemoryPoolHandle create_mempool_handle(const CUmemPoolProps& props) {
385390
GILReleaseGuard gil;
386391
CUmemoryPool pool;
387392
if (CUDA_SUCCESS != (err = p_cuMemPoolCreate(&pool, &props))) {
@@ -390,12 +395,12 @@ MemoryPoolHandle create_mempool_handle(const CUmemPoolProps& props) noexcept {
390395
return wrap_mempool_owned(pool);
391396
}
392397

393-
MemoryPoolHandle create_mempool_handle_ref(CUmemoryPool pool) noexcept {
398+
MemoryPoolHandle create_mempool_handle_ref(CUmemoryPool pool) {
394399
auto box = std::make_shared<const MemoryPoolBox>(MemoryPoolBox{pool});
395400
return MemoryPoolHandle(box, &box->resource);
396401
}
397402

398-
MemoryPoolHandle get_device_mempool(int device_id) noexcept {
403+
MemoryPoolHandle get_device_mempool(int device_id) {
399404
GILReleaseGuard gil;
400405
CUmemoryPool pool;
401406
if (CUDA_SUCCESS != (err = p_cuDeviceGetMemPool(&pool, device_id))) {
@@ -404,7 +409,7 @@ MemoryPoolHandle get_device_mempool(int device_id) noexcept {
404409
return create_mempool_handle_ref(pool);
405410
}
406411

407-
MemoryPoolHandle create_mempool_handle_ipc(int fd, CUmemAllocationHandleType handle_type) noexcept {
412+
MemoryPoolHandle create_mempool_handle_ipc(int fd, CUmemAllocationHandleType handle_type) {
408413
GILReleaseGuard gil;
409414
CUmemoryPool pool;
410415
auto handle_ptr = reinterpret_cast<void*>(static_cast<uintptr_t>(fd));
@@ -448,7 +453,7 @@ void set_deallocation_stream(const DevicePtrHandle& h, StreamHandle h_stream) no
448453
get_box(h)->h_stream = std::move(h_stream);
449454
}
450455

451-
DevicePtrHandle deviceptr_alloc_from_pool(size_t size, MemoryPoolHandle h_pool, StreamHandle h_stream) noexcept {
456+
DevicePtrHandle deviceptr_alloc_from_pool(size_t size, MemoryPoolHandle h_pool, StreamHandle h_stream) {
452457
GILReleaseGuard gil;
453458
CUdeviceptr ptr;
454459
if (CUDA_SUCCESS != (err = p_cuMemAllocFromPoolAsync(&ptr, size, *h_pool, as_cu(h_stream)))) {
@@ -466,7 +471,7 @@ DevicePtrHandle deviceptr_alloc_from_pool(size_t size, MemoryPoolHandle h_pool,
466471
return DevicePtrHandle(box, &box->resource);
467472
}
468473

469-
DevicePtrHandle deviceptr_alloc_async(size_t size, StreamHandle h_stream) noexcept {
474+
DevicePtrHandle deviceptr_alloc_async(size_t size, StreamHandle h_stream) {
470475
GILReleaseGuard gil;
471476
CUdeviceptr ptr;
472477
if (CUDA_SUCCESS != (err = p_cuMemAllocAsync(&ptr, size, as_cu(h_stream)))) {
@@ -484,7 +489,7 @@ DevicePtrHandle deviceptr_alloc_async(size_t size, StreamHandle h_stream) noexce
484489
return DevicePtrHandle(box, &box->resource);
485490
}
486491

487-
DevicePtrHandle deviceptr_alloc(size_t size) noexcept {
492+
DevicePtrHandle deviceptr_alloc(size_t size) {
488493
GILReleaseGuard gil;
489494
CUdeviceptr ptr;
490495
if (CUDA_SUCCESS != (err = p_cuMemAlloc(&ptr, size))) {
@@ -502,7 +507,7 @@ DevicePtrHandle deviceptr_alloc(size_t size) noexcept {
502507
return DevicePtrHandle(box, &box->resource);
503508
}
504509

505-
DevicePtrHandle deviceptr_alloc_host(size_t size) noexcept {
510+
DevicePtrHandle deviceptr_alloc_host(size_t size) {
506511
GILReleaseGuard gil;
507512
void* ptr;
508513
if (CUDA_SUCCESS != (err = p_cuMemAllocHost(&ptr, size))) {
@@ -520,12 +525,12 @@ DevicePtrHandle deviceptr_alloc_host(size_t size) noexcept {
520525
return DevicePtrHandle(box, &box->resource);
521526
}
522527

523-
DevicePtrHandle deviceptr_create_ref(CUdeviceptr ptr) noexcept {
528+
DevicePtrHandle deviceptr_create_ref(CUdeviceptr ptr) {
524529
auto box = std::make_shared<DevicePtrBox>(DevicePtrBox{ptr, StreamHandle{}});
525530
return DevicePtrHandle(box, &box->resource);
526531
}
527532

528-
DevicePtrHandle deviceptr_create_with_owner(CUdeviceptr ptr, PyObject* owner) noexcept {
533+
DevicePtrHandle deviceptr_create_with_owner(CUdeviceptr ptr, PyObject* owner) {
529534
if (!owner) {
530535
return deviceptr_create_ref(ptr);
531536
}
@@ -607,7 +612,7 @@ struct ExportDataKeyHash {
607612
static std::mutex ipc_ptr_cache_mutex;
608613
static std::unordered_map<ExportDataKey, std::weak_ptr<DevicePtrBox>, ExportDataKeyHash> ipc_ptr_cache;
609614

610-
DevicePtrHandle deviceptr_import_ipc(MemoryPoolHandle h_pool, const void* export_data, StreamHandle h_stream) noexcept {
615+
DevicePtrHandle deviceptr_import_ipc(MemoryPoolHandle h_pool, const void* export_data, StreamHandle h_stream) {
611616
auto data = const_cast<CUmemPoolPtrExportData*>(
612617
reinterpret_cast<const CUmemPoolPtrExportData*>(export_data));
613618

@@ -639,14 +644,16 @@ DevicePtrHandle deviceptr_import_ipc(MemoryPoolHandle h_pool, const void* export
639644
new DevicePtrBox{ptr, h_stream},
640645
[h_pool, key](DevicePtrBox* b) {
641646
GILReleaseGuard gil;
642-
{
647+
try {
643648
std::lock_guard<std::mutex> lock(ipc_ptr_cache_mutex);
644649
// Only erase if expired - avoids race where another thread
645650
// replaced the entry with a new import before we acquired the lock.
646651
auto it = ipc_ptr_cache.find(key);
647652
if (it != ipc_ptr_cache.end() && it->second.expired()) {
648653
ipc_ptr_cache.erase(it);
649654
}
655+
} catch (...) {
656+
// Cache cleanup is best-effort - swallow exceptions in destructor context
650657
}
651658
p_cuMemFreeAsync(b->resource, as_cu(b->h_stream));
652659
delete b;

cuda_core/cuda/core/_cpp/resource_handles.hpp

Lines changed: 22 additions & 22 deletions
Original file line number | Diff line number | Diff line change
@@ -75,15 +75,15 @@ using MemoryPoolHandle = std::shared_ptr<const CUmemoryPool>;
7575
// ============================================================================
7676

7777
// Function to create a non-owning context handle (references existing context).
78-
ContextHandle create_context_handle_ref(CUcontext ctx) noexcept;
78+
ContextHandle create_context_handle_ref(CUcontext ctx);
7979

8080
// Get handle to the primary context for a device (with thread-local caching)
8181
// Returns empty handle on error (caller must check)
82-
ContextHandle get_primary_context(int device_id) noexcept;
82+
ContextHandle get_primary_context(int device_id);
8383

8484
// Get handle to the current CUDA context
8585
// Returns empty handle if no context is current (caller must check)
86-
ContextHandle get_current_context() noexcept;
86+
ContextHandle get_current_context();
8787

8888
// ============================================================================
8989
// Stream handle functions
@@ -93,26 +93,26 @@ ContextHandle get_current_context() noexcept;
9393
// The stream structurally depends on the provided context handle.
9494
// When the last reference is released, cuStreamDestroy is called automatically.
9595
// Returns empty handle on error (caller must check).
96-
StreamHandle create_stream_handle(ContextHandle h_ctx, unsigned int flags, int priority) noexcept;
96+
StreamHandle create_stream_handle(ContextHandle h_ctx, unsigned int flags, int priority);
9797

9898
// Create a non-owning stream handle (references existing stream).
9999
// Use for borrowed streams (from foreign code) or built-in streams.
100100
// The stream will NOT be destroyed when the handle is released.
101101
// Caller is responsible for keeping the stream's context alive.
102-
StreamHandle create_stream_handle_ref(CUstream stream) noexcept;
102+
StreamHandle create_stream_handle_ref(CUstream stream);
103103

104104
// Create a non-owning stream handle that prevents a Python owner from being GC'd.
105105
// The owner's refcount is incremented; decremented when handle is released.
106106
// The owner is responsible for keeping the stream's context alive.
107-
StreamHandle create_stream_handle_with_owner(CUstream stream, PyObject* owner) noexcept;
107+
StreamHandle create_stream_handle_with_owner(CUstream stream, PyObject* owner);
108108

109109
// Get non-owning handle to the legacy default stream (CU_STREAM_LEGACY)
110110
// Note: Legacy stream has no specific context dependency.
111-
StreamHandle get_legacy_stream() noexcept;
111+
StreamHandle get_legacy_stream();
112112

113113
// Get non-owning handle to the per-thread default stream (CU_STREAM_PER_THREAD)
114114
// Note: Per-thread stream has no specific context dependency.
115-
StreamHandle get_per_thread_stream() noexcept;
115+
StreamHandle get_per_thread_stream();
116116

117117
// ============================================================================
118118
// Event handle functions
@@ -122,19 +122,19 @@ StreamHandle get_per_thread_stream() noexcept;
122122
// The event structurally depends on the provided context handle.
123123
// When the last reference is released, cuEventDestroy is called automatically.
124124
// Returns empty handle on error (caller must check).
125-
EventHandle create_event_handle(ContextHandle h_ctx, unsigned int flags) noexcept;
125+
EventHandle create_event_handle(ContextHandle h_ctx, unsigned int flags);
126126

127127
// Create an owning event handle without context dependency.
128128
// Use for temporary events that are created and destroyed in the same scope.
129129
// When the last reference is released, cuEventDestroy is called automatically.
130130
// Returns empty handle on error (caller must check).
131-
EventHandle create_event_handle_noctx(unsigned int flags) noexcept;
131+
EventHandle create_event_handle_noctx(unsigned int flags);
132132

133133
// Create an owning event handle from an IPC handle.
134134
// The originating process owns the event and its context.
135135
// When the last reference is released, cuEventDestroy is called automatically.
136136
// Returns empty handle on error (caller must check).
137-
EventHandle create_event_handle_ipc(const CUipcEventHandle& ipc_handle) noexcept;
137+
EventHandle create_event_handle_ipc(const CUipcEventHandle& ipc_handle);
138138

139139
// ============================================================================
140140
// Memory pool handle functions
@@ -144,22 +144,22 @@ EventHandle create_event_handle_ipc(const CUipcEventHandle& ipc_handle) noexcept
144144
// Memory pools are device-scoped (not context-scoped).
145145
// When the last reference is released, cuMemPoolDestroy is called automatically.
146146
// Returns empty handle on error (caller must check).
147-
MemoryPoolHandle create_mempool_handle(const CUmemPoolProps& props) noexcept;
147+
MemoryPoolHandle create_mempool_handle(const CUmemPoolProps& props);
148148

149149
// Create a non-owning memory pool handle (references existing pool).
150150
// Use for device default/current pools that are managed by the driver.
151151
// The pool will NOT be destroyed when the handle is released.
152-
MemoryPoolHandle create_mempool_handle_ref(CUmemoryPool pool) noexcept;
152+
MemoryPoolHandle create_mempool_handle_ref(CUmemoryPool pool);
153153

154154
// Get non-owning handle to the current memory pool for a device.
155155
// Returns empty handle on error (caller must check).
156-
MemoryPoolHandle get_device_mempool(int device_id) noexcept;
156+
MemoryPoolHandle get_device_mempool(int device_id);
157157

158158
// Create an owning memory pool handle from an IPC import.
159159
// The file descriptor is NOT owned by this handle (caller manages FD separately).
160160
// When the last reference is released, cuMemPoolDestroy is called automatically.
161161
// Returns empty handle on error (caller must check).
162-
MemoryPoolHandle create_mempool_handle_ipc(int fd, CUmemAllocationHandleType handle_type) noexcept;
162+
MemoryPoolHandle create_mempool_handle_ipc(int fd, CUmemAllocationHandleType handle_type);
163163

164164
// ============================================================================
165165
// Device pointer handle functions
@@ -174,33 +174,33 @@ using DevicePtrHandle = std::shared_ptr<const CUdeviceptr>;
174174
DevicePtrHandle deviceptr_alloc_from_pool(
175175
size_t size,
176176
MemoryPoolHandle h_pool,
177-
StreamHandle h_stream) noexcept;
177+
StreamHandle h_stream);
178178

179179
// Allocate device memory asynchronously via cuMemAllocAsync.
180180
// When the last reference is released, cuMemFreeAsync is called on the stored stream.
181181
// Returns empty handle on error (caller must check).
182-
DevicePtrHandle deviceptr_alloc_async(size_t size, StreamHandle h_stream) noexcept;
182+
DevicePtrHandle deviceptr_alloc_async(size_t size, StreamHandle h_stream);
183183

184184
// Allocate device memory synchronously via cuMemAlloc.
185185
// When the last reference is released, cuMemFree is called.
186186
// Returns empty handle on error (caller must check).
187-
DevicePtrHandle deviceptr_alloc(size_t size) noexcept;
187+
DevicePtrHandle deviceptr_alloc(size_t size);
188188

189189
// Allocate pinned host memory via cuMemAllocHost.
190190
// When the last reference is released, cuMemFreeHost is called.
191191
// Returns empty handle on error (caller must check).
192-
DevicePtrHandle deviceptr_alloc_host(size_t size) noexcept;
192+
DevicePtrHandle deviceptr_alloc_host(size_t size);
193193

194194
// Create a non-owning device pointer handle (references existing pointer).
195195
// Use for foreign pointers (e.g., from external libraries).
196196
// The pointer will NOT be freed when the handle is released.
197-
DevicePtrHandle deviceptr_create_ref(CUdeviceptr ptr) noexcept;
197+
DevicePtrHandle deviceptr_create_ref(CUdeviceptr ptr);
198198

199199
// Create a non-owning device pointer handle that prevents a Python owner from being GC'd.
200200
// The owner's refcount is incremented; decremented when handle is released.
201201
// The pointer will NOT be freed when the handle is released.
202202
// If owner is nullptr, equivalent to deviceptr_create_ref.
203-
DevicePtrHandle deviceptr_create_with_owner(CUdeviceptr ptr, PyObject* owner) noexcept;
203+
DevicePtrHandle deviceptr_create_with_owner(CUdeviceptr ptr, PyObject* owner);
204204

205205
// Import a device pointer from IPC via cuMemPoolImportPointer.
206206
// When the last reference is released, cuMemFreeAsync is called on the stored stream.
@@ -209,7 +209,7 @@ DevicePtrHandle deviceptr_create_with_owner(CUdeviceptr ptr, PyObject* owner) no
209209
DevicePtrHandle deviceptr_import_ipc(
210210
MemoryPoolHandle h_pool,
211211
const void* export_data,
212-
StreamHandle h_stream) noexcept;
212+
StreamHandle h_stream);
213213

214214
// Access the deallocation stream for a device pointer handle (read-only).
215215
// For non-owning handles, the stream is not used but can still be accessed.

0 commit comments

Comments
 (0)