Skip to content

Commit 9789512

Browse files
authored
ggml-cuda: flush legacy pool on OOM and retry (ggml-org#22155)
* ggml-cuda: flush legacy pool on OOM and retry Signed-off-by: 梁厚宏 <2695316095@qq.com> * Address review comments: add explicit sync, update destructor, clean up MUSA macros Signed-off-by: 梁厚宏 <2695316095@qq.com> --------- Signed-off-by: 梁厚宏 <2695316095@qq.com>
1 parent 86f8daa commit 9789512

3 files changed

Lines changed: 23 additions & 2 deletions

File tree

ggml/src/ggml-cuda/ggml-cuda.cu

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -368,15 +368,21 @@ struct ggml_cuda_pool_leg : public ggml_cuda_pool {
368368
}
369369

370370
~ggml_cuda_pool_leg() {
371+
clear_pool();
372+
GGML_ASSERT(pool_size == 0);
373+
}
374+
375+
void clear_pool() {
371376
ggml_cuda_set_device(device);
372377
for (int i = 0; i < MAX_BUFFERS; ++i) {
373378
ggml_cuda_buffer & b = buffer_pool[i];
374379
if (b.ptr != nullptr) {
375380
CUDA_CHECK(cudaFree(b.ptr));
376381
pool_size -= b.size;
382+
b.ptr = nullptr;
383+
b.size = 0;
377384
}
378385
}
379-
GGML_ASSERT(pool_size == 0);
380386
}
381387

382388
void * alloc(size_t size, size_t * actual_size) override {
@@ -421,7 +427,20 @@ struct ggml_cuda_pool_leg : public ggml_cuda_pool {
421427
size_t look_ahead_size = (size_t) (1.05 * size);
422428
look_ahead_size = 256 * ((look_ahead_size + 255)/256);
423429
ggml_cuda_set_device(device);
424-
CUDA_CHECK(ggml_cuda_device_malloc(&ptr, look_ahead_size, device));
430+
cudaError_t err = ggml_cuda_device_malloc(&ptr, look_ahead_size, device);
431+
if (err == cudaErrorMemoryAllocation) {
432+
(void)cudaGetLastError();
433+
const size_t cached_bytes = pool_size;
434+
GGML_LOG_DEBUG(GGML_CUDA_NAME " pool[%d]: alloc of %.2f MiB failed, flushing %.2f MiB of cached buffers and retrying\n",
435+
device, look_ahead_size/1024.0/1024.0, cached_bytes/1024.0/1024.0);
436+
CUDA_CHECK(cudaDeviceSynchronize());
437+
clear_pool();
438+
err = ggml_cuda_device_malloc(&ptr, look_ahead_size, device);
439+
if (err == cudaSuccess) {
440+
GGML_LOG_DEBUG(GGML_CUDA_NAME " pool[%d]: retry succeeded\n", device);
441+
}
442+
}
443+
CUDA_CHECK(err);
425444
*actual_size = look_ahead_size;
426445
pool_size += look_ahead_size;
427446
#ifdef DEBUG_CUDA_MALLOC

ggml/src/ggml-cuda/vendors/hip.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@
5858
#define cudaDeviceProp hipDeviceProp_t
5959
#define cudaDeviceSynchronize hipDeviceSynchronize
6060
#define cudaError_t hipError_t
61+
#define cudaErrorMemoryAllocation hipErrorOutOfMemory
6162
#define cudaErrorPeerAccessAlreadyEnabled hipErrorPeerAccessAlreadyEnabled
6263
#define cudaErrorPeerAccessNotEnabled hipErrorPeerAccessNotEnabled
6364
#define cudaEventCreateWithFlags hipEventCreateWithFlags

ggml/src/ggml-cuda/vendors/musa.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
#define cudaDeviceProp musaDeviceProp
4343
#define cudaDeviceSynchronize musaDeviceSynchronize
4444
#define cudaError_t musaError_t
45+
#define cudaErrorMemoryAllocation musaErrorMemoryAllocation
4546
#define cudaErrorPeerAccessAlreadyEnabled musaErrorPeerAccessAlreadyEnabled
4647
#define cudaErrorPeerAccessNotEnabled musaErrorPeerAccessNotEnabled
4748
#define cudaEventCreateWithFlags musaEventCreateWithFlags

0 commit comments

Comments
 (0)