Skip to content

Commit d5aa244

Browse files
am17an authored and ArberSephirotheca committed
CUDA: use LRU based eviction for cuda graphs (ggml-org#21611)
* CUDA: use a ring-buffer for cuda graphs
* bump limit to 128
* use LRU eviction
* better naming
* do periodic clean-up
1 parent 44336c1 commit d5aa244

1 file changed

Lines changed: 19 additions & 2 deletions

File tree

ggml/src/ggml-cuda/common.cuh

Lines changed: 19 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1187,6 +1187,7 @@ struct ggml_cuda_graph {
11871187
bool disable_due_to_gpu_arch = false;
11881188
bool warmup_complete = false;
11891189
uint64_t uid = 0;
1190+
int64_t last_used_time = 0;
11901191
struct node_properties {
11911192
ggml_tensor node;
11921193
void * node_src_data_ptrs[GGML_MAX_SRC];
@@ -1368,12 +1369,28 @@ struct ggml_backend_cuda_context {
13681369
// when the computation is split across CPU/GPU (e.g., with --n-cpu-moe)
13691370
std::unordered_map<const void *, std::unique_ptr<ggml_cuda_graph>> cuda_graphs;
13701371

1372+
int64_t last_graph_eviction_sweep = 0;
1373+
13711374
ggml_cuda_graph * cuda_graph(const void * first_node_ptr) {
1375+
const int64_t time_now = ggml_time_us();
1376+
1377+
// sweep every 5s, evicting cuda graphs unused for >=10s
1378+
if (time_now - last_graph_eviction_sweep >= 5'000'000) {
1379+
last_graph_eviction_sweep = time_now;
1380+
for (auto it = cuda_graphs.begin(); it != cuda_graphs.end(); ) {
1381+
if (time_now - it->second->last_used_time >= 10'000'000) {
1382+
it = cuda_graphs.erase(it);
1383+
} else {
1384+
++it;
1385+
}
1386+
}
1387+
}
1388+
13721389
auto it = cuda_graphs.find(first_node_ptr);
13731390
if (it == cuda_graphs.end()) {
1374-
cuda_graphs[first_node_ptr] = std::make_unique<ggml_cuda_graph>();
1375-
return cuda_graphs[first_node_ptr].get();
1391+
it = cuda_graphs.emplace(first_node_ptr, std::make_unique<ggml_cuda_graph>()).first;
13761392
}
1393+
it->second->last_used_time = time_now;
13771394
return it->second.get();
13781395
}
13791396

0 commit comments

Comments
 (0)