Skip to content

Commit 3f7c29d

Browse files
authored
ggml: add graph_reused (ggml-org#21764)
* ggml: add graph_reused
* use versioning instead of reuse flag
* increment version with atomic
* use top bits for split numbering
* add assert
* move counter to ggml.c
* set uid in split_graph only
* fix windows
* address further review comments
* get next_uid rather than doing bit manipulation
* rename + add comment about uid
1 parent ae2d348 commit 3f7c29d

5 files changed

Lines changed: 35 additions & 0 deletions

File tree

ggml/src/ggml-backend.cpp

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1030,6 +1030,8 @@ void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct ggml_cgra
10301030
GGML_ABORT("%s: failed to initialize context\n", __func__);
10311031
}
10321032

1033+
graph->uid = ggml_graph_next_uid();
1034+
10331035
// pass 1: assign backends to ops with pre-allocated inputs
10341036
for (int i = 0; i < graph->n_leafs; i++) {
10351037
struct ggml_tensor * leaf = graph->leafs[i];
@@ -1477,6 +1479,11 @@ void ggml_backend_sched_split_graph(ggml_backend_sched_t sched, struct ggml_cgra
14771479
assert(graph_copy->size > graph_copy->n_leafs);
14781480
graph_copy->leafs[graph_copy->n_leafs++] = leaf;
14791481
}
1482+
1483+
// set ids for all splits
1484+
for (int i = 0; i < sched->n_splits; ++i) {
1485+
sched->splits[i].graph.uid = ggml_graph_next_uid();
1486+
}
14801487
}
14811488

14821489
static bool ggml_backend_sched_alloc_splits(ggml_backend_sched_t sched) {

ggml/src/ggml-cuda/common.cuh

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1186,6 +1186,7 @@ struct ggml_cuda_graph {
11861186
std::vector<cudaGraphNode_t> nodes;
11871187
bool disable_due_to_gpu_arch = false;
11881188
bool warmup_complete = false;
1189+
uint64_t uid = 0;
11891190
struct node_properties {
11901191
ggml_tensor node;
11911192
void * node_src_data_ptrs[GGML_MAX_SRC];

ggml/src/ggml-cuda/ggml-cuda.cu

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -3108,6 +3108,15 @@ static bool ggml_cuda_graph_update_required(ggml_backend_cuda_context * cuda_ctx
31083108
const void * graph_key = ggml_cuda_graph_get_key(cgraph);
31093109
ggml_cuda_graph * graph = cuda_ctx->cuda_graph(graph_key);
31103110

3111+
if (cgraph->uid != 0 &&
3112+
cgraph->uid == graph->uid) {
3113+
GGML_LOG_DEBUG("CUDA Graph id %zu reused\n", cgraph->uid);
3114+
GGML_ASSERT((int)graph->node_props.size() == cgraph->n_nodes);
3115+
return false;
3116+
}
3117+
3118+
graph->uid = cgraph->uid;
3119+
31113120
// Check if the graph size has changed
31123121
if ((int)graph->node_props.size() != cgraph->n_nodes) {
31133122
res = true;

ggml/src/ggml-impl.h

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -30,6 +30,8 @@ extern "C" {
3030

3131
void ggml_print_backtrace(void);
3232

33+
uint64_t ggml_graph_next_uid(void); // returns the current value of a process-wide counter (initialized to 1) and atomically increments it; used to stamp ggml_cgraph::uid
34+
3335
#ifndef MIN
3436
# define MIN(a, b) ((a) < (b) ? (a) : (b))
3537
#endif
@@ -338,6 +340,10 @@ struct ggml_cgraph {
338340
struct ggml_hash_set visited_hash_set;
339341

340342
enum ggml_cgraph_eval_order order;
343+
344+
// an optional identifier that can be utilized to recognize same graphs if two non-zero values match
345+
// a value of 0 means it is not set and should be ignored
346+
uint64_t uid;
341347
};
342348

343349
// returns a slice of cgraph with nodes [i0, i1)

ggml/src/ggml.c

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -53,6 +53,16 @@
5353

5454
#define UNUSED GGML_UNUSED
5555

56+
uint64_t ggml_graph_next_uid(void) { // hands out the next graph uid: returns the counter's current value and atomically advances it by 1
57+
#ifdef _MSC_VER
58+
static volatile long long counter = 1; // starts at 1, so the first uid returned is 1 (0 is the "not set" value of ggml_cgraph::uid)
59+
return (uint64_t) _InterlockedIncrement64(&counter) - 1; // the intrinsic yields the incremented value; subtracting 1 returns the pre-increment value, matching the fetch-add branch below
60+
#else
61+
static uint64_t counter = 1; // starts at 1, so the first uid returned is 1 (0 is the "not set" value of ggml_cgraph::uid)
62+
return __atomic_fetch_add(&counter, 1, __ATOMIC_RELAXED); // fetch-add returns the value held before the addition; relaxed memory ordering is requested
63+
#endif
64+
}
65+
5666
// Needed for ggml_fp32_to_bf16_row()
5767
#if defined(__AVX512BF16__)
5868
#if defined(_MSC_VER)
@@ -7098,6 +7108,7 @@ struct ggml_cgraph * ggml_new_graph_custom(struct ggml_context * ctx, size_t siz
70987108
/*.use_counts =*/ use_counts_ptr,
70997109
/*.hash_table =*/ { hash_size, hash_used, hash_keys_ptr },
71007110
/*.order =*/ GGML_CGRAPH_EVAL_ORDER_LEFT_TO_RIGHT,
7111+
/*.uid =*/ 0,
71017112
};
71027113

71037114
ggml_hash_set_reset(&cgraph->visited_hash_set);
@@ -7125,6 +7136,7 @@ struct ggml_cgraph ggml_graph_view(struct ggml_cgraph * cgraph0, int i0, int i1)
71257136
/*.use_counts =*/ cgraph0->use_counts,
71267137
/*.visited_hash_set =*/ cgraph0->visited_hash_set,
71277138
/*.order =*/ cgraph0->order,
7139+
/*.uid =*/ 0
71287140
};
71297141

71307142
return cgraph;

0 commit comments

Comments
 (0)