@@ -3041,12 +3041,48 @@ static const void * ggml_cuda_graph_get_key(ggml_cgraph * cgraph) {
30413041 return cgraph->nodes [0 ];
30423042}
30433043
3044+ // compute a FNV-1a over all nodes and srcs which should change when a cuda graph cannot be reused
3045+ static uint64_t ggml_cuda_graph_hash (ggml_cgraph * cgraph) {
3046+ uint64_t h = 0xcbf29ce484222325ULL ;
3047+ constexpr uint64_t prime = 0x100000001b3ULL ;
3048+
3049+ for (int i = 0 ; i < cgraph->n_nodes ; i++) {
3050+ const ggml_tensor * node = cgraph->nodes [i];
3051+
3052+ h ^= (uintptr_t )node->data ;
3053+ h *= prime;
3054+
3055+ for (int s = 0 ; s < GGML_MAX_SRC; s++) {
3056+ if (node->src [s]) {
3057+ h ^= (uintptr_t )node->src [s]->data ;
3058+ h *= prime;
3059+ }
3060+ }
3061+
3062+ // Hash first 16 bytes of op_params
3063+ const uint64_t * params = (const uint64_t *)node->op_params ;
3064+ h ^= params[0 ];
3065+ h *= prime;
3066+ h ^= params[1 ];
3067+ h *= prime;
3068+ }
3069+
3070+ return h;
3071+ }
3072+
30443073static bool ggml_cuda_graph_update_required (ggml_backend_cuda_context * cuda_ctx, ggml_cgraph * cgraph) {
30453074 bool res = false ;
30463075
30473076 const void * graph_key = ggml_cuda_graph_get_key (cgraph);
30483077 ggml_cuda_graph * graph = cuda_ctx->cuda_graph (graph_key);
30493078
3079+ if (graph->props_stable >= 2 && graph->props .size () == (size_t )cgraph->n_nodes ) {
3080+ if (ggml_cuda_graph_hash (cgraph) == graph->last_props_hash ) {
3081+ return false ;
3082+ }
3083+ graph->props_stable = 0 ;
3084+ }
3085+
30503086 // Check if the graph size has changed
30513087 if (graph->props .size () != (size_t )cgraph->n_nodes ) {
30523088 res = true ;
@@ -3096,6 +3132,13 @@ static bool ggml_cuda_graph_update_required(ggml_backend_cuda_context * cuda_ctx
30963132 ggml_cuda_graph_node_set_properties (&graph->extra [i], srcs_extra[i]);
30973133 }
30983134
3135+ if (!res) {
3136+ graph->props_stable ++;
3137+ graph->last_props_hash = ggml_cuda_graph_hash (cgraph);
3138+ } else {
3139+ graph->props_stable = 0 ;
3140+ }
3141+
30993142 return res;
31003143}
31013144
0 commit comments