Skip to content

Commit c9922e1

Browse files
committed
CUDA: compute fast hash instead of expensive props check
1 parent c08d28d commit c9922e1

File tree

2 files changed

+45
-0
lines changed

2 files changed

+45
-0
lines changed

ggml/src/ggml-cuda/common.cuh

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1186,6 +1186,8 @@ struct ggml_cuda_graph {
11861186
std::vector<cudaGraphNode_t> nodes;
11871187
bool disable_due_to_gpu_arch = false;
11881188
bool warmup_complete = false;
1189+
uint64_t last_props_hash = 0; // FNV hash of node properties from last successful check
1190+
int props_stable = 0; // consecutive checks with no change
11891191
std::vector<ggml_cuda_graph_node_properties> props;
11901192

11911193
// these are extra tensors (inputs) that participate in the ggml graph but are not nodes

ggml/src/ggml-cuda/ggml-cuda.cu

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3041,12 +3041,48 @@ static const void * ggml_cuda_graph_get_key(ggml_cgraph * cgraph) {
30413041
return cgraph->nodes[0];
30423042
}
30433043

3044+
//compute a FNV-1a over all nodes and srcs which should change when a cuda graph cannot be reused
3045+
static uint64_t ggml_cuda_graph_hash(ggml_cgraph * cgraph) {
3046+
uint64_t h = 0xcbf29ce484222325ULL;
3047+
constexpr uint64_t prime = 0x100000001b3ULL;
3048+
3049+
for (int i = 0; i < cgraph->n_nodes; i++) {
3050+
const ggml_tensor * node = cgraph->nodes[i];
3051+
3052+
h ^= (uintptr_t)node->data;
3053+
h *= prime;
3054+
3055+
for (int s = 0; s < GGML_MAX_SRC; s++) {
3056+
if (node->src[s]) {
3057+
h ^= (uintptr_t)node->src[s]->data;
3058+
h *= prime;
3059+
}
3060+
}
3061+
3062+
// Hash first 16 bytes of op_params
3063+
const uint64_t * params = (const uint64_t *)node->op_params;
3064+
h ^= params[0];
3065+
h *= prime;
3066+
h ^= params[1];
3067+
h *= prime;
3068+
}
3069+
3070+
return h;
3071+
}
3072+
30443073
static bool ggml_cuda_graph_update_required(ggml_backend_cuda_context * cuda_ctx, ggml_cgraph * cgraph) {
30453074
bool res = false;
30463075

30473076
const void * graph_key = ggml_cuda_graph_get_key(cgraph);
30483077
ggml_cuda_graph * graph = cuda_ctx->cuda_graph(graph_key);
30493078

3079+
if (graph->props_stable >= 2 && graph->props.size() == (size_t)cgraph->n_nodes) {
3080+
if (ggml_cuda_graph_hash(cgraph) == graph->last_props_hash) {
3081+
return false;
3082+
}
3083+
graph->props_stable = 0;
3084+
}
3085+
30503086
// Check if the graph size has changed
30513087
if (graph->props.size() != (size_t)cgraph->n_nodes) {
30523088
res = true;
@@ -3096,6 +3132,13 @@ static bool ggml_cuda_graph_update_required(ggml_backend_cuda_context * cuda_ctx
30963132
ggml_cuda_graph_node_set_properties(&graph->extra[i], srcs_extra[i]);
30973133
}
30983134

3135+
if (!res) {
3136+
graph->props_stable++;
3137+
graph->last_props_hash = ggml_cuda_graph_hash(cgraph);
3138+
} else {
3139+
graph->props_stable = 0;
3140+
}
3141+
30993142
return res;
31003143
}
31013144

0 commit comments

Comments
 (0)