8282#include < cstdlib>
8383#include < string>
8484#include < vector>
85- #include < unordered_set>
8685
8786static_assert (sizeof (half) == sizeof (ggml_fp16_t ), " wrong fp16 size" );
8887
@@ -2969,74 +2968,6 @@ static bool ggml_cuda_graph_check_compability(ggml_cgraph * cgraph) {
29692968 return use_cuda_graph;
29702969}
29712970
2972- static void ggml_cuda_graph_node_set_properties (ggml_cuda_graph_node_properties * props, ggml_tensor * node) {
2973- memset (props, 0 , sizeof (ggml_cuda_graph_node_properties));
2974- props->node_data = node->data ;
2975- props->node_op = node->op ;
2976- props->node_type = node->type ;
2977- props->flags = node->flags ;
2978- for (int i = 0 ; i < GGML_MAX_DIMS; i++) {
2979- props->ne [i] = node->ne [i];
2980- props->nb [i] = node->nb [i];
2981- }
2982- for (int i = 0 ; i < GGML_MAX_SRC; i++) {
2983- if (!node->src [i]) {
2984- continue ;
2985- }
2986-
2987- props->src_data [i] = node->src [i]->data ;
2988- }
2989- memcpy (props->op_params , node->op_params , GGML_MAX_OP_PARAMS);
2990- }
2991-
2992- static bool ggml_cuda_graph_node_properties_match (ggml_tensor * node, ggml_cuda_graph_node_properties * props) {
2993- if (node->data != props->node_data && node->op != GGML_OP_VIEW) {
2994- return false ;
2995- }
2996-
2997- if (node->op != props->node_op ) {
2998- return false ;
2999- }
3000-
3001- if (node->type != props->node_type ) {
3002- return false ;
3003- }
3004-
3005- for (int i = 0 ; i < GGML_MAX_DIMS; i++) {
3006- if (node->ne [i] != props->ne [i]) {
3007- return false ;
3008- }
3009- if (node->nb [i] != props->nb [i]) {
3010- return false ;
3011- }
3012- }
3013-
3014- if (node->op != GGML_OP_VIEW) {
3015- for (int i = 0 ; i < GGML_MAX_SRC; i++) {
3016- if (!node->src [i]) {
3017- if (props->src_data [i] != nullptr ) {
3018- return false ;
3019- }
3020- continue ;
3021- }
3022-
3023- if (node->src [i]->data != props->src_data [i]) {
3024- return false ;
3025- }
3026- }
3027- }
3028-
3029- if (memcmp (props->op_params , node->op_params , GGML_MAX_OP_PARAMS) != 0 ) {
3030- return false ;
3031- }
3032-
3033- if ((node->flags & GGML_TENSOR_FLAG_COMPUTE) != (props->flags & GGML_TENSOR_FLAG_COMPUTE)) {
3034- return false ;
3035- }
3036-
3037- return true ;
3038- }
3039-
30402971static const void * ggml_cuda_graph_get_key (ggml_cgraph * cgraph) {
30412972 return cgraph->nodes [0 ];
30422973}
@@ -3048,52 +2979,18 @@ static bool ggml_cuda_graph_update_required(ggml_backend_cuda_context * cuda_ctx
30482979 ggml_cuda_graph * graph = cuda_ctx->cuda_graph (graph_key);
30492980
30502981 // Check if the graph size has changed
3051- if (graph->props .size () != ( size_t ) cgraph->n_nodes ) {
2982+ if (( int ) graph->nodes_copy .size () != cgraph->n_nodes ) {
30522983 res = true ;
3053- graph->props .resize (cgraph->n_nodes );
2984+ graph->nodes_copy .resize (cgraph->n_nodes );
30542985 }
30552986
3056- // Loop over nodes in GGML graph to determine if CUDA graph update is required
3057- // and store properties to allow this comparison for the next token
3058- std::unordered_set<ggml_tensor *> seen_node;
3059- std::vector<ggml_tensor *> srcs_extra;
30602987 for (int i = 0 ; i < cgraph->n_nodes ; i++) {
3061- bool props_match = true ;
3062-
3063- seen_node.insert (cgraph->nodes [i]);
3064-
30652988 if (!res) {
3066- props_match = ggml_cuda_graph_node_properties_match (cgraph->nodes [i], &graph->props [i]);
3067- }
3068- if (!props_match) {
3069- res = true ;
3070- }
3071- ggml_cuda_graph_node_set_properties (&graph->props [i], cgraph->nodes [i]);
3072-
3073- for (int src_idx = 0 ; src_idx < GGML_MAX_SRC; ++src_idx) {
3074- ggml_tensor * src = cgraph->nodes [i]->src [src_idx];
3075- if (src && seen_node.find (src) == seen_node.end ()) {
3076- srcs_extra.push_back (src);
2989+ if (memcmp (&graph->nodes_copy [i], cgraph->nodes [i], sizeof (ggml_tensor)) != 0 ) {
2990+ res = true ;
30772991 }
30782992 }
3079- }
3080-
3081- if (graph->extra .size () != (size_t ) srcs_extra.size ()) {
3082- res = true ;
3083- graph->extra .resize (srcs_extra.size ());
3084- }
3085-
3086- for (size_t i = 0 ; i < srcs_extra.size (); ++i) {
3087- bool props_match = true ;
3088-
3089- if (!res) {
3090- props_match = ggml_cuda_graph_node_properties_match (srcs_extra[i], &graph->extra [i]);
3091- }
3092-
3093- if (!props_match) {
3094- res = true ;
3095- }
3096- ggml_cuda_graph_node_set_properties (&graph->extra [i], srcs_extra[i]);
2993+ memcpy (&graph->nodes_copy [i], cgraph->nodes [i], sizeof (ggml_tensor));
30972994 }
30982995
30992996 return res;
0 commit comments