@@ -2872,6 +2872,7 @@ static bool ggml_cuda_graph_check_compability(ggml_cgraph * cgraph) {
     const std::string ffn_moe_down_bias_prefix = "ffn_moe_down_biased";
     const std::string nemotron_h_block_out_prefix = "nemotron_h_block_out";
     const std::string mamba2_y_add_d_prefix = "mamba2_y_add_d";
+    const std::string delta_net_prefix = "dnet_add";

     for (int i = 0; i < cgraph->n_nodes; i++) {
         ggml_tensor * node = cgraph->nodes[i];
@@ -2902,7 +2903,8 @@ static bool ggml_cuda_graph_check_compability(ggml_cgraph * cgraph) {
             strncmp(node->name, ffn_moe_up_bias_prefix.c_str(), ffn_moe_up_bias_prefix.size()) != 0 &&
             strncmp(node->name, ffn_moe_down_bias_prefix.c_str(), ffn_moe_down_bias_prefix.size()) != 0 &&
             strncmp(node->name, nemotron_h_block_out_prefix.c_str(), nemotron_h_block_out_prefix.size()) != 0 &&
-            strncmp(node->name, mamba2_y_add_d_prefix.c_str(), mamba2_y_add_d_prefix.size()) != 0) {
+            strncmp(node->name, mamba2_y_add_d_prefix.c_str(), mamba2_y_add_d_prefix.size()) != 0 &&
+            strncmp(node->name, delta_net_prefix.c_str(), delta_net_prefix.size()) != 0) {
             // disable CUDA graphs for batch size > 1 for now while excluding the matrix-matrix addition as part of Gemma3n's `project_per_layer_input` operation
             // by means of matching node names. See
             // https://github.com/ggml-org/llama.cpp/blob/f9a31eea06a859e34cecb88b4d020c7f03d86cc4/src/llama-model.cpp#L10199-L10241 and
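The hunk above amounts to a prefix whitelist over node names: an ADD node seen with batch size > 1 only keeps CUDA graphs enabled if its name starts with one of the known fused-op prefixes, which now includes the delta-net "dnet_add" nodes. A minimal standalone sketch of that matching idea, separate from the actual llama.cpp code (the helper name node_keeps_cuda_graphs is illustrative only; the prefix strings listed are the ones visible in the diff):

// Sketch of the prefix-whitelist check, assuming the same strncmp-based matching as above.
// node_keeps_cuda_graphs is an illustrative name, not a llama.cpp function.
#include <cstring>
#include <string>
#include <vector>

static bool node_keeps_cuda_graphs(const char * node_name) {
    // A node whose name starts with one of these prefixes is treated as a known fused
    // addition and does not force CUDA graphs off for batch size > 1.
    static const std::vector<std::string> prefixes = {
        "ffn_moe_down_biased",
        "nemotron_h_block_out",
        "mamba2_y_add_d",
        "dnet_add",
    };
    for (const auto & p : prefixes) {
        if (strncmp(node_name, p.c_str(), p.size()) == 0) {
            return true;
        }
    }
    return false;
}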
@@ -4544,6 +4546,8 @@ static bool ggml_backend_cuda_device_supports_op(ggml_backend_dev_t dev, const ggml_tensor * op) {
                 case GGML_UNARY_OP_CEIL:
                 case GGML_UNARY_OP_ROUND:
                 case GGML_UNARY_OP_TRUNC:
+                    // TODO: should become:
+                    // return ggml_is_contiguous_rows(op->src[0]);
                     return ggml_is_contiguous(op->src[0]);
                 default:
                     return false;
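The TODO suggests relaxing the unary-op requirement from full contiguity to per-row contiguity. A small hedged sketch of the difference, assuming ggml_is_contiguous_rows (named only in the TODO above) merely requires the elements within each row to be packed; the tensor shape and strided view below are made up for illustration:

// Sketch: a strided row view is expected to fail ggml_is_contiguous but pass ggml_is_contiguous_rows.
#include <cstdio>
#include "ggml.h"

int main() {
    ggml_init_params params = { /* mem_size  */ 16*1024*1024,
                                /* mem_buffer*/ NULL,
                                /* no_alloc  */ false };
    ggml_context * ctx = ggml_init(params);

    // 4x4 F32 tensor: fully contiguous.
    ggml_tensor * t = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 4, 4);

    // View every other row: each row stays packed, but there is a gap between rows,
    // so the view should be rows-contiguous without being fully contiguous.
    ggml_tensor * v = ggml_view_2d(ctx, t, 4, 2, 2*t->nb[1], 0);

    printf("full tensor : contiguous=%d rows-contiguous=%d\n", ggml_is_contiguous(t), ggml_is_contiguous_rows(t));
    printf("strided view: contiguous=%d rows-contiguous=%d\n", ggml_is_contiguous(v), ggml_is_contiguous_rows(v));

    ggml_free(ctx);
    return 0;
}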