TP: fix granularity for Qwen 3.5/3.6 + 3 GPUs (ggml-org#23843)

JohannesGaessler · web-flow · commit 8b0e0db60645 · 2026-05-30T16:48:00.000+03:00
* TP (tensor parallelism): fix granularity for Qwen 3.5/3.6 + 3 GPUs

* fix afmoe TP
diff --git a/src/llama-model.cpp b/src/llama-model.cpp
@@ -410,16 +410,16 @@ struct ggml_backend_meta_split_state llama_meta_device_get_split_state(const str
     auto get_tensor_config = [&]() -> tensor_config {
         // standard attention
         if (std::regex_match(tensor_name, pattern_q_weight) || std::regex_match(tensor_name, pattern_kv_weight)) {
-            return get_tensor_config_impl(GGML_BACKEND_SPLIT_AXIS_1, "attn_output.weight");
+            return get_tensor_config_impl(GGML_BACKEND_SPLIT_AXIS_1, "attn_output.weight", "ssm_out.weight");
         }
         if (std::regex_match(tensor_name, pattern_q_bias) || std::regex_match(tensor_name, pattern_kv_bias)) {
-            return get_tensor_config_impl(GGML_BACKEND_SPLIT_AXIS_0, "attn_output.weight");
+            return get_tensor_config_impl(GGML_BACKEND_SPLIT_AXIS_0, "attn_output.weight", "ssm_out.weight");
         }
         if (std::regex_match(tensor_name, pattern_qkv_weight)) {
-            return get_tensor_config_impl(GGML_BACKEND_SPLIT_AXIS_1);
+            return get_tensor_config_impl(GGML_BACKEND_SPLIT_AXIS_1, "attn_output.weight", "ssm_out.weight");
         }
         if ( std::regex_match(tensor_name, pattern_qkv_bias)) {
-            return get_tensor_config_impl(GGML_BACKEND_SPLIT_AXIS_0);
+            return get_tensor_config_impl(GGML_BACKEND_SPLIT_AXIS_0, "attn_output.weight", "ssm_out.weight");
         }
         if (std::regex_match(tensor_name, pattern_qk_norm)) {
             return get_tensor_config_impl(tensor->ne[1] == 1 ? GGML_BACKEND_SPLIT_AXIS_MIRRORED : GGML_BACKEND_SPLIT_AXIS_1, "attn_output.weight");
@@ -435,7 +435,7 @@ struct ggml_backend_meta_split_state llama_meta_device_get_split_state(const str
         }
 
         if (std::regex_match(tensor_name, pattern_attn_gate_weight)) {
-            return get_tensor_config_impl(GGML_BACKEND_SPLIT_AXIS_1);
+            return get_tensor_config_impl(GGML_BACKEND_SPLIT_AXIS_1, "attn_output.weight", "ssm_out.weight");
         }
         if (std::regex_match(tensor_name, pattern_ssm_dt) || std::regex_match(tensor_name, pattern_ssm_a)) {
             return get_tensor_config_impl(GGML_BACKEND_SPLIT_AXIS_0, "ssm_out.weight");

Original file line number	Diff line number	Diff line change
`@@ -410,16 +410,16 @@ struct ggml_backend_meta_split_state llama_meta_device_get_split_state(const str`
`410`	`410`	`auto get_tensor_config = [&]() -> tensor_config {`
`411`	`411`	`// standard attention`
`412`	`412`	`if (std::regex_match(tensor_name, pattern_q_weight) \|\| std::regex_match(tensor_name, pattern_kv_weight)) {`
`413`		`- return get_tensor_config_impl(GGML_BACKEND_SPLIT_AXIS_1, "attn_output.weight");`
	`413`	`+ return get_tensor_config_impl(GGML_BACKEND_SPLIT_AXIS_1, "attn_output.weight", "ssm_out.weight");`
`414`	`414`	`}`
`415`	`415`	`if (std::regex_match(tensor_name, pattern_q_bias) \|\| std::regex_match(tensor_name, pattern_kv_bias)) {`
`416`		`- return get_tensor_config_impl(GGML_BACKEND_SPLIT_AXIS_0, "attn_output.weight");`
	`416`	`+ return get_tensor_config_impl(GGML_BACKEND_SPLIT_AXIS_0, "attn_output.weight", "ssm_out.weight");`
`417`	`417`	`}`
`418`	`418`	`if (std::regex_match(tensor_name, pattern_qkv_weight)) {`
`419`		`- return get_tensor_config_impl(GGML_BACKEND_SPLIT_AXIS_1);`
	`419`	`+ return get_tensor_config_impl(GGML_BACKEND_SPLIT_AXIS_1, "attn_output.weight", "ssm_out.weight");`
`420`	`420`	`}`
`421`	`421`	`if ( std::regex_match(tensor_name, pattern_qkv_bias)) {`
`422`		`- return get_tensor_config_impl(GGML_BACKEND_SPLIT_AXIS_0);`
	`422`	`+ return get_tensor_config_impl(GGML_BACKEND_SPLIT_AXIS_0, "attn_output.weight", "ssm_out.weight");`
`423`	`423`	`}`
`424`	`424`	`if (std::regex_match(tensor_name, pattern_qk_norm)) {`
`425`	`425`	`return get_tensor_config_impl(tensor->ne[1] == 1 ? GGML_BACKEND_SPLIT_AXIS_MIRRORED : GGML_BACKEND_SPLIT_AXIS_1, "attn_output.weight");`
`@@ -435,7 +435,7 @@ struct ggml_backend_meta_split_state llama_meta_device_get_split_state(const str`
`435`	`435`	`}`
`436`	`436`
`437`	`437`	`if (std::regex_match(tensor_name, pattern_attn_gate_weight)) {`
`438`		`- return get_tensor_config_impl(GGML_BACKEND_SPLIT_AXIS_1);`
	`438`	`+ return get_tensor_config_impl(GGML_BACKEND_SPLIT_AXIS_1, "attn_output.weight", "ssm_out.weight");`
`439`	`439`	`}`
`440`	`440`	`if (std::regex_match(tensor_name, pattern_ssm_dt) \|\| std::regex_match(tensor_name, pattern_ssm_a)) {`
`441`	`441`	`return get_tensor_config_impl(GGML_BACKEND_SPLIT_AXIS_0, "ssm_out.weight");`