llama-bench: print -n-cpu-moe when offloaded layers > 1 (ggml-org#20984)

am17an · web-flow · commit 35ae99294fd8 · 2026-03-25T21:17:27.000+08:00
diff --git a/tools/llama-bench/llama-bench.cpp b/tools/llama-bench/llama-bench.cpp
@@ -1807,7 +1807,7 @@ struct markdown_printer : public printer {
         if (!is_cpu_backend) {
             fields.emplace_back("n_gpu_layers");
         }
-        if (params.n_cpu_moe.size() > 1) {
+        if (params.n_cpu_moe.size() > 1 || params.n_cpu_moe != cmd_params_defaults.n_cpu_moe) {
             fields.emplace_back("n_cpu_moe");
         }
         if (params.n_threads.size() > 1 || params.n_threads != cmd_params_defaults.n_threads || is_cpu_backend) {

Original file line number	Diff line number	Diff line change
`@@ -1807,7 +1807,7 @@ struct markdown_printer : public printer {`
`1807`	`1807`	`if (!is_cpu_backend) {`
`1808`	`1808`	`fields.emplace_back("n_gpu_layers");`
`1809`	`1809`	`}`
`1810`		`- if (params.n_cpu_moe.size() > 1) {`
	`1810`	`+ if (params.n_cpu_moe.size() > 1 || params.n_cpu_moe != cmd_params_defaults.n_cpu_moe) {`
`1811`	`1811`	`fields.emplace_back("n_cpu_moe");`
`1812`	`1812`	`}`
`1813`	`1813`	`if (params.n_threads.size() > 1 || params.n_threads != cmd_params_defaults.n_threads || is_cpu_backend) {`