diff --git a/src/llama-graph.cpp b/src/llama-graph.cpp index 0e7d96ca10d..aa8a35721fa 100644 --- a/src/llama-graph.cpp +++ b/src/llama-graph.cpp @@ -1185,8 +1185,8 @@ ggml_tensor * llm_graph_context::build_ffn( if (down) { cur = build_lora_mm(down, cur); - if (arch == LLM_ARCH_GLM4 || arch == LLM_ARCH_GLM4_MOE || arch == LLM_ARCH_JAIS2) { - // GLM4, GLM4_MOE, and JAIS2 seem to have numerical issues with half-precision accumulators + if (arch == LLM_ARCH_GLM4 || arch == LLM_ARCH_GLM4_MOE || arch == LLM_ARCH_JAIS2 || arch == LLM_ARCH_GEMMA4) { + // certain models seem to have numerical issues with half-precision accumulators ggml_mul_mat_set_prec(cur, GGML_PREC_F32); } }