From 483fd1cc33285a3a95690c483dad82c032afa3ee Mon Sep 17 00:00:00 2001 From: zhanghonggeng Date: Fri, 17 Apr 2026 09:58:11 +0000 Subject: [PATCH] [GLM4MoE] Set attention_softmax_in_fp32 and bf16 defaults in GLMMoEModelProvider --- paddleformers/transformers/glm4_moe/modeling.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/paddleformers/transformers/glm4_moe/modeling.py b/paddleformers/transformers/glm4_moe/modeling.py index 7ded2baffb9..67dc1dff385 100644 --- a/paddleformers/transformers/glm4_moe/modeling.py +++ b/paddleformers/transformers/glm4_moe/modeling.py @@ -88,6 +88,9 @@ class GLMMoEModelProvider(GPTModelProvider): router_aux_loss_coef: float = 0.001 moe_grouped_gemm: bool = False + attention_softmax_in_fp32: bool = True + bf16: bool = True + def eager_attention_forward( module: nn.Layer,