Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 10 additions & 3 deletions fastdeploy/model_executor/layers/linear.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,10 +82,17 @@ def process_loaded_weights(self, layer, weights) -> None:
layer.weight.set_value(weights)

def apply(self, layer: nn.Layer, x: paddle.Tensor) -> paddle.Tensor:
linear_out = paddle.matmul(x, layer.weight)
if layer.with_bias:
linear_out = paddle.add(linear_out, layer.bias)
return linear_out
bias = layer.bias
assert bias.dim() == 1 and bias.shape[-1] == layer.weight.shape[-1], (
f"bias must be 1D with size equal to the last dim of weight, "
f"but got bias.shape={bias.shape}, weight.shape[-1]={layer.weight.shape[-1]}"
)
out = paddle.nn.functional.linear(x, layer.weight, bias)
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🔴 Bug 对于 paddle 格式的模型,layer.weight 的形状为 [input_size, output_size],而 paddle.nn.functional.linear 期望的 weight 形状为 [output_size, input_size]

根据代码分析:

  • torch 格式:layer.weight 形状为 [output_size, input_size](在 create_weights 中转置)
  • paddle 格式:layer.weight 形状为 [input_size, output_size](未转置)
  • UnquantizedLinearMethod.process_weights_after_loading 会被跳过

对于 paddle 格式的模型,直接使用 paddle.nn.functional.linear(x, layer.weight, bias) 可能会导致形状不匹配错误。

建议:

  1. 验证 paddle 格式模型的兼容性
  2. 如果不支持,考虑添加条件判断或注释说明

else:
out = paddle.matmul(x, layer.weight)

return out


class LinearBase(nn.Layer):
Expand Down
4 changes: 2 additions & 2 deletions tests/e2e/utils/rollout_routing_replay_test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -157,10 +157,10 @@ def check_routing_replay_chat_completion(openai_client, moe_layer_num: int, mode
model_path = os.getenv("MODEL_PATH")
if model_path:
baseline_path = os.path.join(
model_path, f"R3_BaseLine_dev_uint8_0402/routing_replay_output_baseline_{model_name}"
model_path, f"R3_BaseLine_dev_uint8_0403/routing_replay_output_baseline_{model_name}"
)
else:
baseline_path = f"./R3_BaseLine_dev_uint8_0402/routing_replay_output_baseline_{model_name}"
baseline_path = f"./R3_BaseLine_dev_uint8_0403/routing_replay_output_baseline_{model_name}"
stream_baseline_path = os.path.join(baseline_path, "r3_chat_completion_stream")

nonstream_baseline_path = os.path.join(baseline_path, "r3_chat_completion_nonstream")
Expand Down
Loading