PaddlePaddle · zhoutianzi666 · May 21, 2026 · May 21, 2026 · May 21, 2026 · May 21, 2026
diff --git a/fastdeploy/model_executor/layers/linear.py b/fastdeploy/model_executor/layers/linear.py
@@ -41,6 +41,22 @@
 from .utils import _set_var_distributed, divide, get_tensor, modules_to_convert
 
 
+def may_be_do_cast(loaded_weight, param):
+    """Return loaded_weight after asserting it matches param in shape and dtype; int8 is viewed as float8_e4m3fn."""
+    assert param.shape == loaded_weight.shape, (
+        f" Attempted to load weight ({loaded_weight.shape}) " f"into parameter ({param.shape})"
+    )
+    # No general cast is done: only int8 -> float8_e4m3fn is bridged via view(); any other mismatch fails the assert.
+    if loaded_weight.dtype != param.dtype:
+        if loaded_weight.dtype == paddle.int8 and param.dtype == paddle.float8_e4m3fn:
+            loaded_weight = loaded_weight.view(param.dtype)
+        else:
+            assert (  # NOTE(review): always false in this branch; asserts are stripped under python -O, consider a raise
+                loaded_weight.dtype == param.dtype
+            ), f"loaded_weight.dtype: {loaded_weight.dtype}, param.dtype: {param.dtype}"
+    return loaded_weight
+
+
 class UnquantizedLinearMethod(QuantMethodBase):
     """Linear method without quantization."""
 
@@ -407,15 +423,7 @@ def weight_loader(self, param, loaded_weight, loaded_shard_id: Optional[str] = N
                 start=param_shard_offset,
                 end=param_shard_offset + param_shard_size,
             )
-        assert param.shape == loaded_weight.shape, (
-            f" Attempted to load weight ({loaded_weight.shape}) " f"into parameter ({param.shape})"
-        )
-        # Ensure loaded weight dtype matches model param dtype
-        if loaded_weight.dtype != param.dtype:
-            if loaded_weight.dtype == paddle.int8 and param.dtype == paddle.float8_e4m3fn:
-                loaded_weight = loaded_weight.view(param.dtype)
-            else:
-                loaded_weight = loaded_weight.cast(param.dtype)
+        loaded_weight = may_be_do_cast(loaded_weight, param)
         # (bukejiyu) After this fix, the early H2D copy for non-GPU devices is no longer needed and can be safely removed.
         h2d_copy(param, loaded_weight)
 
@@ -592,16 +600,7 @@ def weight_loader(self, param, loaded_weight, loaded_shard_id: Optional[str] = N
             if hasattr(param, "tensor_track"):
                 param.tensor_track.mark(start=param_shard_offset, end=param_shard_offset + param_shard_size)
             param = slice_fn(param, output_dim, start=param_shard_offset, end=param_shard_offset + param_shard_size)
-            assert param.shape == loaded_weight.shape, (
-                f" Attempted to load weight ({loaded_weight.shape}) " f"into parameter ({param.shape})"
-            )
-            # Ensure loaded weight dtype matches model param dtype
-            if loaded_weight.dtype != param.dtype:
-                if loaded_weight.dtype == paddle.int8 and param.dtype == paddle.float8_e4m3fn:
-                    loaded_weight = loaded_weight.view(param.dtype)
-                else:
-                    loaded_weight = loaded_weight.cast(param.dtype)
-
+            loaded_weight = may_be_do_cast(loaded_weight, param)
             h2d_copy(param, loaded_weight)
 
     def load_state_dict(self, state_dict: dict):
@@ -753,15 +752,7 @@ def weight_loader(self, param, loaded_weight, loaded_shard_id: Optional[str] = N
                 param.tensor_track.mark(start=param_shard_offset, end=param_shard_offset + param_shard_size)
 
             param = slice_fn(param, output_dim, start=param_shard_offset, end=param_shard_offset + param_shard_size)
-            assert param.shape == loaded_weight.shape, (
-                f" Attempted to load weight ({loaded_weight.shape}) " f"into parameter ({param.shape})"
-            )
-            # Ensure loaded weight dtype matches model param dtype
-            if loaded_weight.dtype != param.dtype:
-                if loaded_weight.dtype == paddle.int8 and param.dtype == paddle.float8_e4m3fn:
-                    loaded_weight = loaded_weight.view(param.dtype)
-                else:
-                    loaded_weight = loaded_weight.cast(param.dtype)
+            loaded_weight = may_be_do_cast(loaded_weight, param)
             h2d_copy(param, loaded_weight)
 
     def load_weight(self, state_dict: dict):
@@ -1279,15 +1270,7 @@ def qkv_weight_loader(self, param, loaded_weight, loaded_shard_id):
                 param.tensor_track.mark(start=param_shard_offset, end=param_shard_offset + param_shard_size)
 
             param = slice_fn(param, output_dim, start=param_shard_offset, end=param_shard_offset + param_shard_size)
-            assert param.shape == loaded_weight.shape, (
-                f" Attempted to load weight ({loaded_weight.shape}) " f"into parameter ({param.shape})"
-            )
-            # Ensure loaded weight dtype matches model param dtype
-            if loaded_weight.dtype != param.dtype:
-                if loaded_weight.dtype == paddle.int8 and param.dtype == paddle.float8_e4m3fn:
-                    loaded_weight = loaded_weight.view(param.dtype)
-                else:
-                    loaded_weight = loaded_weight.cast(param.dtype)
+            loaded_weight = may_be_do_cast(loaded_weight, param)
             h2d_copy(param, loaded_weight)
 
     def gate_weight_loader(self, param, loaded_weight):
@@ -1319,15 +1302,7 @@ def gate_weight_loader(self, param, loaded_weight):
             param.tensor_track.mark(start=param_shard_offset, end=param_shard_offset + param_shard_size)
 
         param = slice_fn(param, output_dim, start=param_shard_offset, end=param_shard_offset + param_shard_size)
-        assert param.shape == loaded_weight.shape, (
-            f"Attempted to load weight ({loaded_weight.shape}) " f"into parameter ({param.shape})"
-        )
-        # Ensure loaded weight dtype matches model param dtype
-        if loaded_weight.dtype != param.dtype:
-            if loaded_weight.dtype == paddle.int8 and param.dtype == paddle.float8_e4m3fn:
-                loaded_weight = loaded_weight.view(param.dtype)
-            else:
-                loaded_weight = loaded_weight.cast(param.dtype)
+        loaded_weight = may_be_do_cast(loaded_weight, param)
         h2d_copy(param, loaded_weight)
 
     def load_weight(self, state_dict: dict):

diff --git a/tests/model_executor/test_linear.py b/tests/model_executor/test_linear.py
@@ -189,12 +189,12 @@ def test_merged_and_column_weight_paths():
     layer_merge = MergedReplicatedLinear.__new__(MergedReplicatedLinear)
     layer_merge.__dict__.update(fd_config=make_fd_config(model_format="paddle"), output_sizes=[2, 2])
     param = TinyParam(paddle.zeros([2, 4], dtype="float32"), initialized=False, with_track=True)
-    loaded_weight = paddle.ones([2, 4], dtype="float16")
+    loaded_weight = paddle.ones([2, 4], dtype="float32")
     layer_merge.weight_loader(param, loaded_weight, loaded_shard_id=None)
     assert param.tensor_track.calls == [(0, loaded_weight.shape[-1])]
     np.testing.assert_allclose(param._tensor.numpy(), np.ones((2, 4), dtype="float32"))
     param_shard = TinyParam(paddle.zeros([2, 4], dtype="float32"), initialized=False)
-    layer_merge.weight_loader(param_shard, paddle.ones([2, 2], dtype="int8"), loaded_shard_id="gate")
+    layer_merge.weight_loader(param_shard, paddle.ones([2, 2], dtype="float32"), loaded_shard_id="gate")
     assert param_shard._is_initialized() is True
     assert not np.allclose(param_shard._tensor.numpy()[..., :2], 0)
     assert np.allclose(param_shard._tensor.numpy()[..., 2:], 0)
@@ -213,7 +213,7 @@ def test_merged_and_column_weight_paths():
     param_gate = TinyParam(paddle.zeros([2, 4], dtype="float32"), initialized=True)
     param_gate.output_dim = True
     param_gate.weight_need_transpose = True
-    layer_mc.weight_loader(param_gate, paddle.ones([4, 2], dtype="int8"), loaded_shard_id="gate")
+    layer_mc.weight_loader(param_gate, paddle.ones([4, 2], dtype="float32"), loaded_shard_id="gate")
     assert not np.allclose(param_gate._tensor.numpy()[..., :2], 0)
     assert np.allclose(param_gate._tensor.numpy()[..., 2:], 0)
     layer_mc.local_rank = 1