Skip to content

Commit 364197c

Browse files
authored
support w4afp8 mtp (PaddlePaddle#5429)
1 parent 438c9f7 commit 364197c

2 files changed

Lines changed: 8 additions & 4 deletions

File tree

fastdeploy/model_executor/layers/moe/fused_moe_cutlass_backend.py

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -930,11 +930,13 @@ def process_loaded_weights(self, layer: nn.Layer, state_dict):
930930
Paddle cutlass load weight process.
931931
"""
932932
if not layer.is_quantized:
933+
prefix_layer_name = layer.fd_config.model_config.prefix_layer_name
933934
logger.info(
934-
f"Rotating ernie.layers.{layer.layer_idx}.mlp.experts.[{layer.ep_rank * layer.num_local_experts},{layer.ep_rank * layer.num_local_experts + layer.num_local_experts}).down_proj.weight..."
935+
f"Rotating ernie.{prefix_layer_name}.{layer.layer_idx}.mlp.experts.[{layer.ep_rank * layer.num_local_experts},{layer.ep_rank * layer.num_local_experts + layer.num_local_experts}).down_proj.weight..."
935936
)
936937
rotate_model(
937938
state_dict,
939+
prefix_layer_name,
938940
layer.layer_idx,
939941
layer.num_local_experts,
940942
layer.hidden_size,

fastdeploy/model_executor/layers/utils.py

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -141,15 +141,17 @@ def get_orthogonal_matrix(size, mode="hadamard", device="cuda"):
141141
raise ValueError(f"Unknown mode {mode}")
142142

143143

144-
def rotate_model(state_dict, layer_idx, moe_num_experts=48, hidden_size=7168, moe_intermediate_size=3584, ep_rank=0):
144+
def rotate_model(
145+
state_dict, prefix_layer_name, layer_idx, moe_num_experts, hidden_size, moe_intermediate_size, ep_rank=0
146+
):
145147
with paddle.no_grad():
146148
# collect hadamard rotation matrix [moe_intermediate_size, moe_intermediate_size]
147149
Q_ffn2, moe_block_size = get_orthogonal_matrix(size=moe_intermediate_size, mode="hadamard_ffn2")
148150
# down_proj.weight: [moe_intermediate_size, hidden_size]
149151
expert_list = [
150152
get_tensor(
151153
state_dict[
152-
f"ernie.layers.{layer_idx}.mlp.experts.{ep_rank * moe_num_experts + expert_idx}.down_proj.weight"
154+
f"ernie.{prefix_layer_name}.{layer_idx}.mlp.experts.{ep_rank * moe_num_experts + expert_idx}.down_proj.weight"
153155
]
154156
)
155157
for expert_idx in range(moe_num_experts)
@@ -159,7 +161,7 @@ def rotate_model(state_dict, layer_idx, moe_num_experts=48, hidden_size=7168, mo
159161
for expert_idx in range(moe_num_experts):
160162
rotated_weight = new_moe_weight[:, expert_idx * hidden_size : (expert_idx + 1) * hidden_size]
161163
expert_idx_local = ep_rank * moe_num_experts + expert_idx
162-
state_dict[f"ernie.layers.{layer_idx}.mlp.experts.{expert_idx_local}.down_proj.weight"] = (
164+
state_dict[f"ernie.{prefix_layer_name}.{layer_idx}.mlp.experts.{expert_idx_local}.down_proj.weight"] = (
163165
rotated_weight.cpu()
164166
)
165167
del moe_weight, new_moe_weight, rotated_weight

0 commit comments

Comments
 (0)