Skip to content

Commit 324f083

Browse files
authored
[Cherry-Pick][Optimization] merge matmul and add(#6986) (#7184)
* replace matmul+add to linear
* modify baseline
1 parent 19cac90 commit 324f083

File tree

4 files changed

+15
-7
lines changed

4 files changed

+15
-7
lines changed

fastdeploy/model_executor/layers/linear.py

Lines changed: 9 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -82,10 +82,16 @@ def process_loaded_weights(self, layer, weights) -> None:
8282
layer.weight.set_value(weights)
8383

8484
def apply(self, layer: nn.Layer, x: paddle.Tensor) -> paddle.Tensor:
85-
linear_out = paddle.matmul(x, layer.weight)
8685
if layer.with_bias:
87-
linear_out = paddle.add(linear_out, layer.bias)
88-
return linear_out
86+
bias = layer.bias
87+
assert bias.dim() == 1 and bias.shape[-1] == layer.weight.shape[-1], (
88+
f"bias must be 1D with size equal to the last dim of weight, "
89+
f"but got bias.shape={bias.shape}, weight.shape[-1]={layer.weight.shape[-1]}"
90+
)
91+
out = paddle.nn.functional.linear(x, layer.weight, bias)
92+
else:
93+
out = paddle.matmul(x, layer.weight)
94+
return out
8995

9096

9197
class LinearBase(nn.Layer):

tests/e2e/4cards_cases/test_GLM_45_AIR_mtp_tp4.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -184,7 +184,7 @@ def test_lm_head_fp32(api_url, headers, consistent_payload):
184184
# 校验返回内容与概率信息
185185
assert (
186186
resp_json["choices"][0]["message"]["content"]
187-
== "\n<think>这个问题是关于牛顿的三大运动定律的。牛顿的三大运动定律是经典"
187+
== "\n<think>我需要回答牛顿的三大运动定律是什么。牛顿的三大运动定律是经典"
188188
), f"The response content is not as expected {resp_json['choices'][0]['message']['content']}."
189189

190190

tests/e2e/4cards_cases/test_GLM_45_AIR_tp4.py

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -181,7 +181,7 @@ def test_lm_head_fp32(api_url, headers, consistent_payload):
181181
# 校验返回内容与概率信息
182182
assert (
183183
resp_json["choices"][0]["message"]["content"]
184-
== "\n<think>这个问题是关于牛顿的三大运动定律的。牛顿的三大运动定律是经典"
184+
== "\n<think>我需要回答牛顿的三大运动定律是什么。牛顿的三大运动定律是经典"
185185
), f"The response content is not as expected {resp_json['choices'][0]['message']['content']}."
186186

187187

tests/e2e/utils/rollout_routing_replay_test_utils.py

Lines changed: 4 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -156,9 +156,11 @@ def check_routing_replay_chat_completion(openai_client, moe_layer_num: int, mode
156156
cur_save_routing_path = f"./R3_tmp/routing_replay_output_{model_name}/"
157157
model_path = os.getenv("MODEL_PATH")
158158
if model_path:
159-
baseline_path = os.path.join(model_path, f"R3_BaseLine_25_uint8/routing_replay_output_baseline_{model_name}")
159+
baseline_path = os.path.join(
160+
model_path, f"R3_BaseLine_25_uint8_0403/routing_replay_output_baseline_{model_name}"
161+
)
160162
else:
161-
baseline_path = f"./R3_BaseLine_25_uint8/routing_replay_output_baseline_{model_name}"
163+
baseline_path = f"./R3_BaseLine_25_uint8_0403/routing_replay_output_baseline_{model_name}"
162164
stream_baseline_path = os.path.join(baseline_path, "r3_chat_completion_stream")
163165

164166
nonstream_baseline_path = os.path.join(baseline_path, "r3_chat_completion_nonstream")

0 commit comments

Comments
 (0)