Skip to content

Commit 0f1bb60

Browse files
authored
model : remove duplicate wo_s scale after build_attn (Qwen3, LLaMA) (#22421)
Signed-off-by: Yash Nankani <ynankani@nvidia.com>
1 parent d13540b commit 0f1bb60

3 files changed

Lines changed: 0 additions & 9 deletions

File tree

src/models/llama.cpp

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -72,9 +72,6 @@ llm_build_llama<embed>::llm_build_llama(const llama_model & model, const llm_gra
72 | 72 |   | cur = build_attn(inp_attn,
73 | 73 |   | model.layers[il].wo, model.layers[il].wo_b, model.layers[il].wo_s,
74 | 74 |   | Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, kq_scale, il);
75 |    | - | if (model.layers[il].wo_s) {
76 |    | - | cur = ggml_mul(ctx0, cur, model.layers[il].wo_s);
77 |    | - | }
78 | 75 |   | cb(cur, "attn_out", il);
79 | 76 |   | }
80 | 77 |   | if (il == n_layer - 1 && inp_out_ids) {

src/models/qwen3.cpp

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -58,9 +58,6 @@ llm_build_qwen3::llm_build_qwen3(const llama_model & model, const llm_graph_para
58 | 58 |   | cur = build_attn(inp_attn,
59 | 59 |   | model.layers[il].wo, model.layers[il].wo_b, model.layers[il].wo_s,
60 | 60 |   | Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
61 |    | - | if (model.layers[il].wo_s) {
62 |    | - | cur = ggml_mul(ctx0, cur, model.layers[il].wo_s);
63 |    | - | }
64 | 61 |   | }
65 | 62 |   | if (il == n_layer - 1 && inp_out_ids) {
66 | 63 |   | cur = ggml_get_rows(ctx0, cur, inp_out_ids);

src/models/qwen3moe.cpp

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -58,9 +58,6 @@ llm_build_qwen3moe::llm_build_qwen3moe(const llama_model & model, const llm_grap
58 | 58 |   | cur = build_attn(inp_attn,
59 | 59 |   | model.layers[il].wo, model.layers[il].wo_b, model.layers[il].wo_s,
60 | 60 |   | Qcur, Kcur, Vcur, nullptr, nullptr, nullptr, 1.0f/sqrtf(float(n_embd_head)), il);
61 |    | - | if (model.layers[il].wo_s) {
62 |    | - | cur = ggml_mul(ctx0, cur, model.layers[il].wo_s);
63 |    | - | }
64 | 61 |   | }
65 | 62 |   | if (il == n_layer - 1 && inp_out_ids) {
66 | 63 |   | cur = ggml_get_rows(ctx0, cur, inp_out_ids);

0 commit comments

Comments (0)