
Commit 7f6cc4b

Add MoE to Gemma4 TP plan (#45219)
Reduce memory for the Gemma4 MoE model under tensor parallelism.

Signed-off-by: Wang, Yi <yi.a.wang@intel.com>
Co-authored-by: Cyril Vallez <cyril.vallez@huggingface.co>
1 parent: c850500

1 file changed: 3 additions & 0 deletions


src/transformers/models/gemma4/configuration_gemma4.py

@@ -132,6 +132,9 @@ class Gemma4TextConfig(PreTrainedConfig):
         "layers.*.mlp.gate_proj": "colwise",
         "layers.*.mlp.up_proj": "colwise",
         "layers.*.mlp.down_proj": "rowwise",
+        "layers.*.experts.gate_up_proj": "packed_colwise",
+        "layers.*.experts.down_proj": "rowwise",
+        "layers.*.experts": "moe_tp_experts",
     }
     base_model_pp_plan = {
         "embed_tokens": (["input_ids"], ["inputs_embeds"]),
