We read every piece of feedback and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
2 parents 9767436 + f40620f · commit 1e72989 · Copy full SHA for 1e72989
1 file changed
src/maxtext/configs/models/deepseek3-671b-2dfsdp.yml
@@ -71,9 +71,7 @@ logical_axis_rules: [
71
['activation_stage', 'stage'],
72
['embed', ['fsdp']],
73
['embed_moe', ['fsdp']],
74
- ['embed_vocab', ['fsdp']],
75
- ['embed_no_exp', ['fsdp']],
76
- ['embed_no_exp_moe', ['fsdp']],
+ ['embed_vocab', ['fsdp', 'fsdp_transpose']],
77
['q_lora', ['fsdp']],
78
['kv_lora', ['fsdp']],
79
['layers', 'stage'],
@@ -83,7 +81,6 @@ logical_axis_rules: [
83
81
['kv_heads', ['fsdp_transpose', 'expert']],
84
82
['heads', ['fsdp_transpose', 'expert']],
85
['mlp', ['fsdp_transpose', 'expert']],
86
- ['mlp_only_fsdp_transpose', ['fsdp_transpose']],
87
- ['mlp_only_tensor', ['expert']],
+ ['mlp_moe', ['fsdp_transpose', 'expert']],
88
['diloco', 'diloco'],
89
]
0 commit comments