We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 6f01998 commit ec47f96 (Copy full SHA for ec47f96)
1 file changed
src/maxtext/configs/models/deepseek3-671b-batchsplit.yml
@@ -75,12 +75,14 @@ logical_axis_rules: [
75
['q_lora', ['fsdp']],
76
['kv_lora', ['fsdp']],
77
['layers', 'stage'],
78
- ['q_lora_up_proj', ['fsdp_transpose', 'expert']],
79
- ['kv_lora_up_proj', ['fsdp_transpose', 'expert']],
80
- ['q_heads', ['fsdp_transpose', 'expert']],
81
- ['kv_heads', ['fsdp_transpose', 'expert']],
82
- ['heads', ['fsdp_transpose', 'expert']],
83
- ['mlp', ['fsdp_transpose', 'expert']],
+ ['q_lora_up_proj', ['fsdp_transpose']],
+ ['kv_lora_up_proj', ['fsdp_transpose']],
+ ['q_heads', ['fsdp_transpose']],
+ ['kv_heads', ['fsdp_transpose']],
+ ['heads', ['fsdp_transpose']],
+ ['mlp', ['fsdp_transpose']],
84
['mlp_only_fsdp_transpose', ['fsdp_transpose']],
85
- ['mlp_only_tensor', ['expert']],
+ ['expert_only', ['expert']],
86
+ ['fsdp_transpose_only', ['fsdp_transpose']],
87
+ ['fsdp_transpose_and_expert', ['fsdp_transpose', 'expert']],
88
]
0 commit comments