File tree Expand file tree Collapse file tree
training/deepspeed_finetune_demo/configs Expand file tree Collapse file tree Original file line number Diff line number Diff line change 1+ {
2+ "train_batch_size" : 16 ,
3+ "bf16" : {
4+ "enabled" : true
5+ },
6+ "zero_optimization" : {
7+ "stage" : 2
8+ },
9+ "optimizer" : {
10+ "type" : "AdamW" ,
11+ "params" : {
12+ "lr" : 2e-06 ,
13+ "betas" : [0.9 , 0.999 ],
14+ "eps" : 1e-08 ,
15+ "weight_decay" : 0.01
16+ }
17+ },
18+ "expert_parallel" : {
19+ "enabled" : true ,
20+ "autoep_size" : 8 ,
21+ "expert_w1" : "gate_proj" ,
22+ "expert_w2" : "down_proj" ,
23+ "expert_w3" : "up_proj" ,
24+ "route_scale" : 2.446 ,
25+ "load_balance_coeff" : null
26+ },
27+ "gradient_accumulation_steps" : 2 ,
28+ "gradient_clipping" : 1.0 ,
29+ "zero_allow_untested_optimizer" : true
30+ }
Original file line number Diff line number Diff line change 1+ {
2+ "train_batch_size" : 16 ,
3+ "bf16" : {
4+ "enabled" : true
5+ },
6+ "zero_optimization" : {
7+ "stage" : 2
8+ },
9+ "optimizer" : {
10+ "type" : "Muon" ,
11+ "params" : {
12+ "muon_lr" : 0.002 ,
13+ "adam_lr" : 2e-06 ,
14+ "momentum" : 0.95 ,
15+ "ns_method" : "gram" ,
16+ "betas" : [
17+ 0.9 ,
18+ 0.999
19+ ],
20+ "eps" : 1e-08 ,
21+ "weight_decay" : 0.01
22+ }
23+ },
24+ "expert_parallel" : {
25+ "enabled" : true ,
26+ "autoep_size" : 8 ,
27+ "expert_w1" : "gate_proj" ,
28+ "expert_w2" : "down_proj" ,
29+ "expert_w3" : "up_proj" ,
30+ "route_scale" : 2.446 ,
31+ "load_balance_coeff" : null
32+ },
33+ "gradient_accumulation_steps" : 2 ,
34+ "gradient_clipping" : 1.0 ,
35+ "zero_allow_untested_optimizer" : true
36+ }
You can’t perform that action at this time.
0 commit comments