File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -60,16 +60,23 @@ scenarios:
6060 - ' ibm-granite/granite-3.0-3b-a800m-instruct'
6161 - ' ibm-research/moe-7b-1b-active-shared-experts'
6262
63- - name : accelerated-moe-granite4
63+ - name : accelerated-moe-granite-4
6464 framework_config :
65- - moe-scattermoe-granite-ep8
66- - moe-scattermoe-granite-ep8-foak
67- - moe-scattermoe-granite-ep8-padding-free-foak
65+ - # without acceleration
66+ - moe-scattermoe-granite-ep1
67+ - moe-scattermoe-granite-ep2
68+ - moe-scattermoe-granite-ep4
69+ - moe-scattermoe-granite-ep1-padding-free
70+ - moe-scattermoe-granite-ep1-padding-free-foak
71+ - moe-scattermoe-granite-ep2-padding-free
72+ - moe-scattermoe-granite-ep2-padding-free-foak
73+ - moe-scattermoe-granite-ep4-padding-free
74+ - moe-scattermoe-granite-ep4-padding-free-foak
6875 arguments :
6976 learning_rate : 5e-5
7077 torch_dtype : bfloat16
7178 gradient_accumulation_steps : null
72- per_device_train_batch_size : 8
79+ per_device_train_batch_size : 4
7380 logging_steps : 1
7481 packing : False
7582 adam_epsilon : 1e-8
You can’t perform that action at this time.
0 commit comments