@@ -41,13 +41,13 @@ scenarios:
4141 - # without acceleration
4242 - moe-scattermoe-granite-ep1
4343 - moe-scattermoe-granite-ep2
44- - moe-scattermoe-granite-ep4
45- - moe-scattermoe-granite-ep1-padding-free
46- - moe-scattermoe-granite-ep1-padding-free-foak
47- - moe-scattermoe-granite-ep2-padding-free
48- - moe-scattermoe-granite-ep2-padding-free-foak
49- - moe-scattermoe-granite-ep4-padding-free
50- - moe-scattermoe-granite-ep4-padding-free-foak
44+ # - moe-scattermoe-granite-ep4
45+ # - moe-scattermoe-granite-ep1-padding-free
46+ # - moe-scattermoe-granite-ep1-padding-free-foak
47+ # - moe-scattermoe-granite-ep2-padding-free
48+ # - moe-scattermoe-granite-ep2-padding-free-foak
49+ # - moe-scattermoe-granite-ep4-padding-free
50+ # - moe-scattermoe-granite-ep4-padding-free-foak
5151 arguments:
5252 learning_rate: 5e-5
5353 torch_dtype: bfloat16
@@ -58,8 +58,8 @@ scenarios:
5858 adam_epsilon: 1e-8
5959 model_name_or_path:
6060 - 'ibm-granite/granite-4.0-tiny-preview'
61- - 'ibm-granite/granite-3.0-3b-a800m-instruct'
62- - 'ibm-research/moe-7b-1b-active-shared-experts'
61+ # - 'ibm-granite/granite-3.0-3b-a800m-instruct'
62+ # - 'ibm-research/moe-7b-1b-active-shared-experts'
6363
6464
6565 - name: accelerated-moe-full-mixtral
0 commit comments