@@ -42,6 +42,8 @@ def convert_model_config(modalities_config: dict) -> GPT2Config:
     config = modalities_config["model_raw" if "model_raw" in modalities_config else "model"]["config"]
     _check_conversion_criteria(config)
 
+    ffn_norm_key = "ffn_norm_config"
+
     return GPT2Config(
         vocab_size=config["vocab_size"],
         hidden_size=config["n_embd"],
@@ -53,9 +55,9 @@ def convert_model_config(modalities_config: dict) -> GPT2Config:
         attention_bias=config["bias"],
         mlp_bias=config["bias"],
         hidden_act="silu",
-        layer_norm_eps=_get_layer_norm_value(config["ffn_norm_config"]["config"], "eps"),
-        layer_norm_elementwise_affine=_get_layer_norm_value(config["ffn_norm_config"]["config"], "elementwise_affine"),
-        layer_norm_bias=_get_layer_norm_value(config["ffn_norm_config"]["config"], "bias"),
+        layer_norm_eps=_get_layer_norm_value(config[ffn_norm_key]["config"], "eps"),
+        layer_norm_elementwise_affine=_get_layer_norm_value(config[ffn_norm_key]["config"], "elementwise_affine"),
+        layer_norm_bias=_get_layer_norm_value(config[ffn_norm_key]["config"], "bias"),
         max_position_embeddings=config["sequence_length"],
         rope_theta=config["attention_config"]["qkv_transforms"][0]["config"]["base_freq"],
         _attn_implementation=_map_attention_type(config),
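For reference, a minimal sketch of the nested modalities config layout that this hunk reads. Values are purely illustrative and only the keys touched by the diff are shown; a real config carries additional fields (layer counts, head counts, attention settings) that the rest of `convert_model_config` expects.

```python
# Hypothetical excerpt of a modalities config dict -- illustrative values only,
# restricted to the keys accessed in the hunk above.
modalities_config = {
    "model_raw": {  # the converter falls back to "model" if "model_raw" is absent
        "config": {
            "vocab_size": 50304,
            "n_embd": 768,
            "bias": False,            # feeds both attention_bias and mlp_bias
            "sequence_length": 2048,  # becomes max_position_embeddings
            "ffn_norm_config": {      # looked up via ffn_norm_key
                "config": {
                    "eps": 1e-5,
                    "elementwise_affine": True,
                    "bias": False,
                },
            },
            "attention_config": {
                "qkv_transforms": [
                    {"config": {"base_freq": 10000}},  # becomes rope_theta
                ],
            },
        },
    },
}
```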