Skip to content

Commit e4d28d6

Browse files
committed
fix qwen3_30b_a3b_base_config.
1 parent 659d5b1 commit e4d28d6

1 file changed

Lines changed: 5 additions & 2 deletions

File tree

src/maxtext/checkpoint_conversion/utils/hf_model_configs.py

Lines changed: 5 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -724,14 +724,15 @@
724724
attention_dropout=0.0,
725725
bos_token_id=151643,
726726
decoder_sparse_step=1,
727-
eos_token_id=151645,
727+
eos_token_id=151643,
728728
head_dim=128,
729729
hidden_act="silu",
730730
hidden_size=2048,
731731
initializer_range=0.02,
732732
intermediate_size=6144,
733-
max_position_embeddings=262144,
733+
max_position_embeddings=32768,
734734
max_window_layers=48,
735+
mlp_only_layers=[],
735736
model_type="qwen3_moe",
736737
moe_intermediate_size=768,
737738
norm_topk_prob=True,
@@ -748,8 +749,10 @@
748749
sliding_window=None,
749750
tie_word_embeddings=False,
750751
torch_dtype="bfloat16",
752+
transformers_version="4.51.0",
751753
use_cache=True,
752754
vocab_size=151936,
755+
use_sliding_window=False,
753756
)
754757

755758
qwen3_235b_a22b_thinking_2507_config = transformers.Qwen3MoeConfig(

0 commit comments

Comments
 (0)