File tree Expand file tree Collapse file tree
src/maxtext/checkpoint_conversion/utils Expand file tree Collapse file tree Original file line number Diff line number Diff line change 724724 attention_dropout = 0.0 ,
725725 bos_token_id = 151643 ,
726726 decoder_sparse_step = 1 ,
727- eos_token_id = 151645 ,
727+ eos_token_id = 151643 ,
728728 head_dim = 128 ,
729729 hidden_act = "silu" ,
730730 hidden_size = 2048 ,
731731 initializer_range = 0.02 ,
732732 intermediate_size = 6144 ,
733- max_position_embeddings = 262144 ,
733+ max_position_embeddings = 32768 ,
734734 max_window_layers = 48 ,
735+ mlp_only_layers = [],
735736 model_type = "qwen3_moe" ,
736737 moe_intermediate_size = 768 ,
737738 norm_topk_prob = True ,
748749 sliding_window = None ,
749750 tie_word_embeddings = False ,
750751 torch_dtype = "bfloat16" ,
752+ transformers_version = "4.51.0" ,
751753 use_cache = True ,
752754 vocab_size = 151936 ,
755+ use_sliding_window = False ,
753756)
754757
755758qwen3_235b_a22b_thinking_2507_config = transformers .Qwen3MoeConfig (
You can’t perform that action at this time.
0 commit comments