We read every piece of feedback and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent 257711e, commit 228b1c8 (Copy full SHA for 228b1c8)
1 file changed
src/maxtext/configs/models/deepseek3-test.yml
@@ -35,7 +35,7 @@ routed_scaling_factor: 2.5
35
routed_score_func: "sigmoid"
36
routed_bias: True
37
decoder_block: "deepseek"
38
-# MLA
+# MLA.
39
attention_type: "mla"
40
q_lora_rank: 1536
41
kv_lora_rank: 512
0 commit comments