Add and update GSAOnDevice configs (#646)

leideng · web-flow · commit b1e95c6661c4 · 2026-01-20T11:25:52.000+08:00
- Add a new GSAOnDevice config for Qwen3-Coder-30B-A3B-Instruct
- Update the GSAOnDevice config for the Qwen3-32B model with better-tuned values (sequence-length threshold, hash-attention top-k, rollback/skip layers)
diff --git a/ucm/sparse/gsa_on_device/configs/gsa_on_device_qwen3_32B_config.json b/ucm/sparse/gsa_on_device/configs/gsa_on_device_qwen3_32B_config.json
@@ -3,7 +3,7 @@
     "is_mla": false,
     "hash_weight_type": "random",
     "num_hidden_layers": 64,
-    "seq_len_threshhold": 2048,
+    "seq_len_threshhold": 4096,
     "chunk_size": 128,
     "chunk_repre_method": "max",
     "head_dim": 128,
@@ -152,7 +152,7 @@
     "hash_bits_qk_rope": null,
     "hash_weight_kv_lora": null,
     "hash_weight_qk_rope": null,
-    "vllm_hash_attention_topk": 2048,
+    "vllm_hash_attention_topk": 4096,
     "vllm_hash_attention_reduction_head_num": null,
     "vllm_hash_attention_rollback_layers": [
         0,
@@ -161,6 +161,7 @@
         3,
         4,
         5,
+        6,
         61,
         62,
         63
@@ -172,6 +173,7 @@
         true,
         true,
         true,
+        true,
         false,
         false,
         true,
@@ -228,7 +230,6 @@
         true,
         true,
         true,
-        true, 
         true
     ]
 }
diff --git a/ucm/sparse/gsa_on_device/configs/gsa_on_device_qwen3_coder_30B_A3B_config.json b/ucm/sparse/gsa_on_device/configs/gsa_on_device_qwen3_coder_30B_A3B_config.json
@@ -0,0 +1,180 @@
+{
+    "model_name": "Qwen/Qwen3-Coder-30B-A3B-Instruct",
+    "is_mla": false,
+    "hash_weight_type": "random",
+    "num_hidden_layers": 48,
+    "seq_len_threshhold": 2048,
+    "chunk_size": 128,
+    "chunk_repre_method": "max",
+    "head_dim": 128,
+    "hash_bits": 128,
+    "top_k_ratio_per_layer": [
+        1,
+        1,
+        1,
+        0.3,
+        0.3,
+        0.3,
+        0.3,
+        0.3,
+        0.3,
+        0.3,
+        0.3,
+        0.3,
+        0.3,
+        0.3,
+        0.3,
+        0.3,
+        0.3,
+        0.3,
+        0.3,
+        0.3,
+        0.3,
+        0.3,
+        0.3,
+        0.3,
+        0.3,
+        0.3,
+        0.3,
+        0.3,
+        0.3,
+        0.3,
+        0.3,
+        0.3,
+        0.3,
+        0.3,
+        0.3,
+        0.3,
+        0.3,
+        0.3,
+        0.3,
+        0.3,
+        0.3,
+        0.3,
+        0.3,
+        0.3,
+        0.3,
+        0.3,
+        0.3,
+        0.3
+    ],
+    "top_k_index_reuse": [
+        -1,
+        -1,
+        -1,
+        -1,
+        -1,
+        -1,
+        -1,
+        -1,
+        -1,
+        -1,
+        -1,
+        -1,
+        -1,
+        -1,
+        -1,
+        -1,
+        -1,
+        -1,
+        -1,
+        -1,
+        -1,
+        -1,
+        -1,
+        -1,
+        -1,
+        -1,
+        -1,
+        -1,
+        -1,
+        -1,
+        -1,
+        -1,
+        -1,
+        -1,
+        -1,
+        -1,
+        -1,
+        -1,
+        -1,
+        -1,
+        -1,
+        -1,
+        -1,
+        -1,
+        -1,
+        -1,
+        -1,
+        -1
+    ],
+    "must_select_blocks": [
+        0,
+        -2,
+        -1
+    ],
+    "hash_weight": null,
+    "kv_lora_rank": null,
+    "qk_rope_head_dim": null,
+    "hash_bits_kv_lora": null,
+    "hash_bits_qk_rope": null,
+    "hash_weight_kv_lora": null,
+    "hash_weight_qk_rope": null,
+    "vllm_hash_attention_topk": 2048,
+    "vllm_hash_attention_reduction_head_num": null,
+    "vllm_hash_attention_rollback_layers": [
+        0,
+        1,
+        2
+    ],
+    "vllm_hash_attention_skip_layers": [
+        true,
+        true,
+        true,
+        false,
+        false,
+        false,
+        false,
+        false,
+        true,
+        true,
+        false,
+        false,
+        true,
+        false,
+        true,
+        true,
+        true,
+        true,
+        false,
+        false,
+        true,
+        true,
+        true,
+        true,
+        true,
+        true,
+        true,
+        true,
+        true,
+        true,
+        true,
+        true,
+        false,
+        true,
+        false,
+        true,
+        true,
+        true,
+        true,
+        true,
+        true,
+        true,
+        true,
+        true,
+        false,
+        true,
+        true,
+        true
+    ]
+}
diff --git a/ucm/sparse/gsa_on_device/gsa_on_device.py b/ucm/sparse/gsa_on_device/gsa_on_device.py
@@ -48,8 +48,10 @@ def gsa_on_device_config_path_for_model(vllm_config) -> str:
         rel = (
             "ucm/sparse/gsa_on_device/configs/gsa_on_device_deepseek_r1_awq_config.json"
         )
-    elif "qwen3" in model and "32b" in model:
+    elif "qwen3" in model and "32b" in model and "coder" not in model:
         rel = "ucm/sparse/gsa_on_device/configs/gsa_on_device_qwen3_32B_config.json"
+    elif "qwen3" in model and "30b" in model and "coder" in model:
+        rel = "ucm/sparse/gsa_on_device/configs/gsa_on_device_qwen3_coder_30B_A3B_config.json"
     elif "qwen3" in model and "4b" in model:
         rel = "ucm/sparse/gsa_on_device/configs/gsa_on_device_qwen3_4B_config.json"
     elif "qwq" in model and "32b" in model: