Skip to content

Commit b1e95c6

Browse files
authored
New gsa config (#646)
- Add a new GSAOnDevice config for Qwen3-Coder-30B-A3B-Instruct
- Update the GSAOnDevice config for the Qwen3-32B model with better settings
1 parent f845eed commit b1e95c6

3 files changed

Lines changed: 187 additions & 4 deletions

File tree

ucm/sparse/gsa_on_device/configs/gsa_on_device_qwen3_32B_config.json

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
"is_mla": false,
44
"hash_weight_type": "random",
55
"num_hidden_layers": 64,
6-
"seq_len_threshhold": 2048,
6+
"seq_len_threshhold": 4096,
77
"chunk_size": 128,
88
"chunk_repre_method": "max",
99
"head_dim": 128,
@@ -152,7 +152,7 @@
152152
"hash_bits_qk_rope": null,
153153
"hash_weight_kv_lora": null,
154154
"hash_weight_qk_rope": null,
155-
"vllm_hash_attention_topk": 2048,
155+
"vllm_hash_attention_topk": 4096,
156156
"vllm_hash_attention_reduction_head_num": null,
157157
"vllm_hash_attention_rollback_layers": [
158158
0,
@@ -161,6 +161,7 @@
161161
3,
162162
4,
163163
5,
164+
6,
164165
61,
165166
62,
166167
63
@@ -172,6 +173,7 @@
172173
true,
173174
true,
174175
true,
176+
true,
175177
false,
176178
false,
177179
true,
@@ -228,7 +230,6 @@
228230
true,
229231
true,
230232
true,
231-
true,
232233
true
233234
]
234235
}
ucm/sparse/gsa_on_device/configs/gsa_on_device_qwen3_coder_30B_A3B_config.json

Lines changed: 180 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,180 @@
1+
{
2+
"model_name": "Qwen/Qwen3-Coder-30B-A3B-Instruct",
3+
"is_mla": false,
4+
"hash_weight_type": "random",
5+
"num_hidden_layers": 48,
6+
"seq_len_threshhold": 2048,
7+
"chunk_size": 128,
8+
"chunk_repre_method": "max",
9+
"head_dim": 128,
10+
"hash_bits": 128,
11+
"top_k_ratio_per_layer": [
12+
1,
13+
1,
14+
1,
15+
0.3,
16+
0.3,
17+
0.3,
18+
0.3,
19+
0.3,
20+
0.3,
21+
0.3,
22+
0.3,
23+
0.3,
24+
0.3,
25+
0.3,
26+
0.3,
27+
0.3,
28+
0.3,
29+
0.3,
30+
0.3,
31+
0.3,
32+
0.3,
33+
0.3,
34+
0.3,
35+
0.3,
36+
0.3,
37+
0.3,
38+
0.3,
39+
0.3,
40+
0.3,
41+
0.3,
42+
0.3,
43+
0.3,
44+
0.3,
45+
0.3,
46+
0.3,
47+
0.3,
48+
0.3,
49+
0.3,
50+
0.3,
51+
0.3,
52+
0.3,
53+
0.3,
54+
0.3,
55+
0.3,
56+
0.3,
57+
0.3,
58+
0.3,
59+
0.3
60+
],
61+
"top_k_index_reuse": [
62+
-1,
63+
-1,
64+
-1,
65+
-1,
66+
-1,
67+
-1,
68+
-1,
69+
-1,
70+
-1,
71+
-1,
72+
-1,
73+
-1,
74+
-1,
75+
-1,
76+
-1,
77+
-1,
78+
-1,
79+
-1,
80+
-1,
81+
-1,
82+
-1,
83+
-1,
84+
-1,
85+
-1,
86+
-1,
87+
-1,
88+
-1,
89+
-1,
90+
-1,
91+
-1,
92+
-1,
93+
-1,
94+
-1,
95+
-1,
96+
-1,
97+
-1,
98+
-1,
99+
-1,
100+
-1,
101+
-1,
102+
-1,
103+
-1,
104+
-1,
105+
-1,
106+
-1,
107+
-1,
108+
-1,
109+
-1
110+
],
111+
"must_select_blocks": [
112+
0,
113+
-2,
114+
-1
115+
],
116+
"hash_weight": null,
117+
"kv_lora_rank": null,
118+
"qk_rope_head_dim": null,
119+
"hash_bits_kv_lora": null,
120+
"hash_bits_qk_rope": null,
121+
"hash_weight_kv_lora": null,
122+
"hash_weight_qk_rope": null,
123+
"vllm_hash_attention_topk": 2048,
124+
"vllm_hash_attention_reduction_head_num": null,
125+
"vllm_hash_attention_rollback_layers": [
126+
0,
127+
1,
128+
2
129+
],
130+
"vllm_hash_attention_skip_layers": [
131+
true,
132+
true,
133+
true,
134+
false,
135+
false,
136+
false,
137+
false,
138+
false,
139+
true,
140+
true,
141+
false,
142+
false,
143+
true,
144+
false,
145+
true,
146+
true,
147+
true,
148+
true,
149+
false,
150+
false,
151+
true,
152+
true,
153+
true,
154+
true,
155+
true,
156+
true,
157+
true,
158+
true,
159+
true,
160+
true,
161+
true,
162+
true,
163+
false,
164+
true,
165+
false,
166+
true,
167+
true,
168+
true,
169+
true,
170+
true,
171+
true,
172+
true,
173+
true,
174+
true,
175+
false,
176+
true,
177+
true,
178+
true
179+
]
180+
}

ucm/sparse/gsa_on_device/gsa_on_device.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,8 +48,10 @@ def gsa_on_device_config_path_for_model(vllm_config) -> str:
4848
rel = (
4949
"ucm/sparse/gsa_on_device/configs/gsa_on_device_deepseek_r1_awq_config.json"
5050
)
51-
elif "qwen3" in model and "32b" in model:
51+
elif "qwen3" in model and "32b" in model and "coder" not in model:
5252
rel = "ucm/sparse/gsa_on_device/configs/gsa_on_device_qwen3_32B_config.json"
53+
elif "qwen3" in model and "30b" in model and "coder" in model:
54+
rel = "ucm/sparse/gsa_on_device/configs/gsa_on_device_qwen3_coder_30B_A3B_config.json"
5355
elif "qwen3" in model and "4b" in model:
5456
rel = "ucm/sparse/gsa_on_device/configs/gsa_on_device_qwen3_4B_config.json"
5557
elif "qwq" in model and "32b" in model:

0 commit comments

Comments
 (0)