File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change 33 "is_mla" : false ,
44 "hash_weight_type" : " random" ,
55 "num_hidden_layers" : 64 ,
6- "seq_len_threshhold" : 2048 ,
6+ "seq_len_threshhold" : 4096 ,
77 "chunk_size" : 128 ,
88 "chunk_repre_method" : " max" ,
99 "head_dim" : 128 ,
152152 "hash_bits_qk_rope" : null ,
153153 "hash_weight_kv_lora" : null ,
154154 "hash_weight_qk_rope" : null ,
155- "vllm_hash_attention_topk" : 2048 ,
155+ "vllm_hash_attention_topk" : 4096 ,
156156 "vllm_hash_attention_reduction_head_num" : null ,
157157 "vllm_hash_attention_rollback_layers" : [
158158 0 ,
161161 3 ,
162162 4 ,
163163 5 ,
164+ 6 ,
164165 61 ,
165166 62 ,
166167 63
172173 true ,
173174 true ,
174175 true ,
176+ true ,
175177 false ,
176178 false ,
177179 true ,
228230 true ,
229231 true ,
230232 true ,
231- true ,
232233 true
233234 ]
234235}
Original file line number Diff line number Diff line change 1+ {
2+ "model_name" : " Qwen/Qwen3-Coder-30B-A3B-Instruct" ,
3+ "is_mla" : false ,
4+ "hash_weight_type" : " random" ,
5+ "num_hidden_layers" : 48 ,
6+ "seq_len_threshhold" : 2048 ,
7+ "chunk_size" : 128 ,
8+ "chunk_repre_method" : " max" ,
9+ "head_dim" : 128 ,
10+ "hash_bits" : 128 ,
11+ "top_k_ratio_per_layer" : [
12+ 1 ,
13+ 1 ,
14+ 1 ,
15+ 0.3 ,
16+ 0.3 ,
17+ 0.3 ,
18+ 0.3 ,
19+ 0.3 ,
20+ 0.3 ,
21+ 0.3 ,
22+ 0.3 ,
23+ 0.3 ,
24+ 0.3 ,
25+ 0.3 ,
26+ 0.3 ,
27+ 0.3 ,
28+ 0.3 ,
29+ 0.3 ,
30+ 0.3 ,
31+ 0.3 ,
32+ 0.3 ,
33+ 0.3 ,
34+ 0.3 ,
35+ 0.3 ,
36+ 0.3 ,
37+ 0.3 ,
38+ 0.3 ,
39+ 0.3 ,
40+ 0.3 ,
41+ 0.3 ,
42+ 0.3 ,
43+ 0.3 ,
44+ 0.3 ,
45+ 0.3 ,
46+ 0.3 ,
47+ 0.3 ,
48+ 0.3 ,
49+ 0.3 ,
50+ 0.3 ,
51+ 0.3 ,
52+ 0.3 ,
53+ 0.3 ,
54+ 0.3 ,
55+ 0.3 ,
56+ 0.3 ,
57+ 0.3 ,
58+ 0.3 ,
59+ 0.3
60+ ],
61+ "top_k_index_reuse" : [
62+ -1 ,
63+ -1 ,
64+ -1 ,
65+ -1 ,
66+ -1 ,
67+ -1 ,
68+ -1 ,
69+ -1 ,
70+ -1 ,
71+ -1 ,
72+ -1 ,
73+ -1 ,
74+ -1 ,
75+ -1 ,
76+ -1 ,
77+ -1 ,
78+ -1 ,
79+ -1 ,
80+ -1 ,
81+ -1 ,
82+ -1 ,
83+ -1 ,
84+ -1 ,
85+ -1 ,
86+ -1 ,
87+ -1 ,
88+ -1 ,
89+ -1 ,
90+ -1 ,
91+ -1 ,
92+ -1 ,
93+ -1 ,
94+ -1 ,
95+ -1 ,
96+ -1 ,
97+ -1 ,
98+ -1 ,
99+ -1 ,
100+ -1 ,
101+ -1 ,
102+ -1 ,
103+ -1 ,
104+ -1 ,
105+ -1 ,
106+ -1 ,
107+ -1 ,
108+ -1 ,
109+ -1
110+ ],
111+ "must_select_blocks" : [
112+ 0 ,
113+ -2 ,
114+ -1
115+ ],
116+ "hash_weight" : null ,
117+ "kv_lora_rank" : null ,
118+ "qk_rope_head_dim" : null ,
119+ "hash_bits_kv_lora" : null ,
120+ "hash_bits_qk_rope" : null ,
121+ "hash_weight_kv_lora" : null ,
122+ "hash_weight_qk_rope" : null ,
123+ "vllm_hash_attention_topk" : 2048 ,
124+ "vllm_hash_attention_reduction_head_num" : null ,
125+ "vllm_hash_attention_rollback_layers" : [
126+ 0 ,
127+ 1 ,
128+ 2
129+ ],
130+ "vllm_hash_attention_skip_layers" : [
131+ true ,
132+ true ,
133+ true ,
134+ false ,
135+ false ,
136+ false ,
137+ false ,
138+ false ,
139+ true ,
140+ true ,
141+ false ,
142+ false ,
143+ true ,
144+ false ,
145+ true ,
146+ true ,
147+ true ,
148+ true ,
149+ false ,
150+ false ,
151+ true ,
152+ true ,
153+ true ,
154+ true ,
155+ true ,
156+ true ,
157+ true ,
158+ true ,
159+ true ,
160+ true ,
161+ true ,
162+ true ,
163+ false ,
164+ true ,
165+ false ,
166+ true ,
167+ true ,
168+ true ,
169+ true ,
170+ true ,
171+ true ,
172+ true ,
173+ true ,
174+ true ,
175+ false ,
176+ true ,
177+ true ,
178+ true
179+ ]
180+ }
Original file line number Diff line number Diff line change @@ -48,8 +48,10 @@ def gsa_on_device_config_path_for_model(vllm_config) -> str:
4848 rel = (
4949 "ucm/sparse/gsa_on_device/configs/gsa_on_device_deepseek_r1_awq_config.json"
5050 )
51- elif "qwen3" in model and "32b" in model :
51+ elif "qwen3" in model and "32b" in model and "coder" not in model :
5252 rel = "ucm/sparse/gsa_on_device/configs/gsa_on_device_qwen3_32B_config.json"
53+ elif "qwen3" in model and "30b" in model and "coder" in model :
54+ rel = "ucm/sparse/gsa_on_device/configs/gsa_on_device_qwen3_coder_30B_A3B_config.json"
5355 elif "qwen3" in model and "4b" in model :
5456 rel = "ucm/sparse/gsa_on_device/configs/gsa_on_device_qwen3_4B_config.json"
5557 elif "qwq" in model and "32b" in model :
You can’t perform that action at this time.
0 commit comments