@@ -124,7 +124,6 @@ def init_model(self, kvargs):
124124 "max_seq_length" : kvargs .get ("max_seq_length" , 1024 * 5 ),
125125 "is_token_healing" : kvargs .get ("is_token_healing" , False ),
126126 "return_all_prompt_logics" : self .return_all_prompt_logprobs ,
127- "use_dynamic_prompt_cache" : self .use_dynamic_prompt_cache ,
128127 "disable_chunked_prefill" : self .disable_chunked_prefill ,
129128 "data_type" : kvargs .get ("data_type" , "float16" ),
130129 "graph_max_batch_size" : kvargs .get ("graph_max_batch_size" , 16 ),
@@ -231,7 +230,6 @@ def init_mtp_draft_model(self, main_kvargs: dict):
231230 "max_seq_length" : main_kvargs .get ("max_seq_length" , 1024 * 5 ),
232231 "is_token_healing" : False ,
233232 "return_all_prompt_logics" : False ,
234- "use_dynamic_prompt_cache" : self .use_dynamic_prompt_cache ,
235233 "disable_chunked_prefill" : self .disable_chunked_prefill ,
236234 "data_type" : main_kvargs .get ("data_type" , "float16" ),
237235 "graph_max_batch_size" : main_kvargs .get ("graph_max_batch_size" , 16 ),
0 commit comments