@@ -595,16 +595,15 @@ def insert_tasks_v1(self, req_dicts: List[Request], num_running_requests: int):
595595 prefill_start_index = request .prefill_start_index
596596 prefill_end_index = request .prefill_end_index
597597 length = prefill_end_index - prefill_start_index
598+ self .share_inputs ["reasoning_status" ][idx : idx + 1 , :] = 0
598599 if request .get ("enable_thinking" , False ) and request .get ("reasoning_max_tokens" , None ) is not None :
599600 # Enable thinking
600601 self .share_inputs ["max_think_lens" ][idx : idx + 1 , :] = request .get ("reasoning_max_tokens" )
601602 self .share_inputs ["limit_think_status" ][idx : idx + 1 , :] = 0
602- self .share_inputs ["reasoning_status" ][idx : idx + 1 , :] = 0
603603 else :
604604 # Disable thinking
605605 self .share_inputs ["max_think_lens" ][idx : idx + 1 , :] = - 1
606606 self .share_inputs ["limit_think_status" ][idx : idx + 1 , :] = 0
607- self .share_inputs ["reasoning_status" ][idx : idx + 1 , :] = 0
608607
609608 if (
610609 hasattr (request , "sampling_params" )
@@ -796,16 +795,15 @@ def insert_prefill_inputs(self, req_dicts: List[Request], num_running_requests:
796795 )[0 ]
797796 self .share_inputs ["seq_lens_decoder" ][idx : idx + 1 ] = 0
798797
798+ self .share_inputs ["reasoning_status" ][idx : idx + 1 , :] = 0
799799 if request .get ("enable_thinking" , False ) and request .get ("reasoning_max_tokens" , None ) is not None :
800800 # Enable thinking
801801 self .share_inputs ["max_think_lens" ][idx : idx + 1 , :] = request .get ("reasoning_max_tokens" )
802802 self .share_inputs ["limit_think_status" ][idx : idx + 1 , :] = 0
803- self .share_inputs ["reasoning_status" ][idx : idx + 1 , :] = 0
804803 else :
805804 # Disable thinking
806805 self .share_inputs ["max_think_lens" ][idx : idx + 1 , :] = - 1
807806 self .share_inputs ["limit_think_status" ][idx : idx + 1 , :] = 0
808- self .share_inputs ["reasoning_status" ][idx : idx + 1 , :] = 0
809807
810808 def get_attr_from_request (request , attr , default_value = None ):
811809 res = request .get (attr , default_value )
0 commit comments