@@ -599,10 +599,12 @@ def insert_tasks_v1(self, req_dicts: List[Request], num_running_requests: int):
599599 # Enable thinking
600600 self .share_inputs ["max_think_lens" ][idx : idx + 1 , :] = request .get ("reasoning_max_tokens" )
601601 self .share_inputs ["limit_think_status" ][idx : idx + 1 , :] = 0
602+ self .share_inputs ["reasoning_status" ][idx : idx + 1 , :] = 0
602603 else :
603604 # Disable thinking
604605 self .share_inputs ["max_think_lens" ][idx : idx + 1 , :] = - 1
605606 self .share_inputs ["limit_think_status" ][idx : idx + 1 , :] = 0
607+ self .share_inputs ["reasoning_status" ][idx : idx + 1 , :] = 0
606608
607609 if (
608610 hasattr (request , "sampling_params" )
@@ -798,10 +800,12 @@ def insert_prefill_inputs(self, req_dicts: List[Request], num_running_requests:
798800 # Enable thinking
799801 self .share_inputs ["max_think_lens" ][idx : idx + 1 , :] = request .get ("reasoning_max_tokens" )
800802 self .share_inputs ["limit_think_status" ][idx : idx + 1 , :] = 0
803+ self .share_inputs ["reasoning_status" ][idx : idx + 1 , :] = 0
801804 else :
802805 # Disable thinking
803806 self .share_inputs ["max_think_lens" ][idx : idx + 1 , :] = - 1
804807 self .share_inputs ["limit_think_status" ][idx : idx + 1 , :] = 0
808+ self .share_inputs ["reasoning_status" ][idx : idx + 1 , :] = 0
805809
806810 def get_attr_from_request (request , attr , default_value = None ):
807811 res = request .get (attr , default_value )
0 commit comments