diff --git a/responses_api_models/vllm_model/app.py b/responses_api_models/vllm_model/app.py index 351744398..628b51111 100644 --- a/responses_api_models/vllm_model/app.py +++ b/responses_api_models/vllm_model/app.py @@ -71,6 +71,7 @@ class VLLMModelConfig(BaseResponsesAPIModelConfig): return_token_id_information: bool uses_reasoning_parser: bool + uses_interleaved_reasoning: bool = True replace_developer_role_with_system: bool = False # Whether or not the model can generate a reasoning output, and called again to produce additional reasoning output. @@ -275,7 +276,7 @@ def _preprocess_chat_completion_create_params(self, request: Request, body_dict: if isinstance(content, str): reasoning_matches, remaining_content = self._converter._extract_reasoning_from_content(content) message_dict["content"] = remaining_content - if reasoning_matches: + if reasoning_matches and self.config.uses_interleaved_reasoning: message_dict["reasoning_content"] = reasoning_matches[0] # TODO when NeMo RL migrates to vLLM>=0.16.0, remove the reasoning_content support above. @@ -293,7 +294,7 @@ def _preprocess_chat_completion_create_params(self, request: Request, body_dict: # Even though we set the reasoning content already here, we still loop through all the content item dicts for the assert above. content_item_dict["text"] = remaining_content - if reasoning_matches: + if reasoning_matches and self.config.uses_interleaved_reasoning: message_dict["reasoning_content"] = reasoning_matches[0] # See the TODO wrt reasoning_content above message_dict["reasoning"] = reasoning_matches[0] diff --git a/responses_api_models/vllm_model/configs/vllm_model.yaml b/responses_api_models/vllm_model/configs/vllm_model.yaml index aefd15487..696a9b06e 100644 --- a/responses_api_models/vllm_model/configs/vllm_model.yaml +++ b/responses_api_models/vllm_model/configs/vllm_model.yaml @@ -7,5 +7,6 @@ policy_model: model: ${policy_model_name} return_token_id_information: false uses_reasoning_parser: true + uses_interleaved_reasoning: true chat_template_kwargs: null extra_body: null