From 351a9f0c505cfd77a87f75c48b209494832de906 Mon Sep 17 00:00:00 2001 From: Yuki Huang Date: Wed, 22 Apr 2026 02:39:02 -0700 Subject: [PATCH 1/2] support disable interleaved reasoning Signed-off-by: Yuki Huang --- responses_api_models/vllm_model/app.py | 5 +++-- responses_api_models/vllm_model/configs/vllm_model.yaml | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/responses_api_models/vllm_model/app.py b/responses_api_models/vllm_model/app.py index 351744398..628b51111 100644 --- a/responses_api_models/vllm_model/app.py +++ b/responses_api_models/vllm_model/app.py @@ -71,6 +71,7 @@ class VLLMModelConfig(BaseResponsesAPIModelConfig): return_token_id_information: bool uses_reasoning_parser: bool + uses_interleaved_reasoning: bool = True replace_developer_role_with_system: bool = False # Whether or not the model can generate a reasoning output, and called again to produce additional reasoning output. @@ -275,7 +276,7 @@ def _preprocess_chat_completion_create_params(self, request: Request, body_dict: if isinstance(content, str): reasoning_matches, remaining_content = self._converter._extract_reasoning_from_content(content) message_dict["content"] = remaining_content - if reasoning_matches: + if reasoning_matches and self.config.uses_interleaved_reasoning: message_dict["reasoning_content"] = reasoning_matches[0] # TODO when NeMo RL migrates to vLLM>=0.16.0, remove the reasoning_content support above. @@ -293,7 +294,7 @@ def _preprocess_chat_completion_create_params(self, request: Request, body_dict: # Even though we set the reasoning content already here, we still loop through all the content item dicts for the assert above. content_item_dict["text"] = remaining_content - if reasoning_matches: + if reasoning_matches and self.config.uses_interleaved_reasoning: message_dict["reasoning_content"] = reasoning_matches[0] # See the TODO wrt reasoning_content above message_dict["reasoning"] = reasoning_matches[0] diff --git a/responses_api_models/vllm_model/configs/vllm_model.yaml b/responses_api_models/vllm_model/configs/vllm_model.yaml index aefd15487..696a9b06e 100644 --- a/responses_api_models/vllm_model/configs/vllm_model.yaml +++ b/responses_api_models/vllm_model/configs/vllm_model.yaml @@ -7,5 +7,6 @@ policy_model: model: ${policy_model_name} return_token_id_information: false uses_reasoning_parser: true + uses_interleaved_reasoning: true chat_template_kwargs: null extra_body: null From 7c54a60e87413cbef6ea52d95ddb44e649039d8d Mon Sep 17 00:00:00 2001 From: Yuki Huang Date: Thu, 23 Apr 2026 07:52:14 -0700 Subject: [PATCH 2/2] empty push Signed-off-by: Yuki Huang