Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions responses_api_models/vllm_model/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ class VLLMModelConfig(BaseResponsesAPIModelConfig):
return_token_id_information: bool

uses_reasoning_parser: bool
uses_interleaved_reasoning: bool = True
replace_developer_role_with_system: bool = False

# Whether the model can generate reasoning output and then be called again to produce additional reasoning output.
Expand Down Expand Up @@ -275,7 +276,7 @@ def _preprocess_chat_completion_create_params(self, request: Request, body_dict:
if isinstance(content, str):
reasoning_matches, remaining_content = self._converter._extract_reasoning_from_content(content)
message_dict["content"] = remaining_content
if reasoning_matches:
if reasoning_matches and self.config.uses_interleaved_reasoning:
message_dict["reasoning_content"] = reasoning_matches[0]

# TODO when NeMo RL migrates to vLLM>=0.16.0, remove the reasoning_content support above.
Expand All @@ -293,7 +294,7 @@ def _preprocess_chat_completion_create_params(self, request: Request, body_dict:

# Even though we set the reasoning content already here, we still loop through all the content item dicts for the assert above.
content_item_dict["text"] = remaining_content
if reasoning_matches:
if reasoning_matches and self.config.uses_interleaved_reasoning:
message_dict["reasoning_content"] = reasoning_matches[0]
# See the TODO wrt reasoning_content above
message_dict["reasoning"] = reasoning_matches[0]
Expand Down
1 change: 1 addition & 0 deletions responses_api_models/vllm_model/configs/vllm_model.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,6 @@ policy_model:
model: ${policy_model_name}
return_token_id_information: false
uses_reasoning_parser: true
uses_interleaved_reasoning: true
chat_template_kwargs: null
extra_body: null
Loading