Skip to content

Commit 70d9b5b

Browse files
ArkaD171717julian-rischclaude
authored
feat: add reasoning token support to OpenRouter integration (#3264)
Co-authored-by: Julian Risch <julian.risch@deepset.ai> Co-authored-by: Claude Opus 4.8 (1M context) <noreply@anthropic.com>
1 parent 7c94de2 commit 70d9b5b

4 files changed

Lines changed: 720 additions & 22 deletions

File tree

integrations/openrouter/pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ classifiers = [
2323
"Programming Language :: Python :: Implementation :: CPython",
2424
"Programming Language :: Python :: Implementation :: PyPy",
2525
]
26-
dependencies = ["haystack-ai>=2.22.0"]
26+
dependencies = ["haystack-ai>=2.30.0"]
2727

2828
[project.urls]
2929
Documentation = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/openrouter#readme"

integrations/openrouter/src/haystack_integrations/components/generators/openrouter/chat/chat_generator.py

Lines changed: 264 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -2,18 +2,105 @@
22
#
33
# SPDX-License-Identifier: Apache-2.0
44

5+
import json
56
from typing import Any
67

78
from haystack import component, default_to_dict, logging
89
from haystack.components.generators.chat import OpenAIChatGenerator
9-
from haystack.dataclasses import ChatMessage, StreamingCallbackT
10+
from haystack.components.generators.chat.openai import _check_finish_reason
11+
from haystack.components.generators.utils import _normalize_messages, _serialize_object
12+
from haystack.dataclasses import (
13+
ChatMessage,
14+
ReasoningContent,
15+
StreamingCallbackT,
16+
ToolCall,
17+
select_streaming_callback,
18+
)
1019
from haystack.tools import ToolsType, _check_duplicate_tool_names, flatten_tools_or_toolsets, serialize_tools_or_toolset
1120
from haystack.utils import serialize_callable
1221
from haystack.utils.auth import Secret
22+
from openai.types.chat import ChatCompletion, ParsedChatCompletion
23+
from openai.types.chat.chat_completion import Choice
1324

1425
logger = logging.getLogger(__name__)
1526

1627

28+
def _extract_reasoning(message: Any) -> ReasoningContent | None:
    """
    Extract reasoning content from an OpenRouter API response message.

    OpenRouter attaches reasoning data as extra attributes on the standard OpenAI SDK message, so the
    attributes are read with `getattr` rather than through typed fields.

    :param message:
        The response message object. Its optional `reasoning` attribute (flat reasoning text) and
        optional `reasoning_details` attribute (iterable of structured detail entries) are read.
    :returns:
        A `ReasoningContent` carrying the reasoning text and, under `extra["reasoning_details"]`, the
        detail entries converted to dictionaries. Returns `None` when the message has neither a
        non-empty `reasoning` value nor any `reasoning_details`.
    """
    reasoning_text = getattr(message, "reasoning", None) or ""
    raw_details = getattr(message, "reasoning_details", None) or []

    if not reasoning_text and not raw_details:
        return None

    # Normalize every detail entry to a plain dict, whatever object type the SDK handed back.
    details = []
    for detail in raw_details:
        if isinstance(detail, dict):
            details.append(detail)
        elif hasattr(detail, "model_dump"):
            details.append(detail.model_dump())
        else:
            details.append(vars(detail))

    # Some models only return structured details without a flat `reasoning` string, so we
    # reconstruct the text from the known detail types.
    if not reasoning_text:
        text_key_by_type = {"reasoning.text": "text", "reasoning.summary": "summary"}
        parts = []
        for detail in details:
            key = text_key_by_type.get(detail.get("type", ""))
            if key is None:
                continue
            value = detail.get(key)
            # The key may be present with a null value (e.g. after `model_dump()` of an optional
            # field); joining a non-string would raise TypeError, so only strings are kept.
            if isinstance(value, str):
                parts.append(value)
        reasoning_text = "".join(parts)

    extra = {"reasoning_details": details} if details else {}
    return ReasoningContent(reasoning_text=reasoning_text, extra=extra)
64+
65+
66+
def _convert_openrouter_completion_to_chat_message(
    completion: ChatCompletion | ParsedChatCompletion, choice: Choice
) -> ChatMessage:
    """
    Build an assistant ChatMessage from one choice of an OpenRouter chat completion.

    :param completion:
        The completion the choice belongs to. Its `model` and `usage` are copied into the message metadata.
    :param choice:
        The choice to convert. Its message content, tool calls, log probabilities, index and finish
        reason are used.
    :returns:
        A ChatMessage with the reply text, the tool calls whose arguments could be parsed as JSON,
        the metadata, and any reasoning content found on the response message.
    """
    response_message = choice.message

    parsed_tool_calls = []
    for call in response_message.tool_calls or []:
        function = getattr(call, "function", None)
        if function is None:
            # Tool call entries without a `function` attribute are skipped.
            continue
        try:
            parsed_arguments = json.loads(function.arguments)
            parsed_tool_calls.append(ToolCall(id=call.id, tool_name=function.name, arguments=parsed_arguments))
        except json.JSONDecodeError:
            # A tool call with unparsable arguments is dropped from the result and only logged.
            logger.warning(
                "OpenRouter returned a malformed JSON string for tool call arguments. "
                "Tool call ID: {_id}, Tool name: {_name}, Arguments: {_arguments}",
                _id=call.id,
                _name=function.name,
                _arguments=function.arguments,
            )

    serialized_logprobs = _serialize_object(choice.logprobs) if choice.logprobs else None
    meta = {
        "model": completion.model,
        "index": choice.index,
        "finish_reason": choice.finish_reason,
        "usage": _serialize_object(completion.usage),
    }
    if serialized_logprobs:
        meta["logprobs"] = serialized_logprobs

    return ChatMessage.from_assistant(
        text=response_message.content,
        tool_calls=parsed_tool_calls,
        meta=meta,
        reasoning=_extract_reasoning(response_message),
    )
102+
103+
17104
@component
18105
class OpenRouterChatGenerator(OpenAIChatGenerator):
19106
"""
@@ -26,9 +113,12 @@ class OpenRouterChatGenerator(OpenAIChatGenerator):
26113
parameter in `run` method.
27114
28115
Key Features and Compatibility:
29-
- **Primary Compatibility**: Designed to work seamlessly with the OpenRouter chat completion endpoint.
116+
- **Primary Compatibility**: Compatible with the OpenRouter chat completion endpoint.
30117
- **Streaming Support**: Supports streaming responses from the OpenRouter chat completion endpoint.
31118
- **Customizability**: Supports all parameters supported by the OpenRouter chat completion endpoint.
119+
- **Reasoning Support**: Extracts reasoning/thinking content from models that support it
120+
(e.g., DeepSeek R1, Claude with extended thinking) and stores it in the `ReasoningContent`
121+
field on `ChatMessage`. Reasoning content is only captured for non-streaming requests.
32122
33123
This component uses the ChatMessage format for structuring both input and output,
34124
ensuring coherent and contextually relevant responses in chat-based text generation scenarios.
@@ -40,20 +130,20 @@ class OpenRouterChatGenerator(OpenAIChatGenerator):
40130
41131
Usage example:
42132
```python
43-
from haystack_integrations.components.generators.openrouter import OpenRouterChatGenerator
133+
from haystack_integrations.components.generators.openrouter import (
134+
OpenRouterChatGenerator,
135+
)
44136
from haystack.dataclasses import ChatMessage
45137
46138
messages = [ChatMessage.from_user("What's Natural Language Processing?")]
47139
48-
client = OpenRouterChatGenerator()
140+
client = OpenRouterChatGenerator(
141+
model="deepseek/deepseek-r1",
142+
generation_kwargs={"reasoning": {"effort": "high"}},
143+
)
49144
response = client.run(messages)
50-
print(response)
51-
52-
>>{'replies': [ChatMessage(_content='Natural Language Processing (NLP) is a branch of artificial intelligence
53-
>>that focuses on enabling computers to understand, interpret, and generate human language in a way that is
54-
>>meaningful and useful.', _role=<ChatRole.ASSISTANT: 'assistant'>, _name=None,
55-
>>_meta={'model': 'openai/gpt-5-mini', 'index': 0, 'finish_reason': 'stop',
56-
>>'usage': {'prompt_tokens': 15, 'completion_tokens': 36, 'total_tokens': 51}})]}
145+
print(response["replies"][0].reasoning) # Access reasoning content
146+
print(response["replies"][0].text) # Access final answer
57147
```
58148
"""
59149

@@ -98,14 +188,11 @@ def __init__(
98188
events as they become available, with the stream terminated by a data: [DONE] message.
99189
- `safe_prompt`: Whether to inject a safety prompt before all conversations.
100190
- `random_seed`: The seed to use for random sampling.
191+
- `reasoning`: A dict to configure reasoning/thinking tokens for models that support it.
192+
Example: `{"effort": "high"}` or `{"max_tokens": 2000}`.
193+
Reasoning content is only captured for non-streaming requests.
194+
See [OpenRouter reasoning docs](https://openrouter.ai/docs/use-cases/reasoning-tokens).
101195
- `response_format`: A JSON schema or a Pydantic model that enforces the structure of the model's response.
102-
If provided, the output will always be validated against this
103-
format (unless the model returns a tool call).
104-
For details, see the [OpenAI Structured Outputs documentation](https://platform.openai.com/docs/guides/structured-outputs).
105-
Notes:
106-
- This parameter accepts Pydantic models and JSON schemas for latest models starting from GPT-4o.
107-
- For structured outputs with streaming,
108-
the `response_format` must be a JSON schema and not a Pydantic model.
109196
:param tools:
110197
A list of tools or a Toolset for which the model can prepare calls. This parameter can accept either a
111198
list of `Tool` objects or a `Toolset` instance.
@@ -187,6 +274,12 @@ def _prepare_api_call(
187274
# adapt ChatMessage(s) to the format expected by the OpenAI API
188275
openai_formatted_messages = [message.to_openai_dict_format() for message in messages]
189276

277+
# OpenRouter expects reasoning_details to be sent back in multi-turn conversations, but
278+
# to_openai_dict_format() strips reasoning, so we re-inject it into the formatted message dicts.
279+
for i, chat_msg in enumerate(messages):
280+
if chat_msg.reasoning and chat_msg.reasoning.extra.get("reasoning_details"):
281+
openai_formatted_messages[i]["reasoning_details"] = chat_msg.reasoning.extra["reasoning_details"]
282+
190283
flattened_tools = flatten_tools_or_toolsets(tools or self.tools)
191284
tools_strict = tools_strict if tools_strict is not None else self.tools_strict
192285
_check_duplicate_tool_names(flattened_tools)
@@ -227,3 +320,156 @@ def _prepare_api_call(
227320
if response_format:
228321
final_args["response_format"] = response_format
229322
return final_args
323+
324+
@component.output_types(replies=list[ChatMessage])
def run(
    self,
    messages: list[ChatMessage] | str,
    streaming_callback: StreamingCallbackT | None = None,
    generation_kwargs: dict[str, Any] | None = None,
    *,
    tools: ToolsType | None = None,
    tools_strict: bool | None = None,
) -> dict[str, list[ChatMessage]]:
    """
    Run a chat completion against the OpenRouter API.

    :param messages:
        The input messages as a list of ChatMessage instances.
        If a string is provided, it is converted to a list containing a ChatMessage with user role.
    :param streaming_callback:
        A callback function that is called when a new token is received from the stream.
    :param generation_kwargs:
        Additional keyword arguments for text generation. These parameters will
        override the parameters passed during component initialization.
        For details on OpenRouter API parameters, see
        [OpenRouter docs](https://openrouter.ai/docs/quickstart).
    :param tools: A list of Tool and/or Toolset objects, or a single Toolset for which the model can prepare calls.
        If set, it will override the `tools` parameter provided during initialization.
    :param tools_strict:
        Whether to enable strict schema adherence for tool calls.

    :returns:
        A dictionary with the following key:
        - `replies`: A list containing the generated responses as ChatMessage instances.
    """
    messages = _normalize_messages(messages)
    if not self._is_warmed_up:
        self.warm_up()

    # Nothing to send: return early without calling the API.
    if not messages:
        return {"replies": []}

    streaming_callback = select_streaming_callback(
        init_callback=self.streaming_callback, runtime_callback=streaming_callback, requires_async=False
    )
    is_streaming = streaming_callback is not None

    # Reasoning content is rebuilt from the full response message, which does not exist while
    # streaming, so the user is warned that it will be missing in this mode.
    if is_streaming and {**self.generation_kwargs, **(generation_kwargs or {})}.get("reasoning"):
        logger.warning(
            "Streaming with reasoning is active. Reasoning content will not be captured during "
            "streaming. Use non-streaming mode to extract reasoning content."
        )

    call_kwargs = self._prepare_api_call(
        messages=messages,
        streaming_callback=streaming_callback,
        generation_kwargs=generation_kwargs,
        tools=tools,
        tools_strict=tools_strict,
    )
    # The prepared arguments name the client method to call; it is not an API parameter itself.
    endpoint_name = call_kwargs.pop("openai_endpoint")
    endpoint = getattr(self.client.chat.completions, endpoint_name)
    response = endpoint(**call_kwargs)

    if is_streaming:
        # streaming uses the inherited handler so reasoning extraction is intentionally skipped
        replies = self._handle_stream_response(response, streaming_callback)
    else:
        assert isinstance(response, ChatCompletion), "Unexpected response type for non-streaming request."
        replies = [_convert_openrouter_completion_to_chat_message(response, choice) for choice in response.choices]

    for reply in replies:
        _check_finish_reason(reply.meta)

    return {"replies": replies}
401+
402+
@component.output_types(replies=list[ChatMessage])
async def run_async(
    self,
    messages: list[ChatMessage] | str,
    streaming_callback: StreamingCallbackT | None = None,
    generation_kwargs: dict[str, Any] | None = None,
    *,
    tools: ToolsType | None = None,
    tools_strict: bool | None = None,
) -> dict[str, list[ChatMessage]]:
    """
    Run a chat completion against the OpenRouter API asynchronously.

    :param messages:
        The input messages as a list of ChatMessage instances.
        If a string is provided, it is converted to a list containing a ChatMessage with user role.
    :param streaming_callback:
        A callback function that is called when a new token is received from the stream.
        Must be a coroutine.
    :param generation_kwargs:
        Additional keyword arguments for text generation.
    :param tools: A list of Tool and/or Toolset objects, or a single Toolset.
    :param tools_strict:
        Whether to enable strict schema adherence for tool calls.

    :returns:
        A dictionary with the following key:
        - `replies`: A list containing the generated responses as ChatMessage instances.
    """
    messages = _normalize_messages(messages)
    if not self._is_warmed_up:
        self.warm_up()

    # Nothing to send: return early without calling the API.
    if not messages:
        return {"replies": []}

    streaming_callback = select_streaming_callback(
        init_callback=self.streaming_callback, runtime_callback=streaming_callback, requires_async=True
    )
    is_streaming = streaming_callback is not None

    # Reasoning content is rebuilt from the full response message, which does not exist while
    # streaming, so the user is warned that it will be missing in this mode.
    if is_streaming and {**self.generation_kwargs, **(generation_kwargs or {})}.get("reasoning"):
        logger.warning(
            "Streaming with reasoning is active. Reasoning content will not be captured during "
            "streaming. Use non-streaming mode to extract reasoning content."
        )

    call_kwargs = self._prepare_api_call(
        messages=messages,
        streaming_callback=streaming_callback,
        generation_kwargs=generation_kwargs,
        tools=tools,
        tools_strict=tools_strict,
    )
    # The prepared arguments name the client method to call; it is not an API parameter itself.
    endpoint_name = call_kwargs.pop("openai_endpoint")
    endpoint = getattr(self.async_client.chat.completions, endpoint_name)
    response = await endpoint(**call_kwargs)

    if is_streaming:
        # streaming uses the inherited handler so reasoning extraction is intentionally skipped
        replies = await self._handle_async_stream_response(response, streaming_callback)
    else:
        assert isinstance(response, ChatCompletion), "Unexpected response type for non-streaming request."
        replies = [_convert_openrouter_completion_to_chat_message(response, choice) for choice in response.choices]

    for reply in replies:
        _check_finish_reason(reply.meta)

    return {"replies": replies}

0 commit comments

Comments
 (0)