22#
33# SPDX-License-Identifier: Apache-2.0
44
5+ import json
56from typing import Any
67
78from haystack import component , default_to_dict , logging
89from haystack .components .generators .chat import OpenAIChatGenerator
9- from haystack .dataclasses import ChatMessage , StreamingCallbackT
10+ from haystack .components .generators .chat .openai import _check_finish_reason
11+ from haystack .components .generators .utils import _normalize_messages , _serialize_object
12+ from haystack .dataclasses import (
13+ ChatMessage ,
14+ ReasoningContent ,
15+ StreamingCallbackT ,
16+ ToolCall ,
17+ select_streaming_callback ,
18+ )
1019from haystack .tools import ToolsType , _check_duplicate_tool_names , flatten_tools_or_toolsets , serialize_tools_or_toolset
1120from haystack .utils import serialize_callable
1221from haystack .utils .auth import Secret
22+ from openai .types .chat import ChatCompletion , ParsedChatCompletion
23+ from openai .types .chat .chat_completion import Choice
1324
1425logger = logging .getLogger (__name__ )
1526
1627
def _reasoning_text_from_details(details: list[dict[str, Any]]) -> str:
    """
    Rebuild a flat reasoning string from structured reasoning detail dicts.

    Only `reasoning.text` entries (read from their `text` key) and `reasoning.summary` entries
    (read from their `summary` key) contribute; every other detail type is ignored.

    :param details: Reasoning detail entries, already converted to plain dicts.
    :returns: The contributing string values concatenated in order, or `""` if there are none.
    """
    parts = []
    for detail in details:
        dtype = detail.get("type", "")
        if dtype == "reasoning.text":
            value = detail.get("text")
        elif dtype == "reasoning.summary":
            value = detail.get("summary")
        else:
            continue
        # The key can be present with an explicit None (e.g. after dumping a model with unset
        # optional fields); joining a non-string would raise TypeError, so only strings are kept.
        if isinstance(value, str):
            parts.append(value)
    return "".join(parts)


def _extract_reasoning(message: Any) -> ReasoningContent | None:
    """
    Extract reasoning content from an OpenRouter API response message.

    :param message: The response message object. Its `reasoning` and `reasoning_details`
        attributes are read if present; a missing or falsy attribute is treated as absent.
    :returns: `None` when the message carries neither reasoning text nor reasoning details.
        Otherwise a `ReasoningContent` whose `reasoning_text` is the flat `reasoning` string
        (or, if that is empty, text rebuilt from the details) and whose `extra` holds the
        normalized details under `"reasoning_details"` when there are any.
    """
    # OpenRouter attaches reasoning content as extra attributes on the standard OpenAI SDK message,
    # so we read them with getattr rather than relying on typed fields.
    reasoning_text = getattr(message, "reasoning", None) or ""
    raw_details = getattr(message, "reasoning_details", None) or []

    if not reasoning_text and not raw_details:
        return None

    # Normalize every detail to a plain dict, whatever object type it arrived as.
    details = []
    for d in raw_details:
        if isinstance(d, dict):
            details.append(d)
        elif hasattr(d, "model_dump"):
            details.append(d.model_dump())
        else:
            details.append(vars(d))

    # Some models only return structured details without a flat `reasoning` string, so we
    # reconstruct the text from the known detail types.
    if not reasoning_text:
        reasoning_text = _reasoning_text_from_details(details)

    extra = {"reasoning_details": details} if details else {}

    return ReasoningContent(reasoning_text=reasoning_text, extra=extra)
64+
65+
def _convert_openrouter_completion_to_chat_message(
    completion: ChatCompletion | ParsedChatCompletion, choice: Choice
) -> ChatMessage:
    """
    Build an assistant `ChatMessage` from one choice of an OpenRouter chat completion.

    :param completion: The completion the choice belongs to; supplies `model` and `usage` for the metadata.
    :param choice: The choice to convert; supplies the message, `index`, `finish_reason` and `logprobs`.
    :returns: An assistant `ChatMessage` carrying the message text, the tool calls whose arguments
        parsed as JSON, the metadata, and any reasoning content found on the message.
    """
    response_message = choice.message
    text = response_message.content

    parsed_tool_calls = []
    for call in response_message.tool_calls or []:
        function = getattr(call, "function", None)
        if function is None:
            # Tool calls without a `function` attribute are skipped.
            continue
        try:
            parsed_arguments = json.loads(function.arguments)
        except json.JSONDecodeError:
            # A tool call with unparsable arguments is dropped, leaving a warning behind.
            logger.warning(
                "OpenRouter returned a malformed JSON string for tool call arguments. "
                "Tool call ID: {_id}, Tool name: {_name}, Arguments: {_arguments}",
                _id=call.id,
                _name=function.name,
                _arguments=function.arguments,
            )
        else:
            parsed_tool_calls.append(ToolCall(id=call.id, tool_name=function.name, arguments=parsed_arguments))

    serialized_logprobs = None
    if choice.logprobs:
        serialized_logprobs = _serialize_object(choice.logprobs)

    meta = {
        "model": completion.model,
        "index": choice.index,
        "finish_reason": choice.finish_reason,
        "usage": _serialize_object(completion.usage),
    }
    # Log probabilities are only added to the metadata when the serialized value is non-empty.
    if serialized_logprobs:
        meta["logprobs"] = serialized_logprobs

    return ChatMessage.from_assistant(
        text=text,
        tool_calls=parsed_tool_calls,
        meta=meta,
        reasoning=_extract_reasoning(response_message),
    )
102+
103+
17104@component
18105class OpenRouterChatGenerator (OpenAIChatGenerator ):
19106 """
@@ -26,9 +113,12 @@ class OpenRouterChatGenerator(OpenAIChatGenerator):
26113 parameter in `run` method.
27114
28115 Key Features and Compatibility:
29- - **Primary Compatibility**: Designed to work seamlessly with the OpenRouter chat completion endpoint.
116+ - **Primary Compatibility**: Compatible with the OpenRouter chat completion endpoint.
30117 - **Streaming Support**: Supports streaming responses from the OpenRouter chat completion endpoint.
31118 - **Customizability**: Supports all parameters supported by the OpenRouter chat completion endpoint.
119+ - **Reasoning Support**: Extracts reasoning/thinking content from models that support it
120+ (e.g., DeepSeek R1, Claude with extended thinking) and stores it in the `ReasoningContent`
121+ field on `ChatMessage`. Reasoning content is only captured for non-streaming requests.
32122
33123 This component uses the ChatMessage format for structuring both input and output,
34124 ensuring coherent and contextually relevant responses in chat-based text generation scenarios.
@@ -40,20 +130,20 @@ class OpenRouterChatGenerator(OpenAIChatGenerator):
40130
41131 Usage example:
42132 ```python
43- from haystack_integrations.components.generators.openrouter import OpenRouterChatGenerator
133+ from haystack_integrations.components.generators.openrouter import (
134+ OpenRouterChatGenerator,
135+ )
44136 from haystack.dataclasses import ChatMessage
45137
46138 messages = [ChatMessage.from_user("What's Natural Language Processing?")]
47139
48- client = OpenRouterChatGenerator()
140+ client = OpenRouterChatGenerator(
141+ model="deepseek/deepseek-r1",
142+ generation_kwargs={"reasoning": {"effort": "high"}},
143+ )
49144 response = client.run(messages)
50- print(response)
51-
52- >>{'replies': [ChatMessage(_content='Natural Language Processing (NLP) is a branch of artificial intelligence
53- >>that focuses on enabling computers to understand, interpret, and generate human language in a way that is
54- >>meaningful and useful.', _role=<ChatRole.ASSISTANT: 'assistant'>, _name=None,
55- >>_meta={'model': 'openai/gpt-5-mini', 'index': 0, 'finish_reason': 'stop',
56- >>'usage': {'prompt_tokens': 15, 'completion_tokens': 36, 'total_tokens': 51}})]}
145+ print(response["replies"][0].reasoning) # Access reasoning content
146+ print(response["replies"][0].text) # Access final answer
57147 ```
58148 """
59149
@@ -98,14 +188,11 @@ def __init__(
98188 events as they become available, with the stream terminated by a data: [DONE] message.
99189 - `safe_prompt`: Whether to inject a safety prompt before all conversations.
100190 - `random_seed`: The seed to use for random sampling.
191+ - `reasoning`: A dict to configure reasoning/thinking tokens for models that support it.
192+ Example: `{"effort": "high"}` or `{"max_tokens": 2000}`.
193+ Reasoning content is only captured for non-streaming requests.
194+ See [OpenRouter reasoning docs](https://openrouter.ai/docs/use-cases/reasoning-tokens).
101195 - `response_format`: A JSON schema or a Pydantic model that enforces the structure of the model's response.
102- If provided, the output will always be validated against this
103- format (unless the model returns a tool call).
104- For details, see the [OpenAI Structured Outputs documentation](https://platform.openai.com/docs/guides/structured-outputs).
105- Notes:
106- - This parameter accepts Pydantic models and JSON schemas for latest models starting from GPT-4o.
107- - For structured outputs with streaming,
108- the `response_format` must be a JSON schema and not a Pydantic model.
109196 :param tools:
110197 A list of tools or a Toolset for which the model can prepare calls. This parameter can accept either a
111198 list of `Tool` objects or a `Toolset` instance.
@@ -187,6 +274,12 @@ def _prepare_api_call(
187274 # adapt ChatMessage(s) to the format expected by the OpenAI API
188275 openai_formatted_messages = [message .to_openai_dict_format () for message in messages ]
189276
277+ # OpenRouter expects reasoning_details to be sent back in multi-turn conversations, but
278+ # to_openai_dict_format() strips reasoning, so we re-inject it into the formatted message dicts.
279+ for i , chat_msg in enumerate (messages ):
280+ if chat_msg .reasoning and chat_msg .reasoning .extra .get ("reasoning_details" ):
281+ openai_formatted_messages [i ]["reasoning_details" ] = chat_msg .reasoning .extra ["reasoning_details" ]
282+
190283 flattened_tools = flatten_tools_or_toolsets (tools or self .tools )
191284 tools_strict = tools_strict if tools_strict is not None else self .tools_strict
192285 _check_duplicate_tool_names (flattened_tools )
@@ -227,3 +320,156 @@ def _prepare_api_call(
227320 if response_format :
228321 final_args ["response_format" ] = response_format
229322 return final_args
323+
@component.output_types(replies=list[ChatMessage])
def run(
    self,
    messages: list[ChatMessage] | str,
    streaming_callback: StreamingCallbackT | None = None,
    generation_kwargs: dict[str, Any] | None = None,
    *,
    tools: ToolsType | None = None,
    tools_strict: bool | None = None,
) -> dict[str, list[ChatMessage]]:
    """
    Run a chat completion against the OpenRouter API.

    :param messages:
        The input conversation as a list of ChatMessage instances.
        A plain string is normalized into a message list before use.
    :param streaming_callback:
        Callback invoked for streamed output. It is resolved together with the callback given at
        initialization; when the resolved callback is set, the response is handled as a stream.
    :param generation_kwargs:
        Extra keyword arguments for text generation. Values given here take precedence over
        those passed at initialization.
        For details on OpenRouter API parameters, see
        [OpenRouter docs](https://openrouter.ai/docs/quickstart).
    :param tools: A list of Tool and/or Toolset objects, or a single Toolset for which the model can prepare calls.
        If set, it will override the `tools` parameter provided during initialization.
    :param tools_strict:
        Whether to enable strict schema adherence for tool calls.

    :returns:
        A dictionary with the following key:
        - `replies`: A list containing the generated responses as ChatMessage instances.
          The list is empty, and no request is made, when there are no input messages.
    """
    messages = _normalize_messages(messages)
    if not self._is_warmed_up:
        self.warm_up()

    # Nothing to send: answer with no replies instead of calling the API.
    if not messages:
        return {"replies": []}

    callback = select_streaming_callback(
        init_callback=self.streaming_callback,
        runtime_callback=streaming_callback,
        requires_async=False,
    )

    # Reasoning is extracted from the complete response message, which the streaming path does not
    # go through, so the user is told that it will be missing.
    if callback is not None:
        effective_kwargs = {**self.generation_kwargs, **(generation_kwargs or {})}
        if effective_kwargs.get("reasoning"):
            logger.warning(
                "Streaming with reasoning is active. Reasoning content will not be captured during "
                "streaming. Use non-streaming mode to extract reasoning content."
            )

    request_args = self._prepare_api_call(
        messages=messages,
        streaming_callback=callback,
        generation_kwargs=generation_kwargs,
        tools=tools,
        tools_strict=tools_strict,
    )
    # The prepared arguments name the client method to call; that entry is not a request parameter.
    endpoint_name = request_args.pop("openai_endpoint")
    endpoint = getattr(self.client.chat.completions, endpoint_name)
    response = endpoint(**request_args)

    if callback is not None:
        # The inherited stream handler is used as-is, so no reasoning extraction happens here.
        replies = self._handle_stream_response(response, callback)
    else:
        assert isinstance(response, ChatCompletion), "Unexpected response type for non-streaming request."
        replies = [_convert_openrouter_completion_to_chat_message(response, choice) for choice in response.choices]

    for reply in replies:
        _check_finish_reason(reply.meta)

    return {"replies": replies}
401+
@component.output_types(replies=list[ChatMessage])
async def run_async(
    self,
    messages: list[ChatMessage] | str,
    streaming_callback: StreamingCallbackT | None = None,
    generation_kwargs: dict[str, Any] | None = None,
    *,
    tools: ToolsType | None = None,
    tools_strict: bool | None = None,
) -> dict[str, list[ChatMessage]]:
    """
    Run a chat completion against the OpenRouter API asynchronously.

    :param messages:
        The input conversation as a list of ChatMessage instances.
        A plain string is normalized into a message list before use.
    :param streaming_callback:
        Callback invoked for streamed output. Must be a coroutine.
        It is resolved together with the callback given at initialization; when the resolved
        callback is set, the response is handled as a stream.
    :param generation_kwargs:
        Additional keyword arguments for text generation.
    :param tools: A list of Tool and/or Toolset objects, or a single Toolset.
    :param tools_strict:
        Whether to enable strict schema adherence for tool calls.

    :returns:
        A dictionary with the following key:
        - `replies`: A list containing the generated responses as ChatMessage instances.
          The list is empty, and no request is made, when there are no input messages.
    """
    messages = _normalize_messages(messages)
    if not self._is_warmed_up:
        self.warm_up()

    # Nothing to send: answer with no replies instead of calling the API.
    if not messages:
        return {"replies": []}

    callback = select_streaming_callback(
        init_callback=self.streaming_callback,
        runtime_callback=streaming_callback,
        requires_async=True,
    )

    # Reasoning is extracted from the complete response message, which the streaming path does not
    # go through, so the user is told that it will be missing.
    if callback is not None:
        effective_kwargs = {**self.generation_kwargs, **(generation_kwargs or {})}
        if effective_kwargs.get("reasoning"):
            logger.warning(
                "Streaming with reasoning is active. Reasoning content will not be captured during "
                "streaming. Use non-streaming mode to extract reasoning content."
            )

    request_args = self._prepare_api_call(
        messages=messages,
        streaming_callback=callback,
        generation_kwargs=generation_kwargs,
        tools=tools,
        tools_strict=tools_strict,
    )
    # The prepared arguments name the client method to call; that entry is not a request parameter.
    endpoint_name = request_args.pop("openai_endpoint")
    endpoint = getattr(self.async_client.chat.completions, endpoint_name)
    response = await endpoint(**request_args)

    if callback is not None:
        # The inherited async stream handler is used as-is, so no reasoning extraction happens here.
        replies = await self._handle_async_stream_response(response, callback)
    else:
        assert isinstance(response, ChatCompletion), "Unexpected response type for non-streaming request."
        replies = [_convert_openrouter_completion_to_chat_message(response, choice) for choice in response.choices]

    for reply in replies:
        _check_finish_reason(reply.meta)

    return {"replies": replies}
0 commit comments