diff --git a/integrations/openrouter/pyproject.toml b/integrations/openrouter/pyproject.toml index 62b4613863..bd164300c0 100644 --- a/integrations/openrouter/pyproject.toml +++ b/integrations/openrouter/pyproject.toml @@ -23,7 +23,7 @@ classifiers = [ "Programming Language :: Python :: Implementation :: CPython", "Programming Language :: Python :: Implementation :: PyPy", ] -dependencies = ["haystack-ai>=2.22.0"] +dependencies = ["haystack-ai>=2.30.0"] [project.urls] Documentation = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/openrouter#readme" diff --git a/integrations/openrouter/src/haystack_integrations/components/generators/openrouter/chat/chat_generator.py b/integrations/openrouter/src/haystack_integrations/components/generators/openrouter/chat/chat_generator.py index f79245cdcc..7647a92dde 100644 --- a/integrations/openrouter/src/haystack_integrations/components/generators/openrouter/chat/chat_generator.py +++ b/integrations/openrouter/src/haystack_integrations/components/generators/openrouter/chat/chat_generator.py @@ -2,18 +2,105 @@ # # SPDX-License-Identifier: Apache-2.0 +import json from typing import Any from haystack import component, default_to_dict, logging from haystack.components.generators.chat import OpenAIChatGenerator -from haystack.dataclasses import ChatMessage, StreamingCallbackT +from haystack.components.generators.chat.openai import _check_finish_reason +from haystack.components.generators.utils import _normalize_messages, _serialize_object +from haystack.dataclasses import ( + ChatMessage, + ReasoningContent, + StreamingCallbackT, + ToolCall, + select_streaming_callback, +) from haystack.tools import ToolsType, _check_duplicate_tool_names, flatten_tools_or_toolsets, serialize_tools_or_toolset from haystack.utils import serialize_callable from haystack.utils.auth import Secret +from openai.types.chat import ChatCompletion, ParsedChatCompletion +from openai.types.chat.chat_completion import Choice logger 
def _normalize_reasoning_details(raw_details: Any) -> list[dict[str, Any]]:
    """
    Convert raw `reasoning_details` entries into plain dictionaries.

    :param raw_details:
        Iterable of entries as found on the response message. Dictionaries are kept as they are,
        objects exposing `model_dump()` are dumped, and other objects are converted through their
        instance `__dict__`.
    :returns:
        A list of dictionaries. Entries that cannot be converted are dropped.
    """
    details: list[dict[str, Any]] = []
    for entry in raw_details:
        if entry is None:
            # A null entry carries no information, and `vars(None)` would raise a TypeError.
            continue
        if isinstance(entry, dict):
            details.append(entry)
        elif hasattr(entry, "model_dump"):
            details.append(entry.model_dump())
        elif hasattr(entry, "__dict__"):
            details.append(vars(entry))
        else:
            # `vars()` raises TypeError for objects without `__dict__`; skip them instead of
            # failing the whole response conversion.
            logger.warning(
                "Skipping a reasoning detail of unsupported type {_type} returned by OpenRouter.",
                _type=type(entry).__name__,
            )
    return details


def _reasoning_text_from_details(details: list[dict[str, Any]]) -> str:
    """
    Rebuild the flat reasoning text from structured reasoning details.

    Only `reasoning.text` and `reasoning.summary` entries contribute; other types are ignored.
    """
    parts = []
    for detail in details:
        detail_type = detail.get("type", "")
        # `or ""` also covers keys that are present with a null value, which would otherwise
        # make `"".join` fail.
        if detail_type == "reasoning.text":
            parts.append(detail.get("text") or "")
        elif detail_type == "reasoning.summary":
            parts.append(detail.get("summary") or "")
    return "".join(parts)


def _extract_reasoning(message: Any) -> ReasoningContent | None:
    """
    Extract reasoning content from an OpenRouter API response message.

    :param message:
        The response message. Reasoning is read from its optional `reasoning` and
        `reasoning_details` attributes.
    :returns:
        A `ReasoningContent` whose `extra["reasoning_details"]` holds the details as dictionaries,
        or `None` if the message carries neither reasoning text nor usable details.
    """
    # OpenRouter attaches reasoning content as extra attributes on the standard OpenAI SDK message,
    # so we read them with getattr rather than relying on typed fields.
    reasoning_text = getattr(message, "reasoning", None) or ""
    details = _normalize_reasoning_details(getattr(message, "reasoning_details", None) or [])

    if not reasoning_text and not details:
        return None

    # Some models only return structured details without a flat `reasoning` string, so we
    # reconstruct the text from the known detail types.
    if not reasoning_text:
        reasoning_text = _reasoning_text_from_details(details)

    extra = {"reasoning_details": details} if details else {}
    return ReasoningContent(reasoning_text=reasoning_text, extra=extra)


def _convert_openrouter_completion_to_chat_message(
    completion: ChatCompletion | ParsedChatCompletion, choice: Choice
) -> ChatMessage:
    """
    Convert an OpenRouter chat completion to a ChatMessage, including reasoning content.

    :param completion:
        The completion returned by the API; provides the `model` and `usage` metadata.
    :param choice:
        The choice of `completion` to convert.
    :returns:
        An assistant ChatMessage with text, tool calls, metadata (`model`, `index`, `finish_reason`,
        `usage` and, when present, `logprobs`) and the extracted reasoning. Tool calls whose
        arguments are not valid JSON are skipped with a warning.
    """
    message = choice.message

    tool_calls = []
    for tool_call in message.tool_calls or []:
        function = getattr(tool_call, "function", None)
        if function is None:
            # Tool calls without a `function` payload cannot be mapped to a ToolCall.
            continue
        try:
            arguments = json.loads(function.arguments)
        except json.JSONDecodeError:
            logger.warning(
                "OpenRouter returned a malformed JSON string for tool call arguments. "
                "Tool call ID: {_id}, Tool name: {_name}, Arguments: {_arguments}",
                _id=tool_call.id,
                _name=function.name,
                _arguments=function.arguments,
            )
            continue
        tool_calls.append(ToolCall(id=tool_call.id, tool_name=function.name, arguments=arguments))

    meta = {
        "model": completion.model,
        "index": choice.index,
        "finish_reason": choice.finish_reason,
        "usage": _serialize_object(completion.usage),
    }
    logprobs = _serialize_object(choice.logprobs) if choice.logprobs else None
    if logprobs:
        meta["logprobs"] = logprobs

    return ChatMessage.from_assistant(
        text=message.content,
        tool_calls=tool_calls,
        meta=meta,
        reasoning=_extract_reasoning(message),
    )
@@ -40,20 +130,20 @@ class OpenRouterChatGenerator(OpenAIChatGenerator): Usage example: ```python - from haystack_integrations.components.generators.openrouter import OpenRouterChatGenerator + from haystack_integrations.components.generators.openrouter import ( + OpenRouterChatGenerator, + ) from haystack.dataclasses import ChatMessage messages = [ChatMessage.from_user("What's Natural Language Processing?")] - client = OpenRouterChatGenerator() + client = OpenRouterChatGenerator( + model="deepseek/deepseek-r1", + generation_kwargs={"reasoning": {"effort": "high"}}, + ) response = client.run(messages) - print(response) - - >>{'replies': [ChatMessage(_content='Natural Language Processing (NLP) is a branch of artificial intelligence - >>that focuses on enabling computers to understand, interpret, and generate human language in a way that is - >>meaningful and useful.', _role=, _name=None, - >>_meta={'model': 'openai/gpt-5-mini', 'index': 0, 'finish_reason': 'stop', - >>'usage': {'prompt_tokens': 15, 'completion_tokens': 36, 'total_tokens': 51}})]} + print(response["replies"][0].reasoning) # Access reasoning content + print(response["replies"][0].text) # Access final answer ``` """ @@ -98,14 +188,11 @@ def __init__( events as they become available, with the stream terminated by a data: [DONE] message. - `safe_prompt`: Whether to inject a safety prompt before all conversations. - `random_seed`: The seed to use for random sampling. + - `reasoning`: A dict to configure reasoning/thinking tokens for models that support it. + Example: `{"effort": "high"}` or `{"max_tokens": 2000}`. + Reasoning content is only captured for non-streaming requests. + See [OpenRouter reasoning docs](https://openrouter.ai/docs/use-cases/reasoning-tokens). - `response_format`: A JSON schema or a Pydantic model that enforces the structure of the model's response. - If provided, the output will always be validated against this - format (unless the model returns a tool call). 
# NOTE: the definitions below are methods of `OpenRouterChatGenerator`.


def _warn_if_reasoning_with_streaming(
    self, streaming_callback: StreamingCallbackT | None, generation_kwargs: dict[str, Any] | None
) -> None:
    """
    Log a warning when a request combines streaming with the `reasoning` generation kwarg.

    :param streaming_callback:
        The callback selected for this request, or `None` for non-streaming requests.
    :param generation_kwargs:
        Runtime generation kwargs; they take precedence over the ones set at initialization.
    """
    if streaming_callback is None:
        return
    # Reasoning content is reconstructed from the full response message, which is not available while
    # streaming, so we warn the user that it will not be captured in this mode.
    merged_kwargs = {**self.generation_kwargs, **(generation_kwargs or {})}
    if merged_kwargs.get("reasoning"):
        logger.warning(
            "Streaming with reasoning is active. Reasoning content will not be captured during "
            "streaming. Use non-streaming mode to extract reasoning content."
        )


@staticmethod
def _completion_to_replies(chat_completion: Any) -> list[ChatMessage]:
    """Convert every choice of a non-streaming completion to a ChatMessage, including reasoning."""
    assert isinstance(chat_completion, ChatCompletion), "Unexpected response type for non-streaming request."
    return [
        _convert_openrouter_completion_to_chat_message(chat_completion, choice)
        for choice in chat_completion.choices
    ]


@component.output_types(replies=list[ChatMessage])
def run(
    self,
    messages: list[ChatMessage] | str,
    streaming_callback: StreamingCallbackT | None = None,
    generation_kwargs: dict[str, Any] | None = None,
    *,
    tools: ToolsType | None = None,
    tools_strict: bool | None = None,
) -> dict[str, list[ChatMessage]]:
    """
    Invokes chat completion on the OpenRouter API.

    Reasoning content is only extracted for non-streaming requests; when streaming, the inherited
    stream handler is used and a warning is logged if `reasoning` is set in the generation kwargs.

    :param messages:
        A list of ChatMessage instances representing the input messages.
        If a string is provided, it is converted to a list containing a ChatMessage with user role.
    :param streaming_callback:
        A callback function that is called when a new token is received from the stream.
    :param generation_kwargs:
        Additional keyword arguments for text generation. These parameters will
        override the parameters passed during component initialization.
        For details on OpenRouter API parameters, see
        [OpenRouter docs](https://openrouter.ai/docs/quickstart).
    :param tools: A list of Tool and/or Toolset objects, or a single Toolset for which the model can prepare calls.
        If set, it will override the `tools` parameter provided during initialization.
    :param tools_strict:
        Whether to enable strict schema adherence for tool calls.

    :returns:
        A dictionary with the following key:
        - `replies`: A list containing the generated responses as ChatMessage instances.
    """
    messages = _normalize_messages(messages)
    if not self._is_warmed_up:
        self.warm_up()

    if not messages:
        return {"replies": []}

    streaming_callback = select_streaming_callback(
        init_callback=self.streaming_callback, runtime_callback=streaming_callback, requires_async=False
    )
    self._warn_if_reasoning_with_streaming(streaming_callback, generation_kwargs)

    api_args = self._prepare_api_call(
        messages=messages,
        streaming_callback=streaming_callback,
        generation_kwargs=generation_kwargs,
        tools=tools,
        tools_strict=tools_strict,
    )
    # `_prepare_api_call` also returns the name of the client method to invoke.
    openai_endpoint = api_args.pop("openai_endpoint")
    chat_completion = getattr(self.client.chat.completions, openai_endpoint)(**api_args)

    if streaming_callback is not None:
        # streaming uses the inherited handler so reasoning extraction is intentionally skipped
        completions = self._handle_stream_response(chat_completion, streaming_callback)
    else:
        completions = self._completion_to_replies(chat_completion)

    for message in completions:
        _check_finish_reason(message.meta)

    return {"replies": completions}


@component.output_types(replies=list[ChatMessage])
async def run_async(
    self,
    messages: list[ChatMessage] | str,
    streaming_callback: StreamingCallbackT | None = None,
    generation_kwargs: dict[str, Any] | None = None,
    *,
    tools: ToolsType | None = None,
    tools_strict: bool | None = None,
) -> dict[str, list[ChatMessage]]:
    """
    Asynchronously invokes chat completion on the OpenRouter API.

    Reasoning content is only extracted for non-streaming requests; when streaming, the inherited
    async stream handler is used and a warning is logged if `reasoning` is set in the generation kwargs.

    :param messages:
        A list of ChatMessage instances representing the input messages.
        If a string is provided, it is converted to a list containing a ChatMessage with user role.
    :param streaming_callback:
        A callback function that is called when a new token is received from the stream.
        Must be a coroutine.
    :param generation_kwargs:
        Additional keyword arguments for text generation. These parameters will
        override the parameters passed during component initialization.
    :param tools: A list of Tool and/or Toolset objects, or a single Toolset.
        If set, it will override the `tools` parameter provided during initialization.
    :param tools_strict:
        Whether to enable strict schema adherence for tool calls.

    :returns:
        A dictionary with the following key:
        - `replies`: A list containing the generated responses as ChatMessage instances.
    """
    messages = _normalize_messages(messages)
    if not self._is_warmed_up:
        self.warm_up()

    if not messages:
        return {"replies": []}

    streaming_callback = select_streaming_callback(
        init_callback=self.streaming_callback, runtime_callback=streaming_callback, requires_async=True
    )
    self._warn_if_reasoning_with_streaming(streaming_callback, generation_kwargs)

    api_args = self._prepare_api_call(
        messages=messages,
        streaming_callback=streaming_callback,
        generation_kwargs=generation_kwargs,
        tools=tools,
        tools_strict=tools_strict,
    )
    # `_prepare_api_call` also returns the name of the client method to invoke.
    openai_endpoint = api_args.pop("openai_endpoint")
    chat_completion = await getattr(self.async_client.chat.completions, openai_endpoint)(**api_args)

    if streaming_callback is not None:
        # streaming uses the inherited handler so reasoning extraction is intentionally skipped
        completions = await self._handle_async_stream_response(chat_completion, streaming_callback)
    else:
        completions = self._completion_to_replies(chat_completion)

    for message in completions:
        _check_finish_reason(message.meta)

    return {"replies": completions}
def _usage(prompt_tokens: int, completion_tokens: int) -> dict:
    """Build a usage payload whose total is the sum of prompt and completion tokens."""
    return {
        "prompt_tokens": prompt_tokens,
        "completion_tokens": completion_tokens,
        "total_tokens": prompt_tokens + completion_tokens,
    }


def _build_completion(
    message: ChatCompletionMessage,
    *,
    completion_id: str,
    usage: dict,
    model: str = "openai/gpt-5-mini",
    finish_reason: str = "stop",
    logprobs: dict | None = None,
) -> ChatCompletion:
    """Build a single-choice ChatCompletion around `message` for the reasoning tests."""
    return ChatCompletion(
        id=completion_id,
        model=model,
        object="chat.completion",
        choices=[Choice(finish_reason=finish_reason, logprobs=logprobs, index=0, message=message)],
        created=int(datetime.now(tz=pytz.timezone("UTC")).timestamp()),
        usage=usage,
    )


class TestReasoningSupport:
    """Unit tests for reasoning extraction, completion conversion and the `run` entry point."""

    def test_extract_reasoning_with_text_and_details(self):
        msg = SimpleNamespace(
            reasoning="Let me think step by step...",
            reasoning_details=[{"type": "reasoning.text", "text": "Let me think step by step..."}],
        )
        result = _extract_reasoning(msg)
        assert result is not None
        assert result.reasoning_text == "Let me think step by step..."
        assert result.extra["reasoning_details"] == [{"type": "reasoning.text", "text": "Let me think step by step..."}]

    def test_extract_reasoning_returns_none_without_reasoning(self):
        msg = SimpleNamespace()
        result = _extract_reasoning(msg)
        assert result is None

    def test_extract_reasoning_from_details_only(self):
        msg = SimpleNamespace(
            reasoning=None,
            reasoning_details=[
                {"type": "reasoning.text", "text": "Step 1. "},
                {"type": "reasoning.summary", "summary": "Conclusion."},
            ],
        )
        result = _extract_reasoning(msg)
        assert result is not None
        assert result.reasoning_text == "Step 1. Conclusion."
        assert len(result.extra["reasoning_details"]) == 2

    def test_extract_reasoning_handles_model_dump_objects(self):
        detail = Mock()
        detail.model_dump.return_value = {"type": "reasoning.text", "text": "Thinking..."}
        msg = SimpleNamespace(
            reasoning="Thinking...",
            reasoning_details=[detail],
        )
        result = _extract_reasoning(msg)
        assert result is not None
        assert result.extra["reasoning_details"] == [{"type": "reasoning.text", "text": "Thinking..."}]

    def test_extract_reasoning_vars_fallback(self):
        detail = SimpleNamespace(type="reasoning.text", text="Fallback path")
        msg = SimpleNamespace(
            reasoning="Fallback path",
            reasoning_details=[detail],
        )
        result = _extract_reasoning(msg)
        assert result is not None
        assert result.extra["reasoning_details"] == [{"type": "reasoning.text", "text": "Fallback path"}]

    def test_extract_reasoning_unknown_detail_type(self):
        msg = SimpleNamespace(
            reasoning=None,
            reasoning_details=[
                {"type": "reasoning.internal_monologue", "content": "hidden"},
                {"type": "reasoning.text", "text": "Visible."},
            ],
        )
        result = _extract_reasoning(msg)
        assert result is not None
        assert result.reasoning_text == "Visible."
        assert len(result.extra["reasoning_details"]) == 2

    def test_convert_completion_with_reasoning_and_tool_calls(self):
        completion = _build_completion(
            ChatCompletionMessage(
                content=None,
                role="assistant",
                reasoning="I need to check the weather.",
                reasoning_details=[{"type": "reasoning.text", "text": "I need to check the weather."}],
                tool_calls=[
                    {
                        "id": "call_abc123",
                        "type": "function",
                        "function": {"name": "weather", "arguments": '{"city": "Paris"}'},
                    }
                ],
            ),
            completion_id="test-reasoning-tools",
            model="deepseek/deepseek-r1",
            finish_reason="tool_calls",
            usage=_usage(30, 20),
        )

        result = _convert_openrouter_completion_to_chat_message(completion, completion.choices[0])
        assert result.text is None
        assert result.reasoning is not None
        assert result.reasoning.reasoning_text == "I need to check the weather."
        assert len(result.tool_calls) == 1
        assert result.tool_calls[0].tool_name == "weather"
        assert result.tool_calls[0].arguments == {"city": "Paris"}
        assert result.meta["finish_reason"] == "tool_calls"

    def test_convert_completion_with_reasoning(self):
        completion = _build_completion(
            ChatCompletionMessage(
                content="The answer is 42.",
                role="assistant",
                reasoning="Let me think about this...",
                reasoning_details=[{"type": "reasoning.text", "text": "Let me think about this..."}],
            ),
            completion_id="test-reasoning",
            model="deepseek/deepseek-r1",
            usage=_usage(57, 40),
        )

        result = _convert_openrouter_completion_to_chat_message(completion, completion.choices[0])
        assert result.text == "The answer is 42."
        assert result.reasoning is not None
        assert result.reasoning.reasoning_text == "Let me think about this..."
        assert result.reasoning.extra["reasoning_details"] == [
            {"type": "reasoning.text", "text": "Let me think about this..."}
        ]
        assert result.meta["model"] == "deepseek/deepseek-r1"
        assert result.meta["finish_reason"] == "stop"

    def test_convert_completion_without_reasoning(self):
        completion = _build_completion(
            ChatCompletionMessage(content="Hello!", role="assistant"),
            completion_id="test-no-reasoning",
            usage=_usage(10, 5),
        )

        result = _convert_openrouter_completion_to_chat_message(completion, completion.choices[0])
        assert result.text == "Hello!"
        assert result.reasoning is None

    def test_run_with_reasoning(self, monkeypatch):
        monkeypatch.setenv("OPENROUTER_API_KEY", "fake-api-key")

        with patch("openai.resources.chat.completions.Completions.create") as mock_create:
            mock_create.return_value = _build_completion(
                ChatCompletionMessage(
                    content="Paris is the capital of France.",
                    role="assistant",
                    reasoning="The user asked about capitals. France's capital is Paris.",
                    reasoning_details=[
                        {
                            "type": "reasoning.text",
                            "text": "The user asked about capitals. France's capital is Paris.",
                        }
                    ],
                ),
                completion_id="test-run-reasoning",
                model="deepseek/deepseek-r1",
                usage=_usage(57, 40),
            )

            component = OpenRouterChatGenerator(
                model="deepseek/deepseek-r1",
                generation_kwargs={"reasoning": {"effort": "high"}},
            )
            response = component.run([ChatMessage.from_user("What's the capital of France?")])

        assert len(response["replies"]) == 1
        reply = response["replies"][0]
        assert reply.text == "Paris is the capital of France."
        assert reply.reasoning is not None
        assert "capitals" in reply.reasoning.reasoning_text
        assert reply.reasoning.extra["reasoning_details"][0]["type"] == "reasoning.text"

    def test_run_with_string_input(self, monkeypatch):
        monkeypatch.setenv("OPENROUTER_API_KEY", "fake-api-key")

        with patch("openai.resources.chat.completions.Completions.create") as mock_create:
            mock_create.return_value = _build_completion(
                ChatCompletionMessage(content="Paris.", role="assistant"),
                completion_id="test-string-input",
                usage=_usage(10, 5),
            )

            component = OpenRouterChatGenerator()
            response = component.run("What's the capital of France?")

            # the backend should receive exactly one user message
            _, kwargs = mock_create.call_args
            assert kwargs["messages"] == [{"role": "user", "content": "What's the capital of France?"}]

        assert len(response["replies"]) == 1
        assert response["replies"][0].text == "Paris."

    def test_streaming_with_reasoning_logs_warning(self, monkeypatch, caplog):
        monkeypatch.setenv("OPENROUTER_API_KEY", "fake-api-key")
        component = OpenRouterChatGenerator(
            generation_kwargs={"reasoning": {"effort": "high"}},
            streaming_callback=print_streaming_chunk,
        )

        # _prepare_api_call is patched to fail fast: we only care that the warning is emitted beforehand.
        with (
            caplog.at_level(logging.WARNING),
            patch.object(component, "_prepare_api_call", side_effect=RuntimeError),
            pytest.raises(RuntimeError),
        ):
            component.run([ChatMessage.from_user("test")])

        assert "Reasoning content will not be captured during streaming" in caplog.text

    def test_prepare_api_call_preserves_reasoning(self, monkeypatch):
        monkeypatch.setenv("OPENROUTER_API_KEY", "fake-api-key")
        component = OpenRouterChatGenerator()

        reasoning = ReasoningContent(
            reasoning_text="Step by step analysis...",
            extra={"reasoning_details": [{"type": "reasoning.text", "text": "Step by step analysis..."}]},
        )
        messages = [
            ChatMessage.from_user("Explain quantum computing."),
            ChatMessage.from_assistant(text="Quantum computing uses qubits.", reasoning=reasoning),
            ChatMessage.from_user("Tell me more."),
        ]

        api_args = component._prepare_api_call(messages=messages)
        formatted = api_args["messages"]

        # only the assistant message that carried reasoning gets `reasoning_details` re-injected
        assert "reasoning_details" in formatted[1]
        assert formatted[1]["reasoning_details"] == [{"type": "reasoning.text", "text": "Step by step analysis..."}]
        assert "reasoning_details" not in formatted[0]
        assert "reasoning_details" not in formatted[2]

    def test_run_empty_messages(self, monkeypatch):
        monkeypatch.setenv("OPENROUTER_API_KEY", "fake-api-key")
        component = OpenRouterChatGenerator()
        response = component.run([])
        assert response == {"replies": []}

    def test_convert_completion_with_logprobs(self):
        completion = _build_completion(
            ChatCompletionMessage(content="Hello!", role="assistant"),
            completion_id="test-logprobs",
            logprobs={
                "content": [
                    {"token": "Hello", "logprob": -0.5, "top_logprobs": [], "bytes": [72, 101, 108, 108, 111]}
                ]
            },
            usage=_usage(5, 5),
        )
        result = _convert_openrouter_completion_to_chat_message(completion, completion.choices[0])
        assert result.text == "Hello!"
        assert "logprobs" in result.meta

    def test_convert_completion_malformed_tool_call_json(self):
        completion = _build_completion(
            ChatCompletionMessage(
                content=None,
                role="assistant",
                tool_calls=[
                    {
                        "id": "call_bad",
                        "type": "function",
                        "function": {"name": "weather", "arguments": "{invalid json}"},
                    },
                    {
                        "id": "call_good",
                        "type": "function",
                        "function": {"name": "weather", "arguments": '{"city": "Paris"}'},
                    },
                ],
            ),
            completion_id="test-bad-json",
            finish_reason="tool_calls",
            usage=_usage(10, 15),
        )
        result = _convert_openrouter_completion_to_chat_message(completion, completion.choices[0])
        # the malformed call is skipped, the valid one is kept
        assert len(result.tool_calls) == 1
        assert result.tool_calls[0].id == "call_good"
# NOTE: the definitions below are test methods of the async test class in
# test_openrouter_chat_generator_async.py.


@pytest.mark.asyncio
async def test_run_async_with_reasoning(self, monkeypatch):
    """`run_async` exposes the reasoning text and details of a non-streaming completion."""
    monkeypatch.setenv("OPENROUTER_API_KEY", "fake-api-key")

    assistant_message = ChatCompletionMessage(
        content="Paris is the capital of France.",
        role="assistant",
        reasoning="The user asked about capitals. France's capital is Paris.",
        reasoning_details=[
            {
                "type": "reasoning.text",
                "text": "The user asked about capitals. France's capital is Paris.",
            }
        ],
    )
    fake_completion = ChatCompletion(
        id="test-async-reasoning",
        model="deepseek/deepseek-r1",
        object="chat.completion",
        choices=[Choice(finish_reason="stop", logprobs=None, index=0, message=assistant_message)],
        created=int(datetime.now(tz=pytz.timezone("UTC")).timestamp()),
        usage={"prompt_tokens": 57, "completion_tokens": 40, "total_tokens": 97},
    )

    with patch(
        "openai.resources.chat.completions.AsyncCompletions.create",
        new_callable=AsyncMock,
    ) as mock_create:
        mock_create.return_value = fake_completion
        generator = OpenRouterChatGenerator(
            model="deepseek/deepseek-r1",
            generation_kwargs={"reasoning": {"effort": "high"}},
        )
        response = await generator.run_async([ChatMessage.from_user("What's the capital of France?")])

    replies = response["replies"]
    assert len(replies) == 1
    reply = replies[0]
    assert reply.text == "Paris is the capital of France."
    assert reply.reasoning is not None
    assert "capitals" in reply.reasoning.reasoning_text
    assert reply.reasoning.extra["reasoning_details"][0]["type"] == "reasoning.text"


@pytest.mark.asyncio
async def test_run_async_empty_messages(self, monkeypatch):
    """An empty message list yields an empty `replies` list."""
    monkeypatch.setenv("OPENROUTER_API_KEY", "fake-api-key")
    generator = OpenRouterChatGenerator()
    assert await generator.run_async([]) == {"replies": []}


@pytest.mark.asyncio
async def test_run_async_with_string_input(self, monkeypatch):
    """A plain string prompt is sent to the backend as a single user message."""
    monkeypatch.setenv("OPENROUTER_API_KEY", "fake-api-key")

    fake_completion = ChatCompletion(
        id="test-async-string-input",
        model="openai/gpt-5-mini",
        object="chat.completion",
        choices=[
            Choice(
                finish_reason="stop",
                logprobs=None,
                index=0,
                message=ChatCompletionMessage(content="Paris.", role="assistant"),
            )
        ],
        created=int(datetime.now(tz=pytz.timezone("UTC")).timestamp()),
        usage={"prompt_tokens": 10, "completion_tokens": 5, "total_tokens": 15},
    )

    with patch(
        "openai.resources.chat.completions.AsyncCompletions.create",
        new_callable=AsyncMock,
    ) as mock_create:
        mock_create.return_value = fake_completion
        generator = OpenRouterChatGenerator()
        response = await generator.run_async("What's the capital of France?")

    # the backend should receive exactly one user message
    _, sent_kwargs = mock_create.call_args
    assert sent_kwargs["messages"] == [{"role": "user", "content": "What's the capital of France?"}]

    replies = response["replies"]
    assert len(replies) == 1
    assert replies[0].text == "Paris."