deepset-ai
diff --git a/‎.github/workflows/ollama.yml‎
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/ollama.yml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎integrations/ollama/pyproject.toml‎
Lines changed: 1 addition & 1 deletion b/‎integrations/ollama/pyproject.toml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎integrations/ollama/src/haystack_integrations/components/generators/ollama/chat/chat_generator.py‎
Lines changed: 67 additions & 88 deletions b/‎integrations/ollama/src/haystack_integrations/components/generators/ollama/chat/chat_generator.py‎
Lines changed: 67 additions & 88 deletions
@@ -24,7 +24,7 @@ env:
   FORCE_COLOR: "1"
   LLM_FOR_TESTS: "qwen3:0.6b"
   VISION_LLM_FOR_TESTS: "moondream:1.8b"
-  EMBEDDER_FOR_TESTS: "nomic-embed-text"
+  EMBEDDER_FOR_TESTS: "all-minilm"
 
 jobs:
   run:
 
@@ -27,7 +27,7 @@ classifiers = [
     "Programming Language :: Python :: Implementation :: CPython",
     "Programming Language :: Python :: Implementation :: PyPy",
 ]
-dependencies = ["haystack-ai>=2.16.1", "ollama>=0.4.0", "pydantic"]
+dependencies = ["haystack-ai>=2.17.1", "ollama>=0.4.0", "pydantic"]
 
 [project.urls]
 Documentation = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/ollama#readme"
 
@@ -58,6 +58,8 @@ def _convert_chatmessage_to_ollama_format(message: ChatMessage) -> Dict[str, Any
             {"type": "function", "function": {"name": tool_call.tool_name, "arguments": tool_call.arguments}}
             for tool_call in tool_calls
         ]
+    if message.reasoning:
+        ollama_msg["thinking"] = message.reasoning.reasoning_text
     return ollama_msg
 
 
@@ -131,81 +133,84 @@ def _convert_ollama_response_to_chatmessage(ollama_response: ChatResponse) -> Ch
                 )
             )
 
-    chat_msg = ChatMessage.from_assistant(text=text, tool_calls=tool_calls)
+    reasoning = ollama_message.get("thinking", None)
+
+    chat_msg = ChatMessage.from_assistant(text=text or None, tool_calls=tool_calls, reasoning=reasoning)
 
     chat_msg._meta = _convert_ollama_meta_to_openai_format(response_dict)
 
-    thinking = ollama_message.get("thinking", None)
+    return chat_msg
 
-    if thinking is not None:
-        chat_msg._meta["thinking"] = thinking
 
-    return chat_msg
+def _build_chunk(
+    chunk_response: ChatResponse, component_info: ComponentInfo, index: int, tool_call_index: int
+) -> StreamingChunk:
+    """
+    Convert one Ollama stream-chunk to Haystack StreamingChunk.
+    """
+    chunk_response_dict = chunk_response.model_dump()
+    finish_reason = FINISH_REASON_MAPPING.get(chunk_response.done_reason or "")
+    tool_calls_list = []
+
+    content = chunk_response_dict["message"]["content"]
+
+    meta = {key: value for key, value in chunk_response_dict.items() if key != "message"}
+    meta["role"] = chunk_response_dict["message"]["role"]
+
+    # until a specific field in StreamingChunk is available, we store the thinking in the meta
+    meta["reasoning"] = chunk_response_dict["message"].get("thinking", None)
+
+    if tool_calls := chunk_response_dict["message"].get("tool_calls"):
+        for tool_call in tool_calls:
+            tool_calls_list.append(
+                ToolCallDelta(
+                    index=tool_call_index,
+                    tool_name=tool_call["function"]["name"],
+                    arguments=json.dumps(tool_call["function"]["arguments"])
+                    if tool_call["function"]["arguments"]
+                    else "",
+                )
+            )
+
+    return StreamingChunk(
+        content=content,
+        meta=meta,
+        index=index,
+        finish_reason=finish_reason,
+        component_info=component_info,
+        tool_calls=tool_calls_list,
+    )
 
 
 @component
 class OllamaChatGenerator:
     """
-    Haystack generator for models served by Ollama (https://ollama.ai).
+    Haystack Chat Generator for models served with Ollama (https://ollama.ai).
 
-    * Fully supports streaming.
-    * Correctly passes tool-calls to Haystack when `stream=True`.
+    Supports streaming, tool calls, reasoning, and structured outputs.
 
     Usage example:
     ```python
-    from haystack.components.generators.utils import print_streaming_chunk
-    from haystack.components.agents import Agent
     from haystack_integrations.components.generators.ollama.chat import OllamaChatGenerator
     from haystack.dataclasses import ChatMessage
-    from haystack.tools import Tool
-
-    def echo(query: str) -> str:
-        print(f"Tool executed with QUERY: {query}")
-        return query
 
-    echo_tool = Tool(
-        name="echo_tool",
-        description="Echoes the query (demo tool).",
-        function=echo,
-        parameters={"query": {"type": "string", "description": "Search query"}},
-    )
-    agent = Agent(
-        chat_generator=OllamaChatGenerator(model="mistral-small3.1:24b"),
-        tools=[echo_tool],
-        system_prompt=(
-            "Use tool to print the query to test tools. Do not answer the question, just send the query to the tool"
-        ),
-        max_agent_steps=5,
-        raise_on_tool_invocation_failure=True,
-        streaming_callback=print_streaming_chunk,
-    )
-    messages = [ChatMessage.from_user("This is stream test of tool usage")]
-    result = agent.run(messages=messages)
-    for message in result["messages"]:
-        print("\n======")
-        if message.role == "system":
-            continue
-        elif message.role == "tool":
-            print(f"{message.role}:")
-            print(f"Tool Results: {[tool.result for tool in message.tool_call_results]}")
-            print(f"Used Tools: {[tool.origin.tool_name for tool in message.tool_call_results]}\n")
-        else:
-            print(f"{message.role}: {message.text}")
-            print(f"Used Tools: {[tool.tool_name for tool in message.tool_calls]}\n")
+    llm = OllamaChatGenerator(model="qwen3:0.6b")
+    result = llm.run(messages=[ChatMessage.from_user("What is the capital of France?")])
+    print(result)
     ```
     """
 
     def __init__(
         self,
-        model: str = "orca-mini",
+        model: str = "qwen3:0.6b",
         url: str = "http://localhost:11434",
         generation_kwargs: Optional[Dict[str, Any]] = None,
         timeout: int = 120,
         keep_alive: Optional[Union[float, str]] = None,
         streaming_callback: Optional[Callable[[StreamingChunk], None]] = None,
         tools: Optional[Union[List[Tool], Toolset]] = None,
         response_format: Optional[Union[None, Literal["json"], JsonSchemaValue]] = None,
-        think: bool = False,
+        think: Union[bool, Literal["low", "medium", "high"]] = False,
     ):
         """
         :param model:
@@ -219,9 +224,11 @@ def __init__(
         :param timeout:
             The number of seconds before throwing a timeout error from the Ollama API.
         :param think
-            If True, the modell will "think" before producing a response.
+            If True, the model will "think" before producing a response.
             Only [thinking models](https://ollama.com/search?c=thinking) support this feature.
-            The intermediate "thinking" output can be found in the `meta` property of the returned `ChatMessage`.
+            Some models like gpt-oss support different levels of thinking: "low", "medium", "high".
+            The intermediate "thinking" output can be found by inspecting the `reasoning` property of the returned
+            `ChatMessage`.
         :param keep_alive:
             The option that controls how long the model will stay loaded into memory following the request.
             If not set, it will use the default value from the Ollama (5 minutes).
@@ -295,42 +302,6 @@ def from_dict(cls, data: Dict[str, Any]) -> "OllamaChatGenerator":
             data["init_parameters"]["streaming_callback"] = deserialize_callable(callback_ser)
         return default_from_dict(cls, data)
 
-    @staticmethod
-    def _build_chunk(
-        chunk_response: ChatResponse, component_info: ComponentInfo, index: int, tool_call_index: int
-    ) -> StreamingChunk:
-        """
-        Convert one Ollama stream-chunk to Haystack StreamingChunk.
-        """
-        chunk_response_dict = chunk_response.model_dump()
-        finish_reason = FINISH_REASON_MAPPING.get(chunk_response.done_reason or "")
-        tool_calls_list = []
-
-        content = chunk_response_dict["message"]["content"]
-
-        meta = {key: value for key, value in chunk_response_dict.items() if key != "message"}
-        meta["role"] = chunk_response_dict["message"]["role"]
-        if tool_calls := chunk_response_dict["message"].get("tool_calls"):
-            for tool_call in tool_calls:
-                tool_calls_list.append(
-                    ToolCallDelta(
-                        index=tool_call_index,
-                        tool_name=tool_call["function"]["name"],
-                        arguments=json.dumps(tool_call["function"]["arguments"])
-                        if tool_call["function"]["arguments"]
-                        else "",
-                    )
-                )
-
-        return StreamingChunk(
-            content=content,
-            meta=meta,
-            index=index,
-            finish_reason=finish_reason,
-            component_info=component_info,
-            tool_calls=tool_calls_list,
-        )
-
     def _handle_streaming_response(
         self,
         response_iter: Iterator[ChatResponse],
@@ -355,13 +326,15 @@ def _handle_streaming_response(
         for index, raw in enumerate(response_iter):
             if raw.message.tool_calls:
                 tool_call_index += 1
-            chunk = self._build_chunk(
+            chunk = _build_chunk(
                 chunk_response=raw, component_info=component_info, index=index, tool_call_index=tool_call_index
             )
             chunks.append(chunk)
 
+            start = index == 0 or bool(chunk.tool_calls)
+            chunk.start = start
+
             if chunk.tool_calls:
-                chunk.start = True
                 for tool_call in chunk.tool_calls:
                     # the Ollama server doesn't guarantee an id field in every tool_calls entry.
                     # OpenAI-compatible endpoint (/v1/chat/completions) - recent releases do add an auto-generated id
@@ -389,8 +362,13 @@ def _handle_streaming_response(
 
             if callback:
                 callback(chunk)
+
         # Compose final reply
-        text = "".join(c.content for c in chunks)
+        text = ""
+        reasoning = ""
+        for c in chunks:
+            text += c.content
+            reasoning += c.meta.get("reasoning", None) or ""
 
         tool_calls = []
         for tool_call_id in id_order:
@@ -400,8 +378,9 @@ def _handle_streaming_response(
         # We can't use _convert_streaming_chunks_to_chat_message because
         # we need to map tool_call name and args by order.
         reply = ChatMessage.from_assistant(
-            text=text,
+            text=text or None,
             tool_calls=tool_calls or None,
+            reasoning=reasoning or None,
             meta=_convert_ollama_meta_to_openai_format(chunks[-1].meta) if chunks else {},
         )
Original file line number	Diff line number	Diff line change
`@@ -27,7 +27,7 @@ classifiers = [`
`27`	`27`	`"Programming Language :: Python :: Implementation :: CPython",`
`28`	`28`	`"Programming Language :: Python :: Implementation :: PyPy",`
`29`	`29`	`]`
`30`		`-dependencies = ["haystack-ai>=2.16.1", "ollama>=0.4.0", "pydantic"]`
	`30`	`+dependencies = ["haystack-ai>=2.17.1", "ollama>=0.4.0", "pydantic"]`
`31`	`31`
`32`	`32`	`[project.urls]`
`33`	`33`	`Documentation = "https://github.com/deepset-ai/haystack-core-integrations/tree/main/integrations/ollama#readme"`