Skip to content

Commit ec37138

Browse files
authored
refactor: Update to StreamingChunk, better index setting and change tool_call to tool_calls (#9525)
* Fixes to setting StreamingChunk.index properly and refactoring tests for conversion * Make _convert_chat_completion_chunk_to_streaming_chunk a member of OpenAIChatGenerator so we can overwrite it in integrations that inherit from it * Fixes * Modify streaming chunk to accept a list of tool call deltas. * Fix tests * Fix mypy and update original reno * Undo change * Update conversion to return a single streaming chunk * update to print streaming chunk * Fix types * PR comments
1 parent f911459 commit ec37138

8 files changed

Lines changed: 620 additions & 393 deletions

File tree

haystack/components/generators/chat/openai.py

Lines changed: 54 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -427,12 +427,11 @@ def _handle_stream_response(self, chat_completion: Stream, callback: SyncStreami
427427
chunks: List[StreamingChunk] = []
428428
for chunk in chat_completion: # pylint: disable=not-an-iterable
429429
assert len(chunk.choices) <= 1, "Streaming responses should have at most one choice."
430-
chunk_deltas = _convert_chat_completion_chunk_to_streaming_chunk(
430+
chunk_delta = _convert_chat_completion_chunk_to_streaming_chunk(
431431
chunk=chunk, previous_chunks=chunks, component_info=component_info
432432
)
433-
for chunk_delta in chunk_deltas:
434-
chunks.append(chunk_delta)
435-
callback(chunk_delta)
433+
chunks.append(chunk_delta)
434+
callback(chunk_delta)
436435
return [_convert_streaming_chunks_to_chat_message(chunks=chunks)]
437436

438437
async def _handle_async_stream_response(
@@ -442,12 +441,11 @@ async def _handle_async_stream_response(
442441
chunks: List[StreamingChunk] = []
443442
async for chunk in chat_completion: # pylint: disable=not-an-iterable
444443
assert len(chunk.choices) <= 1, "Streaming responses should have at most one choice."
445-
chunk_deltas = _convert_chat_completion_chunk_to_streaming_chunk(
444+
chunk_delta = _convert_chat_completion_chunk_to_streaming_chunk(
446445
chunk=chunk, previous_chunks=chunks, component_info=component_info
447446
)
448-
for chunk_delta in chunk_deltas:
449-
chunks.append(chunk_delta)
450-
await callback(chunk_delta)
447+
chunks.append(chunk_delta)
448+
await callback(chunk_delta)
451449
return [_convert_streaming_chunks_to_chat_message(chunks=chunks)]
452450

453451

@@ -509,7 +507,7 @@ def _convert_chat_completion_to_chat_message(completion: ChatCompletion, choice:
509507

510508
def _convert_chat_completion_chunk_to_streaming_chunk(
511509
chunk: ChatCompletionChunk, previous_chunks: List[StreamingChunk], component_info: Optional[ComponentInfo] = None
512-
) -> List[StreamingChunk]:
510+
) -> StreamingChunk:
513511
"""
514512
Converts the streaming response chunk from the OpenAI API to a StreamingChunk.
515513
@@ -521,61 +519,68 @@ def _convert_chat_completion_chunk_to_streaming_chunk(
521519
:returns:
522520
A StreamingChunk object representing the content of the chunk from the OpenAI API.
523521
"""
524-
# Choices is empty on the very first chunk which provides role information (e.g. "assistant").
525-
# It is also empty if include_usage is set to True where the usage information is returned.
522+
# On very first chunk so len(previous_chunks) == 0, the Choices field only provides role info (e.g. "assistant")
523+
# Choices is empty if include_usage is set to True where the usage information is returned.
526524
if len(chunk.choices) == 0:
527-
return [
528-
StreamingChunk(
529-
content="",
530-
component_info=component_info,
531-
# Index is None since it's only set to an int when a content block is present
532-
index=None,
533-
meta={
534-
"model": chunk.model,
535-
"received_at": datetime.now().isoformat(),
536-
"usage": _serialize_usage(chunk.usage),
537-
},
538-
)
539-
]
525+
return StreamingChunk(
526+
content="",
527+
component_info=component_info,
528+
# Index is None since it's only set to an int when a content block is present
529+
index=None,
530+
meta={
531+
"model": chunk.model,
532+
"received_at": datetime.now().isoformat(),
533+
"usage": _serialize_usage(chunk.usage),
534+
},
535+
)
540536

541537
choice: ChunkChoice = chunk.choices[0]
542-
content = choice.delta.content or ""
543538

544539
# create a list of ToolCallDelta objects from the tool calls
545540
if choice.delta.tool_calls:
546-
chunk_messages = []
541+
tool_calls_deltas = []
547542
for tool_call in choice.delta.tool_calls:
548543
function = tool_call.function
549-
chunk_message = StreamingChunk(
550-
content=content,
551-
# We adopt the tool_call.index as the index of the chunk
552-
component_info=component_info,
553-
index=tool_call.index,
554-
tool_call=ToolCallDelta(
544+
tool_calls_deltas.append(
545+
ToolCallDelta(
546+
index=tool_call.index,
555547
id=tool_call.id,
556548
tool_name=function.name if function else None,
557549
arguments=function.arguments if function and function.arguments else None,
558-
),
559-
start=function.name is not None if function else False,
560-
meta={
561-
"model": chunk.model,
562-
"index": choice.index,
563-
"tool_calls": choice.delta.tool_calls,
564-
"finish_reason": choice.finish_reason,
565-
"received_at": datetime.now().isoformat(),
566-
"usage": _serialize_usage(chunk.usage),
567-
},
550+
)
568551
)
569-
chunk_messages.append(chunk_message)
570-
return chunk_messages
552+
chunk_message = StreamingChunk(
553+
content=choice.delta.content or "",
554+
component_info=component_info,
555+
# We adopt the first tool_calls_deltas.index as the overall index of the chunk.
556+
index=tool_calls_deltas[0].index,
557+
tool_calls=tool_calls_deltas,
558+
start=tool_calls_deltas[0].tool_name is not None,
559+
meta={
560+
"model": chunk.model,
561+
"index": choice.index,
562+
"tool_calls": choice.delta.tool_calls,
563+
"finish_reason": choice.finish_reason,
564+
"received_at": datetime.now().isoformat(),
565+
"usage": _serialize_usage(chunk.usage),
566+
},
567+
)
568+
return chunk_message
571569

572-
chunk_message = StreamingChunk(
573-
content=content,
574-
component_info=component_info,
570+
# On very first chunk the choice field only provides role info (e.g. "assistant") so we set index to None
571+
# We set all chunks missing the content field to index of None. E.g. can happen if chunk only contains finish
572+
# reason.
573+
if choice.delta.content is None or choice.delta.role is not None:
574+
resolved_index = None
575+
else:
575576
# We set the index to be 0 since if text content is being streamed then no tool calls are being streamed
576577
# NOTE: We may need to revisit this if OpenAI allows planning/thinking content before tool calls like
577578
# Anthropic Claude
578-
index=0,
579+
resolved_index = 0
580+
chunk_message = StreamingChunk(
581+
content=choice.delta.content or "",
582+
component_info=component_info,
583+
index=resolved_index,
579584
# The first chunk is always a start message chunk that only contains role information, so if we reach here
580585
# and previous_chunks is length 1 then this is the start of text content.
581586
start=len(previous_chunks) == 1,
@@ -588,7 +593,7 @@ def _convert_chat_completion_chunk_to_streaming_chunk(
588593
"usage": _serialize_usage(chunk.usage),
589594
},
590595
)
591-
return [chunk_message]
596+
return chunk_message
592597

593598

594599
def _serialize_usage(usage):

haystack/components/generators/openai.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -249,7 +249,7 @@ def run(
249249
chunk=chunk, # type: ignore
250250
previous_chunks=chunks,
251251
component_info=component_info,
252-
)[0]
252+
)
253253
chunks.append(chunk_delta)
254254
streaming_callback(chunk_delta)
255255

haystack/components/generators/utils.py

Lines changed: 37 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -31,17 +31,24 @@ def print_streaming_chunk(chunk: StreamingChunk) -> None:
3131
print("\n\n", flush=True, end="")
3232

3333
## Tool Call streaming
34-
if chunk.tool_call:
35-
# If chunk.start is True indicates beginning of a tool call
36-
# Also presence of chunk.tool_call.name indicates the start of a tool call too
37-
if chunk.start:
38-
print("[TOOL CALL]\n", flush=True, end="")
39-
print(f"Tool: {chunk.tool_call.tool_name} ", flush=True, end="")
40-
print("\nArguments: ", flush=True, end="")
41-
42-
# print the tool arguments
43-
if chunk.tool_call.arguments:
44-
print(chunk.tool_call.arguments, flush=True, end="")
34+
if chunk.tool_calls:
35+
# Typically, if there are multiple tool calls in the chunk this means that the tool calls are fully formed and
36+
# not just a delta.
37+
for tool_call in chunk.tool_calls:
38+
# If chunk.start is True indicates beginning of a tool call
39+
# Also presence of tool_call.tool_name indicates the start of a tool call too
40+
if chunk.start:
41+
# If there is more than one tool call in the chunk, we print two new lines to separate them
42+
# We know there is more than one tool call if the index of the tool call is greater than the index of
43+
# the chunk.
44+
if chunk.index and tool_call.index > chunk.index:
45+
print("\n\n", flush=True, end="")
46+
47+
print(f"[TOOL CALL]\nTool: {tool_call.tool_name} \nArguments: ", flush=True, end="")
48+
49+
# print the tool arguments
50+
if tool_call.arguments:
51+
print(tool_call.arguments, flush=True, end="")
4552

4653
## Tool Call Result streaming
4754
# Print tool call results if available (from ToolInvoker)
@@ -76,39 +83,41 @@ def _convert_streaming_chunks_to_chat_message(chunks: List[StreamingChunk]) -> C
7683
# Process tool calls if present in any chunk
7784
tool_call_data: Dict[int, Dict[str, str]] = {} # Track tool calls by index
7885
for chunk in chunks:
79-
if chunk.tool_call:
86+
if chunk.tool_calls:
8087
# We do this to make sure mypy is happy, but we enforce index is not None in the StreamingChunk dataclass if
8188
# tool_calls is present
8289
assert chunk.index is not None
8390

84-
# We use the index of the chunk to track the tool call across chunks since the ID is not always provided
85-
if chunk.index not in tool_call_data:
86-
tool_call_data[chunk.index] = {"id": "", "name": "", "arguments": ""}
91+
for tool_call in chunk.tool_calls:
92+
# We use the index of the tool_call to track the tool call across chunks since the ID is not always
93+
# provided
94+
if tool_call.index not in tool_call_data:
95+
tool_call_data[tool_call.index] = {"id": "", "name": "", "arguments": ""}
8796

88-
# Save the ID if present
89-
if chunk.tool_call.id is not None:
90-
tool_call_data[chunk.index]["id"] = chunk.tool_call.id
97+
# Save the ID if present
98+
if tool_call.id is not None:
99+
tool_call_data[tool_call.index]["id"] = tool_call.id
91100

92-
if chunk.tool_call.tool_name is not None:
93-
tool_call_data[chunk.index]["name"] += chunk.tool_call.tool_name
94-
if chunk.tool_call.arguments is not None:
95-
tool_call_data[chunk.index]["arguments"] += chunk.tool_call.arguments
101+
if tool_call.tool_name is not None:
102+
tool_call_data[tool_call.index]["name"] += tool_call.tool_name
103+
if tool_call.arguments is not None:
104+
tool_call_data[tool_call.index]["arguments"] += tool_call.arguments
96105

97106
# Convert accumulated tool call data into ToolCall objects
98107
sorted_keys = sorted(tool_call_data.keys())
99108
for key in sorted_keys:
100-
tool_call = tool_call_data[key]
109+
tool_call_dict = tool_call_data[key]
101110
try:
102-
arguments = json.loads(tool_call["arguments"])
103-
tool_calls.append(ToolCall(id=tool_call["id"], tool_name=tool_call["name"], arguments=arguments))
111+
arguments = json.loads(tool_call_dict["arguments"])
112+
tool_calls.append(ToolCall(id=tool_call_dict["id"], tool_name=tool_call_dict["name"], arguments=arguments))
104113
except json.JSONDecodeError:
105114
logger.warning(
106115
"OpenAI returned a malformed JSON string for tool call arguments. This tool call "
107116
"will be skipped. To always generate a valid JSON, set `tools_strict` to `True`. "
108117
"Tool call ID: {_id}, Tool name: {_name}, Arguments: {_arguments}",
109-
_id=tool_call["id"],
110-
_name=tool_call["name"],
111-
_arguments=tool_call["arguments"],
118+
_id=tool_call_dict["id"],
119+
_name=tool_call_dict["name"],
120+
_arguments=tool_call_dict["arguments"],
112121
)
113122

114123
# finish_reason can appear in different places so we look for the last one

haystack/dataclasses/streaming_chunk.py

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
# SPDX-License-Identifier: Apache-2.0
44

55
from dataclasses import dataclass, field
6-
from typing import Any, Awaitable, Callable, Dict, Literal, Optional, Union, overload
6+
from typing import Any, Awaitable, Callable, Dict, List, Literal, Optional, Union, overload
77

88
from haystack.core.component import Component
99
from haystack.dataclasses.chat_message import ToolCallResult
@@ -15,11 +15,13 @@ class ToolCallDelta:
1515
"""
1616
Represents a Tool call prepared by the model, usually contained in an assistant message.
1717
18+
:param index: The index of the Tool call in the list of Tool calls.
1819
:param tool_name: The name of the Tool to call.
1920
:param arguments: Either the full arguments in JSON format or a delta of the arguments.
2021
:param id: The ID of the Tool call.
2122
"""
2223

24+
index: int
2325
tool_name: Optional[str] = field(default=None)
2426
arguments: Optional[str] = field(default=None)
2527
id: Optional[str] = field(default=None) # noqa: A003
@@ -71,7 +73,8 @@ class StreamingChunk:
7173
:param component_info: A `ComponentInfo` object containing information about the component that generated the chunk,
7274
such as the component name and type.
7375
:param index: An optional integer index representing which content block this chunk belongs to.
74-
:param tool_call: An optional ToolCallDelta object representing a tool call associated with the message chunk.
76+
:param tool_calls: An optional list of ToolCallDelta objects representing tool calls associated with the message
77+
chunk.
7578
:param tool_call_result: An optional ToolCallResult object representing the result of a tool call.
7679
:param start: A boolean indicating whether this chunk marks the start of a content block.
7780
"""
@@ -80,21 +83,21 @@ class StreamingChunk:
8083
meta: Dict[str, Any] = field(default_factory=dict, hash=False)
8184
component_info: Optional[ComponentInfo] = field(default=None)
8285
index: Optional[int] = field(default=None)
83-
tool_call: Optional[ToolCallDelta] = field(default=None)
86+
tool_calls: Optional[List[ToolCallDelta]] = field(default=None)
8487
tool_call_result: Optional[ToolCallResult] = field(default=None)
8588
start: bool = field(default=False)
8689

8790
def __post_init__(self):
88-
fields_set = sum(bool(x) for x in (self.content, self.tool_call, self.tool_call_result))
91+
fields_set = sum(bool(x) for x in (self.content, self.tool_calls, self.tool_call_result))
8992
if fields_set > 1:
9093
raise ValueError(
9194
"Only one of `content`, `tool_calls`, or `tool_call_result` may be set in a StreamingChunk. "
92-
f"Got content: '{self.content}', tool_call: '{self.tool_call}', "
95+
f"Got content: '{self.content}', tool_calls: '{self.tool_calls}', "
9396
f"tool_call_result: '{self.tool_call_result}'"
9497
)
9598

9699
# NOTE: We don't enforce this for self.content otherwise it would be a breaking change
97-
if (self.tool_call or self.tool_call_result) and self.index is None:
100+
if (self.tool_calls or self.tool_call_result) and self.index is None:
98101
raise ValueError("If `tool_calls` or `tool_call_result` is set, `index` must also be set.")
99102

100103

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
---
22
features:
33
- |
4-
Updated StreamingChunk to add the fields `tool_call`, `tool_call_result`, `index`, and `start` to make it easier to format the stream in a streaming callback.
5-
- Added new dataclass ToolCallDelta for the `StreamingChunk.tool_call` field to reflect that the arguments can be a string delta.
4+
Updated StreamingChunk to add the fields `tool_calls`, `tool_call_result`, `index`, and `start` to make it easier to format the stream in a streaming callback.
5+
- Added new dataclass ToolCallDelta for the `StreamingChunk.tool_calls` field to reflect that the arguments can be a string delta.
66
- Updated `print_streaming_chunk` and `_convert_streaming_chunks_to_chat_message` utility methods to use these new fields. This especially improves the formatting when using `print_streaming_chunk` with Agent.
77
- Updated `OpenAIGenerator`, `OpenAIChatGenerator`, `HuggingFaceAPIGenerator`, `HuggingFaceAPIChatGenerator`, `HuggingFaceLocalGenerator` and `HuggingFaceLocalChatGenerator` to follow the new dataclasses.
88
- Updated `ToolInvoker` to follow the StreamingChunk dataclass.

0 commit comments

Comments
 (0)