feat: Update WatsonxChatGenerator to use the StreamingChunk fields

maxdswain · maxdswain · commit ddb7b5ca9772 · 2026-03-01T18:14:36.000Z
diff --git a/integrations/watsonx/src/haystack_integrations/components/generators/watsonx/chat/chat_generator.py b/integrations/watsonx/src/haystack_integrations/components/generators/watsonx/chat/chat_generator.py
@@ -6,10 +6,12 @@
 from typing import Any, Literal, get_args
 
 from haystack import component, default_from_dict, default_to_dict, logging
+from haystack.components.generators.utils import _convert_streaming_chunks_to_chat_message
 from haystack.dataclasses import (
     AsyncStreamingCallbackT,
     ChatMessage,
     ChatRole,
+    FinishReason,
     ImageContent,
     StreamingCallbackT,
     StreamingChunk,
@@ -29,6 +31,17 @@
 ImageFormat = Literal["image/jpeg", "image/png"]
 IMAGE_SUPPORTED_FORMATS: list[ImageFormat] = list(get_args(ImageFormat))
 
+# Maps watsonx.ai finish reasons to Haystack FinishReason values; for the possible watsonx.ai values see
+# https://ibm.github.io/watsonx-ai-node-sdk/enums/1_6_x.WatsonXAI.TextChatResultChoiceStream.Constants.FinishReason.html
+FINISH_REASON_MAPPING: dict[str, FinishReason] = {
+    "cancelled": "stop",
+    "error": "stop",
+    "length": "length",
+    "stop": "stop",
+    "time_limit": "stop",
+    "tool_calls": "tool_calls",
+}
+
 
 @component
 class WatsonxChatGenerator:
@@ -327,6 +340,22 @@ def _prepare_api_call(
 
         return {"messages": watsonx_messages, "params": merged_kwargs}
 
+    def _convert_chunk_to_streaming_chunk(self, content: str, chunk: dict[str, Any]) -> StreamingChunk:
+        """
+        Build a Haystack StreamingChunk from a single Watsonx stream chunk.
+
+        :param content: Text delta already extracted from the chunk by the caller.
+        :param chunk: Raw stream chunk; only its first entry under "choices" is read.
+        :returns: StreamingChunk with model/timestamp meta, the choice index and the mapped finish reason.
+        """
+        first_choice = chunk["choices"][0]
+        return StreamingChunk(
+            content=content,
+            meta={"model": self.model, "received_at": datetime.now(timezone.utc).isoformat()},
+            index=first_choice.get("index", 0),
+            finish_reason=FINISH_REASON_MAPPING.get(first_choice.get("finish_reason")),
+        )
+
     def _handle_streaming(
         self,
         *,
@@ -350,17 +379,11 @@ def _handle_streaming(
 
             content = chunk["choices"][0].get("delta", {}).get("content", "")
             if content:
-                chunk_meta = {
-                    "model": self.model,
-                    "index": chunk["choices"][0].get("index", 0),
-                    "finish_reason": chunk["choices"][0].get("finish_reason"),
-                    "received_at": datetime.now(timezone.utc).isoformat(),
-                }
-                streaming_chunk = StreamingChunk(content=content, meta=chunk_meta)
+                streaming_chunk = self._convert_chunk_to_streaming_chunk(content, chunk)
                 chunks.append(streaming_chunk)
                 callback(streaming_chunk)
 
-        return {"replies": [self._convert_streaming_chunks_to_chat_message(chunks)]}
+        return {"replies": [_convert_streaming_chunks_to_chat_message(chunks)]}
 
     def _handle_standard(self, api_args: dict[str, Any]) -> dict[str, list[ChatMessage]]:
         """Handle synchronous standard response."""
@@ -383,35 +406,11 @@ async def _handle_async_streaming(
 
             content = chunk["choices"][0].get("delta", {}).get("content", "")
             if content:
-                chunk_meta = {
-                    "model": self.model,
-                    "index": chunk["choices"][0].get("index", 0),
-                    "finish_reason": chunk["choices"][0].get("finish_reason"),
-                    "received_at": datetime.now(timezone.utc).isoformat(),
-                }
-                streaming_chunk = StreamingChunk(content=content, meta=chunk_meta)
+                streaming_chunk = self._convert_chunk_to_streaming_chunk(content, chunk)
                 chunks.append(streaming_chunk)
                 await callback(streaming_chunk)
 
-        return {"replies": [self._convert_streaming_chunks_to_chat_message(chunks)]}
-
-    def _convert_streaming_chunks_to_chat_message(self, chunks: list[StreamingChunk]) -> ChatMessage:
-        """Convert list of streaming chunks to a single ChatMessage."""
-        if not chunks:
-            return ChatMessage.from_assistant("")
-
-        content = "".join(chunk.content for chunk in chunks)
-        last_chunk_meta = chunks[-1].meta if chunks else {}
-
-        return ChatMessage.from_assistant(
-            text=content,
-            meta={
-                "model": self.model,
-                "finish_reason": last_chunk_meta.get("finish_reason"),
-                "usage": last_chunk_meta.get("usage", {}),
-                "chunks_count": len(chunks),
-            },
-        )
+        return {"replies": [_convert_streaming_chunks_to_chat_message(chunks)]}
 
     async def _handle_async_standard(self, api_args: dict[str, Any]) -> dict[str, list[ChatMessage]]:
         """Handle asynchronous standard response."""
diff --git a/integrations/watsonx/tests/test_chat_generator.py b/integrations/watsonx/tests/test_chat_generator.py
@@ -41,7 +41,7 @@ def mock_watsonx(self, monkeypatch):
                         {
                             "message": {"content": "This is a generated response", "role": "assistant"},
                             "index": 0,
-                            "finish_reason": "completed",
+                            "finish_reason": "stop",
                         }
                     ],
                     "usage": {"prompt_tokens": 10, "completion_tokens": 20, "total_tokens": 30},
@@ -53,7 +53,7 @@ def mock_watsonx(self, monkeypatch):
                         {
                             "message": {"content": "Async generated response", "role": "assistant"},
                             "index": 0,
-                            "finish_reason": "completed",
+                            "finish_reason": "stop",
                         }
                     ],
                     "usage": {"prompt_tokens": 10, "completion_tokens": 20, "total_tokens": 30},
@@ -62,7 +62,7 @@ def mock_watsonx(self, monkeypatch):
             mock_model_instance.chat_stream = MagicMock(
                 return_value=[
                     {"choices": [{"delta": {"content": "Streaming"}, "index": 0, "finish_reason": None}]},
-                    {"choices": [{"delta": {"content": " response"}, "index": 0, "finish_reason": "completed"}]},
+                    {"choices": [{"delta": {"content": " response"}, "index": 0, "finish_reason": "stop"}]},
                 ]
             )
 
@@ -85,7 +85,7 @@ async def __anext__(self):
                         elif self._count == 2:
                             return {
                                 "choices": [
-                                    {"delta": {"content": " response"}, "finish_reason": "completed", "index": 0}
+                                    {"delta": {"content": " response"}, "finish_reason": "stop", "index": 0}
                                 ]
                             }
                         else:
@@ -227,7 +227,7 @@ def test_run_single_message(self, mock_watsonx):
 
         assert len(result["replies"]) == 1
         assert result["replies"][0].text == "This is a generated response"
-        assert result["replies"][0].meta["finish_reason"] == "completed"
+        assert result["replies"][0].meta["finish_reason"] == "stop"
 
         mock_watsonx["model_instance"].chat.assert_called_once_with(
             messages=[{"role": "user", "content": "Test prompt"}], params={}
@@ -273,7 +273,7 @@ def test_run_with_streaming(self, mock_watsonx):
 
         assert len(result["replies"]) == 1
         assert result["replies"][0].text == "Streaming response"
-        assert result["replies"][0].meta["finish_reason"] == "completed"
+        assert result["replies"][0].meta["finish_reason"] == "stop"
 
     def test_run_with_empty_messages(self, mock_watsonx):
         generator = WatsonxChatGenerator(
@@ -338,7 +338,7 @@ async def test_run_async_single_message(self, mock_watsonx):
 
         assert len(result["replies"]) == 1
         assert result["replies"][0].text == "Async generated response"
-        assert result["replies"][0].meta["finish_reason"] == "completed"
+        assert result["replies"][0].meta["finish_reason"] == "stop"
 
     @pytest.mark.asyncio
     async def test_run_async_streaming(self, mock_watsonx):