From 0d9a576fb07002492679e6946668df6fc7757307 Mon Sep 17 00:00:00 2001 From: Vladimir Blagojevic Date: Thu, 19 Jun 2025 11:13:13 +0200 Subject: [PATCH 01/19] Initial commit --- haystack/components/generators/chat/openai.py | 5 ++ haystack/components/generators/utils.py | 12 ++-- haystack/dataclasses/__init__.py | 2 + haystack/dataclasses/streaming_chunk.py | 39 ++++++++++++ test/dataclasses/test_streaming_chunk.py | 61 ++++++++++++++++++- 5 files changed, 112 insertions(+), 7 deletions(-) diff --git a/haystack/components/generators/chat/openai.py b/haystack/components/generators/chat/openai.py index fda06e6ebd..03592828c3 100644 --- a/haystack/components/generators/chat/openai.py +++ b/haystack/components/generators/chat/openai.py @@ -530,10 +530,13 @@ def _convert_chat_completion_chunk_to_streaming_chunk( component_info=component_info, # Index is None since it's only set to an int when a content block is present index=None, + finish_reason=None, meta={ "model": chunk.model, "received_at": datetime.now().isoformat(), "usage": _serialize_usage(chunk.usage), + # NOTE: finish_reason in meta is deprecated, use the dedicated finish_reason field + "finish_reason": None, }, ) ] @@ -557,6 +560,7 @@ def _convert_chat_completion_chunk_to_streaming_chunk( arguments=function.arguments if function and function.arguments else None, ), start=function.name is not None if function else False, + finish_reason=choice.finish_reason, meta={ "model": chunk.model, "index": choice.index, @@ -579,6 +583,7 @@ def _convert_chat_completion_chunk_to_streaming_chunk( # The first chunk is always a start message chunk that only contains role information, so if we reach here # and previous_chunks is length 1 then this is the start of text content. 
start=len(previous_chunks) == 1, + finish_reason=choice.finish_reason, meta={ "model": chunk.model, "index": choice.index, diff --git a/haystack/components/generators/utils.py b/haystack/components/generators/utils.py index e66b13786e..bd003b0797 100644 --- a/haystack/components/generators/utils.py +++ b/haystack/components/generators/utils.py @@ -58,7 +58,7 @@ def print_streaming_chunk(chunk: StreamingChunk) -> None: # End of LLM assistant message so we add two new lines # This ensures spacing between multiple LLM messages (e.g. Agent) or multiple Tool Call Results - if chunk.meta.get("finish_reason") is not None: + if chunk.finish_reason is not None: print("\n\n", flush=True, end="") @@ -112,9 +112,13 @@ def _convert_streaming_chunks_to_chat_message(chunks: List[StreamingChunk]) -> C ) # finish_reason can appear in different places so we look for the last one - finish_reasons = [ - chunk.meta.get("finish_reason") for chunk in chunks if chunk.meta.get("finish_reason") is not None - ] + # First check the dedicated finish_reason field, then fall back to meta for backward compatibility + finish_reasons = [] + for chunk in chunks: + if chunk.finish_reason is not None: + finish_reasons.append(chunk.finish_reason) + elif chunk.meta.get("finish_reason") is not None: + finish_reasons.append(chunk.meta.get("finish_reason")) finish_reason = finish_reasons[-1] if finish_reasons else None meta = { diff --git a/haystack/dataclasses/__init__.py b/haystack/dataclasses/__init__.py index 1356df131a..442a2c0004 100644 --- a/haystack/dataclasses/__init__.py +++ b/haystack/dataclasses/__init__.py @@ -22,6 +22,7 @@ "SyncStreamingCallbackT", "ToolCallDelta", "select_streaming_callback", + "FinishReason", ], } @@ -40,6 +41,7 @@ from .state import State as State from .streaming_chunk import AsyncStreamingCallbackT as AsyncStreamingCallbackT from .streaming_chunk import ComponentInfo as ComponentInfo + from .streaming_chunk import FinishReason as FinishReason from .streaming_chunk import 
StreamingCallbackT as StreamingCallbackT from .streaming_chunk import StreamingChunk as StreamingChunk from .streaming_chunk import SyncStreamingCallbackT as SyncStreamingCallbackT diff --git a/haystack/dataclasses/streaming_chunk.py b/haystack/dataclasses/streaming_chunk.py index d01a61bc05..f98fbfb97f 100644 --- a/haystack/dataclasses/streaming_chunk.py +++ b/haystack/dataclasses/streaming_chunk.py @@ -2,6 +2,7 @@ # # SPDX-License-Identifier: Apache-2.0 +import warnings from dataclasses import dataclass, field from typing import Any, Awaitable, Callable, Dict, Literal, Optional, Union, overload @@ -9,6 +10,9 @@ from haystack.dataclasses.chat_message import ToolCallResult from haystack.utils.asynchronous import is_callable_async_compatible +# Type alias for standard finish_reason values following OpenAI's convention +FinishReason = Literal["stop", "length", "tool_calls", "content_filter"] + @dataclass class ToolCallDelta: @@ -68,12 +72,16 @@ class StreamingChunk: :param content: The content of the message chunk as a string. :param meta: A dictionary containing metadata related to the message chunk. + NOTE: The 'finish_reason' field in meta is deprecated and will be removed in a future release. + Use the dedicated 'finish_reason' field instead. :param component_info: A `ComponentInfo` object containing information about the component that generated the chunk, such as the component name and type. :param index: An optional integer index representing which content block this chunk belongs to. :param tool_call: An optional ToolCallDelta object representing a tool call associated with the message chunk. :param tool_call_result: An optional ToolCallResult object representing the result of a tool call. :param start: A boolean indicating whether this chunk marks the start of a content block. + :param finish_reason: An optional string indicating the reason the generation finished. 
+ Standard values follow OpenAI's convention: "stop", "length", "tool_calls", "content_filter". """ content: str @@ -83,6 +91,7 @@ class StreamingChunk: tool_call: Optional[ToolCallDelta] = field(default=None) tool_call_result: Optional[ToolCallResult] = field(default=None) start: bool = field(default=False) + finish_reason: Optional[Union[FinishReason, str]] = field(default=None) def __post_init__(self): fields_set = sum(bool(x) for x in (self.content, self.tool_call, self.tool_call_result)) @@ -97,6 +106,10 @@ def __post_init__(self): if (self.tool_call or self.tool_call_result) and self.index is None: raise ValueError("If `tool_call`, or `tool_call_result` is set, `index` must also be set.") + # Convert meta to _DeprecationWarningDict to show warnings when accessing deprecated fields + if not isinstance(self.meta, _DeprecationWarningDict): + self.meta = _DeprecationWarningDict(self.meta) + SyncStreamingCallbackT = Callable[[StreamingChunk], None] AsyncStreamingCallbackT = Callable[[StreamingChunk], Awaitable[None]] @@ -148,3 +161,29 @@ def select_streaming_callback( raise ValueError("The runtime callback cannot be a coroutine.") return runtime_callback or init_callback + + +class _DeprecationWarningDict(dict): + """ + A dictionary subclass that issues deprecation warnings when accessing 'finish_reason'. + """ + + def __getitem__(self, key): + if key == "finish_reason": + warnings.warn( + "Accessing 'finish_reason' from StreamingChunk.meta is deprecated and will be removed in a future " + "release. Use StreamingChunk.finish_reason instead.", + DeprecationWarning, + stacklevel=2, + ) + return super().__getitem__(key) + + def get(self, key, default=None): + if key == "finish_reason": + warnings.warn( + "Accessing 'finish_reason' from StreamingChunk.meta is deprecated and will be removed in a future " + "release. 
Use StreamingChunk.finish_reason instead.", + DeprecationWarning, + stacklevel=2, + ) + return super().get(key, default) diff --git a/test/dataclasses/test_streaming_chunk.py b/test/dataclasses/test_streaming_chunk.py index 695d155483..56d1479a2c 100644 --- a/test/dataclasses/test_streaming_chunk.py +++ b/test/dataclasses/test_streaming_chunk.py @@ -4,9 +4,8 @@ import pytest -from haystack.dataclasses import StreamingChunk, ComponentInfo, ToolCallDelta, ToolCallResult, ToolCall -from haystack import component -from haystack import Pipeline +from haystack import Pipeline, component +from haystack.dataclasses import ComponentInfo, FinishReason, StreamingChunk, ToolCall, ToolCallDelta, ToolCallResult @component @@ -101,3 +100,59 @@ def test_tool_call_delta(): def test_tool_call_delta_with_missing_fields(): with pytest.raises(ValueError): _ = ToolCallDelta(id="123") + + +def test_create_chunk_with_finish_reason(): + """Test creating a chunk with the new finish_reason field.""" + chunk = StreamingChunk(content="Test content", finish_reason="stop") + + assert chunk.content == "Test content" + assert chunk.finish_reason == "stop" + assert chunk.meta == {} + + +def test_create_chunk_with_finish_reason_and_meta(): + """Test creating a chunk with both finish_reason field and meta.""" + chunk = StreamingChunk( + content="Test content", finish_reason="stop", meta={"model": "gpt-4", "usage": {"tokens": 10}} + ) + + assert chunk.content == "Test content" + assert chunk.finish_reason == "stop" + assert chunk.meta["model"] == "gpt-4" + assert chunk.meta["usage"]["tokens"] == 10 + + +def test_finish_reason_deprecation_warning(): + """Test that accessing finish_reason via meta shows deprecation warning.""" + import warnings + + chunk = StreamingChunk(content="Test content", meta={"finish_reason": "length"}) + + with warnings.catch_warnings(record=True) as w: + warnings.simplefilter("always") + result = chunk.meta.get("finish_reason") + + assert len(w) == 1 + assert 
issubclass(w[0].category, DeprecationWarning) + assert "finish_reason" in str(w[0].message) + assert "deprecated" in str(w[0].message) + assert result == "length" + + +def test_finish_reason_openai_standard_values(): + """Test that the finish_reason field accepts OpenAI standard values.""" + openai_values = ["stop", "length", "tool_calls", "content_filter"] + + for value in openai_values: + chunk = StreamingChunk(content="Test content", finish_reason=value) + assert chunk.finish_reason == value + + +def test_finish_reason_custom_values(): + """Test that custom finish_reason values still work (for migration compatibility).""" + custom_values = ["custom_stop", "provider_specific", "unknown"] + + for value in custom_values: + chunk = StreamingChunk(content="Test content", finish_reason=value) + assert chunk.finish_reason == value From ee41abf5e0c2c05d7b3c82724ce3629e61d3f8d6 Mon Sep 17 00:00:00 2001 From: Vladimir Blagojevic Date: Thu, 19 Jun 2025 11:22:30 +0200 Subject: [PATCH 02/19] Update deprecation version --- haystack/dataclasses/streaming_chunk.py | 12 +++++++----- test/dataclasses/test_streaming_chunk.py | 15 ++++----------- 2 files changed, 11 insertions(+), 16 deletions(-) diff --git a/haystack/dataclasses/streaming_chunk.py b/haystack/dataclasses/streaming_chunk.py index f98fbfb97f..a34d41fcaf 100644 --- a/haystack/dataclasses/streaming_chunk.py +++ b/haystack/dataclasses/streaming_chunk.py @@ -72,7 +72,7 @@ class StreamingChunk: :param content: The content of the message chunk as a string. :param meta: A dictionary containing metadata related to the message chunk. - NOTE: The 'finish_reason' field in meta is deprecated and will be removed in a future release. + NOTE: The 'finish_reason' field in meta is deprecated and will be removed in Haystack 2.17. Use the dedicated 'finish_reason' field instead. :param component_info: A `ComponentInfo` object containing information about the component that generated the chunk, such as the component name and type. 
@@ -91,6 +91,8 @@ class StreamingChunk: tool_call: Optional[ToolCallDelta] = field(default=None) tool_call_result: Optional[ToolCallResult] = field(default=None) start: bool = field(default=False) + # NOTE: We allow for the finish_reason to be a string during migration phase + # we will change finish_reason to be a FinishReason in a future release finish_reason: Optional[Union[FinishReason, str]] = field(default=None) def __post_init__(self): @@ -171,8 +173,8 @@ class _DeprecationWarningDict(dict): def __getitem__(self, key): if key == "finish_reason": warnings.warn( - "Accessing 'finish_reason' from StreamingChunk.meta is deprecated and will be removed in a future " - "release. Use StreamingChunk.finish_reason instead.", + "Accessing 'finish_reason' from StreamingChunk.meta is deprecated and will be removed in " + "Haystack 2.17. Use StreamingChunk.finish_reason instead.", DeprecationWarning, stacklevel=2, ) @@ -181,8 +183,8 @@ def __getitem__(self, key): def get(self, key, default=None): if key == "finish_reason": warnings.warn( - "Accessing 'finish_reason' from StreamingChunk.meta is deprecated and will be removed in a future " - "release. Use StreamingChunk.finish_reason instead.", + "Accessing 'finish_reason' from StreamingChunk.meta is deprecated and will be removed in " + "Haystack 2.17. 
Use StreamingChunk.finish_reason instead.", DeprecationWarning, stacklevel=2, ) diff --git a/test/dataclasses/test_streaming_chunk.py b/test/dataclasses/test_streaming_chunk.py index 56d1479a2c..c1435d965b 100644 --- a/test/dataclasses/test_streaming_chunk.py +++ b/test/dataclasses/test_streaming_chunk.py @@ -4,8 +4,9 @@ import pytest -from haystack import Pipeline, component -from haystack.dataclasses import ComponentInfo, FinishReason, StreamingChunk, ToolCall, ToolCallDelta, ToolCallResult +from haystack.dataclasses import StreamingChunk, ComponentInfo, ToolCallDelta, ToolCallResult, ToolCall, FinishReason +from haystack import component +from haystack import Pipeline @component @@ -137,18 +138,10 @@ def test_finish_reason_deprecation_warning(): assert issubclass(w[0].category, DeprecationWarning) assert "finish_reason" in str(w[0].message) assert "deprecated" in str(w[0].message) + assert "Haystack 2.17" in str(w[0].message) assert result == "length" -def test_finish_reason_openai_standard_values(): - """Test that the finish_reason field accepts OpenAI standard values.""" - openai_values = ["stop", "length", "tool_calls", "content_filter"] - - for value in openai_values: - chunk = StreamingChunk(content="Test content", finish_reason=value) - assert chunk.finish_reason == value - - def test_finish_reason_custom_values(): """Test that custom finish_reason values still work (for migration compatibility).""" custom_values = ["custom_stop", "provider_specific", "unknown"] From e59c10aee65e7dac156a16c353d5a3f1e66059ad Mon Sep 17 00:00:00 2001 From: Vladimir Blagojevic Date: Thu, 19 Jun 2025 11:29:54 +0200 Subject: [PATCH 03/19] Improve comment --- haystack/components/generators/utils.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/haystack/components/generators/utils.py b/haystack/components/generators/utils.py index bd003b0797..740377945e 100644 --- a/haystack/components/generators/utils.py +++ b/haystack/components/generators/utils.py @@ -113,6 +113,8 @@ def 
_convert_streaming_chunks_to_chat_message(chunks: List[StreamingChunk]) -> C # finish_reason can appear in different places so we look for the last one # First check the dedicated finish_reason field, then fall back to meta for backward compatibility + # NOTE: This fallback is required during migration period until all chat generators + # are updated to populate the dedicated finish_reason field. finish_reasons = [] for chunk in chunks: if chunk.finish_reason is not None: From 41b0d9ad5e460fafce9ba35c1b6619a2c882a247 Mon Sep 17 00:00:00 2001 From: Vladimir Blagojevic Date: Thu, 19 Jun 2025 15:10:14 +0200 Subject: [PATCH 04/19] Minor simplification --- haystack/components/generators/utils.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/haystack/components/generators/utils.py b/haystack/components/generators/utils.py index 740377945e..7169ad11f8 100644 --- a/haystack/components/generators/utils.py +++ b/haystack/components/generators/utils.py @@ -114,13 +114,12 @@ def _convert_streaming_chunks_to_chat_message(chunks: List[StreamingChunk]) -> C # finish_reason can appear in different places so we look for the last one # First check the dedicated finish_reason field, then fall back to meta for backward compatibility # NOTE: This fallback is required during migration period until all chat generators - # are updated to populate the dedicated finish_reason field. - finish_reasons = [] - for chunk in chunks: - if chunk.finish_reason is not None: - finish_reasons.append(chunk.finish_reason) - elif chunk.meta.get("finish_reason") is not None: - finish_reasons.append(chunk.meta.get("finish_reason")) + # are updated to populate the dedicated StreamingChunk.finish_reason field. 
+ finish_reasons = [ + chunk.finish_reason or chunk.meta.get("finish_reason") + for chunk in chunks + if chunk.finish_reason or chunk.meta.get("finish_reason") + ] finish_reason = finish_reasons[-1] if finish_reasons else None meta = { From 632780a5ec8147eafa996bfd790a2195bff09953 Mon Sep 17 00:00:00 2001 From: Vladimir Blagojevic Date: Thu, 19 Jun 2025 15:16:36 +0200 Subject: [PATCH 05/19] Add reno note --- ...nish-reason-field-streaming-chunk-89828ec09c6e6385.yaml | 7 +++++++ 1 file changed, 7 insertions(+) create mode 100644 releasenotes/notes/add-finish-reason-field-streaming-chunk-89828ec09c6e6385.yaml diff --git a/releasenotes/notes/add-finish-reason-field-streaming-chunk-89828ec09c6e6385.yaml b/releasenotes/notes/add-finish-reason-field-streaming-chunk-89828ec09c6e6385.yaml new file mode 100644 index 0000000000..ac002a480b --- /dev/null +++ b/releasenotes/notes/add-finish-reason-field-streaming-chunk-89828ec09c6e6385.yaml @@ -0,0 +1,7 @@ +--- +features: + - | + Added dedicated `finish_reason` field to `StreamingChunk` class to improve type safety and enable sophisticated streaming UI logic. The field follows OpenAI's convention with standard values: "stop", "length", "tool_calls", "content_filter". +deprecations: + - | + Accessing `finish_reason` from `StreamingChunk.meta` is now deprecated and will be removed in Haystack 2.17. Use the dedicated `StreamingChunk.finish_reason` field instead. During the migration period, both approaches work but accessing via meta shows deprecation warnings. 
From 29ee6ba54dcefdbc555c4b6cf062020717cb309f Mon Sep 17 00:00:00 2001 From: Vladimir Blagojevic Date: Mon, 23 Jun 2025 10:45:19 +0200 Subject: [PATCH 06/19] Remove deprecation warning --- haystack/components/generators/chat/openai.py | 2 +- haystack/components/generators/utils.py | 4 +-- haystack/dataclasses/streaming_chunk.py | 34 ++----------------- test/dataclasses/test_streaming_chunk.py | 29 ++++++++++------ 4 files changed, 24 insertions(+), 45 deletions(-) diff --git a/haystack/components/generators/chat/openai.py b/haystack/components/generators/chat/openai.py index 03592828c3..87f83cb1d2 100644 --- a/haystack/components/generators/chat/openai.py +++ b/haystack/components/generators/chat/openai.py @@ -535,7 +535,7 @@ def _convert_chat_completion_chunk_to_streaming_chunk( "model": chunk.model, "received_at": datetime.now().isoformat(), "usage": _serialize_usage(chunk.usage), - # NOTE: finish_reason in meta is deprecated, use the dedicated finish_reason field + # NOTE: Both finish_reason field and meta["finish_reason"] are supported for flexibility "finish_reason": None, }, ) diff --git a/haystack/components/generators/utils.py b/haystack/components/generators/utils.py index 7169ad11f8..e3addabf36 100644 --- a/haystack/components/generators/utils.py +++ b/haystack/components/generators/utils.py @@ -113,8 +113,8 @@ def _convert_streaming_chunks_to_chat_message(chunks: List[StreamingChunk]) -> C # finish_reason can appear in different places so we look for the last one # First check the dedicated finish_reason field, then fall back to meta for backward compatibility - # NOTE: This fallback is required during migration period until all chat generators - # are updated to populate the dedicated StreamingChunk.finish_reason field. + # NOTE: Support both finish_reason field and meta fallback for maximum flexibility. + # Users can choose either approach based on their preferences. 
finish_reasons = [ chunk.finish_reason or chunk.meta.get("finish_reason") for chunk in chunks diff --git a/haystack/dataclasses/streaming_chunk.py b/haystack/dataclasses/streaming_chunk.py index a34d41fcaf..2c2726983d 100644 --- a/haystack/dataclasses/streaming_chunk.py +++ b/haystack/dataclasses/streaming_chunk.py @@ -72,8 +72,8 @@ class StreamingChunk: :param content: The content of the message chunk as a string. :param meta: A dictionary containing metadata related to the message chunk. - NOTE: The 'finish_reason' field in meta is deprecated and will be removed in Haystack 2.17. - Use the dedicated 'finish_reason' field instead. + NOTE: Both 'finish_reason' field and meta['finish_reason'] are supported for flexibility. + The dedicated field provides better type safety and IDE support. :param component_info: A `ComponentInfo` object containing information about the component that generated the chunk, such as the component name and type. :param index: An optional integer index representing which content block this chunk belongs to. @@ -108,10 +108,6 @@ def __post_init__(self): if (self.tool_call or self.tool_call_result) and self.index is None: raise ValueError("If `tool_call`, or `tool_call_result` is set, `index` must also be set.") - # Convert meta to _DeprecationWarningDict to show warnings when accessing deprecated fields - if not isinstance(self.meta, _DeprecationWarningDict): - self.meta = _DeprecationWarningDict(self.meta) - SyncStreamingCallbackT = Callable[[StreamingChunk], None] AsyncStreamingCallbackT = Callable[[StreamingChunk], Awaitable[None]] @@ -163,29 +159,3 @@ def select_streaming_callback( raise ValueError("The runtime callback cannot be a coroutine.") return runtime_callback or init_callback - - -class _DeprecationWarningDict(dict): - """ - A dictionary subclass that issues deprecation warnings when accessing 'finish_reason'. 
- """ - - def __getitem__(self, key): - if key == "finish_reason": - warnings.warn( - "Accessing 'finish_reason' from StreamingChunk.meta is deprecated and will be removed in " - "Haystack 2.17. Use StreamingChunk.finish_reason instead.", - DeprecationWarning, - stacklevel=2, - ) - return super().__getitem__(key) - - def get(self, key, default=None): - if key == "finish_reason": - warnings.warn( - "Accessing 'finish_reason' from StreamingChunk.meta is deprecated and will be removed in " - "Haystack 2.17. Use StreamingChunk.finish_reason instead.", - DeprecationWarning, - stacklevel=2, - ) - return super().get(key, default) diff --git a/test/dataclasses/test_streaming_chunk.py b/test/dataclasses/test_streaming_chunk.py index c1435d965b..b7b3817cf9 100644 --- a/test/dataclasses/test_streaming_chunk.py +++ b/test/dataclasses/test_streaming_chunk.py @@ -124,22 +124,31 @@ def test_create_chunk_with_finish_reason_and_meta(): assert chunk.meta["usage"]["tokens"] == 10 -def test_finish_reason_deprecation_warning(): - """Test that accessing finish_reason via meta shows deprecation warning.""" +def test_dual_finish_reason_support(): + """Test that both finish_reason approaches work without warnings.""" import warnings + # Test that accessing finish_reason via meta does NOT show warnings chunk = StreamingChunk(content="Test content", meta={"finish_reason": "length"}) with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always") - result = chunk.meta.get("finish_reason") - - assert len(w) == 1 - assert issubclass(w[0].category, DeprecationWarning) - assert "finish_reason" in str(w[0].message) - assert "deprecated" in str(w[0].message) - assert "Haystack 2.17" in str(w[0].message) - assert result == "length" + result_meta = chunk.meta.get("finish_reason") + result_direct = chunk.meta["finish_reason"] + + # No warnings should be raised + assert len(w) == 0 + assert result_meta == "length" + assert result_direct == "length" + + # Test that both approaches can be 
used simultaneously + chunk_dual = StreamingChunk( + content="Test content", finish_reason="stop", meta={"finish_reason": "length", "model": "gpt-4"} + ) + + assert chunk_dual.finish_reason == "stop" + assert chunk_dual.meta["finish_reason"] == "length" + assert chunk_dual.meta.get("finish_reason") == "length" def test_finish_reason_custom_values(): From 4504bbf7643fa60fc616d381ce87a6549ec51fe5 Mon Sep 17 00:00:00 2001 From: Vladimir Blagojevic Date: Mon, 23 Jun 2025 11:51:13 +0200 Subject: [PATCH 07/19] Remove fallback in haystack/components/generators/utils.py --- haystack/components/generators/utils.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/haystack/components/generators/utils.py b/haystack/components/generators/utils.py index e3addabf36..b66dd9d7d8 100644 --- a/haystack/components/generators/utils.py +++ b/haystack/components/generators/utils.py @@ -112,14 +112,7 @@ def _convert_streaming_chunks_to_chat_message(chunks: List[StreamingChunk]) -> C ) # finish_reason can appear in different places so we look for the last one - # First check the dedicated finish_reason field, then fall back to meta for backward compatibility - # NOTE: Support both finish_reason field and meta fallback for maximum flexibility. - # Users can choose either approach based on their preferences. 
- finish_reasons = [ - chunk.finish_reason or chunk.meta.get("finish_reason") - for chunk in chunks - if chunk.finish_reason or chunk.meta.get("finish_reason") - ] + finish_reasons = [chunk.finish_reason for chunk in chunks if chunk.finish_reason] finish_reason = finish_reasons[-1] if finish_reasons else None meta = { From a329d87ef2ca76648a275bda38becd8f563ad84f Mon Sep 17 00:00:00 2001 From: Vladimir Blagojevic Date: Mon, 23 Jun 2025 11:53:55 +0200 Subject: [PATCH 08/19] FinishReason alphabetical import --- haystack/dataclasses/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/haystack/dataclasses/__init__.py b/haystack/dataclasses/__init__.py index 442a2c0004..91071227f5 100644 --- a/haystack/dataclasses/__init__.py +++ b/haystack/dataclasses/__init__.py @@ -17,12 +17,12 @@ "streaming_chunk": [ "AsyncStreamingCallbackT", "ComponentInfo", + "FinishReason", "StreamingCallbackT", "StreamingChunk", "SyncStreamingCallbackT", "ToolCallDelta", "select_streaming_callback", - "FinishReason", ], } From 7aa2f9c94404ccda2762c509a0327c189d6a392c Mon Sep 17 00:00:00 2001 From: Vladimir Blagojevic Date: Mon, 23 Jun 2025 12:03:27 +0200 Subject: [PATCH 09/19] Add tool_call_results finish reason, adapt codebase --- haystack/components/tools/tool_invoker.py | 12 ++++- haystack/dataclasses/streaming_chunk.py | 3 +- test/components/tools/test_tool_invoker.py | 52 ++++++++++++++++++++++ test/dataclasses/test_streaming_chunk.py | 18 ++++++++ 4 files changed, 82 insertions(+), 3 deletions(-) diff --git a/haystack/components/tools/tool_invoker.py b/haystack/components/tools/tool_invoker.py index 7c7dd9faed..47f2f0eca5 100644 --- a/haystack/components/tools/tool_invoker.py +++ b/haystack/components/tools/tool_invoker.py @@ -553,7 +553,11 @@ def run( # We stream one more chunk that contains a finish_reason if tool_messages were generated if len(tool_messages) > 0 and streaming_callback is not None: - streaming_callback(StreamingChunk(content="", 
meta={"finish_reason": "tool_call_results"})) + streaming_callback( + StreamingChunk( + content="", finish_reason="tool_call_results", meta={"finish_reason": "tool_call_results"} + ) + ) return {"tool_messages": tool_messages, "state": state} @@ -685,7 +689,11 @@ async def run_async( # We stream one more chunk that contains a finish_reason if tool_messages were generated if len(tool_messages) > 0 and streaming_callback is not None: - await streaming_callback(StreamingChunk(content="", meta={"finish_reason": "tool_call_results"})) + await streaming_callback( + StreamingChunk( + content="", finish_reason="tool_call_results", meta={"finish_reason": "tool_call_results"} + ) + ) return {"tool_messages": tool_messages, "state": state} diff --git a/haystack/dataclasses/streaming_chunk.py b/haystack/dataclasses/streaming_chunk.py index 2c2726983d..f10b4b8ead 100644 --- a/haystack/dataclasses/streaming_chunk.py +++ b/haystack/dataclasses/streaming_chunk.py @@ -11,7 +11,8 @@ from haystack.utils.asynchronous import is_callable_async_compatible # Type alias for standard finish_reason values following OpenAI's convention -FinishReason = Literal["stop", "length", "tool_calls", "content_filter"] +# plus Haystack-specific values +FinishReason = Literal["stop", "length", "tool_calls", "content_filter", "tool_call_results"] @dataclass diff --git a/test/components/tools/test_tool_invoker.py b/test/components/tools/test_tool_invoker.py index a21f5ae31a..9e14fcac56 100644 --- a/test/components/tools/test_tool_invoker.py +++ b/test/components/tools/test_tool_invoker.py @@ -203,6 +203,28 @@ def streaming_callback(chunk: StreamingChunk) -> None: assert tool_call_result.origin == tool_call assert not tool_call_result.error + def test_run_with_streaming_callback_finish_reason(self, invoker): + streaming_chunks = [] + + def streaming_callback(chunk: StreamingChunk) -> None: + streaming_chunks.append(chunk) + + tool_call = ToolCall(tool_name="weather_tool", arguments={"location": "Berlin"}) + 
message = ChatMessage.from_assistant(tool_calls=[tool_call]) + + result = invoker.run(messages=[message], streaming_callback=streaming_callback) + assert "tool_messages" in result + assert len(result["tool_messages"]) == 1 + + # Check that we received streaming chunks + assert len(streaming_chunks) >= 2 # At least one for tool result and one for finish reason + + # The last chunk should have finish_reason set to "tool_call_results" + final_chunk = streaming_chunks[-1] + assert final_chunk.finish_reason == "tool_call_results" + assert final_chunk.meta["finish_reason"] == "tool_call_results" + assert final_chunk.content == "" + @pytest.mark.asyncio async def test_run_async_with_streaming_callback(self, thread_executor, weather_tool): streaming_callback_called = False @@ -245,6 +267,36 @@ async def streaming_callback(chunk: StreamingChunk) -> None: # check we called the streaming callback assert streaming_callback_called + @pytest.mark.asyncio + async def test_run_async_with_streaming_callback_finish_reason(self, thread_executor, weather_tool): + streaming_chunks = [] + + async def streaming_callback(chunk: StreamingChunk) -> None: + streaming_chunks.append(chunk) + + tool_invoker = ToolInvoker( + tools=[weather_tool], + raise_on_failure=True, + convert_result_to_json_string=False, + async_executor=thread_executor, + ) + + tool_call = ToolCall(tool_name="weather_tool", arguments={"location": "Berlin"}) + message = ChatMessage.from_assistant(tool_calls=[tool_call]) + + result = await tool_invoker.run_async(messages=[message], streaming_callback=streaming_callback) + assert "tool_messages" in result + assert len(result["tool_messages"]) == 1 + + # Check that we received streaming chunks + assert len(streaming_chunks) >= 2 # At least one for tool result and one for finish reason + + # The last chunk should have finish_reason set to "tool_call_results" + final_chunk = streaming_chunks[-1] + assert final_chunk.finish_reason == "tool_call_results" + assert 
final_chunk.meta["finish_reason"] == "tool_call_results" + assert final_chunk.content == "" + def test_run_with_toolset(self, tool_set): tool_invoker = ToolInvoker(tools=tool_set, raise_on_failure=True, convert_result_to_json_string=False) tool_call = ToolCall(tool_name="addition_tool", arguments={"num1": 5, "num2": 3}) diff --git a/test/dataclasses/test_streaming_chunk.py b/test/dataclasses/test_streaming_chunk.py index b7b3817cf9..7a2f5c1fe7 100644 --- a/test/dataclasses/test_streaming_chunk.py +++ b/test/dataclasses/test_streaming_chunk.py @@ -158,3 +158,21 @@ def test_finish_reason_custom_values(): for value in custom_values: chunk = StreamingChunk(content="Test content", finish_reason=value) assert chunk.finish_reason == value + + +def test_finish_reason_standard_values(): + """Test all standard finish_reason values including the new Haystack-specific ones.""" + standard_values = ["stop", "length", "tool_calls", "content_filter", "tool_call_results"] + + for value in standard_values: + chunk = StreamingChunk(content="Test content", finish_reason=value) + assert chunk.finish_reason == value + + +def test_finish_reason_tool_call_results(): + """Test specifically the new tool_call_results finish reason.""" + chunk = StreamingChunk(content="", finish_reason="tool_call_results", meta={"finish_reason": "tool_call_results"}) + + assert chunk.finish_reason == "tool_call_results" + assert chunk.meta["finish_reason"] == "tool_call_results" + assert chunk.content == "" From ba143a0510b323439a0e5e82656518cf76a330b9 Mon Sep 17 00:00:00 2001 From: Vladimir Blagojevic Date: Mon, 23 Jun 2025 12:09:11 +0200 Subject: [PATCH 10/19] Define finish_reason to be Optional[FinishReason] --- haystack/dataclasses/streaming_chunk.py | 9 ++++----- test/components/generators/test_utils.py | 1 + test/dataclasses/test_streaming_chunk.py | 9 --------- 3 files changed, 5 insertions(+), 14 deletions(-) diff --git a/haystack/dataclasses/streaming_chunk.py 
b/haystack/dataclasses/streaming_chunk.py index f10b4b8ead..7d7a45385f 100644 --- a/haystack/dataclasses/streaming_chunk.py +++ b/haystack/dataclasses/streaming_chunk.py @@ -81,8 +81,9 @@ class StreamingChunk: :param tool_call: An optional ToolCallDelta object representing a tool call associated with the message chunk. :param tool_call_result: An optional ToolCallResult object representing the result of a tool call. :param start: A boolean indicating whether this chunk marks the start of a content block. - :param finish_reason: An optional string indicating the reason the generation finished. - Standard values follow OpenAI's convention: "stop", "length", "tool_calls", "content_filter". + :param finish_reason: An optional value indicating the reason the generation finished. + Standard values follow OpenAI's convention: "stop", "length", "tool_calls", "content_filter", + plus Haystack-specific values like "tool_call_results". """ content: str @@ -92,9 +93,7 @@ class StreamingChunk: tool_call: Optional[ToolCallDelta] = field(default=None) tool_call_result: Optional[ToolCallResult] = field(default=None) start: bool = field(default=False) - # NOTE: We allow for the finish_reason to be a string during migration phase - # we will change finish_reason to be a FinishReason in a future release - finish_reason: Optional[Union[FinishReason, str]] = field(default=None) + finish_reason: Optional[FinishReason] = field(default=None) def __post_init__(self): fields_set = sum(bool(x) for x in (self.content, self.tool_call, self.tool_call_result)) diff --git a/test/components/generators/test_utils.py b/test/components/generators/test_utils.py index cc6d6edfdf..5dc2f0cee6 100644 --- a/test/components/generators/test_utils.py +++ b/test/components/generators/test_utils.py @@ -262,6 +262,7 @@ def test_convert_streaming_chunks_to_chat_message_tool_calls_in_any_chunk(): "received_at": "2025-02-19T16:02:55.948772", }, component_info=ComponentInfo(name="test", type="test"), + 
finish_reason="tool_calls", ), StreamingChunk( content="", diff --git a/test/dataclasses/test_streaming_chunk.py b/test/dataclasses/test_streaming_chunk.py index 7a2f5c1fe7..268d222b52 100644 --- a/test/dataclasses/test_streaming_chunk.py +++ b/test/dataclasses/test_streaming_chunk.py @@ -151,15 +151,6 @@ def test_dual_finish_reason_support(): assert chunk_dual.meta.get("finish_reason") == "length" -def test_finish_reason_custom_values(): - """Test that custom finish_reason values still work (for migration compatibility).""" - custom_values = ["custom_stop", "provider_specific", "unknown"] - - for value in custom_values: - chunk = StreamingChunk(content="Test content", finish_reason=value) - assert chunk.finish_reason == value - - def test_finish_reason_standard_values(): """Test all standard finish_reason values including the new Haystack-specific ones.""" standard_values = ["stop", "length", "tool_calls", "content_filter", "tool_call_results"] From 88f65da06837c9ea197fa33db5bd58998074497b Mon Sep 17 00:00:00 2001 From: Vladimir Blagojevic Date: Mon, 23 Jun 2025 12:21:25 +0200 Subject: [PATCH 11/19] Add StreamingChunk finish_reason in HF generators --- haystack/components/generators/chat/hugging_face_api.py | 1 + haystack/components/generators/hugging_face_api.py | 7 ++++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/haystack/components/generators/chat/hugging_face_api.py b/haystack/components/generators/chat/hugging_face_api.py index a9f12e7f6c..a79d281d18 100644 --- a/haystack/components/generators/chat/hugging_face_api.py +++ b/haystack/components/generators/chat/hugging_face_api.py @@ -141,6 +141,7 @@ def _convert_chat_completion_stream_output_to_streaming_chunk( index=0 if choice.finish_reason is None else None, # start is True at the very beginning since first chunk contains role information + first part of the answer. 
start=len(previous_chunks) == 0, + finish_reason=choice.finish_reason, ) return stream_chunk diff --git a/haystack/components/generators/hugging_face_api.py b/haystack/components/generators/hugging_face_api.py index 89fb659bed..375cef8d12 100644 --- a/haystack/components/generators/hugging_face_api.py +++ b/haystack/components/generators/hugging_face_api.py @@ -242,7 +242,12 @@ def _stream_and_build_response( first_chunk_time = datetime.now().isoformat() stream_chunk = StreamingChunk( - content=token.text, meta=chunk_metadata, component_info=component_info, index=0, start=len(chunks) == 0 + content=token.text, + meta=chunk_metadata, + component_info=component_info, + index=0, + start=len(chunks) == 0, + finish_reason=chunk_metadata.get("finish_reason"), ) chunks.append(stream_chunk) streaming_callback(stream_chunk) From 816fc8b9e0e04c09c1bf9564b1600dee71dd18fa Mon Sep 17 00:00:00 2001 From: Vladimir Blagojevic Date: Mon, 23 Jun 2025 12:21:59 +0200 Subject: [PATCH 12/19] Update reno note --- ...finish-reason-field-streaming-chunk-89828ec09c6e6385.yaml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/releasenotes/notes/add-finish-reason-field-streaming-chunk-89828ec09c6e6385.yaml b/releasenotes/notes/add-finish-reason-field-streaming-chunk-89828ec09c6e6385.yaml index ac002a480b..b48e045ff8 100644 --- a/releasenotes/notes/add-finish-reason-field-streaming-chunk-89828ec09c6e6385.yaml +++ b/releasenotes/notes/add-finish-reason-field-streaming-chunk-89828ec09c6e6385.yaml @@ -1,7 +1,6 @@ --- features: - | - Added dedicated `finish_reason` field to `StreamingChunk` class to improve type safety and enable sophisticated streaming UI logic. The field follows OpenAI's convention with standard values: "stop", "length", "tool_calls", "content_filter". -deprecations: + Added dedicated `finish_reason` field to `StreamingChunk` class to improve type safety and enable sophisticated streaming UI logic. 
The field uses a `FinishReason` type alias with standard values: "stop", "length", "tool_calls", "content_filter", plus Haystack-specific values like "tool_call_results" (used by ToolInvoker to indicate tool execution completion). Both the dedicated field and legacy meta access (`chunk.meta["finish_reason"]`) are permanently supported for maximum flexibility. Users can choose either approach based on their preferences, with the dedicated field providing better type safety and IDE support. - | - Accessing `finish_reason` from `StreamingChunk.meta` is now deprecated and will be removed in Haystack 2.17. Use the dedicated `StreamingChunk.finish_reason` field instead. During the migration period, both approaches work but accessing via meta shows deprecation warnings. + Updated `ToolInvoker` component to use the new `finish_reason` field when streaming tool results. The component now sets `finish_reason="tool_call_results"` in the final streaming chunk to indicate that tool execution has completed, while maintaining backward compatibility by also setting the value in `meta["finish_reason"]`. From ece472515715116c375feb32c8f1f824cc6c8d28 Mon Sep 17 00:00:00 2001 From: Vladimir Blagojevic Date: Mon, 23 Jun 2025 14:15:37 +0200 Subject: [PATCH 13/19] Repair merge issue --- haystack/components/generators/chat/openai.py | 43 +++++++------------ .../components/generators/chat/test_openai.py | 15 ++++++- 2 files changed, 29 insertions(+), 29 deletions(-) diff --git a/haystack/components/generators/chat/openai.py b/haystack/components/generators/chat/openai.py index 0acfe4e625..1f84cb4da2 100644 --- a/haystack/components/generators/chat/openai.py +++ b/haystack/components/generators/chat/openai.py @@ -517,27 +517,23 @@ def _convert_chat_completion_chunk_to_streaming_chunk( generated the chunk, such as the component name and type. :returns: - A list of StreamingChunk objects representing the content of the chunk from the OpenAI API. 
+ A StreamingChunk object representing the content of the chunk from the OpenAI API. """ # On very first chunk so len(previous_chunks) == 0, the Choices field only provides role info (e.g. "assistant") # Choices is empty if include_usage is set to True where the usage information is returned. if len(chunk.choices) == 0: - return [ - StreamingChunk( - content="", - component_info=component_info, - # Index is None since it's only set to an int when a content block is present - index=None, - finish_reason=None, - meta={ - "model": chunk.model, - "received_at": datetime.now().isoformat(), - "usage": _serialize_usage(chunk.usage), - # NOTE: Both finish_reason field and meta["finish_reason"] are supported for flexibility - "finish_reason": None, - }, - ) - ] + return StreamingChunk( + content="", + component_info=component_info, + # Index is None since it's only set to an int when a content block is present + index=None, + finish_reason=None, + meta={ + "model": chunk.model, + "received_at": datetime.now().isoformat(), + "usage": _serialize_usage(chunk.usage), + }, + ) choice: ChunkChoice = chunk.choices[0] @@ -552,17 +548,7 @@ def _convert_chat_completion_chunk_to_streaming_chunk( id=tool_call.id, tool_name=function.name if function else None, arguments=function.arguments if function and function.arguments else None, - ), - start=function.name is not None if function else False, - finish_reason=choice.finish_reason, - meta={ - "model": chunk.model, - "index": choice.index, - "tool_calls": choice.delta.tool_calls, - "finish_reason": choice.finish_reason, - "received_at": datetime.now().isoformat(), - "usage": _serialize_usage(chunk.usage), - }, + ) ) chunk_message = StreamingChunk( content=choice.delta.content or "", @@ -571,6 +557,7 @@ def _convert_chat_completion_chunk_to_streaming_chunk( index=tool_calls_deltas[0].index, tool_calls=tool_calls_deltas, start=tool_calls_deltas[0].tool_name is not None, + finish_reason=choice.finish_reason, meta={ "model": chunk.model, 
"index": choice.index, diff --git a/test/components/generators/chat/test_openai.py b/test/components/generators/chat/test_openai.py index fd4c227e1e..3fdd53bb34 100644 --- a/test/components/generators/chat/test_openai.py +++ b/test/components/generators/chat/test_openai.py @@ -986,6 +986,7 @@ def streaming_chunks(): "received_at": ANY, "usage": None, }, + finish_reason=None, ), StreamingChunk( content="", @@ -1007,6 +1008,7 @@ def streaming_chunks(): index=0, tool_calls=[ToolCallDelta(tool_name="weather", id="call_zcvlnVaTeJWRjLAFfYxX69z4", index=0)], start=True, + finish_reason=None, ), StreamingChunk( content="", @@ -1020,6 +1022,7 @@ def streaming_chunks(): }, index=0, tool_calls=[ToolCallDelta(arguments='{"ci', index=0)], + finish_reason=None, ), StreamingChunk( content="", @@ -1033,6 +1036,7 @@ def streaming_chunks(): }, index=0, tool_calls=[ToolCallDelta(arguments='ty": ', index=0)], + finish_reason=None, ), StreamingChunk( content="", @@ -1046,6 +1050,7 @@ def streaming_chunks(): }, index=0, tool_calls=[ToolCallDelta(arguments='"Paris', index=0)], + finish_reason=None, ), StreamingChunk( content="", @@ -1059,6 +1064,7 @@ def streaming_chunks(): }, index=0, tool_calls=[ToolCallDelta(arguments='"}', index=0)], + finish_reason=None, ), StreamingChunk( content="", @@ -1080,6 +1086,7 @@ def streaming_chunks(): index=1, tool_calls=[ToolCallDelta(tool_name="weather", id="call_C88m67V16CrETq6jbNXjdZI9", index=1)], start=True, + finish_reason=None, ), StreamingChunk( content="", @@ -1093,6 +1100,7 @@ def streaming_chunks(): }, index=1, tool_calls=[ToolCallDelta(arguments='{"ci', index=1)], + finish_reason=None, ), StreamingChunk( content="", @@ -1106,6 +1114,7 @@ def streaming_chunks(): }, index=1, tool_calls=[ToolCallDelta(arguments='ty": ', index=1)], + finish_reason=None, ), StreamingChunk( content="", @@ -1119,6 +1128,7 @@ def streaming_chunks(): }, index=1, tool_calls=[ToolCallDelta(arguments='"Berli', index=1)], + finish_reason=None, ), StreamingChunk( 
content="", @@ -1132,6 +1142,7 @@ def streaming_chunks(): }, index=1, tool_calls=[ToolCallDelta(arguments='n"}', index=1)], + finish_reason=None, ), StreamingChunk( content="", @@ -1143,6 +1154,7 @@ def streaming_chunks(): "received_at": ANY, "usage": None, }, + finish_reason="tool_calls", ), StreamingChunk( content="", @@ -1162,6 +1174,7 @@ def streaming_chunks(): "prompt_tokens_details": {"audio_tokens": 0, "cached_tokens": 0}, }, }, + finish_reason=None, ), ] @@ -1174,7 +1187,7 @@ def test_convert_chat_completion_chunk_to_streaming_chunk(self, chat_completion_ chunk=openai_chunk, previous_chunks=previous_chunks ) assert stream_chunk == haystack_chunk - previous_chunks.append(openai_chunk) + previous_chunks.append(stream_chunk) def test_handle_stream_response(self, chat_completion_chunks): openai_chunks = chat_completion_chunks From 7db10d7dbb782700c3dd0312d6b0985f4f8785f6 Mon Sep 17 00:00:00 2001 From: Vladimir Blagojevic Date: Mon, 23 Jun 2025 14:21:13 +0200 Subject: [PATCH 14/19] Update tests for finish_reason --- test/components/generators/chat/test_hugging_face_api.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/test/components/generators/chat/test_hugging_face_api.py b/test/components/generators/chat/test_hugging_face_api.py index 6698a743ee..a4db4da2b4 100644 --- a/test/components/generators/chat/test_hugging_face_api.py +++ b/test/components/generators/chat/test_hugging_face_api.py @@ -687,6 +687,7 @@ def test_convert_hfapi_tool_calls_invalid_type_arguments(self): }, index=0, start=True, + finish_reason=None, ), [], ), @@ -711,6 +712,7 @@ def test_convert_hfapi_tool_calls_invalid_type_arguments(self): "model": "microsoft/Phi-3.5-mini-instruct", "finish_reason": "stop", }, + finish_reason="stop", ), [0], ), @@ -730,6 +732,7 @@ def test_convert_hfapi_tool_calls_invalid_type_arguments(self): "model": "microsoft/Phi-3.5-mini-instruct", "usage": {"completion_tokens": 2, "prompt_tokens": 21}, }, + finish_reason=None, ), [0, 1], ), From 
0408d779d0ec66488c40d4bc632d6d3969911a9e Mon Sep 17 00:00:00 2001 From: Vladimir Blagojevic Date: Mon, 23 Jun 2025 15:05:05 +0200 Subject: [PATCH 15/19] Resolve mypy issues --- haystack/components/generators/chat/hugging_face_api.py | 4 +++- haystack/dataclasses/streaming_chunk.py | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/haystack/components/generators/chat/hugging_face_api.py b/haystack/components/generators/chat/hugging_face_api.py index a79d281d18..44af62446a 100644 --- a/haystack/components/generators/chat/hugging_face_api.py +++ b/haystack/components/generators/chat/hugging_face_api.py @@ -141,7 +141,9 @@ def _convert_chat_completion_stream_output_to_streaming_chunk( index=0 if choice.finish_reason is None else None, # start is True at the very beginning since first chunk contains role information + first part of the answer. start=len(previous_chunks) == 0, - finish_reason=choice.finish_reason, + # there is no way to constrain here as many models can return any finish reason + # so we need to ignore the type error + finish_reason=choice.finish_reason, # type: ignore[arg-type] ) return stream_chunk diff --git a/haystack/dataclasses/streaming_chunk.py b/haystack/dataclasses/streaming_chunk.py index 3198b7e460..84d868774f 100644 --- a/haystack/dataclasses/streaming_chunk.py +++ b/haystack/dataclasses/streaming_chunk.py @@ -12,7 +12,7 @@ # Type alias for standard finish_reason values following OpenAI's convention # plus Haystack-specific values -FinishReason = Literal["stop", "length", "tool_calls", "content_filter", "tool_call_results"] +FinishReason = Literal["stop", "length", "tool_calls", "function_call", "content_filter", "tool_call_results"] @dataclass From bd8153520f29473a79cc904af4909577ceea3cef Mon Sep 17 00:00:00 2001 From: Vladimir Blagojevic Date: Mon, 23 Jun 2025 15:16:50 +0200 Subject: [PATCH 16/19] Lint issue --- haystack/dataclasses/streaming_chunk.py | 1 - 1 file changed, 1 deletion(-) diff --git 
a/haystack/dataclasses/streaming_chunk.py b/haystack/dataclasses/streaming_chunk.py index 84d868774f..642358e839 100644 --- a/haystack/dataclasses/streaming_chunk.py +++ b/haystack/dataclasses/streaming_chunk.py @@ -2,7 +2,6 @@ # # SPDX-License-Identifier: Apache-2.0 -import warnings from dataclasses import dataclass, field from typing import Any, Awaitable, Callable, Dict, List, Literal, Optional, Union, overload From 59a13494953a838a3e613679260800d21c847294 Mon Sep 17 00:00:00 2001 From: Vladimir Blagojevic Date: Tue, 24 Jun 2025 10:20:13 +0200 Subject: [PATCH 17/19] Enhance HF finish_reason translation --- .../generators/chat/hugging_face_api.py | 43 +++++++++++++++++-- 1 file changed, 40 insertions(+), 3 deletions(-) diff --git a/haystack/components/generators/chat/hugging_face_api.py b/haystack/components/generators/chat/hugging_face_api.py index 44af62446a..d9e69e5023 100644 --- a/haystack/components/generators/chat/hugging_face_api.py +++ b/haystack/components/generators/chat/hugging_face_api.py @@ -18,6 +18,7 @@ ToolCall, select_streaming_callback, ) +from haystack.dataclasses.streaming_chunk import FinishReason from haystack.lazy_imports import LazyImport from haystack.tools import ( Tool, @@ -41,6 +42,7 @@ ChatCompletionOutput, ChatCompletionOutputToolCall, ChatCompletionStreamOutput, + ChatCompletionStreamOutputChoice, InferenceClient, ) @@ -110,6 +112,42 @@ def _convert_tools_to_hfapi_tools( return hf_tools +def _map_hf_finish_reason_to_haystack(choice: "ChatCompletionStreamOutputChoice") -> Optional[FinishReason]: + """ + Map HuggingFace finish reasons to Haystack FinishReason literals. + + Uses the full choice object to detect tool calls and provide accurate mapping. 
+ + HuggingFace finish reasons: + - "length": number of generated tokens == `max_new_tokens` + - "eos_token": the model generated its end of sequence token + - "stop_sequence": the model generated a text included in `stop_sequences` + + Additionally detects tool calls from delta.tool_calls or delta.tool_call_id. + + :param choice: The HuggingFace ChatCompletionStreamOutputChoice object. + :returns: The corresponding Haystack FinishReason or None. + """ + if choice.finish_reason is None: + return None + + # Check if this choice contains tool call information + has_tool_calls = choice.delta.tool_calls is not None or choice.delta.tool_call_id is not None + + # If we detect tool calls, override the finish reason + if has_tool_calls: + return "tool_calls" + + # Map HuggingFace finish reasons to Haystack standard ones + mapping: Dict[str, FinishReason] = { + "length": "length", # Direct match + "eos_token": "stop", # EOS token means natural stop + "stop_sequence": "stop", # Stop sequence means natural stop + } + + return mapping.get(choice.finish_reason, "stop") # Default to "stop" for unknown reasons + + def _convert_chat_completion_stream_output_to_streaming_chunk( chunk: "ChatCompletionStreamOutput", previous_chunks: List[StreamingChunk], @@ -133,6 +171,7 @@ def _convert_chat_completion_stream_output_to_streaming_chunk( # the argument is probably allowed for compatibility with OpenAI # see https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.chat_completion.n choice = chunk.choices[0] + mapped_finish_reason = _map_hf_finish_reason_to_haystack(choice) stream_chunk = StreamingChunk( content=choice.delta.content or "", meta={"model": chunk.model, "received_at": datetime.now().isoformat(), "finish_reason": choice.finish_reason}, @@ -141,9 +180,7 @@ def _convert_chat_completion_stream_output_to_streaming_chunk( index=0 if choice.finish_reason is None else None, # start is True at the very beginning since first chunk 
contains role information + first part of the answer. start=len(previous_chunks) == 0, - # there is no way to constrain here as many models can return any finish reason - # so we need to ignore the type error - finish_reason=choice.finish_reason, # type: ignore[arg-type] + finish_reason=mapped_finish_reason, ) return stream_chunk From 399956cd570603c958bfabc41873b525091f0d1e Mon Sep 17 00:00:00 2001 From: Vladimir Blagojevic Date: Tue, 24 Jun 2025 10:50:56 +0200 Subject: [PATCH 18/19] Remove irrelevant test --- test/dataclasses/test_streaming_chunk.py | 27 ------------------------ 1 file changed, 27 deletions(-) diff --git a/test/dataclasses/test_streaming_chunk.py b/test/dataclasses/test_streaming_chunk.py index 3ee702259d..1d53633026 100644 --- a/test/dataclasses/test_streaming_chunk.py +++ b/test/dataclasses/test_streaming_chunk.py @@ -125,33 +125,6 @@ def test_create_chunk_with_finish_reason_and_meta(): assert chunk.meta["usage"]["tokens"] == 10 -def test_dual_finish_reason_support(): - """Test that both finish_reason approaches work without warnings.""" - import warnings - - # Test that accessing finish_reason via meta does NOT show warnings - chunk = StreamingChunk(content="Test content", meta={"finish_reason": "length"}) - - with warnings.catch_warnings(record=True) as w: - warnings.simplefilter("always") - result_meta = chunk.meta.get("finish_reason") - result_direct = chunk.meta["finish_reason"] - - # No warnings should be raised - assert len(w) == 0 - assert result_meta == "length" - assert result_direct == "length" - - # Test that both approaches can be used simultaneously - chunk_dual = StreamingChunk( - content="Test content", finish_reason="stop", meta={"finish_reason": "length", "model": "gpt-4"} - ) - - assert chunk_dual.finish_reason == "stop" - assert chunk_dual.meta["finish_reason"] == "length" - assert chunk_dual.meta.get("finish_reason") == "length" - - def test_finish_reason_standard_values(): - """Test all standard finish_reason values including
the new Haystack-specific ones.""" standard_values = ["stop", "length", "tool_calls", "content_filter", "tool_call_results"] From c2d846734783d618bba58f9fa11585e0e8b3b5d4 Mon Sep 17 00:00:00 2001 From: Sebastian Husch Lee Date: Wed, 25 Jun 2025 10:56:59 +0200 Subject: [PATCH 19/19] PR comments --- .../components/generators/chat/hugging_face_api.py | 5 +++-- haystack/components/generators/chat/openai.py | 12 ++++++++++-- haystack/components/generators/hugging_face_api.py | 11 ++++++++++- haystack/dataclasses/streaming_chunk.py | 8 +++----- ...eason-field-streaming-chunk-89828ec09c6e6385.yaml | 2 +- .../generators/chat/test_hugging_face_api.py | 2 -- test/components/generators/chat/test_openai.py | 12 ------------ 7 files changed, 27 insertions(+), 25 deletions(-) diff --git a/haystack/components/generators/chat/hugging_face_api.py b/haystack/components/generators/chat/hugging_face_api.py index d9e69e5023..7a34a5aa3c 100644 --- a/haystack/components/generators/chat/hugging_face_api.py +++ b/haystack/components/generators/chat/hugging_face_api.py @@ -118,7 +118,8 @@ def _map_hf_finish_reason_to_haystack(choice: "ChatCompletionStreamOutputChoice" Uses the full choice object to detect tool calls and provide accurate mapping. 
- HuggingFace finish reasons: + HuggingFace finish reasons (can be found here https://huggingface.github.io/text-generation-inference/ under + FinishReason): - "length": number of generated tokens == `max_new_tokens` - "eos_token": the model generated its end of sequence token - "stop_sequence": the model generated a text included in `stop_sequences` @@ -171,7 +172,7 @@ def _convert_chat_completion_stream_output_to_streaming_chunk( # the argument is probably allowed for compatibility with OpenAI # see https://huggingface.co/docs/huggingface_hub/package_reference/inference_client#huggingface_hub.InferenceClient.chat_completion.n choice = chunk.choices[0] - mapped_finish_reason = _map_hf_finish_reason_to_haystack(choice) + mapped_finish_reason = _map_hf_finish_reason_to_haystack(choice) if choice.finish_reason else None stream_chunk = StreamingChunk( content=choice.delta.content or "", meta={"model": chunk.model, "received_at": datetime.now().isoformat(), "finish_reason": choice.finish_reason}, diff --git a/haystack/components/generators/chat/openai.py b/haystack/components/generators/chat/openai.py index 1f84cb4da2..b9e0988dfb 100644 --- a/haystack/components/generators/chat/openai.py +++ b/haystack/components/generators/chat/openai.py @@ -18,6 +18,7 @@ AsyncStreamingCallbackT, ChatMessage, ComponentInfo, + FinishReason, StreamingCallbackT, StreamingChunk, SyncStreamingCallbackT, @@ -519,6 +520,13 @@ def _convert_chat_completion_chunk_to_streaming_chunk( :returns: A StreamingChunk object representing the content of the chunk from the OpenAI API. """ + finish_reason_mapping: Dict[str, FinishReason] = { + "stop": "stop", + "length": "length", + "content_filter": "content_filter", + "tool_calls": "tool_calls", + "function_call": "tool_calls", + } # On very first chunk so len(previous_chunks) == 0, the Choices field only provides role info (e.g. "assistant") # Choices is empty if include_usage is set to True where the usage information is returned. 
if len(chunk.choices) == 0: @@ -557,7 +565,7 @@ def _convert_chat_completion_chunk_to_streaming_chunk( index=tool_calls_deltas[0].index, tool_calls=tool_calls_deltas, start=tool_calls_deltas[0].tool_name is not None, - finish_reason=choice.finish_reason, + finish_reason=finish_reason_mapping.get(choice.finish_reason) if choice.finish_reason else None, meta={ "model": chunk.model, "index": choice.index, @@ -586,7 +594,7 @@ def _convert_chat_completion_chunk_to_streaming_chunk( # The first chunk is always a start message chunk that only contains role information, so if we reach here # and previous_chunks is length 1 then this is the start of text content. start=len(previous_chunks) == 1, - finish_reason=choice.finish_reason, + finish_reason=finish_reason_mapping.get(choice.finish_reason) if choice.finish_reason else None, meta={ "model": chunk.model, "index": choice.index, diff --git a/haystack/components/generators/hugging_face_api.py b/haystack/components/generators/hugging_face_api.py index 375cef8d12..0da42871ab 100644 --- a/haystack/components/generators/hugging_face_api.py +++ b/haystack/components/generators/hugging_face_api.py @@ -9,6 +9,7 @@ from haystack import component, default_from_dict, default_to_dict from haystack.dataclasses import ( ComponentInfo, + FinishReason, StreamingCallbackT, StreamingChunk, SyncStreamingCallbackT, @@ -241,13 +242,21 @@ def _stream_and_build_response( if first_chunk_time is None: first_chunk_time = datetime.now().isoformat() + mapping: Dict[str, FinishReason] = { + "length": "length", # Direct match + "eos_token": "stop", # EOS token means natural stop + "stop_sequence": "stop", # Stop sequence means natural stop + } + mapped_finish_reason = ( + mapping.get(chunk_metadata["finish_reason"], "stop") if chunk_metadata.get("finish_reason") else None + ) stream_chunk = StreamingChunk( content=token.text, meta=chunk_metadata, component_info=component_info, index=0, start=len(chunks) == 0, - 
finish_reason=chunk_metadata.get("finish_reason"), + finish_reason=mapped_finish_reason, ) chunks.append(stream_chunk) streaming_callback(stream_chunk) diff --git a/haystack/dataclasses/streaming_chunk.py b/haystack/dataclasses/streaming_chunk.py index 642358e839..7cd30e466f 100644 --- a/haystack/dataclasses/streaming_chunk.py +++ b/haystack/dataclasses/streaming_chunk.py @@ -10,8 +10,8 @@ from haystack.utils.asynchronous import is_callable_async_compatible # Type alias for standard finish_reason values following OpenAI's convention -# plus Haystack-specific values -FinishReason = Literal["stop", "length", "tool_calls", "function_call", "content_filter", "tool_call_results"] +# plus Haystack-specific value ("tool_call_results") +FinishReason = Literal["stop", "length", "tool_calls", "content_filter", "tool_call_results"] @dataclass @@ -74,8 +74,6 @@ class StreamingChunk: :param content: The content of the message chunk as a string. :param meta: A dictionary containing metadata related to the message chunk. - NOTE: Both 'finish_reason' field and meta['finish_reason'] are supported for flexibility. - The dedicated field provides better type safety and IDE support. :param component_info: A `ComponentInfo` object containing information about the component that generated the chunk, such as the component name and type. :param index: An optional integer index representing which content block this chunk belongs to. @@ -85,7 +83,7 @@ class StreamingChunk: :param start: A boolean indicating whether this chunk marks the start of a content block. :param finish_reason: An optional value indicating the reason the generation finished. Standard values follow OpenAI's convention: "stop", "length", "tool_calls", "content_filter", - plus Haystack-specific values like "tool_call_results". + plus Haystack-specific value "tool_call_results". 
""" content: str diff --git a/releasenotes/notes/add-finish-reason-field-streaming-chunk-89828ec09c6e6385.yaml b/releasenotes/notes/add-finish-reason-field-streaming-chunk-89828ec09c6e6385.yaml index b48e045ff8..fdceffe5ee 100644 --- a/releasenotes/notes/add-finish-reason-field-streaming-chunk-89828ec09c6e6385.yaml +++ b/releasenotes/notes/add-finish-reason-field-streaming-chunk-89828ec09c6e6385.yaml @@ -1,6 +1,6 @@ --- features: - | - Added dedicated `finish_reason` field to `StreamingChunk` class to improve type safety and enable sophisticated streaming UI logic. The field uses a `FinishReason` type alias with standard values: "stop", "length", "tool_calls", "content_filter", plus Haystack-specific values like "tool_call_results" (used by ToolInvoker to indicate tool execution completion). Both the dedicated field and legacy meta access (`chunk.meta["finish_reason"]`) are permanently supported for maximum flexibility. Users can choose either approach based on their preferences, with the dedicated field providing better type safety and IDE support. + Added dedicated `finish_reason` field to `StreamingChunk` class to improve type safety and enable sophisticated streaming UI logic. The field uses a `FinishReason` type alias with standard values: "stop", "length", "tool_calls", "content_filter", plus Haystack-specific value "tool_call_results" (used by ToolInvoker to indicate tool execution completion). - | Updated `ToolInvoker` component to use the new `finish_reason` field when streaming tool results. The component now sets `finish_reason="tool_call_results"` in the final streaming chunk to indicate that tool execution has completed, while maintaining backward compatibility by also setting the value in `meta["finish_reason"]`. 
diff --git a/test/components/generators/chat/test_hugging_face_api.py b/test/components/generators/chat/test_hugging_face_api.py index a4db4da2b4..5a8be6c3c5 100644 --- a/test/components/generators/chat/test_hugging_face_api.py +++ b/test/components/generators/chat/test_hugging_face_api.py @@ -687,7 +687,6 @@ def test_convert_hfapi_tool_calls_invalid_type_arguments(self): }, index=0, start=True, - finish_reason=None, ), [], ), @@ -732,7 +731,6 @@ def test_convert_hfapi_tool_calls_invalid_type_arguments(self): "model": "microsoft/Phi-3.5-mini-instruct", "usage": {"completion_tokens": 2, "prompt_tokens": 21}, }, - finish_reason=None, ), [0, 1], ), diff --git a/test/components/generators/chat/test_openai.py b/test/components/generators/chat/test_openai.py index 3fdd53bb34..dfbb2a39d5 100644 --- a/test/components/generators/chat/test_openai.py +++ b/test/components/generators/chat/test_openai.py @@ -986,7 +986,6 @@ def streaming_chunks(): "received_at": ANY, "usage": None, }, - finish_reason=None, ), StreamingChunk( content="", @@ -1008,7 +1007,6 @@ def streaming_chunks(): index=0, tool_calls=[ToolCallDelta(tool_name="weather", id="call_zcvlnVaTeJWRjLAFfYxX69z4", index=0)], start=True, - finish_reason=None, ), StreamingChunk( content="", @@ -1022,7 +1020,6 @@ def streaming_chunks(): }, index=0, tool_calls=[ToolCallDelta(arguments='{"ci', index=0)], - finish_reason=None, ), StreamingChunk( content="", @@ -1036,7 +1033,6 @@ def streaming_chunks(): }, index=0, tool_calls=[ToolCallDelta(arguments='ty": ', index=0)], - finish_reason=None, ), StreamingChunk( content="", @@ -1050,7 +1046,6 @@ def streaming_chunks(): }, index=0, tool_calls=[ToolCallDelta(arguments='"Paris', index=0)], - finish_reason=None, ), StreamingChunk( content="", @@ -1064,7 +1059,6 @@ def streaming_chunks(): }, index=0, tool_calls=[ToolCallDelta(arguments='"}', index=0)], - finish_reason=None, ), StreamingChunk( content="", @@ -1086,7 +1080,6 @@ def streaming_chunks(): index=1, 
tool_calls=[ToolCallDelta(tool_name="weather", id="call_C88m67V16CrETq6jbNXjdZI9", index=1)], start=True, - finish_reason=None, ), StreamingChunk( content="", @@ -1100,7 +1093,6 @@ def streaming_chunks(): }, index=1, tool_calls=[ToolCallDelta(arguments='{"ci', index=1)], - finish_reason=None, ), StreamingChunk( content="", @@ -1114,7 +1106,6 @@ def streaming_chunks(): }, index=1, tool_calls=[ToolCallDelta(arguments='ty": ', index=1)], - finish_reason=None, ), StreamingChunk( content="", @@ -1128,7 +1119,6 @@ def streaming_chunks(): }, index=1, tool_calls=[ToolCallDelta(arguments='"Berli', index=1)], - finish_reason=None, ), StreamingChunk( content="", @@ -1142,7 +1132,6 @@ def streaming_chunks(): }, index=1, tool_calls=[ToolCallDelta(arguments='n"}', index=1)], - finish_reason=None, ), StreamingChunk( content="", @@ -1174,7 +1163,6 @@ def streaming_chunks(): "prompt_tokens_details": {"audio_tokens": 0, "cached_tokens": 0}, }, }, - finish_reason=None, ), ]