Skip to content

Commit a89aac3

Browse files
authored
Merge branch 'main' into add-perplexity-search
2 parents 9a683c9 + 9c47f1e commit a89aac3

11 files changed

Lines changed: 383 additions & 5 deletions

File tree

.github/workflows/docling_serve.yml

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ jobs:
4242
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
4343

4444
- name: Set up Python ${{ matrix.python-version }}
45-
uses: actions/setup-python@a26af69be951a213d495a4c3e4e4022e16d87065 # v5.6.0
45+
uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0
4646
with:
4747
python-version: ${{ matrix.python-version }}
4848

@@ -59,7 +59,7 @@ jobs:
5959
- name: Store unit tests coverage
6060
id: coverage_comment
6161
if: matrix.python-version == '3.10' && runner.os == 'Linux' && github.event_name != 'schedule'
62-
uses: py-cov-action/python-coverage-comment-action@7188638f871f721a365d644f505d1ff3df20d683 # v3.40
62+
uses: py-cov-action/python-coverage-comment-action@63f52f4fbbffada6e8dee8ec432de7e01df9ba79 # v3.41
6363
with:
6464
GITHUB_TOKEN: ${{ github.token }}
6565
COVERAGE_PATH: integrations/docling_serve
@@ -97,7 +97,7 @@ jobs:
9797

9898
- name: Store combined coverage
9999
if: github.event_name == 'push'
100-
uses: py-cov-action/python-coverage-comment-action@7188638f871f721a365d644f505d1ff3df20d683 # v3.40
100+
uses: py-cov-action/python-coverage-comment-action@63f52f4fbbffada6e8dee8ec432de7e01df9ba79 # v3.41
101101
with:
102102
GITHUB_TOKEN: ${{ github.token }}
103103
COVERAGE_PATH: integrations/docling_serve

integrations/amazon_bedrock/CHANGELOG.md

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,19 @@
11
# Changelog
22

3+
## [integrations/amazon_bedrock-v6.10.0] - 2026-05-12
4+
5+
### 🚀 Features
6+
7+
- Support adaptive_thinking_effort flat param for Claude (#3297)
8+
9+
10+
## [integrations/amazon_bedrock-v6.9.0] - 2026-05-11
11+
12+
### 🐛 Bug Fixes
13+
14+
- Normalize token usage conversion in AmazonBedrockGenerator (#3247)
15+
16+
317
## [integrations/amazon_bedrock-v6.8.2] - 2026-05-06
418

519
### 🐛 Bug Fixes

integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/adapters.py

Lines changed: 125 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,86 @@
55
from botocore.eventstream import EventStream
66
from haystack.dataclasses import StreamingChunk, SyncStreamingCallbackT
77

8+
_USAGE_HEADER_MAP = {
9+
"input_tokens": "x-amzn-bedrock-input-token-count",
10+
"output_tokens": "x-amzn-bedrock-output-token-count",
11+
"cache_read_input_tokens": "x-amzn-bedrock-cache-read-input-token-count",
12+
"cache_write_input_tokens": "x-amzn-bedrock-cache-write-input-token-count",
13+
}
14+
15+
_USAGE_FIELD_MAP = {
16+
"input_tokens": "input_tokens",
17+
"output_tokens": "output_tokens",
18+
"cache_read_input_tokens": "cache_read_input_tokens",
19+
"cache_write_input_tokens": "cache_creation_input_tokens",
20+
}
21+
22+
23+
def _set_usage_value(usage: dict[str, int], key: str, value: Any) -> None:
24+
"""
25+
Sets a usage value coerced to int, ignoring values that are None or not int-convertible.
26+
27+
:param usage: The usage dictionary to update in place.
28+
:param key: The destination key.
29+
:param value: The raw value to coerce and store.
30+
"""
31+
if value is None:
32+
return
33+
try:
34+
usage[key] = int(value)
35+
except (TypeError, ValueError):
36+
return
37+
38+
39+
def _apply_usage(usage: dict[str, int], source: dict[str, Any], field_map: dict[str, str]) -> None:
    """
    Transfers the mapped usage counters from ``source`` into ``usage``.

    Each ``field_map`` entry reads as ``destination key -> source key``. The raw value is
    looked up with ``source.get`` and handed to ``_set_usage_value``, which decides
    whether it is stored.

    :param usage: The usage dictionary to update in place.
    :param source: The source dictionary holding raw usage values.
    :param field_map: A mapping from destination key to source key.
    """
    for target_key in field_map:
        raw_value = source.get(field_map[target_key])
        _set_usage_value(usage, target_key, raw_value)
49+
50+
51+
def _usage_from_response_metadata(metadata: dict[str, Any]) -> dict[str, int]:
    """
    Builds a normalized token-usage dictionary from the HTTP headers in Bedrock response metadata.

    Headers are read from the ``HTTPHeaders`` entry, falling back to ``http_headers``.
    Header names are compared case-insensitively.

    :param metadata: The Bedrock response metadata dictionary.
    :returns: A normalized usage dictionary, or an empty dictionary when the headers are
        missing, are not a dictionary, or contain no usage values.
    """
    raw_headers = metadata.get("HTTPHeaders") or metadata.get("http_headers") or {}
    if not isinstance(raw_headers, dict):
        return {}

    # Lower-case every header name so lookups match the keys in _USAGE_HEADER_MAP.
    lowered: dict[str, Any] = {}
    for name, header_value in raw_headers.items():
        lowered[str(name).lower()] = header_value

    collected: dict[str, int] = {}
    _apply_usage(collected, lowered, _USAGE_HEADER_MAP)
    return collected
66+
67+
68+
def _merge_usage(metadata: dict[str, Any], usage: dict[str, int]) -> None:
69+
"""
70+
Merges a usage dictionary into the metadata under the ``usage`` key.
71+
72+
Recomputes ``total_tokens`` after merging when both ``input_tokens`` and ``output_tokens``
73+
are present, so partial usage from multiple sources is summed correctly.
74+
75+
:param metadata: The metadata dictionary to update in place.
76+
:param usage: The normalized usage dictionary to merge in.
77+
"""
78+
if not usage:
79+
return
80+
81+
existing_usage = metadata.get("usage")
82+
base = existing_usage if isinstance(existing_usage, dict) else {}
83+
merged_usage = {**base, **usage}
84+
if "input_tokens" in merged_usage and "output_tokens" in merged_usage:
85+
merged_usage["total_tokens"] = merged_usage["input_tokens"] + merged_usage["output_tokens"]
86+
metadata["usage"] = merged_usage
87+
888

989
class BedrockModelAdapter(ABC):
1090
"""
@@ -54,6 +134,20 @@ def get_stream_responses(self, stream: EventStream, streaming_callback: SyncStre
54134
:param streaming_callback: The handler for the streaming response.
55135
:returns: A list of string responses.
56136
"""
137+
responses, _ = self.get_stream_responses_and_metadata(stream, streaming_callback)
138+
return responses
139+
140+
def get_stream_responses_and_metadata(
141+
self, stream: EventStream, streaming_callback: SyncStreamingCallbackT
142+
) -> tuple[list[str], dict[str, Any]]:
143+
"""
144+
Extracts both the responses and normalized metadata from the Amazon Bedrock streaming response.
145+
146+
:param stream: The streaming response from the Amazon Bedrock request.
147+
:param streaming_callback: The handler for the streaming response.
148+
:returns: A tuple of ``(responses, metadata)`` where ``responses`` is a list of string
149+
responses and ``metadata`` is a dictionary that may contain a normalized ``usage`` block.
150+
"""
57151
streaming_chunks: list[StreamingChunk] = []
58152
for event in stream:
59153
chunk = event.get("chunk")
@@ -64,7 +158,37 @@ def get_stream_responses(self, stream: EventStream, streaming_callback: SyncStre
64158
streaming_callback(streaming_chunk)
65159

66160
responses = ["".join(streaming_chunk.content for streaming_chunk in streaming_chunks).lstrip()]
67-
return responses
161+
metadata = self._extract_streaming_metadata(streaming_chunks)
162+
return responses, metadata
163+
164+
def _extract_streaming_metadata(self, streaming_chunks: list[StreamingChunk]) -> dict[str, Any]:
    """
    Gathers normalized token usage from the chunks of a Bedrock streaming response.

    This default implementation targets Anthropic Claude Messages API stream events,
    where input usage is carried in ``message_start.message.usage`` and output usage in
    ``message_delta.usage``. Chunks are processed in order, so a counter reported by a
    later chunk replaces the value recorded from an earlier one.

    :param streaming_chunks: The streaming chunks emitted during the response.
    :returns: A metadata dictionary with a ``usage`` block, or an empty dictionary when no
        usage information is present.
    """
    collected: dict[str, int] = {}

    for chunk in streaming_chunks:
        chunk_meta = chunk.meta
        if not isinstance(chunk_meta, dict):
            continue

        # Usage nested under ``message`` is applied first, then the chunk-level ``usage``.
        message = chunk_meta.get("message")
        nested_usage = message.get("usage") if isinstance(message, dict) else None
        if isinstance(nested_usage, dict):
            _apply_usage(collected, nested_usage, _USAGE_FIELD_MAP)

        top_level_usage = chunk_meta.get("usage")
        if isinstance(top_level_usage, dict):
            _apply_usage(collected, top_level_usage, _USAGE_FIELD_MAP)

    if not collected:
        return {}
    return {"usage": collected}
68192

69193
def _get_params(self, inference_kwargs: dict[str, Any], default_params: dict[str, Any]) -> dict[str, Any]:
70194
"""

integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/chat/chat_generator.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -508,6 +508,13 @@ def _resolve_flattened_generation_kwargs(generation_kwargs: dict[str, Any]) -> d
508508
thinking["budget_tokens"] = thinking_budget_tokens
509509
thinking.setdefault("type", "enabled")
510510

511+
adaptive_thinking_effort = generation_kwargs.pop("adaptive_thinking_effort", None)
512+
if adaptive_thinking_effort is not None:
513+
thinking = generation_kwargs.setdefault("thinking", {})
514+
thinking.setdefault("type", "adaptive")
515+
output_config = generation_kwargs.setdefault("output_config", {})
516+
output_config["effort"] = adaptive_thinking_effort
517+
511518
return generation_kwargs
512519

513520
@component.output_types(replies=list[ChatMessage])

integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/generator.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@
2525
CohereCommandRAdapter,
2626
MetaLlamaAdapter,
2727
MistralAdapter,
28+
_merge_usage,
29+
_usage_from_response_metadata,
2830
)
2931

3032
logger = logging.getLogger(__name__)
@@ -215,6 +217,7 @@ def run(
215217
generation_kwargs["stream"] = streaming_callback is not None
216218

217219
body = self.model_adapter.prepare_body(prompt=prompt, **generation_kwargs)
220+
stream_metadata: dict[str, Any] = {}
218221
try:
219222
if streaming_callback:
220223
response = self.client.invoke_model_with_response_stream(
@@ -224,7 +227,7 @@ def run(
224227
contentType="application/json",
225228
)
226229
response_stream = response["body"]
227-
replies = self.model_adapter.get_stream_responses(
230+
replies, stream_metadata = self.model_adapter.get_stream_responses_and_metadata(
228231
stream=response_stream, streaming_callback=streaming_callback
229232
)
230233
else:
@@ -238,6 +241,8 @@ def run(
238241
replies = self.model_adapter.get_responses(response_body=response_body)
239242

240243
metadata = response.get("ResponseMetadata", {})
244+
_merge_usage(metadata, _usage_from_response_metadata(metadata))
245+
_merge_usage(metadata, stream_metadata.get("usage", {}))
241246

242247
except ClientError as exception:
243248
msg = f"Could not perform inference for Amazon Bedrock model {self.model} due to:\n{exception}"

integrations/amazon_bedrock/tests/test_chat_generator.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -628,9 +628,19 @@ def tool_fn(city: str) -> str:
628628
"thinking_budget_tokens": None,
629629
"parallel_tool_use": None,
630630
"tool_choice_type": None,
631+
"adaptive_thinking_effort": None,
631632
},
632633
{},
633634
),
635+
(
636+
{
637+
"adaptive_thinking_effort": "max",
638+
},
639+
{
640+
"thinking": {"type": "adaptive"},
641+
"output_config": {"effort": "max"},
642+
},
643+
),
634644
],
635645
)
636646
def test_prepare_request_params_with_flattened_generation_kwargs(

0 commit comments

Comments
 (0)