Skip to content

Commit 1413474

Browse files
Merge branch 'main' into chore/removing-duplicated-standardised-tests
2 parents 8621761 + a630ac4 commit 1413474

5 files changed

Lines changed: 313 additions & 36 deletions

File tree

integrations/amazon_bedrock/CHANGELOG.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,16 @@
11
# Changelog
22

3+
## [integrations/amazon_bedrock-v6.4.0] - 2026-02-05
4+
5+
### 🚀 Features
6+
7+
- Bedrock - support prompt caching (#2796)
8+
9+
### 🧹 Chores
10+
11+
- *(amazon_bedrock)* Simplify Secret (de-)serialization (#2808)
12+
13+
314
## [integrations/amazon_bedrock-v6.3.0] - 2026-01-28
415

516
### 🌀 Miscellaneous

integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/chat/chat_generator.py

Lines changed: 36 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
_parse_completion_response,
2828
_parse_streaming_response,
2929
_parse_streaming_response_async,
30+
_validate_and_format_cache_point,
3031
_validate_guardrail_config,
3132
)
3233

@@ -41,7 +42,7 @@ class AmazonBedrockChatGenerator:
4142
For example, to use the Anthropic Claude 3.5 Sonnet model, initialize this component with the
4243
'anthropic.claude-3-5-sonnet-20240620-v1:0' model name.
4344
44-
### Usage example
45+
**Usage example**
4546
4647
```python
4748
from haystack_integrations.components.generators.amazon_bedrock import AmazonBedrockChatGenerator
@@ -57,7 +58,8 @@ class AmazonBedrockChatGenerator:
5758
client.run(messages, generation_kwargs={"max_tokens": 512})
5859
```
5960
60-
### Multimodal example
61+
**Multimodal example**
62+
6163
```python
6264
from haystack.dataclasses import ChatMessage, ImageContent
6365
from haystack_integrations.components.generators.amazon_bedrock import AmazonBedrockChatGenerator
@@ -72,11 +74,13 @@ class AmazonBedrockChatGenerator:
7274
7375
print(response)
7476
> The image shows a red apple.
77+
```
78+
79+
**Tool usage example**
7580
76-
### Tool usage example
77-
# AmazonBedrockChatGenerator supports Haystack's unified tool architecture, allowing tools to be used
78-
# across different chat generators. The same tool definitions and usage patterns work consistently
79-
# whether using Amazon Bedrock, OpenAI, Ollama, or any other supported LLM providers.
81+
AmazonBedrockChatGenerator supports Haystack's unified tool architecture, allowing tools to be used
82+
across different chat generators. The same tool definitions and usage patterns work consistently
83+
whether using Amazon Bedrock, OpenAI, Ollama, or any other supported LLM providers.
8084
8185
```python
8286
from haystack.dataclasses import ChatMessage
@@ -129,18 +133,31 @@ def weather(city: str):
129133
130134
> Based on the information I've received, I can tell you that the weather in Paris is
131135
> currently sunny with a temperature of 32°C (which is about 90°F).
136+
```
137+
138+
**Prompt caching**
139+
140+
This component supports prompt caching. You can use the `tools_cachepoint_config` parameter to configure the cache
141+
point for tools.
142+
To cache messages, set the `cachePoint` key in the `ChatMessage.meta` attribute.
132143
144+
```python
145+
ChatMessage.from_user("Long message...", meta={"cachePoint": {"type": "default"}})
133146
```
134147
148+
For more information, see the [Amazon Bedrock documentation](https://docs.aws.amazon.com/bedrock/latest/userguide/prompt-caching.html).
149+
150+
**Authentication**
151+
135152
AmazonBedrockChatGenerator uses AWS for authentication. You can use the AWS CLI to authenticate through your IAM.
136153
For more information on setting up an IAM identity-based policy, see [Amazon Bedrock documentation]
137154
(https://docs.aws.amazon.com/bedrock/latest/userguide/security_iam_id-based-policy-examples.html).
138155
139156
If the AWS environment is configured correctly, the AWS credentials are not required as they're loaded
140157
automatically from the environment or the AWS configuration file.
141158
If the AWS environment is not configured, set `aws_access_key_id`, `aws_secret_access_key`,
142-
and `aws_region_name` as environment variables or pass them as
143-
[Secret](https://docs.haystack.deepset.ai/docs/secret-management) arguments. Make sure the region you set
159+
and `aws_region_name` as environment variables or pass them as
160+
[Secret](https://docs.haystack.deepset.ai/docs/secret-management) arguments. Make sure the region you set
144161
supports Amazon Bedrock.
145162
"""
146163

@@ -160,6 +177,7 @@ def __init__(
160177
tools: ToolsType | None = None,
161178
*,
162179
guardrail_config: dict[str, str] | None = None,
180+
tools_cachepoint_config: dict[str, str] | None = None,
163181
) -> None:
164182
"""
165183
Initializes the `AmazonBedrockChatGenerator` with the provided parameters. The parameters are passed to the
@@ -201,6 +219,10 @@ def __init__(
201219
See the
202220
[Guardrails Streaming documentation](https://docs.aws.amazon.com/bedrock/latest/userguide/guardrails-streaming.html)
203221
for more information.
222+
:param tools_cachepoint_config: Optional configuration to use prompt caching for tools.
223+
The dictionary must match the
224+
[CachePointBlock schema](https://docs.aws.amazon.com/bedrock/latest/APIReference/API_runtime_CachePointBlock.html).
225+
Example: `{"type": "default", "ttl": "5m"}`
204226
205227
206228
:raises ValueError: If the model name is empty or None.
@@ -225,6 +247,10 @@ def __init__(
225247
_validate_guardrail_config(guardrail_config=guardrail_config, streaming=streaming_callback is not None)
226248
self.guardrail_config = guardrail_config
227249

250+
self.tools_cachepoint_config = (
251+
_validate_and_format_cache_point(tools_cachepoint_config) if tools_cachepoint_config else None
252+
)
253+
228254
def resolve_secret(secret: Secret | None) -> str | None:
229255
return secret.resolve_value() if secret else None
230256

@@ -310,6 +336,7 @@ def to_dict(self) -> dict[str, Any]:
310336
boto3_config=self.boto3_config,
311337
tools=serialize_tools_or_toolset(self.tools),
312338
guardrail_config=self.guardrail_config,
339+
tools_cachepoint_config=self.tools_cachepoint_config,
313340
)
314341

315342
@classmethod
@@ -385,7 +412,7 @@ def _prepare_request_params(
385412
tool_config = merged_kwargs.pop("toolConfig", None)
386413
if flattened_tools:
387414
# Format Haystack tools to Bedrock format
388-
tool_config = _format_tools(flattened_tools)
415+
tool_config = _format_tools(flattened_tools, tools_cachepoint_config=self.tools_cachepoint_config)
389416

390417
# Any remaining kwargs go to additionalModelRequestFields
391418
additional_fields = merged_kwargs if merged_kwargs else None

integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/chat/utils.py

Lines changed: 61 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,9 @@
4040

4141

4242
# Haystack to Bedrock util methods
43-
def _format_tools(tools: list[Tool] | None = None) -> dict[str, Any] | None:
43+
def _format_tools(
44+
tools: list[Tool] | None = None, tools_cachepoint_config: dict[str, dict[str, str]] | None = None
45+
) -> dict[str, Any] | None:
4446
"""
4547
Format Haystack Tool(s) to Amazon Bedrock toolConfig format.
4648
@@ -51,13 +53,16 @@ def _format_tools(tools: list[Tool] | None = None) -> dict[str, Any] | None:
5153
if not tools:
5254
return None
5355

54-
tool_specs = []
56+
tool_specs: list[dict[str, Any]] = []
5557
for tool in tools:
5658
tool_specs.append(
5759
{"toolSpec": {"name": tool.name, "description": tool.description, "inputSchema": {"json": tool.parameters}}}
5860
)
5961

60-
return {"tools": tool_specs} if tool_specs else None
62+
if tools_cachepoint_config:
63+
tool_specs.append({"cachePoint": tools_cachepoint_config})
64+
65+
return {"tools": tool_specs}
6166

6267

6368
def _convert_image_content_to_bedrock_format(image_content: ImageContent) -> dict[str, Any]:
@@ -181,20 +186,23 @@ def _repair_tool_result_messages(bedrock_formatted_messages: list[dict[str, Any]
181186
original_idx = None
182187
for tool_call_id in tool_call_ids:
183188
for idx, tool_result in tool_result_messages:
184-
tool_result_contents = [c for c in tool_result["content"] if "toolResult" in c]
189+
tool_result_contents = [c for c in tool_result["content"] if "toolResult" in c or "cachePoint" in c]
185190
for content in tool_result_contents:
186-
if content["toolResult"]["toolUseId"] == tool_call_id:
191+
if "toolResult" in content and content["toolResult"]["toolUseId"] == tool_call_id:
187192
regrouped_tool_result.append(content)
188193
# Keep track of the original index of the last tool result message
189194
original_idx = idx
195+
elif "cachePoint" in content and content not in regrouped_tool_result:
196+
regrouped_tool_result.append(content)
197+
190198
if regrouped_tool_result and original_idx is not None:
191199
repaired_tool_result_prompts.append((original_idx, {"role": "user", "content": regrouped_tool_result}))
192200

193201
# Remove the tool result messages from bedrock_formatted_messages
194202
bedrock_formatted_messages_minus_tool_results: list[tuple[int, Any]] = []
195203
for idx, msg in enumerate(bedrock_formatted_messages):
196-
# Assumes the content of tool result messages only contains 'toolResult': {...} objects (e.g. no 'text')
197-
if msg.get("content") and "toolResult" not in msg["content"][0]:
204+
# Filter out messages that contain toolResult (they are handled by repaired_tool_result_prompts)
205+
if msg.get("content") and not any("toolResult" in c for c in msg["content"]):
198206
bedrock_formatted_messages_minus_tool_results.append((idx, msg))
199207

200208
# Add the repaired tool result messages and sort to maintain the correct order
@@ -251,6 +259,32 @@ def _format_text_image_message(message: ChatMessage) -> dict[str, Any]:
251259
return {"role": message.role.value, "content": bedrock_content_blocks}
252260

253261

262+
def _validate_and_format_cache_point(cache_point: dict[str, str] | None) -> dict[str, dict[str, str]] | None:
263+
"""
264+
Validate and format a cache point dictionary.
265+
266+
Schema available at https://docs.aws.amazon.com/bedrock/latest/APIReference/API_runtime_CachePointBlock.html
267+
268+
:param cache_point: Cache point dictionary to validate and format.
269+
:returns: Dictionary in Bedrock cachePoint format or None if no cache point is provided.
270+
:raises ValueError: If cache point is not valid.
271+
"""
272+
if not cache_point:
273+
return None
274+
275+
if "type" not in cache_point or cache_point["type"] != "default":
276+
err_msg = "Cache point must have a 'type' key with value 'default'."
277+
raise ValueError(err_msg)
278+
if not set(cache_point).issubset({"type", "ttl"}):
279+
err_msg = "Cache point can only contain 'type' and 'ttl' keys."
280+
raise ValueError(err_msg)
281+
if "ttl" in cache_point and cache_point["ttl"] not in ("5m", "1h"):
282+
err_msg = "Cache point 'ttl' must be one of '5m', '1h'."
283+
raise ValueError(err_msg)
284+
285+
return {"cachePoint": cache_point}
286+
287+
254288
def _format_messages(messages: list[ChatMessage]) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:
255289
"""
256290
Format a list of Haystack ChatMessages to the format expected by Bedrock API.
@@ -264,21 +298,30 @@ def _format_messages(messages: list[ChatMessage]) -> tuple[list[dict[str, Any]],
264298
non_system_messages is a list of properly formatted message dictionaries.
265299
"""
266300
# Separate system messages, tool calls, and tool results
267-
system_prompts = []
301+
system_prompts: list[dict[str, Any]] = []
268302
bedrock_formatted_messages = []
269303
for msg in messages:
304+
cache_point = _validate_and_format_cache_point(msg.meta.get("cachePoint"))
270305
if msg.is_from(ChatRole.SYSTEM):
271306
# Assuming system messages can only contain text
272307
# Don't need to track idx since system_messages are handled separately
273308
system_prompts.append({"text": msg.text})
274-
elif msg.tool_calls:
275-
bedrock_formatted_messages.append(_format_tool_call_message(msg))
309+
if cache_point:
310+
system_prompts.append(cache_point)
311+
continue
312+
313+
if msg.tool_calls:
314+
formatted_msg = _format_tool_call_message(msg)
276315
elif msg.tool_call_results:
277-
bedrock_formatted_messages.append(_format_tool_result_message(msg))
316+
formatted_msg = _format_tool_result_message(msg)
278317
else:
279-
bedrock_formatted_messages.append(_format_text_image_message(msg))
318+
formatted_msg = _format_text_image_message(msg)
319+
if cache_point:
320+
formatted_msg["content"].append(cache_point)
321+
bedrock_formatted_messages.append(formatted_msg)
280322

281323
repaired_bedrock_formatted_messages = _repair_tool_result_messages(bedrock_formatted_messages)
324+
282325
return system_prompts, repaired_bedrock_formatted_messages
283326

284327

@@ -310,6 +353,9 @@ def _parse_completion_response(response_body: dict[str, Any], model: str) -> lis
310353
"prompt_tokens": response_body.get("usage", {}).get("inputTokens", 0),
311354
"completion_tokens": response_body.get("usage", {}).get("outputTokens", 0),
312355
"total_tokens": response_body.get("usage", {}).get("totalTokens", 0),
356+
"cache_read_input_tokens": response_body.get("usage", {}).get("cacheReadInputTokens", 0),
357+
"cache_write_input_tokens": response_body.get("usage", {}).get("cacheWriteInputTokens", 0),
358+
"cache_details": response_body.get("usage", {}).get("CacheDetails", {}),
313359
},
314360
}
315361
# guardrail trace
@@ -461,6 +507,9 @@ def _convert_event_to_streaming_chunk(
461507
"prompt_tokens": usage.get("inputTokens", 0),
462508
"completion_tokens": usage.get("outputTokens", 0),
463509
"total_tokens": usage.get("totalTokens", 0),
510+
"cache_read_input_tokens": usage.get("cacheReadInputTokens", 0),
511+
"cache_write_input_tokens": usage.get("cacheWriteInputTokens", 0),
512+
"cache_details": usage.get("cacheDetails", {}),
464513
}
465514
if "trace" in event_meta:
466515
chunk_meta["trace"] = event_meta["trace"]

integrations/amazon_bedrock/tests/test_chat_generator.py

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,10 @@
3939
"us.anthropic.claude-sonnet-4-20250514-v1:0",
4040
]
4141

42+
MODELS_TO_TEST_WITH_PROMPT_CACHING = [
43+
"amazon.nova-micro-v1:0" # cheap, fast model
44+
]
45+
4246

4347
def hello_world():
4448
return "Hello, World!"
@@ -164,6 +168,7 @@ def test_to_dict(self, mock_boto3_session, boto3_config):
164168
"boto3_config": boto3_config,
165169
"tools": None,
166170
"guardrail_config": {"guardrailIdentifier": "test", "guardrailVersion": "test"},
171+
"tools_cachepoint_config": None,
167172
},
168173
}
169174

@@ -298,6 +303,7 @@ def test_serde_in_pipeline(self, mock_boto3_session, monkeypatch):
298303
}
299304
],
300305
"guardrail_config": None,
306+
"tools_cachepoint_config": None,
301307
},
302308
}
303309
},
@@ -945,6 +951,28 @@ def test_live_run_with_guardrail(self, streaming_callback):
945951
assert "trace" in results["replies"][0].meta
946952
assert "guardrail" in results["replies"][0].meta["trace"]
947953

954+
@pytest.mark.parametrize("streaming_callback", [None, print_streaming_chunk])
955+
@pytest.mark.parametrize("model_name", MODELS_TO_TEST_WITH_PROMPT_CACHING)
956+
def test_prompt_caching_live_run_with_user_message(self, model_name, streaming_callback):
957+
generator = AmazonBedrockChatGenerator(model=model_name, streaming_callback=streaming_callback)
958+
959+
system_message = ChatMessage.from_system("Always respond with: 'Life is beautiful' (and nothing else).")
960+
961+
user_message = ChatMessage.from_user(
962+
"User message that should be long enough to cache. " * 100, meta={"cachePoint": {"type": "default"}}
963+
)
964+
messages = [system_message, user_message]
965+
result = generator.run(messages=messages)
966+
967+
assert "replies" in result
968+
assert len(result["replies"]) == 1
969+
usage = result["replies"][0].meta["usage"]
970+
971+
# tests run in parallel based on the workflow matrix, so this request should either hit the cache (read tokens)
972+
# or populate it (write tokens)
973+
assert usage["cache_read_input_tokens"] > 1000 or usage["cache_write_input_tokens"] > 1000
974+
assert "cache_details" in usage
975+
948976
@pytest.mark.parametrize("model_name", [MODELS_TO_TEST_WITH_TOOLS[0]]) # just one model is enough
949977
def test_pipeline_with_amazon_bedrock_chat_generator(self, model_name, tools):
950978
"""

0 commit comments

Comments
 (0)