Merge branch 'main' into chore/removing-duplicated-standardised-tests

davidsbatista · web-flow · commit 1413474bddfc · 2026-02-05T11:39:41.000Z
diff --git a/integrations/amazon_bedrock/CHANGELOG.md b/integrations/amazon_bedrock/CHANGELOG.md
@@ -1,5 +1,16 @@
 # Changelog
 
+## [integrations/amazon_bedrock-v6.4.0] - 2026-02-05
+
+### 🚀 Features
+
+- Bedrock - support prompt caching (#2796)
+
+### 🧹 Chores
+
+- *(amazon_bedrock)* Simplify Secret (de-)serialization (#2808)
+
+
 ## [integrations/amazon_bedrock-v6.3.0] - 2026-01-28
 
 ### 🌀 Miscellaneous
diff --git a/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/chat/chat_generator.py b/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/chat/chat_generator.py
@@ -27,6 +27,7 @@
     _parse_completion_response,
     _parse_streaming_response,
     _parse_streaming_response_async,
+    _validate_and_format_cache_point,
     _validate_guardrail_config,
 )
 
@@ -41,7 +42,7 @@ class AmazonBedrockChatGenerator:
     For example, to use the Anthropic Claude 3 Sonnet model, initialize this component with the
     'anthropic.claude-3-5-sonnet-20240620-v1:0' model name.
 
-    ### Usage example
+    **Usage example**
 
     ```python
     from haystack_integrations.components.generators.amazon_bedrock import AmazonBedrockChatGenerator
@@ -57,7 +58,8 @@ class AmazonBedrockChatGenerator:
     client.run(messages, generation_kwargs={"max_tokens": 512})
     ```
 
-    ### Multimodal example
+    **Multimodal example**
+
     ```python
     from haystack.dataclasses import ChatMessage, ImageContent
     from haystack_integrations.components.generators.amazon_bedrock import AmazonBedrockChatGenerator
@@ -72,11 +74,13 @@ class AmazonBedrockChatGenerator:
 
     print(response)
     > The image shows a red apple.
+    ```
+
+    **Tool usage example**
 
-    ### Tool usage example
-    # AmazonBedrockChatGenerator supports Haystack's unified tool architecture, allowing tools to be used
-    # across different chat generators. The same tool definitions and usage patterns work consistently
-    # whether using Amazon Bedrock, OpenAI, Ollama, or any other supported LLM providers.
+    AmazonBedrockChatGenerator supports Haystack's unified tool architecture, allowing tools to be used
+    across different chat generators. The same tool definitions and usage patterns work consistently
+    whether using Amazon Bedrock, OpenAI, Ollama, or any other supported LLM providers.
 
     ```python
     from haystack.dataclasses import ChatMessage
@@ -129,18 +133,31 @@ def weather(city: str):
 
     > Based on the information I've received, I can tell you that the weather in Paris is
     > currently sunny with a temperature of 32°C (which is about 90°F).
+    ```
+
+    **Prompt caching**
+
+    This component supports prompt caching. You can use the `tools_cachepoint_config` parameter to configure the cache
+    point for tools.
+    To cache messages, you can use the `cachePoint` key in the `ChatMessage.meta` attribute.
 
+    ```python
+    ChatMessage.from_user("Long message...", meta={"cachePoint": {"type": "default"}})
     ```
 
+    For more information, see the [Amazon Bedrock documentation](https://docs.aws.amazon.com/bedrock/latest/userguide/prompt-caching.html).
+
+    **Authentication**
+
     AmazonBedrockChatGenerator uses AWS for authentication. You can use the AWS CLI to authenticate through your IAM.
     For more information on setting up an IAM identity-based policy, see [Amazon Bedrock documentation]
     (https://docs.aws.amazon.com/bedrock/latest/userguide/security_iam_id-based-policy-examples.html).
 
     If the AWS environment is configured correctly, the AWS credentials are not required as they're loaded
     automatically from the environment or the AWS configuration file.
     If the AWS environment is not configured, set `aws_access_key_id`, `aws_secret_access_key`,
-      and `aws_region_name` as environment variables or pass them as
-     [Secret](https://docs.haystack.deepset.ai/docs/secret-management) arguments. Make sure the region you set
+    and `aws_region_name` as environment variables or pass them as
+    [Secret](https://docs.haystack.deepset.ai/docs/secret-management) arguments. Make sure the region you set
     supports Amazon Bedrock.
     """
 
@@ -160,6 +177,7 @@ def __init__(
         tools: ToolsType | None = None,
         *,
         guardrail_config: dict[str, str] | None = None,
+        tools_cachepoint_config: dict[str, str] | None = None,
     ) -> None:
         """
         Initializes the `AmazonBedrockChatGenerator` with the provided parameters. The parameters are passed to the
@@ -201,6 +219,10 @@ def __init__(
             See the
             [Guardrails Streaming documentation](https://docs.aws.amazon.com/bedrock/latest/userguide/guardrails-streaming.html)
             for more information.
+        :param tools_cachepoint_config: Optional configuration to use prompt caching for tools.
+            The dictionary must match the
+            [CachePointBlock schema](https://docs.aws.amazon.com/bedrock/latest/APIReference/API_runtime_CachePointBlock.html).
+            Example: `{"type": "default", "ttl": "5m"}`
 
 
         :raises ValueError: If the model name is empty or None.
@@ -225,6 +247,10 @@ def __init__(
         _validate_guardrail_config(guardrail_config=guardrail_config, streaming=streaming_callback is not None)
         self.guardrail_config = guardrail_config
 
+        self.tools_cachepoint_config = (
+            _validate_and_format_cache_point(tools_cachepoint_config) if tools_cachepoint_config else None
+        )
+
         def resolve_secret(secret: Secret | None) -> str | None:
             return secret.resolve_value() if secret else None
 
@@ -310,6 +336,7 @@ def to_dict(self) -> dict[str, Any]:
             boto3_config=self.boto3_config,
             tools=serialize_tools_or_toolset(self.tools),
             guardrail_config=self.guardrail_config,
+            tools_cachepoint_config=(self.tools_cachepoint_config or {}).get("cachePoint"),  # unwrap to init form
         )
 
     @classmethod
@@ -385,7 +412,7 @@ def _prepare_request_params(
         tool_config = merged_kwargs.pop("toolConfig", None)
         if flattened_tools:
             # Format Haystack tools to Bedrock format
-            tool_config = _format_tools(flattened_tools)
+            tool_config = _format_tools(flattened_tools, tools_cachepoint_config=self.tools_cachepoint_config)
 
         # Any remaining kwargs go to additionalModelRequestFields
         additional_fields = merged_kwargs if merged_kwargs else None
diff --git a/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/chat/utils.py b/integrations/amazon_bedrock/src/haystack_integrations/components/generators/amazon_bedrock/chat/utils.py
@@ -40,7 +40,9 @@
 
 
 # Haystack to Bedrock util methods
-def _format_tools(tools: list[Tool] | None = None) -> dict[str, Any] | None:
+def _format_tools(
+    tools: list[Tool] | None = None, tools_cachepoint_config: dict[str, dict[str, str]] | None = None
+) -> dict[str, Any] | None:
     """
     Format Haystack Tool(s) to Amazon Bedrock toolConfig format.
 
@@ -51,13 +53,16 @@ def _format_tools(tools: list[Tool] | None = None) -> dict[str, Any] | None:
     if not tools:
         return None
 
-    tool_specs = []
+    tool_specs: list[dict[str, Any]] = []
     for tool in tools:
         tool_specs.append(
             {"toolSpec": {"name": tool.name, "description": tool.description, "inputSchema": {"json": tool.parameters}}}
         )
 
-    return {"tools": tool_specs} if tool_specs else None
+    if tools_cachepoint_config:
+        tool_specs.append(tools_cachepoint_config)  # already wrapped as {"cachePoint": {...}}; do not re-wrap
+
+    return {"tools": tool_specs}
 
 
 def _convert_image_content_to_bedrock_format(image_content: ImageContent) -> dict[str, Any]:
@@ -181,20 +186,23 @@ def _repair_tool_result_messages(bedrock_formatted_messages: list[dict[str, Any]
         original_idx = None
         for tool_call_id in tool_call_ids:
             for idx, tool_result in tool_result_messages:
-                tool_result_contents = [c for c in tool_result["content"] if "toolResult" in c]
+                tool_result_contents = [c for c in tool_result["content"] if "toolResult" in c or "cachePoint" in c]
                 for content in tool_result_contents:
-                    if content["toolResult"]["toolUseId"] == tool_call_id:
+                    if "toolResult" in content and content["toolResult"]["toolUseId"] == tool_call_id:
                         regrouped_tool_result.append(content)
                         # Keep track of the original index of the last tool result message
                         original_idx = idx
+                    elif "cachePoint" in content and content not in regrouped_tool_result:
+                        regrouped_tool_result.append(content)
+
         if regrouped_tool_result and original_idx is not None:
             repaired_tool_result_prompts.append((original_idx, {"role": "user", "content": regrouped_tool_result}))
 
     # Remove the tool result messages from bedrock_formatted_messages
     bedrock_formatted_messages_minus_tool_results: list[tuple[int, Any]] = []
     for idx, msg in enumerate(bedrock_formatted_messages):
-        # Assumes the content of tool result messages only contains 'toolResult': {...} objects (e.g. no 'text')
-        if msg.get("content") and "toolResult" not in msg["content"][0]:
+        # Filter out messages that contain toolResult (they are handled by repaired_tool_result_prompts)
+        if msg.get("content") and not any("toolResult" in c for c in msg["content"]):
             bedrock_formatted_messages_minus_tool_results.append((idx, msg))
 
     # Add the repaired tool result messages and sort to maintain the correct order
@@ -251,6 +259,32 @@ def _format_text_image_message(message: ChatMessage) -> dict[str, Any]:
     return {"role": message.role.value, "content": bedrock_content_blocks}
 
 
+def _validate_and_format_cache_point(cache_point: dict[str, str] | None) -> dict[str, dict[str, str]] | None:
+    """
+    Validate a cache point and wrap it in the Bedrock `cachePoint` block format.
+
+    Schema available at https://docs.aws.amazon.com/bedrock/latest/APIReference/API_runtime_CachePointBlock.html
+
+    :param cache_point: Cache point dictionary to validate, for example `{"type": "default", "ttl": "5m"}`.
+    :returns: `{"cachePoint": {...}}` holding a copy of the input, or None if no cache point is provided.
+    :raises ValueError: If cache point is not a dictionary or fails any of the checks below.
+    """
+    if not cache_point:
+        return None
+    # Non-dict values are rejected here so bad input raises the documented ValueError instead of a TypeError.
+    if not isinstance(cache_point, dict) or cache_point.get("type") != "default":
+        err_msg = "Cache point must have a 'type' key with value 'default'."
+        raise ValueError(err_msg)
+    if not set(cache_point).issubset({"type", "ttl"}):
+        err_msg = "Cache point can only contain 'type' and 'ttl' keys."
+        raise ValueError(err_msg)
+    if "ttl" in cache_point and cache_point["ttl"] not in ("5m", "1h"):
+        err_msg = "Cache point 'ttl' must be one of '5m', '1h'."
+        raise ValueError(err_msg)
+
+    return {"cachePoint": dict(cache_point)}  # copy so later edits to the caller's dict cannot leak in
+
+
 def _format_messages(messages: list[ChatMessage]) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]:
     """
     Format a list of Haystack ChatMessages to the format expected by Bedrock API.
@@ -264,21 +298,30 @@ def _format_messages(messages: list[ChatMessage]) -> tuple[list[dict[str, Any]],
               non_system_messages is a list of properly formatted message dictionaries.
     """
     # Separate system messages, tool calls, and tool results
-    system_prompts = []
+    system_prompts: list[dict[str, Any]] = []
     bedrock_formatted_messages = []
     for msg in messages:
+        cache_point = _validate_and_format_cache_point(msg.meta.get("cachePoint"))
         if msg.is_from(ChatRole.SYSTEM):
             # Assuming system messages can only contain text
             # Don't need to track idx since system_messages are handled separately
             system_prompts.append({"text": msg.text})
-        elif msg.tool_calls:
-            bedrock_formatted_messages.append(_format_tool_call_message(msg))
+            if cache_point:
+                system_prompts.append(cache_point)
+            continue
+
+        if msg.tool_calls:
+            formatted_msg = _format_tool_call_message(msg)
         elif msg.tool_call_results:
-            bedrock_formatted_messages.append(_format_tool_result_message(msg))
+            formatted_msg = _format_tool_result_message(msg)
         else:
-            bedrock_formatted_messages.append(_format_text_image_message(msg))
+            formatted_msg = _format_text_image_message(msg)
+        if cache_point:
+            formatted_msg["content"].append(cache_point)
+        bedrock_formatted_messages.append(formatted_msg)
 
     repaired_bedrock_formatted_messages = _repair_tool_result_messages(bedrock_formatted_messages)
+
     return system_prompts, repaired_bedrock_formatted_messages
 
 
@@ -310,6 +353,9 @@ def _parse_completion_response(response_body: dict[str, Any], model: str) -> lis
                     "prompt_tokens": response_body.get("usage", {}).get("inputTokens", 0),
                     "completion_tokens": response_body.get("usage", {}).get("outputTokens", 0),
                     "total_tokens": response_body.get("usage", {}).get("totalTokens", 0),
+                    "cache_read_input_tokens": response_body.get("usage", {}).get("cacheReadInputTokens", 0),
+                    "cache_write_input_tokens": response_body.get("usage", {}).get("cacheWriteInputTokens", 0),
+                    "cache_details": response_body.get("usage", {}).get("cacheDetails", {}),
                 },
             }
             # guardrail trace
@@ -461,6 +507,9 @@ def _convert_event_to_streaming_chunk(
                 "prompt_tokens": usage.get("inputTokens", 0),
                 "completion_tokens": usage.get("outputTokens", 0),
                 "total_tokens": usage.get("totalTokens", 0),
+                "cache_read_input_tokens": usage.get("cacheReadInputTokens", 0),
+                "cache_write_input_tokens": usage.get("cacheWriteInputTokens", 0),
+                "cache_details": usage.get("cacheDetails", {}),
             }
         if "trace" in event_meta:
             chunk_meta["trace"] = event_meta["trace"]
diff --git a/integrations/amazon_bedrock/tests/test_chat_generator.py b/integrations/amazon_bedrock/tests/test_chat_generator.py
@@ -39,6 +39,10 @@
     "us.anthropic.claude-sonnet-4-20250514-v1:0",
 ]
 
+MODELS_TO_TEST_WITH_PROMPT_CACHING = [
+    "amazon.nova-micro-v1:0"  # cheap, fast model
+]
+
 
 def hello_world():
     return "Hello, World!"
@@ -164,6 +168,7 @@ def test_to_dict(self, mock_boto3_session, boto3_config):
                 "boto3_config": boto3_config,
                 "tools": None,
                 "guardrail_config": {"guardrailIdentifier": "test", "guardrailVersion": "test"},
+                "tools_cachepoint_config": None,
             },
         }
 
@@ -298,6 +303,7 @@ def test_serde_in_pipeline(self, mock_boto3_session, monkeypatch):
                             }
                         ],
                         "guardrail_config": None,
+                        "tools_cachepoint_config": None,
                     },
                 }
             },
@@ -945,6 +951,28 @@ def test_live_run_with_guardrail(self, streaming_callback):
         assert "trace" in results["replies"][0].meta
         assert "guardrail" in results["replies"][0].meta["trace"]
 
+    @pytest.mark.parametrize("streaming_callback", [None, print_streaming_chunk])
+    @pytest.mark.parametrize("model_name", MODELS_TO_TEST_WITH_PROMPT_CACHING)
+    def test_prompt_caching_live_run_with_user_message(self, model_name, streaming_callback):
+        generator = AmazonBedrockChatGenerator(model=model_name, streaming_callback=streaming_callback)
+
+        system_message = ChatMessage.from_system("Always respond with: 'Life is beautiful' (and nothing else).")
+
+        user_message = ChatMessage.from_user(
+            "User message that should be long enough to cache. " * 100, meta={"cachePoint": {"type": "default"}}
+        )
+        messages = [system_message, user_message]
+        result = generator.run(messages=messages)
+
+        assert "replies" in result
+        assert len(result["replies"]) == 1
+        usage = result["replies"][0].meta["usage"]
+
+        # tests run in parallel based on the workflow matrix, so this request should either hit the cache (read tokens)
+        # or populate it (write tokens)
+        assert usage["cache_read_input_tokens"] > 1000 or usage["cache_write_input_tokens"] > 1000
+        assert "cache_details" in usage
+
     @pytest.mark.parametrize("model_name", [MODELS_TO_TEST_WITH_TOOLS[0]])  # just one model is enough
     def test_pipeline_with_amazon_bedrock_chat_generator(self, model_name, tools):
         """
diff --git a/integrations/amazon_bedrock/tests/test_chat_generator_utils.py b/integrations/amazon_bedrock/tests/test_chat_generator_utils.py