Skip to content

Commit 0835dc3

Browse files
committed
feat(bedrock): add TTL support to auto-injected tool and system/user cache points
Extends prompt caching TTL coverage beyond user-supplied cachePoint blocks (PR #1660) to the two SDK-managed auto-injected paths on BedrockModel: - Adds cache_tools_ttl config option so the toolConfig auto-injected cache point can carry a TTL (e.g. '5m' or '1h'). - Adds ttl field to CacheConfig dataclass so _inject_cache_point propagates TTL into the cache point appended to the last user message when strategy='auto'. Together, these let users align all three cache checkpoint TTLs (toolConfig -> system -> messages) to satisfy Bedrock's non-increasing TTL ordering rule -- which was previously impossible because cache_tools hardcoded an implicit 5m TTL. Partially addresses #2121 (Bug 2: cache_tools ordering violation with 1h TTL). Bug 1 from #2121 was resolved by #1660. Tests: - 4 unit tests covering cache_tools_ttl and CacheConfig.ttl with and without TTL (backward-compat). - 3 integration tests against Claude Haiku 4.5 (officially documented for 1h TTL on Bedrock), including a regression test that sets 1h TTL on all three cache checkpoints simultaneously. - Model ID extracted into a _CACHE_TTL_MODEL_ID module constant so future model bumps are a one-line change.
1 parent 980bc91 commit 0835dc3

4 files changed

Lines changed: 214 additions & 3 deletions

File tree

src/strands/models/bedrock.py

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,7 @@ class BedrockConfig(BaseModelConfig, total=False):
9191
cache_prompt: Cache point type for the system prompt (deprecated, use cache_config)
9292
cache_config: Configuration for prompt caching. Use CacheConfig(strategy="auto") for automatic caching.
9393
cache_tools: Cache point type for tools
94+
cache_tools_ttl: Optional TTL duration for tool cache points (e.g. "5m", "1h")
9495
guardrail_id: ID of the guardrail to apply
9596
guardrail_trace: Guardrail trace mode. Defaults to enabled.
9697
guardrail_version: Version of the guardrail to apply
@@ -128,6 +129,7 @@ class BedrockConfig(BaseModelConfig, total=False):
128129
cache_prompt: str | None
129130
cache_config: CacheConfig | None
130131
cache_tools: str | None
132+
cache_tools_ttl: str | None
131133
guardrail_id: str | None
132134
guardrail_trace: Literal["enabled", "disabled", "enabled_full"] | None
133135
guardrail_stream_processing_mode: Literal["sync", "async"] | None
@@ -293,7 +295,18 @@ def _format_request(
293295
for tool_spec in tool_specs
294296
],
295297
*(
296-
[{"cachePoint": {"type": self.config["cache_tools"]}}]
298+
[
299+
{
300+
"cachePoint": {
301+
"type": self.config["cache_tools"],
302+
**(
303+
{"ttl": self.config["cache_tools_ttl"]}
304+
if self.config.get("cache_tools_ttl")
305+
else {}
306+
),
307+
}
308+
}
309+
]
297310
if self.config.get("cache_tools")
298311
else []
299312
),
@@ -395,7 +408,11 @@ def _inject_cache_point(self, messages: list[dict[str, Any]]) -> None:
395408
last_user_idx = msg_idx
396409

397410
if last_user_idx is not None and messages[last_user_idx].get("content"):
398-
messages[last_user_idx]["content"].append({"cachePoint": {"type": "default"}})
411+
cache_point: dict[str, Any] = {"type": "default"}
412+
cache_config = self.config.get("cache_config")
413+
if cache_config and cache_config.ttl:
414+
cache_point["ttl"] = cache_config.ttl
415+
messages[last_user_idx]["content"].append({"cachePoint": cache_point})
399416
logger.debug("msg_idx=<%s> | added cache point to last user message", last_user_idx)
400417

401418
def _find_last_user_text_message_index(self, messages: Messages) -> int | None:

src/strands/models/model.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -134,9 +134,12 @@ class CacheConfig:
134134
strategy: Caching strategy to use.
135135
- "auto": Automatically detect model support and inject cachePoint to maximize cache coverage
136136
- "anthropic": Inject cachePoint in Anthropic-compatible format without model support check
137+
ttl: Optional TTL duration for cache entries (e.g. "5m", "1h").
138+
When specified, auto-injected cache points will include this TTL value.
137139
"""
138140

139141
strategy: Literal["auto", "anthropic"] = "auto"
142+
ttl: str | None = None
140143

141144

142145
class Model(abc.ABC):

tests/strands/models/test_bedrock.py

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3495,3 +3495,59 @@ async def test_skip_native_api_when_use_native_token_count_false(self, bedrock_c
34953495
bedrock_client.count_tokens.assert_not_called()
34963496
assert isinstance(result, int)
34973497
assert result >= 0
3498+
3499+
3500+
def test_inject_cache_point_with_ttl(bedrock_client):
    """A configured CacheConfig.ttl must appear on the auto-injected cachePoint."""
    model = BedrockModel(
        model_id="us.anthropic.claude-sonnet-4-20250514-v1:0",
        cache_config=CacheConfig(strategy="auto", ttl="5m"),
    )
    msgs = [{"role": "user", "content": [{"text": "Hello"}]}]

    model._inject_cache_point(msgs)

    # The cache point is appended as the final content block of the last user message.
    injected = msgs[0]["content"][-1]["cachePoint"]
    assert injected["type"] == "default"
    assert injected["ttl"] == "5m"
3517+
3518+
def test_inject_cache_point_without_ttl(bedrock_client):
    """Backward compatibility: no CacheConfig.ttl means no ttl key on the cachePoint."""
    model = BedrockModel(
        model_id="us.anthropic.claude-sonnet-4-20250514-v1:0",
        cache_config=CacheConfig(strategy="auto"),
    )
    msgs = [{"role": "user", "content": [{"text": "Hello"}]}]

    model._inject_cache_point(msgs)

    # The cache point is appended as the final content block of the last user message.
    injected = msgs[0]["content"][-1]["cachePoint"]
    assert injected["type"] == "default"
    assert "ttl" not in injected
3534+
3535+
3536+
def test_format_request_cache_tools_with_ttl(model, messages, model_id, tool_spec, cache_type):
    """cache_tools_ttl must be carried into the toolConfig cachePoint block."""
    model.update_config(cache_tools=cache_type, cache_tools_ttl="5m")

    request = model._format_request(messages, tool_specs=[tool_spec])

    # The auto-injected cache point is always the last entry in the tools list.
    expected = {"cachePoint": {"type": cache_type, "ttl": "5m"}}
    assert request["toolConfig"]["tools"][-1] == expected
3544+
3545+
3546+
def test_format_request_cache_tools_without_ttl(model, messages, model_id, tool_spec, cache_type):
    """Backward compatibility: without cache_tools_ttl the cachePoint carries no ttl key."""
    model.update_config(cache_tools=cache_type)

    request = model._format_request(messages, tool_specs=[tool_spec])

    # The auto-injected cache point is always the last entry in the tools list.
    expected = {"cachePoint": {"type": cache_type}}
    assert request["toolConfig"]["tools"][-1] == expected

tests_integ/models/test_model_bedrock.py

Lines changed: 136 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,18 @@
66

77
import strands
88
from strands import Agent
9-
from strands.models import BedrockModel
9+
from strands.models import BedrockModel, CacheConfig
1010
from strands.types.content import ContentBlock
1111

12+
# Model ID used for prompt-caching TTL integration tests. Per
13+
# https://docs.aws.amazon.com/bedrock/latest/userguide/prompt-caching.html
14+
# the models that officially support 1h TTL on CachePoint are Claude Opus 4.5,
15+
# Claude Haiku 4.5, and Claude Sonnet 4.5. Haiku 4.5 is the newest Haiku
16+
# available and is preferred for CI due to lower latency and cost relative to
17+
# the same-version Sonnet 4.5. Bump this when a newer Haiku is released that
18+
# supports CachePoint TTL.
19+
_CACHE_TTL_MODEL_ID = "us.anthropic.claude-haiku-4-5-20251001-v1:0"
20+
1221

1322
@pytest.fixture
1423
def system_prompt():
@@ -576,3 +585,129 @@ def calculator(expression: str) -> float:
576585
agent('Search for "python" with tags ["programming", "language"] using the search tool.')
577586

578587
assert "search" in tools_called
588+
589+
590+
def test_prompt_caching_cache_tools_ttl():
    """End-to-end check that cache_tools_ttl reaches the toolConfig cache point.

    BedrockModel(cache_tools="default", cache_tools_ttl="5m") must emit a request whose
    toolConfig cachePoint carries the TTL, and Bedrock must accept the call without a
    ValidationException on that field.

    No assertion is made on cacheWriteInputTokens for the tool prefix: Bedrock's
    tool-prefix cache threshold varies by model and region, so only end-to-end
    acceptance of the TTL field is verified here.

    Runs against Claude Haiku 4.5, which supports TTL in CachePointBlock on Bedrock per
    https://docs.aws.amazon.com/bedrock/latest/userguide/prompt-caching.html
    (Claude Opus 4.5, Claude Haiku 4.5, and Claude Sonnet 4.5 all support 1h TTL).
    """

    @strands.tool
    def lookup_fact(topic: str) -> str:
        """Look up a fact about the given topic.

        This tool is useful when you need authoritative information.
        """
        return f"Fact about {topic}: example"

    ttl_model = BedrockModel(
        model_id=_CACHE_TTL_MODEL_ID,
        streaming=False,
        cache_tools="default",
        cache_tools_ttl="5m",
    )
    agent = Agent(
        model=ttl_model,
        tools=[lookup_fact],
        load_tools_from_directory=False,
    )

    # Success here means Bedrock accepted cachePoint.ttl on the toolConfig checkpoint
    # without raising a ValidationException.
    result = agent("Use the lookup_fact tool to look up 'python'.")
    assert len(str(result)) > 0
630+
631+
632+
def test_prompt_caching_cache_config_auto_with_ttl():
    """CacheConfig(strategy="auto", ttl="5m") must put the TTL on the injected message cache point.

    The cache point that _inject_cache_point appends to the last user message must carry
    the configured TTL, and Bedrock must accept the resulting request.

    Runs against Claude Haiku 4.5, which supports TTL in CachePointBlock on Bedrock per
    https://docs.aws.amazon.com/bedrock/latest/userguide/prompt-caching.html
    """
    agent = Agent(
        model=BedrockModel(
            model_id=_CACHE_TTL_MODEL_ID,
            streaming=False,
            cache_config=CacheConfig(strategy="auto", ttl="5m"),
        ),
        load_tools_from_directory=False,
    )

    # Haiku 4.5 needs a minimum of 4096 tokens in the prefix before caching kicks in.
    unique_id = str(uuid.uuid4())
    large_message = f"Context for test {unique_id}: " + ("This is important context. " * 1000) + " What is 2+2?"

    # First call: the auto-injected cache point on the last user message must include
    # ttl and be accepted by Bedrock.
    result1 = agent(large_message)
    assert len(str(result1)) > 0

    # A cache write must have happened for the auto-injected checkpoint with ttl.
    assert result1.metrics.accumulated_usage.get("cacheWriteInputTokens", 0) > 0, (
        "Expected cacheWriteInputTokens > 0 with CacheConfig(strategy='auto', ttl='5m')"
    )
664+
665+
666+
def test_prompt_caching_aligned_1h_ttl_across_checkpoints():
    """Regression test for Bedrock's non-increasing TTL ordering rule (Issue #2121).

    Bedrock evaluates cache checkpoints in the order toolConfig -> system -> messages
    and rejects any request where a later checkpoint's TTL exceeds an earlier one's.
    Before this change, cache_tools hardcoded an implicit 5m TTL, so a 1h TTL on any
    later checkpoint raised a ValidationException.

    This test sets a 1h TTL on all three checkpoints at once and verifies the call
    succeeds.

    Runs against Claude Haiku 4.5, which supports 1h TTL per
    https://docs.aws.amazon.com/bedrock/latest/userguide/prompt-caching.html
    """

    @strands.tool
    def echo(value: str) -> str:
        """Echo the given value back."""
        return value

    # Timestamp-based uniqueness so cache entries never collide across CI runs.
    unique_id = str(int(time.time() * 1000000))
    large_context = f"Background context for test {unique_id}: " + ("This is important context. " * 1000)

    # User-supplied 1h cache point on the system prompt — the middle (system) checkpoint.
    system_prompt_with_cache = [
        {"text": large_context},
        {"cachePoint": {"type": "default", "ttl": "1h"}},
        {"text": "You are a helpful assistant."},
    ]

    agent = Agent(
        model=BedrockModel(
            model_id=_CACHE_TTL_MODEL_ID,
            streaming=False,
            cache_tools="default",
            cache_tools_ttl="1h",
            cache_config=CacheConfig(strategy="auto", ttl="1h"),
        ),
        system_prompt=system_prompt_with_cache,
        tools=[echo],
        load_tools_from_directory=False,
    )

    # Must succeed without a ValidationException under the non-increasing TTL rule.
    result = agent("What is 2+2?")
    assert len(str(result)) > 0

0 commit comments

Comments
 (0)