Skip to content

Commit 797f359

Browse files
committed
feat(bedrock): add TTL support to auto-injected tool and system/user cache points
Extends prompt caching TTL coverage beyond user-supplied cachePoint blocks (PR #1660) to the two SDK-managed auto-injected paths on BedrockModel: - Adds cache_tools_ttl config option so the toolConfig auto-injected cache point can carry a TTL (e.g. '5m' or '1h'). - Adds ttl field to CacheConfig dataclass so _inject_cache_point propagates TTL into the cache point appended to the last user message when strategy='auto'. Together, these let users align all three cache checkpoint TTLs (toolConfig -> system -> messages) to satisfy Bedrock's non-increasing TTL ordering rule -- which was previously impossible because cache_tools hardcoded an implicit 5m TTL. Partially addresses #2121 (Bug 2: cache_tools ordering violation with 1h TTL). Bug 1 from #2121 was resolved by #1660. Tests: - 4 unit tests covering cache_tools_ttl and CacheConfig.ttl with and without TTL (backward-compat). - 3 integration tests against Claude Haiku 4.5 (officially documented for 1h TTL on Bedrock), including a regression test that sets 1h TTL on all three cache checkpoints simultaneously. - Model ID extracted into a _CACHE_TTL_MODEL_ID module constant so future model bumps are a one-line change.
1 parent 6e208a8 commit 797f359

4 files changed

Lines changed: 214 additions & 3 deletions

File tree

src/strands/models/bedrock.py

Lines changed: 19 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ class BedrockConfig(BaseModelConfig, total=False):
8181
cache_prompt: Cache point type for the system prompt (deprecated, use cache_config)
8282
cache_config: Configuration for prompt caching. Use CacheConfig(strategy="auto") for automatic caching.
8383
cache_tools: Cache point type for tools
84+
cache_tools_ttl: Optional TTL duration for tool cache points (e.g. "5m", "1h")
8485
guardrail_id: ID of the guardrail to apply
8586
guardrail_trace: Guardrail trace mode. Defaults to enabled.
8687
guardrail_version: Version of the guardrail to apply
@@ -115,6 +116,7 @@ class BedrockConfig(BaseModelConfig, total=False):
115116
cache_prompt: str | None
116117
cache_config: CacheConfig | None
117118
cache_tools: str | None
119+
cache_tools_ttl: str | None
118120
guardrail_id: str | None
119121
guardrail_trace: Literal["enabled", "disabled", "enabled_full"] | None
120122
guardrail_stream_processing_mode: Literal["sync", "async"] | None
@@ -279,7 +281,18 @@ def _format_request(
279281
for tool_spec in tool_specs
280282
],
281283
*(
282-
[{"cachePoint": {"type": self.config["cache_tools"]}}]
284+
[
285+
{
286+
"cachePoint": {
287+
"type": self.config["cache_tools"],
288+
**(
289+
{"ttl": self.config["cache_tools_ttl"]}
290+
if self.config.get("cache_tools_ttl")
291+
else {}
292+
),
293+
}
294+
}
295+
]
283296
if self.config.get("cache_tools")
284297
else []
285298
),
@@ -381,7 +394,11 @@ def _inject_cache_point(self, messages: list[dict[str, Any]]) -> None:
381394
last_user_idx = msg_idx
382395

383396
if last_user_idx is not None and messages[last_user_idx].get("content"):
384-
messages[last_user_idx]["content"].append({"cachePoint": {"type": "default"}})
397+
cache_point: dict[str, Any] = {"type": "default"}
398+
cache_config = self.config.get("cache_config")
399+
if cache_config and cache_config.ttl:
400+
cache_point["ttl"] = cache_config.ttl
401+
messages[last_user_idx]["content"].append({"cachePoint": cache_point})
385402
logger.debug("msg_idx=<%s> | added cache point to last user message", last_user_idx)
386403

387404
def _find_last_user_text_message_index(self, messages: Messages) -> int | None:

src/strands/models/model.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -202,9 +202,12 @@ class CacheConfig:
202202
strategy: Caching strategy to use.
203203
- "auto": Automatically detect model support and inject cachePoint to maximize cache coverage
204204
- "anthropic": Inject cachePoint in Anthropic-compatible format without model support check
205+
ttl: Optional TTL duration for cache entries (e.g. "5m", "1h").
206+
When specified, auto-injected cache points will include this TTL value.
205207
"""
206208

207209
strategy: Literal["auto", "anthropic"] = "auto"
210+
ttl: str | None = None
208211

209212

210213
class Model(abc.ABC):

tests/strands/models/test_bedrock.py

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3409,3 +3409,59 @@ async def test_fallback_logs_debug(self, model_with_client, bedrock_client, mess
34093409
await model_with_client.count_tokens(messages=messages)
34103410

34113411
assert any("native token counting failed" in record.message for record in caplog.records)
3412+
3413+
3414+
def test_inject_cache_point_with_ttl(bedrock_client):
3415+
"""Test that _inject_cache_point includes TTL when cache_config has ttl set."""
3416+
model = BedrockModel(
3417+
model_id="us.anthropic.claude-sonnet-4-20250514-v1:0",
3418+
cache_config=CacheConfig(strategy="auto", ttl="5m"),
3419+
)
3420+
3421+
cleaned_messages = [
3422+
{"role": "user", "content": [{"text": "Hello"}]},
3423+
]
3424+
3425+
model._inject_cache_point(cleaned_messages)
3426+
3427+
cache_point = cleaned_messages[0]["content"][-1]["cachePoint"]
3428+
assert cache_point["type"] == "default"
3429+
assert cache_point["ttl"] == "5m"
3430+
3431+
3432+
def test_inject_cache_point_without_ttl(bedrock_client):
3433+
"""Test that _inject_cache_point omits TTL when cache_config has no ttl."""
3434+
model = BedrockModel(
3435+
model_id="us.anthropic.claude-sonnet-4-20250514-v1:0",
3436+
cache_config=CacheConfig(strategy="auto"),
3437+
)
3438+
3439+
cleaned_messages = [
3440+
{"role": "user", "content": [{"text": "Hello"}]},
3441+
]
3442+
3443+
model._inject_cache_point(cleaned_messages)
3444+
3445+
cache_point = cleaned_messages[0]["content"][-1]["cachePoint"]
3446+
assert cache_point["type"] == "default"
3447+
assert "ttl" not in cache_point
3448+
3449+
3450+
def test_format_request_cache_tools_with_ttl(model, messages, model_id, tool_spec, cache_type):
3451+
"""Test that cache_tools_ttl propagates into toolConfig cachePoint."""
3452+
model.update_config(cache_tools=cache_type, cache_tools_ttl="5m")
3453+
3454+
tru_request = model._format_request(messages, tool_specs=[tool_spec])
3455+
3456+
exp_cache_point = {"cachePoint": {"type": cache_type, "ttl": "5m"}}
3457+
assert tru_request["toolConfig"]["tools"][-1] == exp_cache_point
3458+
3459+
3460+
def test_format_request_cache_tools_without_ttl(model, messages, model_id, tool_spec, cache_type):
3461+
"""Test that toolConfig cachePoint omits TTL when cache_tools_ttl is not set."""
3462+
model.update_config(cache_tools=cache_type)
3463+
3464+
tru_request = model._format_request(messages, tool_specs=[tool_spec])
3465+
3466+
exp_cache_point = {"cachePoint": {"type": cache_type}}
3467+
assert tru_request["toolConfig"]["tools"][-1] == exp_cache_point

tests_integ/models/test_model_bedrock.py

Lines changed: 136 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,18 @@
66

77
import strands
88
from strands import Agent
9-
from strands.models import BedrockModel
9+
from strands.models import BedrockModel, CacheConfig
1010
from strands.types.content import ContentBlock
1111

12+
# Model ID used for prompt-caching TTL integration tests. Per
13+
# https://docs.aws.amazon.com/bedrock/latest/userguide/prompt-caching.html
14+
# the models that officially support 1h TTL on CachePoint are Claude Opus 4.5,
15+
# Claude Haiku 4.5, and Claude Sonnet 4.5. Haiku 4.5 is the newest Haiku
16+
# available and is preferred for CI due to lower latency and cost relative to
17+
# the same-version Sonnet 4.5. Bump this when a newer Haiku is released that
18+
# supports CachePoint TTL.
19+
_CACHE_TTL_MODEL_ID = "us.anthropic.claude-haiku-4-5-20251001-v1:0"
20+
1221

1322
@pytest.fixture
1423
def system_prompt():
@@ -576,3 +585,129 @@ def calculator(expression: str) -> float:
576585
agent('Search for "python" with tags ["programming", "language"] using the search tool.')
577586

578587
assert "search" in tools_called
588+
589+
590+
def test_prompt_caching_cache_tools_ttl():
591+
"""Test that cache_tools_ttl propagates into the auto-injected toolConfig cache point.
592+
593+
Verifies that BedrockModel(cache_tools="default", cache_tools_ttl="5m") produces a
594+
Bedrock request with cachePoint.ttl on the toolConfig checkpoint, and that the call
595+
completes without a ValidationException on the TTL field.
596+
597+
Note: we intentionally do not assert specific cacheWriteInputTokens on the toolConfig
598+
prefix because Bedrock's tool-prefix cache threshold varies by model and region.
599+
The critical behavior under test here is that the TTL field is accepted end-to-end.
600+
601+
Uses Claude Haiku 4.5 which supports TTL in CachePointBlock on Bedrock per
602+
https://docs.aws.amazon.com/bedrock/latest/userguide/prompt-caching.html
603+
(Claude Opus 4.5, Claude Haiku 4.5, and Claude Sonnet 4.5 all support 1h TTL).
604+
"""
605+
model = BedrockModel(
606+
model_id=_CACHE_TTL_MODEL_ID,
607+
streaming=False,
608+
cache_tools="default",
609+
cache_tools_ttl="5m",
610+
)
611+
612+
@strands.tool
613+
def lookup_fact(topic: str) -> str:
614+
"""Look up a fact about the given topic.
615+
616+
This tool is useful when you need authoritative information.
617+
"""
618+
return f"Fact about {topic}: example"
619+
620+
agent = Agent(
621+
model=model,
622+
tools=[lookup_fact],
623+
load_tools_from_directory=False,
624+
)
625+
626+
# The call must succeed — Bedrock must accept cachePoint.ttl on the toolConfig checkpoint
627+
# without raising a ValidationException.
628+
result = agent("Use the lookup_fact tool to look up 'python'.")
629+
assert len(str(result)) > 0
630+
631+
632+
def test_prompt_caching_cache_config_auto_with_ttl():
633+
"""Test that CacheConfig(strategy="auto", ttl="5m") propagates TTL to the auto-injected message cache point.
634+
635+
Verifies that the cache point appended to the last user message by _inject_cache_point
636+
carries the configured TTL, and that Bedrock accepts the request.
637+
638+
Uses Claude Haiku 4.5 which supports TTL in CachePointBlock on Bedrock per
639+
https://docs.aws.amazon.com/bedrock/latest/userguide/prompt-caching.html
640+
"""
641+
model = BedrockModel(
642+
model_id=_CACHE_TTL_MODEL_ID,
643+
streaming=False,
644+
cache_config=CacheConfig(strategy="auto", ttl="5m"),
645+
)
646+
647+
unique_id = str(uuid.uuid4())
648+
# Minimum 4096 tokens required for caching with Haiku 4.5
649+
large_message = f"Context for test {unique_id}: " + ("This is important context. " * 1000) + " What is 2+2?"
650+
651+
agent = Agent(
652+
model=model,
653+
load_tools_from_directory=False,
654+
)
655+
656+
# First call: auto-injected cache point on the last user message must include ttl and be accepted
657+
result1 = agent(large_message)
658+
assert len(str(result1)) > 0
659+
660+
# Verify cache write occurred with auto-inject + ttl
661+
assert result1.metrics.accumulated_usage.get("cacheWriteInputTokens", 0) > 0, (
662+
"Expected cacheWriteInputTokens > 0 with CacheConfig(strategy='auto', ttl='5m')"
663+
)
664+
665+
666+
def test_prompt_caching_aligned_1h_ttl_across_checkpoints():
667+
"""Regression test for Bedrock TTL non-increasing ordering rule (Issue #2121).
668+
669+
Bedrock processes cache checkpoints in order: toolConfig -> system -> messages,
670+
and requires TTLs to be non-increasing. Before this change, cache_tools hardcoded
671+
an implicit 5m TTL, so any 1h TTL on a later checkpoint would raise a
672+
ValidationException.
673+
674+
This test sets 1h TTL on all three checkpoints simultaneously and verifies the
675+
call succeeds.
676+
677+
Uses Claude Haiku 4.5 which supports 1h TTL per
678+
https://docs.aws.amazon.com/bedrock/latest/userguide/prompt-caching.html
679+
"""
680+
model = BedrockModel(
681+
model_id=_CACHE_TTL_MODEL_ID,
682+
streaming=False,
683+
cache_tools="default",
684+
cache_tools_ttl="1h",
685+
cache_config=CacheConfig(strategy="auto", ttl="1h"),
686+
)
687+
688+
# Timestamp-based uniqueness to avoid cache conflicts across CI runs
689+
unique_id = str(int(time.time() * 1000000))
690+
large_context = f"Background context for test {unique_id}: " + ("This is important context. " * 1000)
691+
692+
# User-supplied 1h cache point on system prompt — third checkpoint also at 1h
693+
system_prompt_with_cache = [
694+
{"text": large_context},
695+
{"cachePoint": {"type": "default", "ttl": "1h"}},
696+
{"text": "You are a helpful assistant."},
697+
]
698+
699+
@strands.tool
700+
def echo(value: str) -> str:
701+
"""Echo the given value back."""
702+
return value
703+
704+
agent = Agent(
705+
model=model,
706+
system_prompt=system_prompt_with_cache,
707+
tools=[echo],
708+
load_tools_from_directory=False,
709+
)
710+
711+
# Must succeed without ValidationException on the non-increasing TTL rule
712+
result = agent("What is 2+2?")
713+
assert len(str(result)) > 0

0 commit comments

Comments (0)