Skip to content

Commit 94fc8dd

Browse files
BV-Venky
and authored
feat: add service_tier support to BedrockModel (#1799)
Co-authored-by: BV-Venky <venkateshcjjc@gmail.com>
1 parent 635edbc commit 94fc8dd

File tree

3 files changed

+41
-0
lines changed

3 files changed

+41
-0
lines changed

src/strands/models/bedrock.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,10 @@ class BedrockConfig(TypedDict, total=False):
9393
model_id: The Bedrock model ID (e.g., "us.anthropic.claude-sonnet-4-20250514-v1:0")
9494
include_tool_result_status: Flag to include status field in tool results.
9595
True includes status, False removes status, "auto" determines based on model_id. Defaults to "auto".
96+
service_tier: Service tier for the request, controlling the trade-off between latency and cost.
97+
Valid values: "default" (standard), "priority" (faster, premium), "flex" (cheaper, slower).
98+
Please check https://docs.aws.amazon.com/bedrock/latest/userguide/service-tiers-inference.html for
99+
supported service tiers, models, and regions.
96100
stop_sequences: List of sequences that will stop generation when encountered
97101
streaming: Flag to enable/disable streaming. Defaults to True.
98102
temperature: Controls randomness in generation (higher = more random)
@@ -117,6 +121,7 @@ class BedrockConfig(TypedDict, total=False):
117121
max_tokens: int | None
118122
model_id: str
119123
include_tool_result_status: Literal["auto"] | bool | None
124+
service_tier: str | None
120125
stop_sequences: list[str] | None
121126
streaming: bool | None
122127
temperature: float | None
@@ -245,6 +250,7 @@ def _format_request(
245250
"modelId": self.config["model_id"],
246251
"messages": self._format_bedrock_messages(messages),
247252
"system": system_blocks,
253+
**({"serviceTier": {"type": self.config["service_tier"]}} if self.config.get("service_tier") else {}),
248254
**(
249255
{
250256
"toolConfig": {

tests/strands/models/test_bedrock.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -379,6 +379,20 @@ def test_format_request_guardrail_config_without_trace_or_stream_processing_mode
379379
assert tru_request == exp_request
380380

381381

382+
def test_format_request_with_service_tier(model, messages, model_id):
383+
model.update_config(service_tier="flex")
384+
tru_request = model._format_request(messages)
385+
exp_request = {
386+
"inferenceConfig": {},
387+
"modelId": model_id,
388+
"messages": messages,
389+
"serviceTier": {"type": "flex"},
390+
"system": [],
391+
}
392+
393+
assert tru_request == exp_request
394+
395+
382396
def test_format_request_inference_config(model, messages, model_id, inference_config):
383397
model.update_config(**inference_config)
384398
tru_request = model._format_request(messages)

tests_integ/models/test_model_bedrock.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,27 @@ def test_non_streaming_agent(non_streaming_agent):
7373
assert len(str(result)) > 0
7474

7575

76+
def test_bedrock_service_tier_flex_invocation_succeeds():
77+
"""Bedrock accepts serviceTier when model and region support Priority/Flex tiers.
78+
79+
Tier support is model- and region-specific. See:
80+
https://docs.aws.amazon.com/bedrock/latest/userguide/service-tiers-inference.html
81+
82+
CI runs integ tests with AWS_REGION=us-east-1; amazon.nova-pro-v1:0 is listed for
83+
that region under Priority and Flex tiers.
84+
"""
85+
model = BedrockModel(
86+
model_id="amazon.nova-pro-v1:0",
87+
region_name="us-east-1",
88+
service_tier="flex",
89+
)
90+
agent = Agent(model=model, load_tools_from_directory=False)
91+
result = agent("Reply with exactly the word: ok")
92+
93+
assert result.stop_reason == "end_turn"
94+
assert len(str(result).strip()) > 0
95+
96+
7697
@pytest.mark.asyncio
7798
async def test_streaming_model_events(streaming_model, alist):
7899
"""Test streaming model events."""

0 commit comments

Comments
 (0)