open-telemetry · vinigrazzioli-96 · May 21, 2026 · May 21, 2026 · May 22, 2026
@@ -0,0 +1 @@
+`opentelemetry-instrumentation-botocore`: capture Bedrock prompt cache token usage
diff --git a/...instrumentation-botocore/src/opentelemetry/instrumentation/botocore/extensions/bedrock.py b/...instrumentation-botocore/src/opentelemetry/instrumentation/botocore/extensions/bedrock.py
@@ -47,6 +47,8 @@
     GEN_AI_RESPONSE_FINISH_REASONS,
     GEN_AI_SYSTEM,
     GEN_AI_TOKEN_TYPE,
+    GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS,
+    GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS,
     GEN_AI_USAGE_INPUT_TOKENS,
     GEN_AI_USAGE_OUTPUT_TOKENS,
     GenAiOperationNameValues,
@@ -462,7 +464,7 @@ def before_service_call(
         # this is used to calculate the operation duration metric, duration may be skewed by request_hook
         self._operation_start = default_timer()
 
-    # pylint: disable=no-self-use,too-many-locals
+    # pylint: disable=no-self-use,too-many-locals,too-many-branches
     def _converse_on_success(
         self,
         span: Span,
@@ -482,6 +484,16 @@ def _converse_on_success(
                         GEN_AI_USAGE_OUTPUT_TOKENS,
                         output_tokens,
                     )
+                if cache_read := usage.get("cacheReadInputTokens"):
+                    span.set_attribute(
+                        GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS,
+                        cache_read,
+                    )
+                if cache_write := usage.get("cacheWriteInputTokens"):
+                    span.set_attribute(
+                        GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS,
+                        cache_write,
+                    )
 
             if stop_reason := result.get("stopReason"):
                 span.set_attribute(

diff --git a/...mentation-botocore/src/opentelemetry/instrumentation/botocore/extensions/bedrock_utils.py b/...mentation-botocore/src/opentelemetry/instrumentation/botocore/extensions/bedrock_utils.py
@@ -132,6 +132,16 @@ def _process_event(self, event):
 
                 if output_tokens := usage.get("outputTokens"):
                     self._response["usage"]["outputTokens"] = output_tokens
+
+                if cache_read := usage.get("cacheReadInputTokens"):
+                    self._response["usage"]["cacheReadInputTokens"] = (
+                        cache_read
+                    )
+
+                if cache_write := usage.get("cacheWriteInputTokens"):
+                    self._response["usage"]["cacheWriteInputTokens"] = (
+                        cache_write
+                    )
             self._complete_stream(self._response)
 
             return

diff --git a/instrumentation/opentelemetry-instrumentation-botocore/tests/test_botocore_bedrock.py b/instrumentation/opentelemetry-instrumentation-botocore/tests/test_botocore_bedrock.py
@@ -15,6 +15,7 @@
 from botocore.response import StreamingBody
 
 from opentelemetry.instrumentation.botocore.extensions.bedrock_utils import (
+    ConverseStreamWrapper,
     InvokeModelWithResponseStreamWrapper,
     _Choice,
 )
@@ -3077,6 +3078,35 @@ def test_converse_stream_with_missing_output_in_response():
     assert choice.index == 0
 
 
+def test_converse_stream_accumulates_cache_tokens():
+    # The ConverseStream metadata event carries prompt cache token usage;
+    # the wrapper should accumulate it alongside input/output tokens.
+    wrapper = ConverseStreamWrapper(
+        stream=mock.MagicMock(),
+        stream_done_callback=lambda *args, **kwargs: None,
+        stream_error_callback=lambda *args, **kwargs: None,
+    )
+
+    wrapper._process_event(
+        {
+            "metadata": {
+                "usage": {
+                    "inputTokens": 8,
+                    "outputTokens": 10,
+                    "cacheReadInputTokens": 1500,
+                    "cacheWriteInputTokens": 25,
+                }
+            }
+        }
+    )
+
+    usage = wrapper._response["usage"]
+    assert usage["inputTokens"] == 8
+    assert usage["outputTokens"] == 10
+    assert usage["cacheReadInputTokens"] == 1500
+    assert usage["cacheWriteInputTokens"] == 25
+
+
 def amazon_nova_messages():
     return [
         {"role": "user", "content": [{"text": "Say this is a test"}]},
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		`opentelemetry-instrumentation-botocore`: capture Bedrock prompt cache token usage