diff --git a/.changelog/4615.added b/.changelog/4615.added new file mode 100644 index 0000000000..f8130e0eb0 --- /dev/null +++ b/.changelog/4615.added @@ -0,0 +1 @@ +`opentelemetry-instrumentation-botocore`: capture Bedrock prompt cache token usage diff --git a/instrumentation/opentelemetry-instrumentation-botocore/src/opentelemetry/instrumentation/botocore/extensions/bedrock.py b/instrumentation/opentelemetry-instrumentation-botocore/src/opentelemetry/instrumentation/botocore/extensions/bedrock.py index dc3fe839c0..83395bac21 100644 --- a/instrumentation/opentelemetry-instrumentation-botocore/src/opentelemetry/instrumentation/botocore/extensions/bedrock.py +++ b/instrumentation/opentelemetry-instrumentation-botocore/src/opentelemetry/instrumentation/botocore/extensions/bedrock.py @@ -47,6 +47,8 @@ GEN_AI_RESPONSE_FINISH_REASONS, GEN_AI_SYSTEM, GEN_AI_TOKEN_TYPE, + GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS, + GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS, GEN_AI_USAGE_INPUT_TOKENS, GEN_AI_USAGE_OUTPUT_TOKENS, GenAiOperationNameValues, @@ -462,7 +464,7 @@ def before_service_call( # this is used to calculate the operation duration metric, duration may be skewed by request_hook self._operation_start = default_timer() - # pylint: disable=no-self-use,too-many-locals + # pylint: disable=no-self-use,too-many-locals,too-many-branches def _converse_on_success( self, span: Span, @@ -482,6 +484,16 @@ def _converse_on_success( GEN_AI_USAGE_OUTPUT_TOKENS, output_tokens, ) + if cache_read := usage.get("cacheReadInputTokens"): + span.set_attribute( + GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS, + cache_read, + ) + if cache_write := usage.get("cacheWriteInputTokens"): + span.set_attribute( + GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS, + cache_write, + ) if stop_reason := result.get("stopReason"): span.set_attribute( diff --git a/instrumentation/opentelemetry-instrumentation-botocore/src/opentelemetry/instrumentation/botocore/extensions/bedrock_utils.py b/instrumentation/opentelemetry-instrumentation-botocore/src/opentelemetry/instrumentation/botocore/extensions/bedrock_utils.py index 200d65cb79..3e3dbf0597 100644 --- a/instrumentation/opentelemetry-instrumentation-botocore/src/opentelemetry/instrumentation/botocore/extensions/bedrock_utils.py +++ b/instrumentation/opentelemetry-instrumentation-botocore/src/opentelemetry/instrumentation/botocore/extensions/bedrock_utils.py @@ -132,6 +132,16 @@ def _process_event(self, event): if output_tokens := usage.get("outputTokens"): self._response["usage"]["outputTokens"] = output_tokens + + if cache_read := usage.get("cacheReadInputTokens"): + self._response["usage"]["cacheReadInputTokens"] = ( + cache_read + ) + + if cache_write := usage.get("cacheWriteInputTokens"): + self._response["usage"]["cacheWriteInputTokens"] = ( + cache_write + ) self._complete_stream(self._response) return diff --git a/instrumentation/opentelemetry-instrumentation-botocore/tests/test_botocore_bedrock.py b/instrumentation/opentelemetry-instrumentation-botocore/tests/test_botocore_bedrock.py index be10475c62..1c40fc7e56 100644 --- a/instrumentation/opentelemetry-instrumentation-botocore/tests/test_botocore_bedrock.py +++ b/instrumentation/opentelemetry-instrumentation-botocore/tests/test_botocore_bedrock.py @@ -15,6 +15,7 @@ from botocore.response import StreamingBody from opentelemetry.instrumentation.botocore.extensions.bedrock_utils import ( + ConverseStreamWrapper, InvokeModelWithResponseStreamWrapper, _Choice, ) @@ -3077,6 +3078,35 @@ def test_converse_stream_with_missing_output_in_response(): assert choice.index == 0 +def test_converse_stream_accumulates_cache_tokens(): + # The ConverseStream metadata event carries prompt cache token usage; + # the wrapper should accumulate it alongside input/output tokens. + wrapper = ConverseStreamWrapper( + stream=mock.MagicMock(), + stream_done_callback=lambda *args, **kwargs: None, + stream_error_callback=lambda *args, **kwargs: None, + ) + + wrapper._process_event( + { + "metadata": { + "usage": { + "inputTokens": 8, + "outputTokens": 10, + "cacheReadInputTokens": 1500, + "cacheWriteInputTokens": 25, + } + } + } + ) + + usage = wrapper._response["usage"] + assert usage["inputTokens"] == 8 + assert usage["outputTokens"] == 10 + assert usage["cacheReadInputTokens"] == 1500 + assert usage["cacheWriteInputTokens"] == 25 + + def amazon_nova_messages(): return [ {"role": "user", "content": [{"text": "Say this is a test"}]},