Skip to content

Commit dd3b6a2

Browse files
committed
feat(gemini): plumb through cache tokens in metadata events
Surface cached_content_token_count from usage_metadata as cacheReadInputTokens on the metadata event emitted by GeminiModel. The existing telemetry pipeline picks it up automatically. Relates to #1060, #1140.
1 parent 1847fae commit dd3b6a2

2 files changed

Lines changed: 62 additions & 5 deletions

File tree

src/strands/models/gemini.py

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
from typing_extensions import Required, Unpack, override
1717

1818
from ..types.content import ContentBlock, ContentBlockStartToolUse, Messages, SystemContentBlock
19+
from ..types.event_loop import Usage
1920
from ..types.exceptions import ContextWindowOverflowException, ModelThrottledException, ProviderTokenCountError
2021
from ..types.streaming import StreamEvent
2122
from ..types.tools import ToolChoice, ToolSpec
@@ -423,13 +424,18 @@ def _format_chunk(self, event: dict[str, Any]) -> StreamEvent:
423424
return {"messageStop": {"stopReason": "end_turn"}}
424425

425426
case "metadata":
427+
usage_data: Usage = {
428+
"inputTokens": event["data"].prompt_token_count,
429+
"outputTokens": event["data"].total_token_count - event["data"].prompt_token_count,
430+
"totalTokens": event["data"].total_token_count,
431+
}
432+
433+
if cached := event["data"].cached_content_token_count:
434+
usage_data["cacheReadInputTokens"] = cached
435+
426436
return {
427437
"metadata": {
428-
"usage": {
429-
"inputTokens": event["data"].prompt_token_count,
430-
"outputTokens": event["data"].total_token_count - event["data"].prompt_token_count,
431-
"totalTokens": event["data"].total_token_count,
432-
},
438+
"usage": usage_data,
433439
"metrics": {
434440
"latencyMs": 0, # TODO
435441
},

tests/strands/models/test_gemini.py

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -569,6 +569,57 @@ async def test_stream_response_text(gemini_client, model, messages, agenerator,
569569
assert tru_chunks == exp_chunks
570570

571571

572+
def test_format_chunk_metadata_with_cache_tokens(model):
    """A non-zero cached_content_token_count is reported as cacheReadInputTokens.

    Feeds ``_format_chunk`` a metadata event whose usage data carries 100 prompt
    tokens, 150 total tokens and 25 cached tokens, and expects the usage section
    to contain the three base counters plus ``cacheReadInputTokens``.
    """
    usage_metadata = genai.types.GenerateContentResponseUsageMetadata(
        prompt_token_count=100,
        total_token_count=150,
        cached_content_token_count=25,
    )

    tru_chunk = model._format_chunk({"chunk_type": "metadata", "data": usage_metadata})

    # outputTokens is expected to be total minus prompt tokens (150 - 100).
    exp_usage = {
        "inputTokens": 100,
        "outputTokens": 50,
        "totalTokens": 150,
        "cacheReadInputTokens": 25,
    }
    exp_chunk = {
        "metadata": {
            "usage": exp_usage,
            "metrics": {"latencyMs": 0},
        },
    }
    assert tru_chunk == exp_chunk
596+
597+
598+
def test_format_chunk_metadata_with_zero_cached_tokens(model):
    """A cached_content_token_count of 0 leaves cacheReadInputTokens out of usage.

    Feeds ``_format_chunk`` a metadata event with 100 prompt tokens, 150 total
    tokens and 0 cached tokens, and expects the usage section to contain only
    the three base counters.
    """
    usage_metadata = genai.types.GenerateContentResponseUsageMetadata(
        prompt_token_count=100,
        total_token_count=150,
        cached_content_token_count=0,
    )

    tru_chunk = model._format_chunk({"chunk_type": "metadata", "data": usage_metadata})

    # No "cacheReadInputTokens" key is expected when the cached count is zero.
    exp_usage = {
        "inputTokens": 100,
        "outputTokens": 50,
        "totalTokens": 150,
    }
    exp_chunk = {
        "metadata": {
            "usage": exp_usage,
            "metrics": {"latencyMs": 0},
        },
    }
    assert tru_chunk == exp_chunk
621+
622+
572623
@pytest.mark.asyncio
573624
async def test_stream_response_tool_use(gemini_client, model, messages, agenerator, alist):
574625
gemini_client.aio.models.generate_content_stream.return_value = agenerator(

0 commit comments

Comments
 (0)