@@ -550,12 +550,9 @@ def _convert_google_genai_response_to_chatmessage(response: types.GenerateConten
         usage["thoughts_token_count"] = usage_metadata.thoughts_token_count

     # Add cached content token count if available (implicit or explicit context caching)
-    if (
-        usage_metadata
-        and hasattr(usage_metadata, "cached_content_token_count")
-        and usage_metadata.cached_content_token_count
-    ):
-        usage["cached_content_token_count"] = usage_metadata.cached_content_token_count
+    cached_content_token_count = getattr(usage_metadata, "cached_content_token_count", None) if usage_metadata else None
+    if cached_content_token_count is not None:
+        usage["cached_content_token_count"] = cached_content_token_count

     usage.update(_convert_usage_metadata_to_serializable(usage_metadata))

@@ -625,6 +622,11 @@ def _convert_google_chunk_to_streaming_chunk(
     if usage_metadata and hasattr(usage_metadata, "thoughts_token_count") and usage_metadata.thoughts_token_count:
         usage["thoughts_token_count"] = usage_metadata.thoughts_token_count

+    # Add cached content token count if available (context caching)
+    cached_content_token_count = getattr(usage_metadata, "cached_content_token_count", None) if usage_metadata else None
+    if cached_content_token_count is not None:
+        usage["cached_content_token_count"] = cached_content_token_count
+
     if candidate.content and candidate.content.parts:
         tc_index = -1
         for part_index, part in enumerate(candidate.content.parts):
@@ -717,6 +719,7 @@ def _aggregate_streaming_chunks_with_reasoning(chunks: list[StreamingChunk]) ->
     reasoning_text_parts: list[str] = []
     thought_signatures: list[dict[str, Any]] = []
     thoughts_token_count = None
+    cached_content_token_count = None

     for chunk in chunks:
         # Extract reasoning from the StreamingChunk.reasoning field
@@ -731,18 +734,26 @@ def _aggregate_streaming_chunks_with_reasoning(chunks: list[StreamingChunk]) ->
             # We'll keep the last set of signatures as they represent the complete state
             thought_signatures = signature_deltas

-        # Extract thinking token usage (from the last chunk that has it)
+        # Extract token usage metadata (from the last chunk that has it)
         if chunk.meta and "usage" in chunk.meta:
             chunk_usage = chunk.meta["usage"]
             if "thoughts_token_count" in chunk_usage:
                 thoughts_token_count = chunk_usage["thoughts_token_count"]
+            if "cached_content_token_count" in chunk_usage:
+                cached_content_token_count = chunk_usage["cached_content_token_count"]

     # Add thinking token count to usage if present
     if thoughts_token_count is not None and "usage" in message.meta:
         if message.meta["usage"] is None:
             message.meta["usage"] = {}
         message.meta["usage"]["thoughts_token_count"] = thoughts_token_count

+    # Add cached content token count to usage if present
+    if cached_content_token_count is not None and "usage" in message.meta:
+        if message.meta["usage"] is None:
+            message.meta["usage"] = {}
+        message.meta["usage"]["cached_content_token_count"] = cached_content_token_count
+
     # Add thought signatures to meta if present (for multi-turn context preservation)
     if thought_signatures:
         message.meta["thought_signatures"] = thought_signatures
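
The snippet below is a minimal, hypothetical sketch (not part of this diff) of the extraction pattern the change adopts: `getattr` with a `None` default tolerates both a missing `usage_metadata` object and SDK versions that do not expose `cached_content_token_count`, while the `is not None` check upstream still records a reported count of 0. The helper name `extract_cached_tokens` and the `SimpleNamespace` stand-ins are illustrative only.

```python
# Hypothetical sketch of the extraction pattern used in this PR; the helper name
# and the SimpleNamespace stand-ins are illustrative, not part of the codebase.
from types import SimpleNamespace


def extract_cached_tokens(usage_metadata) -> int | None:
    # getattr with a default covers SDK versions that lack the attribute entirely,
    # and the leading guard covers responses with no usage_metadata at all.
    return getattr(usage_metadata, "cached_content_token_count", None) if usage_metadata else None


print(extract_cached_tokens(None))  # None: no usage metadata on the response
print(extract_cached_tokens(SimpleNamespace()))  # None: attribute missing
print(extract_cached_tokens(SimpleNamespace(cached_content_token_count=0)))  # 0, kept by the is-not-None check
print(extract_cached_tokens(SimpleNamespace(cached_content_token_count=128)))  # 128
```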