@@ -617,24 +617,6 @@ def _get_model_name_from_response(response: LLMResult) -> str | None:
617617 return model_name
618618
619619
620- def _cache_tokens_are_separate_from_input_tokens (input_token_details : dict [str , Any ]) -> bool :
621- # LangChain provider packages use different cache-token conventions:
622- # - OpenAI-style responses report cache reads as a subset of input_tokens.
623- # - Anthropic-style responses report cache reads/creation separately from input_tokens.
624- #
625- # Avoid provider-name checks here so any LangChain integration using the same
626- # "separate cache tokens" schema gets normalized, while providers that only
627- # expose cache_read as input-token detail do not get double-counted.
628- return any (
629- key in input_token_details
630- for key in (
631- "cache_creation" ,
632- "ephemeral_5m_input_tokens" ,
633- "ephemeral_1h_input_tokens" ,
634- )
635- )
636-
637-
638620def _get_metrics_from_response (response : LLMResult ):
639621 metrics = {}
640622
@@ -685,15 +667,16 @@ def _get_metrics_from_response(response: LLMResult):
685667 completion_tokens = metrics .get ("completion_tokens" )
686668 total_tokens = metrics .get ("total_tokens" )
687669 if prompt_tokens is not None and completion_tokens is not None :
688- if (
689- cache_tokens
690- and total_tokens == prompt_tokens + completion_tokens
691- and _cache_tokens_are_separate_from_input_tokens (input_token_details )
692- ):
670+ # LangChain's UsageMetadata contract makes input_token_details a
671+ # breakdown of input_tokens, so cache tokens already count toward
672+ # the prompt total (langchain-anthropic >= 0.2.3, langchain-aws,
673+ # langchain-openai all comply). Cache tokens exceeding the prompt
674+ # total means the integration reported uncached input only — fold
675+ # cache tokens back in so prompt/total stay internally consistent.
676+ if cache_tokens > prompt_tokens and total_tokens == prompt_tokens + completion_tokens :
693677 prompt_tokens += cache_tokens
694678 metrics ["prompt_tokens" ] = prompt_tokens
695- if total_tokens is not None :
696- metrics ["total_tokens" ] = total_tokens + cache_tokens
679+ metrics ["total_tokens" ] = total_tokens + cache_tokens
697680 metrics ["tokens" ] = prompt_tokens + completion_tokens
698681
699682 if not metrics or not any (metrics .values ()):
0 commit comments