5454 "anthropic.claude" ,
5555]
5656
57+ # Cache of model IDs that do not support the CountTokens API.
58+ _UNSUPPORTED_COUNT_TOKENS_MODELS : set [str ] = set ()
59+
60+
def _clear_unsupported_count_tokens_cache() -> None:
    """Reset the module-level cache of models known to lack CountTokens support.

    After this call every model ID is eligible for a fresh native
    CountTokens attempt.
    """
    _UNSUPPORTED_COUNT_TOKENS_MODELS.clear()
64+
65+
# Generic type bound to pydantic models, used for structured-output helpers.
T = TypeVar("T", bound=BaseModel)

# Default read timeout for requests; presumably seconds (botocore
# `read_timeout` convention) — TODO confirm against client construction.
DEFAULT_READ_TIMEOUT = 120
@@ -784,6 +793,11 @@ async def count_tokens(
784793 Returns:
785794 Total input token count.
786795 """
796+ model_id : str = self .config ["model_id" ]
797+
798+ if model_id in _UNSUPPORTED_COUNT_TOKENS_MODELS :
799+ return await super ().count_tokens (messages , tool_specs , system_prompt , system_prompt_content )
800+
787801 try :
788802 if system_prompt and system_prompt_content is None :
789803 system_prompt_content = [{"text" : system_prompt }]
@@ -810,11 +824,23 @@ async def count_tokens(
810824 logger .debug ("model_id=<%s>, total_tokens=<%d> | native token count" , self .config ["model_id" ], total_tokens )
811825 return total_tokens
812826 except Exception as e :
813- logger .debug (
814- "model_id=<%s>, error=<%s> | native token counting failed, falling back to estimation" ,
815- self .config ["model_id" ],
816- e ,
817- )
827+ if (
828+ isinstance (e , ClientError )
829+ and e .response .get ("Error" , {}).get ("Code" ) == "ValidationException"
830+ and "doesn't support counting tokens" in str (e )
831+ ):
832+ logger .debug (
833+ "model_id=<%s> | model does not support CountTokens, caching for future calls,"
834+ " falling back to estimation" ,
835+ model_id ,
836+ )
837+ _UNSUPPORTED_COUNT_TOKENS_MODELS .add (model_id )
838+ else :
839+ logger .debug (
840+ "model_id=<%s>, error=<%s> | native token counting failed, falling back to estimation" ,
841+ model_id ,
842+ e ,
843+ )
818844 return await super ().count_tokens (messages , tool_specs , system_prompt , system_prompt_content )
819845
820846 @override
0 commit comments