3737 "Please report this issue to New Relic Support."
3838)
3939STREAM_PARSING_FAILURE_LOG_MESSAGE = "Exception occurred in Anthropic instrumentation: Failed to process event stream information. Please report this issue to New Relic Support."
40+ TOKEN_COUNTING_CALLBACK_FAILURE_LOG_MESSAGE = "Exception occurred in llm_token_count_callback for Anthropic %s tokens. Please check your callback implementation and ensure it can handle the provided input. Falling back to token counts from response usage if available." # noqa: S105
4041
4142_logger = logging .getLogger (__name__ )
4243
@@ -425,6 +426,8 @@ def _record_completion_error(*, transaction, linking_metadata, completion_id, kw
425426 request_model = request_model ,
426427 llm_metadata = llm_metadata ,
427428 response_content = None ,
429+ # We do not record token counts in error cases, so set all_token_counts to True so the pipeline tokenizer does not run
430+ all_token_counts = True ,
428431 request_timestamp = request_timestamp ,
429432 )
430433 except Exception :
@@ -447,6 +450,7 @@ def _record_completion_success(
447450 request_timestamp = None ,
448451 time_to_first_token = None ,
449452):
453+ settings = transaction .settings or global_settings ()
450454 span_id = linking_metadata .get ("span.id" )
451455 trace_id = linking_metadata .get ("trace.id" )
452456 try :
@@ -455,10 +459,45 @@ def _record_completion_success(
455459 request_temperature = kwargs .get ("temperature" )
456460 request_max_tokens = kwargs .get ("max_tokens" )
457461
458- # TODO: Complete token counting
459- # total_tokens = (
460- # (input_tokens + output_tokens) if (input_tokens is not None and output_tokens is not None) else None
461- # )
462+ # Token counts default to those reported in the response object if available,
463+ # but the user registered callback below may override them.
464+ # Anthropic does not include a total in usage, so it is always recomputed from the parts below.
465+ response_prompt_tokens = input_tokens
466+ response_completion_tokens = output_tokens
467+ response_total_tokens = None
468+
469+ # If the user has registered a callback to compute token counts it should always be preferred.
470+ token_count_callback = settings .ai_monitoring .llm_token_count_callback
471+ if token_count_callback :
472+ input_message_content = " " .join (
473+ content
474+ for msg in messages
475+ if (
476+ content := _extract_message_content (
477+ msg .get ("content" ) if isinstance (msg , dict ) else getattr (msg , "content" , None )
478+ )
479+ )
480+ )
481+ if input_message_content :
482+ try :
483+ response_prompt_tokens = token_count_callback (request_model , input_message_content )
484+ except Exception :
485+ _logger .exception (TOKEN_COUNTING_CALLBACK_FAILURE_LOG_MESSAGE , "prompt" )
486+ response_text = _extract_message_content (response_content )
487+ if response_text :
488+ try :
489+ response_completion_tokens = token_count_callback (response_model , response_text )
490+ except Exception :
491+ _logger .exception (TOKEN_COUNTING_CALLBACK_FAILURE_LOG_MESSAGE , "completion" )
492+
493+ # Prefer the sum of individual counts as the total whenever both are available.
494+ # This ensures consistency in the event that the token counting callback has reported
495+ # different values for prompt or completion tokens.
496+ if response_prompt_tokens and response_completion_tokens :
497+ response_total_tokens = response_prompt_tokens + response_completion_tokens
498+
499+ all_token_counts = bool (response_prompt_tokens and response_completion_tokens and response_total_tokens )
500+
462501 number_of_messages = len (messages ) + (1 if response_content else 0 )
463502
464503 full_chat_completion_summary_dict = {
@@ -474,13 +513,15 @@ def _record_completion_success(
474513 "response.model" : response_model ,
475514 "response.choices.finish_reason" : stop_reason ,
476515 "response.number_of_messages" : number_of_messages ,
477- # "response.usage.total_tokens": total_tokens,
478- # "response.usage.prompt_tokens": input_tokens,
479- # "response.usage.completion_tokens": output_tokens,
480516 "timestamp" : request_timestamp ,
481517 "time_to_first_token" : time_to_first_token ,
482518 }
483519
520+ if all_token_counts :
521+ full_chat_completion_summary_dict ["response.usage.prompt_tokens" ] = response_prompt_tokens
522+ full_chat_completion_summary_dict ["response.usage.completion_tokens" ] = response_completion_tokens
523+ full_chat_completion_summary_dict ["response.usage.total_tokens" ] = response_total_tokens
524+
484525 llm_metadata = _get_llm_attributes (transaction )
485526 full_chat_completion_summary_dict .update (llm_metadata )
486527 transaction .record_custom_event ("LlmChatCompletionSummary" , full_chat_completion_summary_dict )
@@ -496,6 +537,7 @@ def _record_completion_success(
496537 request_model = request_model ,
497538 llm_metadata = llm_metadata ,
498539 response_content = response_content ,
540+ all_token_counts = all_token_counts ,
499541 request_timestamp = request_timestamp ,
500542 )
501543 except Exception :
@@ -514,6 +556,7 @@ def create_chat_completion_message_event(
514556 request_model ,
515557 llm_metadata ,
516558 response_content ,
559+ all_token_counts ,
517560 request_timestamp = None ,
518561):
519562 try :
@@ -530,18 +573,15 @@ def create_chat_completion_message_event(
530573 "id" : message_id ,
531574 "span_id" : span_id ,
532575 "trace_id" : trace_id ,
533- "token_count" : (
534- settings .ai_monitoring .llm_token_count_callback (request_model , message_content )
535- if settings .ai_monitoring .llm_token_count_callback and message_content
536- else None
537- ),
538576 "role" : role ,
539577 "completion_id" : completion_id ,
540578 "sequence" : sequence ,
541579 "response.model" : response_model ,
542580 "vendor" : "anthropic" ,
543581 "ingest_source" : "Python" ,
544582 }
583+ if all_token_counts :
584+ input_message_dict ["token_count" ] = 0
545585 if settings .ai_monitoring .record_content .enabled and message_content is not None :
546586 input_message_dict ["content" ] = message_content
547587 if request_timestamp :
@@ -551,26 +591,14 @@ def create_chat_completion_message_event(
551591 transaction .record_custom_event ("LlmChatCompletionMessage" , input_message_dict )
552592
553593 # Record one event for the response
554- if response_content :
594+ response_text = _extract_message_content (response_content )
595+ if response_text :
555596 response_sequence = len (messages )
556- # response_content may be a plain string (streaming path) or a list of content blocks (non-streaming).
557- if isinstance (response_content , str ):
558- response_text = response_content
559- else :
560- response_text = " " .join (
561- block .text for block in response_content if getattr (block , "type" , None ) == "text"
562- )
563-
564597 response_message_id = f"{ response_id } -{ response_sequence } " if response_id else str (uuid .uuid4 ())
565598 output_message_dict = {
566599 "id" : response_message_id ,
567600 "span_id" : span_id ,
568601 "trace_id" : trace_id ,
569- "token_count" : (
570- settings .ai_monitoring .llm_token_count_callback (response_model , response_text )
571- if settings .ai_monitoring .llm_token_count_callback and response_text
572- else None
573- ),
574602 "role" : "assistant" ,
575603 "completion_id" : completion_id ,
576604 "sequence" : response_sequence ,
@@ -579,6 +607,8 @@ def create_chat_completion_message_event(
579607 "ingest_source" : "Python" ,
580608 "is_response" : True ,
581609 }
610+ if all_token_counts :
611+ output_message_dict ["token_count" ] = 0
582612 if settings .ai_monitoring .record_content .enabled and response_text :
583613 output_message_dict ["content" ] = response_text
584614
0 commit comments