Skip to content

Commit 1f2b421

Browse files
authored
Anthropic Token Counting (#1749)
* New testing utils
* Anthropic token counting
* Add error message for token counting failures
1 parent 0699eca commit 1f2b421

4 files changed

Lines changed: 94 additions & 30 deletions

File tree

newrelic/hooks/mlmodel_anthropic.py

Lines changed: 56 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
"Please report this issue to New Relic Support."
3838
)
3939
STREAM_PARSING_FAILURE_LOG_MESSAGE = "Exception occurred in Anthropic instrumentation: Failed to process event stream information. Please report this issue to New Relic Support."
40+
TOKEN_COUNTING_CALLBACK_FAILURE_LOG_MESSAGE = "Exception occurred in llm_token_count_callback for Anthropic %s tokens. Please check your callback implementation and ensure it can handle the provided input. Falling back to token counts from response usage if available." # noqa: S105
4041

4142
_logger = logging.getLogger(__name__)
4243

@@ -425,6 +426,8 @@ def _record_completion_error(*, transaction, linking_metadata, completion_id, kw
425426
request_model=request_model,
426427
llm_metadata=llm_metadata,
427428
response_content=None,
429+
# We do not record token counts in error cases, so set all_token_counts to True so the pipeline tokenizer does not run
430+
all_token_counts=True,
428431
request_timestamp=request_timestamp,
429432
)
430433
except Exception:
@@ -447,6 +450,7 @@ def _record_completion_success(
447450
request_timestamp=None,
448451
time_to_first_token=None,
449452
):
453+
settings = transaction.settings or global_settings()
450454
span_id = linking_metadata.get("span.id")
451455
trace_id = linking_metadata.get("trace.id")
452456
try:
@@ -455,10 +459,45 @@ def _record_completion_success(
455459
request_temperature = kwargs.get("temperature")
456460
request_max_tokens = kwargs.get("max_tokens")
457461

458-
# TODO: Complete token counting
459-
# total_tokens = (
460-
# (input_tokens + output_tokens) if (input_tokens is not None and output_tokens is not None) else None
461-
# )
462+
# Token counts default to those reported in the response object if available,
463+
# but the user registered callback below may override them.
464+
# Anthropic does not include a total in usage, so it is always recomputed from the parts below.
465+
response_prompt_tokens = input_tokens
466+
response_completion_tokens = output_tokens
467+
response_total_tokens = None
468+
469+
# If the user has registered a callback to compute token counts it should always be preferred.
470+
token_count_callback = settings.ai_monitoring.llm_token_count_callback
471+
if token_count_callback:
472+
input_message_content = " ".join(
473+
content
474+
for msg in messages
475+
if (
476+
content := _extract_message_content(
477+
msg.get("content") if isinstance(msg, dict) else getattr(msg, "content", None)
478+
)
479+
)
480+
)
481+
if input_message_content:
482+
try:
483+
response_prompt_tokens = token_count_callback(request_model, input_message_content)
484+
except Exception:
485+
_logger.exception(TOKEN_COUNTING_CALLBACK_FAILURE_LOG_MESSAGE, "prompt")
486+
response_text = _extract_message_content(response_content)
487+
if response_text:
488+
try:
489+
response_completion_tokens = token_count_callback(response_model, response_text)
490+
except Exception:
491+
_logger.exception(TOKEN_COUNTING_CALLBACK_FAILURE_LOG_MESSAGE, "completion")
492+
493+
# Prefer the sum of individual counts as the total whenever both are available.
494+
# This ensures consistency in the event that the token counting callback has reported
495+
# different values for prompt or completion tokens.
496+
if response_prompt_tokens and response_completion_tokens:
497+
response_total_tokens = response_prompt_tokens + response_completion_tokens
498+
499+
all_token_counts = bool(response_prompt_tokens and response_completion_tokens and response_total_tokens)
500+
462501
number_of_messages = len(messages) + (1 if response_content else 0)
463502

464503
full_chat_completion_summary_dict = {
@@ -474,13 +513,15 @@ def _record_completion_success(
474513
"response.model": response_model,
475514
"response.choices.finish_reason": stop_reason,
476515
"response.number_of_messages": number_of_messages,
477-
# "response.usage.total_tokens": total_tokens,
478-
# "response.usage.prompt_tokens": input_tokens,
479-
# "response.usage.completion_tokens": output_tokens,
480516
"timestamp": request_timestamp,
481517
"time_to_first_token": time_to_first_token,
482518
}
483519

520+
if all_token_counts:
521+
full_chat_completion_summary_dict["response.usage.prompt_tokens"] = response_prompt_tokens
522+
full_chat_completion_summary_dict["response.usage.completion_tokens"] = response_completion_tokens
523+
full_chat_completion_summary_dict["response.usage.total_tokens"] = response_total_tokens
524+
484525
llm_metadata = _get_llm_attributes(transaction)
485526
full_chat_completion_summary_dict.update(llm_metadata)
486527
transaction.record_custom_event("LlmChatCompletionSummary", full_chat_completion_summary_dict)
@@ -496,6 +537,7 @@ def _record_completion_success(
496537
request_model=request_model,
497538
llm_metadata=llm_metadata,
498539
response_content=response_content,
540+
all_token_counts=all_token_counts,
499541
request_timestamp=request_timestamp,
500542
)
501543
except Exception:
@@ -514,6 +556,7 @@ def create_chat_completion_message_event(
514556
request_model,
515557
llm_metadata,
516558
response_content,
559+
all_token_counts,
517560
request_timestamp=None,
518561
):
519562
try:
@@ -530,18 +573,15 @@ def create_chat_completion_message_event(
530573
"id": message_id,
531574
"span_id": span_id,
532575
"trace_id": trace_id,
533-
"token_count": (
534-
settings.ai_monitoring.llm_token_count_callback(request_model, message_content)
535-
if settings.ai_monitoring.llm_token_count_callback and message_content
536-
else None
537-
),
538576
"role": role,
539577
"completion_id": completion_id,
540578
"sequence": sequence,
541579
"response.model": response_model,
542580
"vendor": "anthropic",
543581
"ingest_source": "Python",
544582
}
583+
if all_token_counts:
584+
input_message_dict["token_count"] = 0
545585
if settings.ai_monitoring.record_content.enabled and message_content is not None:
546586
input_message_dict["content"] = message_content
547587
if request_timestamp:
@@ -551,26 +591,14 @@ def create_chat_completion_message_event(
551591
transaction.record_custom_event("LlmChatCompletionMessage", input_message_dict)
552592

553593
# Record one event for the response
554-
if response_content:
594+
response_text = _extract_message_content(response_content)
595+
if response_text:
555596
response_sequence = len(messages)
556-
# response_content may be a plain string (streaming path) or a list of content blocks (non-streaming).
557-
if isinstance(response_content, str):
558-
response_text = response_content
559-
else:
560-
response_text = " ".join(
561-
block.text for block in response_content if getattr(block, "type", None) == "text"
562-
)
563-
564597
response_message_id = f"{response_id}-{response_sequence}" if response_id else str(uuid.uuid4())
565598
output_message_dict = {
566599
"id": response_message_id,
567600
"span_id": span_id,
568601
"trace_id": trace_id,
569-
"token_count": (
570-
settings.ai_monitoring.llm_token_count_callback(response_model, response_text)
571-
if settings.ai_monitoring.llm_token_count_callback and response_text
572-
else None
573-
),
574602
"role": "assistant",
575603
"completion_id": completion_id,
576604
"sequence": response_sequence,
@@ -579,6 +607,8 @@ def create_chat_completion_message_event(
579607
"ingest_source": "Python",
580608
"is_response": True,
581609
}
610+
if all_token_counts:
611+
output_message_dict["token_count"] = 0
582612
if settings.ai_monitoring.record_content.enabled and response_text:
583613
output_message_dict["content"] = response_text
584614

tests/mlmodel_anthropic/test_chat_completion.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
from conftest import ANTHROPIC_VERSION_METRIC
1717
from testing_support.fixtures import override_llm_token_callback_settings, reset_core_stats_engine, validate_attributes
1818
from testing_support.ml_testing_utils import (
19-
add_token_count_to_events,
19+
add_token_counts_to_chat_events,
2020
disabled_ai_monitoring_record_content_settings,
2121
disabled_ai_monitoring_settings,
2222
events_sans_content,
@@ -51,6 +51,9 @@ def chat_completion_events(is_streaming):
5151
"response.model": "claude-sonnet-4-5-20250929",
5252
"request.temperature": 0.7,
5353
"request.max_tokens": 100,
54+
"response.usage.prompt_tokens": 16,
55+
"response.usage.completion_tokens": 26,
56+
"response.usage.total_tokens": 42,
5457
"response.choices.finish_reason": "end_turn",
5558
"vendor": "anthropic",
5659
"ingest_source": "Python",
@@ -71,6 +74,7 @@ def chat_completion_events(is_streaming):
7174
"completion_id": None,
7275
"sequence": 0,
7376
"response.model": "claude-sonnet-4-5-20250929",
77+
"token_count": 0,
7478
"vendor": "anthropic",
7579
"ingest_source": "Python",
7680
},
@@ -88,6 +92,7 @@ def chat_completion_events(is_streaming):
8892
"completion_id": None,
8993
"sequence": 1,
9094
"response.model": "claude-sonnet-4-5-20250929",
95+
"token_count": 0,
9196
"vendor": "anthropic",
9297
"is_response": True,
9398
"ingest_source": "Python",
@@ -238,7 +243,7 @@ def _test():
238243
def test_anthropic_chat_completion_with_token_count(
239244
exercise_model, chat_completion_metrics, set_trace_info, chat_completion_events
240245
):
241-
@validate_custom_events(add_token_count_to_events(chat_completion_events))
246+
@validate_custom_events(add_token_counts_to_chat_events(chat_completion_events))
242247
@validate_custom_event_count(count=3)
243248
@validate_transaction_metrics(
244249
name="test_anthropic_chat_completion_with_token_count",

tests/mlmodel_anthropic/test_chat_completion_error.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919
from conftest import ANTHROPIC_VERSION_METRIC
2020
from testing_support.fixtures import dt_enabled, override_llm_token_callback_settings, reset_core_stats_engine
2121
from testing_support.ml_testing_utils import (
22-
add_token_count_to_events,
2322
disabled_ai_monitoring_record_content_settings,
2423
events_sans_content,
2524
events_with_context_attrs,
@@ -69,6 +68,7 @@
6968
"role": "user",
7069
"completion_id": None,
7170
"sequence": 0,
71+
"token_count": 0,
7272
"vendor": "anthropic",
7373
"ingest_source": "Python",
7474
},
@@ -190,6 +190,7 @@ def _test():
190190
"completion_id": None,
191191
"response.model": "does-not-exist",
192192
"sequence": 0,
193+
"token_count": 0,
193194
"vendor": "anthropic",
194195
"ingest_source": "Python",
195196
},
@@ -230,7 +231,7 @@ def test_chat_completion_invalid_request_error_invalid_model_with_token_count(
230231
custom_metrics=[(ANTHROPIC_VERSION_METRIC, 1)],
231232
background_task=True,
232233
)
233-
@validate_custom_events(add_token_count_to_events(expected_events_on_invalid_model_error))
234+
@validate_custom_events(expected_events_on_invalid_model_error)
234235
@validate_custom_event_count(count=2)
235236
@background_task(name="test_chat_completion_invalid_request_error_invalid_model_with_token_count")
236237
def _test():
@@ -277,6 +278,7 @@ def _test():
277278
"response.model": "claude-4-5-sonnet",
278279
"completion_id": None,
279280
"sequence": 0,
281+
"token_count": 0,
280282
"vendor": "anthropic",
281283
"ingest_source": "Python",
282284
},

tests/testing_support/ml_testing_utils.py

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ def llm_token_count_callback(model, content):
2929
return 105
3030

3131

32+
# This will be removed once all LLM instrumentations have been converted to use new token count design
3233
def add_token_count_to_events(expected_events):
3334
events = copy.deepcopy(expected_events)
3435
for event in events:
@@ -37,6 +38,32 @@ def add_token_count_to_events(expected_events):
3738
return events
3839

3940

41+
def add_token_count_to_embedding_events(expected_events):
42+
events = copy.deepcopy(expected_events)
43+
for event in events:
44+
if event[0]["type"] == "LlmEmbedding":
45+
event[1]["response.usage.total_tokens"] = 105
46+
return events
47+
48+
49+
def add_token_count_streaming_events(expected_events):
50+
events = copy.deepcopy(expected_events)
51+
for event in events:
52+
if event[0]["type"] == "LlmChatCompletionMessage":
53+
event[1]["token_count"] = 0
54+
return events
55+
56+
57+
def add_token_counts_to_chat_events(expected_events):
58+
events = copy.deepcopy(expected_events)
59+
for event in events:
60+
if event[0]["type"] == "LlmChatCompletionSummary":
61+
event[1]["response.usage.prompt_tokens"] = 105
62+
event[1]["response.usage.completion_tokens"] = 105
63+
event[1]["response.usage.total_tokens"] = 210
64+
return events
65+
66+
4067
def events_sans_content(event):
4168
new_event = copy.deepcopy(event)
4269
for _event in new_event:

0 commit comments

Comments
 (0)