Skip to content

Commit d7e8d22

Browse files
Python: Fix Python OTel usage detail attributes (#6493)
* Fix Python OTel usage detail attributes.
  Map cached/read/reasoning usage detail fields to standard OTel GenAI attributes while preserving provider-specific legacy keys. Add focused coverage for direct response spans, aggregated agent spans, and provider usage parsing.
  Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
* Address usage detail review feedback.
  Omit missing OpenAI Responses usage detail counts while preserving zero-valued counts. Record zero-valued token usage in OTel histograms and add regression coverage.
  Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---------
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent d7027fc commit d7e8d22

9 files changed

Lines changed: 239 additions & 23 deletions

File tree

python/packages/anthropic/agent_framework_anthropic/_chat_client.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1024,8 +1024,10 @@ def _parse_usage_from_anthropic(self, usage: BetaUsage | BetaMessageDeltaUsage |
10241024
usage_details["input_token_count"] = usage.input_tokens
10251025
if usage.cache_creation_input_tokens is not None:
10261026
usage_details["anthropic.cache_creation_input_tokens"] = usage.cache_creation_input_tokens # type: ignore[typeddict-unknown-key]
1027+
usage_details["cache_creation_input_token_count"] = usage.cache_creation_input_tokens
10271028
if usage.cache_read_input_tokens is not None:
10281029
usage_details["anthropic.cache_read_input_tokens"] = usage.cache_read_input_tokens # type: ignore[typeddict-unknown-key]
1030+
usage_details["cache_read_input_token_count"] = usage.cache_read_input_tokens
10291031
return usage_details
10301032

10311033
def _parse_contents_from_anthropic(

python/packages/anthropic/tests/test_anthropic_client.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2354,6 +2354,27 @@ def test_parse_usage_with_cache_tokens(mock_anthropic_client: MagicMock) -> None
23542354
assert result["input_token_count"] == 100
23552355
assert result["anthropic.cache_creation_input_tokens"] == 20
23562356
assert result["anthropic.cache_read_input_tokens"] == 30
2357+
assert result["cache_creation_input_token_count"] == 20
2358+
assert result["cache_read_input_token_count"] == 30
2359+
2360+
2361+
def test_parse_usage_preserves_zero_cache_tokens(mock_anthropic_client: MagicMock) -> None:
2362+
"""Test parsing usage preserves zero-valued mapped cache tokens."""
2363+
client = create_test_anthropic_client(mock_anthropic_client)
2364+
2365+
mock_usage = MagicMock()
2366+
mock_usage.input_tokens = 100
2367+
mock_usage.output_tokens = 50
2368+
mock_usage.cache_creation_input_tokens = 0
2369+
mock_usage.cache_read_input_tokens = 0
2370+
2371+
result = client._parse_usage_from_anthropic(mock_usage)
2372+
2373+
assert result is not None
2374+
assert result["anthropic.cache_creation_input_tokens"] == 0
2375+
assert result["cache_creation_input_token_count"] == 0
2376+
assert result["anthropic.cache_read_input_tokens"] == 0
2377+
assert result["cache_read_input_token_count"] == 0
23572378

23582379

23592380
# Code Execution Result Tests

python/packages/core/agent_framework/_types.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -400,12 +400,18 @@ class UsageDetails(TypedDict, total=False, extra_items=int): # type: ignore[cal
400400
input_token_count: The number of input tokens used.
401401
output_token_count: The number of output tokens generated.
402402
total_token_count: The total number of tokens (input + output).
403+
cache_creation_input_token_count: The number of input tokens written to a provider-managed cache.
404+
cache_read_input_token_count: The number of input tokens served from a provider-managed cache.
405+
reasoning_output_token_count: The number of output tokens used for reasoning.
403406
404407
"""
405408

406409
input_token_count: int | None
407410
output_token_count: int | None
408411
total_token_count: int | None
412+
cache_creation_input_token_count: int | None
413+
cache_read_input_token_count: int | None
414+
reasoning_output_token_count: int | None
409415

410416

411417
def add_usage_details(usage1: UsageDetails | None, usage2: UsageDetails | None) -> UsageDetails:

python/packages/core/agent_framework/observability.py

Lines changed: 30 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -201,6 +201,9 @@ class OtelAttr(str, Enum):
201201
# Usage attributes
202202
INPUT_TOKENS = "gen_ai.usage.input_tokens"
203203
OUTPUT_TOKENS = "gen_ai.usage.output_tokens"
204+
CACHE_CREATION_INPUT_TOKENS = "gen_ai.usage.cache_creation.input_tokens"
205+
CACHE_READ_INPUT_TOKENS = "gen_ai.usage.cache_read.input_tokens"
206+
REASONING_OUTPUT_TOKENS = "gen_ai.usage.reasoning.output_tokens"
204207
# Tool attributes
205208
TOOL_CALL_ID = "gen_ai.tool.call.id"
206209
TOOL_DESCRIPTION = "gen_ai.tool.description"
@@ -327,6 +330,20 @@ def __str__(self) -> str:
327330
"tool_calls": "tool_call",
328331
"length": "length",
329332
}
333+
USAGE_DETAIL_TO_OTEL_ATTR: Final[tuple[tuple[str, OtelAttr], ...]] = (
334+
("input_token_count", OtelAttr.INPUT_TOKENS),
335+
("output_token_count", OtelAttr.OUTPUT_TOKENS),
336+
("cache_creation_input_token_count", OtelAttr.CACHE_CREATION_INPUT_TOKENS),
337+
("cache_read_input_token_count", OtelAttr.CACHE_READ_INPUT_TOKENS),
338+
("reasoning_output_token_count", OtelAttr.REASONING_OUTPUT_TOKENS),
339+
("anthropic.cache_creation_input_tokens", OtelAttr.CACHE_CREATION_INPUT_TOKENS),
340+
("anthropic.cache_read_input_tokens", OtelAttr.CACHE_READ_INPUT_TOKENS),
341+
("openai.cached_input_tokens", OtelAttr.CACHE_READ_INPUT_TOKENS),
342+
("prompt/cached_tokens", OtelAttr.CACHE_READ_INPUT_TOKENS),
343+
("openai.reasoning_tokens", OtelAttr.REASONING_OUTPUT_TOKENS),
344+
("completion/reasoning_tokens", OtelAttr.REASONING_OUTPUT_TOKENS),
345+
("reasoning_tokens", OtelAttr.REASONING_OUTPUT_TOKENS),
346+
)
330347

331348

332349
# region Telemetry utils
@@ -2350,12 +2367,16 @@ def _apply_accumulated_usage(attributes: dict[str, Any], captured_fields: set[st
23502367
accumulated = INNER_ACCUMULATED_USAGE.get()
23512368
if not accumulated:
23522369
return
2353-
input_tokens = accumulated.get("input_token_count")
2354-
if input_tokens:
2355-
attributes[OtelAttr.INPUT_TOKENS] = input_tokens
2356-
output_tokens = accumulated.get("output_token_count")
2357-
if output_tokens:
2358-
attributes[OtelAttr.OUTPUT_TOKENS] = output_tokens
2370+
_apply_usage_attributes(attributes, accumulated)
2371+
2372+
2373+
def _apply_usage_attributes(attributes: dict[str, Any], usage: Mapping[str, Any]) -> None:
2374+
"""Apply known usage details as standard OTel GenAI attributes."""
2375+
for usage_key, otel_attr in USAGE_DETAIL_TO_OTEL_ATTR:
2376+
value = usage.get(usage_key)
2377+
if value is None or isinstance(value, bool) or not isinstance(value, int):
2378+
continue
2379+
attributes.setdefault(otel_attr, value)
23592380

23602381

23612382
def _get_response_attributes(
@@ -2378,12 +2399,7 @@ def _get_response_attributes(
23782399
if model := getattr(response, "model", None):
23792400
attributes[OtelAttr.RESPONSE_MODEL] = model
23802401
if capture_usage and (usage := response.usage_details):
2381-
input_tokens = usage.get("input_token_count")
2382-
if input_tokens:
2383-
attributes[OtelAttr.INPUT_TOKENS] = input_tokens
2384-
output_tokens = usage.get("output_token_count")
2385-
if output_tokens:
2386-
attributes[OtelAttr.OUTPUT_TOKENS] = output_tokens
2402+
_apply_usage_attributes(attributes, usage)
23872403
return attributes
23882404

23892405

@@ -2407,9 +2423,9 @@ def _capture_response(
24072423
"""Set the response for a given span."""
24082424
span.set_attributes(attributes)
24092425
attrs: dict[str, Any] = {k: v for k, v in attributes.items() if k in GEN_AI_METRIC_ATTRIBUTES}
2410-
if token_usage_histogram and (input_tokens := attributes.get(OtelAttr.INPUT_TOKENS)):
2426+
if token_usage_histogram and (input_tokens := attributes.get(OtelAttr.INPUT_TOKENS)) is not None:
24112427
token_usage_histogram.record(input_tokens, attributes={**attrs, OtelAttr.T_TYPE: OtelAttr.T_TYPE_INPUT})
2412-
if token_usage_histogram and (output_tokens := attributes.get(OtelAttr.OUTPUT_TOKENS)):
2428+
if token_usage_histogram and (output_tokens := attributes.get(OtelAttr.OUTPUT_TOKENS)) is not None:
24132429
token_usage_histogram.record(output_tokens, {**attrs, OtelAttr.T_TYPE: OtelAttr.T_TYPE_OUTPUT})
24142430
if operation_duration_histogram and duration is not None:
24152431
if OtelAttr.ERROR_TYPE in attributes:

python/packages/core/tests/core/test_observability.py

Lines changed: 97 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2154,6 +2154,58 @@ def test_get_response_attributes_with_usage():
21542154
assert result[OtelAttr.OUTPUT_TOKENS] == 50
21552155

21562156

2157+
def test_get_response_attributes_with_additional_usage():
2158+
"""Test _get_response_attributes maps additional usage details to OTel attributes."""
2159+
from unittest.mock import Mock
2160+
2161+
from agent_framework.observability import OtelAttr, _get_response_attributes
2162+
2163+
response = Mock()
2164+
response.response_id = None
2165+
response.finish_reason = None
2166+
response.raw_representation = None
2167+
response.usage_details = {
2168+
"input_token_count": 0,
2169+
"output_token_count": 50,
2170+
"cache_creation_input_token_count": 10,
2171+
"cache_read_input_token_count": 0,
2172+
"reasoning_output_token_count": 30,
2173+
}
2174+
2175+
attrs = {}
2176+
result = _get_response_attributes(attrs, response)
2177+
2178+
assert result[OtelAttr.INPUT_TOKENS] == 0
2179+
assert result[OtelAttr.OUTPUT_TOKENS] == 50
2180+
assert result[OtelAttr.CACHE_CREATION_INPUT_TOKENS] == 10
2181+
assert result[OtelAttr.CACHE_READ_INPUT_TOKENS] == 0
2182+
assert result[OtelAttr.REASONING_OUTPUT_TOKENS] == 30
2183+
2184+
2185+
def test_get_response_attributes_maps_legacy_usage_keys():
2186+
"""Test _get_response_attributes maps legacy provider usage keys to standard OTel attributes."""
2187+
from unittest.mock import Mock
2188+
2189+
from agent_framework.observability import OtelAttr, _get_response_attributes
2190+
2191+
response = Mock()
2192+
response.response_id = None
2193+
response.finish_reason = None
2194+
response.raw_representation = None
2195+
response.usage_details = {
2196+
"anthropic.cache_creation_input_tokens": 12,
2197+
"openai.cached_input_tokens": 0,
2198+
"completion/reasoning_tokens": 34,
2199+
}
2200+
2201+
attrs = {}
2202+
result = _get_response_attributes(attrs, response)
2203+
2204+
assert result[OtelAttr.CACHE_CREATION_INPUT_TOKENS] == 12
2205+
assert result[OtelAttr.CACHE_READ_INPUT_TOKENS] == 0
2206+
assert result[OtelAttr.REASONING_OUTPUT_TOKENS] == 34
2207+
2208+
21572209
def test_get_response_attributes_capture_usage_false():
21582210
"""Test _get_response_attributes skips usage when capture_usage is False."""
21592211
from unittest.mock import Mock
@@ -2164,13 +2216,22 @@ def test_get_response_attributes_capture_usage_false():
21642216
response.response_id = None
21652217
response.finish_reason = None
21662218
response.raw_representation = None
2167-
response.usage_details = {"input_token_count": 100, "output_token_count": 50}
2219+
response.usage_details = {
2220+
"input_token_count": 100,
2221+
"output_token_count": 50,
2222+
"cache_creation_input_token_count": 10,
2223+
"cache_read_input_token_count": 20,
2224+
"reasoning_output_token_count": 30,
2225+
}
21682226

21692227
attrs = {}
21702228
result = _get_response_attributes(attrs, response, capture_usage=False)
21712229

21722230
assert OtelAttr.INPUT_TOKENS not in result
21732231
assert OtelAttr.OUTPUT_TOKENS not in result
2232+
assert OtelAttr.CACHE_CREATION_INPUT_TOKENS not in result
2233+
assert OtelAttr.CACHE_READ_INPUT_TOKENS not in result
2234+
assert OtelAttr.REASONING_OUTPUT_TOKENS not in result
21742235

21752236

21762237
def test_get_response_attributes_capture_response_id_false():
@@ -2933,6 +2994,23 @@ def test_capture_response(span_exporter: InMemorySpanExporter):
29332994
assert spans[0].attributes.get(OtelAttr.OUTPUT_TOKENS) == 50
29342995

29352996

2997+
def test_capture_response_records_zero_token_usage():
2998+
"""Test _capture_response records zero-valued token usage."""
2999+
from agent_framework.observability import OtelAttr, _capture_response
3000+
3001+
span = Mock()
3002+
token_histogram = Mock()
3003+
attrs = {
3004+
OtelAttr.INPUT_TOKENS: 0,
3005+
OtelAttr.OUTPUT_TOKENS: 0,
3006+
}
3007+
3008+
_capture_response(span=span, attributes=attrs, token_usage_histogram=token_histogram)
3009+
3010+
span.set_attributes.assert_called_once_with(attrs)
3011+
assert token_histogram.record.call_count == 2
3012+
3013+
29363014
async def test_layer_ordering_span_sequence_with_function_calling(span_exporter: InMemorySpanExporter):
29373015
"""Test that with correct layer ordering, spans appear in the expected sequence.
29383016
@@ -3937,11 +4015,21 @@ class _InstrumentedAgent(AgentTelemetryLayer, RawAgent):
39374015
Content.from_function_call(call_id="call_1", name="get_weather", arguments='{"city": "Seattle"}')
39384016
],
39394017
),
3940-
usage_details=UsageDetails(input_token_count=2239, output_token_count=192),
4018+
usage_details=UsageDetails(
4019+
input_token_count=2239,
4020+
output_token_count=192,
4021+
cache_read_input_token_count=100,
4022+
reasoning_output_token_count=25,
4023+
),
39414024
),
39424025
ChatResponse(
39434026
messages=Message(role="assistant", contents=["The weather in Seattle is sunny."]),
3944-
usage_details=UsageDetails(input_token_count=2569, output_token_count=99),
4027+
usage_details=UsageDetails(
4028+
input_token_count=2569,
4029+
output_token_count=99,
4030+
cache_read_input_token_count=200,
4031+
reasoning_output_token_count=0,
4032+
),
39454033
),
39464034
]
39474035

@@ -3965,12 +4053,18 @@ class _InstrumentedAgent(AgentTelemetryLayer, RawAgent):
39654053
# Individual chat spans retain their own usage
39664054
assert chat_spans[0].attributes.get(OtelAttr.INPUT_TOKENS) == 2239
39674055
assert chat_spans[0].attributes.get(OtelAttr.OUTPUT_TOKENS) == 192
4056+
assert chat_spans[0].attributes.get(OtelAttr.CACHE_READ_INPUT_TOKENS) == 100
4057+
assert chat_spans[0].attributes.get(OtelAttr.REASONING_OUTPUT_TOKENS) == 25
39684058
assert chat_spans[1].attributes.get(OtelAttr.INPUT_TOKENS) == 2569
39694059
assert chat_spans[1].attributes.get(OtelAttr.OUTPUT_TOKENS) == 99
4060+
assert chat_spans[1].attributes.get(OtelAttr.CACHE_READ_INPUT_TOKENS) == 200
4061+
assert chat_spans[1].attributes.get(OtelAttr.REASONING_OUTPUT_TOKENS) == 0
39704062

39714063
# The invoke_agent span must report the aggregate across all LLM round-trips
39724064
assert agent_span.attributes.get(OtelAttr.INPUT_TOKENS) == 2239 + 2569
39734065
assert agent_span.attributes.get(OtelAttr.OUTPUT_TOKENS) == 192 + 99
4066+
assert agent_span.attributes.get(OtelAttr.CACHE_READ_INPUT_TOKENS) == 100 + 200
4067+
assert agent_span.attributes.get(OtelAttr.REASONING_OUTPUT_TOKENS) == 25
39744068

39754069

39764070
@pytest.mark.parametrize("enable_sensitive_data", [False], indirect=True)

python/packages/openai/agent_framework_openai/_chat_client.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2979,10 +2979,16 @@ def _parse_usage_from_openai(self, usage: ResponseUsage) -> UsageDetails | None:
29792979
output_token_count=usage.output_tokens,
29802980
total_token_count=usage.total_tokens,
29812981
)
2982-
if usage.input_tokens_details and usage.input_tokens_details.cached_tokens:
2983-
details["openai.cached_input_tokens"] = usage.input_tokens_details.cached_tokens # type: ignore[typeddict-unknown-key]
2984-
if usage.output_tokens_details and usage.output_tokens_details.reasoning_tokens:
2985-
details["openai.reasoning_tokens"] = usage.output_tokens_details.reasoning_tokens # type: ignore[typeddict-unknown-key]
2982+
if usage.input_tokens_details:
2983+
cached_tokens = cast("int | None", getattr(usage.input_tokens_details, "cached_tokens", None))
2984+
if cached_tokens is not None:
2985+
details["openai.cached_input_tokens"] = cached_tokens # type: ignore[typeddict-unknown-key]
2986+
details["cache_read_input_token_count"] = cached_tokens
2987+
if usage.output_tokens_details:
2988+
reasoning_tokens = cast("int | None", getattr(usage.output_tokens_details, "reasoning_tokens", None))
2989+
if reasoning_tokens is not None:
2990+
details["openai.reasoning_tokens"] = reasoning_tokens # type: ignore[typeddict-unknown-key]
2991+
details["reasoning_output_token_count"] = reasoning_tokens
29862992
return details
29872993

29882994
def _get_metadata_from_response(self, output: Any) -> dict[str, Any]:

python/packages/openai/agent_framework_openai/_chat_completion_client.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -765,15 +765,17 @@ def _parse_usage_from_openai(self, usage: CompletionUsage) -> UsageDetails:
765765
details["completion/accepted_prediction_tokens"] = tokens # type: ignore[typeddict-unknown-key]
766766
if tokens := usage.completion_tokens_details.audio_tokens:
767767
details["completion/audio_tokens"] = tokens # type: ignore[typeddict-unknown-key]
768-
if tokens := usage.completion_tokens_details.reasoning_tokens:
768+
if (tokens := usage.completion_tokens_details.reasoning_tokens) is not None:
769769
details["completion/reasoning_tokens"] = tokens # type: ignore[typeddict-unknown-key]
770+
details["reasoning_output_token_count"] = tokens
770771
if tokens := usage.completion_tokens_details.rejected_prediction_tokens:
771772
details["completion/rejected_prediction_tokens"] = tokens # type: ignore[typeddict-unknown-key]
772773
if usage.prompt_tokens_details:
773774
if tokens := usage.prompt_tokens_details.audio_tokens:
774775
details["prompt/audio_tokens"] = tokens # type: ignore[typeddict-unknown-key]
775-
if tokens := usage.prompt_tokens_details.cached_tokens:
776+
if (tokens := usage.prompt_tokens_details.cached_tokens) is not None:
776777
details["prompt/cached_tokens"] = tokens # type: ignore[typeddict-unknown-key]
778+
details["cache_read_input_token_count"] = tokens
777779
return details
778780

779781
def _parse_text_from_openai(self, choice: Choice | ChunkChoice) -> Content | None:

python/packages/openai/tests/openai/test_openai_chat_client.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3301,6 +3301,7 @@ def test_usage_details_with_cached_tokens() -> None:
33013301
assert details is not None
33023302
assert details["input_token_count"] == 200
33033303
assert details["openai.cached_input_tokens"] == 25
3304+
assert details["cache_read_input_token_count"] == 25
33043305

33053306

33063307
def test_usage_details_with_reasoning_tokens() -> None:
@@ -3319,6 +3320,49 @@ def test_usage_details_with_reasoning_tokens() -> None:
33193320
assert details is not None
33203321
assert details["output_token_count"] == 80
33213322
assert details["openai.reasoning_tokens"] == 30
3323+
assert details["reasoning_output_token_count"] == 30
3324+
3325+
3326+
def test_usage_details_with_zero_cached_and_reasoning_tokens() -> None:
3327+
"""Test _parse_usage_from_openai preserves zero-valued mapped usage details."""
3328+
client = OpenAIChatClient(model="test-model", api_key="test-key")
3329+
3330+
mock_usage = MagicMock()
3331+
mock_usage.input_tokens = 150
3332+
mock_usage.output_tokens = 80
3333+
mock_usage.total_tokens = 230
3334+
mock_usage.input_tokens_details = MagicMock()
3335+
mock_usage.input_tokens_details.cached_tokens = 0
3336+
mock_usage.output_tokens_details = MagicMock()
3337+
mock_usage.output_tokens_details.reasoning_tokens = 0
3338+
3339+
details = client._parse_usage_from_openai(mock_usage) # type: ignore
3340+
assert details is not None
3341+
assert details["openai.cached_input_tokens"] == 0
3342+
assert details["cache_read_input_token_count"] == 0
3343+
assert details["openai.reasoning_tokens"] == 0
3344+
assert details["reasoning_output_token_count"] == 0
3345+
3346+
3347+
def test_usage_details_omits_missing_cached_and_reasoning_tokens() -> None:
3348+
"""Test _parse_usage_from_openai omits missing mapped usage details."""
3349+
client = OpenAIChatClient(model="test-model", api_key="test-key")
3350+
3351+
mock_usage = MagicMock()
3352+
mock_usage.input_tokens = 150
3353+
mock_usage.output_tokens = 80
3354+
mock_usage.total_tokens = 230
3355+
mock_usage.input_tokens_details = MagicMock()
3356+
mock_usage.input_tokens_details.cached_tokens = None
3357+
mock_usage.output_tokens_details = MagicMock()
3358+
mock_usage.output_tokens_details.reasoning_tokens = None
3359+
3360+
details = client._parse_usage_from_openai(mock_usage) # type: ignore
3361+
assert details is not None
3362+
assert "openai.cached_input_tokens" not in details
3363+
assert "cache_read_input_token_count" not in details
3364+
assert "openai.reasoning_tokens" not in details
3365+
assert "reasoning_output_token_count" not in details
33223366

33233367

33243368
def test_get_metadata_from_response() -> None:

0 commit comments

Comments
 (0)