Skip to content

Commit d27b8ff

Browse files
authored
fix(telemetry): use per-invocation usage in agent span attributes (#2017)
1 parent ca6f599 commit d27b8ff

2 files changed

Lines changed: 73 additions & 10 deletions

File tree

src/strands/telemetry/tracer.py

Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -83,8 +83,8 @@ class Tracer:
8383
When the OTEL_EXPORTER_OTLP_ENDPOINT environment variable is set, traces
8484
are sent to the OTLP endpoint.
8585
86-
Both attributes are controlled by including "gen_ai_latest_experimental" or "gen_ai_tool_definitions",
87-
respectively, in the OTEL_SEMCONV_STABILITY_OPT_IN environment variable.
86+
Both attributes are controlled by including "gen_ai_latest_experimental", "gen_ai_tool_definitions",
87+
or "gen_ai_use_latest_invocation_tokens", respectively, in the OTEL_SEMCONV_STABILITY_OPT_IN environment variable.
8888
"""
8989

9090
def __init__(self) -> None:
@@ -100,6 +100,7 @@ def __init__(self) -> None:
100100
## To-do: should not set below attributes directly, use env var instead
101101
self.use_latest_genai_conventions = "gen_ai_latest_experimental" in opt_in_values
102102
self._include_tool_definitions = "gen_ai_tool_definitions" in opt_in_values
103+
self._use_latest_invocation_tokens = "gen_ai_use_latest_invocation_tokens" in opt_in_values
103104

104105
def _parse_semconv_opt_in(self) -> set[str]:
105106
"""Parse the OTEL_SEMCONV_STABILITY_OPT_IN environment variable.
@@ -690,16 +691,26 @@ def end_agent_span(
690691
if hasattr(response, "metrics") and hasattr(response.metrics, "accumulated_usage"):
691692
if self.is_langfuse:
692693
attributes.update({"langfuse.observation.type": "span"})
693-
accumulated_usage = response.metrics.accumulated_usage
694+
if self._use_latest_invocation_tokens:
695+
latest_invocation = response.metrics.latest_agent_invocation
696+
if latest_invocation is None:
697+
logger.warning(
698+
"latest_agent_invocation is None despite _use_latest_invocation_tokens being set"
699+
)
700+
usage: Usage = Usage(inputTokens=0, outputTokens=0, totalTokens=0)
701+
else:
702+
usage = latest_invocation.usage
703+
else:
704+
usage = response.metrics.accumulated_usage
694705
attributes.update(
695706
{
696-
"gen_ai.usage.prompt_tokens": accumulated_usage["inputTokens"],
697-
"gen_ai.usage.completion_tokens": accumulated_usage["outputTokens"],
698-
"gen_ai.usage.input_tokens": accumulated_usage["inputTokens"],
699-
"gen_ai.usage.output_tokens": accumulated_usage["outputTokens"],
700-
"gen_ai.usage.total_tokens": accumulated_usage["totalTokens"],
701-
"gen_ai.usage.cache_read_input_tokens": accumulated_usage.get("cacheReadInputTokens", 0),
702-
"gen_ai.usage.cache_write_input_tokens": accumulated_usage.get("cacheWriteInputTokens", 0),
707+
"gen_ai.usage.prompt_tokens": usage["inputTokens"],
708+
"gen_ai.usage.completion_tokens": usage["outputTokens"],
709+
"gen_ai.usage.input_tokens": usage["inputTokens"],
710+
"gen_ai.usage.output_tokens": usage["outputTokens"],
711+
"gen_ai.usage.total_tokens": usage["totalTokens"],
712+
"gen_ai.usage.cache_read_input_tokens": usage.get("cacheReadInputTokens", 0),
713+
"gen_ai.usage.cache_write_input_tokens": usage.get("cacheWriteInputTokens", 0),
703714
}
704715
)
705716

tests/strands/telemetry/test_tracer.py

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import json
2+
import logging
23
import os
34
from datetime import date, datetime, timezone
45
from unittest import mock
@@ -1053,6 +1054,57 @@ def test_end_agent_span_latest_conventions(mock_span, monkeypatch):
10531054
mock_span.end.assert_called_once()
10541055

10551056

1057+
def test_end_agent_span_uses_per_invocation_usage_when_opted_in(mock_span, monkeypatch):
1058+
"""Test that agent span reports per-invocation usage when gen_ai_use_latest_invocation_tokens is set."""
1059+
monkeypatch.setenv("OTEL_SEMCONV_STABILITY_OPT_IN", "gen_ai_use_latest_invocation_tokens")
1060+
tracer = Tracer()
1061+
1062+
mock_invocation = mock.MagicMock()
1063+
mock_invocation.usage = {"inputTokens": 100, "outputTokens": 50, "totalTokens": 150}
1064+
1065+
mock_metrics = mock.MagicMock()
1066+
mock_metrics.accumulated_usage = {"inputTokens": 1000, "outputTokens": 500, "totalTokens": 1500}
1067+
mock_metrics.latest_agent_invocation = mock_invocation
1068+
1069+
mock_response = mock.MagicMock()
1070+
mock_response.metrics = mock_metrics
1071+
mock_response.stop_reason = "end_turn"
1072+
mock_response.__str__ = mock.MagicMock(return_value="Agent response")
1073+
1074+
tracer.end_agent_span(mock_span, mock_response)
1075+
1076+
call_args = mock_span.set_attributes.call_args[0][0]
1077+
assert call_args["gen_ai.usage.input_tokens"] == 100
1078+
assert call_args["gen_ai.usage.output_tokens"] == 50
1079+
assert call_args["gen_ai.usage.total_tokens"] == 150
1080+
assert call_args["gen_ai.usage.prompt_tokens"] == 100
1081+
assert call_args["gen_ai.usage.completion_tokens"] == 50
1082+
1083+
1084+
def test_end_agent_span_warns_when_opted_in_but_no_invocations(mock_span, monkeypatch, caplog):
1085+
"""Test warning and zero usage when opted in but no agent invocations exist."""
1086+
monkeypatch.setenv("OTEL_SEMCONV_STABILITY_OPT_IN", "gen_ai_use_latest_invocation_tokens")
1087+
tracer = Tracer()
1088+
1089+
mock_metrics = mock.MagicMock()
1090+
mock_metrics.accumulated_usage = {"inputTokens": 200, "outputTokens": 100, "totalTokens": 300}
1091+
mock_metrics.latest_agent_invocation = None
1092+
1093+
mock_response = mock.MagicMock()
1094+
mock_response.metrics = mock_metrics
1095+
mock_response.stop_reason = "end_turn"
1096+
mock_response.__str__ = mock.MagicMock(return_value="Agent response")
1097+
1098+
with caplog.at_level(logging.WARNING):
1099+
tracer.end_agent_span(mock_span, mock_response)
1100+
1101+
assert "latest_agent_invocation is None" in caplog.text
1102+
call_args = mock_span.set_attributes.call_args[0][0]
1103+
assert call_args["gen_ai.usage.input_tokens"] == 0
1104+
assert call_args["gen_ai.usage.output_tokens"] == 0
1105+
assert call_args["gen_ai.usage.total_tokens"] == 0
1106+
1107+
10561108
def test_end_model_invoke_span_with_cache_metrics(mock_span):
10571109
"""Test ending a model invoke span with cache metrics."""
10581110
tracer = Tracer()

0 commit comments

Comments (0)