Skip to content

Commit 6942aac

Browse files
google-genai-bot authored and copybara-github committed
feat: add native OpenTelemetry agentic metrics
New metrics added:
- gen_ai.agent.invocation.duration
- gen_ai.tool.execution.duration
- gen_ai.agent.request.size
- gen_ai.agent.response.size
- gen_ai.agent.workflow.steps

New metrics and attributes are in line with OTel semantic conventions where possible, and will be formally added to the semconv later.

PiperOrigin-RevId: 904294814
1 parent 179380f commit 6942aac

9 files changed

Lines changed: 953 additions & 70 deletions

File tree

src/google/adk/agents/base_agent.py

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,7 @@
4242
from ..events.event_actions import EventActions
4343
from ..features import experimental
4444
from ..features import FeatureName
45-
from ..telemetry import tracing
46-
from ..telemetry.tracing import tracer
45+
from ..telemetry import _instrumentation
4746
from ..utils.context_utils import Aclosing
4847
from .base_agent_config import BaseAgentConfig
4948
from .callback_context import CallbackContext
@@ -285,9 +284,8 @@ async def run_async(
285284
Event: the events generated by the agent.
286285
"""
287286

288-
with tracer.start_as_current_span(f'invoke_agent {self.name}') as span:
289-
ctx = self._create_invocation_context(parent_context)
290-
tracing.trace_agent_invocation(span, self, ctx)
287+
ctx = self._create_invocation_context(parent_context)
288+
async with _instrumentation.record_agent_invocation(ctx, self):
291289
if event := await self._handle_before_agent_callback(ctx):
292290
yield event
293291
if ctx.end_invocation:
@@ -318,9 +316,8 @@ async def run_live(
318316
Event: the events generated by the agent.
319317
"""
320318

321-
with tracer.start_as_current_span(f'invoke_agent {self.name}') as span:
322-
ctx = self._create_invocation_context(parent_context)
323-
tracing.trace_agent_invocation(span, self, ctx)
319+
ctx = self._create_invocation_context(parent_context)
320+
async with _instrumentation.record_agent_invocation(ctx, self):
324321
if event := await self._handle_before_agent_callback(ctx):
325322
yield event
326323
if ctx.end_invocation:

src/google/adk/flows/llm_flows/functions.py

Lines changed: 11 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@
2222
from concurrent.futures import ThreadPoolExecutor
2323
import contextvars
2424
import copy
25-
import functools
2625
import inspect
2726
import logging
2827
import threading
@@ -43,8 +42,8 @@
4342
from ...auth.auth_tool import AuthToolArguments
4443
from ...events.event import Event
4544
from ...events.event_actions import EventActions
45+
from ...telemetry import _instrumentation
4646
from ...telemetry.tracing import trace_merged_tool_calls
47-
from ...telemetry.tracing import trace_tool_call
4847
from ...telemetry.tracing import tracer
4948
from ...tools.base_tool import BaseTool
5049
from ...tools.tool_confirmation import ToolConfirmation
@@ -591,22 +590,11 @@ async def _run_with_trace():
591590
)
592591
return function_response_event
593592

594-
with tracer.start_as_current_span(f'execute_tool {tool.name}'):
595-
function_response_event = None
596-
caught_error = None
597-
try:
598-
function_response_event = await _run_with_trace()
599-
return function_response_event
600-
except Exception as e:
601-
caught_error = e
602-
raise
603-
finally:
604-
trace_tool_call(
605-
tool=tool,
606-
args=function_args,
607-
function_response_event=function_response_event,
608-
error=caught_error,
609-
)
593+
async with _instrumentation.record_tool_execution(
594+
tool, agent, invocation_context, function_args
595+
) as tel_ctx:
596+
tel_ctx.function_response_event = await _run_with_trace()
597+
return tel_ctx.function_response_event
610598

611599

612600
async def handle_function_calls_live(
@@ -830,22 +818,11 @@ async def _run_with_trace():
830818
)
831819
return function_response_event
832820

833-
with tracer.start_as_current_span(f'execute_tool {tool.name}'):
834-
function_response_event = None
835-
caught_error = None
836-
try:
837-
function_response_event = await _run_with_trace()
838-
return function_response_event
839-
except Exception as e:
840-
caught_error = e
841-
raise
842-
finally:
843-
trace_tool_call(
844-
tool=tool,
845-
args=function_args,
846-
function_response_event=function_response_event,
847-
error=caught_error,
848-
)
821+
async with _instrumentation.record_tool_execution(
822+
tool, agent, invocation_context, function_args
823+
) as tel_ctx:
824+
tel_ctx.function_response_event = await _run_with_trace()
825+
return tel_ctx.function_response_event
849826

850827

851828
async def _process_function_live_helper(
Lines changed: 154 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,154 @@
1+
# Copyright 2026 Google LLC
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from __future__ import annotations
16+
17+
import contextlib
18+
import dataclasses
19+
import logging
20+
import sys
21+
import time
22+
from typing import Any
23+
from typing import AsyncIterator
24+
from typing import TYPE_CHECKING
25+
26+
logger = logging.getLogger("google_adk." + __name__)
27+
28+
from opentelemetry import trace
29+
import opentelemetry.context as context_api
30+
31+
from . import _metrics
32+
from . import tracing
33+
from ..events import event as event_lib
34+
35+
if TYPE_CHECKING:
36+
from ..agents.base_agent import BaseAgent
37+
from ..agents.invocation_context import InvocationContext
38+
39+
40+
def _get_elapsed_ms(span: trace.Span | None, fallback_start: float) -> float:
  """Returns an elapsed duration in milliseconds for a finished operation.

  The span's own `start_time` / `end_time` are preferred so that the reported
  metric agrees with the duration visible on the trace. When they cannot be
  used, the duration is measured from `fallback_start` with `time.monotonic()`.

  Note: This must be called with an ended span; a span without an integer
  `end_time` is handled by the monotonic fallback.

  Args:
    span: The ended span to extract the duration from, or None when no span
      was created.
    fallback_start: A `time.monotonic()` reading (seconds) taken when the
      operation started.

  Returns:
    The elapsed duration in milliseconds.
  """
  if span is not None:
    # Not every span object exposes timestamps, hence the defensive getattr.
    start_ns = getattr(span, "start_time", None)
    end_ns = getattr(span, "end_time", None)
    if (
        isinstance(start_ns, int)
        and isinstance(end_ns, int)
        # A span that "ends before it starts" (e.g. the clock was adjusted in
        # between) would produce a negative duration, which is meaningless as
        # an elapsed time; use the monotonic measurement instead.
        and end_ns >= start_ns
    ):
      return (end_ns - start_ns) / 1e6  # Convert ns to ms

  # No span, missing timestamps, or unusable timestamps.
  return (time.monotonic() - fallback_start) * 1000
63+
64+
65+
@dataclasses.dataclass
class TelemetryContext:
  """Mutable bag of telemetry state shared with an instrumented code block.

  Attributes:
    otel_context: The OpenTelemetry context captured by the code that created
      this object.
    function_response_event: The event produced by the instrumented block, if
      any. Starts as None and is filled in by the code running inside the
      block.
  """

  otel_context: context_api.Context
  function_response_event: event_lib.Event | None = dataclasses.field(
      default=None
  )
71+
72+
73+
@contextlib.asynccontextmanager
async def record_agent_invocation(
    ctx: InvocationContext, agent: BaseAgent
) -> AsyncIterator[TelemetryContext]:
  """Traces one agent invocation and records its metrics.

  Opens an `invoke_agent <agent name>` span, annotates it through
  `tracing.trace_agent_invocation`, records the request size, and then yields
  to the caller. When the wrapped block finishes (normally or not), the
  invocation duration, response size and workflow step count are recorded.

  Metric recording is best-effort: a failure inside `_metrics` is logged and
  suppressed so that it can neither abort the agent run nor replace the
  exception raised by the wrapped block.

  Args:
    ctx: The invocation context of the agent run. Its `user_content` and
      `session.events` are passed to the metric recorders.
    agent: The agent being invoked; its name labels the span and the metrics.

  Yields:
    A TelemetryContext holding the OpenTelemetry context that is current
    inside the invocation span.

  Raises:
    Exception: Any exception raised by the wrapped block is re-raised
      unchanged after it has been noted for the duration metric.
  """
  start_time = time.monotonic()
  caught_error: Exception | None = None
  # Stays None if the span could not be created, which makes
  # `_get_elapsed_ms` fall back to `start_time`.
  span: trace.Span | None = None
  try:
    with tracing.tracer.start_as_current_span(
        f"invoke_agent {agent.name}"
    ) as span:
      tracing.trace_agent_invocation(span, agent, ctx)
      try:
        _metrics.record_agent_request_size(agent.name, ctx.user_content)
      except Exception:  # Telemetry must not prevent the agent from running.
        logger.warning(
            "Failed to record request size metric for agent %s",
            agent.name,
            exc_info=True,
        )
      yield TelemetryContext(otel_context=context_api.get_current())
  except Exception as e:
    caught_error = e
    raise
  finally:
    # The span has ended by the time we get here, so its timestamps are final.
    try:
      elapsed_ms = _get_elapsed_ms(span, start_time)
      session_events = ctx.session.events
      _metrics.record_agent_invocation_duration(
          agent.name,
          elapsed_ms,
          ctx.user_content,
          session_events,
          caught_error,
      )
      _metrics.record_agent_response_size(agent.name, session_events)
      _metrics.record_agent_workflow_steps(agent.name, len(session_events))
    except Exception:  # Never mask the wrapped block's own outcome.
      logger.warning(
          "Failed to record invocation metrics for agent %s",
          agent.name,
          exc_info=True,
      )
103+
104+
105+
@contextlib.asynccontextmanager
async def record_tool_execution(
    tool: BaseTool,
    agent: BaseAgent,
    invocation_context: InvocationContext,
    function_args: dict[str, Any],
) -> AsyncIterator[TelemetryContext]:
  """Traces one tool execution and records its duration metric.

  Opens an `execute_tool <tool name>` span and yields a TelemetryContext. The
  caller is expected to store the tool's response event on
  `function_response_event` of the yielded object. While the span is still
  current, `tracing.trace_tool_call` is invoked with that event (or with the
  caught error). After the span has ended, the tool execution duration is
  recorded.

  Metric recording is best-effort: a failure inside `_metrics` is logged and
  suppressed so that it cannot replace the tool's own result or exception.

  Args:
    tool: The tool being executed; its name labels the span and the metric.
    agent: The agent that issued the tool call; its name labels the metric.
    invocation_context: The current invocation context. Its `user_content` is
      passed to the metric recorder as the input content.
    function_args: The arguments the tool is called with, forwarded to
      `tracing.trace_tool_call`.

  Yields:
    A TelemetryContext holding the OpenTelemetry context that is current
    inside the tool span.

  Raises:
    Exception: Any exception raised by the wrapped block is re-raised
      unchanged after it has been reported to the trace and the metric.
  """
  start_time = time.monotonic()
  caught_error: Exception | None = None
  # Both stay None if the span could not be created.
  span: trace.Span | None = None
  tel_ctx: TelemetryContext | None = None
  try:
    with tracing.tracer.start_as_current_span(
        f"execute_tool {tool.name}"
    ) as span:
      tel_ctx = TelemetryContext(otel_context=context_api.get_current())
      try:
        yield tel_ctx
      except Exception as e:
        caught_error = e
        raise
      finally:
        # Runs while the tool span is still open. On failure no response
        # event is reported, only the error.
        tracing.trace_tool_call(
            tool=tool,
            args=function_args,
            function_response_event=(
                tel_ctx.function_response_event
                if caught_error is None
                else None
            ),
            error=caught_error,
        )
  finally:
    # The span has ended by the time we get here, so its timestamps are final.
    try:
      result_event = (
          tel_ctx.function_response_event if tel_ctx is not None else None
      )
      output_content = (
          result_event.content
          if isinstance(result_event, event_lib.Event)
          else None
      )
      _metrics.record_tool_execution_duration(
          tool_name=tool.name,
          agent_name=agent.name,
          elapsed_ms=_get_elapsed_ms(span, start_time),
          input_content=invocation_context.user_content,
          output_content=output_content,
          error=caught_error,
      )
    except Exception:  # Never mask the tool's own outcome.
      logger.warning(
          "Failed to record execution metrics for tool %s",
          tool.name,
          exc_info=True,
      )

0 commit comments

Comments
 (0)