Skip to content

Commit 0bb329b

Browse files
Marcin VinietowiczGWeale
authored andcommitted
feat: emit OTel gen_ai.client.* metrics natively
gen_ai.client.operation.duration and gen_ai.client.token.usage already exist in OTel semconv (https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-metrics/), and are currently emitted by https://github.com/open-telemetry/opentelemetry-python-contrib/tree/main/instrumentation-genai/opentelemetry-instrumentation-google-genai, if configured. However, they have no agentic attributes. This change builds on top of the recently introduced metric instrumentation and emits those two metrics a new attribute: gen_ai.agent.name. In the future, gen_ai.agent.version will also be set. Semconv updates will be handled separately. When the google-genai instrumentation lib is active for a Gemini agent, we don't re-emit the gen_ai.client.* metrics to avoid double-counting. Change-Id: Ie610d25ca44671d3a31d4307ebf068a1c9deeaab
1 parent 460cb8c commit 0bb329b

7 files changed

Lines changed: 382 additions & 42 deletions

File tree

src/google/adk/flows/llm_flows/base_llm_flow.py

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
from ...models.google_llm import GoogleLLMVariant
4444
from ...models.llm_request import LlmRequest
4545
from ...models.llm_response import LlmResponse
46+
from ...telemetry import _instrumentation
4647
from ...telemetry import tracing
4748
from ...telemetry.tracing import trace_call_llm
4849
from ...telemetry.tracing import trace_send_data
@@ -376,18 +377,14 @@ async def _run_on_model_error_callbacks(
376377
return None
377378

378379
try:
379-
async with Aclosing(response_generator) as agen:
380-
async with tracing.use_inference_span(
381-
llm_request,
382-
invocation_context,
383-
model_response_event,
384-
) as gc_span:
380+
async with _instrumentation.record_inference_telemetry(
381+
llm_request,
382+
invocation_context,
383+
model_response_event,
384+
) as tel_ctx:
385+
async with Aclosing(response_generator) as agen:
385386
async for llm_response in agen:
386-
if gc_span:
387-
tracing.trace_inference_result(
388-
gc_span,
389-
llm_response,
390-
)
387+
tel_ctx.record_llm_response(llm_response)
391388
yield llm_response
392389
except Exception as model_error:
393390
callback_context = CallbackContext(

src/google/adk/telemetry/_instrumentation.py

Lines changed: 67 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
import contextlib
1818
import dataclasses
1919
import logging
20+
import sys
2021
import time
2122
from typing import Any
2223
from typing import AsyncIterator
@@ -32,18 +33,24 @@
3233
if TYPE_CHECKING:
3334
from ..agents.base_agent import BaseAgent
3435
from ..agents.invocation_context import InvocationContext
36+
from ..models.llm_request import LlmRequest
37+
from ..models.llm_response import LlmResponse
3538
from ..tools.base_tool import BaseTool
3639

3740
logger = logging.getLogger("google_adk." + __name__)
3841

3942

40-
def _get_elapsed_ms(span: trace.Span | None, fallback_start: float) -> float:
43+
def _get_elapsed_ms(
44+
span: trace.Span | tracing.GenerateContentSpan | None,
45+
fallback_start: float,
46+
) -> float:
4147
"""Guarantees consistent time source for duration calculation.
4248
4349
Note: This must be called with an ended span.
4450
4551
Args:
46-
span (trace.Span | None): The ended span to extract duration from.
52+
span (trace.Span | tracing.GenerateContentSpan | None): The ended span to
53+
extract duration from.
4754
fallback_start (float): Fallback start time in seconds (monotonic).
4855
4956
Returns:
@@ -52,6 +59,7 @@ def _get_elapsed_ms(span: trace.Span | None, fallback_start: float) -> float:
5259
if span is None:
5360
return (time.monotonic() - fallback_start) * 1000
5461

62+
span = span.span if hasattr(span, "span") else span
5563
start_ns = getattr(span, "start_time", None)
5664
end_ns = getattr(span, "end_time", None)
5765

@@ -66,9 +74,19 @@ def _get_elapsed_ms(span: trace.Span | None, fallback_start: float) -> float:
6674
class TelemetryContext:
6775
"""Stores all telemetry related state."""
6876

69-
otel_context: context_api.Context
77+
otel_context: context_api.Context | None = None
7078
function_response_event: event_lib.Event | None = None
7179
error_type: str | None = None
80+
span: tracing.GenerateContentSpan | trace.Span | None = None
81+
_llm_responses: list[LlmResponse] = dataclasses.field(default_factory=list)
82+
83+
@property
84+
def llm_responses(self) -> list[LlmResponse]:
85+
return self._llm_responses
86+
87+
def record_llm_response(self, response: LlmResponse) -> None:
88+
self._llm_responses.append(response)
89+
tracing.trace_inference_result(self.span, response)
7290

7391

7492
def _record_agent_metrics(
@@ -163,3 +181,49 @@ async def record_tool_execution(
163181
logger.exception(
164182
"Failed to record tool execution duration for tool %s", tool.name
165183
)
184+
185+
186+
@contextlib.asynccontextmanager
187+
async def record_inference_telemetry(
188+
llm_request: LlmRequest,
189+
invocation_context: InvocationContext,
190+
model_response_event: event_lib.Event,
191+
) -> AsyncIterator[TelemetryContext]:
192+
"""Unified async context manager for consolidated inference metrics."""
193+
start_time = time.monotonic()
194+
tel_ctx: TelemetryContext = TelemetryContext()
195+
try:
196+
async with tracing.use_inference_span(
197+
llm_request,
198+
invocation_context,
199+
model_response_event,
200+
) as gc_span:
201+
tel_ctx.span = gc_span
202+
yield tel_ctx
203+
finally:
204+
inference_error = sys.exc_info()[1]
205+
elapsed_ms = _get_elapsed_ms(tel_ctx.span, start_time)
206+
agent = invocation_context.agent
207+
try:
208+
if agent is not None and tracing._should_emit_native_telemetry(agent):
209+
_metrics.record_client_operation_duration(
210+
agent_name=agent.name,
211+
elapsed_ms=elapsed_ms,
212+
llm_request=llm_request,
213+
responses=tel_ctx.llm_responses,
214+
error=(
215+
inference_error
216+
if isinstance(inference_error, Exception)
217+
else None
218+
),
219+
)
220+
_metrics.record_client_token_usage(
221+
agent_name=agent.name,
222+
llm_request=llm_request,
223+
responses=tel_ctx.llm_responses,
224+
)
225+
except Exception: # pylint: disable=broad-exception-caught
226+
logger.exception(
227+
"Failed to record inference metrics for agent %s",
228+
agent.name if agent is not None else "<unknown>",
229+
)

src/google/adk/telemetry/_metrics.py

Lines changed: 100 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,28 +15,31 @@
1515
from __future__ import annotations
1616

1717
import logging
18+
from typing import TYPE_CHECKING
1819

1920
from google.adk import version
20-
from google.adk.events.event import Event
21+
from google.adk.telemetry import tracing
2122
from google.genai import types
2223
from opentelemetry import metrics
2324
from opentelemetry.semconv._incubating.attributes import gen_ai_attributes
25+
from opentelemetry.semconv._incubating.metrics import gen_ai_metrics
2426
from opentelemetry.semconv.attributes import error_attributes
2527

28+
if TYPE_CHECKING:
29+
from google.adk.events.event import Event
30+
from google.adk.models.llm_request import LlmRequest
31+
from google.adk.models.llm_response import LlmResponse
32+
2633
logger = logging.getLogger("google_adk." + __name__)
2734

28-
# TODO(b/477553411): add these attributes to Otel semconv.
2935
GEN_AI_AGENT_VERSION = "gen_ai.agent.version"
3036
GEN_AI_TOOL_VERSION = "gen_ai.tool.version"
3137

32-
# Initialize meter
3338
meter = metrics.get_meter(
3439
name="gcp.vertex.agent",
3540
version=version.__version__,
36-
# TODO(b/477553411): set schema version after OTel semconv updates.
3741
)
3842

39-
# Define histograms
4043
_agent_invocation_duration = meter.create_histogram(
4144
"gen_ai.agent.invocation.duration",
4245
unit="ms",
@@ -62,6 +65,10 @@
6265
unit="1",
6366
description="Length of agentic workflow (# of events).",
6467
)
68+
_client_operation_duration = (
69+
gen_ai_metrics.create_gen_ai_client_operation_duration(meter)
70+
)
71+
_client_token_usage = gen_ai_metrics.create_gen_ai_client_token_usage(meter)
6572

6673

6774
def record_agent_invocation_duration(
@@ -121,6 +128,90 @@ def record_tool_execution_duration(
121128
_tool_execution_duration.record(elapsed_ms, attributes=attrs)
122129

123130

131+
def record_client_operation_duration(
132+
agent_name: str,
133+
elapsed_ms: float,
134+
llm_request: LlmRequest,
135+
responses: list[LlmResponse],
136+
error: Exception | None = None,
137+
):
138+
"""Encapsulates the business logic for tracking gen_ai client operation duration."""
139+
140+
attrs = {
141+
gen_ai_attributes.GEN_AI_AGENT_NAME: agent_name,
142+
gen_ai_attributes.GEN_AI_OPERATION_NAME: "generate_content",
143+
gen_ai_attributes.GEN_AI_PROVIDER_NAME: _get_provider_name(),
144+
}
145+
if llm_request.model:
146+
attrs[gen_ai_attributes.GEN_AI_REQUEST_MODEL] = llm_request.model
147+
148+
if responses:
149+
response_model = responses[-1].model_version or llm_request.model
150+
if response_model:
151+
attrs[gen_ai_attributes.GEN_AI_RESPONSE_MODEL] = response_model
152+
153+
if error is not None:
154+
attrs[error_attributes.ERROR_TYPE] = type(error).__name__
155+
156+
_client_operation_duration.record(elapsed_ms / 1000.0, attributes=attrs)
157+
158+
159+
def record_client_token_usage(
160+
agent_name: str,
161+
llm_request: LlmRequest,
162+
responses: list[LlmResponse],
163+
):
164+
"""Encapsulates the business logic for tracking gen_ai client token usage."""
165+
if not responses:
166+
return
167+
168+
# The assumption is that token usage in streaming responses is cumulative.
169+
# The last response chunk contains the total usage for the entire request.
170+
# Summing them up across all response chunks would result in overcounting.
171+
last_response = responses[-1]
172+
if not last_response.usage_metadata:
173+
logger.warning(
174+
"Skipping missing token usage metadata for agent %s and model %s",
175+
agent_name,
176+
llm_request.model,
177+
)
178+
return
179+
180+
# OTel semconv for `gen_ai.client.token.usage` states that token counts should
181+
# be categorized under `gen_ai.token.type` as either "input" or "output".
182+
# We aggregate prompt and tool use tokens for "input", and candidates and
183+
# thoughts tokens for "output".
184+
# `cached_content_token_count` is omitted as it's already included in prompt tokens.
185+
# `total_token_count` is omitted as SemConv expects input/output breakdown.
186+
usage = last_response.usage_metadata
187+
input_token_count = (usage.prompt_token_count or 0) + (
188+
usage.tool_use_prompt_token_count or 0
189+
)
190+
output_token_count = (usage.candidates_token_count or 0) + (
191+
usage.thoughts_token_count or 0
192+
)
193+
response_model = last_response.model_version or llm_request.model
194+
base_attrs = {
195+
gen_ai_attributes.GEN_AI_AGENT_NAME: agent_name,
196+
gen_ai_attributes.GEN_AI_OPERATION_NAME: "generate_content",
197+
gen_ai_attributes.GEN_AI_PROVIDER_NAME: _get_provider_name(),
198+
}
199+
if llm_request.model:
200+
base_attrs[gen_ai_attributes.GEN_AI_REQUEST_MODEL] = llm_request.model
201+
if response_model:
202+
base_attrs[gen_ai_attributes.GEN_AI_RESPONSE_MODEL] = response_model
203+
204+
if input_token_count > 0:
205+
input_attrs = base_attrs.copy()
206+
input_attrs[gen_ai_attributes.GEN_AI_TOKEN_TYPE] = "input"
207+
_client_token_usage.record(input_token_count, attributes=input_attrs)
208+
209+
if output_token_count > 0:
210+
output_attrs = base_attrs.copy()
211+
output_attrs[gen_ai_attributes.GEN_AI_TOKEN_TYPE] = "output"
212+
_client_token_usage.record(output_token_count, attributes=output_attrs)
213+
214+
124215
def _get_content_size(
125216
content: types.Content | None,
126217
) -> int:
@@ -133,3 +224,7 @@ def _get_content_size(
133224
if part.inline_data and part.inline_data.data:
134225
size += len(part.inline_data.data)
135226
return size
227+
228+
229+
def _get_provider_name() -> str:
230+
return tracing._guess_gemini_system_name()

src/google/adk/telemetry/tracing.py

Lines changed: 25 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -570,22 +570,19 @@ def use_generate_content_span(
570570
log_only_common_attributes = {}
571571
if invocation_context.session.user_id is not None:
572572
log_only_common_attributes[USER_ID] = invocation_context.session.user_id
573-
if (
574-
_is_gemini_agent(invocation_context.agent)
575-
and _instrumented_with_opentelemetry_instrumentation_google_genai()
576-
):
577-
with _use_extra_generate_content_attributes(
578-
common_attributes,
579-
log_only_extra_attributes=log_only_common_attributes,
580-
):
581-
yield
582-
else:
573+
if _should_emit_native_telemetry(invocation_context.agent):
583574
with _use_native_generate_content_span_stable_semconv(
584575
llm_request=llm_request,
585576
common_attributes=common_attributes,
586577
log_only_common_attributes=log_only_common_attributes,
587578
) as span:
588579
yield span.span
580+
else:
581+
with _use_extra_generate_content_attributes(
582+
common_attributes,
583+
log_only_extra_attributes=log_only_common_attributes,
584+
):
585+
yield
589586

590587

591588
@asynccontextmanager
@@ -610,16 +607,7 @@ async def use_inference_span(
610607
log_only_common_attributes = {}
611608
if invocation_context.session.user_id is not None:
612609
log_only_common_attributes[USER_ID] = invocation_context.session.user_id
613-
if (
614-
_is_gemini_agent(invocation_context.agent)
615-
and _instrumented_with_opentelemetry_instrumentation_google_genai()
616-
):
617-
with _use_extra_generate_content_attributes(
618-
common_attributes,
619-
log_only_extra_attributes=log_only_common_attributes,
620-
):
621-
yield
622-
else:
610+
if _should_emit_native_telemetry(invocation_context.agent):
623611
async with _use_native_generate_content_span(
624612
llm_request=llm_request,
625613
common_attributes=common_attributes,
@@ -640,6 +628,12 @@ async def use_inference_span(
640628
gc_span.operation_details_attributes,
641629
gc_span.operation_details_common_attributes,
642630
)
631+
else:
632+
with _use_extra_generate_content_attributes(
633+
common_attributes,
634+
log_only_extra_attributes=log_only_common_attributes,
635+
):
636+
yield
643637

644638

645639
def _should_log_prompt_response_content() -> bool:
@@ -683,6 +677,17 @@ def _instrumented_with_opentelemetry_instrumentation_google_genai() -> bool:
683677
return False
684678

685679

680+
def _should_emit_native_telemetry(agent: BaseAgent) -> bool:
681+
"""If the google-genai instrumentation lib is active AND this is a Gemini agent, then the lib already emits inference metrics."""
682+
if (
683+
_instrumented_with_opentelemetry_instrumentation_google_genai()
684+
and _is_gemini_agent(agent)
685+
):
686+
return False
687+
688+
return True
689+
690+
686691
@contextmanager
687692
def _use_extra_generate_content_attributes(
688693
extra_attributes: Mapping[str, AttributeValue],

0 commit comments

Comments
 (0)