1515from __future__ import annotations
1616
1717import logging
18+ from typing import TYPE_CHECKING
1819
1920from google .adk import version
20- from google .adk .events . event import Event
21+ from google .adk .telemetry import tracing
2122from google .genai import types
2223from opentelemetry import metrics
2324from opentelemetry .semconv ._incubating .attributes import gen_ai_attributes
25+ from opentelemetry .semconv ._incubating .metrics import gen_ai_metrics
2426from opentelemetry .semconv .attributes import error_attributes
2527
28+ if TYPE_CHECKING :
29+ from google .adk .events .event import Event
30+ from google .adk .models .llm_request import LlmRequest
31+ from google .adk .models .llm_response import LlmResponse
32+
# Module logger; its name is this module's name prefixed with "google_adk.".
logger = logging.getLogger("google_adk." + __name__)

# Attribute keys for the agent version and the tool version.
GEN_AI_AGENT_VERSION = "gen_ai.agent.version"
GEN_AI_TOOL_VERSION = "gen_ai.tool.version"

# Meter from which the instruments in this module are created. It is tagged
# with the installed ADK version.
meter = metrics.get_meter(name="gcp.vertex.agent", version=version.__version__)
3842
39- # Define histograms
4043_agent_invocation_duration = meter .create_histogram (
4144 "gen_ai.agent.invocation.duration" ,
4245 unit = "ms" ,
6265 unit = "1" ,
6366 description = "Length of agentic workflow (# of events)." ,
6467)
# GenAI client instruments, built on this module's meter by the factory
# helpers in the OpenTelemetry `gen_ai_metrics` module.
_create_operation_duration = (
    gen_ai_metrics.create_gen_ai_client_operation_duration
)
_client_operation_duration = _create_operation_duration(meter)
_client_token_usage = gen_ai_metrics.create_gen_ai_client_token_usage(meter)
6572
6673
6774def record_agent_invocation_duration (
@@ -121,6 +128,90 @@ def record_tool_execution_duration(
121128 _tool_execution_duration .record (elapsed_ms , attributes = attrs )
122129
123130
def record_client_operation_duration(
    agent_name: str,
    elapsed_ms: float,
    llm_request: LlmRequest,
    responses: list[LlmResponse],
    error: Exception | None = None,
):
  """Records the duration of one LLM call on the client-operation histogram.

  Args:
    agent_name: Name of the agent that issued the request.
    elapsed_ms: Duration of the call in milliseconds. It is converted to
      seconds before being recorded.
    llm_request: The request that was sent. Its `model` is reported as the
      request model and is the fallback for the response model.
    responses: Responses received for the request. Only the last one is
      consulted, for its `model_version`. May be empty.
    error: Exception raised by the call, if any. Its class name is recorded
      as the error type.
  """
  attributes = {
      gen_ai_attributes.GEN_AI_AGENT_NAME: agent_name,
      gen_ai_attributes.GEN_AI_OPERATION_NAME: "generate_content",
      gen_ai_attributes.GEN_AI_PROVIDER_NAME: _get_provider_name(),
  }

  request_model = llm_request.model
  if request_model:
    attributes[gen_ai_attributes.GEN_AI_REQUEST_MODEL] = request_model

  # Without any response there is nothing to report as the response model,
  # not even the request model as a fallback.
  response_model = None
  if responses:
    response_model = responses[-1].model_version or request_model
  if response_model:
    attributes[gen_ai_attributes.GEN_AI_RESPONSE_MODEL] = response_model

  if error is not None:
    attributes[error_attributes.ERROR_TYPE] = type(error).__name__

  # The caller measures in milliseconds; the value is recorded in seconds.
  elapsed_s = elapsed_ms / 1000.0
  _client_operation_duration.record(elapsed_s, attributes=attributes)
157+
158+
def record_client_token_usage(
    agent_name: str,
    llm_request: LlmRequest,
    responses: list[LlmResponse],
):
  """Records input and output token counts for one LLM request.

  Nothing is recorded when `responses` is empty. When the last response has
  no usage metadata, a warning is logged and nothing is recorded.

  Args:
    agent_name: Name of the agent that issued the request.
    llm_request: The request that was sent. Its `model` is reported as the
      request model and is the fallback for the response model.
    responses: Responses received for the request. Only the last one is
      consulted.
  """
  if not responses:
    return

  # Only the final response is read. Usage in streamed responses is assumed
  # to be cumulative, so the last chunk already carries the totals for the
  # whole request and adding up every chunk would overcount.
  final_response = responses[-1]
  usage = final_response.usage_metadata
  if not usage:
    logger.warning(
        "Skipping missing token usage metadata for agent %s and model %s",
        agent_name,
        llm_request.model,
    )
    return

  # OTel semconv for `gen_ai.client.token.usage` splits counts by
  # `gen_ai.token.type` into "input" and "output":
  #   input  = prompt tokens + tool-use prompt tokens
  #   output = candidate tokens + thoughts tokens
  # `cached_content_token_count` is left out because prompt tokens already
  # include it; `total_token_count` is left out because semconv expects the
  # input/output breakdown.
  tokens_by_type = {
      "input": (usage.prompt_token_count or 0) + (
          usage.tool_use_prompt_token_count or 0
      ),
      "output": (usage.candidates_token_count or 0) + (
          usage.thoughts_token_count or 0
      ),
  }

  response_model = final_response.model_version or llm_request.model
  shared_attributes = {
      gen_ai_attributes.GEN_AI_AGENT_NAME: agent_name,
      gen_ai_attributes.GEN_AI_OPERATION_NAME: "generate_content",
      gen_ai_attributes.GEN_AI_PROVIDER_NAME: _get_provider_name(),
  }
  if llm_request.model:
    shared_attributes[gen_ai_attributes.GEN_AI_REQUEST_MODEL] = (
        llm_request.model
    )
  if response_model:
    shared_attributes[gen_ai_attributes.GEN_AI_RESPONSE_MODEL] = response_model

  # One data point per token type, "input" first; zero counts are skipped.
  for token_type, token_count in tokens_by_type.items():
    if token_count > 0:
      point_attributes = {
          **shared_attributes,
          gen_ai_attributes.GEN_AI_TOKEN_TYPE: token_type,
      }
      _client_token_usage.record(token_count, attributes=point_attributes)
213+
214+
124215def _get_content_size (
125216 content : types .Content | None ,
126217) -> int :
@@ -133,3 +224,7 @@ def _get_content_size(
133224 if part .inline_data and part .inline_data .data :
134225 size += len (part .inline_data .data )
135226 return size
227+
228+
def _get_provider_name() -> str:
  """Returns the provider name attached to the GenAI client metrics.

  The value comes from the tracing module's `_guess_gemini_system_name`
  helper.
  """
  provider_name = tracing._guess_gemini_system_name()
  return provider_name
0 commit comments