6060
6161from __future__ import annotations
6262
63+ import logging
6364import timeit
6465from contextlib import contextmanager
65- from typing import Iterator
66+ from typing import Callable , Iterator , TypeVar
6667
6768from opentelemetry import context as otel_context
6869from opentelemetry ._logs import (
8081)
8182from opentelemetry .util .genai .metrics import InvocationMetricsRecorder
8283from opentelemetry .util .genai .span_utils import (
84+ _apply_agent_finish_attributes ,
85+ _apply_creation_finish_attributes ,
8386 _apply_error_attributes ,
8487 _apply_llm_finish_attributes ,
88+ _maybe_emit_agent_event ,
8589 _maybe_emit_llm_event ,
8690)
87- from opentelemetry .util .genai .types import Error , LLMInvocation
91+ from opentelemetry .util .genai .types import (
92+ AgentCreation ,
93+ AgentInvocation ,
94+ Error ,
95+ GenAIInvocation ,
96+ LLMInvocation ,
97+ )
8898from opentelemetry .util .genai .version import __version__
8999
100+ _logger = logging .getLogger (__name__ )
101+
102+ _T = TypeVar ("_T" , bound = GenAIInvocation )
103+
104+
@contextmanager
def _lifecycle_context(
    invocation: _T,
    start: Callable[[_T], _T],
    stop: Callable[[_T], _T],
    fail: Callable[[_T, Error], _T],
    label: str,
) -> Iterator[_T]:
    """Drive a GenAI invocation through its start/stop/fail lifecycle.

    ``start(invocation)`` is called on entry and the invocation is yielded
    to the ``with`` body. If the body raises, ``fail(invocation, error)`` is
    called with an ``Error`` built from the exception and the original
    exception is re-raised unchanged. If the body completes normally,
    ``stop(invocation)`` is called.

    ``Exception`` subclasses raised by ``start``, ``stop`` or ``fail``
    themselves are logged as warnings (with traceback) and suppressed, so
    telemetry problems do not surface in the caller's code.

    Args:
        invocation: The invocation object handed to every callback and
            yielded to the ``with`` body.
        start: Callback invoked once on entry.
        stop: Callback invoked when the body finishes without raising.
        fail: Callback invoked when the body raises.
        label: Short name used only in the warning log messages.

    Yields:
        The same ``invocation`` object that was passed in.

    Raises:
        BaseException: Whatever the ``with`` body raised, re-raised as-is.
    """
    try:
        start(invocation)
    except Exception:
        # The body still runs even if telemetry start-up failed.
        _logger.warning("Failed to start %s span", label, exc_info=True)
    try:
        yield invocation
    except BaseException as exc:  # pylint: disable=broad-exception-caught
        # BaseException rather than Exception: asyncio.CancelledError,
        # KeyboardInterrupt and GeneratorExit would otherwise bypass both
        # ``fail`` and ``stop``, so nothing opened by ``start`` would ever
        # be finalized. The exception is always re-raised below.
        try:
            fail(invocation, Error(message=str(exc), type=type(exc)))
        except Exception:
            _logger.warning(
                "Failed to record %s failure", label, exc_info=True
            )
        raise
    else:
        try:
            stop(invocation)
        except Exception:
            _logger.warning("Failed to stop %s span", label, exc_info=True)
137+
90138
91139class TelemetryHandler :
92140 """
@@ -156,13 +204,13 @@ def stop_llm(self, invocation: LLMInvocation) -> LLMInvocation: # pylint: disab
156204 # TODO: Provide feedback that this invocation was not started
157205 return invocation
158206
159- span = invocation . span
160- _apply_llm_finish_attributes (span , invocation )
161- self ._record_llm_metrics (invocation , span )
162- _maybe_emit_llm_event (self ._logger , span , invocation )
163- # Detach context and end span
164- otel_context .detach (invocation .context_token )
165- span .end ()
207+ try :
208+ _apply_llm_finish_attributes (invocation . span , invocation )
209+ self ._record_llm_metrics (invocation , invocation . span )
210+ _maybe_emit_llm_event (self ._logger , invocation . span , invocation )
211+ finally :
212+ otel_context .detach (invocation .context_token )
213+ invocation . span .end ()
166214 return invocation
167215
168216 def fail_llm ( # pylint: disable=no-self-use
@@ -173,15 +221,19 @@ def fail_llm( # pylint: disable=no-self-use
173221 # TODO: Provide feedback that this invocation was not started
174222 return invocation
175223
176- span = invocation .span
177- _apply_llm_finish_attributes (invocation .span , invocation )
178- _apply_error_attributes (invocation .span , error )
179- error_type = getattr (error .type , "__qualname__" , None )
180- self ._record_llm_metrics (invocation , span , error_type = error_type )
181- _maybe_emit_llm_event (self ._logger , span , invocation , error )
182- # Detach context and end span
183- otel_context .detach (invocation .context_token )
184- span .end ()
224+ try :
225+ _apply_llm_finish_attributes (invocation .span , invocation )
226+ _apply_error_attributes (invocation .span , error )
227+ error_type = getattr (error .type , "__qualname__" , None )
228+ self ._record_llm_metrics (
229+ invocation , invocation .span , error_type = error_type
230+ )
231+ _maybe_emit_llm_event (
232+ self ._logger , invocation .span , invocation , error
233+ )
234+ finally :
235+ otel_context .detach (invocation .context_token )
236+ invocation .span .end ()
185237 return invocation
186238
187239 @contextmanager
@@ -200,13 +252,173 @@ def llm(
200252 invocation = LLMInvocation (
201253 request_model = "" ,
202254 )
203- self .start_llm (invocation )
255+ with _lifecycle_context (
256+ invocation , self .start_llm , self .stop_llm , self .fail_llm , "llm"
257+ ) as inv :
258+ yield inv
259+
260+ # ---- Agent invocation lifecycle ----
261+
def start_agent(
    self,
    invocation: AgentInvocation,
) -> AgentInvocation:
    """Start a span for an agent invocation and attach it to the context.

    The span is named ``"<operation_name> <agent_name>"`` (just the
    operation name when no agent name is set). Its kind is
    ``SpanKind.CLIENT`` when ``invocation.is_remote`` is truthy and
    ``SpanKind.INTERNAL`` otherwise.

    The started span, the context token returned by attaching it, and a
    ``timeit.default_timer()`` start timestamp are stored on the
    invocation so that ``stop_agent`` / ``fail_agent`` can finish it.

    Args:
        invocation: The agent invocation to start; mutated in place.

    Returns:
        The same ``invocation`` object.
    """
    # ``or ""`` keeps an unset (None) agent name from being formatted as
    # the literal text "None"; strip() then removes the dangling space.
    # NOTE(review): assumes agent_name may be None — confirm against the
    # AgentInvocation definition. Behaviour is unchanged for any string.
    agent_name = invocation.agent_name or ""
    span_name = f"{invocation.operation_name} {agent_name}".strip()
    kind = SpanKind.CLIENT if invocation.is_remote else SpanKind.INTERNAL
    span = self._tracer.start_span(
        name=span_name,
        kind=kind,
    )
    invocation.monotonic_start_s = timeit.default_timer()
    invocation.span = span
    invocation.context_token = otel_context.attach(
        set_span_in_context(span)
    )
    return invocation
281+
def _record_agent_metrics(
    self,
    invocation: AgentInvocation,
    span: Span | None = None,
    *,
    error_type: str | None = None,
) -> None:
    """Hand an agent invocation to the metrics recorder when possible.

    Nothing is recorded when the handler has no metrics recorder or when
    ``span`` is ``None``.

    Args:
        invocation: The agent invocation being reported.
        span: The span associated with the invocation, if any.
        error_type: Optional error type name forwarded to the recorder.
    """
    recorder = self._metrics_recorder
    if recorder is not None and span is not None:
        recorder.record_agent(span, invocation, error_type=error_type)
296+
def stop_agent(self, invocation: AgentInvocation) -> AgentInvocation:
    """Finish a successfully completed agent invocation.

    Applies the finish attributes, records metrics and possibly emits an
    event, then detaches the context token and ends the span. The detach
    and span end happen even if one of the earlier steps raises. An
    invocation without a context token or span is returned untouched.

    Returns:
        The same ``invocation`` object.
    """
    if invocation.context_token is not None and invocation.span is not None:
        active_span = invocation.span
        try:
            _apply_agent_finish_attributes(active_span, invocation)
            self._record_agent_metrics(invocation, active_span)
            _maybe_emit_agent_event(self._logger, active_span, invocation)
        finally:
            # Cleanup runs regardless of failures in the steps above.
            otel_context.detach(invocation.context_token)
            active_span.end()
    return invocation
310+
def fail_agent(
    self, invocation: AgentInvocation, error: Error
) -> AgentInvocation:
    """Finish an agent invocation that ended with an error.

    Applies the finish and error attributes, records metrics tagged with
    the error type's ``__qualname__`` (``None`` when unavailable) and
    possibly emits an event, then detaches the context token and ends the
    span. The detach and span end happen even if an earlier step raises.
    An invocation without a context token or span is returned untouched.

    Returns:
        The same ``invocation`` object.
    """
    if invocation.context_token is not None and invocation.span is not None:
        active_span = invocation.span
        try:
            _apply_agent_finish_attributes(active_span, invocation)
            _apply_error_attributes(active_span, error)
            self._record_agent_metrics(
                invocation,
                active_span,
                error_type=getattr(error.type, "__qualname__", None),
            )
            _maybe_emit_agent_event(
                self._logger, active_span, invocation, error
            )
        finally:
            # Cleanup runs regardless of failures in the steps above.
            otel_context.detach(invocation.context_token)
            active_span.end()
    return invocation
332+
@contextmanager
def agent(
    self, invocation: AgentInvocation | None = None
) -> Iterator[AgentInvocation]:
    """Context manager wrapping a single agent invocation.

    A fresh ``AgentInvocation`` is created when none is supplied. The
    invocation is started on entry, finished via ``stop_agent`` on a
    normal exit, and finished via ``fail_agent`` when the body raises, in
    which case the original exception is re-raised.

    Callers should only set data attributes on the yielded invocation and
    must not modify its span or context.
    """
    target = AgentInvocation() if invocation is None else invocation
    lifecycle = _lifecycle_context(
        target,
        self.start_agent,
        self.stop_agent,
        self.fail_agent,
        "agent",
    )
    with lifecycle as active:
        yield active
355+
356+ # ---- Agent creation lifecycle ----
357+
def start_create_agent(
    self,
    creation: AgentCreation,
) -> AgentCreation:
    """Start a span for an agent creation and attach it to the context.

    The span is named ``"<operation_name> <agent_name>"`` (just the
    operation name when no agent name is set) and always uses
    ``SpanKind.CLIENT``.

    The started span, the context token returned by attaching it, and a
    ``timeit.default_timer()`` start timestamp are stored on the creation
    object so that ``stop_create_agent`` / ``fail_create_agent`` can
    finish it.

    Args:
        creation: The agent creation to start; mutated in place.

    Returns:
        The same ``creation`` object.
    """
    # ``or ""`` keeps an unset (None) agent name from being formatted as
    # the literal text "None"; strip() then removes the dangling space.
    # NOTE(review): assumes agent_name may be None — confirm against the
    # AgentCreation definition. Behaviour is unchanged for any string.
    agent_name = creation.agent_name or ""
    span_name = f"{creation.operation_name} {agent_name}".strip()
    span = self._tracer.start_span(
        name=span_name,
        kind=SpanKind.CLIENT,
    )
    creation.monotonic_start_s = timeit.default_timer()
    creation.span = span
    creation.context_token = otel_context.attach(set_span_in_context(span))
    return creation
372+
def stop_create_agent(self, creation: AgentCreation) -> AgentCreation:  # pylint: disable=no-self-use
    """Finish a successfully completed agent creation.

    Applies the creation finish attributes, then detaches the context
    token and ends the span. The detach and span end happen even if
    applying the attributes raises. A creation without a context token or
    span is returned untouched.

    Returns:
        The same ``creation`` object.
    """
    if creation.context_token is not None and creation.span is not None:
        active_span = creation.span
        try:
            _apply_creation_finish_attributes(active_span, creation)
        finally:
            # Cleanup runs regardless of failures in the step above.
            otel_context.detach(creation.context_token)
            active_span.end()
    return creation
384+
def fail_create_agent(  # pylint: disable=no-self-use
    self, creation: AgentCreation, error: Error
) -> AgentCreation:
    """Finish an agent creation that ended with an error.

    Applies the creation finish attributes and the error attributes, then
    detaches the context token and ends the span. The detach and span end
    happen even if an earlier step raises. A creation without a context
    token or span is returned untouched.

    Returns:
        The same ``creation`` object.
    """
    if creation.context_token is not None and creation.span is not None:
        active_span = creation.span
        try:
            _apply_creation_finish_attributes(active_span, creation)
            _apply_error_attributes(active_span, error)
        finally:
            # Cleanup runs regardless of failures in the steps above.
            otel_context.detach(creation.context_token)
            active_span.end()
    return creation
399+
@contextmanager
def create_agent(
    self, creation: AgentCreation | None = None
) -> Iterator[AgentCreation]:
    """Context manager wrapping a single agent creation.

    A fresh ``AgentCreation`` is created when none is supplied. The
    creation is started on entry, finished via ``stop_create_agent`` on a
    normal exit, and finished via ``fail_create_agent`` when the body
    raises, in which case the original exception is re-raised.

    Callers should only set data attributes on the yielded creation object
    and must not modify its span or context.
    """
    target = AgentCreation() if creation is None else creation
    lifecycle = _lifecycle_context(
        target,
        self.start_create_agent,
        self.stop_create_agent,
        self.fail_create_agent,
        "create_agent",
    )
    with lifecycle as active:
        yield active
210422
211423
212424def get_telemetry_handler (
0 commit comments