From 08e2b07837516526d0ec8d56bcf7b2af16869c58 Mon Sep 17 00:00:00 2001 From: etserend Date: Wed, 18 Feb 2026 16:07:48 -0600 Subject: [PATCH 01/10] GenAI Utils | Agent Base Type and Creation Span --- util/opentelemetry-util-genai/CHANGELOG.md | 2 + .../src/opentelemetry/util/genai/handler.py | 146 ++++++- .../opentelemetry/util/genai/span_utils.py | 225 ++++++++++ .../src/opentelemetry/util/genai/types.py | 120 ++++++ .../tests/test_handler_agent.py | 404 ++++++++++++++++++ 5 files changed, 896 insertions(+), 1 deletion(-) create mode 100644 util/opentelemetry-util-genai/tests/test_handler_agent.py diff --git a/util/opentelemetry-util-genai/CHANGELOG.md b/util/opentelemetry-util-genai/CHANGELOG.md index 16339e79ad..80587603ac 100644 --- a/util/opentelemetry-util-genai/CHANGELOG.md +++ b/util/opentelemetry-util-genai/CHANGELOG.md @@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## Unreleased +- Add `AgentInvocation` type and agent invocation lifecycle support + ([#4274](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/4274)) - Add support for emitting inference events and enrich message types. ([#3994](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3994)) - Add support for `server.address`, `server.port` on all signals and additional metric-only attributes ([#4069](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/4069)) diff --git a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/handler.py b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/handler.py index 54e626deaa..c4a85f80af 100644 --- a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/handler.py +++ b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/handler.py @@ -80,11 +80,18 @@ ) from opentelemetry.util.genai.metrics import InvocationMetricsRecorder from opentelemetry.util.genai.span_utils import ( + _apply_agent_finish_attributes, + _apply_creation_finish_attributes, _apply_error_attributes, _apply_llm_finish_attributes, _maybe_emit_llm_event, ) -from opentelemetry.util.genai.types import Error, LLMInvocation +from opentelemetry.util.genai.types import ( + AgentCreation, + AgentInvocation, + Error, + LLMInvocation, +) from opentelemetry.util.genai.version import __version__ @@ -208,6 +215,143 @@ def llm( raise self.stop_llm(invocation) + # ---- Agent invocation lifecycle ---- + + def start_agent( + self, + invocation: AgentInvocation, + ) -> AgentInvocation: + """Start an agent invocation and create a pending span entry.""" + span_name = f"{invocation.operation_name} {invocation.agent_name}".strip() + kind = SpanKind.CLIENT if invocation.is_remote else SpanKind.INTERNAL + span = self._tracer.start_span( + name=span_name, + kind=kind, + ) + invocation.monotonic_start_s = timeit.default_timer() + invocation.span = span + invocation.context_token = otel_context.attach( + set_span_in_context(span) + ) + return invocation + + def stop_agent(self, invocation: AgentInvocation) -> AgentInvocation: # pylint: disable=no-self-use + """Finalize an agent invocation successfully and end its span.""" + if invocation.context_token is None or invocation.span is None: + return invocation + + span = invocation.span + _apply_agent_finish_attributes(span, invocation) + otel_context.detach(invocation.context_token) + span.end() + return invocation + + def fail_agent( # pylint: disable=no-self-use + self, invocation: AgentInvocation, error: Error + ) -> AgentInvocation: + """Fail an agent invocation and end its span with error status.""" + if invocation.context_token is None or invocation.span is None: + return invocation + + span = invocation.span + _apply_agent_finish_attributes(span, invocation) + _apply_error_attributes(span, error) + otel_context.detach(invocation.context_token) + span.end() + return invocation + + @contextmanager + def agent( + self, invocation: AgentInvocation | None = None + ) -> Iterator[AgentInvocation]: + """Context manager for agent invocations. + + Only set data attributes on the invocation object, do not modify the span or context. + + Starts the span on entry. On normal exit, finalizes the invocation and ends the span. + If an exception occurs inside the context, marks the span as error, ends it, and + re-raises the original exception. + """ + if invocation is None: + invocation = AgentInvocation() + self.start_agent(invocation) + try: + yield invocation + except Exception as exc: + self.fail_agent( + invocation, Error(message=str(exc), type=type(exc)) + ) + raise + self.stop_agent(invocation) + + # ---- Agent creation lifecycle ---- + + def start_create_agent( + self, + creation: AgentCreation, + ) -> AgentCreation: + """Start an agent creation and create a pending span entry.""" + span_name = f"{creation.operation_name} {creation.agent_name}".strip() + span = self._tracer.start_span( + name=span_name, + kind=SpanKind.CLIENT, + ) + creation.monotonic_start_s = timeit.default_timer() + creation.span = span + creation.context_token = otel_context.attach( + set_span_in_context(span) + ) + return creation + + def stop_create_agent(self, creation: AgentCreation) -> AgentCreation: # pylint: disable=no-self-use + """Finalize an agent creation successfully and end its span.""" + if creation.context_token is None or creation.span is None: + return creation + + span = creation.span + _apply_creation_finish_attributes(span, creation) + otel_context.detach(creation.context_token) + span.end() + return creation + + def fail_create_agent( # pylint: disable=no-self-use + self, creation: AgentCreation, error: Error + ) -> AgentCreation: + """Fail an agent creation and end its span with error status.""" + if creation.context_token is None or creation.span is None: + return creation + + span = creation.span + _apply_creation_finish_attributes(span, creation) + _apply_error_attributes(span, error) + otel_context.detach(creation.context_token) + span.end() + return creation + + @contextmanager + def create_agent( + self, creation: AgentCreation | None = None + ) -> Iterator[AgentCreation]: + """Context manager for agent creation. + + Only set data attributes on the creation object, do not modify the span or context. + + Starts the span on entry. On normal exit, finalizes the creation and ends the span. + If an exception occurs inside the context, marks the span as error, ends it, and + re-raises the original exception. + """ + if creation is None: + creation = AgentCreation() + self.start_create_agent(creation) + try: + yield creation + except Exception as exc: + self.fail_create_agent( + creation, Error(message=str(exc), type=type(exc)) + ) + raise + self.stop_create_agent(creation) + def get_telemetry_handler( tracer_provider: TracerProvider | None = None, diff --git a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/span_utils.py b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/span_utils.py index 889994436f..85c82b1212 100644 --- a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/span_utils.py +++ b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/span_utils.py @@ -32,11 +32,14 @@ from opentelemetry.trace.propagation import set_span_in_context from opentelemetry.trace.status import Status, StatusCode from opentelemetry.util.genai.types import ( + AgentCreation, + AgentInvocation, Error, InputMessage, LLMInvocation, MessagePart, OutputMessage, + _BaseAgent, ) from opentelemetry.util.genai.utils import ( ContentCapturingMode, @@ -279,6 +282,218 @@ def _get_llm_response_attributes( return {key: value for key, value in optional_attrs if value is not None} +def _get_base_agent_common_attributes( + agent: _BaseAgent, +) -> dict[str, Any]: + """Get common attributes shared by all agent operations (invoke_agent, create_agent).""" + optional_attrs = ( + (GenAI.GEN_AI_REQUEST_MODEL, agent.request_model), + (GenAI.GEN_AI_PROVIDER_NAME, agent.provider), + (GenAI.GEN_AI_AGENT_NAME, agent.agent_name), + (GenAI.GEN_AI_AGENT_ID, agent.agent_id), + (GenAI.GEN_AI_AGENT_DESCRIPTION, agent.agent_description), + ("gen_ai.agent.version", agent.agent_version), + (server_attributes.SERVER_ADDRESS, agent.server_address), + (server_attributes.SERVER_PORT, agent.server_port), + ) + + return { + GenAI.GEN_AI_OPERATION_NAME: agent.operation_name, + **{key: value for key, value in optional_attrs if value is not None}, + } + + +def _get_base_agent_span_name(agent: _BaseAgent) -> str: + """Get the span name for any agent operation.""" + if agent.agent_name: + return f"{agent.operation_name} {agent.agent_name}" + return agent.operation_name + + +def _get_agent_common_attributes( + invocation: AgentInvocation, +) -> dict[str, Any]: + """Get common agent invocation attributes shared by finish() and error() paths.""" + attrs = _get_base_agent_common_attributes(invocation) + + # Invoke-specific conditionally required attributes + invoke_attrs = ( + (GenAI.GEN_AI_CONVERSATION_ID, invocation.conversation_id), + (GenAI.GEN_AI_DATA_SOURCE_ID, invocation.data_source_id), + (GenAI.GEN_AI_OUTPUT_TYPE, invocation.output_type), + ) + attrs.update( + {key: value for key, value in invoke_attrs if value is not None} + ) + + return attrs + + +def _get_agent_span_name(invocation: AgentInvocation) -> str: + """Get the span name for an agent invocation.""" + return _get_base_agent_span_name(invocation) + + +def _get_agent_request_attributes( + invocation: AgentInvocation, +) -> dict[str, Any]: + """Get GenAI request semantic convention attributes for agent invocation.""" + optional_attrs = ( + (GenAI.GEN_AI_REQUEST_TEMPERATURE, invocation.temperature), + (GenAI.GEN_AI_REQUEST_TOP_P, invocation.top_p), + (GenAI.GEN_AI_REQUEST_FREQUENCY_PENALTY, invocation.frequency_penalty), + (GenAI.GEN_AI_REQUEST_PRESENCE_PENALTY, invocation.presence_penalty), + (GenAI.GEN_AI_REQUEST_MAX_TOKENS, invocation.max_tokens), + (GenAI.GEN_AI_REQUEST_STOP_SEQUENCES, invocation.stop_sequences), + (GenAI.GEN_AI_REQUEST_SEED, invocation.seed), + (GenAI.GEN_AI_REQUEST_CHOICE_COUNT, invocation.choice_count), + ) + + return {key: value for key, value in optional_attrs if value is not None} + + +def _get_agent_response_attributes( + invocation: AgentInvocation, +) -> dict[str, Any]: + """Get GenAI response semantic convention attributes for agent invocation.""" + finish_reasons: list[str] | None + if invocation.finish_reasons is not None: + finish_reasons = invocation.finish_reasons + elif invocation.output_messages: + finish_reasons = [ + message.finish_reason + for message in invocation.output_messages + if message.finish_reason + ] + else: + finish_reasons = None + + unique_finish_reasons = ( + sorted(set(finish_reasons)) if finish_reasons else None + ) + + optional_attrs = ( + ( + GenAI.GEN_AI_RESPONSE_FINISH_REASONS, + unique_finish_reasons if unique_finish_reasons else None, + ), + (GenAI.GEN_AI_RESPONSE_MODEL, invocation.response_model_name), + (GenAI.GEN_AI_RESPONSE_ID, invocation.response_id), + (GenAI.GEN_AI_USAGE_INPUT_TOKENS, invocation.input_tokens), + (GenAI.GEN_AI_USAGE_OUTPUT_TOKENS, invocation.output_tokens), + ) + + return {key: value for key, value in optional_attrs if value is not None} + + +def _get_agent_messages_attributes_for_span( + input_messages: list[InputMessage], + output_messages: list[OutputMessage], + system_instruction: list[MessagePart] | None = None, + tool_definitions: list[dict[str, Any]] | None = None, +) -> dict[str, Any]: + """Get message attributes formatted for span (JSON string format) for agent invocation.""" + if not is_experimental_mode() or get_content_capturing_mode() not in ( + ContentCapturingMode.SPAN_ONLY, + ContentCapturingMode.SPAN_AND_EVENT, + ): + return {} + + optional_attrs = ( + ( + GenAI.GEN_AI_INPUT_MESSAGES, + gen_ai_json_dumps([asdict(m) for m in input_messages]) + if input_messages + else None, + ), + ( + GenAI.GEN_AI_OUTPUT_MESSAGES, + gen_ai_json_dumps([asdict(m) for m in output_messages]) + if output_messages + else None, + ), + ( + GenAI.GEN_AI_SYSTEM_INSTRUCTIONS, + gen_ai_json_dumps([asdict(p) for p in system_instruction]) + if system_instruction + else None, + ), + ( + GenAI.GEN_AI_TOOL_DEFINITIONS, + gen_ai_json_dumps(tool_definitions) + if tool_definitions + else None, + ), + ) + + return {key: value for key, value in optional_attrs if value is not None} + + +def _apply_agent_finish_attributes( + span: Span, invocation: AgentInvocation +) -> None: + """Apply attributes/messages common to agent finish() paths.""" + span.update_name(_get_agent_span_name(invocation)) + + attributes: dict[str, Any] = {} + attributes.update(_get_agent_common_attributes(invocation)) + attributes.update(_get_agent_request_attributes(invocation)) + attributes.update(_get_agent_response_attributes(invocation)) + attributes.update( + _get_agent_messages_attributes_for_span( + invocation.input_messages, + invocation.output_messages, + invocation.system_instruction, + invocation.tool_definitions, + ) + ) + attributes.update(invocation.attributes) + + if attributes: + span.set_attributes(attributes) + + +def _get_creation_common_attributes( + creation: AgentCreation, +) -> dict[str, Any]: + """Get common agent creation attributes.""" + return _get_base_agent_common_attributes(creation) + + +def _get_creation_span_name(creation: AgentCreation) -> str: + """Get the span name for an agent creation.""" + return _get_base_agent_span_name(creation) + + +def _apply_creation_finish_attributes( + span: Span, creation: AgentCreation +) -> None: + """Apply attributes common to agent creation finish() paths.""" + span.update_name(_get_creation_span_name(creation)) + + attributes: dict[str, Any] = {} + attributes.update(_get_creation_common_attributes(creation)) + + # System instructions (Opt-In) + if ( + is_experimental_mode() + and get_content_capturing_mode() + in ( + ContentCapturingMode.SPAN_ONLY, + ContentCapturingMode.SPAN_AND_EVENT, + ) + and creation.system_instruction + ): + attributes[GenAI.GEN_AI_SYSTEM_INSTRUCTIONS] = gen_ai_json_dumps( + [asdict(p) for p in creation.system_instruction] + ) + + attributes.update(creation.attributes) + + if attributes: + span.set_attributes(attributes) + + __all__ = [ "_apply_llm_finish_attributes", "_apply_error_attributes", @@ -287,4 +502,14 @@ def _get_llm_response_attributes( "_get_llm_response_attributes", "_get_llm_span_name", "_maybe_emit_llm_event", + "_get_base_agent_common_attributes", + "_get_base_agent_span_name", + "_apply_agent_finish_attributes", + "_apply_creation_finish_attributes", + "_get_agent_common_attributes", + "_get_agent_request_attributes", + "_get_agent_response_attributes", + "_get_agent_span_name", + "_get_creation_common_attributes", + "_get_creation_span_name", ] diff --git a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/types.py b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/types.py index 0e86885f20..6a3814a03f 100644 --- a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/types.py +++ b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/types.py @@ -236,6 +236,126 @@ class LLMInvocation(GenAIInvocation): monotonic_start_s: float | None = None +@dataclass +class _BaseAgent(GenAIInvocation): + """ + Shared base class for agent lifecycle types (AgentInvocation, AgentCreation). + + Contains fields common to all agent operations: identity, provider, + model, system instructions, server info, and telemetry plumbing. + + Follows semconv for GenAI agent spans: + https://github.com/open-telemetry/semantic-conventions/blob/main/docs/gen-ai/gen-ai-agent-spans.md + + Do not instantiate directly — use AgentInvocation or AgentCreation. + """ + + # Agent identity + agent_name: str | None = None + agent_id: str | None = None + agent_description: str | None = None + agent_version: str | None = None + + # Operation + operation_name: str = "" + provider: str | None = None + + # Request + request_model: str | None = None + + # Content (Opt-In) + system_instruction: list[MessagePart] = field( + default_factory=_new_system_instruction + ) + + # Server + server_address: str | None = None + server_port: int | None = None + + attributes: dict[str, Any] = field(default_factory=_new_str_any_dict) + """ + Additional attributes to set on spans and/or events. + """ + # Monotonic start time in seconds (from timeit.default_timer) used + # for duration calculations to avoid mixing clock sources. This is + # populated by the TelemetryHandler when starting an invocation. + monotonic_start_s: float | None = None + + +@dataclass +class AgentCreation(_BaseAgent): + """ + Represents agent creation/initialization (create_agent operation). + + Follows semconv for GenAI agent spans: + https://github.com/open-telemetry/semantic-conventions/blob/main/docs/gen-ai/gen-ai-agent-spans.md#create-agent-span + + When creating an AgentCreation object, only update the data attributes. + The span and context_token attributes are set by the TelemetryHandler. + """ + + # Override default operation name + operation_name: str = "create_agent" + + +@dataclass +class AgentInvocation(_BaseAgent): + """ + Represents an agent invocation (invoke_agent operation). + + Follows semconv for GenAI agent spans: + https://github.com/open-telemetry/semantic-conventions/blob/main/docs/gen-ai/gen-ai-agent-spans.md#invoke-agent-span + + When creating an AgentInvocation object, only update the data attributes. + The span and context_token attributes are set by the TelemetryHandler. + """ + + # Override default operation name + operation_name: str = "invoke_agent" + + # Invoke-specific request attributes (Cond. Required) + conversation_id: str | None = None + data_source_id: str | None = None + output_type: str | None = None + + # Request parameters (Recommended) + temperature: float | None = None + top_p: float | None = None + frequency_penalty: float | None = None + presence_penalty: float | None = None + max_tokens: int | None = None + stop_sequences: list[str] | None = None + seed: int | None = None + choice_count: int | None = None + + # Response (Recommended) + response_model_name: str | None = None + response_id: str | None = None + finish_reasons: list[str] | None = None + input_tokens: int | None = None + output_tokens: int | None = None + + # Content (Opt-In) — input/output messages and tool definitions + input_messages: list[InputMessage] = field( + default_factory=_new_input_messages + ) + output_messages: list[OutputMessage] = field( + default_factory=_new_output_messages + ) + tool_definitions: list[dict[str, Any]] | None = None + + # Span kind: CLIENT for remote agents, INTERNAL for in-process agents + is_remote: bool = True + + metric_attributes: dict[str, Any] = field( + default_factory=_new_str_any_dict + ) + """ + Additional attributes to set on metrics. Must be of a low cardinality. + These attributes will not be set on spans or events. + """ + + @dataclass class Error: message: str diff --git a/util/opentelemetry-util-genai/tests/test_handler_agent.py b/util/opentelemetry-util-genai/tests/test_handler_agent.py new file mode 100644 index 0000000000..2ae7d9938a --- /dev/null +++ b/util/opentelemetry-util-genai/tests/test_handler_agent.py @@ -0,0 +1,404 @@ +from __future__ import annotations + +from unittest import TestCase + +from opentelemetry.sdk.trace import TracerProvider +from opentelemetry.sdk.trace.export import SimpleSpanProcessor +from opentelemetry.sdk.trace.export.in_memory_span_exporter import ( + InMemorySpanExporter, +) +from opentelemetry.semconv._incubating.attributes import ( + gen_ai_attributes as GenAI, +) +from opentelemetry.util.genai.handler import TelemetryHandler +from opentelemetry.util.genai.types import ( + AgentCreation, + AgentInvocation, + Error, + InputMessage, + OutputMessage, + Text, +) + + +class TestAgentInvocationHandler(TestCase): + def setUp(self) -> None: + self.span_exporter = InMemorySpanExporter() + self.tracer_provider = TracerProvider() + self.tracer_provider.add_span_processor( + SimpleSpanProcessor(self.span_exporter) + ) + + def _make_handler(self) -> TelemetryHandler: + return TelemetryHandler( + tracer_provider=self.tracer_provider, + ) + + # ---- start/stop agent ---- + + def test_start_stop_agent_creates_span(self) -> None: + handler = self._make_handler() + invocation = AgentInvocation( + agent_name="Math Tutor", + provider="openai", + request_model="gpt-4", + ) + handler.start_agent(invocation) + handler.stop_agent(invocation) + + spans = self.span_exporter.get_finished_spans() + self.assertEqual(len(spans), 1) + span = spans[0] + self.assertEqual(span.name, "invoke_agent Math Tutor") + self.assertEqual( + span.attributes[GenAI.GEN_AI_OPERATION_NAME], "invoke_agent" + ) + self.assertEqual( + span.attributes[GenAI.GEN_AI_AGENT_NAME], "Math Tutor" + ) + self.assertEqual( + span.attributes[GenAI.GEN_AI_PROVIDER_NAME], "openai" + ) + self.assertEqual( + span.attributes[GenAI.GEN_AI_REQUEST_MODEL], "gpt-4" + ) + + def test_agent_span_kind_client_by_default(self) -> None: + handler = self._make_handler() + invocation = AgentInvocation(agent_name="Remote Agent", is_remote=True) + handler.start_agent(invocation) + handler.stop_agent(invocation) + + spans = self.span_exporter.get_finished_spans() + self.assertEqual(len(spans), 1) + from opentelemetry.trace import SpanKind + + self.assertEqual(spans[0].kind, SpanKind.CLIENT) + + def test_agent_span_kind_internal_for_local(self) -> None: + handler = self._make_handler() + invocation = AgentInvocation( + agent_name="Local Agent", is_remote=False + ) + handler.start_agent(invocation) + handler.stop_agent(invocation) + + spans = self.span_exporter.get_finished_spans() + self.assertEqual(len(spans), 1) + from opentelemetry.trace import SpanKind + + self.assertEqual(spans[0].kind, SpanKind.INTERNAL) + + def test_agent_with_all_attributes(self) -> None: + handler = self._make_handler() + invocation = AgentInvocation( + agent_name="Full Agent", + agent_id="agent-123", + agent_description="A test agent", + agent_version="1.0.0", + provider="openai", + request_model="gpt-4", + conversation_id="conv-456", + data_source_id="ds-789", + output_type="text", + temperature=0.7, + top_p=0.9, + max_tokens=1000, + seed=42, + server_address="api.openai.com", + server_port=443, + ) + handler.start_agent(invocation) + invocation.response_model_name = "gpt-4-0613" + invocation.response_id = "resp-abc" + invocation.input_tokens = 100 + invocation.output_tokens = 200 + invocation.finish_reasons = ["stop"] + handler.stop_agent(invocation) + + spans = self.span_exporter.get_finished_spans() + self.assertEqual(len(spans), 1) + attrs = spans[0].attributes + self.assertEqual(attrs[GenAI.GEN_AI_AGENT_NAME], "Full Agent") + self.assertEqual(attrs[GenAI.GEN_AI_AGENT_ID], "agent-123") + self.assertEqual( + attrs[GenAI.GEN_AI_AGENT_DESCRIPTION], "A test agent" + ) + self.assertEqual(attrs[GenAI.GEN_AI_RESPONSE_MODEL], "gpt-4-0613") + self.assertEqual(attrs[GenAI.GEN_AI_RESPONSE_ID], "resp-abc") + self.assertEqual(attrs[GenAI.GEN_AI_USAGE_INPUT_TOKENS], 100) + self.assertEqual(attrs[GenAI.GEN_AI_USAGE_OUTPUT_TOKENS], 200) + self.assertEqual( + tuple(attrs[GenAI.GEN_AI_RESPONSE_FINISH_REASONS]), ("stop",) + ) + + # ---- fail agent ---- + + def test_fail_agent_sets_error_status(self) -> None: + handler = self._make_handler() + invocation = AgentInvocation( + agent_name="Failing Agent", provider="openai" + ) + handler.start_agent(invocation) + error = Error(message="agent crashed", type=RuntimeError) + handler.fail_agent(invocation, error) + + spans = self.span_exporter.get_finished_spans() + self.assertEqual(len(spans), 1) + span = spans[0] + self.assertEqual(span.status.description, "agent crashed") + self.assertEqual(span.attributes.get("error.type"), "RuntimeError") + + # ---- context manager ---- + + def test_agent_context_manager_success(self) -> None: + handler = self._make_handler() + invocation = AgentInvocation( + agent_name="CM Agent", provider="openai", request_model="gpt-4" + ) + with handler.agent(invocation) as inv: + inv.input_tokens = 10 + inv.output_tokens = 20 + + spans = self.span_exporter.get_finished_spans() + self.assertEqual(len(spans), 1) + self.assertEqual(spans[0].name, "invoke_agent CM Agent") + + def test_agent_context_manager_error(self) -> None: + handler = self._make_handler() + invocation = AgentInvocation(agent_name="Error Agent") + with self.assertRaises(ValueError): + with handler.agent(invocation): + raise ValueError("test error") + + spans = self.span_exporter.get_finished_spans() + self.assertEqual(len(spans), 1) + self.assertEqual(spans[0].attributes.get("error.type"), "ValueError") + + def test_agent_context_manager_default_invocation(self) -> None: + handler = self._make_handler() + with handler.agent() as inv: + inv.agent_name = "Dynamic Agent" + inv.provider = "openai" + + spans = self.span_exporter.get_finished_spans() + self.assertEqual(len(spans), 1) + + # ---- not started ---- + + def test_stop_agent_without_start_is_noop(self) -> None: + handler = self._make_handler() + invocation = AgentInvocation(agent_name="Not Started") + result = handler.stop_agent(invocation) + self.assertIs(result, invocation) + self.assertEqual(len(self.span_exporter.get_finished_spans()), 0) + + def test_fail_agent_without_start_is_noop(self) -> None: + handler = self._make_handler() + invocation = AgentInvocation(agent_name="Not Started") + error = Error(message="boom", type=RuntimeError) + result = handler.fail_agent(invocation, error) + self.assertIs(result, invocation) + self.assertEqual(len(self.span_exporter.get_finished_spans()), 0) + + +class TestAgentCreationHandler(TestCase): + def setUp(self) -> None: + self.span_exporter = InMemorySpanExporter() + self.tracer_provider = TracerProvider() + self.tracer_provider.add_span_processor( + SimpleSpanProcessor(self.span_exporter) + ) + + def _make_handler(self) -> TelemetryHandler: + return TelemetryHandler( + tracer_provider=self.tracer_provider, + ) + + def test_start_stop_create_agent(self) -> None: + handler = self._make_handler() + creation = AgentCreation( + agent_name="New Agent", + agent_id="agent-new-1", + provider="openai", + request_model="gpt-4", + ) + handler.start_create_agent(creation) + handler.stop_create_agent(creation) + + spans = self.span_exporter.get_finished_spans() + self.assertEqual(len(spans), 1) + span = spans[0] + self.assertEqual(span.name, "create_agent New Agent") + self.assertEqual( + span.attributes[GenAI.GEN_AI_OPERATION_NAME], "create_agent" + ) + self.assertEqual( + span.attributes[GenAI.GEN_AI_AGENT_NAME], "New Agent" + ) + + def test_create_agent_span_kind_is_client(self) -> None: + handler = self._make_handler() + creation = AgentCreation(agent_name="Client Agent") + handler.start_create_agent(creation) + handler.stop_create_agent(creation) + + spans = self.span_exporter.get_finished_spans() + from opentelemetry.trace import SpanKind + + self.assertEqual(spans[0].kind, SpanKind.CLIENT) + + def test_create_agent_with_all_base_attributes(self) -> None: + handler = self._make_handler() + creation = AgentCreation( + agent_name="Full Agent", + agent_id="agent-123", + agent_description="A test agent", + agent_version="1.0.0", + provider="openai", + request_model="gpt-4", + server_address="api.openai.com", + server_port=443, + ) + handler.start_create_agent(creation) + handler.stop_create_agent(creation) + + spans = self.span_exporter.get_finished_spans() + self.assertEqual(len(spans), 1) + attrs = spans[0].attributes + self.assertEqual(attrs[GenAI.GEN_AI_OPERATION_NAME], "create_agent") + self.assertEqual(attrs[GenAI.GEN_AI_AGENT_NAME], "Full Agent") + self.assertEqual(attrs[GenAI.GEN_AI_AGENT_ID], "agent-123") + self.assertEqual( + attrs[GenAI.GEN_AI_AGENT_DESCRIPTION], "A test agent" + ) + self.assertEqual(attrs[GenAI.GEN_AI_AGENT_VERSION], "1.0.0") + self.assertEqual(attrs[GenAI.GEN_AI_PROVIDER_NAME], "openai") + self.assertEqual(attrs[GenAI.GEN_AI_REQUEST_MODEL], "gpt-4") + + def test_fail_create_agent(self) -> None: + handler = self._make_handler() + creation = AgentCreation(agent_name="Bad Agent") + handler.start_create_agent(creation) + error = Error(message="creation failed", type=RuntimeError) + handler.fail_create_agent(creation, error) + + spans = self.span_exporter.get_finished_spans() + self.assertEqual(len(spans), 1) + self.assertEqual(spans[0].status.description, "creation failed") + self.assertEqual(spans[0].attributes.get("error.type"), "RuntimeError") + + def test_create_agent_context_manager(self) -> None: + handler = self._make_handler() + creation = AgentCreation( + agent_name="CM Agent", + provider="openai", + ) + with handler.create_agent(creation) as c: + c.agent_id = "assigned-id" + + spans = self.span_exporter.get_finished_spans() + self.assertEqual(len(spans), 1) + self.assertEqual(spans[0].name, "create_agent CM Agent") + + def test_create_agent_context_manager_error(self) -> None: + handler = self._make_handler() + with self.assertRaises(TypeError): + with handler.create_agent(AgentCreation(agent_name="Err")): + raise TypeError("bad type") + + spans = self.span_exporter.get_finished_spans() + self.assertEqual(len(spans), 1) + self.assertEqual(spans[0].attributes.get("error.type"), "TypeError") + + def test_create_agent_context_manager_default(self) -> None: + handler = self._make_handler() + with handler.create_agent() as c: + c.agent_name = "Dynamic Agent" + c.provider = "openai" + + spans = self.span_exporter.get_finished_spans() + self.assertEqual(len(spans), 1) + + def test_stop_create_agent_without_start_is_noop(self) -> None: + handler = self._make_handler() + creation = AgentCreation(agent_name="Not Started") + result = handler.stop_create_agent(creation) + self.assertIs(result, creation) + self.assertEqual(len(self.span_exporter.get_finished_spans()), 0) + + def test_fail_create_agent_without_start_is_noop(self) -> None: + handler = self._make_handler() + creation = AgentCreation(agent_name="Not Started") + error = Error(message="boom", type=RuntimeError) + result = handler.fail_create_agent(creation, error) + self.assertIs(result, creation) + self.assertEqual(len(self.span_exporter.get_finished_spans()), 0) + + +class TestAgentTypes(TestCase): + """Unit tests for the AgentInvocation and AgentCreation dataclasses.""" + + def test_agent_invocation_defaults(self) -> None: + inv = AgentInvocation() + self.assertEqual(inv.operation_name, "invoke_agent") + self.assertIsNone(inv.agent_name) + self.assertIsNone(inv.agent_id) + self.assertIsNone(inv.provider) + self.assertIsNone(inv.request_model) + self.assertTrue(inv.is_remote) + self.assertEqual(inv.input_messages, []) + self.assertEqual(inv.output_messages, []) + self.assertEqual(inv.system_instruction, []) + self.assertIsNone(inv.tool_definitions) + self.assertIsNone(inv.span) + self.assertIsNone(inv.context_token) + + def test_agent_creation_defaults(self) -> None: + creation = AgentCreation() + self.assertEqual(creation.operation_name, "create_agent") + self.assertIsNone(creation.agent_name) + self.assertIsNone(creation.agent_id) + self.assertIsNone(creation.agent_description) + self.assertIsNone(creation.agent_version) + self.assertIsNone(creation.provider) + self.assertIsNone(creation.request_model) + self.assertEqual(creation.system_instruction, []) + self.assertIsNone(creation.server_address) + self.assertIsNone(creation.server_port) + self.assertIsNone(creation.span) + self.assertIsNone(creation.context_token) + + def test_agent_invocation_with_messages(self) -> None: + inv = AgentInvocation( + agent_name="Test", + input_messages=[ + InputMessage( + role="user", parts=[Text(content="Hello")] + ) + ], + output_messages=[ + OutputMessage( + role="assistant", + parts=[Text(content="Hi there!")], + finish_reason="stop", + ) + ], + ) + self.assertEqual(len(inv.input_messages), 1) + self.assertEqual(len(inv.output_messages), 1) + self.assertEqual(inv.input_messages[0].role, "user") + + def test_agent_invocation_custom_attributes(self) -> None: + inv = AgentInvocation( + agent_name="Custom", + attributes={"custom.key": "custom_value"}, + ) + self.assertEqual(inv.attributes["custom.key"], "custom_value") + + def test_agent_creation_custom_attributes(self) -> None: + creation = AgentCreation( + agent_name="Custom", + attributes={"custom.key": "custom_value"}, + ) + self.assertEqual(creation.attributes["custom.key"], "custom_value") From 3474e856e65946e19d49fb662e92a520a5c49da8 Mon Sep 17 00:00:00 2001 From: etserend Date: Fri, 27 Feb 2026 12:02:10 -0600 Subject: [PATCH 02/10] fix lint and add _BaseAgent to sphinx nitpick exceptions --- docs/nitpick-exceptions.ini | 1 + .../tests/test_handler_agent.py | 10 +++++----- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/docs/nitpick-exceptions.ini b/docs/nitpick-exceptions.ini index 73febacaad..782cc1b952 100644 --- a/docs/nitpick-exceptions.ini +++ b/docs/nitpick-exceptions.ini @@ -47,6 +47,7 @@ py-class= fastapi.applications.FastAPI starlette.applications.Starlette _contextvars.Token + opentelemetry.util.genai.types._BaseAgent any= ; API diff --git a/util/opentelemetry-util-genai/tests/test_handler_agent.py b/util/opentelemetry-util-genai/tests/test_handler_agent.py index 2ae7d9938a..215497b3ae 100644 --- a/util/opentelemetry-util-genai/tests/test_handler_agent.py +++ b/util/opentelemetry-util-genai/tests/test_handler_agent.py @@ -294,8 +294,8 @@ def test_create_agent_context_manager(self) -> None: agent_name="CM Agent", provider="openai", ) - with handler.create_agent(creation) as c: - c.agent_id = "assigned-id" + with handler.create_agent(creation) as cr: + cr.agent_id = "assigned-id" spans = self.span_exporter.get_finished_spans() self.assertEqual(len(spans), 1) @@ -313,9 +313,9 @@ def test_create_agent_context_manager_error(self) -> None: def test_create_agent_context_manager_default(self) -> None: handler = self._make_handler() - with handler.create_agent() as c: - c.agent_name = "Dynamic Agent" - c.provider = "openai" + with handler.create_agent() as cr: + cr.agent_name = "Dynamic Agent" + cr.provider = "openai" spans = self.span_exporter.get_finished_spans() self.assertEqual(len(spans), 1) From 157651b10754965b73cf008b69b0f5a00dbf5112 Mon Sep 17 00:00:00 2001 From: etserend Date: Fri, 27 Feb 2026 12:04:55 -0600 Subject: [PATCH 03/10] resolve merge conflict --- util/opentelemetry-util-genai/CHANGELOG.md | 3 +++ 1 file changed, 3 insertions(+) diff --git a/util/opentelemetry-util-genai/CHANGELOG.md b/util/opentelemetry-util-genai/CHANGELOG.md index f64092a697..33a82ce18a 100644 --- a/util/opentelemetry-util-genai/CHANGELOG.md +++ b/util/opentelemetry-util-genai/CHANGELOG.md @@ -7,6 +7,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## Unreleased +- Add `AgentInvocation` type and agent invocation lifecycle support + ([#4274](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/4274)) + ## Version 0.3b0 (2026-02-20) - Add `gen_ai.tool_definitions` to completion hook ([#4181](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/4181)) From f7512c0b856acfd8108d7f44f9ff1a451d8a475c Mon Sep 17 00:00:00 2001 From: etserend Date: Mon, 2 Mar 2026 15:41:50 -0600 Subject: [PATCH 04/10] align AgentInvocation with invoke_agent semconv --- .../opentelemetry/util/genai/span_utils.py | 80 +---- .../src/opentelemetry/util/genai/types.py | 16 +- .../tests/test_handler_agent.py | 296 +++++------------- 3 files changed, 88 insertions(+), 304 deletions(-) diff --git a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/span_utils.py b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/span_utils.py index 85c82b1212..83d40f5aff 100644 --- a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/span_utils.py +++ b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/span_utils.py @@ -80,6 +80,7 @@ def _get_llm_messages_attributes_for_span( input_messages: list[InputMessage], output_messages: list[OutputMessage], system_instruction: list[MessagePart] | None = None, + tool_definitions: list[dict[str, Any]] | None = None, ) -> dict[str, Any]: """Get message attributes formatted for span (JSON string format). @@ -110,6 +111,12 @@ def _get_llm_messages_attributes_for_span( if system_instruction else None, ), + ( + GenAI.GEN_AI_TOOL_DEFINITIONS, + gen_ai_json_dumps(tool_definitions) + if tool_definitions + else None, + ), ) return {key: value for key, value in optional_attrs if value is not None} @@ -292,7 +299,7 @@ def _get_base_agent_common_attributes( (GenAI.GEN_AI_AGENT_NAME, agent.agent_name), (GenAI.GEN_AI_AGENT_ID, agent.agent_id), (GenAI.GEN_AI_AGENT_DESCRIPTION, agent.agent_description), - ("gen_ai.agent.version", agent.agent_version), + (GenAI.GEN_AI_AGENT_VERSION, agent.agent_version), (server_attributes.SERVER_ADDRESS, agent.server_address), (server_attributes.SERVER_PORT, agent.server_port), ) @@ -329,11 +336,6 @@ def _get_agent_common_attributes( return attrs -def _get_agent_span_name(invocation: AgentInvocation) -> str: - """Get the span name for an agent invocation.""" - return _get_base_agent_span_name(invocation) - - def _get_agent_request_attributes( invocation: AgentInvocation, ) -> dict[str, Any]: @@ -381,48 +383,13 @@ def _get_agent_response_attributes( (GenAI.GEN_AI_RESPONSE_ID, invocation.response_id), (GenAI.GEN_AI_USAGE_INPUT_TOKENS, invocation.input_tokens), (GenAI.GEN_AI_USAGE_OUTPUT_TOKENS, invocation.output_tokens), - ) - - return {key: value for key, value in optional_attrs if value is not None} - - -def _get_agent_messages_attributes_for_span( - input_messages: list[InputMessage], - output_messages: list[OutputMessage], - system_instruction: list[MessagePart] | None = None, - tool_definitions: list[dict[str, Any]] | None = None, -) -> dict[str, Any]: - """Get message attributes formatted for span (JSON string format) for agent invocation.""" - if not is_experimental_mode() or get_content_capturing_mode() not in ( - ContentCapturingMode.SPAN_ONLY, - ContentCapturingMode.SPAN_AND_EVENT, - ): - return {} - - optional_attrs = ( ( - GenAI.GEN_AI_INPUT_MESSAGES, - gen_ai_json_dumps([asdict(m) for m in input_messages]) - if input_messages - else None, - ), - ( - GenAI.GEN_AI_OUTPUT_MESSAGES, - gen_ai_json_dumps([asdict(m) for m in output_messages]) - if output_messages - else None, - ), - ( - GenAI.GEN_AI_SYSTEM_INSTRUCTIONS, - gen_ai_json_dumps([asdict(p) for p in system_instruction]) - if system_instruction - else None, + GenAI.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS, + invocation.cache_creation_input_tokens, ), ( - GenAI.GEN_AI_TOOL_DEFINITIONS, - gen_ai_json_dumps(tool_definitions) - if tool_definitions - else None, + GenAI.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS, + invocation.cache_read_input_tokens, ), ) @@ -433,14 +400,14 @@ def _apply_agent_finish_attributes( span: Span, invocation: AgentInvocation ) -> None: """Apply attributes/messages common to agent finish() paths.""" - span.update_name(_get_agent_span_name(invocation)) + span.update_name(_get_base_agent_span_name(invocation)) attributes: dict[str, Any] = {} attributes.update(_get_agent_common_attributes(invocation)) attributes.update(_get_agent_request_attributes(invocation)) attributes.update(_get_agent_response_attributes(invocation)) attributes.update( - _get_agent_messages_attributes_for_span( + _get_llm_messages_attributes_for_span( invocation.input_messages, invocation.output_messages, invocation.system_instruction, @@ -453,26 +420,14 @@ def _apply_agent_finish_attributes( span.set_attributes(attributes) -def _get_creation_common_attributes( - creation: AgentCreation, -) -> dict[str, Any]: - """Get common agent creation attributes.""" - return _get_base_agent_common_attributes(creation) - - -def _get_creation_span_name(creation: AgentCreation) -> str: - """Get the span name for an agent creation.""" - return _get_base_agent_span_name(creation) - - def _apply_creation_finish_attributes( span: Span, creation: AgentCreation ) -> None: """Apply attributes common to agent creation finish() paths.""" - span.update_name(_get_creation_span_name(creation)) + span.update_name(_get_base_agent_span_name(creation)) attributes: dict[str, Any] = {} - attributes.update(_get_creation_common_attributes(creation)) + attributes.update(_get_base_agent_common_attributes(creation)) # System instructions (Opt-In) if ( @@ -509,7 +464,4 @@ def _apply_creation_finish_attributes( "_get_agent_common_attributes", "_get_agent_request_attributes", "_get_agent_response_attributes", - "_get_agent_span_name", - "_get_creation_common_attributes", - "_get_creation_span_name", ] diff --git a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/types.py b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/types.py index 75b108d140..78c5fb17dc 100644 --- a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/types.py +++ b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/types.py @@ -270,25 +270,19 @@ class _BaseAgent(GenAIInvocation): Do not instantiate directly — use AgentInvocation or AgentCreation. """ - # Agent identity agent_name: str | None = None agent_id: str | None = None agent_description: str | None = None agent_version: str | None = None - # Operation operation_name: str = "" provider: str | None = None - # Request request_model: str | None = None - # Content (Opt-In) system_instruction: list[MessagePart] = field( default_factory=_new_system_instruction ) - - # Server server_address: str | None = None server_port: int | None = None @@ -314,7 +308,6 @@ class AgentCreation(_BaseAgent): The span and context_token attributes are set by the TelemetryHandler. """ - # Override default operation name operation_name: str = "create_agent" @@ -330,15 +323,11 @@ class AgentInvocation(_BaseAgent): The span and context_token attributes are set by the TelemetryHandler. """ - # Override default operation name operation_name: str = "invoke_agent" - - # Invoke-specific request attributes (Cond. Required) conversation_id: str | None = None data_source_id: str | None = None output_type: str | None = None - # Request parameters (Recommended) temperature: float | None = None top_p: float | None = None frequency_penalty: float | None = None @@ -348,14 +337,14 @@ class AgentInvocation(_BaseAgent): seed: int | None = None choice_count: int | None = None - # Response (Recommended) response_model_name: str | None = None response_id: str | None = None finish_reasons: list[str] | None = None input_tokens: int | None = None output_tokens: int | None = None + cache_creation_input_tokens: int | None = None + cache_read_input_tokens: int | None = None - # Content (Opt-In) — input/output messages and tool definitions input_messages: list[InputMessage] = field( default_factory=_new_input_messages ) @@ -364,7 +353,6 @@ class AgentInvocation(_BaseAgent): ) tool_definitions: list[dict[str, Any]] | None = None - # Span kind: CLIENT for remote agents, INTERNAL for in-process agents is_remote: bool = True metric_attributes: dict[str, Any] = field( diff --git a/util/opentelemetry-util-genai/tests/test_handler_agent.py b/util/opentelemetry-util-genai/tests/test_handler_agent.py index 215497b3ae..ec557d22dd 100644 --- a/util/opentelemetry-util-genai/tests/test_handler_agent.py +++ b/util/opentelemetry-util-genai/tests/test_handler_agent.py @@ -10,9 +10,9 @@ from opentelemetry.semconv._incubating.attributes import ( gen_ai_attributes as GenAI, ) +from opentelemetry.trace import SpanKind from opentelemetry.util.genai.handler import TelemetryHandler from opentelemetry.util.genai.types import ( - AgentCreation, AgentInvocation, Error, InputMessage, @@ -21,7 +21,9 @@ ) -class TestAgentInvocationHandler(TestCase): +class _AgentTestBase(TestCase): + """Shared setUp and helper for agent handler tests.""" + def setUp(self) -> None: self.span_exporter = InMemorySpanExporter() self.tracer_provider = TracerProvider() @@ -34,9 +36,10 @@ def _make_handler(self) -> TelemetryHandler: tracer_provider=self.tracer_provider, ) - # ---- start/stop agent ---- - def test_start_stop_agent_creates_span(self) -> None: +class TestAgentInvocationHandler(_AgentTestBase): + + def test_start_stop_creates_span(self) -> None: handler = self._make_handler() invocation = AgentInvocation( agent_name="Math Tutor", @@ -63,33 +66,25 @@ def test_start_stop_agent_creates_span(self) -> None: span.attributes[GenAI.GEN_AI_REQUEST_MODEL], "gpt-4" ) - def test_agent_span_kind_client_by_default(self) -> None: + def test_span_kind_client_by_default(self) -> None: handler = self._make_handler() - invocation = AgentInvocation(agent_name="Remote Agent", is_remote=True) + invocation = AgentInvocation(agent_name="Agent", is_remote=True) handler.start_agent(invocation) handler.stop_agent(invocation) + self.assertEqual( + self.span_exporter.get_finished_spans()[0].kind, SpanKind.CLIENT + ) - spans = self.span_exporter.get_finished_spans() - self.assertEqual(len(spans), 1) - from opentelemetry.trace import SpanKind - - self.assertEqual(spans[0].kind, SpanKind.CLIENT) - - def test_agent_span_kind_internal_for_local(self) -> None: + def test_span_kind_internal_for_local(self) -> None: handler = self._make_handler() - invocation = AgentInvocation( - agent_name="Local Agent", is_remote=False - ) + invocation = AgentInvocation(agent_name="Agent", is_remote=False) handler.start_agent(invocation) handler.stop_agent(invocation) + self.assertEqual( + self.span_exporter.get_finished_spans()[0].kind, SpanKind.INTERNAL + ) - spans = self.span_exporter.get_finished_spans() - self.assertEqual(len(spans), 1) - from opentelemetry.trace import SpanKind - - self.assertEqual(spans[0].kind, SpanKind.INTERNAL) - - def test_agent_with_all_attributes(self) -> None: + def test_all_attributes(self) -> None: handler = self._make_handler() invocation = AgentInvocation( agent_name="Full Agent", @@ -116,9 +111,7 @@ def test_agent_with_all_attributes(self) -> None: invocation.finish_reasons = ["stop"] handler.stop_agent(invocation) - spans = self.span_exporter.get_finished_spans() - self.assertEqual(len(spans), 1) - attrs = spans[0].attributes + attrs = self.span_exporter.get_finished_spans()[0].attributes self.assertEqual(attrs[GenAI.GEN_AI_AGENT_NAME], "Full Agent") self.assertEqual(attrs[GenAI.GEN_AI_AGENT_ID], "agent-123") self.assertEqual( @@ -131,27 +124,41 @@ def test_agent_with_all_attributes(self) -> None: self.assertEqual( tuple(attrs[GenAI.GEN_AI_RESPONSE_FINISH_REASONS]), ("stop",) ) + self.assertEqual(attrs["gen_ai.agent.version"], "1.0.0") - # ---- fail agent ---- - - def test_fail_agent_sets_error_status(self) -> None: + def test_cache_token_attributes(self) -> None: handler = self._make_handler() invocation = AgentInvocation( - agent_name="Failing Agent", provider="openai" + agent_name="Cache Agent", provider="openai" ) handler.start_agent(invocation) - error = Error(message="agent crashed", type=RuntimeError) - handler.fail_agent(invocation, error) + invocation.input_tokens = 100 + invocation.cache_creation_input_tokens = 25 + invocation.cache_read_input_tokens = 50 + handler.stop_agent(invocation) - spans = self.span_exporter.get_finished_spans() - self.assertEqual(len(spans), 1) - span = spans[0] + attrs = self.span_exporter.get_finished_spans()[0].attributes + self.assertEqual(attrs[GenAI.GEN_AI_USAGE_INPUT_TOKENS], 100) + self.assertEqual( + attrs[GenAI.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS], 25 + ) + self.assertEqual( + attrs[GenAI.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS], 50 + ) + + def test_fail_sets_error_status(self) -> None: + handler = self._make_handler() + invocation = AgentInvocation(agent_name="Agent", provider="openai") + handler.start_agent(invocation) + handler.fail_agent( + invocation, Error(message="agent crashed", type=RuntimeError) + ) + + span = self.span_exporter.get_finished_spans()[0] self.assertEqual(span.status.description, "agent crashed") self.assertEqual(span.attributes.get("error.type"), "RuntimeError") - # ---- context manager ---- - - def test_agent_context_manager_success(self) -> None: + def test_context_manager_success(self) -> None: handler = self._make_handler() invocation = AgentInvocation( agent_name="CM Agent", provider="openai", request_model="gpt-4" @@ -160,222 +167,67 @@ def test_agent_context_manager_success(self) -> None: inv.input_tokens = 10 inv.output_tokens = 20 - spans = self.span_exporter.get_finished_spans() - self.assertEqual(len(spans), 1) - self.assertEqual(spans[0].name, "invoke_agent CM Agent") + self.assertEqual( + self.span_exporter.get_finished_spans()[0].name, + "invoke_agent CM Agent", + ) - def test_agent_context_manager_error(self) -> None: + def test_context_manager_error(self) -> None: handler = self._make_handler() - invocation = AgentInvocation(agent_name="Error Agent") with self.assertRaises(ValueError): - with handler.agent(invocation): + with handler.agent(AgentInvocation(agent_name="Agent")): raise ValueError("test error") - spans = self.span_exporter.get_finished_spans() - self.assertEqual(len(spans), 1) - self.assertEqual(spans[0].attributes.get("error.type"), "ValueError") + self.assertEqual( + self.span_exporter.get_finished_spans()[0] + .attributes.get("error.type"), + "ValueError", + ) - def test_agent_context_manager_default_invocation(self) -> None: + def test_context_manager_default_invocation(self) -> None: handler = self._make_handler() with handler.agent() as inv: inv.agent_name = "Dynamic Agent" inv.provider = "openai" + self.assertEqual(len(self.span_exporter.get_finished_spans()), 1) - spans = self.span_exporter.get_finished_spans() - self.assertEqual(len(spans), 1) - - # ---- not started ---- - - def test_stop_agent_without_start_is_noop(self) -> None: + def test_stop_without_start_is_noop(self) -> None: handler = self._make_handler() invocation = AgentInvocation(agent_name="Not Started") result = handler.stop_agent(invocation) self.assertIs(result, invocation) self.assertEqual(len(self.span_exporter.get_finished_spans()), 0) - def test_fail_agent_without_start_is_noop(self) -> None: + def test_fail_without_start_is_noop(self) -> None: handler = self._make_handler() invocation = AgentInvocation(agent_name="Not Started") - error = Error(message="boom", type=RuntimeError) - result = handler.fail_agent(invocation, error) - self.assertIs(result, invocation) - self.assertEqual(len(self.span_exporter.get_finished_spans()), 0) - - -class TestAgentCreationHandler(TestCase): - def setUp(self) -> None: - self.span_exporter = InMemorySpanExporter() - self.tracer_provider = TracerProvider() - self.tracer_provider.add_span_processor( - SimpleSpanProcessor(self.span_exporter) - ) - - def _make_handler(self) -> TelemetryHandler: - return TelemetryHandler( - tracer_provider=self.tracer_provider, - ) - - def test_start_stop_create_agent(self) -> None: - handler = self._make_handler() - creation = AgentCreation( - agent_name="New Agent", - agent_id="agent-new-1", - provider="openai", - request_model="gpt-4", + result = handler.fail_agent( + invocation, Error(message="boom", type=RuntimeError) ) - handler.start_create_agent(creation) - handler.stop_create_agent(creation) - - spans = self.span_exporter.get_finished_spans() - self.assertEqual(len(spans), 1) - span = spans[0] - self.assertEqual(span.name, "create_agent New Agent") - self.assertEqual( - span.attributes[GenAI.GEN_AI_OPERATION_NAME], "create_agent" - ) - self.assertEqual( - span.attributes[GenAI.GEN_AI_AGENT_NAME], "New Agent" - ) - - def test_create_agent_span_kind_is_client(self) -> None: - handler = self._make_handler() - creation = AgentCreation(agent_name="Client Agent") - handler.start_create_agent(creation) - handler.stop_create_agent(creation) - - spans = self.span_exporter.get_finished_spans() - from opentelemetry.trace import SpanKind - - self.assertEqual(spans[0].kind, SpanKind.CLIENT) - - def test_create_agent_with_all_base_attributes(self) -> None: - handler = self._make_handler() - creation = AgentCreation( - agent_name="Full Agent", - agent_id="agent-123", - agent_description="A test agent", - agent_version="1.0.0", - provider="openai", - request_model="gpt-4", - server_address="api.openai.com", - server_port=443, - ) - handler.start_create_agent(creation) - handler.stop_create_agent(creation) - - spans = self.span_exporter.get_finished_spans() - self.assertEqual(len(spans), 1) - attrs = spans[0].attributes - self.assertEqual(attrs[GenAI.GEN_AI_OPERATION_NAME], "create_agent") - self.assertEqual(attrs[GenAI.GEN_AI_AGENT_NAME], "Full Agent") - self.assertEqual(attrs[GenAI.GEN_AI_AGENT_ID], "agent-123") - self.assertEqual( - attrs[GenAI.GEN_AI_AGENT_DESCRIPTION], "A test agent" - ) - self.assertEqual(attrs[GenAI.GEN_AI_AGENT_VERSION], "1.0.0") - self.assertEqual(attrs[GenAI.GEN_AI_PROVIDER_NAME], "openai") - self.assertEqual(attrs[GenAI.GEN_AI_REQUEST_MODEL], "gpt-4") - - def test_fail_create_agent(self) -> None: - handler = self._make_handler() - creation = AgentCreation(agent_name="Bad Agent") - handler.start_create_agent(creation) - error = Error(message="creation failed", type=RuntimeError) - handler.fail_create_agent(creation, error) - - spans = self.span_exporter.get_finished_spans() - self.assertEqual(len(spans), 1) - self.assertEqual(spans[0].status.description, "creation failed") - self.assertEqual(spans[0].attributes.get("error.type"), "RuntimeError") - - def test_create_agent_context_manager(self) -> None: - handler = self._make_handler() - creation = AgentCreation( - agent_name="CM Agent", - provider="openai", - ) - with handler.create_agent(creation) as cr: - cr.agent_id = "assigned-id" - - spans = self.span_exporter.get_finished_spans() - self.assertEqual(len(spans), 1) - self.assertEqual(spans[0].name, "create_agent CM Agent") - - def test_create_agent_context_manager_error(self) -> None: - handler = self._make_handler() - with self.assertRaises(TypeError): - with handler.create_agent(AgentCreation(agent_name="Err")): - raise TypeError("bad type") - - spans = self.span_exporter.get_finished_spans() - self.assertEqual(len(spans), 1) - self.assertEqual(spans[0].attributes.get("error.type"), "TypeError") - - def test_create_agent_context_manager_default(self) -> None: - handler = self._make_handler() - with handler.create_agent() as cr: - cr.agent_name = "Dynamic Agent" - cr.provider = "openai" - - spans = self.span_exporter.get_finished_spans() - self.assertEqual(len(spans), 1) - - def test_stop_create_agent_without_start_is_noop(self) -> None: - handler = self._make_handler() - creation = AgentCreation(agent_name="Not Started") - result = handler.stop_create_agent(creation) - self.assertIs(result, creation) - self.assertEqual(len(self.span_exporter.get_finished_spans()), 0) - - def test_fail_create_agent_without_start_is_noop(self) -> None: - handler = self._make_handler() - creation = AgentCreation(agent_name="Not Started") - error = Error(message="boom", type=RuntimeError) - result = handler.fail_create_agent(creation, error) - self.assertIs(result, creation) + self.assertIs(result, invocation) self.assertEqual(len(self.span_exporter.get_finished_spans()), 0) -class TestAgentTypes(TestCase): - """Unit tests for the AgentInvocation and AgentCreation dataclasses.""" +class TestAgentInvocationType(TestCase): - def test_agent_invocation_defaults(self) -> None: + def test_defaults(self) -> None: inv = AgentInvocation() self.assertEqual(inv.operation_name, "invoke_agent") self.assertIsNone(inv.agent_name) - self.assertIsNone(inv.agent_id) self.assertIsNone(inv.provider) self.assertIsNone(inv.request_model) self.assertTrue(inv.is_remote) self.assertEqual(inv.input_messages, []) self.assertEqual(inv.output_messages, []) - self.assertEqual(inv.system_instruction, []) self.assertIsNone(inv.tool_definitions) - self.assertIsNone(inv.span) - self.assertIsNone(inv.context_token) + self.assertIsNone(inv.cache_creation_input_tokens) + self.assertIsNone(inv.cache_read_input_tokens) - def test_agent_creation_defaults(self) -> None: - creation = AgentCreation() - self.assertEqual(creation.operation_name, "create_agent") - self.assertIsNone(creation.agent_name) - self.assertIsNone(creation.agent_id) - self.assertIsNone(creation.agent_description) - self.assertIsNone(creation.agent_version) - self.assertIsNone(creation.provider) - self.assertIsNone(creation.request_model) - self.assertEqual(creation.system_instruction, []) - self.assertIsNone(creation.server_address) - self.assertIsNone(creation.server_port) - self.assertIsNone(creation.span) - self.assertIsNone(creation.context_token) - - def test_agent_invocation_with_messages(self) -> None: + def test_with_messages(self) -> None: inv = AgentInvocation( agent_name="Test", input_messages=[ - InputMessage( - role="user", parts=[Text(content="Hello")] - ) + InputMessage(role="user", parts=[Text(content="Hello")]) ], output_messages=[ OutputMessage( @@ -386,19 +238,11 @@ def test_agent_invocation_with_messages(self) -> None: ], ) self.assertEqual(len(inv.input_messages), 1) - self.assertEqual(len(inv.output_messages), 1) self.assertEqual(inv.input_messages[0].role, "user") - def test_agent_invocation_custom_attributes(self) -> None: + def test_custom_attributes(self) -> None: inv = AgentInvocation( agent_name="Custom", attributes={"custom.key": "custom_value"}, ) self.assertEqual(inv.attributes["custom.key"], "custom_value") - - def test_agent_creation_custom_attributes(self) -> None: - creation = AgentCreation( - agent_name="Custom", - attributes={"custom.key": "custom_value"}, - ) - self.assertEqual(creation.attributes["custom.key"], "custom_value") From d6674d199476d3362243868e1fb5ae75d66a4bdc Mon Sep 17 00:00:00 2001 From: etserend Date: Fri, 6 Mar 2026 11:32:11 -0600 Subject: [PATCH 05/10] update span utils --- .../opentelemetry/util/genai/span_utils.py | 44 +++++++++++++------ 1 file changed, 30 insertions(+), 14 deletions(-) diff --git a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/span_utils.py b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/span_utils.py index 83d40f5aff..9236236209 100644 --- a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/span_utils.py +++ b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/span_utils.py @@ -76,6 +76,32 @@ def _get_llm_span_name(invocation: LLMInvocation) -> str: return f"{invocation.operation_name} {invocation.request_model}".strip() +def _get_system_instructions_for_span( + system_instruction: list[MessagePart] | None = None, +) -> dict[str, Any]: + """Get system instructions attribute formatted for span (JSON string format). + + Can be used with agent/llm/tool invocations. + Returns empty dict if not in experimental mode or content capturing is disabled. + """ + if ( + not is_experimental_mode() + or get_content_capturing_mode() + not in ( + ContentCapturingMode.SPAN_ONLY, + ContentCapturingMode.SPAN_AND_EVENT, + ) + or not system_instruction + ): + return {} + + return { + GenAI.GEN_AI_SYSTEM_INSTRUCTIONS: gen_ai_json_dumps( + [asdict(p) for p in system_instruction] + ) + } + + def _get_llm_messages_attributes_for_span( input_messages: list[InputMessage], output_messages: list[OutputMessage], @@ -429,20 +455,9 @@ def _apply_creation_finish_attributes( attributes: dict[str, Any] = {} attributes.update(_get_base_agent_common_attributes(creation)) - # System instructions (Opt-In) - if ( - is_experimental_mode() - and get_content_capturing_mode() - in ( - ContentCapturingMode.SPAN_ONLY, - ContentCapturingMode.SPAN_AND_EVENT, - ) - and creation.system_instruction - ): - attributes[GenAI.GEN_AI_SYSTEM_INSTRUCTIONS] = gen_ai_json_dumps( - [asdict(p) for p in creation.system_instruction] - ) - + attributes.update( + _get_system_instructions_for_span(creation.system_instruction) + ) attributes.update(creation.attributes) if attributes: @@ -461,6 +476,7 @@ def _apply_creation_finish_attributes( "_get_base_agent_span_name", "_apply_agent_finish_attributes", "_apply_creation_finish_attributes", + "_get_system_instructions_for_span", "_get_agent_common_attributes", "_get_agent_request_attributes", "_get_agent_response_attributes", From a310d6d2abd310c7ccf613ecfd7b511c4429ed15 Mon Sep 17 00:00:00 2001 From: etserend Date: Tue, 10 Mar 2026 10:55:29 -0500 Subject: [PATCH 06/10] apply _lifecycle_context and error handling --- .../src/opentelemetry/util/genai/handler.py | 205 ++++++++---------- .../opentelemetry/util/genai/span_utils.py | 24 +- .../src/opentelemetry/util/genai/types.py | 24 +- .../tests/test_handler_agent.py | 35 +-- 4 files changed, 101 insertions(+), 187 deletions(-) diff --git a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/handler.py b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/handler.py index c4a85f80af..d47bb3c975 100644 --- a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/handler.py +++ b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/handler.py @@ -60,9 +60,10 @@ from __future__ import annotations +import logging import timeit from contextlib import contextmanager -from typing import Iterator +from typing import Callable, Iterator, TypeVar from opentelemetry import context as otel_context from opentelemetry._logs import ( @@ -81,19 +82,58 @@ from opentelemetry.util.genai.metrics import InvocationMetricsRecorder from opentelemetry.util.genai.span_utils import ( _apply_agent_finish_attributes, - _apply_creation_finish_attributes, _apply_error_attributes, _apply_llm_finish_attributes, + _get_base_agent_common_attributes, + _get_base_agent_span_name, _maybe_emit_llm_event, ) from opentelemetry.util.genai.types import ( - AgentCreation, AgentInvocation, Error, + GenAIInvocation, LLMInvocation, ) from opentelemetry.util.genai.version import __version__ +_logger = logging.getLogger(__name__) + +_T = TypeVar("_T", bound=GenAIInvocation) + + +@contextmanager +def _lifecycle_context( + invocation: _T, + start: Callable[[_T], _T], + stop: Callable[[_T], _T], + fail: Callable[[_T, Error], _T], + label: str, +) -> Iterator[_T]: + """Shared lifecycle context manager for GenAI invocations. + + Wraps start/stop/fail calls with error handling so SDK-internal + errors never propagate to the caller. + """ + try: + start(invocation) + except Exception: + _logger.warning("Failed to start %s span", label, exc_info=True) + try: + yield invocation + except Exception as exc: + try: + fail(invocation, Error(message=str(exc), type=type(exc))) + except Exception: + _logger.warning( + "Failed to record %s failure", label, exc_info=True + ) + raise + else: + try: + stop(invocation) + except Exception: + _logger.warning("Failed to stop %s span", label, exc_info=True) + class TelemetryHandler: """ @@ -163,13 +203,13 @@ def stop_llm(self, invocation: LLMInvocation) -> LLMInvocation: # pylint: disab # TODO: Provide feedback that this invocation was not started return invocation - span = invocation.span - _apply_llm_finish_attributes(span, invocation) - self._record_llm_metrics(invocation, span) - _maybe_emit_llm_event(self._logger, span, invocation) - # Detach context and end span - otel_context.detach(invocation.context_token) - span.end() + try: + _apply_llm_finish_attributes(invocation.span, invocation) + self._record_llm_metrics(invocation, invocation.span) + _maybe_emit_llm_event(self._logger, invocation.span, invocation) + finally: + otel_context.detach(invocation.context_token) + invocation.span.end() return invocation def fail_llm( # pylint: disable=no-self-use @@ -180,15 +220,19 @@ def fail_llm( # pylint: disable=no-self-use # TODO: Provide feedback that this invocation was not started return invocation - span = invocation.span - _apply_llm_finish_attributes(invocation.span, invocation) - _apply_error_attributes(invocation.span, error) - error_type = getattr(error.type, "__qualname__", None) - self._record_llm_metrics(invocation, span, error_type=error_type) - _maybe_emit_llm_event(self._logger, span, invocation, error) - # Detach context and end span - otel_context.detach(invocation.context_token) - span.end() + try: + _apply_llm_finish_attributes(invocation.span, invocation) + _apply_error_attributes(invocation.span, error) + error_type = getattr(error.type, "__qualname__", None) + self._record_llm_metrics( + invocation, invocation.span, error_type=error_type + ) + _maybe_emit_llm_event( + self._logger, invocation.span, invocation, error + ) + finally: + otel_context.detach(invocation.context_token) + invocation.span.end() return invocation @contextmanager @@ -207,13 +251,10 @@ def llm( invocation = LLMInvocation( request_model="", ) - self.start_llm(invocation) - try: - yield invocation - except Exception as exc: - self.fail_llm(invocation, Error(message=str(exc), type=type(exc))) - raise - self.stop_llm(invocation) + with _lifecycle_context( + invocation, self.start_llm, self.stop_llm, self.fail_llm, "llm" + ) as inv: + yield inv # ---- Agent invocation lifecycle ---- @@ -222,11 +263,10 @@ def start_agent( invocation: AgentInvocation, ) -> AgentInvocation: """Start an agent invocation and create a pending span entry.""" - span_name = f"{invocation.operation_name} {invocation.agent_name}".strip() - kind = SpanKind.CLIENT if invocation.is_remote else SpanKind.INTERNAL span = self._tracer.start_span( - name=span_name, - kind=kind, + name=_get_base_agent_span_name(invocation), + kind=SpanKind.CLIENT, + attributes=_get_base_agent_common_attributes(invocation), ) invocation.monotonic_start_s = timeit.default_timer() invocation.span = span @@ -240,10 +280,11 @@ def stop_agent(self, invocation: AgentInvocation) -> AgentInvocation: # pylint: if invocation.context_token is None or invocation.span is None: return invocation - span = invocation.span - _apply_agent_finish_attributes(span, invocation) - otel_context.detach(invocation.context_token) - span.end() + try: + _apply_agent_finish_attributes(invocation.span, invocation) + finally: + otel_context.detach(invocation.context_token) + invocation.span.end() return invocation def fail_agent( # pylint: disable=no-self-use @@ -253,11 +294,12 @@ def fail_agent( # pylint: disable=no-self-use if invocation.context_token is None or invocation.span is None: return invocation - span = invocation.span - _apply_agent_finish_attributes(span, invocation) - _apply_error_attributes(span, error) - otel_context.detach(invocation.context_token) - span.end() + try: + _apply_agent_finish_attributes(invocation.span, invocation) + _apply_error_attributes(invocation.span, error) + finally: + otel_context.detach(invocation.context_token) + invocation.span.end() return invocation @contextmanager @@ -274,83 +316,14 @@ def agent( """ if invocation is None: invocation = AgentInvocation() - self.start_agent(invocation) - try: - yield invocation - except Exception as exc: - self.fail_agent( - invocation, Error(message=str(exc), type=type(exc)) - ) - raise - self.stop_agent(invocation) - - # ---- Agent creation lifecycle ---- - - def start_create_agent( - self, - creation: AgentCreation, - ) -> AgentCreation: - """Start an agent creation and create a pending span entry.""" - span_name = f"{creation.operation_name} {creation.agent_name}".strip() - span = self._tracer.start_span( - name=span_name, - kind=SpanKind.CLIENT, - ) - creation.monotonic_start_s = timeit.default_timer() - creation.span = span - creation.context_token = otel_context.attach( - set_span_in_context(span) - ) - return creation - - def stop_create_agent(self, creation: AgentCreation) -> AgentCreation: # pylint: disable=no-self-use - """Finalize an agent creation successfully and end its span.""" - if creation.context_token is None or creation.span is None: - return creation - - span = creation.span - _apply_creation_finish_attributes(span, creation) - otel_context.detach(creation.context_token) - span.end() - return creation - - def fail_create_agent( # pylint: disable=no-self-use - self, creation: AgentCreation, error: Error - ) -> AgentCreation: - """Fail an agent creation and end its span with error status.""" - if creation.context_token is None or creation.span is None: - return creation - - span = creation.span - _apply_creation_finish_attributes(span, creation) - _apply_error_attributes(span, error) - otel_context.detach(creation.context_token) - span.end() - return creation - - @contextmanager - def create_agent( - self, creation: AgentCreation | None = None - ) -> Iterator[AgentCreation]: - """Context manager for agent creation. - - Only set data attributes on the creation object, do not modify the span or context. - - Starts the span on entry. On normal exit, finalizes the creation and ends the span. - If an exception occurs inside the context, marks the span as error, ends it, and - re-raises the original exception. - """ - if creation is None: - creation = AgentCreation() - self.start_create_agent(creation) - try: - yield creation - except Exception as exc: - self.fail_create_agent( - creation, Error(message=str(exc), type=type(exc)) - ) - raise - self.stop_create_agent(creation) + with _lifecycle_context( + invocation, + self.start_agent, + self.stop_agent, + self.fail_agent, + "agent", + ) as inv: + yield inv def get_telemetry_handler( diff --git a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/span_utils.py b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/span_utils.py index 9236236209..cff1bc2afb 100644 --- a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/span_utils.py +++ b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/span_utils.py @@ -32,7 +32,6 @@ from opentelemetry.trace.propagation import set_span_in_context from opentelemetry.trace.status import Status, StatusCode from opentelemetry.util.genai.types import ( - AgentCreation, AgentInvocation, Error, InputMessage, @@ -139,9 +138,7 @@ def _get_llm_messages_attributes_for_span( ), ( GenAI.GEN_AI_TOOL_DEFINITIONS, - gen_ai_json_dumps(tool_definitions) - if tool_definitions - else None, + gen_ai_json_dumps(tool_definitions) if tool_definitions else None, ), ) @@ -446,24 +443,6 @@ def _apply_agent_finish_attributes( span.set_attributes(attributes) -def _apply_creation_finish_attributes( - span: Span, creation: AgentCreation -) -> None: - """Apply attributes common to agent creation finish() paths.""" - span.update_name(_get_base_agent_span_name(creation)) - - attributes: dict[str, Any] = {} - attributes.update(_get_base_agent_common_attributes(creation)) - - attributes.update( - _get_system_instructions_for_span(creation.system_instruction) - ) - attributes.update(creation.attributes) - - if attributes: - span.set_attributes(attributes) - - __all__ = [ "_apply_llm_finish_attributes", "_apply_error_attributes", @@ -475,7 +454,6 @@ def _apply_creation_finish_attributes( "_get_base_agent_common_attributes", "_get_base_agent_span_name", "_apply_agent_finish_attributes", - "_apply_creation_finish_attributes", "_get_system_instructions_for_span", "_get_agent_common_attributes", "_get_agent_request_attributes", diff --git a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/types.py b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/types.py index 78c5fb17dc..4afd2af3e8 100644 --- a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/types.py +++ b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/types.py @@ -259,7 +259,7 @@ class LLMInvocation(GenAIInvocation): @dataclass class _BaseAgent(GenAIInvocation): """ - Shared base class for agent lifecycle types (AgentInvocation, AgentCreation). + Shared base class for agent lifecycle types. Contains fields common to all agent operations: identity, provider, model, system instructions, server info, and telemetry plumbing. @@ -267,7 +267,7 @@ class _BaseAgent(GenAIInvocation): Follows semconv for GenAI agent spans: https://github.com/open-telemetry/semantic-conventions/blob/main/docs/gen-ai/gen-ai-agent-spans.md - Do not instantiate directly — use AgentInvocation or AgentCreation. + Do not instantiate directly — use AgentInvocation. """ agent_name: str | None = None @@ -275,7 +275,6 @@ class _BaseAgent(GenAIInvocation): agent_description: str | None = None agent_version: str | None = None - operation_name: str = "" provider: str | None = None request_model: str | None = None @@ -296,21 +295,6 @@ class _BaseAgent(GenAIInvocation): monotonic_start_s: float | None = None -@dataclass -class AgentCreation(_BaseAgent): - """ - Represents agent creation/initialization (create_agent operation). - - Follows semconv for GenAI agent spans: - https://github.com/open-telemetry/semantic-conventions/blob/main/docs/gen-ai/gen-ai-agent-spans.md#create-agent-span - - When creating an AgentCreation object, only update the data attributes. - The span and context_token attributes are set by the TelemetryHandler. - """ - - operation_name: str = "create_agent" - - @dataclass class AgentInvocation(_BaseAgent): """ @@ -323,7 +307,7 @@ class AgentInvocation(_BaseAgent): The span and context_token attributes are set by the TelemetryHandler. """ - operation_name: str = "invoke_agent" + operation_name: str = GenAI.GenAiOperationNameValues.INVOKE_AGENT.value conversation_id: str | None = None data_source_id: str | None = None output_type: str | None = None @@ -353,8 +337,6 @@ class AgentInvocation(_BaseAgent): ) tool_definitions: list[dict[str, Any]] | None = None - is_remote: bool = True - metric_attributes: dict[str, Any] = field( default_factory=_new_str_any_dict ) diff --git a/util/opentelemetry-util-genai/tests/test_handler_agent.py b/util/opentelemetry-util-genai/tests/test_handler_agent.py index ec557d22dd..f011b2ac3c 100644 --- a/util/opentelemetry-util-genai/tests/test_handler_agent.py +++ b/util/opentelemetry-util-genai/tests/test_handler_agent.py @@ -38,7 +38,6 @@ def _make_handler(self) -> TelemetryHandler: class TestAgentInvocationHandler(_AgentTestBase): - def test_start_stop_creates_span(self) -> None: handler = self._make_handler() invocation = AgentInvocation( @@ -59,31 +58,18 @@ def test_start_stop_creates_span(self) -> None: self.assertEqual( span.attributes[GenAI.GEN_AI_AGENT_NAME], "Math Tutor" ) - self.assertEqual( - span.attributes[GenAI.GEN_AI_PROVIDER_NAME], "openai" - ) - self.assertEqual( - span.attributes[GenAI.GEN_AI_REQUEST_MODEL], "gpt-4" - ) + self.assertEqual(span.attributes[GenAI.GEN_AI_PROVIDER_NAME], "openai") + self.assertEqual(span.attributes[GenAI.GEN_AI_REQUEST_MODEL], "gpt-4") def test_span_kind_client_by_default(self) -> None: handler = self._make_handler() - invocation = AgentInvocation(agent_name="Agent", is_remote=True) + invocation = AgentInvocation(agent_name="Agent") handler.start_agent(invocation) handler.stop_agent(invocation) self.assertEqual( self.span_exporter.get_finished_spans()[0].kind, SpanKind.CLIENT ) - def test_span_kind_internal_for_local(self) -> None: - handler = self._make_handler() - invocation = AgentInvocation(agent_name="Agent", is_remote=False) - handler.start_agent(invocation) - handler.stop_agent(invocation) - self.assertEqual( - self.span_exporter.get_finished_spans()[0].kind, SpanKind.INTERNAL - ) - def test_all_attributes(self) -> None: handler = self._make_handler() invocation = AgentInvocation( @@ -114,9 +100,7 @@ def test_all_attributes(self) -> None: attrs = self.span_exporter.get_finished_spans()[0].attributes self.assertEqual(attrs[GenAI.GEN_AI_AGENT_NAME], "Full Agent") self.assertEqual(attrs[GenAI.GEN_AI_AGENT_ID], "agent-123") - self.assertEqual( - attrs[GenAI.GEN_AI_AGENT_DESCRIPTION], "A test agent" - ) + self.assertEqual(attrs[GenAI.GEN_AI_AGENT_DESCRIPTION], "A test agent") self.assertEqual(attrs[GenAI.GEN_AI_RESPONSE_MODEL], "gpt-4-0613") self.assertEqual(attrs[GenAI.GEN_AI_RESPONSE_ID], "resp-abc") self.assertEqual(attrs[GenAI.GEN_AI_USAGE_INPUT_TOKENS], 100) @@ -142,9 +126,7 @@ def test_cache_token_attributes(self) -> None: self.assertEqual( attrs[GenAI.GEN_AI_USAGE_CACHE_CREATION_INPUT_TOKENS], 25 ) - self.assertEqual( - attrs[GenAI.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS], 50 - ) + self.assertEqual(attrs[GenAI.GEN_AI_USAGE_CACHE_READ_INPUT_TOKENS], 50) def test_fail_sets_error_status(self) -> None: handler = self._make_handler() @@ -179,8 +161,9 @@ def test_context_manager_error(self) -> None: raise ValueError("test error") self.assertEqual( - self.span_exporter.get_finished_spans()[0] - .attributes.get("error.type"), + self.span_exporter.get_finished_spans()[0].attributes.get( + "error.type" + ), "ValueError", ) @@ -209,14 +192,12 @@ def test_fail_without_start_is_noop(self) -> None: class TestAgentInvocationType(TestCase): - def test_defaults(self) -> None: inv = AgentInvocation() self.assertEqual(inv.operation_name, "invoke_agent") self.assertIsNone(inv.agent_name) self.assertIsNone(inv.provider) self.assertIsNone(inv.request_model) - self.assertTrue(inv.is_remote) self.assertEqual(inv.input_messages, []) self.assertEqual(inv.output_messages, []) self.assertIsNone(inv.tool_definitions) From 8485542a5c0899acc3f3bba30da66219779725c0 Mon Sep 17 00:00:00 2001 From: etserend Date: Tue, 17 Mar 2026 13:21:11 -0500 Subject: [PATCH 07/10] update lifecycle context --- util/opentelemetry-util-genai/pyproject.toml | 6 +++--- .../src/opentelemetry/util/genai/handler.py | 13 ++++++------- 2 files changed, 9 insertions(+), 10 deletions(-) diff --git a/util/opentelemetry-util-genai/pyproject.toml b/util/opentelemetry-util-genai/pyproject.toml index c9d4d388c1..bac4191eee 100644 --- a/util/opentelemetry-util-genai/pyproject.toml +++ b/util/opentelemetry-util-genai/pyproject.toml @@ -26,9 +26,9 @@ classifiers = [ "Programming Language :: Python :: 3.14", ] dependencies = [ - "opentelemetry-instrumentation ~= 0.58b0", - "opentelemetry-semantic-conventions ~= 0.58b0", - "opentelemetry-api>=1.31.0", + "opentelemetry-instrumentation ~= 0.61b0", + "opentelemetry-semantic-conventions ~= 0.61b0", + "opentelemetry-api>=1.40.0", ] [project.entry-points.opentelemetry_genai_completion_hook] diff --git a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/handler.py b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/handler.py index d47bb3c975..cf247aba4f 100644 --- a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/handler.py +++ b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/handler.py @@ -116,23 +116,22 @@ def _lifecycle_context( """ try: start(invocation) - except Exception: + except Exception: # pylint: disable=broad-exception-caught _logger.warning("Failed to start %s span", label, exc_info=True) try: yield invocation except Exception as exc: try: fail(invocation, Error(message=str(exc), type=type(exc))) - except Exception: + except Exception: # pylint: disable=broad-exception-caught _logger.warning( "Failed to record %s failure", label, exc_info=True ) raise - else: - try: - stop(invocation) - except Exception: - _logger.warning("Failed to stop %s span", label, exc_info=True) + try: + stop(invocation) + except Exception: # pylint: disable=broad-exception-caught + _logger.warning("Failed to stop %s span", label, exc_info=True) class TelemetryHandler: From a3a0f9e6088cbb61b7fa1a3c16b317d7476e2ab3 Mon Sep 17 00:00:00 2001 From: etserend Date: Tue, 17 Mar 2026 13:24:19 -0500 Subject: [PATCH 08/10] resolve merge conflict --- util/opentelemetry-util-genai/CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/util/opentelemetry-util-genai/CHANGELOG.md b/util/opentelemetry-util-genai/CHANGELOG.md index 1b0a444b95..d2b4fce24f 100644 --- a/util/opentelemetry-util-genai/CHANGELOG.md +++ b/util/opentelemetry-util-genai/CHANGELOG.md @@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## Unreleased +- Add `AgentInvocation` type and agent invocation lifecycle support + ([#4274](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/4274)) - Populate schema_url on metrics ([#4320](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/4320)) - Add workflow invocation type to genAI utils From 4a2a5a906a456c09495efaa4e948ac52668ae168 Mon Sep 17 00:00:00 2001 From: etserend Date: Tue, 17 Mar 2026 15:07:21 -0500 Subject: [PATCH 09/10] invoke agent demo app --- .../examples/utils-demo/README.rst | 72 +++++++++++ .../examples/utils-demo/main.py | 121 ++++++++++++++++++ .../examples/utils-demo/requirements.txt | 9 ++ 3 files changed, 202 insertions(+) create mode 100644 instrumentation-genai/opentelemetry-instrumentation-vertexai/examples/utils-demo/README.rst create mode 100644 instrumentation-genai/opentelemetry-instrumentation-vertexai/examples/utils-demo/main.py create mode 100644 instrumentation-genai/opentelemetry-instrumentation-vertexai/examples/utils-demo/requirements.txt diff --git a/instrumentation-genai/opentelemetry-instrumentation-vertexai/examples/utils-demo/README.rst b/instrumentation-genai/opentelemetry-instrumentation-vertexai/examples/utils-demo/README.rst new file mode 100644 index 0000000000..012ab284a6 --- /dev/null +++ b/instrumentation-genai/opentelemetry-instrumentation-vertexai/examples/utils-demo/README.rst @@ -0,0 +1,72 @@ +OpenTelemetry GenAI Utils — invoke_agent Demo +================================================ + +This example shows how to combine ``VertexAIInstrumentor`` (automatic +instrumentation of Vertex AI SDK calls) with manual +``TelemetryHandler.agent()`` spans from ``opentelemetry-util-genai`` +to extend existing instrumentation with agent invocation lifecycle spans. + +The demo performs the following steps: + +1. **LLM call** — generates content via the ``google-genai`` SDK (API key). + Vertex AI SDK calls are auto-instrumented by ``VertexAIInstrumentor``. +2. **Create local agent** — creates a ``LanggraphAgent`` with a currency + exchange tool. +3. **Deploy to Agent Engine** — deploys the agent to Vertex AI Agent Engine. +4. **Query the remote agent** — sends a currency exchange query wrapped in + an ``invoke_agent`` span via ``TelemetryHandler.agent()``. +5. **Cleanup** — deletes the deployed agent. + +Prerequisites +------------- + +- A GCP project with the **Vertex AI API** enabled. +- **Application Default Credentials** (ADC) configured: + ``gcloud auth application-default login`` +- A **Google API key** for LLM calls (set as ``GOOGLE_API_KEY``). +- A **GCS staging bucket** for agent deployment: + +:: + + gsutil mb -p -l us-central1 -b on gs://-agent-staging/ + gsutil pap set enforced gs://-agent-staging/ + +Setup +----- + +An OTLP compatible endpoint should be listening for traces and logs on +http://localhost:4317. If not, update ``OTEL_EXPORTER_OTLP_ENDPOINT``. + +Set up a virtual environment: + +:: + + python3 -m venv .venv + source .venv/bin/activate + pip install -r requirements.txt + pip install -e ../../../../util/opentelemetry-util-genai/ + +Run +--- + +:: + + export GOOGLE_API_KEY="AIza..." + export GCP_PROJECT="your-project-id" + python main.py + +The deploy step takes 3-5 minutes. You should see an ``invoke_agent`` +span printed to the console and exported to your OTLP endpoint wrapping +the agent query, along with the agent's response to the currency +exchange query. + +Environment Variables +--------------------- + +- ``GOOGLE_API_KEY`` — API key for ``google-genai`` SDK LLM calls. +- ``GCP_PROJECT`` — GCP project ID (default: ``gcp-o11yinframon-nprd-81065``). +- ``GCP_LOCATION`` — GCP region (default: ``us-central1``). +- ``GCP_STAGING_BUCKET`` — GCS bucket for agent staging + (default: ``gs://-agent-staging``). +- ``OTEL_EXPORTER_OTLP_ENDPOINT`` — OTLP endpoint + (default: ``http://localhost:4317``). diff --git a/instrumentation-genai/opentelemetry-instrumentation-vertexai/examples/utils-demo/main.py b/instrumentation-genai/opentelemetry-instrumentation-vertexai/examples/utils-demo/main.py new file mode 100644 index 0000000000..8751b91551 --- /dev/null +++ b/instrumentation-genai/opentelemetry-instrumentation-vertexai/examples/utils-demo/main.py @@ -0,0 +1,121 @@ +# pylint: skip-file +""" +invoke_agent instrumentation demo. + +Combines automatic instrumentation (``VertexAIInstrumentor``) with +manual ``TelemetryHandler.agent()`` spans from ``opentelemetry-util-genai`` +to show how genai-utils extends the existing Vertex AI instrumentation +with agent invocation lifecycle spans. + +The ``generate_content`` call is made inside the ``invoke_agent`` context, +so the auto-instrumented LLM span appears as a child of the ``invoke_agent`` +parent span. + +Set environment variables before running: + + export GCP_PROJECT="your-project-id" + python main.py +""" + +import os + +import vertexai +from vertexai.generative_models import GenerativeModel + +# NOTE: OpenTelemetry Python Logs and Events APIs are in beta +from opentelemetry import _logs, metrics, trace +from opentelemetry.exporter.otlp.proto.grpc._log_exporter import ( + OTLPLogExporter, +) +from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import ( + OTLPMetricExporter, +) +from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import ( + OTLPSpanExporter, +) +from opentelemetry.instrumentation.vertexai import VertexAIInstrumentor +from opentelemetry.sdk._logs import LoggerProvider +from opentelemetry.sdk._logs.export import BatchLogRecordProcessor +from opentelemetry.sdk.metrics import MeterProvider +from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader +from opentelemetry.sdk.resources import Resource +from opentelemetry.sdk.trace import TracerProvider +from opentelemetry.sdk.trace.export import ( + BatchSpanProcessor, + ConsoleSpanExporter, +) +from opentelemetry.util.genai.handler import TelemetryHandler +from opentelemetry.util.genai.types import AgentInvocation + +OTLP_ENDPOINT = os.environ.get( + "OTEL_EXPORTER_OTLP_ENDPOINT", "http://localhost:4317" +) +GCP_PROJECT = os.environ.get("GCP_PROJECT", "gcp-o11yinframon-nprd-81065") +GCP_LOCATION = os.environ.get("GCP_LOCATION", "us-central1") +MODEL = "gemini-2.5-flash" +AGENT_NAME = "Currency Exchange Agent" + +resource = Resource.create({"service.name": "invoke-agent-demo"}) + +# configure tracing +tracer_provider = TracerProvider(resource=resource) +tracer_provider.add_span_processor(BatchSpanProcessor(ConsoleSpanExporter())) +tracer_provider.add_span_processor( + BatchSpanProcessor(OTLPSpanExporter(endpoint=OTLP_ENDPOINT, insecure=True)) +) +trace.set_tracer_provider(tracer_provider) + +# configure metrics +metric_reader = PeriodicExportingMetricReader( + OTLPMetricExporter(endpoint=OTLP_ENDPOINT, insecure=True) +) +meter_provider = MeterProvider( + resource=resource, metric_readers=[metric_reader] +) +metrics.set_meter_provider(meter_provider) + +# configure logging and events +logger_provider = LoggerProvider(resource=resource) +logger_provider.add_log_record_processor( + BatchLogRecordProcessor( + OTLPLogExporter(endpoint=OTLP_ENDPOINT, insecure=True) + ) +) +_logs.set_logger_provider(logger_provider) + +# Auto-instrument Vertex AI SDK calls (generate_content, etc.) +VertexAIInstrumentor().instrument() + + +def main(): + vertexai.init(project=GCP_PROJECT, location=GCP_LOCATION) + model = GenerativeModel(MODEL) + handler = TelemetryHandler() + + # ----- invoke_agent span wrapping an LLM call ----- + # The generate_content call is auto-instrumented by VertexAIInstrumentor + # and appears as a child span under the invoke_agent parent span. + print("[invoke_agent] Starting agent invocation...") + with handler.agent( + AgentInvocation( + agent_name=AGENT_NAME, + provider="gcp_vertex_ai", + request_model=MODEL, + agent_description="Currency exchange agent demo", + server_address=f"{GCP_LOCATION}-aiplatform.googleapis.com", + ) + ) as invocation: + response = model.generate_content( + "What is the exchange rate from US dollars to SEK today?" + ) + # Populate response attributes on the invocation + usage = response.usage_metadata + invocation.input_tokens = usage.prompt_token_count + invocation.output_tokens = usage.candidates_token_count + invocation.finish_reasons = ["stop"] + + print(f"[invoke_agent] Response:\n{response.text}\n") + + +if __name__ == "__main__": + main() diff --git a/instrumentation-genai/opentelemetry-instrumentation-vertexai/examples/utils-demo/requirements.txt b/instrumentation-genai/opentelemetry-instrumentation-vertexai/examples/utils-demo/requirements.txt new file mode 100644 index 0000000000..724f8c719b --- /dev/null +++ b/instrumentation-genai/opentelemetry-instrumentation-vertexai/examples/utils-demo/requirements.txt @@ -0,0 +1,9 @@ +google-cloud-aiplatform[agent_engines,langchain]>=1.64 +google-genai>=1.0 +langchain-google-vertexai +requests + +opentelemetry-sdk>=1.40 +opentelemetry-exporter-otlp-proto-grpc>=1.40 +opentelemetry-instrumentation-vertexai>=2.0b0 +opentelemetry-util-genai>=0.4b0.dev0 From 1e56418a2fc7acd36c9eccff4dd0c10f027a7314 Mon Sep 17 00:00:00 2001 From: etserend Date: Tue, 17 Mar 2026 15:39:45 -0500 Subject: [PATCH 10/10] update readme --- .../examples/utils-demo/README.rst | 59 +++++++++++-------- 1 file changed, 35 insertions(+), 24 deletions(-) diff --git a/instrumentation-genai/opentelemetry-instrumentation-vertexai/examples/utils-demo/README.rst b/instrumentation-genai/opentelemetry-instrumentation-vertexai/examples/utils-demo/README.rst index 012ab284a6..9ee1b5fd17 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-vertexai/examples/utils-demo/README.rst +++ b/instrumentation-genai/opentelemetry-instrumentation-vertexai/examples/utils-demo/README.rst @@ -6,16 +6,40 @@ instrumentation of Vertex AI SDK calls) with manual ``TelemetryHandler.agent()`` spans from ``opentelemetry-util-genai`` to extend existing instrumentation with agent invocation lifecycle spans. -The demo performs the following steps: +The ``generate_content`` call is made inside the ``invoke_agent`` context, +so the auto-instrumented LLM span appears as a child of the ``invoke_agent`` +parent span — all within the same trace. -1. **LLM call** — generates content via the ``google-genai`` SDK (API key). - Vertex AI SDK calls are auto-instrumented by ``VertexAIInstrumentor``. -2. **Create local agent** — creates a ``LanggraphAgent`` with a currency - exchange tool. -3. **Deploy to Agent Engine** — deploys the agent to Vertex AI Agent Engine. -4. **Query the remote agent** — sends a currency exchange query wrapped in - an ``invoke_agent`` span via ``TelemetryHandler.agent()``. -5. **Cleanup** — deletes the deployed agent. +Sample Trace +------------ + +:: + + Trace ID: 0xe71e16deb2ecd162e3f4fc67c240818b + | + +-- invoke_agent Currency Exchange Agent [4.16s, root span] + | gen_ai.operation.name: invoke_agent + | gen_ai.agent.name: Currency Exchange Agent + | gen_ai.agent.description: Currency exchange agent demo + | gen_ai.provider.name: gcp_vertex_ai + | gen_ai.request.model: gemini-2.5-flash + | gen_ai.response.finish_reasons: ["stop"] + | gen_ai.usage.input_tokens: 12 + | gen_ai.usage.output_tokens: 87 + | server.address: us-central1-aiplatform.googleapis.com + | scope: opentelemetry.util.genai.handler + | + +-- chat gemini-2.5-flash [4.12s, child span] + gen_ai.operation.name: chat + gen_ai.system: vertex_ai + gen_ai.request.model: gemini-2.5-flash + gen_ai.response.model: gemini-2.5-flash + gen_ai.response.finish_reasons: ["stop"] + gen_ai.usage.input_tokens: 12 + gen_ai.usage.output_tokens: 87 + server.address: us-central1-aiplatform.googleapis.com + server.port: 443 + scope: opentelemetry.instrumentation.vertexai Prerequisites ------------- @@ -23,13 +47,6 @@ Prerequisites - A GCP project with the **Vertex AI API** enabled. - **Application Default Credentials** (ADC) configured: ``gcloud auth application-default login`` -- A **Google API key** for LLM calls (set as ``GOOGLE_API_KEY``). -- A **GCS staging bucket** for agent deployment: - -:: - - gsutil mb -p -l us-central1 -b on gs://-agent-staging/ - gsutil pap set enforced gs://-agent-staging/ Setup ----- @@ -51,22 +68,16 @@ Run :: - export GOOGLE_API_KEY="AIza..." export GCP_PROJECT="your-project-id" python main.py -The deploy step takes 3-5 minutes. You should see an ``invoke_agent`` -span printed to the console and exported to your OTLP endpoint wrapping -the agent query, along with the agent's response to the currency -exchange query. +You should see an ``invoke_agent`` span wrapping a ``chat`` child span, +both printed to the console and exported to your OTLP endpoint. Environment Variables --------------------- -- ``GOOGLE_API_KEY`` — API key for ``google-genai`` SDK LLM calls. - ``GCP_PROJECT`` — GCP project ID (default: ``gcp-o11yinframon-nprd-81065``). - ``GCP_LOCATION`` — GCP region (default: ``us-central1``). -- ``GCP_STAGING_BUCKET`` — GCS bucket for agent staging - (default: ``gs://-agent-staging``). - ``OTEL_EXPORTER_OTLP_ENDPOINT`` — OTLP endpoint (default: ``http://localhost:4317``).