diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-agents-v2/CHANGELOG.md b/instrumentation-genai/opentelemetry-instrumentation-openai-agents-v2/CHANGELOG.md index 90f8c11e01..37715894e7 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-openai-agents-v2/CHANGELOG.md +++ b/instrumentation-genai/opentelemetry-instrumentation-openai-agents-v2/CHANGELOG.md @@ -6,6 +6,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). ## Unreleased +- Handle `MCPListToolsSpanData` spans so they produce `mcp.list_tools` operations + with server and tool attributes instead of showing as `unknown`. + ([#4197](https://github.com/open-telemetry/opentelemetry-python-contrib/issues/4197)) - Align AgentSpanData test stubs and span processor with real OpenAI Agents SDK; remove non-existent `operation`, `description`, `agent_id`, and `model` fields. ([#4229](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/4229)) diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-agents-v2/src/opentelemetry/instrumentation/openai_agents/span_processor.py b/instrumentation-genai/opentelemetry-instrumentation-openai-agents-v2/src/opentelemetry/instrumentation/openai_agents/span_processor.py index 74be663701..4ae6e68c35 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-openai-agents-v2/src/opentelemetry/instrumentation/openai_agents/span_processor.py +++ b/instrumentation-genai/opentelemetry-instrumentation-openai-agents-v2/src/opentelemetry/instrumentation/openai_agents/span_processor.py @@ -34,6 +34,7 @@ GenerationSpanData, GuardrailSpanData, HandoffSpanData, + MCPListToolsSpanData, ResponseSpanData, SpeechSpanData, TranscriptionSpanData, @@ -48,6 +49,7 @@ GenerationSpanData = getattr(tracing_module, "GenerationSpanData", Any) # type: ignore[assignment] GuardrailSpanData = getattr(tracing_module, 
"GuardrailSpanData", Any) # type: ignore[assignment] HandoffSpanData = getattr(tracing_module, "HandoffSpanData", Any) # type: ignore[assignment] + MCPListToolsSpanData = getattr(tracing_module, "MCPListToolsSpanData", Any) # type: ignore[assignment] ResponseSpanData = getattr(tracing_module, "ResponseSpanData", Any) # type: ignore[assignment] SpeechSpanData = getattr(tracing_module, "SpeechSpanData", Any) # type: ignore[assignment] TranscriptionSpanData = getattr( @@ -120,6 +122,7 @@ class GenAIOperationName: SPEECH = "speech_generation" GUARDRAIL = "guardrail_check" HANDOFF = "agent_handoff" + MCP_LIST_TOOLS = "mcp.list_tools" RESPONSE = "response" # internal aggregator in current processor CLASS_FALLBACK = { @@ -239,6 +242,9 @@ def _attr(name: str, fallback: str) -> str: GEN_AI_GUARDRAIL_TRIGGERED = "gen_ai.guardrail.triggered" GEN_AI_HANDOFF_FROM_AGENT = "gen_ai.handoff.from_agent" GEN_AI_HANDOFF_TO_AGENT = "gen_ai.handoff.to_agent" +MCP_SERVER_NAME = "mcp.server.name" +MCP_TOOLS_COUNT = "mcp.tools.count" +MCP_TOOLS_LIST = "mcp.tools.list" GEN_AI_EMBEDDINGS_DIMENSION_COUNT = "gen_ai.embeddings.dimension.count" GEN_AI_TOKEN_TYPE = _attr("GEN_AI_TOKEN_TYPE", "gen_ai.token.type") @@ -395,6 +401,7 @@ def get_span_name( model: Optional[str] = None, agent_name: Optional[str] = None, tool_name: Optional[str] = None, + mcp_server_name: Optional[str] = None, ) -> str: """Generate spec-compliant span name based on operation type.""" base_name = operation_name @@ -420,6 +427,13 @@ def get_span_name( if operation_name == GenAIOperationName.HANDOFF: return f"{base_name} {agent_name}" if agent_name else base_name + if operation_name == GenAIOperationName.MCP_LIST_TOOLS: + return ( + f"{base_name} {mcp_server_name}" + if mcp_server_name + else base_name + ) + return base_name @@ -1164,6 +1178,8 @@ def _infer_output_type(self, span_data: Any) -> str: return GenAIOutputType.TEXT if _is_instance_of(span_data, HandoffSpanData): return GenAIOutputType.TEXT + if 
_is_instance_of(span_data, MCPListToolsSpanData): + return GenAIOutputType.JSON # Check for embeddings operation if _is_instance_of(span_data, GenerationSpanData): @@ -1277,9 +1293,10 @@ def _get_span_kind(self, span_data: Any) -> SpanKind: ResponseSpanData, TranscriptionSpanData, SpeechSpanData, + MCPListToolsSpanData, ), ): - return SpanKind.CLIENT # API calls to model providers + return SpanKind.CLIENT # API calls to model providers / MCP servers if _is_instance_of(span_data, AgentSpanData): return SpanKind.CLIENT if _is_instance_of(span_data, (GuardrailSpanData, HandoffSpanData)): @@ -1364,8 +1381,18 @@ def on_span_start(self, span: Span[Any]) -> None: else None ) + # For MCP list tools spans, use server name in span name + mcp_server_name = ( + getattr(span.span_data, "server", None) + if _is_instance_of(span.span_data, MCPListToolsSpanData) + else None + ) + # Generate spec-compliant span name - span_name = get_span_name(operation_name, model, agent_name, tool_name) + span_name = get_span_name( + operation_name, model, agent_name, tool_name, + mcp_server_name=mcp_server_name, + ) attributes = { GEN_AI_PROVIDER_NAME: self.system_name, @@ -1544,6 +1571,8 @@ def _get_operation_name(self, span_data: Any) -> str: return GenAIOperationName.GUARDRAIL if _is_instance_of(span_data, HandoffSpanData): return GenAIOperationName.HANDOFF + if _is_instance_of(span_data, MCPListToolsSpanData): + return GenAIOperationName.MCP_LIST_TOOLS return "unknown" def _extract_genai_attributes( @@ -1604,6 +1633,10 @@ def _extract_genai_attributes( yield from self._get_attributes_from_guardrail_span_data(span_data) elif _is_instance_of(span_data, HandoffSpanData): yield from self._get_attributes_from_handoff_span_data(span_data) + elif _is_instance_of(span_data, MCPListToolsSpanData): + yield from self._get_attributes_from_mcp_list_tools_span_data( + span_data + ) def _get_attributes_from_generation_span_data( self, span_data: GenerationSpanData, payload: ContentPayload @@ -2169,6 
def _get_attributes_from_mcp_list_tools_span_data(
    self, span_data: MCPListToolsSpanData
) -> Iterator[tuple[str, AttributeValue]]:
    """Yield the span attributes for an MCP list-tools span.

    Always yields the operation name first and the output type last.
    The server name is yielded only when ``span_data.server`` is truthy.
    When ``span_data.result`` is not ``None`` its length is yielded as
    the tool count; the JSON-serialized result itself is yielded only if
    both ``self.include_sensitive_data`` and
    ``self._content_mode.capture_in_span`` are truthy.

    Args:
        span_data: The MCP list-tools span payload; ``server`` and
            ``result`` are the only fields read here.

    Yields:
        ``(attribute_key, attribute_value)`` pairs.
    """
    yield GEN_AI_OPERATION_NAME, GenAIOperationName.MCP_LIST_TOOLS

    server_name = span_data.server
    if server_name:
        yield MCP_SERVER_NAME, server_name

    tool_names = span_data.result
    if tool_names is not None:
        # An empty result still reports a count (0); only ``None`` is
        # treated as "no result available".
        yield MCP_TOOLS_COUNT, len(tool_names)

        capture_tool_list = (
            self.include_sensitive_data
            and self._content_mode.capture_in_span
        )
        if capture_tool_list:
            yield MCP_TOOLS_LIST, gen_ai_json_dumps(tool_names)

    output_type = normalize_output_type(self._infer_output_type(span_data))
    yield GEN_AI_OUTPUT_TYPE, output_type
def test_mcp_list_tools_span_records_attributes():
    """An MCP list-tools span is exported as an ``mcp.list_tools`` operation.

    Runs one ``mcp_list_tools_span`` inside a ``workflow`` trace, then
    checks the exported span's kind, name, provider, server name and
    tool count.
    """
    instrumentor, exporter = _instrument_with_provider()

    try:
        from agents.tracing import mcp_list_tools_span

        with trace("workflow"):
            with mcp_list_tools_span(
                server="Time",
                result=["get_current_time", "convert_timezone"],
            ):
                pass

        spans = exporter.get_finished_spans()
        # Use a default so a missing span surfaces as a readable assertion
        # failure instead of a bare StopIteration from ``next``.
        mcp_span = next(
            (
                span
                for span in spans
                if span.attributes.get(GenAI.GEN_AI_OPERATION_NAME)
                == "mcp.list_tools"
            ),
            None,
        )
        assert mcp_span is not None, (
            "no span with operation name 'mcp.list_tools' was exported; "
            f"exported span names: {[span.name for span in spans]}"
        )

        assert mcp_span.kind is SpanKind.CLIENT
        assert mcp_span.name == "mcp.list_tools Time"
        assert mcp_span.attributes[GEN_AI_PROVIDER_NAME] == "openai"
        assert mcp_span.attributes["mcp.server.name"] == "Time"
        assert mcp_span.attributes["mcp.tools.count"] == 2
    finally:
        # Always undo instrumentation and drop exported spans so the
        # next call to _instrument_with_provider() starts clean.
        instrumentor.uninstrument()
        exporter.clear()