diff --git a/instrumentation/opentelemetry-instrumentation-genai-langchain/.changelog/37.added b/instrumentation/opentelemetry-instrumentation-genai-langchain/.changelog/37.added new file mode 100644 index 00000000..d78e078f --- /dev/null +++ b/instrumentation/opentelemetry-instrumentation-genai-langchain/.changelog/37.added @@ -0,0 +1 @@ +Added tool spans and captured tool definitions on inference spans. diff --git a/instrumentation/opentelemetry-instrumentation-genai-langchain/examples/tools/main.py b/instrumentation/opentelemetry-instrumentation-genai-langchain/examples/tools/main.py new file mode 100644 index 00000000..74d5001b --- /dev/null +++ b/instrumentation/opentelemetry-instrumentation-genai-langchain/examples/tools/main.py @@ -0,0 +1,117 @@ +# Copyright The OpenTelemetry Authors +# SPDX-License-Identifier: Apache-2.0 + +""" +Tool-calling example without agents, built with LangChain. + +Uses ChatOpenAI with bind_tools to let the model call calculator tools directly, +then manually dispatches tool calls and feeds results back to the model. +OpenTelemetry LangChain instrumentation traces the LLM calls. 
+""" + +from __future__ import annotations + +import json + +from langchain_core.messages import HumanMessage, ToolMessage +from langchain_core.tools import tool +from langchain_openai import ChatOpenAI + +from opentelemetry import _logs, metrics, trace +from opentelemetry.exporter.otlp.proto.grpc._log_exporter import ( + OTLPLogExporter, +) +from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import ( + OTLPMetricExporter, +) +from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import ( + OTLPSpanExporter, +) +from opentelemetry.instrumentation.genai.langchain import LangChainInstrumentor +from opentelemetry.sdk._logs import LoggerProvider +from opentelemetry.sdk._logs.export import BatchLogRecordProcessor +from opentelemetry.sdk.metrics import MeterProvider +from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader +from opentelemetry.sdk.trace import TracerProvider +from opentelemetry.sdk.trace.export import BatchSpanProcessor + +# Configure tracing +trace.set_tracer_provider(TracerProvider()) +span_processor = BatchSpanProcessor(OTLPSpanExporter()) +trace.get_tracer_provider().add_span_processor(span_processor) + +# Configure logging +_logs.set_logger_provider(LoggerProvider()) +_logs.get_logger_provider().add_log_record_processor( + BatchLogRecordProcessor(OTLPLogExporter()) +) + +# Configure metrics +metrics.set_meter_provider( + MeterProvider( + metric_readers=[ + PeriodicExportingMetricReader( + OTLPMetricExporter(), + ), + ] + ) +) + + +@tool +def multiply(a: float, b: float) -> float: + """Multiply two numbers together.""" + return a * b + + +@tool +def add(a: float, b: float) -> float: + """Add two numbers together.""" + return a + b + + +TOOLS = [multiply, add] +TOOLS_BY_NAME = {t.name: t for t in TOOLS} + + +def main() -> None: + instrumentor = LangChainInstrumentor() + instrumentor.instrument() + + llm = ChatOpenAI( + model="gpt-3.5-turbo", + temperature=0.1, + max_tokens=100, + top_p=0.9, + seed=100, + ) + 
llm_with_tools = llm.bind_tools(TOOLS) + + messages = [HumanMessage(content="What is (3 * 4) + 7?")] + + # First LLM call — model may request tool calls + response = llm_with_tools.invoke(messages) + messages.append(response) + + # Dispatch tool calls until the model stops requesting them + while response.tool_calls: + for tool_call in response.tool_calls: + selected_tool = TOOLS_BY_NAME[tool_call["name"]] + tool_output = selected_tool.invoke(tool_call["args"]) + messages.append( + ToolMessage( + content=json.dumps(tool_output), + tool_call_id=tool_call["id"], + ) + ) + + response = llm_with_tools.invoke(messages) + messages.append(response) + + print("Final answer:", response.content) + + instrumentor.uninstrument() + + +if __name__ == "__main__": + main() diff --git a/instrumentation/opentelemetry-instrumentation-genai-langchain/examples/tools/requirements.txt b/instrumentation/opentelemetry-instrumentation-genai-langchain/examples/tools/requirements.txt new file mode 100644 index 00000000..7530f3e5 --- /dev/null +++ b/instrumentation/opentelemetry-instrumentation-genai-langchain/examples/tools/requirements.txt @@ -0,0 +1,5 @@ +langchain==0.3.21 +langchain_openai +langgraph +opentelemetry-sdk>=1.31.0 +opentelemetry-exporter-otlp-proto-grpc>=1.31.0 \ No newline at end of file diff --git a/instrumentation/opentelemetry-instrumentation-genai-langchain/src/opentelemetry/instrumentation/genai/langchain/callback_handler.py b/instrumentation/opentelemetry-instrumentation-genai-langchain/src/opentelemetry/instrumentation/genai/langchain/callback_handler.py index 8779b3d4..84aa6b78 100644 --- a/instrumentation/opentelemetry-instrumentation-genai-langchain/src/opentelemetry/instrumentation/genai/langchain/callback_handler.py +++ b/instrumentation/opentelemetry-instrumentation-genai-langchain/src/opentelemetry/instrumentation/genai/langchain/callback_handler.py @@ -3,6 +3,7 @@ from __future__ import annotations +import json from typing import Any, Optional, cast from uuid 
import UUID @@ -21,11 +22,13 @@ from opentelemetry.instrumentation.genai.langchain.utils import ( make_input_message, make_last_output_message, + prepare_tool_definitions, ) from opentelemetry.util.genai.handler import TelemetryHandler from opentelemetry.util.genai.invocation import ( AgentInvocation, InferenceInvocation, + ToolInvocation, WorkflowInvocation, ) from opentelemetry.util.genai.types import ( @@ -33,6 +36,7 @@ MessagePart, OutputMessage, Text, + ToolCallRequest, ) @@ -286,6 +290,11 @@ def on_chat_model_start( llm_invocation.seed = seed llm_invocation.temperature = temperature llm_invocation.max_tokens = max_tokens + if params is not None: + tools = params.get("tools") or params.get("functions") + if tools: + tool_definitions = prepare_tool_definitions(tools) + llm_invocation.tool_definitions = tool_definitions self._invocation_manager.add_invocation_state( run_id=run_id, parent_run_id=parent_run_id, @@ -333,19 +342,33 @@ def on_llm_end( ) ) - # Get message content - parts = [ - Text( - content=chat_generation.message.content, - type="text", + if finish_reason == "tool_calls": + tool_calls: list[ToolCallRequest] = [] + for tool_call in chat_generation.message.tool_calls: + tool_call_request = ToolCallRequest( + name=tool_call["name"], + id=tool_call["id"], + arguments=tool_call["args"], + ) + tool_calls.append(tool_call_request) + output_message = OutputMessage( + role=chat_generation.message.type, + parts=cast(list[MessagePart], tool_calls), + finish_reason=finish_reason, + ) + else: + parts = [ + Text( + content=chat_generation.message.content, + type="text", + ) + ] + role = chat_generation.message.type + output_message = OutputMessage( + role=role, + parts=cast(list[MessagePart], parts), + finish_reason=finish_reason, ) - ] - role = chat_generation.message.type - output_message = OutputMessage( - role=role, - parts=cast(list[MessagePart], parts), - finish_reason=finish_reason, - ) output_messages.append(output_message) # Get token usage if available 
@@ -402,6 +425,73 @@ def on_llm_error( if not llm_invocation.span.is_recording(): self._invocation_manager.delete_invocation_state(run_id=run_id) + def on_tool_start( + self, + serialized: Optional[dict[str, Any]], + input_str: str, + *, + run_id: UUID, + parent_run_id: Optional[UUID] = None, + tags: Optional[list[str]] = None, + metadata: Optional[dict[str, Any]] = None, + inputs: Optional[dict[str, Any]] = None, + **kwargs: Any, + ) -> None: + name = "unknown" + description = None + if serialized is not None: + name = serialized.get("name") or "unknown" + description = serialized.get("description") + + raw_arguments: Any = inputs if inputs is not None else input_str + arguments: str | None + if isinstance(raw_arguments, dict): + # default=str: tool inputs may not be JSON-serializable. + arguments = json.dumps(raw_arguments, default=str) + elif isinstance(raw_arguments, str): + arguments = raw_arguments + else: + arguments = None + tool_invocation = self._telemetry_handler.tool( + name=name, tool_description=description, tool_type="function" + ) + tool_invocation.arguments = arguments + self._invocation_manager.add_invocation_state( + run_id, parent_run_id, tool_invocation + ) + + def on_tool_end( + self, + output: Any, + *, + run_id: UUID, + parent_run_id: Optional[UUID] = None, + **_kwargs: Any, + ) -> None: + tool_invocation = self._invocation_manager.get_invocation(run_id) + if not isinstance(tool_invocation, ToolInvocation): + return + tool_invocation.tool_call_id = getattr(output, "tool_call_id", None) + tool_invocation.tool_result = getattr(output, "content", None) + tool_invocation.stop() + if not tool_invocation.span.is_recording(): + self._invocation_manager.delete_invocation_state(run_id=run_id) + + def on_tool_error( + self, + error: BaseException, + *, + run_id: UUID, + parent_run_id: Optional[UUID] = None, + **_: Any, + ) -> None: + tool_invocation = self._invocation_manager.get_invocation(run_id) + if not isinstance(tool_invocation, ToolInvocation): + return + tool_invocation.fail(error) + if not
tool_invocation.span.is_recording(): + self._invocation_manager.delete_invocation_state(run_id=run_id) + def _find_nearest_agent( self, run_id: Optional[UUID] ) -> Optional[AgentInvocation]: diff --git a/instrumentation/opentelemetry-instrumentation-genai-langchain/src/opentelemetry/instrumentation/genai/langchain/utils.py b/instrumentation/opentelemetry-instrumentation-genai-langchain/src/opentelemetry/instrumentation/genai/langchain/utils.py index 1636a1ec..45fba99b 100644 --- a/instrumentation/opentelemetry-instrumentation-genai-langchain/src/opentelemetry/instrumentation/genai/langchain/utils.py +++ b/instrumentation/opentelemetry-instrumentation-genai-langchain/src/opentelemetry/instrumentation/genai/langchain/utils.py @@ -7,12 +7,45 @@ from langchain_core.messages import AIMessage from opentelemetry.util.genai.types import ( + FunctionToolDefinition, InputMessage, OutputMessage, Text, + ToolDefinition, ) +def _get_property_value(obj: Any, property_name: str) -> Any: + if isinstance(obj, dict): + return cast(dict[str, Any], obj).get(property_name) + + return getattr(obj, property_name, None) + + +def prepare_tool_definitions(tools: list[Any]) -> list[ToolDefinition] | None: + if not tools: + return None + + definitions: list[ToolDefinition] = [] + for tool in tools: + tool_type = _get_property_value(tool, "type") + if tool_type == "function": + func = _get_property_value(tool, "function") + if func: + func_name = _get_property_value(func, "name") + func_description = _get_property_value(func, "description") + definitions.append( + FunctionToolDefinition( + name=str(func_name) if func_name is not None else "", + description=str(func_description) + if func_description is not None + else None, + parameters=_get_property_value(func, "parameters"), + ) + ) + return definitions or None + + def make_input_message(data: Any) -> list[InputMessage]: """Create structured input message with full data as JSON.""" if not isinstance(data, dict): diff --git 
a/instrumentation/opentelemetry-instrumentation-genai-langchain/tests/cassettes/tool_calling_conformance.yaml b/instrumentation/opentelemetry-instrumentation-genai-langchain/tests/cassettes/tool_calling_conformance.yaml new file mode 100644 index 00000000..91244a0d --- /dev/null +++ b/instrumentation/opentelemetry-instrumentation-genai-langchain/tests/cassettes/tool_calling_conformance.yaml @@ -0,0 +1,368 @@ +interactions: +- request: + body: |- + { + "messages": [ + { + "role": "system", + "content": "You're a helpful assistant." + }, + { + "role": "user", + "content": "What's the weather in Seattle and San Francisco today?" + } + ], + "model": "gpt-4o-mini", + "stream": false, + "tool_choice": "auto", + "tools": [ + { + "type": "function", + "function": { + "name": "get_current_weather", + "description": "Get the current weather in a given location", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state, e.g. Boston, MA" + } + }, + "required": [ + "location" + ], + "additionalProperties": false + } + } + } + ] + } + headers: + Accept: + - application/json + Accept-Encoding: + - gzip, deflate, zstd + Connection: + - keep-alive + Content-Type: + - application/json + Host: + - api.openai.com + User-Agent: + - OpenAI/Python 2.37.0 + X-Stainless-Arch: + - arm64 + X-Stainless-Async: + - 'false' + X-Stainless-Lang: + - python + X-Stainless-OS: + - MacOS + X-Stainless-Package-Version: + - 2.37.0 + X-Stainless-Raw-Response: + - 'true' + X-Stainless-Runtime: + - CPython + X-Stainless-Runtime-Version: + - 3.12.11 + authorization: + - Bearer test_openai_api_key + x-stainless-retry-count: + - '0' + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: |- + { + "id": "chatcmpl-toolcall001first", + "object": "chat.completion", + "created": 1771535300, + "model": "gpt-4o-mini-2024-07-18", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + 
"content": null, + "refusal": null, + "tool_calls": [ + { + "id": "call_weather_seattle", + "type": "function", + "function": { + "name": "get_current_weather", + "arguments": "{\"location\": \"Seattle, WA\"}" + } + }, + { + "id": "call_weather_sf", + "type": "function", + "function": { + "name": "get_current_weather", + "arguments": "{\"location\": \"San Francisco, CA\"}" + } + } + ] + }, + "logprobs": null, + "finish_reason": "tool_calls" + } + ], + "usage": { + "prompt_tokens": 75, + "completion_tokens": 51, + "total_tokens": 126, + "prompt_tokens_details": { + "cached_tokens": 0, + "audio_tokens": 0 + }, + "completion_tokens_details": { + "reasoning_tokens": 0, + "audio_tokens": 0, + "accepted_prediction_tokens": 0, + "rejected_prediction_tokens": 0 + } + }, + "service_tier": "default", + "system_fingerprint": null + } + headers: + Connection: + - keep-alive + Content-Type: + - application/json + Date: + - Thu, 21 May 2026 10:00:00 GMT + Server: + - cloudflare + Set-Cookie: test_set_cookie + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + content-length: + - '750' + openai-organization: test_openai_org_id + openai-processing-ms: + - '250' + openai-version: + - '2020-10-01' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '10000' + x-ratelimit-limit-tokens: + - '200000' + x-ratelimit-remaining-requests: + - '9999' + x-ratelimit-remaining-tokens: + - '199900' + x-ratelimit-reset-requests: + - 8.64s + x-ratelimit-reset-tokens: + - 5ms + x-request-id: + - req_toolcall001first + status: + code: 200 + message: OK +- request: + body: |- + { + "messages": [ + { + "role": "system", + "content": "You're a helpful assistant." + }, + { + "role": "user", + "content": "What's the weather in Seattle and San Francisco today?" 
+ }, + { + "role": "assistant", + "content": null, + "tool_calls": [ + { + "id": "call_weather_seattle", + "type": "function", + "function": { + "name": "get_current_weather", + "arguments": "{\"location\": \"Seattle, WA\"}" + } + }, + { + "id": "call_weather_sf", + "type": "function", + "function": { + "name": "get_current_weather", + "arguments": "{\"location\": \"San Francisco, CA\"}" + } + } + ] + }, + { + "role": "tool", + "content": "50 degrees and raining", + "tool_call_id": "call_weather_seattle" + }, + { + "role": "tool", + "content": "70 degrees and sunny", + "tool_call_id": "call_weather_sf" + } + ], + "model": "gpt-4o-mini", + "stream": false, + "tool_choice": "auto", + "tools": [ + { + "type": "function", + "function": { + "name": "get_current_weather", + "description": "Get the current weather in a given location", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state, e.g. Boston, MA" + } + }, + "required": [ + "location" + ], + "additionalProperties": false + } + } + } + ] + } + headers: + Accept: + - application/json + Accept-Encoding: + - gzip, deflate, zstd + Connection: + - keep-alive + Content-Type: + - application/json + Host: + - api.openai.com + User-Agent: + - OpenAI/Python 2.37.0 + X-Stainless-Arch: + - arm64 + X-Stainless-Async: + - 'false' + X-Stainless-Lang: + - python + X-Stainless-OS: + - MacOS + X-Stainless-Package-Version: + - 2.37.0 + X-Stainless-Raw-Response: + - 'true' + X-Stainless-Runtime: + - CPython + X-Stainless-Runtime-Version: + - 3.12.11 + authorization: + - Bearer test_openai_api_key + x-stainless-retry-count: + - '0' + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: |- + { + "id": "chatcmpl-toolcall001final", + "object": "chat.completion", + "created": 1771535301, + "model": "gpt-4o-mini-2024-07-18", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "Today, Seattle is 
experiencing 50 degrees and raining, while San Francisco has a pleasant 70 degrees and sunny weather.", + "refusal": null + }, + "logprobs": null, + "finish_reason": "stop" + } + ], + "usage": { + "prompt_tokens": 99, + "completion_tokens": 24, + "total_tokens": 123, + "prompt_tokens_details": { + "cached_tokens": 0, + "audio_tokens": 0 + }, + "completion_tokens_details": { + "reasoning_tokens": 0, + "audio_tokens": 0, + "accepted_prediction_tokens": 0, + "rejected_prediction_tokens": 0 + } + }, + "service_tier": "default", + "system_fingerprint": null + } + headers: + Connection: + - keep-alive + Content-Type: + - application/json + Date: + - Thu, 21 May 2026 10:00:01 GMT + Server: + - cloudflare + Set-Cookie: test_set_cookie + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + content-length: + - '600' + openai-organization: test_openai_org_id + openai-processing-ms: + - '180' + openai-version: + - '2020-10-01' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '10000' + x-ratelimit-limit-tokens: + - '200000' + x-ratelimit-remaining-requests: + - '9998' + x-ratelimit-remaining-tokens: + - '199800' + x-ratelimit-reset-requests: + - 8.64s + x-ratelimit-reset-tokens: + - 5ms + x-request-id: + - req_toolcall001final + status: + code: 200 + message: OK +version: 1 diff --git a/instrumentation/opentelemetry-instrumentation-genai-langchain/tests/conformance/agent.py b/instrumentation/opentelemetry-instrumentation-genai-langchain/tests/conformance/agent.py index 0fba6ca4..79708e71 100644 --- a/instrumentation/opentelemetry-instrumentation-genai-langchain/tests/conformance/agent.py +++ b/instrumentation/opentelemetry-instrumentation-genai-langchain/tests/conformance/agent.py @@ -45,11 +45,17 @@ class AgentScenario(Scenario): "gen_ai.client.token.usage", ) # langchain can't 
populate server.address on chat spans. + # invoke_agent provider is unknown at span creation; ls_provider is only + # available on the chat model callback, not the chain callback. expected_violations = ( ExpectedViolation( advice_id="genai_expected_attribute_missing", message_substring="server.address", ), + ExpectedViolation( + advice_id="required_attribute_not_present", + message_substring="gen_ai.provider.name", + ), ) def run( diff --git a/instrumentation/opentelemetry-instrumentation-genai-langchain/tests/conformance/tool_calling.py b/instrumentation/opentelemetry-instrumentation-genai-langchain/tests/conformance/tool_calling.py new file mode 100644 index 00000000..aa5c8e07 --- /dev/null +++ b/instrumentation/opentelemetry-instrumentation-genai-langchain/tests/conformance/tool_calling.py @@ -0,0 +1,169 @@ +# Copyright The OpenTelemetry Authors +# SPDX-License-Identifier: Apache-2.0 + +"""Conformance scenario: langchain chat with tool calling via ChatOpenAI.""" + +from __future__ import annotations + +import json +import os +from typing import Any +from unittest import mock + +from langchain_core.messages import ( + AIMessage, + HumanMessage, + SystemMessage, + ToolMessage, +) +from langchain_openai import ChatOpenAI + +from opentelemetry.instrumentation.genai.langchain import LangChainInstrumentor +from opentelemetry.sdk._logs import LoggerProvider +from opentelemetry.sdk.metrics import MeterProvider +from opentelemetry.sdk.trace import TracerProvider +from opentelemetry.test.weaver_live_check import LiveCheckReport +from opentelemetry.test_util_genai.conformance import ( + ExpectedViolation, + Scenario, +) +from opentelemetry.test_util_genai.instrumentor import instrument +from opentelemetry.util.genai.handler import TelemetryHandler + +DEFAULT_MODEL = "gpt-4o-mini" +WEATHER_TOOL_PROMPT = [ + SystemMessage(content="You're a helpful assistant."), + HumanMessage( + content="What's the weather in Seattle and San Francisco today?" 
+ ), +] +# Tool outputs are pinned to the recorded cassette's second request body. +WEATHER_BY_LOCATION: dict[str, str] = { + "Seattle, WA": "50 degrees and raining", + "San Francisco, CA": "70 degrees and sunny", +} + + +def _get_current_weather_tool_definition() -> dict[str, Any]: + return { + "type": "function", + "function": { + "name": "get_current_weather", + "description": "Get the current weather in a given location", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state, e.g. Boston, MA", + }, + }, + "required": ["location"], + "additionalProperties": False, + }, + }, + } + + +def _execute_weather_tool(arguments: str) -> str: + location = json.loads(arguments)["location"] + return WEATHER_BY_LOCATION[location] + + +class ToolCallingScenario(Scenario): + expected_spans = ("chat", "execute_tool") + expected_metrics = ( + "gen_ai.client.operation.duration", + "gen_ai.client.token.usage", + ) + # langchain can't populate server.address on chat spans. + # execute_tool spans are provider-agnostic; gen_ai.provider.name is not available. 
+ expected_violations = ( + ExpectedViolation( + advice_id="genai_expected_attribute_missing", + message_substring="server.address", + ), + ExpectedViolation( + advice_id="required_attribute_not_present", + message_substring="gen_ai.provider.name", + ), + ) + + def run( + self, + *, + tracer_provider: TracerProvider, + meter_provider: MeterProvider, + logger_provider: LoggerProvider, + vcr: Any, + ) -> None: + key_override = ( + {} + if os.getenv("OPENAI_API_KEY") + else {"OPENAI_API_KEY": "test_openai_api_key"} + ) + tool_handler = TelemetryHandler( + tracer_provider=tracer_provider, + meter_provider=meter_provider, + logger_provider=logger_provider, + ) + with mock.patch.dict(os.environ, key_override): + with instrument( + LangChainInstrumentor(), + tracer_provider=tracer_provider, + logger_provider=logger_provider, + meter_provider=meter_provider, + semconv="gen_ai_latest_experimental", + content_capture="SPAN_ONLY", + ): + llm = ChatOpenAI( + model=DEFAULT_MODEL, + tool_choice="auto", + ) + llm_with_tools = llm.bind_tools( + [_get_current_weather_tool_definition()] + ) + + messages: list[Any] = list(WEATHER_TOOL_PROMPT) + + with vcr.use_cassette("tool_calling_conformance.yaml"): + first_response: AIMessage = llm_with_tools.invoke(messages) + messages.append(first_response) + + for tool_call in first_response.tool_calls: + with tool_handler.tool( + tool_call["name"], + tool_call_id=tool_call["id"], + tool_type="function", + ) as invocation: + result = _execute_weather_tool( + json.dumps(tool_call["args"]) + ) + invocation.tool_result = result + messages.append( + ToolMessage( + content=result, + tool_call_id=tool_call["id"], + ) + ) + + llm_with_tools.invoke(messages) + + def validate(self, report: LiveCheckReport) -> None: + super().validate(report) + operations = [ + attr["value"] + for entry in report["samples"] + if "span" in entry + for attr in entry["span"]["attributes"] + if attr["name"] == "gen_ai.operation.name" + ] + assert operations == [ + "chat", +
"execute_tool", + "execute_tool", + "chat", + ], ( + "Tool calling exercises two chat completions with two execute_tool " + f"spans in between (one per tool call); saw spans {operations}" + ) diff --git a/instrumentation/opentelemetry-instrumentation-genai-langchain/tests/test_conformance.py b/instrumentation/opentelemetry-instrumentation-genai-langchain/tests/test_conformance.py index 3bdf216b..d48fe98e 100644 --- a/instrumentation/opentelemetry-instrumentation-genai-langchain/tests/test_conformance.py +++ b/instrumentation/opentelemetry-instrumentation-genai-langchain/tests/test_conformance.py @@ -22,6 +22,7 @@ from .conformance.agent import AgentScenario from .conformance.inference import InferenceScenario +from .conformance.tool_calling import ToolCallingScenario from .conformance.workflow import WorkflowScenario @@ -30,6 +31,7 @@ [ InferenceScenario(), AgentScenario(), + ToolCallingScenario(), WorkflowScenario(), ], ids=lambda s: type(s).__name__, diff --git a/instrumentation/opentelemetry-instrumentation-genai-langchain/tests/test_tools.py b/instrumentation/opentelemetry-instrumentation-genai-langchain/tests/test_tools.py new file mode 100644 index 00000000..db78a083 --- /dev/null +++ b/instrumentation/opentelemetry-instrumentation-genai-langchain/tests/test_tools.py @@ -0,0 +1,940 @@ +# Copyright The OpenTelemetry Authors +# SPDX-License-Identifier: Apache-2.0 + +"""Tests for tool-related support in the LangChain callback handler.""" + +from __future__ import annotations + +import json +from typing import Any +from unittest.mock import MagicMock +from uuid import uuid4 + +import pytest +from langchain_core.messages import AIMessage, HumanMessage +from langchain_core.outputs import ChatGeneration, LLMResult +from langchain_core.tools import tool + +from opentelemetry.instrumentation._semconv import ( + _OpenTelemetrySemanticConventionStability, +) +from opentelemetry.instrumentation.genai.langchain import LangChainInstrumentor +from
opentelemetry.instrumentation.genai.langchain.callback_handler import ( + OpenTelemetryLangChainCallbackHandler, +) +from opentelemetry.instrumentation.genai.langchain.utils import ( + _get_property_value, + prepare_tool_definitions, +) +from opentelemetry.sdk._logs import LoggerProvider +from opentelemetry.sdk._logs.export import ( + InMemoryLogRecordExporter, + SimpleLogRecordProcessor, +) +from opentelemetry.sdk.metrics import MeterProvider +from opentelemetry.sdk.metrics.export import InMemoryMetricReader +from opentelemetry.sdk.trace import TracerProvider +from opentelemetry.sdk.trace.export import SimpleSpanProcessor +from opentelemetry.sdk.trace.export.in_memory_span_exporter import ( + InMemorySpanExporter, +) +from opentelemetry.semconv._incubating.attributes import gen_ai_attributes +from opentelemetry.semconv.attributes import error_attributes +from opentelemetry.util.genai.handler import TelemetryHandler +from opentelemetry.util.genai.types import FunctionToolDefinition + + +@pytest.fixture(autouse=True) +def reset_semconv_stability(): + """Reset the semconv stability cache before and after each test.""" + _OpenTelemetrySemanticConventionStability._initialized = False + _OpenTelemetrySemanticConventionStability._OTEL_SEMCONV_STABILITY_SIGNAL_MAPPING = {} + yield + _OpenTelemetrySemanticConventionStability._initialized = False + _OpenTelemetrySemanticConventionStability._OTEL_SEMCONV_STABILITY_SIGNAL_MAPPING = {} + + +def _enable_experimental_mode(): + """Call after setting OTEL_SEMCONV_STABILITY_OPT_IN env var to activate it.""" + _OpenTelemetrySemanticConventionStability._initialized = False + _OpenTelemetrySemanticConventionStability._OTEL_SEMCONV_STABILITY_SIGNAL_MAPPING = {} + _OpenTelemetrySemanticConventionStability._initialize() + + +# --------------------------------------------------------------------------- +# Unit tests for _get_property_value +# --------------------------------------------------------------------------- + + +def 
test_get_property_value_from_dict(): + assert _get_property_value({"name": "my_tool"}, "name") == "my_tool" + + +def test_get_property_value_from_dict_missing_key(): + assert _get_property_value({}, "name") is None + + +def test_get_property_value_from_object(): + obj = MagicMock() + obj.name = "obj_tool" + assert _get_property_value(obj, "name") == "obj_tool" + + +def test_get_property_value_from_object_missing_attr(): + class Plain: + pass + + assert _get_property_value(Plain(), "missing") is None + + +# --------------------------------------------------------------------------- +# Unit tests for prepare_tool_definitions +# --------------------------------------------------------------------------- + + +def test_prepare_tool_definitions_returns_none_for_empty(): + assert prepare_tool_definitions([]) is None + + +def test_prepare_tool_definitions_dict_tools(): + tools = [ + { + "type": "function", + "function": { + "name": "multiply", + "description": "Multiply two numbers", + "parameters": { + "type": "object", + "properties": { + "a": {"type": "integer"}, + "b": {"type": "integer"}, + }, + }, + }, + } + ] + result = prepare_tool_definitions(tools) + assert result is not None + assert len(result) == 1 + defn = result[0] + assert isinstance(defn, FunctionToolDefinition) + assert defn.name == "multiply" + assert defn.description == "Multiply two numbers" + assert defn.parameters is not None + + +def test_prepare_tool_definitions_skips_non_function_type(): + tools = [{"type": "retrieval", "retrieval": {}}] + result = prepare_tool_definitions(tools) + assert result is None + + +def test_prepare_tool_definitions_multiple_tools(): + tools = [ + { + "type": "function", + "function": {"name": "add", "description": "Add numbers"}, + }, + { + "type": "function", + "function": { + "name": "subtract", + "description": "Subtract numbers", + }, + }, + ] + result = prepare_tool_definitions(tools) + assert result is not None + assert len(result) == 2 + assert result[0].name == 
"add"
    assert result[1].name == "subtract"


def test_prepare_tool_definitions_missing_name_defaults_to_empty_string():
    """A function entry without a ``name`` key yields a definition named ``""``."""
    tools = [
        {
            "type": "function",
            "function": {"description": "No name tool"},
        }
    ]
    result = prepare_tool_definitions(tools)
    assert result is not None
    assert len(result) == 1
    assert result[0].name == ""


def test_prepare_tool_definitions_none_description_stays_none():
    """A function entry without a ``description`` key keeps it as ``None``."""
    tools = [
        {
            "type": "function",
            "function": {"name": "no_desc"},
        }
    ]
    result = prepare_tool_definitions(tools)
    assert result is not None
    assert result[0].description is None


def test_prepare_tool_definitions_object_tools():
    """Tools may be objects (e.g. pydantic models) rather than dicts."""

    # Plain classes stand in for attribute-style tool objects.
    class FuncDef:
        name = "get_weather"
        description = "Get current weather"
        parameters = {"type": "object"}

    class ToolDef:
        type = "function"
        function = FuncDef()

    result = prepare_tool_definitions([ToolDef()])
    assert result is not None
    assert len(result) == 1
    assert result[0].name == "get_weather"
    assert result[0].description == "Get current weather"


# ---------------------------------------------------------------------------
# Helpers shared by callback-handler integration tests
# ---------------------------------------------------------------------------


def _make_providers():
    """Build in-memory tracer, logger and meter providers for one test.

    Returns:
        A ``(tracer_provider, span_exporter, logger_provider,
        meter_provider)`` tuple. The log exporter and the metric reader are
        attached to their providers but are not returned.
    """
    span_exporter = InMemorySpanExporter()
    tracer_provider = TracerProvider()
    tracer_provider.add_span_processor(SimpleSpanProcessor(span_exporter))

    log_exporter = InMemoryLogRecordExporter()
    logger_provider = LoggerProvider()
    logger_provider.add_log_record_processor(
        SimpleLogRecordProcessor(log_exporter)
    )

    metric_reader = InMemoryMetricReader()
    meter_provider = MeterProvider(metric_readers=[metric_reader])

    return tracer_provider, span_exporter, logger_provider, meter_provider


def _make_handler(tracer_provider, logger_provider, meter_provider):
    """Return a ``TelemetryHandler`` wired to the given providers."""
    return TelemetryHandler(
        tracer_provider=tracer_provider,
        logger_provider=logger_provider,
        meter_provider=meter_provider,
    )


def _make_callback_handler(tracer_provider, logger_provider, meter_provider):
    """Return the LangChain callback handler wrapping a new telemetry handler."""
    telemetry_handler = _make_handler(
        tracer_provider, logger_provider, meter_provider
    )
    return OpenTelemetryLangChainCallbackHandler(telemetry_handler)


# Canned arguments passed to on_chat_model_start by the tests below.
_OPENAI_SERIALIZED: dict[str, Any] = {"name": "ChatOpenAI"}
_OPENAI_INVOCATION_PARAMS: dict[str, Any] = {
    "model_name": "gpt-4",
    "temperature": 0.0,
}
_OPENAI_METADATA: dict[str, Any] = {"ls_provider": "openai"}


# ---------------------------------------------------------------------------
# on_tool_start / on_tool_end
# ---------------------------------------------------------------------------


def test_on_tool_start_and_end_creates_span(monkeypatch):
    """A start/end pair emits one ``execute_tool`` span with tool attributes."""
    monkeypatch.setenv(
        "OTEL_SEMCONV_STABILITY_OPT_IN", "gen_ai_latest_experimental"
    )
    monkeypatch.setenv(
        "OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT", "SPAN_ONLY"
    )
    _enable_experimental_mode()
    tracer_provider, span_exporter, logger_provider, meter_provider = (
        _make_providers()
    )
    handler = _make_callback_handler(
        tracer_provider, logger_provider, meter_provider
    )

    run_id = uuid4()
    handler.on_tool_start(
        serialized={"name": "multiply", "description": "Multiply two numbers"},
        input_str="",
        run_id=run_id,
        inputs={"a": 3, "b": 4},
    )

    output = MagicMock()
    output.content = "12"
    output.tool_call_id = "call_abc"
    handler.on_tool_end(output=output, run_id=run_id)

    spans = span_exporter.get_finished_spans()
    assert len(spans) == 1
    span = spans[0]
    assert span.name == "execute_tool multiply"
    attrs = span.attributes
    assert attrs[gen_ai_attributes.GEN_AI_OPERATION_NAME] == "execute_tool"
    assert attrs[gen_ai_attributes.GEN_AI_TOOL_NAME] == "multiply"
    assert (
        attrs[gen_ai_attributes.GEN_AI_TOOL_DESCRIPTION]
        == "Multiply two numbers"
    )
    # The inputs dict is expected as its JSON serialisation.
    assert attrs[gen_ai_attributes.GEN_AI_TOOL_CALL_ARGUMENTS] == json.dumps(
        {"a": 3, "b": 4}
    )


def test_on_tool_start_with_string_input(monkeypatch):
    """With only ``input_str`` given, that string is recorded as the arguments."""
    monkeypatch.setenv(
        "OTEL_SEMCONV_STABILITY_OPT_IN", "gen_ai_latest_experimental"
    )
    monkeypatch.setenv(
        "OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT", "SPAN_ONLY"
    )
    _enable_experimental_mode()
    tracer_provider, span_exporter, logger_provider, meter_provider = (
        _make_providers()
    )
    handler = _make_callback_handler(
        tracer_provider, logger_provider, meter_provider
    )

    run_id = uuid4()
    handler.on_tool_start(
        serialized={"name": "search"},
        input_str="Paris weather",
        run_id=run_id,
    )
    output = MagicMock()
    output.content = "Sunny"
    output.tool_call_id = None
    handler.on_tool_end(output=output, run_id=run_id)

    spans = span_exporter.get_finished_spans()
    assert len(spans) == 1
    attrs = spans[0].attributes
    assert attrs[gen_ai_attributes.GEN_AI_TOOL_NAME] == "search"
    assert (
        attrs[gen_ai_attributes.GEN_AI_TOOL_CALL_ARGUMENTS] == "Paris weather"
    )


def test_on_tool_start_with_no_serialized(monkeypatch):
    """on_tool_start with serialized=None falls back to name='unknown'."""
    monkeypatch.setenv(
        "OTEL_SEMCONV_STABILITY_OPT_IN", "gen_ai_latest_experimental"
    )
    _enable_experimental_mode()
    tracer_provider, span_exporter, logger_provider, meter_provider = (
        _make_providers()
    )
    handler = _make_callback_handler(
        tracer_provider, logger_provider, meter_provider
    )

    run_id = uuid4()
    handler.on_tool_start(
        serialized=None,
        input_str="some input",
        run_id=run_id,
    )
    output = MagicMock()
    output.content = "result"
    output.tool_call_id = None
    handler.on_tool_end(output=output, run_id=run_id)

    spans = span_exporter.get_finished_spans()
    assert len(spans) == 1
    attrs = spans[0].attributes
    assert attrs[gen_ai_attributes.GEN_AI_TOOL_NAME] == "unknown"


def test_on_tool_error_records_error_type(monkeypatch):
    """on_tool_error finishes the span and records the exception class name."""
    monkeypatch.setenv(
        "OTEL_SEMCONV_STABILITY_OPT_IN", "gen_ai_latest_experimental"
    )
    _enable_experimental_mode()
    tracer_provider, span_exporter, logger_provider, meter_provider = (
        _make_providers()
    )
    handler = _make_callback_handler(
        tracer_provider, logger_provider, meter_provider
    )

    run_id = uuid4()
    handler.on_tool_start(
        serialized={"name": "failing_tool"},
        input_str="bad input",
        run_id=run_id,
    )
    exc = ValueError("something went wrong")
    handler.on_tool_error(error=exc, run_id=run_id)

    spans = span_exporter.get_finished_spans()
    assert len(spans) == 1
    attrs = spans[0].attributes
    assert attrs[gen_ai_attributes.GEN_AI_TOOL_NAME] == "failing_tool"
    assert attrs[error_attributes.ERROR_TYPE] == "ValueError"


# ---------------------------------------------------------------------------
# on_chat_model_start with tool_definitions
# ---------------------------------------------------------------------------


def test_on_chat_model_start_with_tools_sets_definitions(monkeypatch):
    """Tool definitions passed via invocation_params are captured on the span."""
    monkeypatch.setenv(
        "OTEL_SEMCONV_STABILITY_OPT_IN", "gen_ai_latest_experimental"
    )
    monkeypatch.setenv(
        "OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT", "SPAN_ONLY"
    )
    _enable_experimental_mode()
    tracer_provider, span_exporter, logger_provider, meter_provider = (
        _make_providers()
    )
    handler = _make_callback_handler(
        tracer_provider, logger_provider, meter_provider
    )

    run_id = uuid4()
    tools = [
        {
            "type": "function",
            "function": {
                "name": "multiply",
                "description": "Multiply two numbers",
                "parameters": {"type": "object"},
            },
        }
    ]

    handler.on_chat_model_start(
        serialized=_OPENAI_SERIALIZED,
        messages=[[HumanMessage(content="What is 3 * 4?")]],
        run_id=run_id,
        metadata=_OPENAI_METADATA,
        invocation_params={**_OPENAI_INVOCATION_PARAMS, "tools": tools},
    )

    # Finish the span so attributes are flushed
    ai_msg = AIMessage(content="12")
    ai_msg.response_metadata = {"finish_reason": "stop"}
    generation = ChatGeneration(message=ai_msg, text="12")
    generation.generation_info = {"finish_reason": "stop"}
    result = LLMResult(generations=[[generation]])
    handler.on_llm_end(response=result, run_id=run_id)

    spans = span_exporter.get_finished_spans()
    assert len(spans) == 1
    attrs = spans[0].attributes
    # Tool definitions are serialised into gen_ai.tool.definitions
    assert gen_ai_attributes.GEN_AI_TOOL_DEFINITIONS in attrs
    tool_definitions = attrs[gen_ai_attributes.GEN_AI_TOOL_DEFINITIONS]
    assert "multiply" in tool_definitions
    assert "Multiply two numbers" in tool_definitions


# ---------------------------------------------------------------------------
# on_llm_end with tool_calls finish reason
# ---------------------------------------------------------------------------


def _build_tool_call_llm_result(
    tool_calls: list[dict[str, Any]],
) -> LLMResult:
    """Build a fake LLMResult where the model responded with tool calls."""
    ai_msg = AIMessage(content="")
    ai_msg.tool_calls = tool_calls  # type: ignore[attr-defined]
    ai_msg.response_metadata = {}
    ai_msg.usage_metadata = None  # type: ignore[assignment]
    generation = ChatGeneration(message=ai_msg, text="")
    generation.generation_info = {"finish_reason": "tool_calls"}
    return LLMResult(generations=[[generation]])


def test_on_llm_end_with_tool_calls_records_tool_call_requests(monkeypatch):
    """When finish_reason is tool_calls the output message parts are ToolCallRequests."""
    monkeypatch.setenv(
        "OTEL_SEMCONV_STABILITY_OPT_IN", "gen_ai_latest_experimental"
    )
    monkeypatch.setenv(
        "OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT", "SPAN_ONLY"
    )
    _enable_experimental_mode()
    tracer_provider, span_exporter, logger_provider, meter_provider = (
        _make_providers()
    )
    handler = _make_callback_handler(
        tracer_provider, logger_provider, meter_provider
    )

    run_id = uuid4()
    handler.on_chat_model_start(
        serialized=_OPENAI_SERIALIZED,
        messages=[[HumanMessage(content="What is 3 * 4?")]],
        run_id=run_id,
        metadata=_OPENAI_METADATA,
        invocation_params=_OPENAI_INVOCATION_PARAMS,
    )

    result = _build_tool_call_llm_result(
        [{"name": "multiply", "id": "call_001", "args": {"a": 3, "b": 4}}]
    )
    handler.on_llm_end(response=result, run_id=run_id)

    spans = span_exporter.get_finished_spans()
    assert len(spans) == 1
    attrs = spans[0].attributes
    assert gen_ai_attributes.GEN_AI_OUTPUT_MESSAGES in attrs
    # Substring checks against the recorded output-messages attribute.
    output_messages = attrs[gen_ai_attributes.GEN_AI_OUTPUT_MESSAGES]
    assert "multiply" in output_messages
    assert "tool_calls" in output_messages


def test_on_llm_end_with_multiple_tool_calls(monkeypatch):
    """Both requested tool names appear in the recorded output messages."""
    monkeypatch.setenv(
        "OTEL_SEMCONV_STABILITY_OPT_IN", "gen_ai_latest_experimental"
    )
    monkeypatch.setenv(
        "OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT", "SPAN_ONLY"
    )
    _enable_experimental_mode()
    tracer_provider, span_exporter, logger_provider, meter_provider = (
        _make_providers()
    )
    handler = _make_callback_handler(
        tracer_provider, logger_provider, meter_provider
    )

    run_id = uuid4()
    handler.on_chat_model_start(
        serialized=_OPENAI_SERIALIZED,
        messages=[[HumanMessage(content="Compute 3*4 and 5+6")]],
        run_id=run_id,
        metadata=_OPENAI_METADATA,
        invocation_params=_OPENAI_INVOCATION_PARAMS,
    )

    result = _build_tool_call_llm_result(
        [
            {"name": "multiply", "id": "call_001", "args": {"a": 3, "b": 4}},
            {"name": "add", "id": "call_002", "args": {"a": 5, "b": 6}},
        ]
    )
    handler.on_llm_end(response=result, run_id=run_id)

    spans = span_exporter.get_finished_spans()
    assert len(spans) == 1
    attrs = spans[0].attributes
    output_messages = attrs[gen_ai_attributes.GEN_AI_OUTPUT_MESSAGES]
    assert "multiply" in output_messages
    assert "add" in output_messages


# ---------------------------------------------------------------------------
# Full LangChain tool invocation via instrumentor (no network)
#
# ---------------------------------------------------------------------------


def test_tool_span_created_via_instrumentor(monkeypatch):
    """Invoking a ``@tool`` under LangChainInstrumentor emits an execute_tool span.

    The tool is invoked directly, so no model call or network access is
    involved; the only span expected is the tool span itself.
    """
    monkeypatch.setenv(
        "OTEL_SEMCONV_STABILITY_OPT_IN", "gen_ai_latest_experimental"
    )
    _enable_experimental_mode()

    # Reuse the shared in-memory provider wiring rather than duplicating it.
    tracer_provider, span_exporter, logger_provider, meter_provider = (
        _make_providers()
    )

    instrumentor = LangChainInstrumentor()
    instrumentor.instrument(
        tracer_provider=tracer_provider,
        meter_provider=meter_provider,
        logger_provider=logger_provider,
    )

    try:

        @tool
        def multiply(a: int, b: int) -> int:
            """Multiply two integers."""
            return a * b

        multiply.invoke({"a": 3, "b": 4})

        spans = span_exporter.get_finished_spans()
        assert len(spans) == 1
        span = spans[0]
        assert span.name == "execute_tool multiply"
        attrs = span.attributes
        assert attrs[gen_ai_attributes.GEN_AI_OPERATION_NAME] == "execute_tool"
        assert attrs[gen_ai_attributes.GEN_AI_TOOL_NAME] == "multiply"
    finally:
        # Always undo the global instrumentation so later tests start clean.
        instrumentor.uninstrument()


# ---------------------------------------------------------------------------
# Content capturing off — arguments and result suppressed
# ---------------------------------------------------------------------------


def test_on_tool_start_and_end_no_content_capture_suppresses_arguments(
    monkeypatch,
):
    """Without content capture, arguments and result are absent from the span."""
    monkeypatch.setenv(
        "OTEL_SEMCONV_STABILITY_OPT_IN", "gen_ai_latest_experimental"
    )
    # Explicitly clear the capture setting: a value inherited from the
    # developer's shell or the CI environment would otherwise turn content
    # capture on and make this test fail for reasons unrelated to the code.
    monkeypatch.delenv(
        "OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT", raising=False
    )
    _enable_experimental_mode()
    tracer_provider, span_exporter, logger_provider, meter_provider = (
        _make_providers()
    )
    handler = _make_callback_handler(
        tracer_provider, logger_provider, meter_provider
    )

    run_id = uuid4()
    handler.on_tool_start(
        serialized={"name": "multiply", "description": "Multiply two numbers"},
        input_str="",
        run_id=run_id,
        inputs={"a": 3, "b": 4},
    )
    output = MagicMock()
    output.content = "12"
    output.tool_call_id = "call_abc"
    handler.on_tool_end(output=output, run_id=run_id)

    spans = span_exporter.get_finished_spans()
    assert len(spans) == 1
    attrs = spans[0].attributes
    assert attrs[gen_ai_attributes.GEN_AI_TOOL_NAME] == "multiply"
    assert gen_ai_attributes.GEN_AI_TOOL_CALL_ARGUMENTS not in attrs
    assert gen_ai_attributes.GEN_AI_TOOL_CALL_RESULT not in attrs


def test_on_tool_end_captures_result_with_span_only_mode(monkeypatch):
    """tool_result is set on the span when content capture is SPAN_ONLY."""
    monkeypatch.setenv(
        "OTEL_SEMCONV_STABILITY_OPT_IN", "gen_ai_latest_experimental"
    )
    monkeypatch.setenv(
        "OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT", "SPAN_ONLY"
    )
    _enable_experimental_mode()
    tracer_provider, span_exporter, logger_provider, meter_provider = (
        _make_providers()
    )
    handler = _make_callback_handler(
        tracer_provider, logger_provider, meter_provider
    )

    run_id = uuid4()
    handler.on_tool_start(
        serialized={"name": "lookup"},
        input_str="query",
        run_id=run_id,
    )
    output = MagicMock()
    output.content = "result text"
    output.tool_call_id = None
    handler.on_tool_end(output=output, run_id=run_id)

    spans = span_exporter.get_finished_spans()
    assert len(spans) == 1
    attrs = spans[0].attributes
    assert attrs[gen_ai_attributes.GEN_AI_TOOL_CALL_RESULT] == "result text"


# ---------------------------------------------------------------------------
# on_tool_end attribute types
#
# ---------------------------------------------------------------------------


def test_on_tool_end_sets_tool_call_id_attribute(monkeypatch):
    """The call id carried by the tool output is recorded on the span."""
    opt_in = "gen_ai_latest_experimental"
    monkeypatch.setenv("OTEL_SEMCONV_STABILITY_OPT_IN", opt_in)
    _enable_experimental_mode()
    tracer, exporter, logger, meter = _make_providers()
    callback = _make_callback_handler(tracer, logger, meter)

    tool_run = uuid4()
    callback.on_tool_start(
        serialized={"name": "mytool"}, input_str="", run_id=tool_run
    )
    # Keyword arguments to MagicMock become plain attributes on the mock.
    tool_output = MagicMock(content="done", tool_call_id="call_xyz")
    callback.on_tool_end(output=tool_output, run_id=tool_run)

    finished = exporter.get_finished_spans()
    assert len(finished) == 1
    recorded = finished[0].attributes
    assert recorded[gen_ai_attributes.GEN_AI_TOOL_CALL_ID] == "call_xyz"


def test_on_tool_end_with_none_tool_call_id_omits_attribute(monkeypatch):
    """No call-id attribute is written when the output's call id is None."""
    opt_in = "gen_ai_latest_experimental"
    monkeypatch.setenv("OTEL_SEMCONV_STABILITY_OPT_IN", opt_in)
    _enable_experimental_mode()
    tracer, exporter, logger, meter = _make_providers()
    callback = _make_callback_handler(tracer, logger, meter)

    tool_run = uuid4()
    callback.on_tool_start(
        serialized={"name": "mytool"}, input_str="", run_id=tool_run
    )
    tool_output = MagicMock(content="done", tool_call_id=None)
    callback.on_tool_end(output=tool_output, run_id=tool_run)

    finished = exporter.get_finished_spans()
    assert len(finished) == 1
    recorded = finished[0].attributes
    assert gen_ai_attributes.GEN_AI_TOOL_CALL_ID not in recorded


# ---------------------------------------------------------------------------
# on_tool_end / on_tool_error with unknown run_id — must not raise
#
# ---------------------------------------------------------------------------


def test_on_tool_end_unknown_run_id_does_not_raise(monkeypatch):
    """on_tool_end for a run that was never started emits no span."""
    opt_in = "gen_ai_latest_experimental"
    monkeypatch.setenv("OTEL_SEMCONV_STABILITY_OPT_IN", opt_in)
    _enable_experimental_mode()
    tracer, exporter, logger, meter = _make_providers()
    callback = _make_callback_handler(tracer, logger, meter)

    tool_output = MagicMock(content="result", tool_call_id=None)
    # No on_tool_start was called — should be a no-op
    never_started = uuid4()
    callback.on_tool_end(output=tool_output, run_id=never_started)

    finished = exporter.get_finished_spans()
    assert len(finished) == 0


def test_on_tool_error_unknown_run_id_does_not_raise(monkeypatch):
    """on_tool_error for a run that was never started emits no span."""
    opt_in = "gen_ai_latest_experimental"
    monkeypatch.setenv("OTEL_SEMCONV_STABILITY_OPT_IN", opt_in)
    _enable_experimental_mode()
    tracer, exporter, logger, meter = _make_providers()
    callback = _make_callback_handler(tracer, logger, meter)

    failure = RuntimeError("boom")
    never_started = uuid4()
    callback.on_tool_error(error=failure, run_id=never_started)

    finished = exporter.get_finished_spans()
    assert len(finished) == 0


# ---------------------------------------------------------------------------
# on_tool_start: inputs=None falls back to input_str
# ---------------------------------------------------------------------------


def test_on_tool_start_uses_input_str_when_inputs_is_none(monkeypatch):
    """With no ``inputs`` kwarg, ``input_str`` becomes the recorded arguments."""
    opt_in = "gen_ai_latest_experimental"
    capture_mode = "SPAN_ONLY"
    monkeypatch.setenv("OTEL_SEMCONV_STABILITY_OPT_IN", opt_in)
    monkeypatch.setenv(
        "OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT", capture_mode
    )
    _enable_experimental_mode()
    tracer, exporter, logger, meter = _make_providers()
    callback = _make_callback_handler(tracer, logger, meter)

    tool_run = uuid4()
    # inputs not passed → defaults to None → callback uses input_str
    callback.on_tool_start(
        serialized={"name": "greet"},
        input_str="hello world",
        run_id=tool_run,
    )
    tool_output = MagicMock(content="hi", tool_call_id=None)
    callback.on_tool_end(output=tool_output, run_id=tool_run)

    finished = exporter.get_finished_spans()
    assert len(finished) == 1
    recorded = finished[0].attributes
    arguments = recorded[gen_ai_attributes.GEN_AI_TOOL_CALL_ARGUMENTS]
    assert arguments == "hello world"


def test_on_tool_start_inputs_takes_priority_over_input_str(monkeypatch):
    """If both are supplied, the ``inputs`` dict is recorded, not ``input_str``."""
    opt_in = "gen_ai_latest_experimental"
    capture_mode = "SPAN_ONLY"
    monkeypatch.setenv("OTEL_SEMCONV_STABILITY_OPT_IN", opt_in)
    monkeypatch.setenv(
        "OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT", capture_mode
    )
    _enable_experimental_mode()
    tracer, exporter, logger, meter = _make_providers()
    callback = _make_callback_handler(tracer, logger, meter)

    tool_run = uuid4()
    callback.on_tool_start(
        serialized={"name": "add"},
        input_str="ignored",
        run_id=tool_run,
        inputs={"x": 1, "y": 2},
    )
    tool_output = MagicMock(content="3", tool_call_id=None)
    callback.on_tool_end(output=tool_output, run_id=tool_run)

    finished = exporter.get_finished_spans()
    assert len(finished) == 1
    recorded = finished[0].attributes
    expected_arguments = json.dumps({"x": 1, "y": 2})
    arguments = recorded[gen_ai_attributes.GEN_AI_TOOL_CALL_ARGUMENTS]
    assert arguments == expected_arguments


# ---------------------------------------------------------------------------
# on_chat_model_start: functions key as alternative to tools
# ---------------------------------------------------------------------------


def test_on_chat_model_start_with_functions_key_sets_definitions(monkeypatch):
    """Definitions supplied under the 'functions' invocation param are captured."""
    opt_in = "gen_ai_latest_experimental"
    capture_mode = "SPAN_ONLY"
    monkeypatch.setenv("OTEL_SEMCONV_STABILITY_OPT_IN", opt_in)
    monkeypatch.setenv(
        "OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT", capture_mode
    )
    _enable_experimental_mode()
    tracer, exporter, logger, meter = _make_providers()
    callback = _make_callback_handler(tracer, logger, meter)

    chat_run = uuid4()
    weather_function = {
        "name": "get_weather",
        "description": "Get current weather",
    }
    functions = [{"type": "function", "function": weather_function}]
    # Copy of the shared params with the extra "functions" entry.
    params = dict(_OPENAI_INVOCATION_PARAMS, functions=functions)

    callback.on_chat_model_start(
        serialized=_OPENAI_SERIALIZED,
        messages=[[HumanMessage(content="What's the weather?")]],
        run_id=chat_run,
        metadata=_OPENAI_METADATA,
        invocation_params=params,
    )
    reply = AIMessage(content="It is sunny.")
    reply.response_metadata = {"finish_reason": "stop"}
    chat_generation = ChatGeneration(message=reply, text="It is sunny.")
    chat_generation.generation_info = {"finish_reason": "stop"}
    llm_result = LLMResult(generations=[[chat_generation]])
    callback.on_llm_end(response=llm_result, run_id=chat_run)

    finished = exporter.get_finished_spans()
    assert len(finished) == 1
    recorded = finished[0].attributes
    assert gen_ai_attributes.GEN_AI_TOOL_DEFINITIONS in recorded
    definitions = recorded[gen_ai_attributes.GEN_AI_TOOL_DEFINITIONS]
    assert "get_weather" in definitions


def test_on_chat_model_start_without_tools_omits_definitions(monkeypatch):
    """The definitions attribute is absent when no tools are passed."""
    opt_in = "gen_ai_latest_experimental"
    capture_mode = "SPAN_ONLY"
    monkeypatch.setenv("OTEL_SEMCONV_STABILITY_OPT_IN", opt_in)
    monkeypatch.setenv(
        "OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT", capture_mode
    )
    _enable_experimental_mode()
    tracer, exporter, logger, meter = _make_providers()
    callback = _make_callback_handler(tracer, logger, meter)

    chat_run = uuid4()
    callback.on_chat_model_start(
        serialized=_OPENAI_SERIALIZED,
        messages=[[HumanMessage(content="Hello")]],
        run_id=chat_run,
        metadata=_OPENAI_METADATA,
        invocation_params=_OPENAI_INVOCATION_PARAMS,
    )
    reply = AIMessage(content="Hi")
    reply.response_metadata = {"finish_reason": "stop"}
    chat_generation = ChatGeneration(message=reply, text="Hi")
    chat_generation.generation_info = {"finish_reason": "stop"}
    llm_result = LLMResult(generations=[[chat_generation]])
    callback.on_llm_end(response=llm_result, run_id=chat_run)

    finished = exporter.get_finished_spans()
    assert len(finished) == 1
    recorded = finished[0].attributes
    assert gen_ai_attributes.GEN_AI_TOOL_DEFINITIONS not in recorded