From 3d60029f37d599e8bf84a840e3cef67f99772ece Mon Sep 17 00:00:00 2001 From: Wrisa Date: Sun, 17 May 2026 15:42:38 -0700 Subject: [PATCH 01/14] Added tools support --- .../langchain/callback_handler.py | 129 ++++++++++++++++-- 1 file changed, 117 insertions(+), 12 deletions(-) diff --git a/instrumentation/opentelemetry-instrumentation-langchain/src/opentelemetry/instrumentation/langchain/callback_handler.py b/instrumentation/opentelemetry-instrumentation-langchain/src/opentelemetry/instrumentation/langchain/callback_handler.py index a024ca53..e41e1eae 100644 --- a/instrumentation/opentelemetry-instrumentation-langchain/src/opentelemetry/instrumentation/langchain/callback_handler.py +++ b/instrumentation/opentelemetry-instrumentation-langchain/src/opentelemetry/instrumentation/langchain/callback_handler.py @@ -21,6 +21,8 @@ MessagePart, OutputMessage, Text, + ToolDefinition + FunctionToolDefinition ) @@ -144,6 +146,11 @@ def on_chat_model_start( llm_invocation = self._telemetry_handler.start_llm( invocation=llm_invocation ) + if "invocation_params" in kwargs: + tools = kwargs["invocation_params"].get("tools") or kwargs["invocation_params"].get("functions") + if tools: + tool_definitions = self._prepare_tool_definitions(tools) + llm_invocation.tool_definitions = tool_definitions self._invocation_manager.add_invocation_state( run_id=run_id, parent_run_id=parent_run_id, @@ -191,19 +198,33 @@ def on_llm_end( ) ) - # Get message content - parts = [ - Text( - content=chat_generation.message.content, - type="text", + if finish_reason == "tool_calls": + tool_calls = [] + for tool_call in chat_generation.message.tool_calls: + tool_call_request = ToolCallRequest( + name=tool_call.name, + id=tool_call.id, + arguments=tool_call.args, + ) + tool_calls.append(tool_call_request) + output_message = OutputMessage( + role="assistant", + parts=cast(list[MessagePart], tool_calls), + finish_reason=finish_reason, + ) + else: + parts = [ + Text( + content=chat_generation.message.content, 
+ type="text", + ) + ] + role = chat_generation.message.type + output_message = OutputMessage( + role=role, + parts=cast(list[MessagePart], parts), + finish_reason=finish_reason, ) - ] - role = chat_generation.message.type - output_message = OutputMessage( - role=role, - parts=cast(list[MessagePart], parts), - finish_reason=finish_reason, - ) output_messages.append(output_message) # Get token usage if available @@ -264,3 +285,87 @@ def on_llm_error( ) if llm_invocation.span and not llm_invocation.span.is_recording(): self._invocation_manager.delete_invocation_state(run_id=run_id) + + def on_tool_start( + self, + serialized: Optional[dict[str, Any]], + input_str: str, + *, + run_id: UUID, + parent_run_id: Optional[UUID] = None, + tags: Optional[list[str]] = None, + metadata: Optional[dict[str, Any]] = None, + inputs: Optional[dict[str, Any]] = None, + **kwargs: Any, + ) -> None: + name = "unknown_tool" + description = None + if serialized is not None: + name = serialized.get("name") + description = serialized.get("description") + + arguments: Any = inputs if inputs is not None else input_str + tool_invocation = ToolInvocation( + name=name, + description=description, + arguments=arguments, + ) + tool_invocation = self._handler.start_tool_call(tool_invocation) + self._invocation_manager.add(run_id, parent_run_id, tool_invocation) + + def on_tool_end( + self, + output: Any, + *, + run_id: UUID, + parent_run_id: Optional[UUID] = None, + **_kwargs: Any, + ) -> None: + tool_invocation = self._invocation_manager.get(run_id) + if not isinstance(tool_invocation, ToolInvocation): + return + tool_invocation.tool_call_id = getattr(output, "tool_call_id", None) + tool_invocation.tool_result = getattr(output, "content", None) + tool_invocation.stop() + if not tool_invocation.span.is_recording(): + self._invocation_manager.delete_invocation_state(run_id=run_id) + + def on_tool_error( + self, + error: BaseException, + *, + run_id: UUID, + parent_run_id: Optional[UUID] = None, + 
**_: Any, + ) -> None: + tool_invocation = self._invocation_manager.get(run_id) + if not isinstance(tool_invocation, ToolInvocation): + return + tool_invocation.fail(error) + if not tool_invocation.span.is_recording(): + self._invocation_manager.delete_invocation_state(run_id=run_id) + + def get_property_value(obj, property_name): + if isinstance(obj, dict): + return obj.get(property_name, None) + + return getattr(obj, property_name, None) + + def _prepare_tool_definitions(tools) -> list[ToolDefinition] | None: + if not tools: + return None + + definitions: list[ToolDefinition] = [] + for tool in tools: + tool_type = get_property_value(tool, "type") + if tool_type == "function": + func = get_property_value(tool, "function") + if func: + definitions.append( + FunctionToolDefinition( + name=get_property_value(func, "name") or "", + description=get_property_value(func, "description"), + parameters=get_property_value(func, "parameters"), + ) + ) + return definitions From 2fb92340db86471c686acf14202af32b9133c3d4 Mon Sep 17 00:00:00 2001 From: Wrisa Date: Mon, 18 May 2026 10:20:45 -0700 Subject: [PATCH 02/14] Add tools support in langchain --- .../examples/tool/main.py | 116 ++++ .../examples/tool/requirements.txt | 8 + .../langchain/callback_handler.py | 83 ++- .../instrumentation/langchain/utils.py | 42 ++ .../tests/test_tools.py | 564 ++++++++++++++++++ .../util/genai/_inference_invocation.py | 3 + 6 files changed, 769 insertions(+), 47 deletions(-) create mode 100644 instrumentation/opentelemetry-instrumentation-langchain/examples/tool/main.py create mode 100644 instrumentation/opentelemetry-instrumentation-langchain/examples/tool/requirements.txt create mode 100644 instrumentation/opentelemetry-instrumentation-langchain/src/opentelemetry/instrumentation/langchain/utils.py create mode 100644 instrumentation/opentelemetry-instrumentation-langchain/tests/test_tools.py diff --git a/instrumentation/opentelemetry-instrumentation-langchain/examples/tool/main.py 
b/instrumentation/opentelemetry-instrumentation-langchain/examples/tool/main.py new file mode 100644 index 00000000..847dea14 --- /dev/null +++ b/instrumentation/opentelemetry-instrumentation-langchain/examples/tool/main.py @@ -0,0 +1,116 @@ +# Copyright The OpenTelemetry Authors +# SPDX-License-Identifier: Apache-2.0 + +""" +Tool-calling example without agents, built with LangChain. + +Uses ChatOpenAI with bind_tools to let the model call calculator tools directly, +then manually dispatches tool calls and feeds results back to the model. +OpenTelemetry LangChain instrumentation traces the LLM calls. +""" + +from __future__ import annotations + +import json + +from langchain_core.messages import HumanMessage, ToolMessage +from langchain_core.tools import tool +from langchain_openai import ChatOpenAI + +from opentelemetry import _logs, metrics, trace +from opentelemetry.exporter.otlp.proto.grpc._log_exporter import ( + OTLPLogExporter, +) +from opentelemetry.exporter.otlp.proto.grpc.metric_exporter import ( + OTLPMetricExporter, +) +from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import ( + OTLPSpanExporter, +) +from opentelemetry.instrumentation.langchain import LangChainInstrumentor +from opentelemetry.sdk._logs import LoggerProvider +from opentelemetry.sdk._logs.export import BatchLogRecordProcessor +from opentelemetry.sdk.metrics import MeterProvider +from opentelemetry.sdk.metrics.export import PeriodicExportingMetricReader +from opentelemetry.sdk.trace import TracerProvider +from opentelemetry.sdk.trace.export import BatchSpanProcessor + +# Configure tracing +trace.set_tracer_provider(TracerProvider()) +span_processor = BatchSpanProcessor(OTLPSpanExporter()) +trace.get_tracer_provider().add_span_processor(span_processor) + +# Configure logging +_logs.set_logger_provider(LoggerProvider()) +_logs.get_logger_provider().add_log_record_processor( + BatchLogRecordProcessor(OTLPLogExporter()) +) + +# Configure metrics +metrics.set_meter_provider( + 
MeterProvider( + metric_readers=[ + PeriodicExportingMetricReader( + OTLPMetricExporter(), + ), + ] + ) +) + + +@tool +def multiply(a: float, b: float) -> float: + """Multiply two numbers together.""" + return a * b + + +@tool +def add(a: float, b: float) -> float: + """Add two numbers together.""" + return a + b + + +TOOLS = [multiply, add] +TOOLS_BY_NAME = {t.name: t for t in TOOLS} + + +def main() -> None: + LangChainInstrumentor().instrument() + + llm = ChatOpenAI( + model="gpt-3.5-turbo", + temperature=0.1, + max_tokens=100, + top_p=0.9, + seed=100, + ) + llm_with_tools = llm.bind_tools(TOOLS) + + messages = [HumanMessage(content="What is (3 * 4) + 7?")] + + # First LLM call — model may request tool calls + response = llm_with_tools.invoke(messages) + messages.append(response) + + # Dispatch tool calls until the model stops requesting them + while response.tool_calls: + for tool_call in response.tool_calls: + selected_tool = TOOLS_BY_NAME[tool_call["name"]] + tool_output = selected_tool.invoke(tool_call["args"]) + messages.append( + ToolMessage( + content=json.dumps(tool_output), + tool_call_id=tool_call["id"], + ) + ) + + response = llm_with_tools.invoke(messages) + messages.append(response) + + print("Final answer:", response.content) + + LangChainInstrumentor().uninstrument() + + +if __name__ == "__main__": + main() diff --git a/instrumentation/opentelemetry-instrumentation-langchain/examples/tool/requirements.txt b/instrumentation/opentelemetry-instrumentation-langchain/examples/tool/requirements.txt new file mode 100644 index 00000000..8b403318 --- /dev/null +++ b/instrumentation/opentelemetry-instrumentation-langchain/examples/tool/requirements.txt @@ -0,0 +1,8 @@ +langchain==0.3.21 +langchain_openai +langgraph +opentelemetry-sdk>=1.31.0 +opentelemetry-exporter-otlp-proto-grpc>=1.31.0 + +# Uncomment after langchain instrumentation is released +# opentelemetry-instrumentation-langchain~=2.0b0.dev \ No newline at end of file diff --git 
a/instrumentation/opentelemetry-instrumentation-langchain/src/opentelemetry/instrumentation/langchain/callback_handler.py b/instrumentation/opentelemetry-instrumentation-langchain/src/opentelemetry/instrumentation/langchain/callback_handler.py index e41e1eae..92ed8db7 100644 --- a/instrumentation/opentelemetry-instrumentation-langchain/src/opentelemetry/instrumentation/langchain/callback_handler.py +++ b/instrumentation/opentelemetry-instrumentation-langchain/src/opentelemetry/instrumentation/langchain/callback_handler.py @@ -3,6 +3,7 @@ from __future__ import annotations +import json from typing import Any, Optional, cast from uuid import UUID @@ -13,7 +14,13 @@ from opentelemetry.instrumentation.langchain.invocation_manager import ( _InvocationManager, ) +from opentelemetry.instrumentation.langchain.utils import ( + _prepare_tool_definitions, +) from opentelemetry.util.genai.handler import TelemetryHandler +from opentelemetry.util.genai.invocation import ( + ToolInvocation, +) from opentelemetry.util.genai.types import ( Error, InputMessage, @@ -21,8 +28,7 @@ MessagePart, OutputMessage, Text, - ToolDefinition - FunctionToolDefinition + ToolCallRequest, ) @@ -143,14 +149,16 @@ def on_chat_model_start( temperature=temperature, max_tokens=max_tokens, ) - llm_invocation = self._telemetry_handler.start_llm( - invocation=llm_invocation - ) if "invocation_params" in kwargs: - tools = kwargs["invocation_params"].get("tools") or kwargs["invocation_params"].get("functions") + tools = kwargs["invocation_params"].get("tools") or kwargs[ + "invocation_params" + ].get("functions") if tools: - tool_definitions = self._prepare_tool_definitions(tools) + tool_definitions = _prepare_tool_definitions(tools) llm_invocation.tool_definitions = tool_definitions + llm_invocation = self._telemetry_handler.start_llm( + invocation=llm_invocation + ) self._invocation_manager.add_invocation_state( run_id=run_id, parent_run_id=parent_run_id, @@ -199,12 +207,12 @@ def on_llm_end( ) if 
finish_reason == "tool_calls": - tool_calls = [] + tool_calls: list[ToolCallRequest] = [] for tool_call in chat_generation.message.tool_calls: tool_call_request = ToolCallRequest( - name=tool_call.name, - id=tool_call.id, - arguments=tool_call.args, + name=tool_call["name"], + id=tool_call["id"], + arguments=tool_call["args"], ) tool_calls.append(tool_call_request) output_message = OutputMessage( @@ -298,20 +306,26 @@ def on_tool_start( inputs: Optional[dict[str, Any]] = None, **kwargs: Any, ) -> None: - name = "unknown_tool" + name = "unknown" description = None if serialized is not None: - name = serialized.get("name") + name = serialized.get("name") or "unknown" description = serialized.get("description") - arguments: Any = inputs if inputs is not None else input_str - tool_invocation = ToolInvocation( - name=name, - description=description, - arguments=arguments, + raw_arguments: Any = inputs if inputs is not None else input_str + arguments: str | None + if isinstance(raw_arguments, dict): + arguments = json.dumps(raw_arguments) + elif isinstance(raw_arguments, str): + arguments = raw_arguments + else: + arguments = None + tool_invocation = self._telemetry_handler.start_tool( + name=name, tool_description=description, arguments=arguments + ) + self._invocation_manager.add_invocation_state( + run_id, parent_run_id, tool_invocation ) - tool_invocation = self._handler.start_tool_call(tool_invocation) - self._invocation_manager.add(run_id, parent_run_id, tool_invocation) def on_tool_end( self, @@ -321,7 +335,7 @@ def on_tool_end( parent_run_id: Optional[UUID] = None, **_kwargs: Any, ) -> None: - tool_invocation = self._invocation_manager.get(run_id) + tool_invocation = self._invocation_manager.get_invocation(run_id) if not isinstance(tool_invocation, ToolInvocation): return tool_invocation.tool_call_id = getattr(output, "tool_call_id", None) @@ -338,34 +352,9 @@ def on_tool_error( parent_run_id: Optional[UUID] = None, **_: Any, ) -> None: - tool_invocation = 
self._invocation_manager.get(run_id) + tool_invocation = self._invocation_manager.get_invocation(run_id) if not isinstance(tool_invocation, ToolInvocation): return tool_invocation.fail(error) if not tool_invocation.span.is_recording(): self._invocation_manager.delete_invocation_state(run_id=run_id) - - def get_property_value(obj, property_name): - if isinstance(obj, dict): - return obj.get(property_name, None) - - return getattr(obj, property_name, None) - - def _prepare_tool_definitions(tools) -> list[ToolDefinition] | None: - if not tools: - return None - - definitions: list[ToolDefinition] = [] - for tool in tools: - tool_type = get_property_value(tool, "type") - if tool_type == "function": - func = get_property_value(tool, "function") - if func: - definitions.append( - FunctionToolDefinition( - name=get_property_value(func, "name") or "", - description=get_property_value(func, "description"), - parameters=get_property_value(func, "parameters"), - ) - ) - return definitions diff --git a/instrumentation/opentelemetry-instrumentation-langchain/src/opentelemetry/instrumentation/langchain/utils.py b/instrumentation/opentelemetry-instrumentation-langchain/src/opentelemetry/instrumentation/langchain/utils.py new file mode 100644 index 00000000..67c04eeb --- /dev/null +++ b/instrumentation/opentelemetry-instrumentation-langchain/src/opentelemetry/instrumentation/langchain/utils.py @@ -0,0 +1,42 @@ +# Copyright The OpenTelemetry Authors +# SPDX-License-Identifier: Apache-2.0 + +from __future__ import annotations + +from typing import Any, cast + +from opentelemetry.util.genai.types import ( + FunctionToolDefinition, + ToolDefinition, +) + +__all__ = ["_prepare_tool_definitions"] + + +def _get_property_value(obj: Any, property_name: str) -> Any: + if isinstance(obj, dict): + return cast(dict[str, Any], obj).get(property_name) + + return getattr(obj, property_name, None) + + +def _prepare_tool_definitions(tools: list[Any]) -> list[ToolDefinition] | None: + if not tools: + 
return None + + definitions: list[ToolDefinition] = [] + for tool in tools: + tool_type = _get_property_value(tool, "type") + if tool_type == "function": + func = _get_property_value(tool, "function") + if func: + func_name = _get_property_value(func, "name") + func_description = _get_property_value(func, "description") + definitions.append( + FunctionToolDefinition( + name=str(func_name) if func_name is not None else "", + description=str(func_description) if func_description is not None else None, + parameters=_get_property_value(func, "parameters"), + ) + ) + return definitions diff --git a/instrumentation/opentelemetry-instrumentation-langchain/tests/test_tools.py b/instrumentation/opentelemetry-instrumentation-langchain/tests/test_tools.py new file mode 100644 index 00000000..eb30f385 --- /dev/null +++ b/instrumentation/opentelemetry-instrumentation-langchain/tests/test_tools.py @@ -0,0 +1,564 @@ +# Copyright The OpenTelemetry Authors +# SPDX-License-Identifier: Apache-2.0 + +"""Tests for tool-related support in the LangChain callback handler.""" + +from __future__ import annotations + +import json +from typing import Any +from unittest.mock import MagicMock +from uuid import UUID, uuid4 + +import pytest +from langchain_core.messages import AIMessage +from langchain_core.outputs import ChatGeneration, LLMResult + +from opentelemetry.instrumentation.langchain import LangChainInstrumentor +from opentelemetry.instrumentation.langchain.callback_handler import ( + OpenTelemetryLangChainCallbackHandler, +) +from opentelemetry.instrumentation.langchain.utils import ( + _get_property_value, + _prepare_tool_definitions, +) +from opentelemetry.sdk._logs import LoggerProvider +from opentelemetry.sdk._logs.export import ( + InMemoryLogRecordExporter, + SimpleLogRecordProcessor, +) +from opentelemetry.sdk.metrics import MeterProvider +from opentelemetry.sdk.metrics.export import InMemoryMetricReader +from opentelemetry.sdk.trace import TracerProvider +from 
opentelemetry.sdk.trace.export import SimpleSpanProcessor +from opentelemetry.sdk.trace.export.in_memory_span_exporter import ( + InMemorySpanExporter, +) +from opentelemetry.instrumentation._semconv import ( + _OpenTelemetrySemanticConventionStability, +) +from opentelemetry.semconv._incubating.attributes import gen_ai_attributes +from opentelemetry.semconv.attributes import error_attributes +from opentelemetry.util.genai.handler import TelemetryHandler +from opentelemetry.util.genai.types import FunctionToolDefinition + + +@pytest.fixture(autouse=True) +def reset_semconv_stability(): + """Reset the semconv stability cache before and after each test.""" + _OpenTelemetrySemanticConventionStability._initialized = False + _OpenTelemetrySemanticConventionStability._OTEL_SEMCONV_STABILITY_SIGNAL_MAPPING = {} + yield + _OpenTelemetrySemanticConventionStability._initialized = False + _OpenTelemetrySemanticConventionStability._OTEL_SEMCONV_STABILITY_SIGNAL_MAPPING = {} + + +def _enable_experimental_mode(): + """Call after setting OTEL_SEMCONV_STABILITY_OPT_IN env var to activate it.""" + _OpenTelemetrySemanticConventionStability._initialized = False + _OpenTelemetrySemanticConventionStability._OTEL_SEMCONV_STABILITY_SIGNAL_MAPPING = {} + _OpenTelemetrySemanticConventionStability._initialize() + + +# --------------------------------------------------------------------------- +# Unit tests for _get_property_value +# --------------------------------------------------------------------------- + + +def test_get_property_value_from_dict(): + assert _get_property_value({"name": "my_tool"}, "name") == "my_tool" + + +def test_get_property_value_from_dict_missing_key(): + assert _get_property_value({}, "name") is None + + +def test_get_property_value_from_object(): + obj = MagicMock() + obj.name = "obj_tool" + assert _get_property_value(obj, "name") == "obj_tool" + + +def test_get_property_value_from_object_missing_attr(): + class Plain: + pass + + assert 
_get_property_value(Plain(), "missing") is None + + +# --------------------------------------------------------------------------- +# Unit tests for _prepare_tool_definitions +# --------------------------------------------------------------------------- + + +def test_prepare_tool_definitions_returns_none_for_empty(): + assert _prepare_tool_definitions([]) is None + + +def test_prepare_tool_definitions_dict_tools(): + tools = [ + { + "type": "function", + "function": { + "name": "multiply", + "description": "Multiply two numbers", + "parameters": { + "type": "object", + "properties": { + "a": {"type": "integer"}, + "b": {"type": "integer"}, + }, + }, + }, + } + ] + result = _prepare_tool_definitions(tools) + assert result is not None + assert len(result) == 1 + defn = result[0] + assert isinstance(defn, FunctionToolDefinition) + assert defn.name == "multiply" + assert defn.description == "Multiply two numbers" + assert defn.parameters is not None + + +def test_prepare_tool_definitions_skips_non_function_type(): + tools = [{"type": "retrieval", "retrieval": {}}] + result = _prepare_tool_definitions(tools) + # No function-type tools → empty list returned (not None, but falsy) + assert not result + + +def test_prepare_tool_definitions_multiple_tools(): + tools = [ + { + "type": "function", + "function": {"name": "add", "description": "Add numbers"}, + }, + { + "type": "function", + "function": {"name": "subtract", "description": "Subtract numbers"}, + }, + ] + result = _prepare_tool_definitions(tools) + assert result is not None + assert len(result) == 2 + assert result[0].name == "add" + assert result[1].name == "subtract" + + +def test_prepare_tool_definitions_missing_name_defaults_to_empty_string(): + tools = [ + { + "type": "function", + "function": {"description": "No name tool"}, + } + ] + result = _prepare_tool_definitions(tools) + assert result is not None + assert len(result) == 1 + assert result[0].name == "" + + +def 
test_prepare_tool_definitions_none_description_stays_none(): + tools = [ + { + "type": "function", + "function": {"name": "no_desc"}, + } + ] + result = _prepare_tool_definitions(tools) + assert result is not None + assert result[0].description is None + + +def test_prepare_tool_definitions_object_tools(): + """Tools may be objects (e.g. pydantic models) rather than dicts.""" + + class FuncDef: + name = "get_weather" + description = "Get current weather" + parameters = {"type": "object"} + + class ToolDef: + type = "function" + function = FuncDef() + + result = _prepare_tool_definitions([ToolDef()]) + assert result is not None + assert len(result) == 1 + assert result[0].name == "get_weather" + assert result[0].description == "Get current weather" + + +# --------------------------------------------------------------------------- +# Helpers shared by callback-handler integration tests +# --------------------------------------------------------------------------- + + +def _make_providers(): + span_exporter = InMemorySpanExporter() + tracer_provider = TracerProvider() + tracer_provider.add_span_processor(SimpleSpanProcessor(span_exporter)) + + log_exporter = InMemoryLogRecordExporter() + logger_provider = LoggerProvider() + logger_provider.add_log_record_processor( + SimpleLogRecordProcessor(log_exporter) + ) + + metric_reader = InMemoryMetricReader() + meter_provider = MeterProvider(metric_readers=[metric_reader]) + + return tracer_provider, span_exporter, logger_provider, meter_provider + + +def _make_handler(tracer_provider, logger_provider, meter_provider): + return TelemetryHandler( + tracer_provider=tracer_provider, + logger_provider=logger_provider, + meter_provider=meter_provider, + ) + + +def _make_callback_handler(tracer_provider, logger_provider, meter_provider): + telemetry_handler = _make_handler( + tracer_provider, logger_provider, meter_provider + ) + return OpenTelemetryLangChainCallbackHandler(telemetry_handler) + + +_OPENAI_SERIALIZED: dict[str, Any] 
= {"name": "ChatOpenAI"} +_OPENAI_INVOCATION_PARAMS: dict[str, Any] = { + "model_name": "gpt-4", + "temperature": 0.0, +} +_OPENAI_METADATA: dict[str, Any] = {"ls_provider": "openai"} + + +# --------------------------------------------------------------------------- +# on_tool_start / on_tool_end +# --------------------------------------------------------------------------- + + +def test_on_tool_start_and_end_creates_span(monkeypatch): + monkeypatch.setenv("OTEL_SEMCONV_STABILITY_OPT_IN", "gen_ai_latest_experimental") + tracer_provider, span_exporter, logger_provider, meter_provider = ( + _make_providers() + ) + handler = _make_callback_handler( + tracer_provider, logger_provider, meter_provider + ) + + run_id = uuid4() + handler.on_tool_start( + serialized={"name": "multiply", "description": "Multiply two numbers"}, + input_str="", + run_id=run_id, + inputs={"a": 3, "b": 4}, + ) + + output = MagicMock() + output.content = "12" + output.tool_call_id = "call_abc" + handler.on_tool_end(output=output, run_id=run_id) + + spans = span_exporter.get_finished_spans() + assert len(spans) == 1 + span = spans[0] + assert span.name == "execute_tool multiply" + attrs = span.attributes + assert attrs[gen_ai_attributes.GEN_AI_OPERATION_NAME] == "execute_tool" + assert attrs[gen_ai_attributes.GEN_AI_TOOL_NAME] == "multiply" + assert attrs[gen_ai_attributes.GEN_AI_TOOL_DESCRIPTION] == "Multiply two numbers" + assert attrs[gen_ai_attributes.GEN_AI_TOOL_CALL_ARGUMENTS] == json.dumps( + {"a": 3, "b": 4} + ) + + +def test_on_tool_start_with_string_input(monkeypatch): + monkeypatch.setenv("OTEL_SEMCONV_STABILITY_OPT_IN", "gen_ai_latest_experimental") + tracer_provider, span_exporter, logger_provider, meter_provider = ( + _make_providers() + ) + handler = _make_callback_handler( + tracer_provider, logger_provider, meter_provider + ) + + run_id = uuid4() + handler.on_tool_start( + serialized={"name": "search"}, + input_str="Paris weather", + run_id=run_id, + ) + output = MagicMock() + 
output.content = "Sunny" + output.tool_call_id = None + handler.on_tool_end(output=output, run_id=run_id) + + spans = span_exporter.get_finished_spans() + assert len(spans) == 1 + attrs = spans[0].attributes + assert attrs[gen_ai_attributes.GEN_AI_TOOL_NAME] == "search" + assert attrs[gen_ai_attributes.GEN_AI_TOOL_CALL_ARGUMENTS] == "Paris weather" + + +def test_on_tool_start_with_no_serialized(monkeypatch): + """on_tool_start with serialized=None falls back to name='unknown'.""" + monkeypatch.setenv("OTEL_SEMCONV_STABILITY_OPT_IN", "gen_ai_latest_experimental") + tracer_provider, span_exporter, logger_provider, meter_provider = ( + _make_providers() + ) + handler = _make_callback_handler( + tracer_provider, logger_provider, meter_provider + ) + + run_id = uuid4() + handler.on_tool_start( + serialized=None, + input_str="some input", + run_id=run_id, + ) + output = MagicMock() + output.content = "result" + output.tool_call_id = None + handler.on_tool_end(output=output, run_id=run_id) + + spans = span_exporter.get_finished_spans() + assert len(spans) == 1 + attrs = spans[0].attributes + assert attrs[gen_ai_attributes.GEN_AI_TOOL_NAME] == "unknown" + + +def test_on_tool_error_records_error_type(monkeypatch): + monkeypatch.setenv("OTEL_SEMCONV_STABILITY_OPT_IN", "gen_ai_latest_experimental") + tracer_provider, span_exporter, logger_provider, meter_provider = ( + _make_providers() + ) + handler = _make_callback_handler( + tracer_provider, logger_provider, meter_provider + ) + + run_id = uuid4() + handler.on_tool_start( + serialized={"name": "failing_tool"}, + input_str="bad input", + run_id=run_id, + ) + exc = ValueError("something went wrong") + handler.on_tool_error(error=exc, run_id=run_id) + + spans = span_exporter.get_finished_spans() + assert len(spans) == 1 + attrs = spans[0].attributes + assert attrs[gen_ai_attributes.GEN_AI_TOOL_NAME] == "failing_tool" + assert attrs[error_attributes.ERROR_TYPE] == "ValueError" + + +# 
--------------------------------------------------------------------------- +# on_chat_model_start with tool_definitions +# --------------------------------------------------------------------------- + + +def test_on_chat_model_start_with_tools_sets_definitions(monkeypatch): + """Tool definitions passed via invocation_params are captured on the span.""" + monkeypatch.setenv("OTEL_SEMCONV_STABILITY_OPT_IN", "gen_ai_latest_experimental") + monkeypatch.setenv( + "OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT", "SPAN_ONLY" + ) + _enable_experimental_mode() + tracer_provider, span_exporter, logger_provider, meter_provider = ( + _make_providers() + ) + handler = _make_callback_handler( + tracer_provider, logger_provider, meter_provider + ) + + run_id = uuid4() + tools = [ + { + "type": "function", + "function": { + "name": "multiply", + "description": "Multiply two numbers", + "parameters": {"type": "object"}, + }, + } + ] + + from langchain_core.messages import HumanMessage + + handler.on_chat_model_start( + serialized=_OPENAI_SERIALIZED, + messages=[[HumanMessage(content="What is 3 * 4?")]], + run_id=run_id, + metadata=_OPENAI_METADATA, + invocation_params={**_OPENAI_INVOCATION_PARAMS, "tools": tools}, + ) + + # Finish the span so attributes are flushed + from langchain_core.messages import AIMessage + from langchain_core.outputs import ChatGeneration, LLMResult + + ai_msg = AIMessage(content="12") + ai_msg.response_metadata = {"finish_reason": "stop"} + generation = ChatGeneration(message=ai_msg, text="12") + generation.generation_info = {"finish_reason": "stop"} + result = LLMResult(generations=[[generation]]) + handler.on_llm_end(response=result, run_id=run_id) + + spans = span_exporter.get_finished_spans() + assert len(spans) == 1 + attrs = spans[0].attributes + # Tool definitions are serialised into gen_ai.tool.definitions + assert gen_ai_attributes.GEN_AI_TOOL_DEFINITIONS in attrs + tool_definitions = attrs[gen_ai_attributes.GEN_AI_TOOL_DEFINITIONS] + assert 
"multiply" in tool_definitions + assert "Multiply two numbers" in tool_definitions + + +# --------------------------------------------------------------------------- +# on_llm_end with tool_calls finish reason +# --------------------------------------------------------------------------- + + +def _build_tool_call_llm_result( + tool_calls: list[dict[str, Any]], +) -> LLMResult: + """Build a fake LLMResult where the model responded with tool calls.""" + ai_msg = AIMessage(content="") + ai_msg.tool_calls = tool_calls # type: ignore[attr-defined] + ai_msg.response_metadata = {} + ai_msg.usage_metadata = None # type: ignore[assignment] + generation = ChatGeneration(message=ai_msg, text="") + generation.generation_info = {"finish_reason": "tool_calls"} + return LLMResult(generations=[[generation]]) + + +def test_on_llm_end_with_tool_calls_records_tool_call_requests(monkeypatch): + """When finish_reason is tool_calls the output message parts are ToolCallRequests.""" + monkeypatch.setenv("OTEL_SEMCONV_STABILITY_OPT_IN", "gen_ai_latest_experimental") + monkeypatch.setenv( + "OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT", "SPAN_ONLY" + ) + _enable_experimental_mode() + tracer_provider, span_exporter, logger_provider, meter_provider = ( + _make_providers() + ) + handler = _make_callback_handler( + tracer_provider, logger_provider, meter_provider + ) + + run_id = uuid4() + from langchain_core.messages import HumanMessage + + handler.on_chat_model_start( + serialized=_OPENAI_SERIALIZED, + messages=[[HumanMessage(content="What is 3 * 4?")]], + run_id=run_id, + metadata=_OPENAI_METADATA, + invocation_params=_OPENAI_INVOCATION_PARAMS, + ) + + result = _build_tool_call_llm_result( + [{"name": "multiply", "id": "call_001", "args": {"a": 3, "b": 4}}] + ) + handler.on_llm_end(response=result, run_id=run_id) + + spans = span_exporter.get_finished_spans() + assert len(spans) == 1 + attrs = spans[0].attributes + assert gen_ai_attributes.GEN_AI_OUTPUT_MESSAGES in attrs + 
output_messages = attrs[gen_ai_attributes.GEN_AI_OUTPUT_MESSAGES] + assert "multiply" in output_messages + assert "tool_calls" in output_messages + + +def test_on_llm_end_with_multiple_tool_calls(monkeypatch): + monkeypatch.setenv("OTEL_SEMCONV_STABILITY_OPT_IN", "gen_ai_latest_experimental") + monkeypatch.setenv( + "OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT", "SPAN_ONLY" + ) + _enable_experimental_mode() + tracer_provider, span_exporter, logger_provider, meter_provider = ( + _make_providers() + ) + handler = _make_callback_handler( + tracer_provider, logger_provider, meter_provider + ) + + run_id = uuid4() + from langchain_core.messages import HumanMessage + + handler.on_chat_model_start( + serialized=_OPENAI_SERIALIZED, + messages=[[HumanMessage(content="Compute 3*4 and 5+6")]], + run_id=run_id, + metadata=_OPENAI_METADATA, + invocation_params=_OPENAI_INVOCATION_PARAMS, + ) + + result = _build_tool_call_llm_result( + [ + {"name": "multiply", "id": "call_001", "args": {"a": 3, "b": 4}}, + {"name": "add", "id": "call_002", "args": {"a": 5, "b": 6}}, + ] + ) + handler.on_llm_end(response=result, run_id=run_id) + + spans = span_exporter.get_finished_spans() + assert len(spans) == 1 + attrs = spans[0].attributes + output_messages = attrs[gen_ai_attributes.GEN_AI_OUTPUT_MESSAGES] + assert "multiply" in output_messages + assert "add" in output_messages + + +# --------------------------------------------------------------------------- +# Full LangChain tool invocation via instrumentor (no network) +# --------------------------------------------------------------------------- + + +def test_tool_span_created_via_instrumentor(monkeypatch): + """Using LangChainInstrumentor, on_tool_start/end produces an execute_tool span.""" + monkeypatch.setenv("OTEL_SEMCONV_STABILITY_OPT_IN", "gen_ai_latest_experimental") + + span_exporter = InMemorySpanExporter() + tracer_provider = TracerProvider() + tracer_provider.add_span_processor(SimpleSpanProcessor(span_exporter)) + + 
log_exporter = InMemoryLogRecordExporter() + logger_provider = LoggerProvider() + logger_provider.add_log_record_processor( + SimpleLogRecordProcessor(log_exporter) + ) + + metric_reader = InMemoryMetricReader() + meter_provider = MeterProvider(metric_readers=[metric_reader]) + + instrumentor = LangChainInstrumentor() + instrumentor.instrument( + tracer_provider=tracer_provider, + meter_provider=meter_provider, + logger_provider=logger_provider, + ) + + try: + from langchain_core.tools import tool + + @tool + def multiply(a: int, b: int) -> int: + """Multiply two integers.""" + return a * b + + multiply.invoke({"a": 3, "b": 4}) + + spans = span_exporter.get_finished_spans() + assert len(spans) == 1 + span = spans[0] + assert span.name == "execute_tool multiply" + attrs = span.attributes + assert attrs[gen_ai_attributes.GEN_AI_OPERATION_NAME] == "execute_tool" + assert attrs[gen_ai_attributes.GEN_AI_TOOL_NAME] == "multiply" + finally: + instrumentor.uninstrument() diff --git a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/_inference_invocation.py b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/_inference_invocation.py index 017c3e3f..08ee7cca 100644 --- a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/_inference_invocation.py +++ b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/_inference_invocation.py @@ -252,6 +252,7 @@ class LLMInvocation: seed: int | None = None server_address: str | None = None server_port: int | None = None + tool_definitions: list[ToolDefinition] | None = None _inference_invocation: InferenceInvocation | None = field( default=None, init=False, repr=False @@ -290,6 +291,7 @@ def _start_with_handler( inv.max_tokens = self.max_tokens inv.stop_sequences = self.stop_sequences inv.seed = self.seed + inv.tool_definitions = self.tool_definitions inv.attributes.update(self.attributes) inv.metric_attributes.update(self.metric_attributes) self._inference_invocation = inv @@ -317,6 +319,7 @@ def 
_sync_to_invocation(self) -> None: inv.seed = self.seed inv.server_address = self.server_address inv.server_port = self.server_port + inv.tool_definitions = self.tool_definitions inv.attributes = self.attributes inv.metric_attributes = self.metric_attributes From 93cf06aa862554eb6cf7135640557e9b32755b76 Mon Sep 17 00:00:00 2001 From: Wrisa Date: Mon, 18 May 2026 10:42:46 -0700 Subject: [PATCH 03/14] added changelog --- .../opentelemetry-instrumentation-langchain/.changelog/25.added | 1 + 1 file changed, 1 insertion(+) create mode 100644 instrumentation/opentelemetry-instrumentation-langchain/.changelog/25.added diff --git a/instrumentation/opentelemetry-instrumentation-langchain/.changelog/25.added b/instrumentation/opentelemetry-instrumentation-langchain/.changelog/25.added new file mode 100644 index 00000000..332d175e --- /dev/null +++ b/instrumentation/opentelemetry-instrumentation-langchain/.changelog/25.added @@ -0,0 +1 @@ +Add tools support in LangChain instrumentation From 0badaaeb58f3bf41036bf9cdee64f26a8bebcb17 Mon Sep 17 00:00:00 2001 From: Wrisa Date: Mon, 18 May 2026 10:46:42 -0700 Subject: [PATCH 04/14] updated changelog --- .../.changelog/{25.added => 37.added} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename instrumentation/opentelemetry-instrumentation-langchain/.changelog/{25.added => 37.added} (100%) diff --git a/instrumentation/opentelemetry-instrumentation-langchain/.changelog/25.added b/instrumentation/opentelemetry-instrumentation-langchain/.changelog/37.added similarity index 100% rename from instrumentation/opentelemetry-instrumentation-langchain/.changelog/25.added rename to instrumentation/opentelemetry-instrumentation-langchain/.changelog/37.added From cdc1e869fdc9c17aced5de9c0d7545a9cdae92b5 Mon Sep 17 00:00:00 2001 From: Wrisa Date: Mon, 18 May 2026 11:05:56 -0700 Subject: [PATCH 05/14] fixed errors --- .../instrumentation/langchain/utils.py | 4 +- .../tests/test_tools.py | 54 +++++++++++++------ 2 files changed, 42 
insertions(+), 16 deletions(-) diff --git a/instrumentation/opentelemetry-instrumentation-langchain/src/opentelemetry/instrumentation/langchain/utils.py b/instrumentation/opentelemetry-instrumentation-langchain/src/opentelemetry/instrumentation/langchain/utils.py index 67c04eeb..9aacb4ce 100644 --- a/instrumentation/opentelemetry-instrumentation-langchain/src/opentelemetry/instrumentation/langchain/utils.py +++ b/instrumentation/opentelemetry-instrumentation-langchain/src/opentelemetry/instrumentation/langchain/utils.py @@ -35,7 +35,9 @@ def _prepare_tool_definitions(tools: list[Any]) -> list[ToolDefinition] | None: definitions.append( FunctionToolDefinition( name=str(func_name) if func_name is not None else "", - description=str(func_description) if func_description is not None else None, + description=str(func_description) + if func_description is not None + else None, parameters=_get_property_value(func, "parameters"), ) ) diff --git a/instrumentation/opentelemetry-instrumentation-langchain/tests/test_tools.py b/instrumentation/opentelemetry-instrumentation-langchain/tests/test_tools.py index eb30f385..1e7c37a5 100644 --- a/instrumentation/opentelemetry-instrumentation-langchain/tests/test_tools.py +++ b/instrumentation/opentelemetry-instrumentation-langchain/tests/test_tools.py @@ -8,12 +8,15 @@ import json from typing import Any from unittest.mock import MagicMock -from uuid import UUID, uuid4 +from uuid import uuid4 import pytest from langchain_core.messages import AIMessage from langchain_core.outputs import ChatGeneration, LLMResult +from opentelemetry.instrumentation._semconv import ( + _OpenTelemetrySemanticConventionStability, +) from opentelemetry.instrumentation.langchain import LangChainInstrumentor from opentelemetry.instrumentation.langchain.callback_handler import ( OpenTelemetryLangChainCallbackHandler, @@ -34,9 +37,6 @@ from opentelemetry.sdk.trace.export.in_memory_span_exporter import ( InMemorySpanExporter, ) -from 
opentelemetry.instrumentation._semconv import ( - _OpenTelemetrySemanticConventionStability, -) from opentelemetry.semconv._incubating.attributes import gen_ai_attributes from opentelemetry.semconv.attributes import error_attributes from opentelemetry.util.genai.handler import TelemetryHandler @@ -137,7 +137,10 @@ def test_prepare_tool_definitions_multiple_tools(): }, { "type": "function", - "function": {"name": "subtract", "description": "Subtract numbers"}, + "function": { + "name": "subtract", + "description": "Subtract numbers", + }, }, ] result = _prepare_tool_definitions(tools) @@ -242,7 +245,9 @@ def _make_callback_handler(tracer_provider, logger_provider, meter_provider): def test_on_tool_start_and_end_creates_span(monkeypatch): - monkeypatch.setenv("OTEL_SEMCONV_STABILITY_OPT_IN", "gen_ai_latest_experimental") + monkeypatch.setenv( + "OTEL_SEMCONV_STABILITY_OPT_IN", "gen_ai_latest_experimental" + ) tracer_provider, span_exporter, logger_provider, meter_provider = ( _make_providers() ) @@ -270,14 +275,19 @@ def test_on_tool_start_and_end_creates_span(monkeypatch): attrs = span.attributes assert attrs[gen_ai_attributes.GEN_AI_OPERATION_NAME] == "execute_tool" assert attrs[gen_ai_attributes.GEN_AI_TOOL_NAME] == "multiply" - assert attrs[gen_ai_attributes.GEN_AI_TOOL_DESCRIPTION] == "Multiply two numbers" + assert ( + attrs[gen_ai_attributes.GEN_AI_TOOL_DESCRIPTION] + == "Multiply two numbers" + ) assert attrs[gen_ai_attributes.GEN_AI_TOOL_CALL_ARGUMENTS] == json.dumps( {"a": 3, "b": 4} ) def test_on_tool_start_with_string_input(monkeypatch): - monkeypatch.setenv("OTEL_SEMCONV_STABILITY_OPT_IN", "gen_ai_latest_experimental") + monkeypatch.setenv( + "OTEL_SEMCONV_STABILITY_OPT_IN", "gen_ai_latest_experimental" + ) tracer_provider, span_exporter, logger_provider, meter_provider = ( _make_providers() ) @@ -300,12 +310,16 @@ def test_on_tool_start_with_string_input(monkeypatch): assert len(spans) == 1 attrs = spans[0].attributes assert 
attrs[gen_ai_attributes.GEN_AI_TOOL_NAME] == "search" - assert attrs[gen_ai_attributes.GEN_AI_TOOL_CALL_ARGUMENTS] == "Paris weather" + assert ( + attrs[gen_ai_attributes.GEN_AI_TOOL_CALL_ARGUMENTS] == "Paris weather" + ) def test_on_tool_start_with_no_serialized(monkeypatch): """on_tool_start with serialized=None falls back to name='unknown'.""" - monkeypatch.setenv("OTEL_SEMCONV_STABILITY_OPT_IN", "gen_ai_latest_experimental") + monkeypatch.setenv( + "OTEL_SEMCONV_STABILITY_OPT_IN", "gen_ai_latest_experimental" + ) tracer_provider, span_exporter, logger_provider, meter_provider = ( _make_providers() ) @@ -331,7 +345,9 @@ def test_on_tool_start_with_no_serialized(monkeypatch): def test_on_tool_error_records_error_type(monkeypatch): - monkeypatch.setenv("OTEL_SEMCONV_STABILITY_OPT_IN", "gen_ai_latest_experimental") + monkeypatch.setenv( + "OTEL_SEMCONV_STABILITY_OPT_IN", "gen_ai_latest_experimental" + ) tracer_provider, span_exporter, logger_provider, meter_provider = ( _make_providers() ) @@ -362,7 +378,9 @@ def test_on_tool_error_records_error_type(monkeypatch): def test_on_chat_model_start_with_tools_sets_definitions(monkeypatch): """Tool definitions passed via invocation_params are captured on the span.""" - monkeypatch.setenv("OTEL_SEMCONV_STABILITY_OPT_IN", "gen_ai_latest_experimental") + monkeypatch.setenv( + "OTEL_SEMCONV_STABILITY_OPT_IN", "gen_ai_latest_experimental" + ) monkeypatch.setenv( "OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT", "SPAN_ONLY" ) @@ -437,7 +455,9 @@ def _build_tool_call_llm_result( def test_on_llm_end_with_tool_calls_records_tool_call_requests(monkeypatch): """When finish_reason is tool_calls the output message parts are ToolCallRequests.""" - monkeypatch.setenv("OTEL_SEMCONV_STABILITY_OPT_IN", "gen_ai_latest_experimental") + monkeypatch.setenv( + "OTEL_SEMCONV_STABILITY_OPT_IN", "gen_ai_latest_experimental" + ) monkeypatch.setenv( "OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT", "SPAN_ONLY" ) @@ -475,7 +495,9 @@ def 
test_on_llm_end_with_tool_calls_records_tool_call_requests(monkeypatch): def test_on_llm_end_with_multiple_tool_calls(monkeypatch): - monkeypatch.setenv("OTEL_SEMCONV_STABILITY_OPT_IN", "gen_ai_latest_experimental") + monkeypatch.setenv( + "OTEL_SEMCONV_STABILITY_OPT_IN", "gen_ai_latest_experimental" + ) monkeypatch.setenv( "OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT", "SPAN_ONLY" ) @@ -521,7 +543,9 @@ def test_on_llm_end_with_multiple_tool_calls(monkeypatch): def test_tool_span_created_via_instrumentor(monkeypatch): """Using LangChainInstrumentor, on_tool_start/end produces an execute_tool span.""" - monkeypatch.setenv("OTEL_SEMCONV_STABILITY_OPT_IN", "gen_ai_latest_experimental") + monkeypatch.setenv( + "OTEL_SEMCONV_STABILITY_OPT_IN", "gen_ai_latest_experimental" + ) span_exporter = InMemorySpanExporter() tracer_provider = TracerProvider() From 070c81e8d065792862cf4d496a585036b2a91d81 Mon Sep 17 00:00:00 2001 From: wrisa Date: Mon, 18 May 2026 11:16:24 -0700 Subject: [PATCH 06/14] fix: move imports to top-level to fix PLC0415 lint errors Assisted-by: Claude Opus 4.6 --- .../tests/test_tools.py | 13 ++----------- 1 file changed, 2 insertions(+), 11 deletions(-) diff --git a/instrumentation/opentelemetry-instrumentation-langchain/tests/test_tools.py b/instrumentation/opentelemetry-instrumentation-langchain/tests/test_tools.py index 1e7c37a5..757d0d7e 100644 --- a/instrumentation/opentelemetry-instrumentation-langchain/tests/test_tools.py +++ b/instrumentation/opentelemetry-instrumentation-langchain/tests/test_tools.py @@ -11,8 +11,9 @@ from uuid import uuid4 import pytest -from langchain_core.messages import AIMessage +from langchain_core.messages import AIMessage, HumanMessage from langchain_core.outputs import ChatGeneration, LLMResult +from langchain_core.tools import tool from opentelemetry.instrumentation._semconv import ( _OpenTelemetrySemanticConventionStability, @@ -404,8 +405,6 @@ def 
test_on_chat_model_start_with_tools_sets_definitions(monkeypatch): } ] - from langchain_core.messages import HumanMessage - handler.on_chat_model_start( serialized=_OPENAI_SERIALIZED, messages=[[HumanMessage(content="What is 3 * 4?")]], @@ -415,9 +414,6 @@ def test_on_chat_model_start_with_tools_sets_definitions(monkeypatch): ) # Finish the span so attributes are flushed - from langchain_core.messages import AIMessage - from langchain_core.outputs import ChatGeneration, LLMResult - ai_msg = AIMessage(content="12") ai_msg.response_metadata = {"finish_reason": "stop"} generation = ChatGeneration(message=ai_msg, text="12") @@ -470,8 +466,6 @@ def test_on_llm_end_with_tool_calls_records_tool_call_requests(monkeypatch): ) run_id = uuid4() - from langchain_core.messages import HumanMessage - handler.on_chat_model_start( serialized=_OPENAI_SERIALIZED, messages=[[HumanMessage(content="What is 3 * 4?")]], @@ -510,8 +504,6 @@ def test_on_llm_end_with_multiple_tool_calls(monkeypatch): ) run_id = uuid4() - from langchain_core.messages import HumanMessage - handler.on_chat_model_start( serialized=_OPENAI_SERIALIZED, messages=[[HumanMessage(content="Compute 3*4 and 5+6")]], @@ -568,7 +560,6 @@ def test_tool_span_created_via_instrumentor(monkeypatch): ) try: - from langchain_core.tools import tool @tool def multiply(a: int, b: int) -> int: From 0ed892184ab7fcb57741798ad189dd6ab9953ea5 Mon Sep 17 00:00:00 2001 From: Wrisa Date: Fri, 5 Jun 2026 09:44:44 -0700 Subject: [PATCH 07/14] fixed errors --- .../examples/tools/main.py | 2 +- .../genai/langchain/callback_handler.py | 7 +- .../instrumentation/genai/langchain/utils.py | 4 +- .../tests/test_tools.py | 383 +++++++++++++++++- .../util/genai/_inference_invocation.py | 2 - 5 files changed, 377 insertions(+), 21 deletions(-) diff --git a/instrumentation/opentelemetry-instrumentation-genai-langchain/examples/tools/main.py b/instrumentation/opentelemetry-instrumentation-genai-langchain/examples/tools/main.py index 161f7bcf..847dea14 
100644 --- a/instrumentation/opentelemetry-instrumentation-genai-langchain/examples/tools/main.py +++ b/instrumentation/opentelemetry-instrumentation-genai-langchain/examples/tools/main.py @@ -113,4 +113,4 @@ def main() -> None: if __name__ == "__main__": - main() \ No newline at end of file + main() diff --git a/instrumentation/opentelemetry-instrumentation-genai-langchain/src/opentelemetry/instrumentation/genai/langchain/callback_handler.py b/instrumentation/opentelemetry-instrumentation-genai-langchain/src/opentelemetry/instrumentation/genai/langchain/callback_handler.py index 944f316f..080e3927 100644 --- a/instrumentation/opentelemetry-instrumentation-genai-langchain/src/opentelemetry/instrumentation/genai/langchain/callback_handler.py +++ b/instrumentation/opentelemetry-instrumentation-genai-langchain/src/opentelemetry/instrumentation/genai/langchain/callback_handler.py @@ -20,9 +20,9 @@ resolve_agent_name, ) from opentelemetry.instrumentation.genai.langchain.utils import ( - _prepare_tool_definitions, make_input_message, make_last_output_message, + prepare_tool_definitions, ) from opentelemetry.util.genai.handler import TelemetryHandler from opentelemetry.util.genai.invocation import ( @@ -294,7 +294,7 @@ def on_chat_model_start( "invocation_params" ].get("functions") if tools: - tool_definitions = _prepare_tool_definitions(tools) + tool_definitions = prepare_tool_definitions(tools) llm_invocation.tool_definitions = tool_definitions self._invocation_manager.add_invocation_state( run_id=run_id, @@ -453,8 +453,9 @@ def on_tool_start( else: arguments = None tool_invocation = self._telemetry_handler.start_tool( - name=name, tool_description=description, arguments=arguments + name=name, tool_description=description ) + tool_invocation.arguments = arguments self._invocation_manager.add_invocation_state( run_id, parent_run_id, tool_invocation ) diff --git 
a/instrumentation/opentelemetry-instrumentation-genai-langchain/src/opentelemetry/instrumentation/genai/langchain/utils.py b/instrumentation/opentelemetry-instrumentation-genai-langchain/src/opentelemetry/instrumentation/genai/langchain/utils.py index f3753c52..1c60e25c 100644 --- a/instrumentation/opentelemetry-instrumentation-genai-langchain/src/opentelemetry/instrumentation/genai/langchain/utils.py +++ b/instrumentation/opentelemetry-instrumentation-genai-langchain/src/opentelemetry/instrumentation/genai/langchain/utils.py @@ -14,8 +14,6 @@ ToolDefinition, ) -__all__ = ["_prepare_tool_definitions"] - def _get_property_value(obj: Any, property_name: str) -> Any: if isinstance(obj, dict): @@ -24,7 +22,7 @@ def _get_property_value(obj: Any, property_name: str) -> Any: return getattr(obj, property_name, None) -def _prepare_tool_definitions(tools: list[Any]) -> list[ToolDefinition] | None: +def prepare_tool_definitions(tools: list[Any]) -> list[ToolDefinition] | None: if not tools: return None diff --git a/instrumentation/opentelemetry-instrumentation-genai-langchain/tests/test_tools.py b/instrumentation/opentelemetry-instrumentation-genai-langchain/tests/test_tools.py index 757d0d7e..abfd2e5c 100644 --- a/instrumentation/opentelemetry-instrumentation-genai-langchain/tests/test_tools.py +++ b/instrumentation/opentelemetry-instrumentation-genai-langchain/tests/test_tools.py @@ -18,13 +18,13 @@ from opentelemetry.instrumentation._semconv import ( _OpenTelemetrySemanticConventionStability, ) -from opentelemetry.instrumentation.langchain import LangChainInstrumentor -from opentelemetry.instrumentation.langchain.callback_handler import ( +from opentelemetry.instrumentation.genai.langchain import LangChainInstrumentor +from opentelemetry.instrumentation.genai.langchain.callback_handler import ( OpenTelemetryLangChainCallbackHandler, ) -from opentelemetry.instrumentation.langchain.utils import ( +from opentelemetry.instrumentation.genai.langchain.utils import ( 
_get_property_value, - _prepare_tool_definitions, + prepare_tool_definitions, ) from opentelemetry.sdk._logs import LoggerProvider from opentelemetry.sdk._logs.export import ( @@ -88,12 +88,12 @@ class Plain: # --------------------------------------------------------------------------- -# Unit tests for _prepare_tool_definitions +# Unit tests for prepare_tool_definitions # --------------------------------------------------------------------------- def test_prepare_tool_definitions_returns_none_for_empty(): - assert _prepare_tool_definitions([]) is None + assert prepare_tool_definitions([]) is None def test_prepare_tool_definitions_dict_tools(): @@ -113,7 +113,7 @@ def test_prepare_tool_definitions_dict_tools(): }, } ] - result = _prepare_tool_definitions(tools) + result = prepare_tool_definitions(tools) assert result is not None assert len(result) == 1 defn = result[0] @@ -125,7 +125,7 @@ def test_prepare_tool_definitions_dict_tools(): def test_prepare_tool_definitions_skips_non_function_type(): tools = [{"type": "retrieval", "retrieval": {}}] - result = _prepare_tool_definitions(tools) + result = prepare_tool_definitions(tools) # No function-type tools → empty list returned (not None, but falsy) assert not result @@ -144,7 +144,7 @@ def test_prepare_tool_definitions_multiple_tools(): }, }, ] - result = _prepare_tool_definitions(tools) + result = prepare_tool_definitions(tools) assert result is not None assert len(result) == 2 assert result[0].name == "add" @@ -158,7 +158,7 @@ def test_prepare_tool_definitions_missing_name_defaults_to_empty_string(): "function": {"description": "No name tool"}, } ] - result = _prepare_tool_definitions(tools) + result = prepare_tool_definitions(tools) assert result is not None assert len(result) == 1 assert result[0].name == "" @@ -171,7 +171,7 @@ def test_prepare_tool_definitions_none_description_stays_none(): "function": {"name": "no_desc"}, } ] - result = _prepare_tool_definitions(tools) + result = prepare_tool_definitions(tools) 
assert result is not None assert result[0].description is None @@ -188,7 +188,7 @@ class ToolDef: type = "function" function = FuncDef() - result = _prepare_tool_definitions([ToolDef()]) + result = prepare_tool_definitions([ToolDef()]) assert result is not None assert len(result) == 1 assert result[0].name == "get_weather" @@ -249,6 +249,10 @@ def test_on_tool_start_and_end_creates_span(monkeypatch): monkeypatch.setenv( "OTEL_SEMCONV_STABILITY_OPT_IN", "gen_ai_latest_experimental" ) + monkeypatch.setenv( + "OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT", "SPAN_ONLY" + ) + _enable_experimental_mode() tracer_provider, span_exporter, logger_provider, meter_provider = ( _make_providers() ) @@ -289,6 +293,10 @@ def test_on_tool_start_with_string_input(monkeypatch): monkeypatch.setenv( "OTEL_SEMCONV_STABILITY_OPT_IN", "gen_ai_latest_experimental" ) + monkeypatch.setenv( + "OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT", "SPAN_ONLY" + ) + _enable_experimental_mode() tracer_provider, span_exporter, logger_provider, meter_provider = ( _make_providers() ) @@ -577,3 +585,354 @@ def multiply(a: int, b: int) -> int: assert attrs[gen_ai_attributes.GEN_AI_TOOL_NAME] == "multiply" finally: instrumentor.uninstrument() + + +# --------------------------------------------------------------------------- +# Content capturing off — arguments and result suppressed +# --------------------------------------------------------------------------- + + +def test_on_tool_start_and_end_no_content_capture_suppresses_arguments( + monkeypatch, +): + """Without content capture, arguments and result are absent from the span.""" + monkeypatch.setenv( + "OTEL_SEMCONV_STABILITY_OPT_IN", "gen_ai_latest_experimental" + ) + _enable_experimental_mode() + # OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT intentionally not set + tracer_provider, span_exporter, logger_provider, meter_provider = ( + _make_providers() + ) + handler = _make_callback_handler( + tracer_provider, logger_provider, 
meter_provider + ) + + run_id = uuid4() + handler.on_tool_start( + serialized={"name": "multiply", "description": "Multiply two numbers"}, + input_str="", + run_id=run_id, + inputs={"a": 3, "b": 4}, + ) + output = MagicMock() + output.content = "12" + output.tool_call_id = "call_abc" + handler.on_tool_end(output=output, run_id=run_id) + + spans = span_exporter.get_finished_spans() + assert len(spans) == 1 + attrs = spans[0].attributes + assert attrs[gen_ai_attributes.GEN_AI_TOOL_NAME] == "multiply" + assert gen_ai_attributes.GEN_AI_TOOL_CALL_ARGUMENTS not in attrs + assert gen_ai_attributes.GEN_AI_TOOL_CALL_RESULT not in attrs + + +def test_on_tool_end_captures_result_with_span_only_mode(monkeypatch): + """tool_result is set on the span when content capture is SPAN_ONLY.""" + monkeypatch.setenv( + "OTEL_SEMCONV_STABILITY_OPT_IN", "gen_ai_latest_experimental" + ) + monkeypatch.setenv( + "OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT", "SPAN_ONLY" + ) + _enable_experimental_mode() + tracer_provider, span_exporter, logger_provider, meter_provider = ( + _make_providers() + ) + handler = _make_callback_handler( + tracer_provider, logger_provider, meter_provider + ) + + run_id = uuid4() + handler.on_tool_start( + serialized={"name": "lookup"}, + input_str="query", + run_id=run_id, + ) + output = MagicMock() + output.content = "result text" + output.tool_call_id = None + handler.on_tool_end(output=output, run_id=run_id) + + spans = span_exporter.get_finished_spans() + assert len(spans) == 1 + attrs = spans[0].attributes + assert attrs[gen_ai_attributes.GEN_AI_TOOL_CALL_RESULT] == "result text" + + +# --------------------------------------------------------------------------- +# on_tool_end attribute types +# --------------------------------------------------------------------------- + + +def test_on_tool_end_sets_tool_call_id_attribute(monkeypatch): + """tool_call_id from the output object is set on the span.""" + monkeypatch.setenv( + 
"OTEL_SEMCONV_STABILITY_OPT_IN", "gen_ai_latest_experimental" + ) + _enable_experimental_mode() + tracer_provider, span_exporter, logger_provider, meter_provider = ( + _make_providers() + ) + handler = _make_callback_handler( + tracer_provider, logger_provider, meter_provider + ) + + run_id = uuid4() + handler.on_tool_start( + serialized={"name": "mytool"}, + input_str="", + run_id=run_id, + ) + output = MagicMock() + output.content = "done" + output.tool_call_id = "call_xyz" + handler.on_tool_end(output=output, run_id=run_id) + + spans = span_exporter.get_finished_spans() + assert len(spans) == 1 + attrs = spans[0].attributes + assert attrs[gen_ai_attributes.GEN_AI_TOOL_CALL_ID] == "call_xyz" + + +def test_on_tool_end_with_none_tool_call_id_omits_attribute(monkeypatch): + """tool_call_id is absent when the output carries no call id.""" + monkeypatch.setenv( + "OTEL_SEMCONV_STABILITY_OPT_IN", "gen_ai_latest_experimental" + ) + _enable_experimental_mode() + tracer_provider, span_exporter, logger_provider, meter_provider = ( + _make_providers() + ) + handler = _make_callback_handler( + tracer_provider, logger_provider, meter_provider + ) + + run_id = uuid4() + handler.on_tool_start( + serialized={"name": "mytool"}, + input_str="", + run_id=run_id, + ) + output = MagicMock() + output.content = "done" + output.tool_call_id = None + handler.on_tool_end(output=output, run_id=run_id) + + spans = span_exporter.get_finished_spans() + assert len(spans) == 1 + attrs = spans[0].attributes + assert gen_ai_attributes.GEN_AI_TOOL_CALL_ID not in attrs + + +# --------------------------------------------------------------------------- +# on_tool_end / on_tool_error with unknown run_id — must not raise +# --------------------------------------------------------------------------- + + +def test_on_tool_end_unknown_run_id_does_not_raise(monkeypatch): + monkeypatch.setenv( + "OTEL_SEMCONV_STABILITY_OPT_IN", "gen_ai_latest_experimental" + ) + _enable_experimental_mode() + 
tracer_provider, span_exporter, logger_provider, meter_provider = ( + _make_providers() + ) + handler = _make_callback_handler( + tracer_provider, logger_provider, meter_provider + ) + + output = MagicMock() + output.content = "result" + output.tool_call_id = None + # No on_tool_start was called — should be a no-op + handler.on_tool_end(output=output, run_id=uuid4()) + + assert len(span_exporter.get_finished_spans()) == 0 + + +def test_on_tool_error_unknown_run_id_does_not_raise(monkeypatch): + monkeypatch.setenv( + "OTEL_SEMCONV_STABILITY_OPT_IN", "gen_ai_latest_experimental" + ) + _enable_experimental_mode() + tracer_provider, span_exporter, logger_provider, meter_provider = ( + _make_providers() + ) + handler = _make_callback_handler( + tracer_provider, logger_provider, meter_provider + ) + + handler.on_tool_error(error=RuntimeError("boom"), run_id=uuid4()) + + assert len(span_exporter.get_finished_spans()) == 0 + + +# --------------------------------------------------------------------------- +# on_tool_start: inputs=None falls back to input_str +# --------------------------------------------------------------------------- + + +def test_on_tool_start_uses_input_str_when_inputs_is_none(monkeypatch): + """When inputs kwarg is absent (None), input_str is used for arguments.""" + monkeypatch.setenv( + "OTEL_SEMCONV_STABILITY_OPT_IN", "gen_ai_latest_experimental" + ) + monkeypatch.setenv( + "OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT", "SPAN_ONLY" + ) + _enable_experimental_mode() + tracer_provider, span_exporter, logger_provider, meter_provider = ( + _make_providers() + ) + handler = _make_callback_handler( + tracer_provider, logger_provider, meter_provider + ) + + run_id = uuid4() + # inputs not passed → defaults to None → callback uses input_str + handler.on_tool_start( + serialized={"name": "greet"}, + input_str="hello world", + run_id=run_id, + ) + output = MagicMock() + output.content = "hi" + output.tool_call_id = None + 
handler.on_tool_end(output=output, run_id=run_id) + + spans = span_exporter.get_finished_spans() + assert len(spans) == 1 + attrs = spans[0].attributes + assert attrs[gen_ai_attributes.GEN_AI_TOOL_CALL_ARGUMENTS] == "hello world" + + +def test_on_tool_start_inputs_takes_priority_over_input_str(monkeypatch): + """When both inputs dict and input_str are provided, inputs dict wins.""" + monkeypatch.setenv( + "OTEL_SEMCONV_STABILITY_OPT_IN", "gen_ai_latest_experimental" + ) + monkeypatch.setenv( + "OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT", "SPAN_ONLY" + ) + _enable_experimental_mode() + tracer_provider, span_exporter, logger_provider, meter_provider = ( + _make_providers() + ) + handler = _make_callback_handler( + tracer_provider, logger_provider, meter_provider + ) + + run_id = uuid4() + handler.on_tool_start( + serialized={"name": "add"}, + input_str="ignored", + run_id=run_id, + inputs={"x": 1, "y": 2}, + ) + output = MagicMock() + output.content = "3" + output.tool_call_id = None + handler.on_tool_end(output=output, run_id=run_id) + + spans = span_exporter.get_finished_spans() + assert len(spans) == 1 + attrs = spans[0].attributes + assert attrs[gen_ai_attributes.GEN_AI_TOOL_CALL_ARGUMENTS] == json.dumps( + {"x": 1, "y": 2} + ) + + +# --------------------------------------------------------------------------- +# on_chat_model_start: functions key as alternative to tools +# --------------------------------------------------------------------------- + + +def test_on_chat_model_start_with_functions_key_sets_definitions(monkeypatch): + """Tool definitions are also picked up from the 'functions' invocation param.""" + monkeypatch.setenv( + "OTEL_SEMCONV_STABILITY_OPT_IN", "gen_ai_latest_experimental" + ) + monkeypatch.setenv( + "OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT", "SPAN_ONLY" + ) + _enable_experimental_mode() + tracer_provider, span_exporter, logger_provider, meter_provider = ( + _make_providers() + ) + handler = _make_callback_handler( + 
tracer_provider, logger_provider, meter_provider + ) + + run_id = uuid4() + functions = [ + { + "type": "function", + "function": { + "name": "get_weather", + "description": "Get current weather", + }, + } + ] + + handler.on_chat_model_start( + serialized=_OPENAI_SERIALIZED, + messages=[[HumanMessage(content="What's the weather?")]], + run_id=run_id, + metadata=_OPENAI_METADATA, + invocation_params={ + **_OPENAI_INVOCATION_PARAMS, + "functions": functions, + }, + ) + ai_msg = AIMessage(content="It is sunny.") + ai_msg.response_metadata = {"finish_reason": "stop"} + generation = ChatGeneration(message=ai_msg, text="It is sunny.") + generation.generation_info = {"finish_reason": "stop"} + handler.on_llm_end( + response=LLMResult(generations=[[generation]]), run_id=run_id + ) + + spans = span_exporter.get_finished_spans() + assert len(spans) == 1 + attrs = spans[0].attributes + assert gen_ai_attributes.GEN_AI_TOOL_DEFINITIONS in attrs + assert "get_weather" in attrs[gen_ai_attributes.GEN_AI_TOOL_DEFINITIONS] + + +def test_on_chat_model_start_without_tools_omits_definitions(monkeypatch): + """No tool_definitions attribute when invocation_params has no tools.""" + monkeypatch.setenv( + "OTEL_SEMCONV_STABILITY_OPT_IN", "gen_ai_latest_experimental" + ) + monkeypatch.setenv( + "OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT", "SPAN_ONLY" + ) + _enable_experimental_mode() + tracer_provider, span_exporter, logger_provider, meter_provider = ( + _make_providers() + ) + handler = _make_callback_handler( + tracer_provider, logger_provider, meter_provider + ) + + run_id = uuid4() + handler.on_chat_model_start( + serialized=_OPENAI_SERIALIZED, + messages=[[HumanMessage(content="Hello")]], + run_id=run_id, + metadata=_OPENAI_METADATA, + invocation_params=_OPENAI_INVOCATION_PARAMS, + ) + ai_msg = AIMessage(content="Hi") + ai_msg.response_metadata = {"finish_reason": "stop"} + generation = ChatGeneration(message=ai_msg, text="Hi") + generation.generation_info = {"finish_reason": 
"stop"} + handler.on_llm_end( + response=LLMResult(generations=[[generation]]), run_id=run_id + ) + + spans = span_exporter.get_finished_spans() + assert len(spans) == 1 + assert gen_ai_attributes.GEN_AI_TOOL_DEFINITIONS not in spans[0].attributes diff --git a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/_inference_invocation.py b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/_inference_invocation.py index ef8c2506..e05b21ab 100644 --- a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/_inference_invocation.py +++ b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/_inference_invocation.py @@ -289,7 +289,6 @@ def _start_with_handler( inv.max_tokens = self.max_tokens inv.stop_sequences = self.stop_sequences inv.seed = self.seed - inv.tool_definitions = self.tool_definitions inv.attributes.update(self.attributes) inv.metric_attributes.update(self.metric_attributes) self._inference_invocation = inv @@ -317,7 +316,6 @@ def _sync_to_invocation(self) -> None: inv.seed = self.seed inv.server_address = self.server_address inv.server_port = self.server_port - inv.tool_definitions = self.tool_definitions inv.attributes = self.attributes inv.metric_attributes = self.metric_attributes From 8c5adadd634d14c592a9f0c21cb29faa34dbfa2f Mon Sep 17 00:00:00 2001 From: Wrisa Date: Fri, 5 Jun 2026 09:58:45 -0700 Subject: [PATCH 08/14] fixed error --- .../instrumentation/genai/langchain/callback_handler.py | 1 + 1 file changed, 1 insertion(+) diff --git a/instrumentation/opentelemetry-instrumentation-genai-langchain/src/opentelemetry/instrumentation/genai/langchain/callback_handler.py b/instrumentation/opentelemetry-instrumentation-genai-langchain/src/opentelemetry/instrumentation/genai/langchain/callback_handler.py index 080e3927..cdb667a6 100644 --- a/instrumentation/opentelemetry-instrumentation-genai-langchain/src/opentelemetry/instrumentation/genai/langchain/callback_handler.py +++ 
b/instrumentation/opentelemetry-instrumentation-genai-langchain/src/opentelemetry/instrumentation/genai/langchain/callback_handler.py @@ -280,6 +280,7 @@ def on_chat_model_start( llm_invocation = self._telemetry_handler.inference( provider, + request_model=request_model, ) llm_invocation.input_messages = input_messages llm_invocation.top_p = top_p From efb82649d5b349ea0f148f10f79f3ae2883e5b0f Mon Sep 17 00:00:00 2001 From: Wrisa Date: Fri, 5 Jun 2026 12:24:50 -0700 Subject: [PATCH 09/14] fixed tool.type --- .../instrumentation/genai/langchain/callback_handler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/instrumentation/opentelemetry-instrumentation-genai-langchain/src/opentelemetry/instrumentation/genai/langchain/callback_handler.py b/instrumentation/opentelemetry-instrumentation-genai-langchain/src/opentelemetry/instrumentation/genai/langchain/callback_handler.py index cdb667a6..39b85ad3 100644 --- a/instrumentation/opentelemetry-instrumentation-genai-langchain/src/opentelemetry/instrumentation/genai/langchain/callback_handler.py +++ b/instrumentation/opentelemetry-instrumentation-genai-langchain/src/opentelemetry/instrumentation/genai/langchain/callback_handler.py @@ -454,7 +454,7 @@ def on_tool_start( else: arguments = None tool_invocation = self._telemetry_handler.start_tool( - name=name, tool_description=description + name=name, tool_description=description, tool_type="function" ) tool_invocation.arguments = arguments self._invocation_manager.add_invocation_state( From 2a95a90263fce744edeedd85f0e7cc299e9a35ed Mon Sep 17 00:00:00 2001 From: Wrisa Date: Fri, 5 Jun 2026 12:42:51 -0700 Subject: [PATCH 10/14] added conformance tests --- .../cassettes/tool_calling_conformance.yaml | 368 ++++++++++++++++++ .../tests/conformance/tool_calling.py | 136 +++++++ .../tests/test_conformance.py | 2 + 3 files changed, 506 insertions(+) create mode 100644 
instrumentation/opentelemetry-instrumentation-genai-langchain/tests/cassettes/tool_calling_conformance.yaml create mode 100644 instrumentation/opentelemetry-instrumentation-genai-langchain/tests/conformance/tool_calling.py diff --git a/instrumentation/opentelemetry-instrumentation-genai-langchain/tests/cassettes/tool_calling_conformance.yaml b/instrumentation/opentelemetry-instrumentation-genai-langchain/tests/cassettes/tool_calling_conformance.yaml new file mode 100644 index 00000000..91244a0d --- /dev/null +++ b/instrumentation/opentelemetry-instrumentation-genai-langchain/tests/cassettes/tool_calling_conformance.yaml @@ -0,0 +1,368 @@ +interactions: +- request: + body: |- + { + "messages": [ + { + "role": "system", + "content": "You're a helpful assistant." + }, + { + "role": "user", + "content": "What's the weather in Seattle and San Francisco today?" + } + ], + "model": "gpt-4o-mini", + "stream": false, + "tool_choice": "auto", + "tools": [ + { + "type": "function", + "function": { + "name": "get_current_weather", + "description": "Get the current weather in a given location", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state, e.g. 
Boston, MA" + } + }, + "required": [ + "location" + ], + "additionalProperties": false + } + } + } + ] + } + headers: + Accept: + - application/json + Accept-Encoding: + - gzip, deflate, zstd + Connection: + - keep-alive + Content-Type: + - application/json + Host: + - api.openai.com + User-Agent: + - OpenAI/Python 2.37.0 + X-Stainless-Arch: + - arm64 + X-Stainless-Async: + - 'false' + X-Stainless-Lang: + - python + X-Stainless-OS: + - MacOS + X-Stainless-Package-Version: + - 2.37.0 + X-Stainless-Raw-Response: + - 'true' + X-Stainless-Runtime: + - CPython + X-Stainless-Runtime-Version: + - 3.12.11 + authorization: + - Bearer test_openai_api_key + x-stainless-retry-count: + - '0' + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: |- + { + "id": "chatcmpl-toolcall001first", + "object": "chat.completion", + "created": 1771535300, + "model": "gpt-4o-mini-2024-07-18", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": null, + "refusal": null, + "tool_calls": [ + { + "id": "call_weather_seattle", + "type": "function", + "function": { + "name": "get_current_weather", + "arguments": "{\"location\": \"Seattle, WA\"}" + } + }, + { + "id": "call_weather_sf", + "type": "function", + "function": { + "name": "get_current_weather", + "arguments": "{\"location\": \"San Francisco, CA\"}" + } + } + ] + }, + "logprobs": null, + "finish_reason": "tool_calls" + } + ], + "usage": { + "prompt_tokens": 75, + "completion_tokens": 51, + "total_tokens": 126, + "prompt_tokens_details": { + "cached_tokens": 0, + "audio_tokens": 0 + }, + "completion_tokens_details": { + "reasoning_tokens": 0, + "audio_tokens": 0, + "accepted_prediction_tokens": 0, + "rejected_prediction_tokens": 0 + } + }, + "service_tier": "default", + "system_fingerprint": null + } + headers: + Connection: + - keep-alive + Content-Type: + - application/json + Date: + - Thu, 21 May 2026 10:00:00 GMT + Server: + - cloudflare + Set-Cookie: 
test_set_cookie + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + content-length: + - '750' + openai-organization: test_openai_org_id + openai-processing-ms: + - '250' + openai-version: + - '2020-10-01' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '10000' + x-ratelimit-limit-tokens: + - '200000' + x-ratelimit-remaining-requests: + - '9999' + x-ratelimit-remaining-tokens: + - '199900' + x-ratelimit-reset-requests: + - 8.64s + x-ratelimit-reset-tokens: + - 5ms + x-request-id: + - req_toolcall001first + status: + code: 200 + message: OK +- request: + body: |- + { + "messages": [ + { + "role": "system", + "content": "You're a helpful assistant." + }, + { + "role": "user", + "content": "What's the weather in Seattle and San Francisco today?" + }, + { + "role": "assistant", + "content": null, + "tool_calls": [ + { + "id": "call_weather_seattle", + "type": "function", + "function": { + "name": "get_current_weather", + "arguments": "{\"location\": \"Seattle, WA\"}" + } + }, + { + "id": "call_weather_sf", + "type": "function", + "function": { + "name": "get_current_weather", + "arguments": "{\"location\": \"San Francisco, CA\"}" + } + } + ] + }, + { + "role": "tool", + "content": "50 degrees and raining", + "tool_call_id": "call_weather_seattle" + }, + { + "role": "tool", + "content": "70 degrees and sunny", + "tool_call_id": "call_weather_sf" + } + ], + "model": "gpt-4o-mini", + "stream": false, + "tool_choice": "auto", + "tools": [ + { + "type": "function", + "function": { + "name": "get_current_weather", + "description": "Get the current weather in a given location", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state, e.g. 
Boston, MA" + } + }, + "required": [ + "location" + ], + "additionalProperties": false + } + } + } + ] + } + headers: + Accept: + - application/json + Accept-Encoding: + - gzip, deflate, zstd + Connection: + - keep-alive + Content-Type: + - application/json + Host: + - api.openai.com + User-Agent: + - OpenAI/Python 2.37.0 + X-Stainless-Arch: + - arm64 + X-Stainless-Async: + - 'false' + X-Stainless-Lang: + - python + X-Stainless-OS: + - MacOS + X-Stainless-Package-Version: + - 2.37.0 + X-Stainless-Raw-Response: + - 'true' + X-Stainless-Runtime: + - CPython + X-Stainless-Runtime-Version: + - 3.12.11 + authorization: + - Bearer test_openai_api_key + x-stainless-retry-count: + - '0' + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: |- + { + "id": "chatcmpl-toolcall001final", + "object": "chat.completion", + "created": 1771535301, + "model": "gpt-4o-mini-2024-07-18", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "Today, Seattle is experiencing 50 degrees and raining, while San Francisco has a pleasant 70 degrees and sunny weather.", + "refusal": null + }, + "logprobs": null, + "finish_reason": "stop" + } + ], + "usage": { + "prompt_tokens": 99, + "completion_tokens": 24, + "total_tokens": 123, + "prompt_tokens_details": { + "cached_tokens": 0, + "audio_tokens": 0 + }, + "completion_tokens_details": { + "reasoning_tokens": 0, + "audio_tokens": 0, + "accepted_prediction_tokens": 0, + "rejected_prediction_tokens": 0 + } + }, + "service_tier": "default", + "system_fingerprint": null + } + headers: + Connection: + - keep-alive + Content-Type: + - application/json + Date: + - Thu, 21 May 2026 10:00:01 GMT + Server: + - cloudflare + Set-Cookie: test_set_cookie + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + 
content-length: + - '600' + openai-organization: test_openai_org_id + openai-processing-ms: + - '180' + openai-version: + - '2020-10-01' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '10000' + x-ratelimit-limit-tokens: + - '200000' + x-ratelimit-remaining-requests: + - '9998' + x-ratelimit-remaining-tokens: + - '199800' + x-ratelimit-reset-requests: + - 8.64s + x-ratelimit-reset-tokens: + - 5ms + x-request-id: + - req_toolcall001final + status: + code: 200 + message: OK +version: 1 diff --git a/instrumentation/opentelemetry-instrumentation-genai-langchain/tests/conformance/tool_calling.py b/instrumentation/opentelemetry-instrumentation-genai-langchain/tests/conformance/tool_calling.py new file mode 100644 index 00000000..9dd04fe6 --- /dev/null +++ b/instrumentation/opentelemetry-instrumentation-genai-langchain/tests/conformance/tool_calling.py @@ -0,0 +1,136 @@ +# Copyright The OpenTelemetry Authors +# SPDX-License-Identifier: Apache-2.0 + +"""Conformance scenario: langchain chat with tool calling via ChatOpenAI.""" + +from __future__ import annotations + +import json +import os +from typing import Any +from unittest import mock + +from langchain_core.messages import ( + AIMessage, + HumanMessage, + SystemMessage, + ToolMessage, +) +from langchain_openai import ChatOpenAI + +from opentelemetry.instrumentation.genai.langchain import LangChainInstrumentor +from opentelemetry.sdk._logs import LoggerProvider +from opentelemetry.sdk.metrics import MeterProvider +from opentelemetry.sdk.trace import TracerProvider +from opentelemetry.test.weaver_live_check import LiveCheckReport +from opentelemetry.test_util_genai.conformance import Scenario +from opentelemetry.test_util_genai.instrumentor import instrument + +DEFAULT_MODEL = "gpt-4o-mini" +WEATHER_TOOL_PROMPT = [ + SystemMessage(content="You're a helpful assistant."), + HumanMessage( + content="What's the weather in Seattle and San Francisco today?" 
+ ), +] +# Tool outputs are pinned to the recorded cassette's second request body. +WEATHER_BY_LOCATION: dict[str, str] = { + "Seattle, WA": "50 degrees and raining", + "San Francisco, CA": "70 degrees and sunny", +} + + +def _get_current_weather_tool_definition() -> dict[str, Any]: + return { + "type": "function", + "function": { + "name": "get_current_weather", + "description": "Get the current weather in a given location", + "parameters": { + "type": "object", + "properties": { + "location": { + "type": "string", + "description": "The city and state, e.g. Boston, MA", + }, + }, + "required": ["location"], + "additionalProperties": False, + }, + }, + } + + +def _execute_weather_tool(arguments: str) -> str: + location = json.loads(arguments)["location"] + return WEATHER_BY_LOCATION[location] + + +class ToolCallingScenario(Scenario): + expected_spans = ("chat",) + expected_metrics = ( + "gen_ai.client.operation.duration", + "gen_ai.client.token.usage", + ) + + def run( + self, + *, + tracer_provider: TracerProvider, + meter_provider: MeterProvider, + logger_provider: LoggerProvider, + vcr: Any, + ) -> None: + key_override = ( + {} + if os.getenv("OPENAI_API_KEY") + else {"OPENAI_API_KEY": "test_openai_api_key"} + ) + with mock.patch.dict(os.environ, key_override): + with instrument( + LangChainInstrumentor(), + tracer_provider=tracer_provider, + logger_provider=logger_provider, + meter_provider=meter_provider, + semconv="gen_ai_latest_experimental", + content_capture="SPAN_ONLY", + ): + llm = ChatOpenAI( + model=DEFAULT_MODEL, + tool_choice="auto", + ) + llm_with_tools = llm.bind_tools( + [_get_current_weather_tool_definition()] + ) + + messages: list[Any] = list(WEATHER_TOOL_PROMPT) + + with vcr.use_cassette("tool_calling_conformance.yaml"): + first_response: AIMessage = llm_with_tools.invoke(messages) + messages.append(first_response) + + for tool_call in first_response.tool_calls: + messages.append( + ToolMessage( + content=_execute_weather_tool( + 
json.dumps(tool_call["args"]) + ), + tool_call_id=tool_call["id"], + ) + ) + + llm_with_tools.invoke(messages) + + def validate(self, report: LiveCheckReport) -> None: + super().validate(report) + operations = [ + attr["value"] + for entry in report["samples"] + if "span" in entry + for attr in entry["span"]["attributes"] + if attr["name"] == "gen_ai.operation.name" + ] + assert operations == ["chat", "chat"], ( + "Tool calling exercises two chat completions (initial request and " + f"follow-up with tool results); saw spans {operations}" + ) diff --git a/instrumentation/opentelemetry-instrumentation-genai-langchain/tests/test_conformance.py b/instrumentation/opentelemetry-instrumentation-genai-langchain/tests/test_conformance.py index 3bdf216b..d48fe98e 100644 --- a/instrumentation/opentelemetry-instrumentation-genai-langchain/tests/test_conformance.py +++ b/instrumentation/opentelemetry-instrumentation-genai-langchain/tests/test_conformance.py @@ -22,6 +22,7 @@ from .conformance.agent import AgentScenario from .conformance.inference import InferenceScenario +from .conformance.tool_calling import ToolCallingScenario from .conformance.workflow import WorkflowScenario @@ -30,6 +31,7 @@ [ InferenceScenario(), AgentScenario(), + ToolCallingScenario(), WorkflowScenario(), ], ids=lambda s: type(s).__name__, From a89f6c74915e2b683f97539fdc8fe4dae4768a0e Mon Sep 17 00:00:00 2001 From: Wrisa Date: Thu, 11 Jun 2026 11:49:24 -0700 Subject: [PATCH 11/14] addressed review comments --- .../.changelog/37.added | 2 +- .../examples/tools/main.py | 7 ++-- .../genai/langchain/callback_handler.py | 10 +++--- .../instrumentation/genai/langchain/utils.py | 2 +- .../tests/conformance/tool_calling.py | 32 +++++++++++++++---- .../tests/test_tools.py | 6 ++-- 6 files changed, 39 insertions(+), 20 deletions(-) diff --git a/instrumentation/opentelemetry-instrumentation-genai-langchain/.changelog/37.added b/instrumentation/opentelemetry-instrumentation-genai-langchain/.changelog/37.added index 
13e13bb9..d78e078f 100644 --- a/instrumentation/opentelemetry-instrumentation-genai-langchain/.changelog/37.added +++ b/instrumentation/opentelemetry-instrumentation-genai-langchain/.changelog/37.added @@ -1 +1 @@ -Added tools span and tools definitions in inference span. +Added tool spans and captured tool definitions on inference spans. diff --git a/instrumentation/opentelemetry-instrumentation-genai-langchain/examples/tools/main.py b/instrumentation/opentelemetry-instrumentation-genai-langchain/examples/tools/main.py index 847dea14..74d5001b 100644 --- a/instrumentation/opentelemetry-instrumentation-genai-langchain/examples/tools/main.py +++ b/instrumentation/opentelemetry-instrumentation-genai-langchain/examples/tools/main.py @@ -27,7 +27,7 @@ from opentelemetry.exporter.otlp.proto.grpc.trace_exporter import ( OTLPSpanExporter, ) -from opentelemetry.instrumentation.langchain import LangChainInstrumentor +from opentelemetry.instrumentation.genai.langchain import LangChainInstrumentor from opentelemetry.sdk._logs import LoggerProvider from opentelemetry.sdk._logs.export import BatchLogRecordProcessor from opentelemetry.sdk.metrics import MeterProvider @@ -75,7 +75,8 @@ def add(a: float, b: float) -> float: def main() -> None: - LangChainInstrumentor().instrument() + instrumentor = LangChainInstrumentor() + instrumentor.instrument() llm = ChatOpenAI( model="gpt-3.5-turbo", @@ -109,7 +110,7 @@ def main() -> None: print("Final answer:", response.content) - LangChainInstrumentor().uninstrument() + instrumentor.uninstrument() if __name__ == "__main__": diff --git a/instrumentation/opentelemetry-instrumentation-genai-langchain/src/opentelemetry/instrumentation/genai/langchain/callback_handler.py b/instrumentation/opentelemetry-instrumentation-genai-langchain/src/opentelemetry/instrumentation/genai/langchain/callback_handler.py index 39b85ad3..a6bb4970 100644 --- 
a/instrumentation/opentelemetry-instrumentation-genai-langchain/src/opentelemetry/instrumentation/genai/langchain/callback_handler.py +++ b/instrumentation/opentelemetry-instrumentation-genai-langchain/src/opentelemetry/instrumentation/genai/langchain/callback_handler.py @@ -290,10 +290,8 @@ def on_chat_model_start( llm_invocation.seed = seed llm_invocation.temperature = temperature llm_invocation.max_tokens = max_tokens - if "invocation_params" in kwargs: - tools = kwargs["invocation_params"].get("tools") or kwargs[ - "invocation_params" - ].get("functions") + if params is not None: + tools = params.get("tools") or params.get("functions") if tools: tool_definitions = prepare_tool_definitions(tools) llm_invocation.tool_definitions = tool_definitions @@ -354,7 +352,7 @@ def on_llm_end( ) tool_calls.append(tool_call_request) output_message = OutputMessage( - role="assistant", + role=chat_generation.message.type, parts=cast(list[MessagePart], tool_calls), finish_reason=finish_reason, ) @@ -453,7 +451,7 @@ def on_tool_start( arguments = raw_arguments else: arguments = None - tool_invocation = self._telemetry_handler.start_tool( + tool_invocation = self._telemetry_handler.tool( name=name, tool_description=description, tool_type="function" ) tool_invocation.arguments = arguments diff --git a/instrumentation/opentelemetry-instrumentation-genai-langchain/src/opentelemetry/instrumentation/genai/langchain/utils.py b/instrumentation/opentelemetry-instrumentation-genai-langchain/src/opentelemetry/instrumentation/genai/langchain/utils.py index 1c60e25c..45fba99b 100644 --- a/instrumentation/opentelemetry-instrumentation-genai-langchain/src/opentelemetry/instrumentation/genai/langchain/utils.py +++ b/instrumentation/opentelemetry-instrumentation-genai-langchain/src/opentelemetry/instrumentation/genai/langchain/utils.py @@ -43,7 +43,7 @@ def prepare_tool_definitions(tools: list[Any]) -> list[ToolDefinition] | None: parameters=_get_property_value(func, "parameters"), ) ) - return 
definitions + return definitions or None def make_input_message(data: Any) -> list[InputMessage]: diff --git a/instrumentation/opentelemetry-instrumentation-genai-langchain/tests/conformance/tool_calling.py b/instrumentation/opentelemetry-instrumentation-genai-langchain/tests/conformance/tool_calling.py index 9dd04fe6..611fc1ce 100644 --- a/instrumentation/opentelemetry-instrumentation-genai-langchain/tests/conformance/tool_calling.py +++ b/instrumentation/opentelemetry-instrumentation-genai-langchain/tests/conformance/tool_calling.py @@ -25,6 +25,7 @@ from opentelemetry.test.weaver_live_check import LiveCheckReport from opentelemetry.test_util_genai.conformance import Scenario from opentelemetry.test_util_genai.instrumentor import instrument +from opentelemetry.util.genai.handler import TelemetryHandler DEFAULT_MODEL = "gpt-4o-mini" WEATHER_TOOL_PROMPT = [ @@ -67,7 +68,7 @@ def _execute_weather_tool(arguments: str) -> str: class ToolCallingScenario(Scenario): - expected_spans = ("chat",) + expected_spans = ("chat", "execute_tool") expected_metrics = ( "gen_ai.client.operation.duration", "gen_ai.client.token.usage", @@ -86,6 +87,11 @@ def run( if os.getenv("OPENAI_API_KEY") else {"OPENAI_API_KEY": "test_openai_api_key"} ) + tool_handler = TelemetryHandler( + tracer_provider=tracer_provider, + meter_provider=meter_provider, + logger_provider=logger_provider, + ) with mock.patch.dict(os.environ, key_override): with instrument( LangChainInstrumentor(), @@ -110,11 +116,18 @@ def run( messages.append(first_response) for tool_call in first_response.tool_calls: + with tool_handler.tool( + tool_call["name"], + tool_call_id=tool_call["id"], + tool_type="function", + ) as invocation: + result = _execute_weather_tool( + json.dumps(tool_call["args"]) + ) + invocation.tool_result = result messages.append( ToolMessage( - content=_execute_weather_tool( - json.dumps(tool_call["args"]) - ), + content=result, tool_call_id=tool_call["id"], ) ) @@ -130,7 +143,12 @@ def validate(self, 
report: LiveCheckReport) -> None: for attr in entry["span"]["attributes"] if attr["name"] == "gen_ai.operation.name" ] - assert operations == ["chat", "chat"], ( - "Tool calling exercises two chat completions (initial request and " - f"follow-up with tool results); saw spans {operations}" + assert operations == [ + "chat", + "execute_tool", + "execute_tool", + "chat", + ], ( + "Tool calling exercises two chat completions with two execute_tool " + f"spans in between (one per tool call); saw spans {operations}" ) diff --git a/instrumentation/opentelemetry-instrumentation-genai-langchain/tests/test_tools.py b/instrumentation/opentelemetry-instrumentation-genai-langchain/tests/test_tools.py index abfd2e5c..db78a083 100644 --- a/instrumentation/opentelemetry-instrumentation-genai-langchain/tests/test_tools.py +++ b/instrumentation/opentelemetry-instrumentation-genai-langchain/tests/test_tools.py @@ -126,8 +126,7 @@ def test_prepare_tool_definitions_dict_tools(): def test_prepare_tool_definitions_skips_non_function_type(): tools = [{"type": "retrieval", "retrieval": {}}] result = prepare_tool_definitions(tools) - # No function-type tools → empty list returned (not None, but falsy) - assert not result + assert result is None def test_prepare_tool_definitions_multiple_tools(): @@ -329,6 +328,7 @@ def test_on_tool_start_with_no_serialized(monkeypatch): monkeypatch.setenv( "OTEL_SEMCONV_STABILITY_OPT_IN", "gen_ai_latest_experimental" ) + _enable_experimental_mode() tracer_provider, span_exporter, logger_provider, meter_provider = ( _make_providers() ) @@ -357,6 +357,7 @@ def test_on_tool_error_records_error_type(monkeypatch): monkeypatch.setenv( "OTEL_SEMCONV_STABILITY_OPT_IN", "gen_ai_latest_experimental" ) + _enable_experimental_mode() tracer_provider, span_exporter, logger_provider, meter_provider = ( _make_providers() ) @@ -546,6 +547,7 @@ def test_tool_span_created_via_instrumentor(monkeypatch): monkeypatch.setenv( "OTEL_SEMCONV_STABILITY_OPT_IN", 
"gen_ai_latest_experimental" ) + _enable_experimental_mode() span_exporter = InMemorySpanExporter() tracer_provider = TracerProvider() From 459b1bb86700fccbefb50f145dce85a2e05eb2ca Mon Sep 17 00:00:00 2001 From: Wrisa Date: Thu, 11 Jun 2026 11:57:08 -0700 Subject: [PATCH 12/14] fixed error --- .../instrumentation/genai/langchain/callback_handler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/instrumentation/opentelemetry-instrumentation-genai-langchain/src/opentelemetry/instrumentation/genai/langchain/callback_handler.py b/instrumentation/opentelemetry-instrumentation-genai-langchain/src/opentelemetry/instrumentation/genai/langchain/callback_handler.py index a6bb4970..84aa6b78 100644 --- a/instrumentation/opentelemetry-instrumentation-genai-langchain/src/opentelemetry/instrumentation/genai/langchain/callback_handler.py +++ b/instrumentation/opentelemetry-instrumentation-genai-langchain/src/opentelemetry/instrumentation/genai/langchain/callback_handler.py @@ -254,7 +254,7 @@ def on_chat_model_start( for sub_messages in messages: for message in sub_messages: # Cast to Any to avoid type checking issues with LangChain's complex content type - raw_content: Any = message.content # type: ignore[misc] + raw_content: Any = message.content role = message.type parts: list[Text] = [] From 72e77242caec6472048be269d880fc352823001d Mon Sep 17 00:00:00 2001 From: Wrisa Date: Mon, 15 Jun 2026 09:31:55 -0700 Subject: [PATCH 13/14] fixed conformance tests --- .../genai/langchain/callback_handler.py | 18 ++++++++++++++++++ .../tests/conformance/tool_calling.py | 12 +++++++++++- 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/instrumentation/opentelemetry-instrumentation-genai-langchain/src/opentelemetry/instrumentation/genai/langchain/callback_handler.py b/instrumentation/opentelemetry-instrumentation-genai-langchain/src/opentelemetry/instrumentation/genai/langchain/callback_handler.py index 84aa6b78..d25023cb 100644 --- 
a/instrumentation/opentelemetry-instrumentation-genai-langchain/src/opentelemetry/instrumentation/genai/langchain/callback_handler.py +++ b/instrumentation/opentelemetry-instrumentation-genai-langchain/src/opentelemetry/instrumentation/genai/langchain/callback_handler.py @@ -24,6 +24,9 @@ make_last_output_message, prepare_tool_definitions, ) +from opentelemetry.semconv._incubating.attributes import ( + gen_ai_attributes as GenAI, +) from opentelemetry.util.genai.handler import TelemetryHandler from opentelemetry.util.genai.invocation import ( AgentInvocation, @@ -250,6 +253,21 @@ def on_chat_model_start( if "ls_max_tokens" in metadata: max_tokens = metadata.get("ls_max_tokens") + # Backfill provider on any ancestor invoke_agent span that was created + # before the provider was known (LangChain chain callbacks don't carry + # ls_provider; only the chat model callback does). + if provider != "unknown": + agent_invocation = self._find_nearest_agent(parent_run_id) + if ( + agent_invocation is not None + and agent_invocation.provider == "unknown" + and agent_invocation.span.is_recording() + ): + agent_invocation.provider = provider + agent_invocation.span.set_attribute( + GenAI.GEN_AI_PROVIDER_NAME, provider + ) + input_messages: list[InputMessage] = [] for sub_messages in messages: for message in sub_messages: diff --git a/instrumentation/opentelemetry-instrumentation-genai-langchain/tests/conformance/tool_calling.py b/instrumentation/opentelemetry-instrumentation-genai-langchain/tests/conformance/tool_calling.py index 611fc1ce..820f2331 100644 --- a/instrumentation/opentelemetry-instrumentation-genai-langchain/tests/conformance/tool_calling.py +++ b/instrumentation/opentelemetry-instrumentation-genai-langchain/tests/conformance/tool_calling.py @@ -23,7 +23,10 @@ from opentelemetry.sdk.metrics import MeterProvider from opentelemetry.sdk.trace import TracerProvider from opentelemetry.test.weaver_live_check import LiveCheckReport -from 
opentelemetry.test_util_genai.conformance import Scenario +from opentelemetry.test_util_genai.conformance import ( + ExpectedViolation, + Scenario, +) from opentelemetry.test_util_genai.instrumentor import instrument from opentelemetry.util.genai.handler import TelemetryHandler @@ -73,6 +76,13 @@ class ToolCallingScenario(Scenario): "gen_ai.client.operation.duration", "gen_ai.client.token.usage", ) + # langchain can't populate server.address on chat spans. + expected_violations = ( + ExpectedViolation( + advice_id="genai_expected_attribute_missing", + message_substring="server.address", + ), + ) def run( self, From bd79d8ce9660f5eb45e7e0af3c892fcf2776c186 Mon Sep 17 00:00:00 2001 From: Wrisa Date: Mon, 15 Jun 2026 10:52:22 -0700 Subject: [PATCH 14/14] fixed conformance tests and reverted change --- .../genai/langchain/callback_handler.py | 18 ------------------ .../tests/conformance/agent.py | 6 ++++++ .../tests/conformance/tool_calling.py | 5 +++++ 3 files changed, 11 insertions(+), 18 deletions(-) diff --git a/instrumentation/opentelemetry-instrumentation-genai-langchain/src/opentelemetry/instrumentation/genai/langchain/callback_handler.py b/instrumentation/opentelemetry-instrumentation-genai-langchain/src/opentelemetry/instrumentation/genai/langchain/callback_handler.py index d25023cb..84aa6b78 100644 --- a/instrumentation/opentelemetry-instrumentation-genai-langchain/src/opentelemetry/instrumentation/genai/langchain/callback_handler.py +++ b/instrumentation/opentelemetry-instrumentation-genai-langchain/src/opentelemetry/instrumentation/genai/langchain/callback_handler.py @@ -24,9 +24,6 @@ make_last_output_message, prepare_tool_definitions, ) -from opentelemetry.semconv._incubating.attributes import ( - gen_ai_attributes as GenAI, -) from opentelemetry.util.genai.handler import TelemetryHandler from opentelemetry.util.genai.invocation import ( AgentInvocation, @@ -253,21 +250,6 @@ def on_chat_model_start( if "ls_max_tokens" in metadata: max_tokens = 
metadata.get("ls_max_tokens") - # Backfill provider on any ancestor invoke_agent span that was created - # before the provider was known (LangChain chain callbacks don't carry - # ls_provider; only the chat model callback does). - if provider != "unknown": - agent_invocation = self._find_nearest_agent(parent_run_id) - if ( - agent_invocation is not None - and agent_invocation.provider == "unknown" - and agent_invocation.span.is_recording() - ): - agent_invocation.provider = provider - agent_invocation.span.set_attribute( - GenAI.GEN_AI_PROVIDER_NAME, provider - ) - input_messages: list[InputMessage] = [] for sub_messages in messages: for message in sub_messages: diff --git a/instrumentation/opentelemetry-instrumentation-genai-langchain/tests/conformance/agent.py b/instrumentation/opentelemetry-instrumentation-genai-langchain/tests/conformance/agent.py index 0fba6ca4..79708e71 100644 --- a/instrumentation/opentelemetry-instrumentation-genai-langchain/tests/conformance/agent.py +++ b/instrumentation/opentelemetry-instrumentation-genai-langchain/tests/conformance/agent.py @@ -45,11 +45,17 @@ class AgentScenario(Scenario): "gen_ai.client.token.usage", ) # langchain can't populate server.address on chat spans. + # invoke_agent provider is unknown at span creation; ls_provider is only + # available on the chat model callback, not the chain callback. 
expected_violations = ( ExpectedViolation( advice_id="genai_expected_attribute_missing", message_substring="server.address", ), + ExpectedViolation( + advice_id="required_attribute_not_present", + message_substring="gen_ai.provider.name", + ), ) def run( diff --git a/instrumentation/opentelemetry-instrumentation-genai-langchain/tests/conformance/tool_calling.py b/instrumentation/opentelemetry-instrumentation-genai-langchain/tests/conformance/tool_calling.py index 820f2331..aa5c8e07 100644 --- a/instrumentation/opentelemetry-instrumentation-genai-langchain/tests/conformance/tool_calling.py +++ b/instrumentation/opentelemetry-instrumentation-genai-langchain/tests/conformance/tool_calling.py @@ -77,11 +77,16 @@ class ToolCallingScenario(Scenario): "gen_ai.client.token.usage", ) # langchain can't populate server.address on chat spans. + # execute_tool spans are provider-agnostic; gen_ai.provider.name is not available. expected_violations = ( ExpectedViolation( advice_id="genai_expected_attribute_missing", message_substring="server.address", ), + ExpectedViolation( + advice_id="required_attribute_not_present", + message_substring="gen_ai.provider.name", + ), ) def run(