GenAI Utils | Add metrics to ToolInvocations (#4443)

keith-decker · web-flow · commit c9be5e1f8041 · 2026-04-21T16:16:01.000-04:00
* toolinvocation metrics and tests

* lint fixes

* changelog

* remove provider and server attributes from tool metrics

* nit fix
diff --git a/util/opentelemetry-util-genai/CHANGELOG.md b/util/opentelemetry-util-genai/CHANGELOG.md
@@ -23,6 +23,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 of repeatedly failing on every upload ([#4390](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/4390)).
 - Refactor public API: add factory methods (`start_inference`, `start_embedding`, `start_tool`, `start_workflow`) and invocation-owned lifecycle (`invocation.stop()` / `invocation.fail(exc)`); rename `LLMInvocation` → `InferenceInvocation` and `ToolCall` → `ToolInvocation`. Existing usages remain fully functional via deprecated aliases.
   ([#4391](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/4391))
+- Add metrics to ToolInvocations ([#4443](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/4443))
 
 
 ## Version 0.3b0 (2026-02-20)
diff --git a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/_tool_invocation.py b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/_tool_invocation.py
@@ -79,6 +79,13 @@ def __init__(
         self.tool_result = tool_result
         self._start()
 
+    def _get_metric_attributes(self) -> dict[str, Any]:
+        attrs: dict[str, Any] = {
+            GenAI.GEN_AI_OPERATION_NAME: self._operation_name,
+        }
+        attrs.update(self.metric_attributes)
+        return attrs
+
     def _apply_finish(self, error: Error | None = None) -> None:
         if error is not None:
             self._apply_error_attributes(error)
@@ -94,3 +101,4 @@ def _apply_finish(self, error: Error | None = None) -> None:
         }
         attributes.update(self.attributes)
         self.span.set_attributes(attributes)
+        self._metrics_recorder.record(self)
diff --git a/util/opentelemetry-util-genai/tests/test_handler_metrics.py b/util/opentelemetry-util-genai/tests/test_handler_metrics.py
@@ -323,3 +323,69 @@ def test_stop_embedding_without_tokens(self) -> None:
 
         # Token metrics should NOT be recorded when input_tokens is not set
         self.assertNotIn("gen_ai.client.token.usage", metrics)
+
+
+class TelemetryHandlerToolMetricsTest(TestBase):
+    def _harvest_metrics(self) -> Dict[str, List[Any]]:
+        metrics = self.get_sorted_metrics()
+        metrics_by_name: Dict[str, List[Any]] = {}
+        for metric in metrics or []:
+            points = metric.data.data_points or []
+            metrics_by_name.setdefault(metric.name, []).extend(points)
+        return metrics_by_name
+
+    def test_stop_tool_records_duration(self) -> None:
+        handler = TelemetryHandler(
+            tracer_provider=self.tracer_provider,
+            meter_provider=self.meter_provider,
+        )
+        with patch("timeit.default_timer", return_value=1000.0):
+            invocation = handler.start_tool("get_weather")
+        invocation.metric_attributes = {"custom.key": "custom_value"}
+
+        with patch("timeit.default_timer", return_value=1002.5):
+            invocation.stop()
+
+        metrics = self._harvest_metrics()
+        self.assertIn("gen_ai.client.operation.duration", metrics)
+        duration_points = metrics["gen_ai.client.operation.duration"]
+        self.assertEqual(len(duration_points), 1)
+        duration_point = duration_points[0]
+
+        self.assertEqual(
+            duration_point.attributes[GenAI.GEN_AI_OPERATION_NAME],
+            "execute_tool",
+        )
+        self.assertEqual(
+            duration_point.attributes["custom.key"], "custom_value"
+        )
+        self.assertAlmostEqual(duration_point.sum, 2.5, places=3)
+        self.assertNotIn("gen_ai.client.token.usage", metrics)
+
+    def test_fail_tool_records_duration_with_error(self) -> None:
+        handler = TelemetryHandler(
+            tracer_provider=self.tracer_provider,
+            meter_provider=self.meter_provider,
+        )
+        with patch("timeit.default_timer", return_value=500.0):
+            invocation = handler.start_tool("failing_tool")
+
+        error = Error(message="Tool execution failed", type=RuntimeError)
+        with patch("timeit.default_timer", return_value=501.5):
+            invocation.fail(error)
+
+        metrics = self._harvest_metrics()
+        self.assertIn("gen_ai.client.operation.duration", metrics)
+        duration_points = metrics["gen_ai.client.operation.duration"]
+        self.assertEqual(len(duration_points), 1)
+        duration_point = duration_points[0]
+
+        self.assertEqual(
+            duration_point.attributes["error.type"], "RuntimeError"
+        )
+        self.assertEqual(
+            duration_point.attributes[GenAI.GEN_AI_OPERATION_NAME],
+            "execute_tool",
+        )
+        self.assertAlmostEqual(duration_point.sum, 1.5, places=3)
+        self.assertNotIn("gen_ai.client.token.usage", metrics)