Skip to content

Commit c9be5e1

Browse files
authored
GenAI Utils | Add metrics to ToolInvocations (#4443)
* toolinvocation metrics and tests
* lint fixes
* changelog
* remove provider and server attributes from tool metrics
* nit fix
1 parent 87a01e6 commit c9be5e1

3 files changed

Lines changed: 75 additions & 0 deletions

File tree

util/opentelemetry-util-genai/CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
2323
of repeatedly failing on every upload ([#4390](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/4390)).
2424
- Refactor public API: add factory methods (`start_inference`, `start_embedding`, `start_tool`, `start_workflow`) and invocation-owned lifecycle (`invocation.stop()` / `invocation.fail(exc)`); rename `LLMInvocation` → `InferenceInvocation` and `ToolCall` → `ToolInvocation`. Existing usages remain fully functional via deprecated aliases.
2525
([#4391](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/4391))
26+
- Add metrics to ToolInvocations ([#4443](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/4443))
2627

2728

2829
## Version 0.3b0 (2026-02-20)

util/opentelemetry-util-genai/src/opentelemetry/util/genai/_tool_invocation.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,13 @@ def __init__(
7979
self.tool_result = tool_result
8080
self._start()
8181

82+
def _get_metric_attributes(self) -> dict[str, Any]:
83+
attrs: dict[str, Any] = {
84+
GenAI.GEN_AI_OPERATION_NAME: self._operation_name,
85+
}
86+
attrs.update(self.metric_attributes)
87+
return attrs
88+
8289
def _apply_finish(self, error: Error | None = None) -> None:
8390
if error is not None:
8491
self._apply_error_attributes(error)
@@ -94,3 +101,4 @@ def _apply_finish(self, error: Error | None = None) -> None:
94101
}
95102
attributes.update(self.attributes)
96103
self.span.set_attributes(attributes)
104+
self._metrics_recorder.record(self)

util/opentelemetry-util-genai/tests/test_handler_metrics.py

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -323,3 +323,69 @@ def test_stop_embedding_without_tokens(self) -> None:
323323

324324
# Token metrics should NOT be recorded when input_tokens is not set
325325
self.assertNotIn("gen_ai.client.token.usage", metrics)
326+
327+
328+
class TelemetryHandlerToolMetricsTest(TestBase):
329+
def _harvest_metrics(self) -> Dict[str, List[Any]]:
330+
metrics = self.get_sorted_metrics()
331+
metrics_by_name: Dict[str, List[Any]] = {}
332+
for metric in metrics or []:
333+
points = metric.data.data_points or []
334+
metrics_by_name.setdefault(metric.name, []).extend(points)
335+
return metrics_by_name
336+
337+
def test_stop_tool_records_duration(self) -> None:
338+
handler = TelemetryHandler(
339+
tracer_provider=self.tracer_provider,
340+
meter_provider=self.meter_provider,
341+
)
342+
with patch("timeit.default_timer", return_value=1000.0):
343+
invocation = handler.start_tool("get_weather")
344+
invocation.metric_attributes = {"custom.key": "custom_value"}
345+
346+
with patch("timeit.default_timer", return_value=1002.5):
347+
invocation.stop()
348+
349+
metrics = self._harvest_metrics()
350+
self.assertIn("gen_ai.client.operation.duration", metrics)
351+
duration_points = metrics["gen_ai.client.operation.duration"]
352+
self.assertEqual(len(duration_points), 1)
353+
duration_point = duration_points[0]
354+
355+
self.assertEqual(
356+
duration_point.attributes[GenAI.GEN_AI_OPERATION_NAME],
357+
"execute_tool",
358+
)
359+
self.assertEqual(
360+
duration_point.attributes["custom.key"], "custom_value"
361+
)
362+
self.assertAlmostEqual(duration_point.sum, 2.5, places=3)
363+
self.assertNotIn("gen_ai.client.token.usage", metrics)
364+
365+
def test_fail_tool_records_duration_with_error(self) -> None:
366+
handler = TelemetryHandler(
367+
tracer_provider=self.tracer_provider,
368+
meter_provider=self.meter_provider,
369+
)
370+
with patch("timeit.default_timer", return_value=500.0):
371+
invocation = handler.start_tool("failing_tool")
372+
373+
error = Error(message="Tool execution failed", type=RuntimeError)
374+
with patch("timeit.default_timer", return_value=501.5):
375+
invocation.fail(error)
376+
377+
metrics = self._harvest_metrics()
378+
self.assertIn("gen_ai.client.operation.duration", metrics)
379+
duration_points = metrics["gen_ai.client.operation.duration"]
380+
self.assertEqual(len(duration_points), 1)
381+
duration_point = duration_points[0]
382+
383+
self.assertEqual(
384+
duration_point.attributes["error.type"], "RuntimeError"
385+
)
386+
self.assertEqual(
387+
duration_point.attributes[GenAI.GEN_AI_OPERATION_NAME],
388+
"execute_tool",
389+
)
390+
self.assertAlmostEqual(duration_point.sum, 1.5, places=3)
391+
self.assertNotIn("gen_ai.client.token.usage", metrics)

0 commit comments

Comments
 (0)