Skip to content

Commit fec2c6d

Browse files
authored
Merge branch 'main' into 4209
2 parents 85aa873 + 7e07feb commit fec2c6d

5 files changed

Lines changed: 189 additions & 113 deletions

File tree

instrumentation-genai/opentelemetry-instrumentation-openai-v2/CHANGELOG.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
77

88
## Unreleased
99

10+
11+
- Migrate experimental path from deprecated `LLMInvocation` to `InferenceInvocation`,
12+
using `handler.start_inference()` and `invocation.stop()`/`invocation.fail()` directly
13+
([#4502](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/4502))
14+
- Use `create_duration_histogram` and `create_token_histogram` from
15+
`opentelemetry-util-genai` instead of defining bucket boundaries locally
16+
([#4501](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/4501))
1017
- Import `OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT` from
1118
`opentelemetry.util.genai.environment_variables` instead of re-defining it locally,
1219
making `opentelemetry-util-genai` the single source of truth for this constant.
@@ -28,6 +35,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
2835
- Add strongly typed Responses API extractors with validation and content
2936
extraction improvements
3037
([#4337](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/4337))
38+
- Default to an empty string for the `gen_ai.request.model` attribute when the model is missing.
39+
([#4494](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/4494))
3140

3241
## Version 2.3b0 (2025-12-24)
3342

Lines changed: 7 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -1,52 +1,13 @@
11
from opentelemetry.metrics import Histogram, Meter
2-
from opentelemetry.semconv._incubating.metrics import gen_ai_metrics
3-
4-
_GEN_AI_CLIENT_OPERATION_DURATION_BUCKETS = [
5-
0.01,
6-
0.02,
7-
0.04,
8-
0.08,
9-
0.16,
10-
0.32,
11-
0.64,
12-
1.28,
13-
2.56,
14-
5.12,
15-
10.24,
16-
20.48,
17-
40.96,
18-
81.92,
19-
]
20-
21-
_GEN_AI_CLIENT_TOKEN_USAGE_BUCKETS = [
22-
1,
23-
4,
24-
16,
25-
64,
26-
256,
27-
1024,
28-
4096,
29-
16384,
30-
65536,
31-
262144,
32-
1048576,
33-
4194304,
34-
16777216,
35-
67108864,
36-
]
2+
from opentelemetry.util.genai.instruments import (
3+
create_duration_histogram,
4+
create_token_histogram,
5+
)
376

387

398
class Instruments:
409
def __init__(self, meter: Meter):
41-
self.operation_duration_histogram: Histogram = meter.create_histogram(
42-
name=gen_ai_metrics.GEN_AI_CLIENT_OPERATION_DURATION,
43-
description="GenAI operation duration",
44-
unit="s",
45-
explicit_bucket_boundaries_advisory=_GEN_AI_CLIENT_OPERATION_DURATION_BUCKETS,
46-
)
47-
self.token_usage_histogram: Histogram = meter.create_histogram(
48-
name=gen_ai_metrics.GEN_AI_CLIENT_TOKEN_USAGE,
49-
description="Measures number of input and output tokens used",
50-
unit="{token}",
51-
explicit_bucket_boundaries_advisory=_GEN_AI_CLIENT_TOKEN_USAGE_BUCKETS,
10+
self.operation_duration_histogram: Histogram = (
11+
create_duration_histogram(meter)
5212
)
13+
self.token_usage_histogram: Histogram = create_token_histogram(meter)

instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/patch.py

Lines changed: 26 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -33,10 +33,10 @@
3333
from opentelemetry.trace import Span, SpanKind, Tracer
3434
from opentelemetry.trace.propagation import set_span_in_context
3535
from opentelemetry.util.genai.handler import TelemetryHandler
36+
from opentelemetry.util.genai.invocation import InferenceInvocation
3637
from opentelemetry.util.genai.types import (
3738
ContentCapturingMode,
3839
Error,
39-
LLMInvocation, # pylint: disable=no-name-in-module # TODO: migrate to InferenceInvocation
4040
OutputMessage,
4141
Text,
4242
ToolCallRequest,
@@ -68,7 +68,9 @@ def traced_method(wrapped, instance, args, kwargs):
6868
**get_llm_request_attributes(kwargs, instance, False)
6969
}
7070

71-
span_name = f"{span_attributes[GenAIAttributes.GEN_AI_OPERATION_NAME]} {span_attributes[GenAIAttributes.GEN_AI_REQUEST_MODEL]}"
71+
operation_name = span_attributes[GenAIAttributes.GEN_AI_OPERATION_NAME]
72+
model = span_attributes.get(GenAIAttributes.GEN_AI_REQUEST_MODEL)
73+
span_name = f"{operation_name} {model}" if model else operation_name
7274
with tracer.start_as_current_span(
7375
name=span_name,
7476
kind=SpanKind.CLIENT,
@@ -128,10 +130,8 @@ def chat_completions_create_v_new(
128130
capture_content = content_capturing_mode != ContentCapturingMode.NO_CONTENT
129131

130132
def traced_method(wrapped, instance, args, kwargs):
131-
chat_invocation = handler.start_llm(
132-
create_chat_invocation(
133-
kwargs, instance, capture_content=capture_content
134-
)
133+
chat_invocation = create_chat_invocation(
134+
handler, kwargs, instance, capture_content=capture_content
135135
)
136136

137137
try:
@@ -143,18 +143,16 @@ def traced_method(wrapped, instance, args, kwargs):
143143
parsed_result = result
144144
if is_streaming(kwargs):
145145
return ChatStreamWrapper(
146-
parsed_result, handler, chat_invocation, capture_content
146+
parsed_result, chat_invocation, capture_content
147147
)
148148

149149
_set_response_properties(
150150
chat_invocation, parsed_result, capture_content
151151
)
152-
handler.stop_llm(chat_invocation)
152+
chat_invocation.stop()
153153
return result
154154
except Exception as error:
155-
handler.fail_llm(
156-
chat_invocation, Error(type=type(error), message=str(error))
157-
)
155+
chat_invocation.fail(Error(type=type(error), message=str(error)))
158156
raise
159157

160158
return traced_method
@@ -173,7 +171,9 @@ async def traced_method(wrapped, instance, args, kwargs):
173171
**get_llm_request_attributes(kwargs, instance, False)
174172
}
175173

176-
span_name = f"{span_attributes[GenAIAttributes.GEN_AI_OPERATION_NAME]} {span_attributes[GenAIAttributes.GEN_AI_REQUEST_MODEL]}"
174+
operation_name = span_attributes[GenAIAttributes.GEN_AI_OPERATION_NAME]
175+
model = span_attributes.get(GenAIAttributes.GEN_AI_REQUEST_MODEL)
176+
span_name = f"{operation_name} {model}" if model else operation_name
177177
with tracer.start_as_current_span(
178178
name=span_name,
179179
kind=SpanKind.CLIENT,
@@ -232,10 +232,8 @@ def async_chat_completions_create_v_new(
232232
capture_content = content_capturing_mode != ContentCapturingMode.NO_CONTENT
233233

234234
async def traced_method(wrapped, instance, args, kwargs):
235-
chat_invocation = handler.start_llm(
236-
create_chat_invocation(
237-
kwargs, instance, capture_content=capture_content
238-
)
235+
chat_invocation = create_chat_invocation(
236+
handler, kwargs, instance, capture_content=capture_content
239237
)
240238

241239
try:
@@ -247,19 +245,17 @@ async def traced_method(wrapped, instance, args, kwargs):
247245
parsed_result = result
248246
if is_streaming(kwargs):
249247
return ChatStreamWrapper(
250-
parsed_result, handler, chat_invocation, capture_content
248+
parsed_result, chat_invocation, capture_content
251249
)
252250

253251
_set_response_properties(
254252
chat_invocation, parsed_result, capture_content
255253
)
256-
handler.stop_llm(chat_invocation)
254+
chat_invocation.stop()
257255
return result
258256

259257
except Exception as error:
260-
handler.fail_llm(
261-
chat_invocation, Error(type=type(error), message=str(error))
262-
)
258+
chat_invocation.fail(Error(type=type(error), message=str(error)))
263259
raise
264260

265261
return traced_method
@@ -373,7 +369,9 @@ async def traced_method(wrapped, instance, args, kwargs):
373369

374370
def _get_embeddings_span_name(span_attributes):
375371
"""Get span name for embeddings operations."""
376-
return f"{span_attributes[GenAIAttributes.GEN_AI_OPERATION_NAME]} {span_attributes[GenAIAttributes.GEN_AI_REQUEST_MODEL]}"
372+
operation_name = span_attributes[GenAIAttributes.GEN_AI_OPERATION_NAME]
373+
model = span_attributes.get(GenAIAttributes.GEN_AI_REQUEST_MODEL)
374+
return f"{operation_name} {model}" if model else operation_name
377375

378376

379377
def _record_metrics(
@@ -495,8 +493,8 @@ def _set_response_attributes(span, result):
495493

496494

497495
def _set_response_properties(
498-
chat_invocation: LLMInvocation, result, capture_content: bool
499-
) -> LLMInvocation:
496+
chat_invocation: InferenceInvocation, result, capture_content: bool
497+
) -> InferenceInvocation:
500498
if getattr(result, "model", None):
501499
chat_invocation.response_model_name = result.model
502500

@@ -868,8 +866,7 @@ def cleanup(self, error: Optional[BaseException] = None):
868866

869867

870868
class ChatStreamWrapper(BaseStreamWrapper):
871-
handler: TelemetryHandler
872-
invocation: LLMInvocation
869+
invocation: InferenceInvocation
873870
response_id: Optional[str] = None
874871
response_model: Optional[str] = None
875872
service_tier: Optional[str] = None
@@ -880,13 +877,11 @@ class ChatStreamWrapper(BaseStreamWrapper):
880877
def __init__(
881878
self,
882879
stream: Stream,
883-
handler: TelemetryHandler,
884-
invocation: LLMInvocation,
880+
invocation: InferenceInvocation,
885881
capture_content: bool,
886882
):
887883
super().__init__(stream, capture_content=capture_content)
888884
self.stream = stream
889-
self.handler = handler
890885
self.invocation = invocation
891886
self.choice_buffers = []
892887

@@ -944,9 +939,7 @@ def cleanup(self, error: Optional[BaseException] = None):
944939
self._set_output_messages()
945940

946941
if error:
947-
self.handler.fail_llm(
948-
self.invocation, Error(type=type(error), message=str(error))
949-
)
942+
self.invocation.fail(Error(type=type(error), message=str(error)))
950943
else:
951-
self.handler.stop_llm(self.invocation)
944+
self.invocation.stop()
952945
self._started = False

instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/utils.py

Lines changed: 32 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -39,9 +39,10 @@
3939
from opentelemetry.util.genai.environment_variables import (
4040
OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT,
4141
)
42+
from opentelemetry.util.genai.handler import TelemetryHandler
43+
from opentelemetry.util.genai.invocation import InferenceInvocation
4244
from opentelemetry.util.genai.types import (
4345
InputMessage,
44-
LLMInvocation, # pylint: disable=no-name-in-module # TODO: migrate to InferenceInvocation
4546
OutputMessage,
4647
Text,
4748
ToolCallRequest,
@@ -210,13 +211,16 @@ def get_llm_request_attributes(
210211
latest_experimental_enabled,
211212
operation_name=GenAIAttributes.GenAiOperationNameValues.CHAT.value,
212213
):
213-
# pylint: disable=too-many-branches
214+
# pylint: disable=too-many-branches,too-many-locals
214215

215216
attributes = {
216217
GenAIAttributes.GEN_AI_OPERATION_NAME: operation_name,
217-
GenAIAttributes.GEN_AI_REQUEST_MODEL: kwargs.get("model"),
218218
}
219219

220+
model = kwargs.get("model")
221+
if model:
222+
attributes[GenAIAttributes.GEN_AI_REQUEST_MODEL] = model
223+
220224
if latest_experimental_enabled:
221225
attributes.update(
222226
{
@@ -328,42 +332,37 @@ def get_llm_request_attributes(
328332

329333

330334
def create_chat_invocation(
335+
handler: TelemetryHandler,
331336
kwargs,
332337
client_instance,
333338
capture_content: bool,
334-
) -> LLMInvocation:
339+
) -> InferenceInvocation:
335340
# pylint: disable=too-many-branches
336341

337-
llm_invocation = LLMInvocation(request_model=kwargs.get("model", ""))
338-
llm_invocation.provider = (
339-
GenAIAttributes.GenAiProviderNameValues.OPENAI.value
340-
)
341-
llm_invocation.temperature = get_value(kwargs.get("temperature"))
342-
llm_invocation.top_p = get_value(kwargs.get("p") or kwargs.get("top_p"))
343-
llm_invocation.max_tokens = get_value(kwargs.get("max_tokens"))
344-
llm_invocation.presence_penalty = get_value(kwargs.get("presence_penalty"))
345-
llm_invocation.frequency_penalty = get_value(
346-
kwargs.get("frequency_penalty")
342+
address, port = get_server_address_and_port(client_instance)
343+
invocation = handler.start_inference(
344+
GenAIAttributes.GenAiProviderNameValues.OPENAI.value,
345+
request_model=kwargs.get("model", ""),
346+
server_address=address if address else None,
347+
server_port=port if port else None,
347348
)
348-
llm_invocation.seed = get_value(kwargs.get("seed"))
349+
invocation.temperature = get_value(kwargs.get("temperature"))
350+
invocation.top_p = get_value(kwargs.get("p") or kwargs.get("top_p"))
351+
invocation.max_tokens = get_value(kwargs.get("max_tokens"))
352+
invocation.presence_penalty = get_value(kwargs.get("presence_penalty"))
353+
invocation.frequency_penalty = get_value(kwargs.get("frequency_penalty"))
354+
invocation.seed = get_value(kwargs.get("seed"))
349355
if (stop_sequences := get_value(kwargs.get("stop"))) is not None:
350356
if isinstance(stop_sequences, str):
351357
stop_sequences = [stop_sequences]
352-
llm_invocation.stop_sequences = stop_sequences
358+
invocation.stop_sequences = stop_sequences
353359

354-
address, port = get_server_address_and_port(client_instance)
355-
if address:
356-
llm_invocation.server_address = address
357-
if port:
358-
llm_invocation.server_port = port
359-
360-
attributes = {}
361360
if (choice_count := get_value(kwargs.get("n"))) is not None:
362361
# Only add non-default, meaningful values
363362
if isinstance(choice_count, int) and choice_count != 1:
364-
attributes[GenAIAttributes.GEN_AI_REQUEST_CHOICE_COUNT] = (
365-
choice_count
366-
)
363+
invocation.attributes[
364+
GenAIAttributes.GEN_AI_REQUEST_CHOICE_COUNT
365+
] = choice_count
367366

368367
if (
369368
response_format := get_value(kwargs.get("response_format"))
@@ -373,11 +372,11 @@ def create_chat_invocation(
373372
if (
374373
response_format_type := get_value(response_format.get("type"))
375374
) is not None:
376-
attributes[GenAIAttributes.GEN_AI_OUTPUT_TYPE] = (
375+
invocation.attributes[GenAIAttributes.GEN_AI_OUTPUT_TYPE] = (
377376
response_format_type
378377
)
379378
else:
380-
attributes[
379+
invocation.attributes[
381380
GenAIAttributes.GEN_AI_OPENAI_REQUEST_RESPONSE_FORMAT
382381
] = response_format
383382

@@ -388,16 +387,15 @@ def create_chat_invocation(
388387
if isinstance(extra_body, Mapping):
389388
service_tier = get_value(extra_body.get("service_tier"))
390389
if service_tier is not None:
391-
attributes[OpenAIAttributes.OPENAI_REQUEST_SERVICE_TIER] = service_tier
392-
393-
if len(attributes) > 0:
394-
llm_invocation.attributes = attributes
390+
invocation.attributes[OpenAIAttributes.OPENAI_REQUEST_SERVICE_TIER] = (
391+
service_tier
392+
)
395393

396394
if capture_content: # optimization
397-
llm_invocation.input_messages = _prepare_input_messages(
395+
invocation.input_messages = _prepare_input_messages(
398396
kwargs.get("messages", [])
399397
)
400-
return llm_invocation
398+
return invocation
401399

402400

403401
def get_value(v: Any):

0 commit comments

Comments
 (0)