Skip to content

Commit b51543a

Browse files
authored
openai-v2: migrate _v_new path from LLMInvocation to InferenceInvocation (#4502)
1 parent e00e30d commit b51543a

3 files changed

Lines changed: 48 additions & 62 deletions

File tree

instrumentation-genai/opentelemetry-instrumentation-openai-v2/CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
77

88
## Unreleased
99

10+
11+
- Migrate experimental path from deprecated `LLMInvocation` to `InferenceInvocation`,
12+
using `handler.start_inference()` and `invocation.stop()`/`invocation.fail()` directly
13+
([#4502](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/4502))
1014
- Use `create_duration_histogram` and `create_token_histogram` from
1115
`opentelemetry-util-genai` instead of defining bucket boundaries locally
1216
([#4501](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/4501))

instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/patch.py

Lines changed: 17 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -33,10 +33,10 @@
3333
from opentelemetry.trace import Span, SpanKind, Tracer
3434
from opentelemetry.trace.propagation import set_span_in_context
3535
from opentelemetry.util.genai.handler import TelemetryHandler
36+
from opentelemetry.util.genai.invocation import InferenceInvocation
3637
from opentelemetry.util.genai.types import (
3738
ContentCapturingMode,
3839
Error,
39-
LLMInvocation, # pylint: disable=no-name-in-module # TODO: migrate to InferenceInvocation
4040
OutputMessage,
4141
Text,
4242
ToolCallRequest,
@@ -128,10 +128,8 @@ def chat_completions_create_v_new(
128128
capture_content = content_capturing_mode != ContentCapturingMode.NO_CONTENT
129129

130130
def traced_method(wrapped, instance, args, kwargs):
131-
chat_invocation = handler.start_llm(
132-
create_chat_invocation(
133-
kwargs, instance, capture_content=capture_content
134-
)
131+
chat_invocation = create_chat_invocation(
132+
handler, kwargs, instance, capture_content=capture_content
135133
)
136134

137135
try:
@@ -143,18 +141,16 @@ def traced_method(wrapped, instance, args, kwargs):
143141
parsed_result = result
144142
if is_streaming(kwargs):
145143
return ChatStreamWrapper(
146-
parsed_result, handler, chat_invocation, capture_content
144+
parsed_result, chat_invocation, capture_content
147145
)
148146

149147
_set_response_properties(
150148
chat_invocation, parsed_result, capture_content
151149
)
152-
handler.stop_llm(chat_invocation)
150+
chat_invocation.stop()
153151
return result
154152
except Exception as error:
155-
handler.fail_llm(
156-
chat_invocation, Error(type=type(error), message=str(error))
157-
)
153+
chat_invocation.fail(Error(type=type(error), message=str(error)))
158154
raise
159155

160156
return traced_method
@@ -232,10 +228,8 @@ def async_chat_completions_create_v_new(
232228
capture_content = content_capturing_mode != ContentCapturingMode.NO_CONTENT
233229

234230
async def traced_method(wrapped, instance, args, kwargs):
235-
chat_invocation = handler.start_llm(
236-
create_chat_invocation(
237-
kwargs, instance, capture_content=capture_content
238-
)
231+
chat_invocation = create_chat_invocation(
232+
handler, kwargs, instance, capture_content=capture_content
239233
)
240234

241235
try:
@@ -247,19 +241,17 @@ async def traced_method(wrapped, instance, args, kwargs):
247241
parsed_result = result
248242
if is_streaming(kwargs):
249243
return ChatStreamWrapper(
250-
parsed_result, handler, chat_invocation, capture_content
244+
parsed_result, chat_invocation, capture_content
251245
)
252246

253247
_set_response_properties(
254248
chat_invocation, parsed_result, capture_content
255249
)
256-
handler.stop_llm(chat_invocation)
250+
chat_invocation.stop()
257251
return result
258252

259253
except Exception as error:
260-
handler.fail_llm(
261-
chat_invocation, Error(type=type(error), message=str(error))
262-
)
254+
chat_invocation.fail(Error(type=type(error), message=str(error)))
263255
raise
264256

265257
return traced_method
@@ -495,8 +487,8 @@ def _set_response_attributes(span, result):
495487

496488

497489
def _set_response_properties(
498-
chat_invocation: LLMInvocation, result, capture_content: bool
499-
) -> LLMInvocation:
490+
chat_invocation: InferenceInvocation, result, capture_content: bool
491+
) -> InferenceInvocation:
500492
if getattr(result, "model", None):
501493
chat_invocation.response_model_name = result.model
502494

@@ -868,8 +860,7 @@ def cleanup(self, error: Optional[BaseException] = None):
868860

869861

870862
class ChatStreamWrapper(BaseStreamWrapper):
871-
handler: TelemetryHandler
872-
invocation: LLMInvocation
863+
invocation: InferenceInvocation
873864
response_id: Optional[str] = None
874865
response_model: Optional[str] = None
875866
service_tier: Optional[str] = None
@@ -880,13 +871,11 @@ class ChatStreamWrapper(BaseStreamWrapper):
880871
def __init__(
881872
self,
882873
stream: Stream,
883-
handler: TelemetryHandler,
884-
invocation: LLMInvocation,
874+
invocation: InferenceInvocation,
885875
capture_content: bool,
886876
):
887877
super().__init__(stream, capture_content=capture_content)
888878
self.stream = stream
889-
self.handler = handler
890879
self.invocation = invocation
891880
self.choice_buffers = []
892881

@@ -944,9 +933,7 @@ def cleanup(self, error: Optional[BaseException] = None):
944933
self._set_output_messages()
945934

946935
if error:
947-
self.handler.fail_llm(
948-
self.invocation, Error(type=type(error), message=str(error))
949-
)
936+
self.invocation.fail(Error(type=type(error), message=str(error)))
950937
else:
951-
self.handler.stop_llm(self.invocation)
938+
self.invocation.stop()
952939
self._started = False

instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/utils.py

Lines changed: 27 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -39,9 +39,10 @@
3939
from opentelemetry.util.genai.environment_variables import (
4040
OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT,
4141
)
42+
from opentelemetry.util.genai.handler import TelemetryHandler
43+
from opentelemetry.util.genai.invocation import InferenceInvocation
4244
from opentelemetry.util.genai.types import (
4345
InputMessage,
44-
LLMInvocation, # pylint: disable=no-name-in-module # TODO: migrate to InferenceInvocation
4546
OutputMessage,
4647
Text,
4748
ToolCallRequest,
@@ -328,42 +329,37 @@ def get_llm_request_attributes(
328329

329330

330331
def create_chat_invocation(
332+
handler: TelemetryHandler,
331333
kwargs,
332334
client_instance,
333335
capture_content: bool,
334-
) -> LLMInvocation:
336+
) -> InferenceInvocation:
335337
# pylint: disable=too-many-branches
336338

337-
llm_invocation = LLMInvocation(request_model=kwargs.get("model", ""))
338-
llm_invocation.provider = (
339-
GenAIAttributes.GenAiProviderNameValues.OPENAI.value
340-
)
341-
llm_invocation.temperature = get_value(kwargs.get("temperature"))
342-
llm_invocation.top_p = get_value(kwargs.get("p") or kwargs.get("top_p"))
343-
llm_invocation.max_tokens = get_value(kwargs.get("max_tokens"))
344-
llm_invocation.presence_penalty = get_value(kwargs.get("presence_penalty"))
345-
llm_invocation.frequency_penalty = get_value(
346-
kwargs.get("frequency_penalty")
339+
address, port = get_server_address_and_port(client_instance)
340+
invocation = handler.start_inference(
341+
GenAIAttributes.GenAiProviderNameValues.OPENAI.value,
342+
request_model=kwargs.get("model", ""),
343+
server_address=address if address else None,
344+
server_port=port if port else None,
347345
)
348-
llm_invocation.seed = get_value(kwargs.get("seed"))
346+
invocation.temperature = get_value(kwargs.get("temperature"))
347+
invocation.top_p = get_value(kwargs.get("p") or kwargs.get("top_p"))
348+
invocation.max_tokens = get_value(kwargs.get("max_tokens"))
349+
invocation.presence_penalty = get_value(kwargs.get("presence_penalty"))
350+
invocation.frequency_penalty = get_value(kwargs.get("frequency_penalty"))
351+
invocation.seed = get_value(kwargs.get("seed"))
349352
if (stop_sequences := get_value(kwargs.get("stop"))) is not None:
350353
if isinstance(stop_sequences, str):
351354
stop_sequences = [stop_sequences]
352-
llm_invocation.stop_sequences = stop_sequences
355+
invocation.stop_sequences = stop_sequences
353356

354-
address, port = get_server_address_and_port(client_instance)
355-
if address:
356-
llm_invocation.server_address = address
357-
if port:
358-
llm_invocation.server_port = port
359-
360-
attributes = {}
361357
if (choice_count := get_value(kwargs.get("n"))) is not None:
362358
# Only add non default, meaningful values
363359
if isinstance(choice_count, int) and choice_count != 1:
364-
attributes[GenAIAttributes.GEN_AI_REQUEST_CHOICE_COUNT] = (
365-
choice_count
366-
)
360+
invocation.attributes[
361+
GenAIAttributes.GEN_AI_REQUEST_CHOICE_COUNT
362+
] = choice_count
367363

368364
if (
369365
response_format := get_value(kwargs.get("response_format"))
@@ -373,11 +369,11 @@ def create_chat_invocation(
373369
if (
374370
response_format_type := get_value(response_format.get("type"))
375371
) is not None:
376-
attributes[GenAIAttributes.GEN_AI_OUTPUT_TYPE] = (
372+
invocation.attributes[GenAIAttributes.GEN_AI_OUTPUT_TYPE] = (
377373
response_format_type
378374
)
379375
else:
380-
attributes[
376+
invocation.attributes[
381377
GenAIAttributes.GEN_AI_OPENAI_REQUEST_RESPONSE_FORMAT
382378
] = response_format
383379

@@ -388,16 +384,15 @@ def create_chat_invocation(
388384
if isinstance(extra_body, Mapping):
389385
service_tier = get_value(extra_body.get("service_tier"))
390386
if service_tier is not None:
391-
attributes[OpenAIAttributes.OPENAI_REQUEST_SERVICE_TIER] = service_tier
392-
393-
if len(attributes) > 0:
394-
llm_invocation.attributes = attributes
387+
invocation.attributes[OpenAIAttributes.OPENAI_REQUEST_SERVICE_TIER] = (
388+
service_tier
389+
)
395390

396391
if capture_content: # optimization
397-
llm_invocation.input_messages = _prepare_input_messages(
392+
invocation.input_messages = _prepare_input_messages(
398393
kwargs.get("messages", [])
399394
)
400-
return llm_invocation
395+
return invocation
401396

402397

403398
def get_value(v: Any):

0 commit comments

Comments (0)