Skip to content

Commit b51543a

Browse files
authored
openai-v2: migrate _v_new path from LLMInvocation to InferenceInvocation (#4502)
1 parent e00e30d commit b51543a

3 files changed

Lines changed: 48 additions & 62 deletions

File tree

instrumentation-genai/opentelemetry-instrumentation-openai-v2/CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
77

88
## Unreleased
99

10+
11+
- Migrate experimental path from deprecated `LLMInvocation` to `InferenceInvocation`,
12+
using `handler.start_inference()` and `invocation.stop()`/`invocation.fail()` directly
13+
([#4502](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/4502))
1014
- Use `create_duration_histogram` and `create_token_histogram` from
1115
`opentelemetry-util-genai` instead of defining bucket boundaries locally
1216
([#4501](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/4501))

instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/patch.py

Lines changed: 17 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -33,10 +33,10 @@
3333
from opentelemetry.trace import Span, SpanKind, Tracer
3434
from opentelemetry.trace.propagation import set_span_in_context
3535
from opentelemetry.util.genai.handler import TelemetryHandler
36+
from opentelemetry.util.genai.invocation import InferenceInvocation
3637
from opentelemetry.util.genai.types import (
3738
ContentCapturingMode,
3839
Error,
39-
LLMInvocation, # pylint: disable=no-name-in-module # TODO: migrate to InferenceInvocation
4040
OutputMessage,
4141
Text,
4242
ToolCallRequest,
@@ -128,10 +128,8 @@ def chat_completions_create_v_new(
128128
capture_content = content_capturing_mode != ContentCapturingMode.NO_CONTENT
129129

130130
def traced_method(wrapped, instance, args, kwargs):
131-
chat_invocation = handler.start_llm(
132-
create_chat_invocation(
133-
kwargs, instance, capture_content=capture_content
134-
)
131+
chat_invocation = create_chat_invocation(
132+
handler, kwargs, instance, capture_content=capture_content
135133
)
136134

137135
try:
@@ -143,18 +141,16 @@ def traced_method(wrapped, instance, args, kwargs):
143141
parsed_result = result
144142
if is_streaming(kwargs):
145143
return ChatStreamWrapper(
146-
parsed_result, handler, chat_invocation, capture_content
144+
parsed_result, chat_invocation, capture_content
147145
)
148146

149147
_set_response_properties(
150148
chat_invocation, parsed_result, capture_content
151149
)
152-
handler.stop_llm(chat_invocation)
150+
chat_invocation.stop()
153151
return result
154152
except Exception as error:
155-
handler.fail_llm(
156-
chat_invocation, Error(type=type(error), message=str(error))
157-
)
153+
chat_invocation.fail(Error(type=type(error), message=str(error)))
158154
raise
159155

160156
return traced_method
@@ -232,10 +228,8 @@ def async_chat_completions_create_v_new(
232228
capture_content = content_capturing_mode != ContentCapturingMode.NO_CONTENT
233229

234230
async def traced_method(wrapped, instance, args, kwargs):
235-
chat_invocation = handler.start_llm(
236-
create_chat_invocation(
237-
kwargs, instance, capture_content=capture_content
238-
)
231+
chat_invocation = create_chat_invocation(
232+
handler, kwargs, instance, capture_content=capture_content
239233
)
240234

241235
try:
@@ -247,19 +241,17 @@ async def traced_method(wrapped, instance, args, kwargs):
247241
parsed_result = result
248242
if is_streaming(kwargs):
249243
return ChatStreamWrapper(
250-
parsed_result, handler, chat_invocation, capture_content
244+
parsed_result, chat_invocation, capture_content
251245
)
252246

253247
_set_response_properties(
254248
chat_invocation, parsed_result, capture_content
255249
)
256-
handler.stop_llm(chat_invocation)
250+
chat_invocation.stop()
257251
return result
258252

259253
except Exception as error:
260-
handler.fail_llm(
261-
chat_invocation, Error(type=type(error), message=str(error))
262-
)
254+
chat_invocation.fail(Error(type=type(error), message=str(error)))
263255
raise
264256

265257
return traced_method
@@ -495,8 +487,8 @@ def _set_response_attributes(span, result):
495487

496488

497489
def _set_response_properties(
498-
chat_invocation: LLMInvocation, result, capture_content: bool
499-
) -> LLMInvocation:
490+
chat_invocation: InferenceInvocation, result, capture_content: bool
491+
) -> InferenceInvocation:
500492
if getattr(result, "model", None):
501493
chat_invocation.response_model_name = result.model
502494

@@ -868,8 +860,7 @@ def cleanup(self, error: Optional[BaseException] = None):
868860

869861

870862
class ChatStreamWrapper(BaseStreamWrapper):
871-
handler: TelemetryHandler
872-
invocation: LLMInvocation
863+
invocation: InferenceInvocation
873864
response_id: Optional[str] = None
874865
response_model: Optional[str] = None
875866
service_tier: Optional[str] = None
@@ -880,13 +871,11 @@ class ChatStreamWrapper(BaseStreamWrapper):
880871
def __init__(
881872
self,
882873
stream: Stream,
883-
handler: TelemetryHandler,
884-
invocation: LLMInvocation,
874+
invocation: InferenceInvocation,
885875
capture_content: bool,
886876
):
887877
super().__init__(stream, capture_content=capture_content)
888878
self.stream = stream
889-
self.handler = handler
890879
self.invocation = invocation
891880
self.choice_buffers = []
892881

@@ -944,9 +933,7 @@ def cleanup(self, error: Optional[BaseException] = None):
944933
self._set_output_messages()
945934

946935
if error:
947-
self.handler.fail_llm(
948-
self.invocation, Error(type=type(error), message=str(error))
949-
)
936+
self.invocation.fail(Error(type=type(error), message=str(error)))
950937
else:
951-
self.handler.stop_llm(self.invocation)
938+
self.invocation.stop()
952939
self._started = False

instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/utils.py

Lines changed: 27 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -39,9 +39,10 @@
3939
from opentelemetry.util.genai.environment_variables import (
4040
OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT,
4141
)
42+
from opentelemetry.util.genai.handler import TelemetryHandler
43+
from opentelemetry.util.genai.invocation import InferenceInvocation
4244
from opentelemetry.util.genai.types import (
4345
InputMessage,
44-
LLMInvocation, # pylint: disable=no-name-in-module # TODO: migrate to InferenceInvocation
4546
OutputMessage,
4647
Text,
4748
ToolCallRequest,
@@ -328,42 +329,37 @@ def get_llm_request_attributes(
328329

329330

330331
def create_chat_invocation(
332+
handler: TelemetryHandler,
331333
kwargs,
332334
client_instance,
333335
capture_content: bool,
334-
) -> LLMInvocation:
336+
) -> InferenceInvocation:
335337
# pylint: disable=too-many-branches
336338

337-
llm_invocation = LLMInvocation(request_model=kwargs.get("model", ""))
338-
llm_invocation.provider = (
339-
GenAIAttributes.GenAiProviderNameValues.OPENAI.value
340-
)
341-
llm_invocation.temperature = get_value(kwargs.get("temperature"))
342-
llm_invocation.top_p = get_value(kwargs.get("p") or kwargs.get("top_p"))
343-
llm_invocation.max_tokens = get_value(kwargs.get("max_tokens"))
344-
llm_invocation.presence_penalty = get_value(kwargs.get("presence_penalty"))
345-
llm_invocation.frequency_penalty = get_value(
346-
kwargs.get("frequency_penalty")
339+
address, port = get_server_address_and_port(client_instance)
340+
invocation = handler.start_inference(
341+
GenAIAttributes.GenAiProviderNameValues.OPENAI.value,
342+
request_model=kwargs.get("model", ""),
343+
server_address=address if address else None,
344+
server_port=port if port else None,
347345
)
348-
llm_invocation.seed = get_value(kwargs.get("seed"))
346+
invocation.temperature = get_value(kwargs.get("temperature"))
347+
invocation.top_p = get_value(kwargs.get("p") or kwargs.get("top_p"))
348+
invocation.max_tokens = get_value(kwargs.get("max_tokens"))
349+
invocation.presence_penalty = get_value(kwargs.get("presence_penalty"))
350+
invocation.frequency_penalty = get_value(kwargs.get("frequency_penalty"))
351+
invocation.seed = get_value(kwargs.get("seed"))
349352
if (stop_sequences := get_value(kwargs.get("stop"))) is not None:
350353
if isinstance(stop_sequences, str):
351354
stop_sequences = [stop_sequences]
352-
llm_invocation.stop_sequences = stop_sequences
355+
invocation.stop_sequences = stop_sequences
353356

354-
address, port = get_server_address_and_port(client_instance)
355-
if address:
356-
llm_invocation.server_address = address
357-
if port:
358-
llm_invocation.server_port = port
359-
360-
attributes = {}
361357
if (choice_count := get_value(kwargs.get("n"))) is not None:
362358
# Only add non default, meaningful values
363359
if isinstance(choice_count, int) and choice_count != 1:
364-
attributes[GenAIAttributes.GEN_AI_REQUEST_CHOICE_COUNT] = (
365-
choice_count
366-
)
360+
invocation.attributes[
361+
GenAIAttributes.GEN_AI_REQUEST_CHOICE_COUNT
362+
] = choice_count
367363

368364
if (
369365
response_format := get_value(kwargs.get("response_format"))
@@ -373,11 +369,11 @@ def create_chat_invocation(
373369
if (
374370
response_format_type := get_value(response_format.get("type"))
375371
) is not None:
376-
attributes[GenAIAttributes.GEN_AI_OUTPUT_TYPE] = (
372+
invocation.attributes[GenAIAttributes.GEN_AI_OUTPUT_TYPE] = (
377373
response_format_type
378374
)
379375
else:
380-
attributes[
376+
invocation.attributes[
381377
GenAIAttributes.GEN_AI_OPENAI_REQUEST_RESPONSE_FORMAT
382378
] = response_format
383379

@@ -388,16 +384,15 @@ def create_chat_invocation(
388384
if isinstance(extra_body, Mapping):
389385
service_tier = get_value(extra_body.get("service_tier"))
390386
if service_tier is not None:
391-
attributes[OpenAIAttributes.OPENAI_REQUEST_SERVICE_TIER] = service_tier
392-
393-
if len(attributes) > 0:
394-
llm_invocation.attributes = attributes
387+
invocation.attributes[OpenAIAttributes.OPENAI_REQUEST_SERVICE_TIER] = (
388+
service_tier
389+
)
395390

396391
if capture_content: # optimization
397-
llm_invocation.input_messages = _prepare_input_messages(
392+
invocation.input_messages = _prepare_input_messages(
398393
kwargs.get("messages", [])
399394
)
400-
return llm_invocation
395+
return invocation
401396

402397

403398
def get_value(v: Any):

0 commit comments

Comments (0)