Merge branch 'main' into 4209

iblancasa · web-flow · commit fec2c6de55c5 · 2026-04-30T12:30:51.000+02:00
diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/CHANGELOG.md b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/CHANGELOG.md
@@ -7,6 +7,13 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## Unreleased
 
+
+- Migrate experimental path from deprecated `LLMInvocation` to `InferenceInvocation`,
+  using `handler.start_inference()` and `invocation.stop()`/`invocation.fail()` directly
+  ([#4502](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/4502))
+- Use `create_duration_histogram` and `create_token_histogram` from
+  `opentelemetry-util-genai` instead of defining bucket boundaries locally
+  ([#4501](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/4501))
 - Import `OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT` from
   `opentelemetry.util.genai.environment_variables` instead of re-defining it locally,
   making `opentelemetry-util-genai` the single source of truth for this constant.
@@ -28,6 +35,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - Add strongly typed Responses API extractors with validation and content
   extraction improvements
   ([#4337](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/4337))
+- Default the `gen_ai.request.model` attribute to an empty string when the model is missing.
+  ([#4494](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/4494))
 
 ## Version 2.3b0 (2025-12-24)
 
diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/instruments.py b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/instruments.py
@@ -1,52 +1,13 @@
 from opentelemetry.metrics import Histogram, Meter
-from opentelemetry.semconv._incubating.metrics import gen_ai_metrics
-
-_GEN_AI_CLIENT_OPERATION_DURATION_BUCKETS = [
-    0.01,
-    0.02,
-    0.04,
-    0.08,
-    0.16,
-    0.32,
-    0.64,
-    1.28,
-    2.56,
-    5.12,
-    10.24,
-    20.48,
-    40.96,
-    81.92,
-]
-
-_GEN_AI_CLIENT_TOKEN_USAGE_BUCKETS = [
-    1,
-    4,
-    16,
-    64,
-    256,
-    1024,
-    4096,
-    16384,
-    65536,
-    262144,
-    1048576,
-    4194304,
-    16777216,
-    67108864,
-]
+from opentelemetry.util.genai.instruments import (
+    create_duration_histogram,
+    create_token_histogram,
+)
 
 
 class Instruments:
     def __init__(self, meter: Meter):
-        self.operation_duration_histogram: Histogram = meter.create_histogram(
-            name=gen_ai_metrics.GEN_AI_CLIENT_OPERATION_DURATION,
-            description="GenAI operation duration",
-            unit="s",
-            explicit_bucket_boundaries_advisory=_GEN_AI_CLIENT_OPERATION_DURATION_BUCKETS,
-        )
-        self.token_usage_histogram: Histogram = meter.create_histogram(
-            name=gen_ai_metrics.GEN_AI_CLIENT_TOKEN_USAGE,
-            description="Measures number of input and output tokens used",
-            unit="{token}",
-            explicit_bucket_boundaries_advisory=_GEN_AI_CLIENT_TOKEN_USAGE_BUCKETS,
+        self.operation_duration_histogram: Histogram = (
+            create_duration_histogram(meter)
         )
+        self.token_usage_histogram: Histogram = create_token_histogram(meter)
diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/patch.py b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/patch.py
@@ -33,10 +33,10 @@
 from opentelemetry.trace import Span, SpanKind, Tracer
 from opentelemetry.trace.propagation import set_span_in_context
 from opentelemetry.util.genai.handler import TelemetryHandler
+from opentelemetry.util.genai.invocation import InferenceInvocation
 from opentelemetry.util.genai.types import (
     ContentCapturingMode,
     Error,
-    LLMInvocation,  # pylint: disable=no-name-in-module  # TODO: migrate to InferenceInvocation
     OutputMessage,
     Text,
     ToolCallRequest,
@@ -68,7 +68,9 @@ def traced_method(wrapped, instance, args, kwargs):
             **get_llm_request_attributes(kwargs, instance, False)
         }
 
-        span_name = f"{span_attributes[GenAIAttributes.GEN_AI_OPERATION_NAME]} {span_attributes[GenAIAttributes.GEN_AI_REQUEST_MODEL]}"
+        operation_name = span_attributes[GenAIAttributes.GEN_AI_OPERATION_NAME]
+        model = span_attributes.get(GenAIAttributes.GEN_AI_REQUEST_MODEL)
+        span_name = f"{operation_name} {model}" if model else operation_name
         with tracer.start_as_current_span(
             name=span_name,
             kind=SpanKind.CLIENT,
@@ -128,10 +130,8 @@ def chat_completions_create_v_new(
     capture_content = content_capturing_mode != ContentCapturingMode.NO_CONTENT
 
     def traced_method(wrapped, instance, args, kwargs):
-        chat_invocation = handler.start_llm(
-            create_chat_invocation(
-                kwargs, instance, capture_content=capture_content
-            )
+        chat_invocation = create_chat_invocation(
+            handler, kwargs, instance, capture_content=capture_content
         )
 
         try:
@@ -143,18 +143,16 @@ def traced_method(wrapped, instance, args, kwargs):
                 parsed_result = result
             if is_streaming(kwargs):
                 return ChatStreamWrapper(
-                    parsed_result, handler, chat_invocation, capture_content
+                    parsed_result, chat_invocation, capture_content
                 )
 
             _set_response_properties(
                 chat_invocation, parsed_result, capture_content
             )
-            handler.stop_llm(chat_invocation)
+            chat_invocation.stop()
             return result
         except Exception as error:
-            handler.fail_llm(
-                chat_invocation, Error(type=type(error), message=str(error))
-            )
+            chat_invocation.fail(Error(type=type(error), message=str(error)))
             raise
 
     return traced_method
@@ -173,7 +171,9 @@ async def traced_method(wrapped, instance, args, kwargs):
             **get_llm_request_attributes(kwargs, instance, False)
         }
 
-        span_name = f"{span_attributes[GenAIAttributes.GEN_AI_OPERATION_NAME]} {span_attributes[GenAIAttributes.GEN_AI_REQUEST_MODEL]}"
+        operation_name = span_attributes[GenAIAttributes.GEN_AI_OPERATION_NAME]
+        model = span_attributes.get(GenAIAttributes.GEN_AI_REQUEST_MODEL)
+        span_name = f"{operation_name} {model}" if model else operation_name
         with tracer.start_as_current_span(
             name=span_name,
             kind=SpanKind.CLIENT,
@@ -232,10 +232,8 @@ def async_chat_completions_create_v_new(
     capture_content = content_capturing_mode != ContentCapturingMode.NO_CONTENT
 
     async def traced_method(wrapped, instance, args, kwargs):
-        chat_invocation = handler.start_llm(
-            create_chat_invocation(
-                kwargs, instance, capture_content=capture_content
-            )
+        chat_invocation = create_chat_invocation(
+            handler, kwargs, instance, capture_content=capture_content
         )
 
         try:
@@ -247,19 +245,17 @@ async def traced_method(wrapped, instance, args, kwargs):
                 parsed_result = result
             if is_streaming(kwargs):
                 return ChatStreamWrapper(
-                    parsed_result, handler, chat_invocation, capture_content
+                    parsed_result, chat_invocation, capture_content
                 )
 
             _set_response_properties(
                 chat_invocation, parsed_result, capture_content
             )
-            handler.stop_llm(chat_invocation)
+            chat_invocation.stop()
             return result
 
         except Exception as error:
-            handler.fail_llm(
-                chat_invocation, Error(type=type(error), message=str(error))
-            )
+            chat_invocation.fail(Error(type=type(error), message=str(error)))
             raise
 
     return traced_method
@@ -373,7 +369,9 @@ async def traced_method(wrapped, instance, args, kwargs):
 
 def _get_embeddings_span_name(span_attributes):
     """Get span name for embeddings operations."""
-    return f"{span_attributes[GenAIAttributes.GEN_AI_OPERATION_NAME]} {span_attributes[GenAIAttributes.GEN_AI_REQUEST_MODEL]}"
+    operation_name = span_attributes[GenAIAttributes.GEN_AI_OPERATION_NAME]
+    model = span_attributes.get(GenAIAttributes.GEN_AI_REQUEST_MODEL)
+    return f"{operation_name} {model}" if model else operation_name
 
 
 def _record_metrics(
@@ -495,8 +493,8 @@ def _set_response_attributes(span, result):
 
 
 def _set_response_properties(
-    chat_invocation: LLMInvocation, result, capture_content: bool
-) -> LLMInvocation:
+    chat_invocation: InferenceInvocation, result, capture_content: bool
+) -> InferenceInvocation:
     if getattr(result, "model", None):
         chat_invocation.response_model_name = result.model
 
@@ -868,8 +866,7 @@ def cleanup(self, error: Optional[BaseException] = None):
 
 
 class ChatStreamWrapper(BaseStreamWrapper):
-    handler: TelemetryHandler
-    invocation: LLMInvocation
+    invocation: InferenceInvocation
     response_id: Optional[str] = None
     response_model: Optional[str] = None
     service_tier: Optional[str] = None
@@ -880,13 +877,11 @@ class ChatStreamWrapper(BaseStreamWrapper):
     def __init__(
         self,
         stream: Stream,
-        handler: TelemetryHandler,
-        invocation: LLMInvocation,
+        invocation: InferenceInvocation,
         capture_content: bool,
     ):
         super().__init__(stream, capture_content=capture_content)
         self.stream = stream
-        self.handler = handler
         self.invocation = invocation
         self.choice_buffers = []
 
@@ -944,9 +939,7 @@ def cleanup(self, error: Optional[BaseException] = None):
         self._set_output_messages()
 
         if error:
-            self.handler.fail_llm(
-                self.invocation, Error(type=type(error), message=str(error))
-            )
+            self.invocation.fail(Error(type=type(error), message=str(error)))
         else:
-            self.handler.stop_llm(self.invocation)
+            self.invocation.stop()
         self._started = False
diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/utils.py b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/utils.py
@@ -39,9 +39,10 @@
 from opentelemetry.util.genai.environment_variables import (
     OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT,
 )
+from opentelemetry.util.genai.handler import TelemetryHandler
+from opentelemetry.util.genai.invocation import InferenceInvocation
 from opentelemetry.util.genai.types import (
     InputMessage,
-    LLMInvocation,  # pylint: disable=no-name-in-module  # TODO: migrate to InferenceInvocation
     OutputMessage,
     Text,
     ToolCallRequest,
@@ -210,13 +211,16 @@ def get_llm_request_attributes(
     latest_experimental_enabled,
     operation_name=GenAIAttributes.GenAiOperationNameValues.CHAT.value,
 ):
-    # pylint: disable=too-many-branches
+    # pylint: disable=too-many-branches,too-many-locals
 
     attributes = {
         GenAIAttributes.GEN_AI_OPERATION_NAME: operation_name,
-        GenAIAttributes.GEN_AI_REQUEST_MODEL: kwargs.get("model"),
     }
 
+    model = kwargs.get("model")
+    if model:
+        attributes[GenAIAttributes.GEN_AI_REQUEST_MODEL] = model
+
     if latest_experimental_enabled:
         attributes.update(
             {
@@ -328,42 +332,37 @@ def get_llm_request_attributes(
 
 
 def create_chat_invocation(
+    handler: TelemetryHandler,
     kwargs,
     client_instance,
     capture_content: bool,
-) -> LLMInvocation:
+) -> InferenceInvocation:
     # pylint: disable=too-many-branches
 
-    llm_invocation = LLMInvocation(request_model=kwargs.get("model", ""))
-    llm_invocation.provider = (
-        GenAIAttributes.GenAiProviderNameValues.OPENAI.value
-    )
-    llm_invocation.temperature = get_value(kwargs.get("temperature"))
-    llm_invocation.top_p = get_value(kwargs.get("p") or kwargs.get("top_p"))
-    llm_invocation.max_tokens = get_value(kwargs.get("max_tokens"))
-    llm_invocation.presence_penalty = get_value(kwargs.get("presence_penalty"))
-    llm_invocation.frequency_penalty = get_value(
-        kwargs.get("frequency_penalty")
+    address, port = get_server_address_and_port(client_instance)
+    invocation = handler.start_inference(
+        GenAIAttributes.GenAiProviderNameValues.OPENAI.value,
+        request_model=kwargs.get("model", ""),
+        server_address=address if address else None,
+        server_port=port if port else None,
     )
-    llm_invocation.seed = get_value(kwargs.get("seed"))
+    invocation.temperature = get_value(kwargs.get("temperature"))
+    invocation.top_p = get_value(kwargs.get("p") or kwargs.get("top_p"))
+    invocation.max_tokens = get_value(kwargs.get("max_tokens"))
+    invocation.presence_penalty = get_value(kwargs.get("presence_penalty"))
+    invocation.frequency_penalty = get_value(kwargs.get("frequency_penalty"))
+    invocation.seed = get_value(kwargs.get("seed"))
     if (stop_sequences := get_value(kwargs.get("stop"))) is not None:
         if isinstance(stop_sequences, str):
             stop_sequences = [stop_sequences]
-        llm_invocation.stop_sequences = stop_sequences
+        invocation.stop_sequences = stop_sequences
 
-    address, port = get_server_address_and_port(client_instance)
-    if address:
-        llm_invocation.server_address = address
-    if port:
-        llm_invocation.server_port = port
-
-    attributes = {}
     if (choice_count := get_value(kwargs.get("n"))) is not None:
         # Only add non default, meaningful values
         if isinstance(choice_count, int) and choice_count != 1:
-            attributes[GenAIAttributes.GEN_AI_REQUEST_CHOICE_COUNT] = (
-                choice_count
-            )
+            invocation.attributes[
+                GenAIAttributes.GEN_AI_REQUEST_CHOICE_COUNT
+            ] = choice_count
 
     if (
         response_format := get_value(kwargs.get("response_format"))
@@ -373,11 +372,11 @@ def create_chat_invocation(
             if (
                 response_format_type := get_value(response_format.get("type"))
             ) is not None:
-                attributes[GenAIAttributes.GEN_AI_OUTPUT_TYPE] = (
+                invocation.attributes[GenAIAttributes.GEN_AI_OUTPUT_TYPE] = (
                     response_format_type
                 )
         else:
-            attributes[
+            invocation.attributes[
                 GenAIAttributes.GEN_AI_OPENAI_REQUEST_RESPONSE_FORMAT
             ] = response_format
 
@@ -388,16 +387,15 @@ def create_chat_invocation(
         if isinstance(extra_body, Mapping):
             service_tier = get_value(extra_body.get("service_tier"))
     if service_tier is not None:
-        attributes[OpenAIAttributes.OPENAI_REQUEST_SERVICE_TIER] = service_tier
-
-    if len(attributes) > 0:
-        llm_invocation.attributes = attributes
+        invocation.attributes[OpenAIAttributes.OPENAI_REQUEST_SERVICE_TIER] = (
+            service_tier
+        )
 
     if capture_content:  # optimization
-        llm_invocation.input_messages = _prepare_input_messages(
+        invocation.input_messages = _prepare_input_messages(
             kwargs.get("messages", [])
         )
-    return llm_invocation
+    return invocation
 
 
 def get_value(v: Any):
diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_request_attributes.py b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_request_attributes.py