diff --git a/instrumentation-genai/opentelemetry-instrumentation-anthropic/pyproject.toml b/instrumentation-genai/opentelemetry-instrumentation-anthropic/pyproject.toml index 74c411a1a5..5cc6754ef7 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-anthropic/pyproject.toml +++ b/instrumentation-genai/opentelemetry-instrumentation-anthropic/pyproject.toml @@ -26,16 +26,14 @@ classifiers = [ "Programming Language :: Python :: 3.14", ] dependencies = [ - "opentelemetry-api ~= 1.37", - "opentelemetry-instrumentation ~= 0.58b0", - "opentelemetry-semantic-conventions ~= 0.58b0", + "opentelemetry-api ~= 1.39", + "opentelemetry-instrumentation ~= 0.60b0", + "opentelemetry-semantic-conventions ~= 0.60b0", "opentelemetry-util-genai >= 0.2b0, <0.4b0", ] [project.optional-dependencies] -instruments = [ - "anthropic >= 0.51.0", -] +instruments = ["anthropic >= 0.51.0"] [project.entry-points.opentelemetry_instrumentor] anthropic = "opentelemetry.instrumentation.anthropic:AnthropicInstrumentor" @@ -48,15 +46,10 @@ Repository = "https://github.com/open-telemetry/opentelemetry-python-contrib" path = "src/opentelemetry/instrumentation/anthropic/version.py" [tool.hatch.build.targets.sdist] -include = [ - "/src", - "/tests", - "/examples", -] +include = ["/src", "/tests", "/examples"] [tool.hatch.build.targets.wheel] packages = ["src/opentelemetry"] [tool.pytest.ini_options] testpaths = ["tests"] - diff --git a/instrumentation-genai/opentelemetry-instrumentation-anthropic/tests/requirements.oldest.txt b/instrumentation-genai/opentelemetry-instrumentation-anthropic/tests/requirements.oldest.txt index 0b77b1a7cb..1b1d2b1994 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-anthropic/tests/requirements.oldest.txt +++ b/instrumentation-genai/opentelemetry-instrumentation-anthropic/tests/requirements.oldest.txt @@ -21,8 +21,8 @@ pytest==7.4.4 pytest-vcr==1.0.2 pytest-asyncio==0.21.0 wrapt==1.16.0 -opentelemetry-api==1.37 # when updating, also update in 
pyproject.toml -opentelemetry-sdk==1.37 # when updating, also update in pyproject.toml -opentelemetry-semantic-conventions==0.58b0 # when updating, also update in pyproject.toml +opentelemetry-api==1.39 # when updating, also update in pyproject.toml +opentelemetry-sdk==1.39 # when updating, also update in pyproject.toml +opentelemetry-semantic-conventions==0.60b0 # when updating, also update in pyproject.toml -e instrumentation-genai/opentelemetry-instrumentation-anthropic diff --git a/instrumentation-genai/opentelemetry-instrumentation-claude-agent-sdk/pyproject.toml b/instrumentation-genai/opentelemetry-instrumentation-claude-agent-sdk/pyproject.toml index 3149629fac..fb38e61589 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-claude-agent-sdk/pyproject.toml +++ b/instrumentation-genai/opentelemetry-instrumentation-claude-agent-sdk/pyproject.toml @@ -24,16 +24,14 @@ classifiers = [ "Programming Language :: Python :: 3.13", ] dependencies = [ - "opentelemetry-api ~= 1.37", - "opentelemetry-instrumentation ~= 0.58b0", - "opentelemetry-semantic-conventions ~= 0.58b0", + "opentelemetry-api ~= 1.39", + "opentelemetry-instrumentation ~= 0.60b0", + "opentelemetry-semantic-conventions ~= 0.60b0", "opentelemetry-util-genai >= 0.2b0, <0.4b0", ] [project.optional-dependencies] -instruments = [ - "claude-agent-sdk >= 0.1.14", -] +instruments = ["claude-agent-sdk >= 0.1.14"] [project.entry-points.opentelemetry_instrumentor] claude-agent-sdk = "opentelemetry.instrumentation.claude_agent_sdk:ClaudeAgentSDKInstrumentor" @@ -46,11 +44,7 @@ Repository = "https://github.com/open-telemetry/opentelemetry-python-contrib" path = "src/opentelemetry/instrumentation/claude_agent_sdk/version.py" [tool.hatch.build.targets.sdist] -include = [ - "/src", - "/tests", - "/examples", -] +include = ["/src", "/tests", "/examples"] [tool.hatch.build.targets.wheel] packages = ["src/opentelemetry"] diff --git 
a/instrumentation-genai/opentelemetry-instrumentation-claude-agent-sdk/tests/requirements.oldest.txt b/instrumentation-genai/opentelemetry-instrumentation-claude-agent-sdk/tests/requirements.oldest.txt index adfadef283..833e05fb1d 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-claude-agent-sdk/tests/requirements.oldest.txt +++ b/instrumentation-genai/opentelemetry-instrumentation-claude-agent-sdk/tests/requirements.oldest.txt @@ -21,8 +21,8 @@ pytest==7.4.4 pytest-vcr==1.0.2 pytest-asyncio==0.21.0 wrapt==1.16.0 -opentelemetry-api==1.37 # when updating, also update in pyproject.toml -opentelemetry-sdk==1.37 # when updating, also update in pyproject.toml -opentelemetry-semantic-conventions==0.58b0 # when updating, also update in pyproject.toml +opentelemetry-api==1.39 # when updating, also update in pyproject.toml +opentelemetry-sdk==1.39 # when updating, also update in pyproject.toml +opentelemetry-semantic-conventions==0.60b0 # when updating, also update in pyproject.toml -e instrumentation-genai/opentelemetry-instrumentation-claude-agent-sdk diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/pyproject.toml b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/pyproject.toml index fc5939985b..7b4fcce224 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/pyproject.toml +++ b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/pyproject.toml @@ -26,16 +26,14 @@ classifiers = [ "Programming Language :: Python :: 3.14", ] dependencies = [ - "opentelemetry-api ~= 1.37", - "opentelemetry-instrumentation ~= 0.58b0", - "opentelemetry-semantic-conventions ~= 0.58b0", + "opentelemetry-api ~= 1.39", + "opentelemetry-instrumentation ~= 0.60b0", + "opentelemetry-semantic-conventions ~= 0.60b0", "opentelemetry-util-genai", ] [project.optional-dependencies] -instruments = [ - "openai >= 1.26.0", -] +instruments = ["openai >= 1.26.0"] [project.entry-points.opentelemetry_instrumentor] openai = 
"opentelemetry.instrumentation.openai_v2:OpenAIInstrumentor" @@ -48,10 +46,7 @@ Repository = "https://github.com/open-telemetry/opentelemetry-python-contrib" path = "src/opentelemetry/instrumentation/openai_v2/version.py" [tool.hatch.build.targets.sdist] -include = [ - "/src", - "/tests", -] +include = ["/src", "/tests"] [tool.hatch.build.targets.wheel] packages = ["src/opentelemetry"] diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/requirements.oldest.txt b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/requirements.oldest.txt index 2644ba47e8..45339fb438 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/requirements.oldest.txt +++ b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/requirements.oldest.txt @@ -29,9 +29,9 @@ pytest-vcr==1.0.2 pytest-asyncio==0.21.0 wrapt==1.16.0 opentelemetry-exporter-otlp-proto-http~=1.30 -opentelemetry-api==1.37 # when updating, also update in pyproject.toml -opentelemetry-sdk==1.37 # when updating, also update in pyproject.toml -opentelemetry-semantic-conventions==0.58b0 # when updating, also update in pyproject.toml +opentelemetry-api==1.39 # when updating, also update in pyproject.toml +opentelemetry-sdk==1.39 # when updating, also update in pyproject.toml +opentelemetry-semantic-conventions==0.60b0 # when updating, also update in pyproject.toml -e instrumentation-genai/opentelemetry-instrumentation-openai-v2 -e util/opentelemetry-util-genai \ No newline at end of file diff --git a/util/opentelemetry-util-genai/CHANGELOG.md b/util/opentelemetry-util-genai/CHANGELOG.md index 1b0a444b95..d779ac8efe 100644 --- a/util/opentelemetry-util-genai/CHANGELOG.md +++ b/util/opentelemetry-util-genai/CHANGELOG.md @@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## Unreleased +- Add EmbeddingInvocation span lifecycle support + 
([#4219](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/4219)) - Populate schema_url on metrics ([#4320](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/4320)) - Add workflow invocation type to genAI utils @@ -41,10 +43,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ([https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3795](#3795)) - Make inputs / outputs / system instructions optional params to `on_completion`, ([https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3802](#3802)). - - Use a SHA256 hash of the system instructions as it's upload filename, and check +- Use a SHA256 hash of the system instructions as its upload filename, and check if the file exists before re-uploading it, ([https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3814](#3814)). - ## Version 0.1b0 (2025-09-25) - Add completion hook to genai utils to implement semconv v1.37. @@ -57,6 +58,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ([#3752](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3752)) ([#3759](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3759)) ([#3763](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3763)) + - Add a utility to parse the `OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT` environment variable. Add `gen_ai_latest_experimental` as a new value to the Sem Conv stability flag ([#3716](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/3716)). 
diff --git a/util/opentelemetry-util-genai/README.rst b/util/opentelemetry-util-genai/README.rst index bc4b348fcf..94806e0159 100644 --- a/util/opentelemetry-util-genai/README.rst +++ b/util/opentelemetry-util-genai/README.rst @@ -36,6 +36,18 @@ This package provides these span attributes: - `gen_ai.output.messages`: Str('[{"role": "AI", "parts": [{"content": "hello back", "type": "text"}], "finish_reason": "stop"}]') - `gen_ai.system_instructions`: Str('[{"content": "You are a helpful assistant.", "type": "text"}]') (when system instruction is provided) +This package also supports embedding invocation spans via the `embedding` context manager. +For embedding invocations, common attributes include: + +- `gen_ai.provider.name`: Str(openai) +- `gen_ai.operation.name`: Str(embeddings) +- `gen_ai.request.model`: Str(text-embedding-3-small) +- `gen_ai.embeddings.dimension.count`: Int(1536) +- `gen_ai.request.encoding_formats`: Slice(["float"]) +- `gen_ai.usage.input_tokens`: Int(24) +- `server.address`: Str(api.openai.com) +- `server.port`: Int(443) + When `EVENT_ONLY` or `SPAN_AND_EVENT` mode is enabled and a LoggerProvider is configured, the package also emits `gen_ai.client.inference.operation.details` events with structured message content (as dictionaries instead of JSON strings). 
Note that when using `EVENT_ONLY` diff --git a/util/opentelemetry-util-genai/pyproject.toml b/util/opentelemetry-util-genai/pyproject.toml index c9d4d388c1..f8705369c2 100644 --- a/util/opentelemetry-util-genai/pyproject.toml +++ b/util/opentelemetry-util-genai/pyproject.toml @@ -26,9 +26,9 @@ classifiers = [ "Programming Language :: Python :: 3.14", ] dependencies = [ - "opentelemetry-instrumentation ~= 0.58b0", - "opentelemetry-semantic-conventions ~= 0.58b0", - "opentelemetry-api>=1.31.0", + "opentelemetry-instrumentation ~= 0.60b0", + "opentelemetry-semantic-conventions ~= 0.60b0", + "opentelemetry-api>=1.39", ] [project.entry-points.opentelemetry_genai_completion_hook] @@ -46,10 +46,7 @@ Repository = "https://github.com/open-telemetry/opentelemetry-python-contrib" path = "src/opentelemetry/util/genai/version.py" [tool.hatch.build.targets.sdist] -include = [ - "/src", - "/tests", -] +include = ["/src", "/tests"] [tool.hatch.build.targets.wheel] packages = ["src/opentelemetry"] diff --git a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/handler.py b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/handler.py index 4e85799ea1..80b801e9a1 100644 --- a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/handler.py +++ b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/handler.py @@ -62,7 +62,7 @@ import timeit from contextlib import contextmanager -from typing import Iterator +from typing import Iterator, TypeVar from opentelemetry import context as otel_context from opentelemetry._logs import ( @@ -80,13 +80,23 @@ ) from opentelemetry.util.genai.metrics import InvocationMetricsRecorder from opentelemetry.util.genai.span_utils import ( + _apply_embedding_finish_attributes, _apply_error_attributes, _apply_llm_finish_attributes, + _get_embedding_span_name, + _get_llm_span_name, _maybe_emit_llm_event, ) -from opentelemetry.util.genai.types import Error, LLMInvocation +from opentelemetry.util.genai.types import ( + 
EmbeddingInvocation, + Error, + GenAIInvocation, + LLMInvocation, +) from opentelemetry.util.genai.version import __version__ +_T = TypeVar("_T", bound=GenAIInvocation) + class TelemetryHandler: """ @@ -134,14 +144,28 @@ def _record_llm_metrics( error_type=error_type, ) - def start_llm( - self, - invocation: LLMInvocation, - ) -> LLMInvocation: - """Start an LLM invocation and create a pending span entry.""" - # Create a span and attach it as current; keep the token to detach later + @staticmethod + def _record_embedding_metrics( + invocation: EmbeddingInvocation, + span: Span | None = None, + *, + error_type: str | None = None, + ) -> None: + # Metrics recorder currently supports LLMInvocation fields only. + # Keep embedding metrics as a no-op until dedicated embedding + # metric support is added. + return + + def _start(self, invocation: _T) -> _T: + """Start a GenAI invocation and create a pending span entry.""" + if isinstance(invocation, LLMInvocation): + span_name = _get_llm_span_name(invocation) + elif isinstance(invocation, EmbeddingInvocation): + span_name = _get_embedding_span_name(invocation) + else: + span_name = "" span = self._tracer.start_span( - name=f"{invocation.operation_name} {invocation.request_model}", + name=span_name, kind=SpanKind.CLIENT, ) # Record a monotonic start timestamp (seconds) for duration @@ -153,40 +177,87 @@ def start_llm( ) return invocation - def stop_llm(self, invocation: LLMInvocation) -> LLMInvocation: # pylint: disable=no-self-use - """Finalize an LLM invocation successfully and end its span.""" + def _stop(self, invocation: _T) -> _T: + """Finalize a GenAI invocation successfully and end its span.""" if invocation.context_token is None or invocation.span is None: # TODO: Provide feedback that this invocation was not started return invocation span = invocation.span - _apply_llm_finish_attributes(span, invocation) - self._record_llm_metrics(invocation, span) - _maybe_emit_llm_event(self._logger, span, invocation) - # 
Detach context and end span - otel_context.detach(invocation.context_token) - span.end() + try: + if isinstance(invocation, LLMInvocation): + _apply_llm_finish_attributes(span, invocation) + self._record_llm_metrics(invocation, span) + _maybe_emit_llm_event(self._logger, span, invocation) + elif isinstance(invocation, EmbeddingInvocation): + _apply_embedding_finish_attributes(span, invocation) + self._record_embedding_metrics(invocation, span) + finally: + # Detach context and end span even if finishing fails + otel_context.detach(invocation.context_token) + span.end() return invocation - def fail_llm( # pylint: disable=no-self-use - self, invocation: LLMInvocation, error: Error - ) -> LLMInvocation: - """Fail an LLM invocation and end its span with error status.""" + def _fail(self, invocation: _T, error: Error) -> _T: + """Fail a GenAI invocation and end its span with error status.""" if invocation.context_token is None or invocation.span is None: # TODO: Provide feedback that this invocation was not started return invocation span = invocation.span - _apply_llm_finish_attributes(invocation.span, invocation) - _apply_error_attributes(invocation.span, error) - error_type = getattr(error.type, "__qualname__", None) - self._record_llm_metrics(invocation, span, error_type=error_type) - _maybe_emit_llm_event(self._logger, span, invocation, error) - # Detach context and end span - otel_context.detach(invocation.context_token) - span.end() + error_type = error.type.__qualname__ + try: + if isinstance(invocation, LLMInvocation): + _apply_llm_finish_attributes(span, invocation) + _apply_error_attributes(span, error, error_type) + self._record_llm_metrics( + invocation, span, error_type=error_type + ) + _maybe_emit_llm_event( + self._logger, span, invocation, error_type + ) + elif isinstance(invocation, EmbeddingInvocation): + _apply_embedding_finish_attributes(span, invocation) + _apply_error_attributes(span, error, error_type) + self._record_embedding_metrics( + 
invocation, span, error_type=error_type + ) + finally: + # Detach context and end span even if finishing fails + otel_context.detach(invocation.context_token) + span.end() return invocation + def start( + self, + invocation: _T, + ) -> _T: + """Start a GenAI invocation and create a pending span entry.""" + return self._start(invocation) + + def stop(self, invocation: _T) -> _T: + """Finalize a GenAI invocation successfully and end its span.""" + return self._stop(invocation) + + def fail(self, invocation: _T, error: Error) -> _T: + """Fail a GenAI invocation and end its span with error status.""" + return self._fail(invocation, error) + + # LLM-specific convenience methods + def start_llm(self, invocation: LLMInvocation) -> LLMInvocation: + """Start an LLM invocation and create a pending span entry.""" + return self._start(invocation) + + def stop_llm(self, invocation: LLMInvocation) -> LLMInvocation: + """Finalize an LLM invocation successfully and end its span.""" + return self._stop(invocation) + + def fail_llm( + self, invocation: LLMInvocation, error: Error + ) -> LLMInvocation: + """Fail an LLM invocation and end its span with error status.""" + return self._fail(invocation, error) + @contextmanager def llm( self, invocation: LLMInvocation | None = None @@ -211,6 +282,28 @@ def llm( raise self.stop_llm(invocation) + @contextmanager + def embedding( + self, invocation: EmbeddingInvocation | None = None + ) -> Iterator[EmbeddingInvocation]: + """Context manager for Embedding invocations. + + Only set data attributes on the invocation object, do not modify the span or context. + + Starts the span on entry. On normal exit, finalizes the invocation and ends the span. + If an exception occurs inside the context, marks the span as error, ends it, and + re-raises the original exception. 
+ """ + if invocation is None: + invocation = EmbeddingInvocation() + self.start(invocation) + try: + yield invocation + except Exception as exc: + self.fail(invocation, Error(message=str(exc), type=type(exc))) + raise + self.stop(invocation) + def get_telemetry_handler( tracer_provider: TracerProvider | None = None, diff --git a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/span_utils.py b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/span_utils.py index 889994436f..ac099e3dae 100644 --- a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/span_utils.py +++ b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/span_utils.py @@ -32,7 +32,9 @@ from opentelemetry.trace.propagation import set_span_in_context from opentelemetry.trace.status import Status, StatusCode from opentelemetry.util.genai.types import ( + EmbeddingInvocation, Error, + GenAIInvocation, InputMessage, LLMInvocation, MessagePart, @@ -68,9 +70,42 @@ def _get_llm_common_attributes( } +def _get_embedding_common_attributes( + invocation: EmbeddingInvocation, +) -> dict[str, Any]: + """Get common Embedding attributes shared by finish() and error() paths. + + Returns a dictionary of attributes. 
+ """ + optional_attrs = ( + (server_attributes.SERVER_ADDRESS, invocation.server_address), + (server_attributes.SERVER_PORT, invocation.server_port), + ) + + return { + GenAI.GEN_AI_OPERATION_NAME: invocation.operation_name, + GenAI.GEN_AI_PROVIDER_NAME: invocation.provider, + **{key: value for key, value in optional_attrs if value is not None}, + } + + +def _get_span_name( + invocation: GenAIInvocation, +) -> str: + """Get the span name for a GenAI invocation.""" + operation_name = getattr(invocation, "operation_name", None) or "" + request_model = getattr(invocation, "request_model", None) or "" + return f"{operation_name} {request_model}".strip() + + def _get_llm_span_name(invocation: LLMInvocation) -> str: """Get the span name for an LLM invocation.""" - return f"{invocation.operation_name} {invocation.request_model}".strip() + return _get_span_name(invocation) + + +def _get_embedding_span_name(invocation: EmbeddingInvocation) -> str: + """Get the span name for an Embedding invocation.""" + return _get_span_name(invocation) def _get_llm_messages_attributes_for_span( @@ -151,7 +186,7 @@ def _maybe_emit_llm_event( logger: Logger | None, span: Span, invocation: LLMInvocation, - error: Error | None = None, + error_type: str | None = None, ) -> None: """Emit a gen_ai.client.inference.operation.details event to the logger. 
@@ -179,8 +214,8 @@ def _maybe_emit_llm_event( ) # Add error.type if operation ended in error - if error is not None: - attributes[error_attributes.ERROR_TYPE] = error.type.__qualname__ + if error_type is not None: + attributes[error_attributes.ERROR_TYPE] = error_type # Create and emit the event context = set_span_in_context(span, get_current()) @@ -218,13 +253,31 @@ def _apply_llm_finish_attributes( span.set_attributes(attributes) -def _apply_error_attributes(span: Span, error: Error) -> None: +def _apply_embedding_finish_attributes( + span: Span, invocation: EmbeddingInvocation +) -> None: + """Apply attributes common to embedding finish() paths.""" + # Update span name + span.update_name(_get_embedding_span_name(invocation)) + + # Build all attributes by reusing the attribute getter functions + attributes: dict[str, Any] = {} + attributes.update(_get_embedding_common_attributes(invocation)) + attributes.update(_get_embedding_request_attributes(invocation)) + attributes.update(_get_embedding_response_attributes(invocation)) + + attributes.update(invocation.attributes) + + # Set all attributes on the span + if attributes: + span.set_attributes(attributes) + + +def _apply_error_attributes(span: Span, error: Error, error_type: str) -> None: """Apply status and error attributes common to error() paths.""" span.set_status(Status(StatusCode.ERROR, error.message)) if span.is_recording(): - span.set_attribute( - error_attributes.ERROR_TYPE, error.type.__qualname__ - ) + span.set_attribute(error_attributes.ERROR_TYPE, error_type) def _get_llm_request_attributes( @@ -244,6 +297,19 @@ def _get_llm_request_attributes( return {key: value for key, value in optional_attrs if value is not None} +def _get_embedding_request_attributes( + invocation: EmbeddingInvocation, +) -> dict[str, Any]: + """Get GenAI request semantic convention attributes.""" + optional_attrs = ( + (GenAI.GEN_AI_REQUEST_MODEL, invocation.request_model), + (GenAI.GEN_AI_EMBEDDINGS_DIMENSION_COUNT, 
invocation.dimension_count), + (GenAI.GEN_AI_REQUEST_ENCODING_FORMATS, invocation.encoding_formats), + ) + + return {key: value for key, value in optional_attrs if value is not None} + + def _get_llm_response_attributes( invocation: LLMInvocation, ) -> dict[str, Any]: @@ -279,6 +345,18 @@ def _get_llm_response_attributes( return {key: value for key, value in optional_attrs if value is not None} +def _get_embedding_response_attributes( + invocation: EmbeddingInvocation, +) -> dict[str, Any]: + """Get GenAI response semantic convention attributes.""" + optional_attrs = ( + (GenAI.GEN_AI_RESPONSE_MODEL, invocation.response_model_name), + (GenAI.GEN_AI_USAGE_INPUT_TOKENS, invocation.input_tokens), + ) + + return {key: value for key, value in optional_attrs if value is not None} + + __all__ = [ "_apply_llm_finish_attributes", "_apply_error_attributes", @@ -287,4 +365,9 @@ def _get_llm_response_attributes( "_get_llm_response_attributes", "_get_llm_span_name", "_maybe_emit_llm_event", + "_apply_embedding_finish_attributes", + "_get_embedding_common_attributes", + "_get_embedding_request_attributes", + "_get_embedding_response_attributes", + "_get_embedding_span_name", ] diff --git a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/types.py b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/types.py index fc5de2cdb0..a714e808b6 100644 --- a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/types.py +++ b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/types.py @@ -202,6 +202,13 @@ class GenAIInvocation: span: Span | None = None attributes: dict[str, Any] = field(default_factory=_new_str_any_dict) + monotonic_start_s: float | None = None + """ + Monotonic start time in seconds (from timeit.default_timer) used for + duration calculations to avoid mixing clock sources. This is populated + by the TelemetryHandler when starting an invocation. 
+ """ + @dataclass class WorkflowInvocation(GenAIInvocation): @@ -228,9 +235,8 @@ class LLMInvocation(GenAIInvocation): set by the TelemetryHandler. """ - request_model: str | None = None - # Chat by default operation_name: str = GenAI.GenAiOperationNameValues.CHAT.value + request_model: str | None = None input_messages: list[InputMessage] = field( default_factory=_new_input_messages ) @@ -267,10 +273,42 @@ class LLMInvocation(GenAIInvocation): seed: int | None = None server_address: str | None = None server_port: int | None = None - # Monotonic start time in seconds (from timeit.default_timer) used - # for duration calculations to avoid mixing clock sources. This is - # populated by the TelemetryHandler when starting an invocation. - monotonic_start_s: float | None = None + + +@dataclass +class EmbeddingInvocation(GenAIInvocation): + """ + Represents a single embedding model invocation. When creating an + EmbeddingInvocation object, only update the data attributes. The span + and context_token attributes are set by the TelemetryHandler. + """ + + operation_name: str = GenAI.GenAiOperationNameValues.EMBEDDINGS.value + request_model: str | None = None + provider: str | None = None # e.g., azure.ai.openai, openai, aws.bedrock + server_address: str | None = None + server_port: int | None = None + + # encoding_formats can be multi-value -> combinational cardinality risk. + # Keep on spans/events only. + encoding_formats: list[str] | None = None + input_tokens: int | None = None + dimension_count: int | None = None + response_model_name: str | None = None + + attributes: dict[str, Any] = field(default_factory=_new_str_any_dict) + """ + Additional attributes to set on spans and/or events. These attributes + will not be set on metrics. + """ + + metric_attributes: dict[str, Any] = field( + default_factory=_new_str_any_dict + ) + """ + Additional attributes to set on metrics. Must be of a low cardinality. + These attributes will not be set on spans or events. 
+ """ @dataclass diff --git a/util/opentelemetry-util-genai/tests/test_utils.py b/util/opentelemetry-util-genai/tests/test_utils.py index d229d4b4b6..ee0e63d852 100644 --- a/util/opentelemetry-util-genai/tests/test_utils.py +++ b/util/opentelemetry-util-genai/tests/test_utils.py @@ -25,9 +25,7 @@ ) from opentelemetry.sdk._logs import LoggerProvider from opentelemetry.sdk._logs.export import ( - InMemoryLogExporter as InMemoryLogRecordExporter, -) -from opentelemetry.sdk._logs.export import ( + InMemoryLogExporter, SimpleLogRecordProcessor, ) from opentelemetry.sdk.trace import ReadableSpan, TracerProvider @@ -51,7 +49,7 @@ from opentelemetry.util.genai.handler import get_telemetry_handler from opentelemetry.util.genai.types import ( ContentCapturingMode, - Error, + EmbeddingInvocation, InputMessage, LLMInvocation, MessagePart, @@ -224,7 +222,7 @@ def setUp(self): tracer_provider.add_span_processor( SimpleSpanProcessor(self.span_exporter) ) - self.log_exporter = InMemoryLogRecordExporter() + self.log_exporter = InMemoryLogExporter() logger_provider = LoggerProvider() logger_provider.add_log_record_processor( SimpleLogRecordProcessor(self.log_exporter) @@ -523,6 +521,82 @@ def test_parent_child_span_relationship(self): # Parent should not have a parent (root) assert parent_span.parent is None + @patch_env_vars( + stability_mode="gen_ai_latest_experimental", + content_capturing="SPAN_ONLY", + emit_event="", + ) + def test_embedding_parent_child_span_relationship(self): + parent_invocation = EmbeddingInvocation( + request_model="embed-parent-model", + provider="test-provider", + input_tokens=10, + ) + child_invocation = EmbeddingInvocation( + request_model="embed-child-model", + provider="test-provider", + input_tokens=5, + ) + + self.telemetry_handler.start(parent_invocation) + assert parent_invocation.span is not None + self.telemetry_handler.start(child_invocation) + assert child_invocation.span is not None + self.telemetry_handler.stop(child_invocation) + 
self.telemetry_handler.stop(parent_invocation) + + spans = self.span_exporter.get_finished_spans() + assert len(spans) == 2 + child_span = next( + s for s in spans if s.name == "embeddings embed-child-model" + ) + parent_span = next( + s for s in spans if s.name == "embeddings embed-parent-model" + ) + + assert child_span.context.trace_id == parent_span.context.trace_id + assert child_span.parent is not None + assert child_span.parent.span_id == parent_span.context.span_id + assert parent_span.parent is None + + @patch_env_vars( + stability_mode="gen_ai_latest_experimental", + content_capturing="SPAN_ONLY", + emit_event="", + ) + def test_llm_parent_embedding_child_span_relationship(self): + message = _create_input_message("hi") + chat_generation = _create_output_message("ok") + child_invocation = EmbeddingInvocation( + request_model="embed-child-model", + provider="test-provider", + input_tokens=3, + ) + + with self.telemetry_handler.llm() as parent_invocation: + for attr, value in { + "request_model": "parent-model", + "input_messages": [message], + "provider": "test-provider", + }.items(): + setattr(parent_invocation, attr, value) + self.telemetry_handler.start(child_invocation) + assert child_invocation.span is not None + self.telemetry_handler.stop(child_invocation) + parent_invocation.output_messages = [chat_generation] + + spans = self.span_exporter.get_finished_spans() + assert len(spans) == 2 + child_span = next( + s for s in spans if s.name == "embeddings embed-child-model" + ) + parent_span = next(s for s in spans if s.name == "chat parent-model") + + assert child_span.context.trace_id == parent_span.context.trace_id + assert child_span.parent is not None + assert child_span.parent.span_id == parent_span.context.span_id + assert parent_span.parent is None + def test_llm_context_manager_error_path_records_error_status_and_attrs( self, ): @@ -571,318 +645,91 @@ class BoomError(RuntimeError): }, ) - @patch_env_vars( - 
stability_mode="gen_ai_latest_experimental", - content_capturing="EVENT_ONLY", - emit_event="true", - ) - def test_emits_llm_event(self): - invocation = LLMInvocation( - request_model="event-model", - input_messages=[_create_input_message("test query")], - system_instruction=_create_system_instruction(), - provider="test-provider", - temperature=0.7, - max_tokens=100, - response_model_name="response-model", - response_id="event-response-id", - input_tokens=10, - output_tokens=20, - ) - - self.telemetry_handler.start_llm(invocation) - invocation.output_messages = [_create_output_message("test response")] - self.telemetry_handler.stop_llm(invocation) - - # Check that event was emitted - logs = self.log_exporter.get_finished_logs() - self.assertEqual(len(logs), 1) - log_record = logs[0].log_record - - # Verify event name - self.assertEqual( - log_record.event_name, "gen_ai.client.inference.operation.details" - ) - - # Verify event attributes - attrs = log_record.attributes - self.assertIsNotNone(attrs) - self.assertEqual(attrs[GenAI.GEN_AI_OPERATION_NAME], "chat") - self.assertEqual(attrs[GenAI.GEN_AI_REQUEST_MODEL], "event-model") - self.assertEqual(attrs[GenAI.GEN_AI_PROVIDER_NAME], "test-provider") - self.assertEqual(attrs[GenAI.GEN_AI_REQUEST_TEMPERATURE], 0.7) - self.assertEqual(attrs[GenAI.GEN_AI_REQUEST_MAX_TOKENS], 100) - self.assertEqual(attrs[GenAI.GEN_AI_RESPONSE_MODEL], "response-model") - self.assertEqual(attrs[GenAI.GEN_AI_RESPONSE_ID], "event-response-id") - self.assertEqual(attrs[GenAI.GEN_AI_USAGE_INPUT_TOKENS], 10) - self.assertEqual(attrs[GenAI.GEN_AI_USAGE_OUTPUT_TOKENS], 20) - - # Verify messages are in structured format (not JSON string) - # OpenTelemetry may convert lists to tuples, so we normalize - input_msg = _normalize_to_dict( - _normalize_to_list(attrs[GenAI.GEN_AI_INPUT_MESSAGES])[0] - ) - self.assertEqual(input_msg["role"], "Human") - self.assertEqual( - _normalize_to_list(input_msg["parts"])[0]["content"], "test query" - ) - - 
output_msg = _normalize_to_dict( - _normalize_to_list(attrs[GenAI.GEN_AI_OUTPUT_MESSAGES])[0] - ) - self.assertEqual(output_msg["role"], "AI") - self.assertEqual( - _normalize_to_list(output_msg["parts"])[0]["content"], - "test response", - ) - self.assertEqual(output_msg["finish_reason"], "stop") - - # Verify system instruction is present in event in structured format - sys_instr = _normalize_to_dict( - _normalize_to_list(attrs[GenAI.GEN_AI_SYSTEM_INSTRUCTIONS])[0] - ) - self.assertEqual(sys_instr["content"], "You are a helpful assistant.") - self.assertEqual(sys_instr["type"], "text") - - # Verify event context matches span context - span = _get_single_span(self.span_exporter) - self.assertIsNotNone(log_record.trace_id) - self.assertIsNotNone(log_record.span_id) - self.assertIsNotNone(span.context) - self.assertEqual(log_record.trace_id, span.context.trace_id) - self.assertEqual(log_record.span_id, span.context.span_id) - - @patch_env_vars( - stability_mode="gen_ai_latest_experimental", - content_capturing="SPAN_AND_EVENT", - emit_event="true", - ) - def test_emits_llm_event_and_span(self): - message = _create_input_message("combined test") - chat_generation = _create_output_message("combined response") - system_instruction = _create_system_instruction("System prompt here") - - invocation = LLMInvocation( - request_model="combined-model", - input_messages=[message], - system_instruction=system_instruction, - provider="test-provider", - ) - - self.telemetry_handler.start_llm(invocation) - invocation.output_messages = [chat_generation] - self.telemetry_handler.stop_llm(invocation) - - # Check span was created - span = _get_single_span(self.span_exporter) - span_attrs = _get_span_attributes(span) - self.assertIn(GenAI.GEN_AI_INPUT_MESSAGES, span_attrs) - - # Check event was emitted - logs = self.log_exporter.get_finished_logs() - self.assertEqual(len(logs), 1) - log_record = logs[0].log_record - self.assertEqual( - log_record.event_name, 
"gen_ai.client.inference.operation.details" - ) - self.assertIn(GenAI.GEN_AI_INPUT_MESSAGES, log_record.attributes) - # Verify system instruction in both span and event - self.assertIn(GenAI.GEN_AI_SYSTEM_INSTRUCTIONS, span_attrs) - span_system = json.loads(span_attrs[GenAI.GEN_AI_SYSTEM_INSTRUCTIONS]) - self.assertEqual(span_system[0]["content"], "System prompt here") - event_attrs = log_record.attributes - self.assertIn(GenAI.GEN_AI_SYSTEM_INSTRUCTIONS, event_attrs) - event_system = event_attrs[GenAI.GEN_AI_SYSTEM_INSTRUCTIONS] - event_system_list = ( - list(event_system) - if isinstance(event_system, tuple) - else event_system - ) - event_sys_instr = ( - dict(event_system_list[0]) - if isinstance(event_system_list[0], tuple) - else event_system_list[0] - ) - self.assertEqual(event_sys_instr["content"], "System prompt here") - # Verify event context matches span context - span = _get_single_span(self.span_exporter) - self.assertIsNotNone(log_record.trace_id) - self.assertIsNotNone(log_record.span_id) - self.assertIsNotNone(span.context) - self.assertEqual(log_record.trace_id, span.context.trace_id) - self.assertEqual(log_record.span_id, span.context.span_id) - - @patch_env_vars( - stability_mode="gen_ai_latest_experimental", - content_capturing="EVENT_ONLY", - emit_event="true", - ) - def test_emits_llm_event_with_error(self): - class TestError(RuntimeError): + def test_embedding_context_manager_error_path_records_error_status_and_attrs( + self, + ): + class BoomError(RuntimeError): pass - message = _create_input_message("error test") - invocation = LLMInvocation( - request_model="error-model", - input_messages=[message], + invocation = EmbeddingInvocation( + request_model="embed-model", provider="test-provider", + dimension_count=1536, + input_tokens=7, + server_address="embed.example.com", + server_port=443, + attributes={"custom_embed_attr": "value"}, ) - self.telemetry_handler.start_llm(invocation) - error = Error(message="Test error occurred", 
type=TestError) - self.telemetry_handler.fail_llm(invocation, error) - - # Check event was emitted - logs = self.log_exporter.get_finished_logs() - self.assertEqual(len(logs), 1) - log_record = logs[0].log_record - attrs = log_record.attributes + with self.assertRaises(BoomError): + with self.telemetry_handler.embedding(invocation): + invocation.response_model_name = "embed-response-model" + raise BoomError("embedding boom") - # Verify error attribute is present - self.assertEqual( - attrs[error_attributes.ERROR_TYPE], TestError.__qualname__ - ) - self.assertEqual(attrs[GenAI.GEN_AI_OPERATION_NAME], "chat") - self.assertEqual(attrs[GenAI.GEN_AI_REQUEST_MODEL], "error-model") - # Verify event context matches span context span = _get_single_span(self.span_exporter) - self.assertIsNotNone(log_record.trace_id) - self.assertIsNotNone(log_record.span_id) - self.assertIsNotNone(span.context) - self.assertEqual(log_record.trace_id, span.context.trace_id) - self.assertEqual(log_record.span_id, span.context.span_id) - - @patch_env_vars( - stability_mode="gen_ai_latest_experimental", - content_capturing="EVENT_ONLY", - emit_event="false", - ) - def test_does_not_emit_llm_event_when_emit_event_false(self): - message = _create_input_message("emit false test") - chat_generation = _create_output_message("emit false response") - - invocation = LLMInvocation( - request_model="emit-false-model", - input_messages=[message], - provider="test-provider", - ) - - self.telemetry_handler.start_llm(invocation) - invocation.output_messages = [chat_generation] - self.telemetry_handler.stop_llm(invocation) - - # Check no event was emitted - logs = self.log_exporter.get_finished_logs() - self.assertEqual(len(logs), 0) - - @patch_env_vars( - stability_mode="gen_ai_latest_experimental", - content_capturing="NO_CONTENT", - emit_event="", - ) - def test_does_not_emit_llm_event_by_default_for_no_content(self): - """Test that event is not emitted by default when content_capturing is NO_CONTENT and 
OTEL_INSTRUMENTATION_GENAI_EMIT_EVENT is not set.""" - invocation = LLMInvocation( - request_model="default-model", - input_messages=[_create_input_message("default test")], - provider="test-provider", + assert span.status.status_code == StatusCode.ERROR + _assert_span_time_order(span) + span_attrs = _get_span_attributes(span) + _assert_span_attributes( + span_attrs, + { + GenAI.GEN_AI_OPERATION_NAME: "embeddings", + GenAI.GEN_AI_REQUEST_MODEL: "embed-model", + GenAI.GEN_AI_PROVIDER_NAME: "test-provider", + GenAI.GEN_AI_EMBEDDINGS_DIMENSION_COUNT: 1536, + GenAI.GEN_AI_USAGE_INPUT_TOKENS: 7, + GenAI.GEN_AI_RESPONSE_MODEL: "embed-response-model", + server_attributes.SERVER_ADDRESS: "embed.example.com", + server_attributes.SERVER_PORT: 443, + "custom_embed_attr": "value", + error_attributes.ERROR_TYPE: BoomError.__qualname__, + }, ) - self.telemetry_handler.start_llm(invocation) - invocation.output_messages = [ - _create_output_message("default response") - ] - self.telemetry_handler.stop_llm(invocation) - - # Check that no event was emitted (NO_CONTENT defaults to False) - logs = self.log_exporter.get_finished_logs() - self.assertEqual(len(logs), 0) - @patch_env_vars( stability_mode="gen_ai_latest_experimental", content_capturing="SPAN_ONLY", emit_event="", ) - def test_does_not_emit_llm_event_by_default_for_span_only(self): - """Test that event is not emitted by default when content_capturing is SPAN_ONLY and OTEL_INSTRUMENTATION_GENAI_EMIT_EVENT is not set.""" - invocation = LLMInvocation( - request_model="default-model", - input_messages=[_create_input_message("default test")], - provider="test-provider", - ) - - self.telemetry_handler.start_llm(invocation) - invocation.output_messages = [ - _create_output_message("default response") - ] - self.telemetry_handler.stop_llm(invocation) - - # Check that no event was emitted (SPAN_ONLY defaults to False) - logs = self.log_exporter.get_finished_logs() - self.assertEqual(len(logs), 0) - - @patch_env_vars( - 
stability_mode="gen_ai_latest_experimental", - content_capturing="EVENT_ONLY", - emit_event="", - ) - def test_emits_llm_event_by_default_for_event_only(self): - """Test that event is emitted by default when content_capturing is EVENT_ONLY and OTEL_INSTRUMENTATION_GENAI_EMIT_EVENT is not set.""" - invocation = LLMInvocation( - request_model="default-model", - input_messages=[_create_input_message("default test")], - provider="test-provider", - ) - - self.telemetry_handler.start_llm(invocation) - invocation.output_messages = [ - _create_output_message("default response") - ] - self.telemetry_handler.stop_llm(invocation) - - # Check that event was emitted (EVENT_ONLY defaults to True) - logs = self.log_exporter.get_finished_logs() - self.assertEqual(len(logs), 1) - log_record = logs[0].log_record - self.assertEqual( - log_record.event_name, "gen_ai.client.inference.operation.details" - ) - - @patch_env_vars( - stability_mode="gen_ai_latest_experimental", - content_capturing="SPAN_AND_EVENT", - emit_event="", - ) - def test_emits_llm_event_by_default_for_span_and_event(self): - """Test that event is emitted by default when content_capturing is SPAN_AND_EVENT and OTEL_INSTRUMENTATION_GENAI_EMIT_EVENT is not set.""" - message = _create_input_message("span and event test") - chat_generation = _create_output_message("span and event response") - system_instruction = _create_system_instruction("System prompt") - - invocation = LLMInvocation( - request_model="span-and-event-model", - input_messages=[message], - system_instruction=system_instruction, + def test_embedding_manual_start_and_stop_creates_span(self): + invocation = EmbeddingInvocation( + request_model="embed-model", provider="test-provider", + dimension_count=1536, + encoding_formats=["float"], + input_tokens=123, + server_address="custom.server.com", + server_port=42, + attributes={"custom_embed_attr": "value"}, ) - self.telemetry_handler.start_llm(invocation) - invocation.output_messages = [chat_generation] - 
self.telemetry_handler.stop_llm(invocation) + self.telemetry_handler.start(invocation) + assert invocation.span is not None + invocation.attributes.update({"extra_embed": "info"}) + invocation.metric_attributes = {"should not be on span": "value"} + self.telemetry_handler.stop(invocation) - # Check span was created span = _get_single_span(self.span_exporter) - span_attrs = _get_span_attributes(span) - self.assertIn(GenAI.GEN_AI_INPUT_MESSAGES, span_attrs) + self.assertEqual(span.name, "embeddings embed-model") + self.assertEqual(span.kind, trace.SpanKind.CLIENT) + _assert_span_time_order(span) - # Check that event was emitted (SPAN_AND_EVENT defaults to True) - logs = self.log_exporter.get_finished_logs() - self.assertEqual(len(logs), 1) - log_record = logs[0].log_record - self.assertEqual( - log_record.event_name, "gen_ai.client.inference.operation.details" + attrs = _get_span_attributes(span) + _assert_span_attributes( + attrs, + { + GenAI.GEN_AI_OPERATION_NAME: "embeddings", + GenAI.GEN_AI_REQUEST_MODEL: "embed-model", + GenAI.GEN_AI_PROVIDER_NAME: "test-provider", + GenAI.GEN_AI_EMBEDDINGS_DIMENSION_COUNT: 1536, + GenAI.GEN_AI_REQUEST_ENCODING_FORMATS: ("float",), + GenAI.GEN_AI_USAGE_INPUT_TOKENS: 123, + server_attributes.SERVER_ADDRESS: "custom.server.com", + server_attributes.SERVER_PORT: 42, + "custom_embed_attr": "value", + "extra_embed": "info", + }, ) - self.assertIn(GenAI.GEN_AI_INPUT_MESSAGES, log_record.attributes) class AnyNonNone: diff --git a/util/opentelemetry-util-genai/tests/test_utils_events.py b/util/opentelemetry-util-genai/tests/test_utils_events.py new file mode 100644 index 0000000000..20b3300c62 --- /dev/null +++ b/util/opentelemetry-util-genai/tests/test_utils_events.py @@ -0,0 +1,380 @@ +# Copyright The OpenTelemetry Authors +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json +import unittest + +from opentelemetry.sdk._logs import LoggerProvider +from opentelemetry.sdk._logs.export import ( + InMemoryLogExporter, + SimpleLogRecordProcessor, +) +from opentelemetry.sdk.trace import TracerProvider +from opentelemetry.sdk.trace.export import SimpleSpanProcessor +from opentelemetry.sdk.trace.export.in_memory_span_exporter import ( + InMemorySpanExporter, +) +from opentelemetry.semconv._incubating.attributes import ( + gen_ai_attributes as GenAI, +) +from opentelemetry.semconv.attributes import error_attributes +from opentelemetry.util.genai.handler import get_telemetry_handler +from opentelemetry.util.genai.types import Error, LLMInvocation + +from .test_utils import ( + _create_input_message, + _create_output_message, + _create_system_instruction, + _get_single_span, + _get_span_attributes, + _normalize_to_dict, + _normalize_to_list, + patch_env_vars, +) + + +class TestTelemetryHandlerEvents(unittest.TestCase): + def setUp(self): + self.span_exporter = InMemorySpanExporter() + tracer_provider = TracerProvider() + tracer_provider.add_span_processor( + SimpleSpanProcessor(self.span_exporter) + ) + self.log_exporter = InMemoryLogExporter() + logger_provider = LoggerProvider() + logger_provider.add_log_record_processor( + SimpleLogRecordProcessor(self.log_exporter) + ) + self.telemetry_handler = get_telemetry_handler( + tracer_provider=tracer_provider, logger_provider=logger_provider + ) + + def tearDown(self): + self.span_exporter.clear() + self.log_exporter.clear() + if hasattr(get_telemetry_handler, 
"_default_handler"): + delattr(get_telemetry_handler, "_default_handler") + + @patch_env_vars( + stability_mode="gen_ai_latest_experimental", + content_capturing="EVENT_ONLY", + emit_event="true", + ) + def test_emits_llm_event(self): + invocation = LLMInvocation( + request_model="event-model", + input_messages=[_create_input_message("test query")], + system_instruction=_create_system_instruction(), + provider="test-provider", + temperature=0.7, + max_tokens=100, + response_model_name="response-model", + response_id="event-response-id", + input_tokens=10, + output_tokens=20, + ) + + self.telemetry_handler.start_llm(invocation) + invocation.output_messages = [_create_output_message("test response")] + self.telemetry_handler.stop_llm(invocation) + + # Check that event was emitted + logs = self.log_exporter.get_finished_logs() + self.assertEqual(len(logs), 1) + log_record = logs[0].log_record + + # Verify event name + self.assertEqual( + log_record.event_name, "gen_ai.client.inference.operation.details" + ) + + # Verify event attributes + attrs = log_record.attributes + self.assertIsNotNone(attrs) + self.assertEqual(attrs[GenAI.GEN_AI_OPERATION_NAME], "chat") + self.assertEqual(attrs[GenAI.GEN_AI_REQUEST_MODEL], "event-model") + self.assertEqual(attrs[GenAI.GEN_AI_PROVIDER_NAME], "test-provider") + self.assertEqual(attrs[GenAI.GEN_AI_REQUEST_TEMPERATURE], 0.7) + self.assertEqual(attrs[GenAI.GEN_AI_REQUEST_MAX_TOKENS], 100) + self.assertEqual(attrs[GenAI.GEN_AI_RESPONSE_MODEL], "response-model") + self.assertEqual(attrs[GenAI.GEN_AI_RESPONSE_ID], "event-response-id") + self.assertEqual(attrs[GenAI.GEN_AI_USAGE_INPUT_TOKENS], 10) + self.assertEqual(attrs[GenAI.GEN_AI_USAGE_OUTPUT_TOKENS], 20) + + # Verify messages are in structured format (not JSON string) + # OpenTelemetry may convert lists to tuples, so we normalize + input_msg = _normalize_to_dict( + _normalize_to_list(attrs[GenAI.GEN_AI_INPUT_MESSAGES])[0] + ) + self.assertEqual(input_msg["role"], "Human") + 
self.assertEqual( + _normalize_to_list(input_msg["parts"])[0]["content"], "test query" + ) + + output_msg = _normalize_to_dict( + _normalize_to_list(attrs[GenAI.GEN_AI_OUTPUT_MESSAGES])[0] + ) + self.assertEqual(output_msg["role"], "AI") + self.assertEqual( + _normalize_to_list(output_msg["parts"])[0]["content"], + "test response", + ) + self.assertEqual(output_msg["finish_reason"], "stop") + + # Verify system instruction is present in event in structured format + sys_instr = _normalize_to_dict( + _normalize_to_list(attrs[GenAI.GEN_AI_SYSTEM_INSTRUCTIONS])[0] + ) + self.assertEqual(sys_instr["content"], "You are a helpful assistant.") + self.assertEqual(sys_instr["type"], "text") + + # Verify event context matches span context + span = _get_single_span(self.span_exporter) + self.assertIsNotNone(log_record.trace_id) + self.assertIsNotNone(log_record.span_id) + self.assertIsNotNone(span.context) + self.assertEqual(log_record.trace_id, span.context.trace_id) + self.assertEqual(log_record.span_id, span.context.span_id) + + @patch_env_vars( + stability_mode="gen_ai_latest_experimental", + content_capturing="SPAN_AND_EVENT", + emit_event="true", + ) + def test_emits_llm_event_and_span(self): + message = _create_input_message("combined test") + chat_generation = _create_output_message("combined response") + system_instruction = _create_system_instruction("System prompt here") + + invocation = LLMInvocation( + request_model="combined-model", + input_messages=[message], + system_instruction=system_instruction, + provider="test-provider", + ) + + self.telemetry_handler.start_llm(invocation) + invocation.output_messages = [chat_generation] + self.telemetry_handler.stop_llm(invocation) + + # Check span was created + span = _get_single_span(self.span_exporter) + span_attrs = _get_span_attributes(span) + self.assertIn(GenAI.GEN_AI_INPUT_MESSAGES, span_attrs) + + # Check event was emitted + logs = self.log_exporter.get_finished_logs() + self.assertEqual(len(logs), 1) + log_record 
= logs[0].log_record + self.assertEqual( + log_record.event_name, "gen_ai.client.inference.operation.details" + ) + self.assertIn(GenAI.GEN_AI_INPUT_MESSAGES, log_record.attributes) + # Verify system instruction in both span and event + self.assertIn(GenAI.GEN_AI_SYSTEM_INSTRUCTIONS, span_attrs) + span_system = json.loads(span_attrs[GenAI.GEN_AI_SYSTEM_INSTRUCTIONS]) + self.assertEqual(span_system[0]["content"], "System prompt here") + event_attrs = log_record.attributes + self.assertIn(GenAI.GEN_AI_SYSTEM_INSTRUCTIONS, event_attrs) + event_system = event_attrs[GenAI.GEN_AI_SYSTEM_INSTRUCTIONS] + event_system_list = ( + list(event_system) + if isinstance(event_system, tuple) + else event_system + ) + event_sys_instr = ( + dict(event_system_list[0]) + if isinstance(event_system_list[0], tuple) + else event_system_list[0] + ) + self.assertEqual(event_sys_instr["content"], "System prompt here") + # Verify event context matches span context + span = _get_single_span(self.span_exporter) + self.assertIsNotNone(log_record.trace_id) + self.assertIsNotNone(log_record.span_id) + self.assertIsNotNone(span.context) + self.assertEqual(log_record.trace_id, span.context.trace_id) + self.assertEqual(log_record.span_id, span.context.span_id) + + @patch_env_vars( + stability_mode="gen_ai_latest_experimental", + content_capturing="EVENT_ONLY", + emit_event="true", + ) + def test_emits_llm_event_with_error(self): + class TestError(RuntimeError): + pass + + message = _create_input_message("error test") + invocation = LLMInvocation( + request_model="error-model", + input_messages=[message], + provider="test-provider", + ) + + self.telemetry_handler.start_llm(invocation) + error = Error(message="Test error occurred", type=TestError) + self.telemetry_handler.fail_llm(invocation, error) + + # Check event was emitted + logs = self.log_exporter.get_finished_logs() + self.assertEqual(len(logs), 1) + log_record = logs[0].log_record + attrs = log_record.attributes + + # Verify error attribute 
is present + self.assertEqual( + attrs[error_attributes.ERROR_TYPE], TestError.__qualname__ + ) + self.assertEqual(attrs[GenAI.GEN_AI_OPERATION_NAME], "chat") + self.assertEqual(attrs[GenAI.GEN_AI_REQUEST_MODEL], "error-model") + # Verify event context matches span context + span = _get_single_span(self.span_exporter) + self.assertIsNotNone(log_record.trace_id) + self.assertIsNotNone(log_record.span_id) + self.assertIsNotNone(span.context) + self.assertEqual(log_record.trace_id, span.context.trace_id) + self.assertEqual(log_record.span_id, span.context.span_id) + + @patch_env_vars( + stability_mode="gen_ai_latest_experimental", + content_capturing="EVENT_ONLY", + emit_event="false", + ) + def test_does_not_emit_llm_event_when_emit_event_false(self): + message = _create_input_message("emit false test") + chat_generation = _create_output_message("emit false response") + + invocation = LLMInvocation( + request_model="emit-false-model", + input_messages=[message], + provider="test-provider", + ) + + self.telemetry_handler.start_llm(invocation) + invocation.output_messages = [chat_generation] + self.telemetry_handler.stop_llm(invocation) + + # Check no event was emitted + logs = self.log_exporter.get_finished_logs() + self.assertEqual(len(logs), 0) + + @patch_env_vars( + stability_mode="gen_ai_latest_experimental", + content_capturing="NO_CONTENT", + emit_event="", + ) + def test_does_not_emit_llm_event_by_default_for_no_content(self): + """Test that event is not emitted by default when content_capturing is NO_CONTENT and OTEL_INSTRUMENTATION_GENAI_EMIT_EVENT is not set.""" + invocation = LLMInvocation( + request_model="default-model", + input_messages=[_create_input_message("default test")], + provider="test-provider", + ) + + self.telemetry_handler.start_llm(invocation) + invocation.output_messages = [ + _create_output_message("default response") + ] + self.telemetry_handler.stop_llm(invocation) + + # Check that no event was emitted (NO_CONTENT defaults to False) + 
logs = self.log_exporter.get_finished_logs() + self.assertEqual(len(logs), 0) + + @patch_env_vars( + stability_mode="gen_ai_latest_experimental", + content_capturing="SPAN_ONLY", + emit_event="", + ) + def test_does_not_emit_llm_event_by_default_for_span_only(self): + """Test that event is not emitted by default when content_capturing is SPAN_ONLY and OTEL_INSTRUMENTATION_GENAI_EMIT_EVENT is not set.""" + invocation = LLMInvocation( + request_model="default-model", + input_messages=[_create_input_message("default test")], + provider="test-provider", + ) + + self.telemetry_handler.start_llm(invocation) + invocation.output_messages = [ + _create_output_message("default response") + ] + self.telemetry_handler.stop_llm(invocation) + + # Check that no event was emitted (SPAN_ONLY defaults to False) + logs = self.log_exporter.get_finished_logs() + self.assertEqual(len(logs), 0) + + @patch_env_vars( + stability_mode="gen_ai_latest_experimental", + content_capturing="EVENT_ONLY", + emit_event="", + ) + def test_emits_llm_event_by_default_for_event_only(self): + """Test that event is emitted by default when content_capturing is EVENT_ONLY and OTEL_INSTRUMENTATION_GENAI_EMIT_EVENT is not set.""" + invocation = LLMInvocation( + request_model="default-model", + input_messages=[_create_input_message("default test")], + provider="test-provider", + ) + + self.telemetry_handler.start_llm(invocation) + invocation.output_messages = [ + _create_output_message("default response") + ] + self.telemetry_handler.stop_llm(invocation) + + # Check that event was emitted (EVENT_ONLY defaults to True) + logs = self.log_exporter.get_finished_logs() + self.assertEqual(len(logs), 1) + log_record = logs[0].log_record + self.assertEqual( + log_record.event_name, "gen_ai.client.inference.operation.details" + ) + + @patch_env_vars( + stability_mode="gen_ai_latest_experimental", + content_capturing="SPAN_AND_EVENT", + emit_event="", + ) + def test_emits_llm_event_by_default_for_span_and_event(self): + 
"""Test that event is emitted by default when content_capturing is SPAN_AND_EVENT and OTEL_INSTRUMENTATION_GENAI_EMIT_EVENT is not set.""" + message = _create_input_message("span and event test") + chat_generation = _create_output_message("span and event response") + system_instruction = _create_system_instruction("System prompt") + + invocation = LLMInvocation( + request_model="span-and-event-model", + input_messages=[message], + system_instruction=system_instruction, + provider="test-provider", + ) + + self.telemetry_handler.start_llm(invocation) + invocation.output_messages = [chat_generation] + self.telemetry_handler.stop_llm(invocation) + + # Check span was created + span = _get_single_span(self.span_exporter) + span_attrs = _get_span_attributes(span) + self.assertIn(GenAI.GEN_AI_INPUT_MESSAGES, span_attrs) + + # Check that event was emitted (SPAN_AND_EVENT defaults to True) + logs = self.log_exporter.get_finished_logs() + self.assertEqual(len(logs), 1) + log_record = logs[0].log_record + self.assertEqual( + log_record.event_name, "gen_ai.client.inference.operation.details" + ) + self.assertIn(GenAI.GEN_AI_INPUT_MESSAGES, log_record.attributes)