diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/CHANGELOG.md b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/CHANGELOG.md index 433736f9..b833f45d 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/CHANGELOG.md +++ b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/CHANGELOG.md @@ -7,6 +7,20 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## Unreleased +### Added + +- **`SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION` env var** — Global alternative to + the per-request `suppress_language_model_instrumentation` OTel context key + (the env var is the uppercase form of the same string). When set to a truthy + value (`true`, `1`, `yes`, `on`), the openai-v2 instrumentor skips creating + spans entirely. Intended for zero-code deployments alongside + `OTEL_PYTHON_DISABLED_INSTRUMENTATIONS=openai`. +- Add `gen_ai.tool.definitions` attribute on LLM spans when + `OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT=true` and + `OTEL_INSTRUMENTATION_GENAI_CAPTURE_TOOL_DEFINITIONS=true` +- Add `gen_ai.request.stream` attribute for streaming requests +- Add `gen_ai.response.time_to_first_chunk` attribute and metric for streaming requests + ### Fixed - Fix `AttributeError: 'StreamWrapper' object has no attribute 'headers'` when @@ -19,17 +33,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 attributes to the underlying stream. Inspired by upstream fix ([opentelemetry-python-contrib#4184](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/4184), fixes [#4113](https://github.com/open-telemetry/opentelemetry-python-contrib/issues/4113)). - -### Added - -- Add `gen_ai.tool.definitions` attribute on LLM spans when - `OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT=true` and - `OTEL_INSTRUMENTATION_GENAI_CAPTURE_TOOL_DEFINITIONS=true` -- Add `gen_ai.request.stream` attribute for streaming requests -- Add `gen_ai.response.time_to_first_chunk` attribute and metric for streaming requests - -### Fixed - - Fix PyPI badge, install command, and references in README.rst to use correct `splunk-otel-instrumentation-openai` package name instead of upstream - Fix project URLs in pyproject.toml to point to SDOT repo (`signalfx/splunk-otel-python-contrib`) diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/patch.py b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/patch.py index 48b38951..e2cf8b2e 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/patch.py +++ b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/patch.py @@ -15,6 +15,7 @@ import asyncio import inspect +import os import timeit from typing import Any, Iterable, Optional @@ -59,6 +60,25 @@ ) +def _is_instrumentation_suppressed() -> bool: + """Return True when OpenAI spans should be skipped. + + Checks two surfaces for the suppression signal: + 1. OTel context key — set per-request by the LangChain instrumentor's + ``_OpenAITracingWrapper`` to prevent duplicate LLM spans when both + instrumentors are active simultaneously. + 2. Environment variable ``SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION`` — + set globally (e.g. in zero-code deployments) together with + ``OTEL_PYTHON_DISABLED_INSTRUMENTATIONS=openai``. + """ + if context_api.get_value(SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY): + return True + raw = os.environ.get( + SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY.upper(), "" + ) + return raw.strip().lower() in ("true", "1", "yes", "on") + + def _normalize_stop_sequences(stop_values: Any) -> list[str]: if stop_values is None: return [] @@ -394,7 +414,7 @@ def chat_completions_create(capture_content: bool, handler): def traced_method(wrapped, instance, args, kwargs): # Check if instrumentation is suppressed (e.g., by LangChain) - if context_api.get_value(SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY): + if _is_instrumentation_suppressed(): return wrapped(*args, **kwargs) span_attributes = {**get_llm_request_attributes(kwargs, instance)} @@ -449,7 +469,7 @@ def async_chat_completions_create(capture_content: bool, handler): async def traced_method(wrapped, instance, args, kwargs): # Check if instrumentation is suppressed (e.g., by LangChain) - if context_api.get_value(SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY): + if _is_instrumentation_suppressed(): return await wrapped(*args, **kwargs) span_attributes = {**get_llm_request_attributes(kwargs, instance)} @@ -504,7 +524,7 @@ def embeddings_create(capture_content: bool, handler): def traced_method(wrapped, instance, args, kwargs): # Check if instrumentation is suppressed (e.g., by LangChain) - if context_api.get_value(SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY): + if _is_instrumentation_suppressed(): return wrapped(*args, **kwargs) span_attributes = get_llm_request_attributes( @@ -553,7 +573,7 @@ def async_embeddings_create(capture_content: bool, handler): async def traced_method(wrapped, instance, args, kwargs): # Check if instrumentation is suppressed (e.g., by LangChain) - if context_api.get_value(SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY): + if _is_instrumentation_suppressed(): return await wrapped(*args, **kwargs) span_attributes = get_llm_request_attributes( diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/cassettes/test_chat_completion_not_suppressed_when_env_var_false.yaml b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/cassettes/test_chat_completion_not_suppressed_when_env_var_false.yaml new file mode 100644 index 00000000..2abb443f --- /dev/null +++ b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/cassettes/test_chat_completion_not_suppressed_when_env_var_false.yaml @@ -0,0 +1,134 @@ +interactions: +- request: + body: |- + { + "messages": [ + { + "role": "user", + "content": "Say this is a test" + } + ], + "model": "gpt-4o-mini", + "stream": false + } + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + authorization: + - Bearer test_openai_api_key + connection: + - keep-alive + content-length: + - '106' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.54.3 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.54.3 + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.12.6 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: |- + { + "id": "chatcmpl-ASYMQRl3A3DXL9FWCK9tnGRcKIO7q", + "object": "chat.completion", + "created": 1731368630, + "model": "gpt-4o-mini-2024-07-18", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "This is a test.", + "refusal": null + }, + "logprobs": null, + "finish_reason": "stop" + } + ], + "usage": { + "prompt_tokens": 12, + "completion_tokens": 5, + "total_tokens": 17, + "prompt_tokens_details": { + "cached_tokens": 0, + "audio_tokens": 0 + }, + "completion_tokens_details": { + "reasoning_tokens": 0, + "audio_tokens": 0, + "accepted_prediction_tokens": 0, + "rejected_prediction_tokens": 0 + } + }, + "system_fingerprint": "fp_0ba0d124f1" + } + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 8e122593ff368bc8-SIN + Connection: + - keep-alive + Content-Type: + - application/json + Date: + - Mon, 11 Nov 2024 23:43:50 GMT + Server: + - cloudflare + Set-Cookie: test_set_cookie + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + content-length: + - '765' + openai-organization: test_openai_org_id + openai-processing-ms: + - '287' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '10000' + x-ratelimit-limit-tokens: + - '200000' + x-ratelimit-remaining-requests: + - '9999' + x-ratelimit-remaining-tokens: + - '199977' + x-ratelimit-reset-requests: + - 8.64s + x-ratelimit-reset-tokens: + - 6ms + x-request-id: + - req_58cff97afd0e7c0bba910ccf0b044a6f + status: + code: 200 + message: OK +version: 1 diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/cassettes/test_chat_completion_suppressed_via_env_var.yaml b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/cassettes/test_chat_completion_suppressed_via_env_var.yaml new file mode 100644 index 00000000..2abb443f --- /dev/null +++ b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/cassettes/test_chat_completion_suppressed_via_env_var.yaml @@ -0,0 +1,134 @@ +interactions: +- request: + body: |- + { + "messages": [ + { + "role": "user", + "content": "Say this is a test" + } + ], + "model": "gpt-4o-mini", + "stream": false + } + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + authorization: + - Bearer test_openai_api_key + connection: + - keep-alive + content-length: + - '106' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.54.3 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.54.3 + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.12.6 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: |- + { + "id": "chatcmpl-ASYMQRl3A3DXL9FWCK9tnGRcKIO7q", + "object": "chat.completion", + "created": 1731368630, + "model": "gpt-4o-mini-2024-07-18", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "This is a test.", + "refusal": null + }, + "logprobs": null, + "finish_reason": "stop" + } + ], + "usage": { + "prompt_tokens": 12, + "completion_tokens": 5, + "total_tokens": 17, + "prompt_tokens_details": { + "cached_tokens": 0, + "audio_tokens": 0 + }, + "completion_tokens_details": { + "reasoning_tokens": 0, + "audio_tokens": 0, + "accepted_prediction_tokens": 0, + "rejected_prediction_tokens": 0 + } + }, + "system_fingerprint": "fp_0ba0d124f1" + } + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 8e122593ff368bc8-SIN + Connection: + - keep-alive + Content-Type: + - application/json + Date: + - Mon, 11 Nov 2024 23:43:50 GMT + Server: + - cloudflare + Set-Cookie: test_set_cookie + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + content-length: + - '765' + openai-organization: test_openai_org_id + openai-processing-ms: + - '287' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '10000' + x-ratelimit-limit-tokens: + - '200000' + x-ratelimit-remaining-requests: + - '9999' + x-ratelimit-remaining-tokens: + - '199977' + x-ratelimit-reset-requests: + - 8.64s + x-ratelimit-reset-tokens: + - 6ms + x-request-id: + - req_58cff97afd0e7c0bba910ccf0b044a6f + status: + code: 200 + message: OK +version: 1 diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_suppression.py b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_suppression.py index e57001bc..221dbd17 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_suppression.py +++ b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_suppression.py @@ -13,8 +13,14 @@ # limitations under the License. """ -Tests for SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY functionality. -This prevents duplicate telemetry when multiple instrumentations (e.g., LangChain + OpenAI) are active. +Tests for suppression of OpenAI instrumentation. + +Covers two suppression surfaces: +1. SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY context key — set per-request + by the LangChain instrumentor to prevent duplicate LLM spans when both + instrumentors are active simultaneously. +2. SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION environment variable — set globally + for zero-code deployments alongside OTEL_PYTHON_DISABLED_INSTRUMENTATIONS=openai. """ import pytest @@ -24,6 +30,11 @@ SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY, ) +# The env var name is the uppercase form of the context key string. +SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_ENV_VAR = ( + SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY.upper() +) + @pytest.mark.vcr() def test_chat_completion_suppressed( @@ -144,3 +155,81 @@ def test_chat_completion_not_suppressed_by_default( # Should have at least the main chat completion span chat_spans = [s for s in spans if "chat" in s.name.lower()] assert len(chat_spans) > 0 + + +# --------------------------------------------------------------------------- +# Environment variable suppression +# --------------------------------------------------------------------------- + + +@pytest.mark.vcr() +def test_chat_completion_suppressed_via_env_var( + monkeypatch, span_exporter, openai_client, instrument_with_content +): + """SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION=true suppresses spans globally.""" + monkeypatch.setenv(SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_ENV_VAR, "true") + + response = openai_client.chat.completions.create( + model="gpt-4o-mini", + messages=[{"role": "user", "content": "Say this is a test"}], + ) + assert response is not None + + spans = span_exporter.get_finished_spans() + assert len(spans) == 0 + + +@pytest.mark.vcr() +def test_chat_completion_not_suppressed_when_env_var_false( + monkeypatch, span_exporter, openai_client, instrument_with_content +): + """SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION=false leaves instrumentation active.""" + monkeypatch.setenv( + SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_ENV_VAR, "false" + ) + + response = openai_client.chat.completions.create( + model="gpt-4o-mini", + messages=[{"role": "user", "content": "Say this is a test"}], + ) + assert response is not None + + spans = span_exporter.get_finished_spans() + assert len(spans) > 0 + + +@pytest.mark.parametrize("value", ["true", "1", "yes", "on", "TRUE"]) +def test_env_var_truthy_values_suppress(monkeypatch, value): + """All truthy spellings of the env var are recognised.""" + monkeypatch.setenv(SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_ENV_VAR, value) + + from opentelemetry.instrumentation.openai_v2.patch import ( + _is_instrumentation_suppressed, + ) + + assert _is_instrumentation_suppressed() is True + + +@pytest.mark.parametrize("value", ["false", "0", "no", "off", "FALSE", ""]) +def test_env_var_falsey_values_do_not_suppress(monkeypatch, value): + """Falsey env var spellings leave instrumentation active.""" + monkeypatch.setenv(SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_ENV_VAR, value) + + from opentelemetry.instrumentation.openai_v2.patch import ( + _is_instrumentation_suppressed, + ) + + assert _is_instrumentation_suppressed() is False + + +def test_env_var_unset_does_not_suppress(monkeypatch): + """When the env var is absent, instrumentation is active by default.""" + monkeypatch.delenv( + SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_ENV_VAR, raising=False + ) + + from opentelemetry.instrumentation.openai_v2.patch import ( + _is_instrumentation_suppressed, + ) + + assert _is_instrumentation_suppressed() is False diff --git a/util/opentelemetry-util-genai/CHANGELOG.md b/util/opentelemetry-util-genai/CHANGELOG.md index 6b6571fd..d3cecb5a 100644 --- a/util/opentelemetry-util-genai/CHANGELOG.md +++ b/util/opentelemetry-util-genai/CHANGELOG.md @@ -6,6 +6,12 @@ All notable changes to this repository are documented in this file. ### Added +- **`SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION` env var** — The uppercase form of + `SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY` is now also checked as an + environment variable by the openai-v2 instrumentor, allowing zero-code + deployments to set suppression globally without the LangChain instrumentor + injecting the context key per-request. No new constant added. + - **`OTEL_INSTRUMENTATION_GENAI_EMIT_EVENT` env var** — Explicit override for content event emission (`true`/`false`). When unset, defaults are derived from `OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT` mode. ### Changed diff --git a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/attributes.py b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/attributes.py index 375b7b7a..03e71e9b 100644 --- a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/attributes.py +++ b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/attributes.py @@ -95,7 +95,11 @@ GEN_AI_SECURITY_EVENT_ID = "gen_ai.security.event_id" # Context key for suppressing instrumentation to avoid duplicate telemetry -# when multiple instrumentations (e.g., LangChain + OpenAI) are active +# when multiple instrumentations (e.g., LangChain + OpenAI) are active. +# The uppercase form of this string is also checked as an environment variable +# (SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION) so zero-code deployments can set +# suppression globally without the LangChain instrumentor injecting it +# per-request. Use together with OTEL_PYTHON_DISABLED_INSTRUMENTATIONS=openai. SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY = ( "suppress_language_model_instrumentation" )