From c092f2029256a3b72000d27de24c4e58caf8328f Mon Sep 17 00:00:00 2001 From: adityamehra Date: Wed, 13 May 2026 16:34:15 -0700 Subject: [PATCH 01/10] feat(langchain): Release version 0.1.14 --- RELEASING.md | 6 +++--- .../opentelemetry-instrumentation-langchain/CHANGELOG.md | 4 ++++ .../src/opentelemetry/instrumentation/langchain/version.py | 2 +- 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/RELEASING.md b/RELEASING.md index cc76acdd..1dbceac6 100644 --- a/RELEASING.md +++ b/RELEASING.md @@ -10,19 +10,19 @@ How to release a new version of the GenAI packages: | Package Name | Path | Current Version | Tag Format | |-------------|------|-----------------|-------------------------------------| -| splunk-otel-util-genai | `util/opentelemetry-util-genai` | 0.1.11 | `util-genai-v0.Y.Z` | +| splunk-otel-util-genai | `util/opentelemetry-util-genai` | 0.1.11 | `util-genai-v0.Y.Z` | | splunk-otel-util-genai-evals | `util/opentelemetry-util-genai-evals` | 0.1.8 | `util-genai-evals-v0.Y.Z` | | splunk-otel-genai-emitters-splunk | `util/opentelemetry-util-genai-emitters-splunk` | 0.1.8 | `genai-emitters-splunk-v0.Y.Z` | | splunk-otel-genai-evals-deepeval | `util/opentelemetry-util-genai-evals-deepeval` | 0.1.14 | `genai-evals-deepeval-v0.Y.Z` | | splunk-otel-util-genai-translator-traceloop | `util/opentelemetry-util-genai-traceloop-translator` | 0.1.8 | `genai-translator-traceloop-v0.Y.Z` | | splunk-otel-util-genai-translator-langsmith | `util/opentelemetry-util-genai-langsmith-translator` | 0.1.1 | `genai-translator-langsmith-v0.Y.Z` | | splunk-otel-util-genai-translator-openlit | `util/opentelemetry-util-genai-openlit-translator` | 0.1.2 | `genai-translator-openlit-v0.Y.Z` | -| splunk-otel-instrumentation-langchain | `instrumentation-genai/opentelemetry-instrumentation-langchain` | 0.1.9 | `instrumentation-langchain-v0.Y.Z` | +| splunk-otel-instrumentation-langchain | `instrumentation-genai/opentelemetry-instrumentation-langchain` | 0.1.14 | 
`instrumentation-langchain-v0.Y.Z` | | splunk-otel-instrumentation-llamaindex | `instrumentation-genai/opentelemetry-instrumentation-llamaindex` | 0.1.1 | `instrumentation-llamaindex-v0.Y.Z` | | splunk-otel-instrumentation-aidefense | `instrumentation-genai/opentelemetry-instrumentation-aidefense` | 0.2.1 | `instrumentation-aidefense-v0.Y.Z` | | splunk-otel-instrumentation-weaviate | `instrumentation-genai/opentelemetry-instrumentation-weaviate` | 0.1.0 | `instrumentation-weaviate-v0.Y.Z` | | splunk-otel-instrumentation-crewai | `instrumentation-genai/opentelemetry-instrumentation-crewai` | 0.1.3 | `instrumentation-crewai-v0.Y.Z` | -| splunk-otel-instrumentation-openai-agents | `instrumentation-genai/opentelemetry-instrumentation-openai-agents-v2` | 0.1.3 | `instrumentation-openai-agents-v0.Y.Z` | +| splunk-otel-instrumentation-openai-agents | `instrumentation-genai/opentelemetry-instrumentation-openai-agents-v2` | 0.1.3 | `instrumentation-openai-agents-v0.Y.Z` | | splunk-otel-instrumentation-fastmcp | `instrumentation-genai/opentelemetry-instrumentation-fastmcp` | 0.1.1 | `instrumentation-fastmcp-v0.Y.Z` | ### Release Steps diff --git a/instrumentation-genai/opentelemetry-instrumentation-langchain/CHANGELOG.md b/instrumentation-genai/opentelemetry-instrumentation-langchain/CHANGELOG.md index 30f8e18b..0a18648c 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-langchain/CHANGELOG.md +++ b/instrumentation-genai/opentelemetry-instrumentation-langchain/CHANGELOG.md @@ -2,6 +2,10 @@ All notable changes to this repository are documented in this file. 
+## Version 0.1.14 + +### Bump version for release + ## Version 0.1.13 ### Fixed diff --git a/instrumentation-genai/opentelemetry-instrumentation-langchain/src/opentelemetry/instrumentation/langchain/version.py b/instrumentation-genai/opentelemetry-instrumentation-langchain/src/opentelemetry/instrumentation/langchain/version.py index 3cb7d95e..fb69db9c 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-langchain/src/opentelemetry/instrumentation/langchain/version.py +++ b/instrumentation-genai/opentelemetry-instrumentation-langchain/src/opentelemetry/instrumentation/langchain/version.py @@ -1 +1 @@ -__version__ = "0.1.13" +__version__ = "0.1.14" From 6b2da1914b02829870d7cb0b657d4716735d4751 Mon Sep 17 00:00:00 2001 From: adityamehra Date: Mon, 18 May 2026 09:10:57 -0700 Subject: [PATCH 02/10] fix(openai-v2): add __getattr__ to StreamWrapper to proxy unknown attributes StreamWrapper was missing __getattr__, causing AttributeError when LiteLLM (and other clients) access raw_response.headers after calling with_raw_response.create(stream=True). Ports the fix from upstream opentelemetry-python-contrib PR #4184 (fixes issue #4113). 
Co-authored-by: Cursor --- .../instrumentation/openai_v2/patch.py | 3 +++ .../tests/test_patch_unit.py | 19 +++++++++++++++++++ 2 files changed, 22 insertions(+) diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/patch.py b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/patch.py index b3a04dc5..5c9e2eec 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/patch.py +++ b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/patch.py @@ -768,6 +768,9 @@ def __init__( self._first_chunk_processed = False self.setup() + def __getattr__(self, name: str): + return getattr(self.stream, name) + def setup(self): if not self._span_started: self._span_started = True diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_patch_unit.py b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_patch_unit.py index 8e319357..49239b71 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_patch_unit.py +++ b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_patch_unit.py @@ -202,6 +202,25 @@ def test_time_to_first_chunk_only_captured_once(self): "time_to_first_chunk should not change after first chunk" ) + def test_getattr_proxies_unknown_attributes_to_stream(self): + """Test that unknown attributes (e.g. .headers) are proxied to the + underlying stream, fixing the AttributeError raised by LiteLLM when + accessing raw_response.headers after with_raw_response streaming. 
+ Regression test for https://github.com/open-telemetry/opentelemetry-python-contrib/issues/4113 + """ + invocation = LLMInvocation(request_model="gpt-4o") + mock_stream = MagicMock() + mock_stream.headers = {"content-type": "application/json"} + mock_handler = MagicMock() + + wrapper = StreamWrapper( + stream=mock_stream, + invocation=invocation, + handler=mock_handler, + ) + + assert wrapper.headers == {"content-type": "application/json"} + def test_time_to_first_chunk_not_captured_without_start_time(self): """Test that time_to_first_chunk is not captured without _start_time.""" invocation = LLMInvocation(request_model="gpt-4o") From ec35675e7b5f75d40f3e2549325298d97de2148b Mon Sep 17 00:00:00 2001 From: adityamehra Date: Mon, 18 May 2026 11:04:05 -0700 Subject: [PATCH 03/10] docs(openai-v2): add changelog entry for StreamWrapper __getattr__ fix Co-authored-by: Cursor --- .../opentelemetry-instrumentation-openai-v2/CHANGELOG.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/CHANGELOG.md b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/CHANGELOG.md index 029f8ad5..5506a9f0 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/CHANGELOG.md +++ b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/CHANGELOG.md @@ -7,6 +7,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## Unreleased +### Fixed + +- Fix `AttributeError: 'StreamWrapper' object has no attribute 'headers'` when + using `with_raw_response.create(stream=True)` (e.g. via LiteLLM's Azure provider). + `StreamWrapper` now proxies unknown attribute lookups to the underlying stream via + `__getattr__`. Ports upstream fix + ([opentelemetry-python-contrib#4184](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/4184), + fixes [#4113](https://github.com/open-telemetry/opentelemetry-python-contrib/issues/4113)). 
+ ### Added - Add `gen_ai.tool.definitions` attribute on LLM spans when From 7173333e73a1665bf8ddfb3429674065a60febc4 Mon Sep 17 00:00:00 2001 From: adityamehra Date: Mon, 18 May 2026 22:14:55 -0700 Subject: [PATCH 04/10] fix(openai-v2): preserve LegacyAPIResponse headers on StreamWrapper for with_raw_response streaming When LiteLLM calls with_raw_response.create(stream=True), the OpenAI SDK returns a LegacyAPIResponse (with .headers). SDOT's _parse_response was calling .parse() on it to get the AsyncStream before wrapping in StreamWrapper, discarding the headers. LiteLLM then accesses raw_response.headers, which failed with AttributeError since AsyncStream has no .headers attribute. Fix: - Capture LegacyAPIResponse.headers before _parse_response discards it - Store captured headers as StreamWrapper.headers (direct attribute, not proxied) - Add parse() returning self so callers can treat StreamWrapper as a raw response - Keep __getattr__ for any other unknown attribute proxying to the underlying stream Co-authored-by: Cursor --- .../CHANGELOG.md | 8 +++- .../instrumentation/openai_v2/patch.py | 12 ++++++ .../tests/test_patch_unit.py | 37 ++++++++++++++++--- 3 files changed, 50 insertions(+), 7 deletions(-) diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/CHANGELOG.md b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/CHANGELOG.md index 5506a9f0..f24877e3 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/CHANGELOG.md +++ b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/CHANGELOG.md @@ -11,8 +11,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Fix `AttributeError: 'StreamWrapper' object has no attribute 'headers'` when using `with_raw_response.create(stream=True)` (e.g. via LiteLLM's Azure provider). - `StreamWrapper` now proxies unknown attribute lookups to the underlying stream via - `__getattr__`. 
Ports upstream fix + `_parse_response` was calling `.parse()` on the `LegacyAPIResponse` before wrapping + in `StreamWrapper`, discarding the raw HTTP headers. `StreamWrapper` now captures + headers from the `LegacyAPIResponse` before it is parsed and exposes them directly, + and adds a `parse()` method returning `self` so callers can treat the wrapper as + a drop-in for the raw response. Also adds `__getattr__` to proxy any other unknown + attributes to the underlying stream. Inspired by upstream fix ([opentelemetry-python-contrib#4184](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/4184), fixes [#4113](https://github.com/open-telemetry/opentelemetry-python-contrib/issues/4113)). diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/patch.py b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/patch.py index 5c9e2eec..48b38951 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/patch.py +++ b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/patch.py @@ -418,12 +418,14 @@ def traced_method(wrapped, instance, args, kwargs): raise try: + raw_headers = getattr(result, "headers", None) parsed_result = _parse_response(result) if is_streaming(kwargs): return StreamWrapper( parsed_result, invocation, handler, + raw_headers=raw_headers, ) if span and span.is_recording(): @@ -471,12 +473,14 @@ async def traced_method(wrapped, instance, args, kwargs): raise try: + raw_headers = getattr(result, "headers", None) parsed_result = _parse_response(result) if is_streaming(kwargs): return StreamWrapper( parsed_result, invocation, handler, + raw_headers=raw_headers, ) if span and span.is_recording(): @@ -755,11 +759,13 @@ def __init__( stream: Stream, invocation: LLMInvocation, handler, + raw_headers=None, ): self.stream = 
stream self.invocation = invocation self.span = getattr(invocation, "span", None) self.handler = handler + self.headers = raw_headers self.choice_buffers = [] self.finish_reasons = [] # Instance-level to avoid cross-request contamination self._span_started = False @@ -771,6 +777,12 @@ def __init__( def __getattr__(self, name: str): return getattr(self.stream, name) + def parse(self): + """Proxy for with_raw_response callers (e.g. LiteLLM) that call + .parse() on the result of with_raw_response.create(stream=True). + Returns self so the caller can iterate the stream directly.""" + return self + def setup(self): if not self._span_started: self._span_started = True diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_patch_unit.py b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_patch_unit.py index 49239b71..146217ae 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_patch_unit.py +++ b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_patch_unit.py @@ -202,15 +202,42 @@ def test_time_to_first_chunk_only_captured_once(self): "time_to_first_chunk should not change after first chunk" ) + def test_raw_response_headers_stored_from_legacy_api_response(self): + """Test the real production scenario: LiteLLM calls with_raw_response.create(stream=True), + which makes the OpenAI SDK return a LegacyAPIResponse. SDOT's _parse_response calls + .parse() on it to get the AsyncStream — discarding .headers. StreamWrapper must + preserve the headers captured before _parse_response so LiteLLM can access them. 
+ + Regression test for the AttributeError seen in production: + File "litellm/llms/azure/azure.py", line 176 + headers = dict(raw_response.headers) + AttributeError: 'StreamWrapper' object has no attribute 'headers' + """ + invocation = LLMInvocation(request_model="gpt-4o") + mock_stream = MagicMock(spec=[]) # AsyncStream has no .headers + mock_handler = MagicMock() + raw_headers = {"content-type": "text/event-stream", "x-request-id": "abc123"} + + wrapper = StreamWrapper( + stream=mock_stream, + invocation=invocation, + handler=mock_handler, + raw_headers=raw_headers, + ) + + # LiteLLM: headers = dict(raw_response.headers) + assert wrapper.headers == raw_headers + # LiteLLM: response = raw_response.parse() + assert wrapper.parse() is wrapper + def test_getattr_proxies_unknown_attributes_to_stream(self): - """Test that unknown attributes (e.g. .headers) are proxied to the - underlying stream, fixing the AttributeError raised by LiteLLM when - accessing raw_response.headers after with_raw_response streaming. + """Test that unknown attributes on StreamWrapper are proxied to the + underlying stream object. 
Regression test for https://github.com/open-telemetry/opentelemetry-python-contrib/issues/4113 """ invocation = LLMInvocation(request_model="gpt-4o") mock_stream = MagicMock() - mock_stream.headers = {"content-type": "application/json"} + mock_stream.some_custom_attr = "value" mock_handler = MagicMock() wrapper = StreamWrapper( @@ -219,7 +246,7 @@ def test_getattr_proxies_unknown_attributes_to_stream(self): handler=mock_handler, ) - assert wrapper.headers == {"content-type": "application/json"} + assert wrapper.some_custom_attr == "value" def test_time_to_first_chunk_not_captured_without_start_time(self): """Test that time_to_first_chunk is not captured without _start_time.""" From 044cf8b9487b85db2a11aaf543e63daf0651ab8b Mon Sep 17 00:00:00 2001 From: adityamehra Date: Tue, 19 May 2026 11:34:22 -0700 Subject: [PATCH 05/10] style: apply ruff formatting to test_patch_unit.py Co-authored-by: Cursor --- .../tests/test_patch_unit.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_patch_unit.py b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_patch_unit.py index 146217ae..6745238b 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_patch_unit.py +++ b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_patch_unit.py @@ -216,7 +216,10 @@ def test_raw_response_headers_stored_from_legacy_api_response(self): invocation = LLMInvocation(request_model="gpt-4o") mock_stream = MagicMock(spec=[]) # AsyncStream has no .headers mock_handler = MagicMock() - raw_headers = {"content-type": "text/event-stream", "x-request-id": "abc123"} + raw_headers = { + "content-type": "text/event-stream", + "x-request-id": "abc123", + } wrapper = StreamWrapper( stream=mock_stream, From 2706482c318ace0872dbd1b2abfc84a6db807766 Mon Sep 17 00:00:00 2001 From: adityamehra Date: Tue, 19 May 2026 12:20:55 -0700 Subject: [PATCH 
06/10] test(openai-v2): add standalone reproducer for with_raw_response streaming headers bug Reproduces the production error reported in lab0: AttributeError: 'StreamWrapper' object has no attribute 'headers' Mirrors the pattern from upstream issues: #4032 - StreamWrapper missing .parse() #4113 - StreamWrapper missing .headers Co-authored-by: Cursor --- .../reproduce_raw_response_streaming.py | 149 ++++++++++++++++++ 1 file changed, 149 insertions(+) create mode 100644 instrumentation-genai/opentelemetry-instrumentation-openai-v2/reproduce_raw_response_streaming.py diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/reproduce_raw_response_streaming.py b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/reproduce_raw_response_streaming.py new file mode 100644 index 00000000..9bf44fd8 --- /dev/null +++ b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/reproduce_raw_response_streaming.py @@ -0,0 +1,149 @@ +""" +Reproducer for: StreamWrapper missing .headers when LiteLLM calls +with_raw_response.create(stream=True). + +Production error (lab0, 2026-05-15): + File "litellm/llms/azure/azure.py", line 176 + headers = dict(raw_response.headers) + AttributeError: 'StreamWrapper' object has no attribute 'headers' + +LiteLLM's Azure provider always calls: + raw_response = await azure_client.chat.completions.with_raw_response.create(...) 
+ headers = dict(raw_response.headers) # <-- fails when SDOT is active + response = raw_response.parse() # <-- also fails without parse() + +Related upstream issues: + #4032 - StreamWrapper missing .parse() (fixed) + #4113 - StreamWrapper missing .headers (fixed upstream, not yet in SDOT) + +Run: + pip install openai opentelemetry-sdk splunk-otel-instrumentation-openai + python reproduce_raw_response_streaming.py +""" + +import asyncio +import json +from unittest.mock import AsyncMock, MagicMock, patch + +import httpx +from openai import AsyncAzureOpenAI + +from opentelemetry.instrumentation.openai_v2 import OpenAIInstrumentor +from opentelemetry.sdk.trace import TracerProvider +from opentelemetry.sdk.trace.export import SimpleSpanProcessor +from opentelemetry.sdk.trace.export.in_memory_span_exporter import ( + InMemorySpanExporter, +) + +# --------------------------------------------------------------------------- +# Minimal SSE streaming response that mimics Azure OpenAI +# --------------------------------------------------------------------------- +SSE_CHUNKS = [ + b'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"role":"assistant","content":"Hello"},"finish_reason":null}]}\n\n', + b'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":"!"},"finish_reason":"stop"}]}\n\n', + b"data: [DONE]\n\n", +] + + +def _make_mock_httpx_response() -> httpx.Response: + """Return a minimal mock httpx.Response with headers and a streaming body. + + The request headers must include X-Stainless-Raw-Response: true so the + OpenAI SDK returns LegacyAPIResponse (sync .parse()) instead of + AsyncAPIResponse (async .parse()). SDOT's _parse_response calls .parse() + synchronously, so it must be LegacyAPIResponse. 
+ """ + response_headers = { + "content-type": "text/event-stream", + "x-request-id": "test-request-id-abc123", + "openai-model": "gpt-4o", + } + # RAW_RESPONSE_HEADER = "X-Stainless-Raw-Response" — must be "true" so the + # OpenAI SDK wraps the response in LegacyAPIResponse (sync .parse()). + request_headers = httpx.Headers({"X-Stainless-Raw-Response": "true"}) + + async def aiter_bytes(_chunk_size=None): + for chunk in SSE_CHUNKS: + yield chunk + + mock_request = MagicMock(spec=httpx.Request) + mock_request.headers = request_headers + + mock_response = MagicMock(spec=httpx.Response) + mock_response.status_code = 200 + mock_response.headers = httpx.Headers(response_headers) + mock_response.aiter_bytes = aiter_bytes + mock_response.aclose = AsyncMock() + mock_response.request = mock_request + mock_response.http_version = "HTTP/1.1" + mock_response.elapsed = MagicMock() + return mock_response + + +# --------------------------------------------------------------------------- +# Reproducer +# --------------------------------------------------------------------------- +async def reproducer(): + # Set up OTel + exporter = InMemorySpanExporter() + provider = TracerProvider() + provider.add_span_processor(SimpleSpanProcessor(exporter)) + + instrumentor = OpenAIInstrumentor() + instrumentor.instrument(tracer_provider=provider) + + client = AsyncAzureOpenAI( + api_key="test-key", + azure_endpoint="https://test.openai.azure.com", + api_version="2024-02-15-preview", + ) + + mock_httpx_response = _make_mock_httpx_response() + + # Patch the underlying httpx send so no real network call is made + with patch.object( + client._client, + "send", + new_callable=AsyncMock, + return_value=mock_httpx_response, + ): + # This is exactly what LiteLLM's Azure provider does: + # https://github.com/BerriAI/litellm/blob/main/litellm/llms/azure/azure.py#L167-L176 + raw_response = await client.chat.completions.with_raw_response.create( + model="gpt-4o", + messages=[{"role": "user", "content": 
"Say hello"}], + max_tokens=10, + stream=True, + ) + + print(f"raw_response type: {type(raw_response).__name__}") + + # Step 1: LiteLLM accesses .headers — this is the line that crashed + headers = dict(raw_response.headers) + print(f"✓ raw_response.headers: {json.dumps(headers, indent=2)}") + + # Step 2: LiteLLM calls .parse() to get the stream + response = raw_response.parse() + print(f"✓ raw_response.parse() returned: {type(response).__name__}") + + # Step 3: iterate the stream + collected = [] + async for chunk in response: + for choice in chunk.choices: + if choice.delta.content: + collected.append(choice.delta.content) + + text = "".join(collected) + print(f"✓ streamed content: {text!r}") + + spans = exporter.get_finished_spans() + print(f"✓ OTel spans recorded: {len(spans)}") + for span in spans: + print(f" - {span.name}") + + instrumentor.uninstrument() + print("\nAll assertions passed — bug is fixed.") + + +if __name__ == "__main__": + asyncio.run(reproducer()) From 2df57a324daf6c7be475fba6793fca8d5d291718 Mon Sep 17 00:00:00 2001 From: adityamehra Date: Tue, 19 May 2026 12:21:52 -0700 Subject: [PATCH 07/10] test(openai-v2): use make_azure_openai_chat_completion_request verbatim in reproducer Co-authored-by: Cursor --- .../reproduce_raw_response_streaming.py | 125 ++++++++++-------- 1 file changed, 70 insertions(+), 55 deletions(-) diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/reproduce_raw_response_streaming.py b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/reproduce_raw_response_streaming.py index 9bf44fd8..123d7ff9 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/reproduce_raw_response_streaming.py +++ b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/reproduce_raw_response_streaming.py @@ -1,28 +1,30 @@ """ Reproducer for: StreamWrapper missing .headers when LiteLLM calls -with_raw_response.create(stream=True). 
+with_raw_response.create(stream=True) on an SDOT-instrumented Azure OpenAI client. -Production error (lab0, 2026-05-15): - File "litellm/llms/azure/azure.py", line 176 +Production traceback (lab0, 2026-05-15): + File "litellm/llms/azure/azure.py", line 619, in async_streaming + headers, response = await self.make_azure_openai_chat_completion_request(...) + File "litellm/llms/azure/azure.py", line 176, in make_azure_openai_chat_completion_request headers = dict(raw_response.headers) AttributeError: 'StreamWrapper' object has no attribute 'headers' -LiteLLM's Azure provider always calls: - raw_response = await azure_client.chat.completions.with_raw_response.create(...) - headers = dict(raw_response.headers) # <-- fails when SDOT is active - response = raw_response.parse() # <-- also fails without parse() +This reproducer calls make_azure_openai_chat_completion_request verbatim to +confirm the fix in SDOT's StreamWrapper resolves the crash. Related upstream issues: - #4032 - StreamWrapper missing .parse() (fixed) - #4113 - StreamWrapper missing .headers (fixed upstream, not yet in SDOT) + #4032 - StreamWrapper missing .parse() (fixed upstream) + #4113 - StreamWrapper missing .headers (fixed upstream via __getattr__, + but SDOT needed a deeper fix: preserve LegacyAPIResponse.headers + before _parse_response discards it) Run: - pip install openai opentelemetry-sdk splunk-otel-instrumentation-openai + pip install openai litellm opentelemetry-sdk splunk-otel-instrumentation-openai python reproduce_raw_response_streaming.py """ import asyncio -import json +import time from unittest.mock import AsyncMock, MagicMock, patch import httpx @@ -36,38 +38,36 @@ ) # --------------------------------------------------------------------------- -# Minimal SSE streaming response that mimics Azure OpenAI +# Minimal SSE streaming chunks that mimic Azure OpenAI # --------------------------------------------------------------------------- SSE_CHUNKS = [ - b'data: 
{"id":"chatcmpl-123","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"role":"assistant","content":"Hello"},"finish_reason":null}]}\n\n', - b'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":"!"},"finish_reason":"stop"}]}\n\n', + b'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","model":"gpt-4o","choices":[{"index":0,"delta":{"role":"assistant","content":"Hello"},"finish_reason":null}]}\n\n', + b'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","model":"gpt-4o","choices":[{"index":0,"delta":{"content":"!"},"finish_reason":"stop"}]}\n\n', b"data: [DONE]\n\n", ] def _make_mock_httpx_response() -> httpx.Response: - """Return a minimal mock httpx.Response with headers and a streaming body. + """Build a fake httpx.Response that the OpenAI SDK treats as a raw streaming response. - The request headers must include X-Stainless-Raw-Response: true so the - OpenAI SDK returns LegacyAPIResponse (sync .parse()) instead of - AsyncAPIResponse (async .parse()). SDOT's _parse_response calls .parse() - synchronously, so it must be LegacyAPIResponse. + The request must carry X-Stainless-Raw-Response: true so the OpenAI SDK + returns LegacyAPIResponse (sync .parse()) rather than AsyncAPIResponse + (async .parse()). SDOT's _parse_response calls .parse() synchronously. """ response_headers = { "content-type": "text/event-stream", "x-request-id": "test-request-id-abc123", "openai-model": "gpt-4o", + "ms-azureml-model-session": "d0", } - # RAW_RESPONSE_HEADER = "X-Stainless-Raw-Response" — must be "true" so the - # OpenAI SDK wraps the response in LegacyAPIResponse (sync .parse()). 
- request_headers = httpx.Headers({"X-Stainless-Raw-Response": "true"}) async def aiter_bytes(_chunk_size=None): for chunk in SSE_CHUNKS: yield chunk mock_request = MagicMock(spec=httpx.Request) - mock_request.headers = request_headers + # RAW_RESPONSE_HEADER value that async_to_raw_response_wrapper injects + mock_request.headers = httpx.Headers({"X-Stainless-Raw-Response": "true"}) mock_response = MagicMock(spec=httpx.Response) mock_response.status_code = 200 @@ -80,11 +80,33 @@ async def aiter_bytes(_chunk_size=None): return mock_response +# --------------------------------------------------------------------------- +# Verbatim copy of LiteLLM's make_azure_openai_chat_completion_request +# (litellm/llms/azure/azure.py lines 154-179) +# --------------------------------------------------------------------------- +async def make_azure_openai_chat_completion_request(azure_client, data, timeout): + """ + Helper to: + - call chat.completions.create.with_raw_response when litellm.return_response_headers is True + - call chat.completions.create by default + """ + start_time = time.time() + try: + raw_response = await azure_client.chat.completions.with_raw_response.create( + **data, timeout=timeout + ) + + headers = dict(raw_response.headers) + response = raw_response.parse() + return headers, response + except Exception as e: + raise e + + # --------------------------------------------------------------------------- # Reproducer # --------------------------------------------------------------------------- async def reproducer(): - # Set up OTel exporter = InMemorySpanExporter() provider = TracerProvider() provider.add_span_processor(SimpleSpanProcessor(exporter)) @@ -92,57 +114,50 @@ async def reproducer(): instrumentor = OpenAIInstrumentor() instrumentor.instrument(tracer_provider=provider) - client = AsyncAzureOpenAI( + azure_client = AsyncAzureOpenAI( api_key="test-key", azure_endpoint="https://test.openai.azure.com", api_version="2024-02-15-preview", ) + data = { + 
"model": "gpt-4o", + "messages": [{"role": "user", "content": "Say hello"}], + "max_tokens": 10, + "stream": True, + } + mock_httpx_response = _make_mock_httpx_response() - # Patch the underlying httpx send so no real network call is made with patch.object( - client._client, + azure_client._client, "send", new_callable=AsyncMock, return_value=mock_httpx_response, ): - # This is exactly what LiteLLM's Azure provider does: - # https://github.com/BerriAI/litellm/blob/main/litellm/llms/azure/azure.py#L167-L176 - raw_response = await client.chat.completions.with_raw_response.create( - model="gpt-4o", - messages=[{"role": "user", "content": "Say hello"}], - max_tokens=10, - stream=True, + # This is the exact call that crashed in production + headers, response = await make_azure_openai_chat_completion_request( + azure_client=azure_client, + data=data, + timeout=60.0, ) - print(f"raw_response type: {type(raw_response).__name__}") - - # Step 1: LiteLLM accesses .headers — this is the line that crashed - headers = dict(raw_response.headers) - print(f"✓ raw_response.headers: {json.dumps(headers, indent=2)}") - - # Step 2: LiteLLM calls .parse() to get the stream - response = raw_response.parse() - print(f"✓ raw_response.parse() returned: {type(response).__name__}") + print(f"✓ headers accessible: {dict(headers)}") + print(f"✓ response type: {type(response).__name__}") - # Step 3: iterate the stream - collected = [] - async for chunk in response: - for choice in chunk.choices: - if choice.delta.content: - collected.append(choice.delta.content) + collected = [] + async for chunk in response: + for choice in chunk.choices: + if choice.delta.content: + collected.append(choice.delta.content) - text = "".join(collected) - print(f"✓ streamed content: {text!r}") + print(f"✓ streamed content: {''.join(collected)!r}") spans = exporter.get_finished_spans() - print(f"✓ OTel spans recorded: {len(spans)}") - for span in spans: - print(f" - {span.name}") + print(f"✓ OTel spans: {[s.name for 
s in spans]}") instrumentor.uninstrument() - print("\nAll assertions passed — bug is fixed.") + print("\nReproducer passed — 'StreamWrapper' has no attribute 'headers' is fixed.") if __name__ == "__main__": From 57328cec86073126dc9c95cd53baab6c8152d31b Mon Sep 17 00:00:00 2001 From: adityamehra Date: Tue, 19 May 2026 12:38:31 -0700 Subject: [PATCH 08/10] chore(openai-v2): move reproducer script to examples/scripts/ Co-authored-by: Cursor --- .../{ => examples/scripts}/reproduce_raw_response_streaming.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename instrumentation-genai/opentelemetry-instrumentation-openai-v2/{ => examples/scripts}/reproduce_raw_response_streaming.py (100%) diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/reproduce_raw_response_streaming.py b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/examples/scripts/reproduce_raw_response_streaming.py similarity index 100% rename from instrumentation-genai/opentelemetry-instrumentation-openai-v2/reproduce_raw_response_streaming.py rename to instrumentation-genai/opentelemetry-instrumentation-openai-v2/examples/scripts/reproduce_raw_response_streaming.py From a2fe60e7c700c4a05094f9d25bcaba553528b306 Mon Sep 17 00:00:00 2001 From: adityamehra Date: Tue, 19 May 2026 12:39:01 -0700 Subject: [PATCH 09/10] fix(openai-v2): remove unused time import from reproducer Co-authored-by: Cursor --- .../scripts/reproduce_raw_response_streaming.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/examples/scripts/reproduce_raw_response_streaming.py b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/examples/scripts/reproduce_raw_response_streaming.py index 123d7ff9..52839fc3 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/examples/scripts/reproduce_raw_response_streaming.py +++ 
b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/examples/scripts/reproduce_raw_response_streaming.py @@ -24,7 +24,6 @@ """ import asyncio -import time from unittest.mock import AsyncMock, MagicMock, patch import httpx @@ -84,16 +83,19 @@ async def aiter_bytes(_chunk_size=None): # Verbatim copy of LiteLLM's make_azure_openai_chat_completion_request # (litellm/llms/azure/azure.py lines 154-179) # --------------------------------------------------------------------------- -async def make_azure_openai_chat_completion_request(azure_client, data, timeout): +async def make_azure_openai_chat_completion_request( + azure_client, data, timeout +): """ Helper to: - call chat.completions.create.with_raw_response when litellm.return_response_headers is True - call chat.completions.create by default """ - start_time = time.time() try: - raw_response = await azure_client.chat.completions.with_raw_response.create( - **data, timeout=timeout + raw_response = ( + await azure_client.chat.completions.with_raw_response.create( + **data, timeout=timeout + ) ) headers = dict(raw_response.headers) @@ -157,7 +159,9 @@ async def reproducer(): print(f"✓ OTel spans: {[s.name for s in spans]}") instrumentor.uninstrument() - print("\nReproducer passed — 'StreamWrapper' has no attribute 'headers' is fixed.") + print( + "\nReproducer passed — 'StreamWrapper' has no attribute 'headers' is fixed." + ) if __name__ == "__main__": From 8f324d4bc470979307e99213143b61772108779e Mon Sep 17 00:00:00 2001 From: adityamehra Date: Tue, 19 May 2026 13:29:37 -0700 Subject: [PATCH 10/10] feat(suppress): expose SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION as env var MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Document SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION in util-genai attributes as the environment variable counterpart of the existing suppress_language_model_instrumentation OTel context key; the env var name is the uppercase form of the key, so no new constant is added.
The openai-v2 instrumentor now checks both surfaces via a single _is_instrumentation_suppressed() helper: 1. OTel context key (existing) — set per-request by the LangChain instrumentor to prevent duplicate LLM spans. 2. New env var — set globally for zero-code deployments together with OTEL_PYTHON_DISABLED_INSTRUMENTATIONS=openai. One concept, two surfaces. No new flags introduced in the LangChain package. Co-authored-by: Cursor --- .../CHANGELOG.md | 21 +++ .../instrumentation/openai_v2/patch.py | 28 +++- ...ion_not_suppressed_when_env_var_false.yaml | 134 ++++++++++++++++++ ...hat_completion_suppressed_via_env_var.yaml | 134 ++++++++++++++++++ .../tests/test_suppression.py | 93 +++++++++++- util/opentelemetry-util-genai/CHANGELOG.md | 6 + .../opentelemetry/util/genai/attributes.py | 6 +- 7 files changed, 415 insertions(+), 7 deletions(-) create mode 100644 instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/cassettes/test_chat_completion_not_suppressed_when_env_var_false.yaml create mode 100644 instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/cassettes/test_chat_completion_suppressed_via_env_var.yaml diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/CHANGELOG.md b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/CHANGELOG.md index 433736f9..6eb91dc9 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/CHANGELOG.md +++ b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/CHANGELOG.md @@ -9,6 +9,27 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ### Fixed +- Fix `AttributeError: 'StreamWrapper' object has no attribute 'headers'` when + using `with_raw_response.create(stream=True)` (e.g. via LiteLLM's Azure provider). + `_parse_response` was calling `.parse()` on the `LegacyAPIResponse` before wrapping + in `StreamWrapper`, discarding the raw HTTP headers. 
`StreamWrapper` now captures + headers from the `LegacyAPIResponse` before it is parsed and exposes them directly, + and adds a `parse()` method returning `self` so callers can treat the wrapper as + a drop-in for the raw response. Also adds `__getattr__` to proxy any other unknown + attributes to the underlying stream. Inspired by upstream fix + ([opentelemetry-python-contrib#4184](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/4184), + fixes [#4113](https://github.com/open-telemetry/opentelemetry-python-contrib/issues/4113)). + +### Added +- **`SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION` env var** — Global alternative to + the per-request `suppress_language_model_instrumentation` OTel context key + (the env var is the uppercase form of the same string). When set to a truthy + value (`true`, `1`, `yes`, `on`), the openai-v2 instrumentor skips creating + spans entirely. Intended for zero-code deployments alongside + `OTEL_PYTHON_DISABLED_INSTRUMENTATIONS=openai`. + +### Fixed + - Fix `AttributeError: 'StreamWrapper' object has no attribute 'headers'` when using `with_raw_response.create(stream=True)` (e.g. via LiteLLM's Azure provider). 
`_parse_response` was calling `.parse()` on the `LegacyAPIResponse` before wrapping diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/patch.py b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/patch.py index 48b38951..e2cf8b2e 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/patch.py +++ b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/patch.py @@ -15,6 +15,7 @@ import asyncio import inspect +import os import timeit from typing import Any, Iterable, Optional @@ -59,6 +60,25 @@ ) +def _is_instrumentation_suppressed() -> bool: + """Return True when OpenAI spans should be skipped. + + Checks two surfaces for the suppression signal: + 1. OTel context key — set per-request by the LangChain instrumentor's + ``_OpenAITracingWrapper`` to prevent duplicate LLM spans when both + instrumentors are active simultaneously. + 2. Environment variable ``SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION`` — + set globally (e.g. in zero-code deployments) together with + ``OTEL_PYTHON_DISABLED_INSTRUMENTATIONS=openai``. 
+ """ + if context_api.get_value(SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY): + return True + raw = os.environ.get( + SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY.upper(), "" + ) + return raw.strip().lower() in ("true", "1", "yes", "on") + + def _normalize_stop_sequences(stop_values: Any) -> list[str]: if stop_values is None: return [] @@ -394,7 +414,7 @@ def chat_completions_create(capture_content: bool, handler): def traced_method(wrapped, instance, args, kwargs): # Check if instrumentation is suppressed (e.g., by LangChain) - if context_api.get_value(SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY): + if _is_instrumentation_suppressed(): return wrapped(*args, **kwargs) span_attributes = {**get_llm_request_attributes(kwargs, instance)} @@ -449,7 +469,7 @@ def async_chat_completions_create(capture_content: bool, handler): async def traced_method(wrapped, instance, args, kwargs): # Check if instrumentation is suppressed (e.g., by LangChain) - if context_api.get_value(SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY): + if _is_instrumentation_suppressed(): return await wrapped(*args, **kwargs) span_attributes = {**get_llm_request_attributes(kwargs, instance)} @@ -504,7 +524,7 @@ def embeddings_create(capture_content: bool, handler): def traced_method(wrapped, instance, args, kwargs): # Check if instrumentation is suppressed (e.g., by LangChain) - if context_api.get_value(SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY): + if _is_instrumentation_suppressed(): return wrapped(*args, **kwargs) span_attributes = get_llm_request_attributes( @@ -553,7 +573,7 @@ def async_embeddings_create(capture_content: bool, handler): async def traced_method(wrapped, instance, args, kwargs): # Check if instrumentation is suppressed (e.g., by LangChain) - if context_api.get_value(SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY): + if _is_instrumentation_suppressed(): return await wrapped(*args, **kwargs) span_attributes = get_llm_request_attributes( diff --git 
a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/cassettes/test_chat_completion_not_suppressed_when_env_var_false.yaml b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/cassettes/test_chat_completion_not_suppressed_when_env_var_false.yaml new file mode 100644 index 00000000..2abb443f --- /dev/null +++ b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/cassettes/test_chat_completion_not_suppressed_when_env_var_false.yaml @@ -0,0 +1,134 @@ +interactions: +- request: + body: |- + { + "messages": [ + { + "role": "user", + "content": "Say this is a test" + } + ], + "model": "gpt-4o-mini", + "stream": false + } + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + authorization: + - Bearer test_openai_api_key + connection: + - keep-alive + content-length: + - '106' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.54.3 + x-stainless-arch: + - arm64 + x-stainless-async: + - 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.54.3 + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.12.6 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: |- + { + "id": "chatcmpl-ASYMQRl3A3DXL9FWCK9tnGRcKIO7q", + "object": "chat.completion", + "created": 1731368630, + "model": "gpt-4o-mini-2024-07-18", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "This is a test.", + "refusal": null + }, + "logprobs": null, + "finish_reason": "stop" + } + ], + "usage": { + "prompt_tokens": 12, + "completion_tokens": 5, + "total_tokens": 17, + "prompt_tokens_details": { + "cached_tokens": 0, + "audio_tokens": 0 + }, + "completion_tokens_details": { + "reasoning_tokens": 0, + "audio_tokens": 0, + "accepted_prediction_tokens": 0, + "rejected_prediction_tokens": 0 + } + }, + 
"system_fingerprint": "fp_0ba0d124f1" + } + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 8e122593ff368bc8-SIN + Connection: + - keep-alive + Content-Type: + - application/json + Date: + - Mon, 11 Nov 2024 23:43:50 GMT + Server: + - cloudflare + Set-Cookie: test_set_cookie + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + content-length: + - '765' + openai-organization: test_openai_org_id + openai-processing-ms: + - '287' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '10000' + x-ratelimit-limit-tokens: + - '200000' + x-ratelimit-remaining-requests: + - '9999' + x-ratelimit-remaining-tokens: + - '199977' + x-ratelimit-reset-requests: + - 8.64s + x-ratelimit-reset-tokens: + - 6ms + x-request-id: + - req_58cff97afd0e7c0bba910ccf0b044a6f + status: + code: 200 + message: OK +version: 1 diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/cassettes/test_chat_completion_suppressed_via_env_var.yaml b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/cassettes/test_chat_completion_suppressed_via_env_var.yaml new file mode 100644 index 00000000..2abb443f --- /dev/null +++ b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/cassettes/test_chat_completion_suppressed_via_env_var.yaml @@ -0,0 +1,134 @@ +interactions: +- request: + body: |- + { + "messages": [ + { + "role": "user", + "content": "Say this is a test" + } + ], + "model": "gpt-4o-mini", + "stream": false + } + headers: + accept: + - application/json + accept-encoding: + - gzip, deflate + authorization: + - Bearer test_openai_api_key + connection: + - keep-alive + content-length: + - '106' + content-type: + - application/json + host: + - api.openai.com + user-agent: + - OpenAI/Python 1.54.3 + x-stainless-arch: + - arm64 + x-stainless-async: + 
- 'false' + x-stainless-lang: + - python + x-stainless-os: + - MacOS + x-stainless-package-version: + - 1.54.3 + x-stainless-retry-count: + - '0' + x-stainless-runtime: + - CPython + x-stainless-runtime-version: + - 3.12.6 + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: |- + { + "id": "chatcmpl-ASYMQRl3A3DXL9FWCK9tnGRcKIO7q", + "object": "chat.completion", + "created": 1731368630, + "model": "gpt-4o-mini-2024-07-18", + "choices": [ + { + "index": 0, + "message": { + "role": "assistant", + "content": "This is a test.", + "refusal": null + }, + "logprobs": null, + "finish_reason": "stop" + } + ], + "usage": { + "prompt_tokens": 12, + "completion_tokens": 5, + "total_tokens": 17, + "prompt_tokens_details": { + "cached_tokens": 0, + "audio_tokens": 0 + }, + "completion_tokens_details": { + "reasoning_tokens": 0, + "audio_tokens": 0, + "accepted_prediction_tokens": 0, + "rejected_prediction_tokens": 0 + } + }, + "system_fingerprint": "fp_0ba0d124f1" + } + headers: + CF-Cache-Status: + - DYNAMIC + CF-RAY: + - 8e122593ff368bc8-SIN + Connection: + - keep-alive + Content-Type: + - application/json + Date: + - Mon, 11 Nov 2024 23:43:50 GMT + Server: + - cloudflare + Set-Cookie: test_set_cookie + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + content-length: + - '765' + openai-organization: test_openai_org_id + openai-processing-ms: + - '287' + openai-version: + - '2020-10-01' + strict-transport-security: + - max-age=31536000; includeSubDomains; preload + x-ratelimit-limit-requests: + - '10000' + x-ratelimit-limit-tokens: + - '200000' + x-ratelimit-remaining-requests: + - '9999' + x-ratelimit-remaining-tokens: + - '199977' + x-ratelimit-reset-requests: + - 8.64s + x-ratelimit-reset-tokens: + - 6ms + x-request-id: + - req_58cff97afd0e7c0bba910ccf0b044a6f + status: + code: 200 + message: OK +version: 1 diff --git 
a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_suppression.py b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_suppression.py index e57001bc..221dbd17 100644 --- a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_suppression.py +++ b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_suppression.py @@ -13,8 +13,14 @@ # limitations under the License. """ -Tests for SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY functionality. -This prevents duplicate telemetry when multiple instrumentations (e.g., LangChain + OpenAI) are active. +Tests for suppression of OpenAI instrumentation. + +Covers two suppression surfaces: +1. SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY context key — set per-request + by the LangChain instrumentor to prevent duplicate LLM spans when both + instrumentors are active simultaneously. +2. SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION environment variable — set globally + for zero-code deployments alongside OTEL_PYTHON_DISABLED_INSTRUMENTATIONS=openai. """ import pytest @@ -24,6 +30,11 @@ SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY, ) +# The env var name is the uppercase form of the context key string. 
+SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_ENV_VAR = ( + SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY.upper() +) + @pytest.mark.vcr() def test_chat_completion_suppressed( @@ -144,3 +155,81 @@ def test_chat_completion_not_suppressed_by_default( # Should have at least the main chat completion span chat_spans = [s for s in spans if "chat" in s.name.lower()] assert len(chat_spans) > 0 + + +# --------------------------------------------------------------------------- +# Environment variable suppression +# --------------------------------------------------------------------------- + + +@pytest.mark.vcr() +def test_chat_completion_suppressed_via_env_var( + monkeypatch, span_exporter, openai_client, instrument_with_content +): + """SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION=true suppresses spans globally.""" + monkeypatch.setenv(SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_ENV_VAR, "true") + + response = openai_client.chat.completions.create( + model="gpt-4o-mini", + messages=[{"role": "user", "content": "Say this is a test"}], + ) + assert response is not None + + spans = span_exporter.get_finished_spans() + assert len(spans) == 0 + + +@pytest.mark.vcr() +def test_chat_completion_not_suppressed_when_env_var_false( + monkeypatch, span_exporter, openai_client, instrument_with_content +): + """SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION=false leaves instrumentation active.""" + monkeypatch.setenv( + SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_ENV_VAR, "false" + ) + + response = openai_client.chat.completions.create( + model="gpt-4o-mini", + messages=[{"role": "user", "content": "Say this is a test"}], + ) + assert response is not None + + spans = span_exporter.get_finished_spans() + assert len(spans) > 0 + + +@pytest.mark.parametrize("value", ["true", "1", "yes", "on", "TRUE"]) +def test_env_var_truthy_values_suppress(monkeypatch, value): + """All truthy spellings of the env var are recognised.""" + monkeypatch.setenv(SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_ENV_VAR, value) + + from 
opentelemetry.instrumentation.openai_v2.patch import ( + _is_instrumentation_suppressed, + ) + + assert _is_instrumentation_suppressed() is True + + +@pytest.mark.parametrize("value", ["false", "0", "no", "off", "FALSE", ""]) +def test_env_var_falsey_values_do_not_suppress(monkeypatch, value): + """Falsey env var spellings leave instrumentation active.""" + monkeypatch.setenv(SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_ENV_VAR, value) + + from opentelemetry.instrumentation.openai_v2.patch import ( + _is_instrumentation_suppressed, + ) + + assert _is_instrumentation_suppressed() is False + + +def test_env_var_unset_does_not_suppress(monkeypatch): + """When the env var is absent, instrumentation is active by default.""" + monkeypatch.delenv( + SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_ENV_VAR, raising=False + ) + + from opentelemetry.instrumentation.openai_v2.patch import ( + _is_instrumentation_suppressed, + ) + + assert _is_instrumentation_suppressed() is False diff --git a/util/opentelemetry-util-genai/CHANGELOG.md b/util/opentelemetry-util-genai/CHANGELOG.md index 6b6571fd..d3cecb5a 100644 --- a/util/opentelemetry-util-genai/CHANGELOG.md +++ b/util/opentelemetry-util-genai/CHANGELOG.md @@ -6,6 +6,12 @@ All notable changes to this repository are documented in this file. ### Added +- **`SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION` env var** — The uppercase form of + `SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY` is now also checked as an + environment variable by the openai-v2 instrumentor, allowing zero-code + deployments to set suppression globally without the LangChain instrumentor + injecting the context key per-request. No new constant added. + - **`OTEL_INSTRUMENTATION_GENAI_EMIT_EVENT` env var** — Explicit override for content event emission (`true`/`false`). When unset, defaults are derived from `OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT` mode. 
### Changed diff --git a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/attributes.py b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/attributes.py index 375b7b7a..03e71e9b 100644 --- a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/attributes.py +++ b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/attributes.py @@ -95,7 +95,11 @@ GEN_AI_SECURITY_EVENT_ID = "gen_ai.security.event_id" # Context key for suppressing instrumentation to avoid duplicate telemetry -# when multiple instrumentations (e.g., LangChain + OpenAI) are active +# when multiple instrumentations (e.g., LangChain + OpenAI) are active. +# The uppercase form of this string is also checked as an environment variable +# (SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION) so zero-code deployments can set +# suppression globally without the LangChain instrumentor injecting it +# per-request. Use together with OTEL_PYTHON_DISABLED_INSTRUMENTATIONS=openai. SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY = ( "suppress_language_model_instrumentation" )