From c092f2029256a3b72000d27de24c4e58caf8328f Mon Sep 17 00:00:00 2001
From: adityamehra <mehraaditya71@gmail.com>
Date: Wed, 13 May 2026 16:34:15 -0700
Subject: [PATCH 01/10] feat(langchain): Release version 0.1.14

---
 RELEASING.md                                                | 6 +++---
 .../opentelemetry-instrumentation-langchain/CHANGELOG.md    | 4 ++++
 .../src/opentelemetry/instrumentation/langchain/version.py  | 2 +-
 3 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/RELEASING.md b/RELEASING.md
index cc76acdd..1dbceac6 100644
--- a/RELEASING.md
+++ b/RELEASING.md
@@ -10,19 +10,19 @@ How to release a new version of the GenAI packages:
 
 | Package Name | Path | Current Version | Tag Format                          |
 |-------------|------|-----------------|-------------------------------------|
-| splunk-otel-util-genai | `util/opentelemetry-util-genai` | 0.1.11           | `util-genai-v0.Y.Z`                 |
+| splunk-otel-util-genai | `util/opentelemetry-util-genai` | 0.1.11          | `util-genai-v0.Y.Z`                 |
 | splunk-otel-util-genai-evals | `util/opentelemetry-util-genai-evals` | 0.1.8           | `util-genai-evals-v0.Y.Z`           |
 | splunk-otel-genai-emitters-splunk | `util/opentelemetry-util-genai-emitters-splunk` | 0.1.8           | `genai-emitters-splunk-v0.Y.Z`      |
 | splunk-otel-genai-evals-deepeval | `util/opentelemetry-util-genai-evals-deepeval` | 0.1.14          | `genai-evals-deepeval-v0.Y.Z`       |
 | splunk-otel-util-genai-translator-traceloop | `util/opentelemetry-util-genai-traceloop-translator` | 0.1.8           | `genai-translator-traceloop-v0.Y.Z` |
 | splunk-otel-util-genai-translator-langsmith | `util/opentelemetry-util-genai-langsmith-translator` | 0.1.1           | `genai-translator-langsmith-v0.Y.Z` |
 | splunk-otel-util-genai-translator-openlit | `util/opentelemetry-util-genai-openlit-translator` | 0.1.2           | `genai-translator-openlit-v0.Y.Z` |
-| splunk-otel-instrumentation-langchain | `instrumentation-genai/opentelemetry-instrumentation-langchain` | 0.1.9           | `instrumentation-langchain-v0.Y.Z`  |
+| splunk-otel-instrumentation-langchain | `instrumentation-genai/opentelemetry-instrumentation-langchain` | 0.1.14          | `instrumentation-langchain-v0.Y.Z`  |
 | splunk-otel-instrumentation-llamaindex | `instrumentation-genai/opentelemetry-instrumentation-llamaindex` | 0.1.1           | `instrumentation-llamaindex-v0.Y.Z`  |
 | splunk-otel-instrumentation-aidefense | `instrumentation-genai/opentelemetry-instrumentation-aidefense` | 0.2.1           | `instrumentation-aidefense-v0.Y.Z`  |
 | splunk-otel-instrumentation-weaviate | `instrumentation-genai/opentelemetry-instrumentation-weaviate` | 0.1.0           | `instrumentation-weaviate-v0.Y.Z`  |
 | splunk-otel-instrumentation-crewai | `instrumentation-genai/opentelemetry-instrumentation-crewai` | 0.1.3           | `instrumentation-crewai-v0.Y.Z`  |
-| splunk-otel-instrumentation-openai-agents | `instrumentation-genai/opentelemetry-instrumentation-openai-agents-v2` | 0.1.3          | `instrumentation-openai-agents-v0.Y.Z`  |
+| splunk-otel-instrumentation-openai-agents | `instrumentation-genai/opentelemetry-instrumentation-openai-agents-v2` | 0.1.3           | `instrumentation-openai-agents-v0.Y.Z`  |
 | splunk-otel-instrumentation-fastmcp | `instrumentation-genai/opentelemetry-instrumentation-fastmcp` | 0.1.1           | `instrumentation-fastmcp-v0.Y.Z`  |
 
 ### Release Steps
diff --git a/instrumentation-genai/opentelemetry-instrumentation-langchain/CHANGELOG.md b/instrumentation-genai/opentelemetry-instrumentation-langchain/CHANGELOG.md
index 30f8e18b..0a18648c 100644
--- a/instrumentation-genai/opentelemetry-instrumentation-langchain/CHANGELOG.md
+++ b/instrumentation-genai/opentelemetry-instrumentation-langchain/CHANGELOG.md
@@ -2,6 +2,10 @@
 
 All notable changes to this repository are documented in this file.
 
+## Version 0.1.14
+
+### Bump version for release
+
 ## Version 0.1.13
 
 ### Fixed
diff --git a/instrumentation-genai/opentelemetry-instrumentation-langchain/src/opentelemetry/instrumentation/langchain/version.py b/instrumentation-genai/opentelemetry-instrumentation-langchain/src/opentelemetry/instrumentation/langchain/version.py
index 3cb7d95e..fb69db9c 100644
--- a/instrumentation-genai/opentelemetry-instrumentation-langchain/src/opentelemetry/instrumentation/langchain/version.py
+++ b/instrumentation-genai/opentelemetry-instrumentation-langchain/src/opentelemetry/instrumentation/langchain/version.py
@@ -1 +1 @@
-__version__ = "0.1.13"
+__version__ = "0.1.14"

From 6b2da1914b02829870d7cb0b657d4716735d4751 Mon Sep 17 00:00:00 2001
From: adityamehra <mehraaditya71@gmail.com>
Date: Mon, 18 May 2026 09:10:57 -0700
Subject: [PATCH 02/10] fix(openai-v2): add __getattr__ to StreamWrapper to
 proxy unknown attributes

StreamWrapper was missing __getattr__, causing AttributeError when
LiteLLM (and other clients) access raw_response.headers after calling
with_raw_response.create(stream=True).

Ports the fix from upstream opentelemetry-python-contrib PR #4184
(fixes issue #4113).

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 .../instrumentation/openai_v2/patch.py        |  3 +++
 .../tests/test_patch_unit.py                  | 19 +++++++++++++++++++
 2 files changed, 22 insertions(+)

diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/patch.py b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/patch.py
index b3a04dc5..5c9e2eec 100644
--- a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/patch.py
+++ b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/patch.py
@@ -768,6 +768,9 @@ def __init__(
         self._first_chunk_processed = False
         self.setup()
 
+    def __getattr__(self, name: str):
+        return getattr(self.stream, name)
+
     def setup(self):
         if not self._span_started:
             self._span_started = True
diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_patch_unit.py b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_patch_unit.py
index 8e319357..49239b71 100644
--- a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_patch_unit.py
+++ b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_patch_unit.py
@@ -202,6 +202,25 @@ def test_time_to_first_chunk_only_captured_once(self):
             "time_to_first_chunk should not change after first chunk"
         )
 
+    def test_getattr_proxies_unknown_attributes_to_stream(self):
+        """Test that unknown attributes (e.g. .headers) are proxied to the
+        underlying stream, fixing the AttributeError raised by LiteLLM when
+        accessing raw_response.headers after with_raw_response streaming.
+        Regression test for https://github.com/open-telemetry/opentelemetry-python-contrib/issues/4113
+        """
+        invocation = LLMInvocation(request_model="gpt-4o")
+        mock_stream = MagicMock()
+        mock_stream.headers = {"content-type": "application/json"}
+        mock_handler = MagicMock()
+
+        wrapper = StreamWrapper(
+            stream=mock_stream,
+            invocation=invocation,
+            handler=mock_handler,
+        )
+
+        assert wrapper.headers == {"content-type": "application/json"}
+
     def test_time_to_first_chunk_not_captured_without_start_time(self):
         """Test that time_to_first_chunk is not captured without _start_time."""
         invocation = LLMInvocation(request_model="gpt-4o")

From ec35675e7b5f75d40f3e2549325298d97de2148b Mon Sep 17 00:00:00 2001
From: adityamehra <mehraaditya71@gmail.com>
Date: Mon, 18 May 2026 11:04:05 -0700
Subject: [PATCH 03/10] docs(openai-v2): add changelog entry for StreamWrapper
 __getattr__ fix

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 .../opentelemetry-instrumentation-openai-v2/CHANGELOG.md | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/CHANGELOG.md b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/CHANGELOG.md
index 029f8ad5..5506a9f0 100644
--- a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/CHANGELOG.md
+++ b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/CHANGELOG.md
@@ -7,6 +7,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## Unreleased
 
+### Fixed
+
+- Fix `AttributeError: 'StreamWrapper' object has no attribute 'headers'` when
+  using `with_raw_response.create(stream=True)` (e.g. via LiteLLM's Azure provider).
+  `StreamWrapper` now proxies unknown attribute lookups to the underlying stream via
+  `__getattr__`. Ports upstream fix
+  ([opentelemetry-python-contrib#4184](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/4184),
+  fixes [#4113](https://github.com/open-telemetry/opentelemetry-python-contrib/issues/4113)).
+
 ### Added
 
 - Add `gen_ai.tool.definitions` attribute on LLM spans when

From 7173333e73a1665bf8ddfb3429674065a60febc4 Mon Sep 17 00:00:00 2001
From: adityamehra <mehraaditya71@gmail.com>
Date: Mon, 18 May 2026 22:14:55 -0700
Subject: [PATCH 04/10] fix(openai-v2): preserve LegacyAPIResponse headers on
 StreamWrapper for with_raw_response streaming

When LiteLLM calls with_raw_response.create(stream=True), the OpenAI SDK
returns a LegacyAPIResponse (with .headers). SDOT's _parse_response was
calling .parse() on it to get the AsyncStream before wrapping in StreamWrapper,
discarding the headers. LiteLLM then accessed raw_response.headers, which
failed with AttributeError since AsyncStream has no .headers attribute.

Fix:
- Capture LegacyAPIResponse.headers before _parse_response discards it
- Store captured headers as StreamWrapper.headers (direct attribute, not proxied)
- Add parse() returning self so callers can treat StreamWrapper as a raw response
- Keep __getattr__ for any other unknown attribute proxying to the underlying stream

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 .../CHANGELOG.md                              |  8 +++-
 .../instrumentation/openai_v2/patch.py        | 12 ++++++
 .../tests/test_patch_unit.py                  | 37 ++++++++++++++++---
 3 files changed, 50 insertions(+), 7 deletions(-)

diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/CHANGELOG.md b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/CHANGELOG.md
index 5506a9f0..f24877e3 100644
--- a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/CHANGELOG.md
+++ b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/CHANGELOG.md
@@ -11,8 +11,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 - Fix `AttributeError: 'StreamWrapper' object has no attribute 'headers'` when
   using `with_raw_response.create(stream=True)` (e.g. via LiteLLM's Azure provider).
-  `StreamWrapper` now proxies unknown attribute lookups to the underlying stream via
-  `__getattr__`. Ports upstream fix
+  `_parse_response` was calling `.parse()` on the `LegacyAPIResponse` before wrapping
+  in `StreamWrapper`, discarding the raw HTTP headers. `StreamWrapper` now captures
+  headers from the `LegacyAPIResponse` before it is parsed and exposes them directly,
+  and adds a `parse()` method returning `self` so callers can treat the wrapper as
+  a drop-in for the raw response. Also adds `__getattr__` to proxy any other unknown
+  attributes to the underlying stream. Inspired by upstream fix
   ([opentelemetry-python-contrib#4184](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/4184),
   fixes [#4113](https://github.com/open-telemetry/opentelemetry-python-contrib/issues/4113)).
 
diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/patch.py b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/patch.py
index 5c9e2eec..48b38951 100644
--- a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/patch.py
+++ b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/patch.py
@@ -418,12 +418,14 @@ def traced_method(wrapped, instance, args, kwargs):
             raise
 
         try:
+            raw_headers = getattr(result, "headers", None)
             parsed_result = _parse_response(result)
             if is_streaming(kwargs):
                 return StreamWrapper(
                     parsed_result,
                     invocation,
                     handler,
+                    raw_headers=raw_headers,
                 )
 
             if span and span.is_recording():
@@ -471,12 +473,14 @@ async def traced_method(wrapped, instance, args, kwargs):
             raise
 
         try:
+            raw_headers = getattr(result, "headers", None)
             parsed_result = _parse_response(result)
             if is_streaming(kwargs):
                 return StreamWrapper(
                     parsed_result,
                     invocation,
                     handler,
+                    raw_headers=raw_headers,
                 )
 
             if span and span.is_recording():
@@ -755,11 +759,13 @@ def __init__(
         stream: Stream,
         invocation: LLMInvocation,
         handler,
+        raw_headers=None,
     ):
         self.stream = stream
         self.invocation = invocation
         self.span = getattr(invocation, "span", None)
         self.handler = handler
+        self.headers = raw_headers
         self.choice_buffers = []
         self.finish_reasons = []  # Instance-level to avoid cross-request contamination
         self._span_started = False
@@ -771,6 +777,12 @@ def __init__(
     def __getattr__(self, name: str):
         return getattr(self.stream, name)
 
+    def parse(self):
+        """Proxy for with_raw_response callers (e.g. LiteLLM) that call
+        .parse() on the result of with_raw_response.create(stream=True).
+        Returns self so the caller can iterate the stream directly."""
+        return self
+
     def setup(self):
         if not self._span_started:
             self._span_started = True
diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_patch_unit.py b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_patch_unit.py
index 49239b71..146217ae 100644
--- a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_patch_unit.py
+++ b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_patch_unit.py
@@ -202,15 +202,42 @@ def test_time_to_first_chunk_only_captured_once(self):
             "time_to_first_chunk should not change after first chunk"
         )
 
+    def test_raw_response_headers_stored_from_legacy_api_response(self):
+        """Test the real production scenario: LiteLLM calls with_raw_response.create(stream=True),
+        which makes the OpenAI SDK return a LegacyAPIResponse. SDOT's _parse_response calls
+        .parse() on it to get the AsyncStream — discarding .headers. StreamWrapper must
+        preserve the headers captured before _parse_response so LiteLLM can access them.
+
+        Regression test for the AttributeError seen in production:
+          File "litellm/llms/azure/azure.py", line 176
+            headers = dict(raw_response.headers)
+          AttributeError: 'StreamWrapper' object has no attribute 'headers'
+        """
+        invocation = LLMInvocation(request_model="gpt-4o")
+        mock_stream = MagicMock(spec=[])  # AsyncStream has no .headers
+        mock_handler = MagicMock()
+        raw_headers = {"content-type": "text/event-stream", "x-request-id": "abc123"}
+
+        wrapper = StreamWrapper(
+            stream=mock_stream,
+            invocation=invocation,
+            handler=mock_handler,
+            raw_headers=raw_headers,
+        )
+
+        # LiteLLM: headers = dict(raw_response.headers)
+        assert wrapper.headers == raw_headers
+        # LiteLLM: response = raw_response.parse()
+        assert wrapper.parse() is wrapper
+
     def test_getattr_proxies_unknown_attributes_to_stream(self):
-        """Test that unknown attributes (e.g. .headers) are proxied to the
-        underlying stream, fixing the AttributeError raised by LiteLLM when
-        accessing raw_response.headers after with_raw_response streaming.
+        """Test that unknown attributes on StreamWrapper are proxied to the
+        underlying stream object.
         Regression test for https://github.com/open-telemetry/opentelemetry-python-contrib/issues/4113
         """
         invocation = LLMInvocation(request_model="gpt-4o")
         mock_stream = MagicMock()
-        mock_stream.headers = {"content-type": "application/json"}
+        mock_stream.some_custom_attr = "value"
         mock_handler = MagicMock()
 
         wrapper = StreamWrapper(
@@ -219,7 +246,7 @@ def test_getattr_proxies_unknown_attributes_to_stream(self):
             handler=mock_handler,
         )
 
-        assert wrapper.headers == {"content-type": "application/json"}
+        assert wrapper.some_custom_attr == "value"
 
     def test_time_to_first_chunk_not_captured_without_start_time(self):
         """Test that time_to_first_chunk is not captured without _start_time."""

From 044cf8b9487b85db2a11aaf543e63daf0651ab8b Mon Sep 17 00:00:00 2001
From: adityamehra <mehraaditya71@gmail.com>
Date: Tue, 19 May 2026 11:34:22 -0700
Subject: [PATCH 05/10] style: apply ruff formatting to test_patch_unit.py

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 .../tests/test_patch_unit.py                                 | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_patch_unit.py b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_patch_unit.py
index 146217ae..6745238b 100644
--- a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_patch_unit.py
+++ b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_patch_unit.py
@@ -216,7 +216,10 @@ def test_raw_response_headers_stored_from_legacy_api_response(self):
         invocation = LLMInvocation(request_model="gpt-4o")
         mock_stream = MagicMock(spec=[])  # AsyncStream has no .headers
         mock_handler = MagicMock()
-        raw_headers = {"content-type": "text/event-stream", "x-request-id": "abc123"}
+        raw_headers = {
+            "content-type": "text/event-stream",
+            "x-request-id": "abc123",
+        }
 
         wrapper = StreamWrapper(
             stream=mock_stream,

From 2706482c318ace0872dbd1b2abfc84a6db807766 Mon Sep 17 00:00:00 2001
From: adityamehra <mehraaditya71@gmail.com>
Date: Tue, 19 May 2026 12:20:55 -0700
Subject: [PATCH 06/10] test(openai-v2): add standalone reproducer for
 with_raw_response streaming headers bug

Reproduces the production error reported in lab0:
  AttributeError: 'StreamWrapper' object has no attribute 'headers'

Mirrors the pattern from upstream issues:
  #4032 - StreamWrapper missing .parse()
  #4113 - StreamWrapper missing .headers

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 .../reproduce_raw_response_streaming.py       | 149 ++++++++++++++++++
 1 file changed, 149 insertions(+)
 create mode 100644 instrumentation-genai/opentelemetry-instrumentation-openai-v2/reproduce_raw_response_streaming.py

diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/reproduce_raw_response_streaming.py b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/reproduce_raw_response_streaming.py
new file mode 100644
index 00000000..9bf44fd8
--- /dev/null
+++ b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/reproduce_raw_response_streaming.py
@@ -0,0 +1,149 @@
+"""
+Reproducer for: StreamWrapper missing .headers when LiteLLM calls
+with_raw_response.create(stream=True).
+
+Production error (lab0, 2026-05-15):
+  File "litellm/llms/azure/azure.py", line 176
+    headers = dict(raw_response.headers)
+  AttributeError: 'StreamWrapper' object has no attribute 'headers'
+
+LiteLLM's Azure provider always calls:
+  raw_response = await azure_client.chat.completions.with_raw_response.create(...)
+  headers = dict(raw_response.headers)   # <-- fails when SDOT is active
+  response = raw_response.parse()        # <-- also fails without parse()
+
+Related upstream issues:
+  #4032 - StreamWrapper missing .parse()  (fixed)
+  #4113 - StreamWrapper missing .headers  (fixed upstream, not yet in SDOT)
+
+Run:
+  pip install openai opentelemetry-sdk splunk-otel-instrumentation-openai
+  python reproduce_raw_response_streaming.py
+"""
+
+import asyncio
+import json
+from unittest.mock import AsyncMock, MagicMock, patch
+
+import httpx
+from openai import AsyncAzureOpenAI
+
+from opentelemetry.instrumentation.openai_v2 import OpenAIInstrumentor
+from opentelemetry.sdk.trace import TracerProvider
+from opentelemetry.sdk.trace.export import SimpleSpanProcessor
+from opentelemetry.sdk.trace.export.in_memory_span_exporter import (
+    InMemorySpanExporter,
+)
+
+# ---------------------------------------------------------------------------
+# Minimal SSE streaming response that mimics Azure OpenAI
+# ---------------------------------------------------------------------------
+SSE_CHUNKS = [
+    b'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"role":"assistant","content":"Hello"},"finish_reason":null}]}\n\n',
+    b'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":"!"},"finish_reason":"stop"}]}\n\n',
+    b"data: [DONE]\n\n",
+]
+
+
+def _make_mock_httpx_response() -> httpx.Response:
+    """Return a minimal mock httpx.Response with headers and a streaming body.
+
+    The request headers must include X-Stainless-Raw-Response: true so the
+    OpenAI SDK returns LegacyAPIResponse (sync .parse()) instead of
+    AsyncAPIResponse (async .parse()). SDOT's _parse_response calls .parse()
+    synchronously, so it must be LegacyAPIResponse.
+    """
+    response_headers = {
+        "content-type": "text/event-stream",
+        "x-request-id": "test-request-id-abc123",
+        "openai-model": "gpt-4o",
+    }
+    # RAW_RESPONSE_HEADER = "X-Stainless-Raw-Response" — must be "true" so the
+    # OpenAI SDK wraps the response in LegacyAPIResponse (sync .parse()).
+    request_headers = httpx.Headers({"X-Stainless-Raw-Response": "true"})
+
+    async def aiter_bytes(_chunk_size=None):
+        for chunk in SSE_CHUNKS:
+            yield chunk
+
+    mock_request = MagicMock(spec=httpx.Request)
+    mock_request.headers = request_headers
+
+    mock_response = MagicMock(spec=httpx.Response)
+    mock_response.status_code = 200
+    mock_response.headers = httpx.Headers(response_headers)
+    mock_response.aiter_bytes = aiter_bytes
+    mock_response.aclose = AsyncMock()
+    mock_response.request = mock_request
+    mock_response.http_version = "HTTP/1.1"
+    mock_response.elapsed = MagicMock()
+    return mock_response
+
+
+# ---------------------------------------------------------------------------
+# Reproducer
+# ---------------------------------------------------------------------------
+async def reproducer():
+    # Set up OTel
+    exporter = InMemorySpanExporter()
+    provider = TracerProvider()
+    provider.add_span_processor(SimpleSpanProcessor(exporter))
+
+    instrumentor = OpenAIInstrumentor()
+    instrumentor.instrument(tracer_provider=provider)
+
+    client = AsyncAzureOpenAI(
+        api_key="test-key",
+        azure_endpoint="https://test.openai.azure.com",
+        api_version="2024-02-15-preview",
+    )
+
+    mock_httpx_response = _make_mock_httpx_response()
+
+    # Patch the underlying httpx send so no real network call is made
+    with patch.object(
+        client._client,
+        "send",
+        new_callable=AsyncMock,
+        return_value=mock_httpx_response,
+    ):
+        # This is exactly what LiteLLM's Azure provider does:
+        #   https://github.com/BerriAI/litellm/blob/main/litellm/llms/azure/azure.py#L167-L176
+        raw_response = await client.chat.completions.with_raw_response.create(
+            model="gpt-4o",
+            messages=[{"role": "user", "content": "Say hello"}],
+            max_tokens=10,
+            stream=True,
+        )
+
+        print(f"raw_response type: {type(raw_response).__name__}")
+
+        # Step 1: LiteLLM accesses .headers  — this is the line that crashed
+        headers = dict(raw_response.headers)
+        print(f"✓ raw_response.headers: {json.dumps(headers, indent=2)}")
+
+        # Step 2: LiteLLM calls .parse() to get the stream
+        response = raw_response.parse()
+        print(f"✓ raw_response.parse() returned: {type(response).__name__}")
+
+        # Step 3: iterate the stream
+        collected = []
+        async for chunk in response:
+            for choice in chunk.choices:
+                if choice.delta.content:
+                    collected.append(choice.delta.content)
+
+        text = "".join(collected)
+        print(f"✓ streamed content: {text!r}")
+
+    spans = exporter.get_finished_spans()
+    print(f"✓ OTel spans recorded: {len(spans)}")
+    for span in spans:
+        print(f"  - {span.name}")
+
+    instrumentor.uninstrument()
+    print("\nAll assertions passed — bug is fixed.")
+
+
+if __name__ == "__main__":
+    asyncio.run(reproducer())

From 2df57a324daf6c7be475fba6793fca8d5d291718 Mon Sep 17 00:00:00 2001
From: adityamehra <mehraaditya71@gmail.com>
Date: Tue, 19 May 2026 12:21:52 -0700
Subject: [PATCH 07/10] test(openai-v2): use
 make_azure_openai_chat_completion_request verbatim in reproducer

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 .../reproduce_raw_response_streaming.py       | 125 ++++++++++--------
 1 file changed, 70 insertions(+), 55 deletions(-)

diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/reproduce_raw_response_streaming.py b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/reproduce_raw_response_streaming.py
index 9bf44fd8..123d7ff9 100644
--- a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/reproduce_raw_response_streaming.py
+++ b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/reproduce_raw_response_streaming.py
@@ -1,28 +1,30 @@
 """
 Reproducer for: StreamWrapper missing .headers when LiteLLM calls
-with_raw_response.create(stream=True).
+with_raw_response.create(stream=True) on an SDOT-instrumented Azure OpenAI client.
 
-Production error (lab0, 2026-05-15):
-  File "litellm/llms/azure/azure.py", line 176
+Production traceback (lab0, 2026-05-15):
+  File "litellm/llms/azure/azure.py", line 619, in async_streaming
+    headers, response = await self.make_azure_openai_chat_completion_request(...)
+  File "litellm/llms/azure/azure.py", line 176, in make_azure_openai_chat_completion_request
     headers = dict(raw_response.headers)
   AttributeError: 'StreamWrapper' object has no attribute 'headers'
 
-LiteLLM's Azure provider always calls:
-  raw_response = await azure_client.chat.completions.with_raw_response.create(...)
-  headers = dict(raw_response.headers)   # <-- fails when SDOT is active
-  response = raw_response.parse()        # <-- also fails without parse()
+This reproducer calls make_azure_openai_chat_completion_request verbatim to
+confirm the fix in SDOT's StreamWrapper resolves the crash.
 
 Related upstream issues:
-  #4032 - StreamWrapper missing .parse()  (fixed)
-  #4113 - StreamWrapper missing .headers  (fixed upstream, not yet in SDOT)
+  #4032 - StreamWrapper missing .parse()  (fixed upstream)
+  #4113 - StreamWrapper missing .headers  (fixed upstream via __getattr__,
+           but SDOT needed a deeper fix: preserve LegacyAPIResponse.headers
+           before _parse_response discards it)
 
 Run:
-  pip install openai opentelemetry-sdk splunk-otel-instrumentation-openai
+  pip install openai litellm opentelemetry-sdk splunk-otel-instrumentation-openai
   python reproduce_raw_response_streaming.py
 """
 
 import asyncio
-import json
+import time
 from unittest.mock import AsyncMock, MagicMock, patch
 
 import httpx
@@ -36,38 +38,36 @@
 )
 
 # ---------------------------------------------------------------------------
-# Minimal SSE streaming response that mimics Azure OpenAI
+# Minimal SSE streaming chunks that mimic Azure OpenAI
 # ---------------------------------------------------------------------------
 SSE_CHUNKS = [
-    b'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"role":"assistant","content":"Hello"},"finish_reason":null}]}\n\n',
-    b'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","choices":[{"index":0,"delta":{"content":"!"},"finish_reason":"stop"}]}\n\n',
+    b'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","model":"gpt-4o","choices":[{"index":0,"delta":{"role":"assistant","content":"Hello"},"finish_reason":null}]}\n\n',
+    b'data: {"id":"chatcmpl-123","object":"chat.completion.chunk","model":"gpt-4o","choices":[{"index":0,"delta":{"content":"!"},"finish_reason":"stop"}]}\n\n',
     b"data: [DONE]\n\n",
 ]
 
 
 def _make_mock_httpx_response() -> httpx.Response:
-    """Return a minimal mock httpx.Response with headers and a streaming body.
+    """Build a fake httpx.Response that the OpenAI SDK treats as a raw streaming response.
 
-    The request headers must include X-Stainless-Raw-Response: true so the
-    OpenAI SDK returns LegacyAPIResponse (sync .parse()) instead of
-    AsyncAPIResponse (async .parse()). SDOT's _parse_response calls .parse()
-    synchronously, so it must be LegacyAPIResponse.
+    The request must carry X-Stainless-Raw-Response: true so the OpenAI SDK
+    returns LegacyAPIResponse (sync .parse()) rather than AsyncAPIResponse
+    (async .parse()). SDOT's _parse_response calls .parse() synchronously.
     """
     response_headers = {
         "content-type": "text/event-stream",
         "x-request-id": "test-request-id-abc123",
         "openai-model": "gpt-4o",
+        "ms-azureml-model-session": "d0",
     }
-    # RAW_RESPONSE_HEADER = "X-Stainless-Raw-Response" — must be "true" so the
-    # OpenAI SDK wraps the response in LegacyAPIResponse (sync .parse()).
-    request_headers = httpx.Headers({"X-Stainless-Raw-Response": "true"})
 
     async def aiter_bytes(_chunk_size=None):
         for chunk in SSE_CHUNKS:
             yield chunk
 
     mock_request = MagicMock(spec=httpx.Request)
-    mock_request.headers = request_headers
+    # RAW_RESPONSE_HEADER value that async_to_raw_response_wrapper injects
+    mock_request.headers = httpx.Headers({"X-Stainless-Raw-Response": "true"})
 
     mock_response = MagicMock(spec=httpx.Response)
     mock_response.status_code = 200
@@ -80,11 +80,33 @@ async def aiter_bytes(_chunk_size=None):
     return mock_response
 
 
+# ---------------------------------------------------------------------------
+# Verbatim copy of LiteLLM's make_azure_openai_chat_completion_request
+# (litellm/llms/azure/azure.py lines 154-179)
+# ---------------------------------------------------------------------------
+async def make_azure_openai_chat_completion_request(azure_client, data, timeout):
+    """
+    Helper to:
+    - call chat.completions.create.with_raw_response when litellm.return_response_headers is True
+    - call chat.completions.create by default
+    """
+    start_time = time.time()
+    try:
+        raw_response = await azure_client.chat.completions.with_raw_response.create(
+            **data, timeout=timeout
+        )
+
+        headers = dict(raw_response.headers)
+        response = raw_response.parse()
+        return headers, response
+    except Exception as e:
+        raise e
+
+
 # ---------------------------------------------------------------------------
 # Reproducer
 # ---------------------------------------------------------------------------
 async def reproducer():
-    # Set up OTel
     exporter = InMemorySpanExporter()
     provider = TracerProvider()
     provider.add_span_processor(SimpleSpanProcessor(exporter))
@@ -92,57 +114,50 @@ async def reproducer():
     instrumentor = OpenAIInstrumentor()
     instrumentor.instrument(tracer_provider=provider)
 
-    client = AsyncAzureOpenAI(
+    azure_client = AsyncAzureOpenAI(
         api_key="test-key",
         azure_endpoint="https://test.openai.azure.com",
         api_version="2024-02-15-preview",
     )
 
+    data = {
+        "model": "gpt-4o",
+        "messages": [{"role": "user", "content": "Say hello"}],
+        "max_tokens": 10,
+        "stream": True,
+    }
+
     mock_httpx_response = _make_mock_httpx_response()
 
-    # Patch the underlying httpx send so no real network call is made
     with patch.object(
-        client._client,
+        azure_client._client,
         "send",
         new_callable=AsyncMock,
         return_value=mock_httpx_response,
     ):
-        # This is exactly what LiteLLM's Azure provider does:
-        #   https://github.com/BerriAI/litellm/blob/main/litellm/llms/azure/azure.py#L167-L176
-        raw_response = await client.chat.completions.with_raw_response.create(
-            model="gpt-4o",
-            messages=[{"role": "user", "content": "Say hello"}],
-            max_tokens=10,
-            stream=True,
+        # This is the exact call that crashed in production
+        headers, response = await make_azure_openai_chat_completion_request(
+            azure_client=azure_client,
+            data=data,
+            timeout=60.0,
         )
 
-        print(f"raw_response type: {type(raw_response).__name__}")
-
-        # Step 1: LiteLLM accesses .headers  — this is the line that crashed
-        headers = dict(raw_response.headers)
-        print(f"✓ raw_response.headers: {json.dumps(headers, indent=2)}")
-
-        # Step 2: LiteLLM calls .parse() to get the stream
-        response = raw_response.parse()
-        print(f"✓ raw_response.parse() returned: {type(response).__name__}")
+    print(f"✓ headers accessible: {dict(headers)}")
+    print(f"✓ response type: {type(response).__name__}")
 
-        # Step 3: iterate the stream
-        collected = []
-        async for chunk in response:
-            for choice in chunk.choices:
-                if choice.delta.content:
-                    collected.append(choice.delta.content)
+    collected = []
+    async for chunk in response:
+        for choice in chunk.choices:
+            if choice.delta.content:
+                collected.append(choice.delta.content)
 
-        text = "".join(collected)
-        print(f"✓ streamed content: {text!r}")
+    print(f"✓ streamed content: {''.join(collected)!r}")
 
     spans = exporter.get_finished_spans()
-    print(f"✓ OTel spans recorded: {len(spans)}")
-    for span in spans:
-        print(f"  - {span.name}")
+    print(f"✓ OTel spans: {[s.name for s in spans]}")
 
     instrumentor.uninstrument()
-    print("\nAll assertions passed — bug is fixed.")
+    print("\nReproducer passed — 'StreamWrapper' has no attribute 'headers' is fixed.")
 
 
 if __name__ == "__main__":

From 57328cec86073126dc9c95cd53baab6c8152d31b Mon Sep 17 00:00:00 2001
From: adityamehra <mehraaditya71@gmail.com>
Date: Tue, 19 May 2026 12:38:31 -0700
Subject: [PATCH 08/10] chore(openai-v2): move reproducer script to
 examples/scripts/

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 .../{ => examples/scripts}/reproduce_raw_response_streaming.py    | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename instrumentation-genai/opentelemetry-instrumentation-openai-v2/{ => examples/scripts}/reproduce_raw_response_streaming.py (100%)

diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/reproduce_raw_response_streaming.py b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/examples/scripts/reproduce_raw_response_streaming.py
similarity index 100%
rename from instrumentation-genai/opentelemetry-instrumentation-openai-v2/reproduce_raw_response_streaming.py
rename to instrumentation-genai/opentelemetry-instrumentation-openai-v2/examples/scripts/reproduce_raw_response_streaming.py

From a2fe60e7c700c4a05094f9d25bcaba553528b306 Mon Sep 17 00:00:00 2001
From: adityamehra <mehraaditya71@gmail.com>
Date: Tue, 19 May 2026 12:39:01 -0700
Subject: [PATCH 09/10] fix(openai-v2): remove unused time import from
 reproducer

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 .../scripts/reproduce_raw_response_streaming.py  | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/examples/scripts/reproduce_raw_response_streaming.py b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/examples/scripts/reproduce_raw_response_streaming.py
index 123d7ff9..52839fc3 100644
--- a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/examples/scripts/reproduce_raw_response_streaming.py
+++ b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/examples/scripts/reproduce_raw_response_streaming.py
@@ -24,7 +24,6 @@
 """
 
 import asyncio
-import time
 from unittest.mock import AsyncMock, MagicMock, patch
 
 import httpx
@@ -84,16 +83,19 @@ async def aiter_bytes(_chunk_size=None):
 # Verbatim copy of LiteLLM's make_azure_openai_chat_completion_request
 # (litellm/llms/azure/azure.py lines 154-179)
 # ---------------------------------------------------------------------------
-async def make_azure_openai_chat_completion_request(azure_client, data, timeout):
+async def make_azure_openai_chat_completion_request(
+    azure_client, data, timeout
+):
     """
     Helper to:
     - call chat.completions.create.with_raw_response when litellm.return_response_headers is True
     - call chat.completions.create by default
     """
-    start_time = time.time()
     try:
-        raw_response = await azure_client.chat.completions.with_raw_response.create(
-            **data, timeout=timeout
+        raw_response = (
+            await azure_client.chat.completions.with_raw_response.create(
+                **data, timeout=timeout
+            )
         )
 
         headers = dict(raw_response.headers)
@@ -157,7 +159,9 @@ async def reproducer():
     print(f"✓ OTel spans: {[s.name for s in spans]}")
 
     instrumentor.uninstrument()
-    print("\nReproducer passed — 'StreamWrapper' has no attribute 'headers' is fixed.")
+    print(
+        "\nReproducer passed — 'StreamWrapper' has no attribute 'headers' is fixed."
+    )
 
 
 if __name__ == "__main__":

From 8f324d4bc470979307e99213143b61772108779e Mon Sep 17 00:00:00 2001
From: adityamehra <mehraaditya71@gmail.com>
Date: Tue, 19 May 2026 13:29:37 -0700
Subject: [PATCH 10/10] feat(suppress): expose
 SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION as env var
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

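Document in util-genai attributes that the uppercase form of the existing
suppress_language_model_instrumentation OTel context key is also honored as
an environment variable (SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION). No new
constant is added; the env var name is derived from the key at the call site.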

The openai-v2 instrumentor now checks both surfaces via a single
_is_instrumentation_suppressed() helper:
  1. OTel context key (existing) — set per-request by the LangChain
     instrumentor to prevent duplicate LLM spans.
  2. New env var — set globally for zero-code deployments together with
     OTEL_PYTHON_DISABLED_INSTRUMENTATIONS=openai.

One concept, two surfaces. No new flags introduced in the LangChain package.

Co-authored-by: Cursor <cursoragent@cursor.com>
---
 .../CHANGELOG.md                              |  21 +++
 .../instrumentation/openai_v2/patch.py        |  28 +++-
 ...ion_not_suppressed_when_env_var_false.yaml | 134 ++++++++++++++++++
 ...hat_completion_suppressed_via_env_var.yaml | 134 ++++++++++++++++++
 .../tests/test_suppression.py                 |  93 +++++++++++-
 util/opentelemetry-util-genai/CHANGELOG.md    |   6 +
 .../opentelemetry/util/genai/attributes.py    |   6 +-
 7 files changed, 415 insertions(+), 7 deletions(-)
 create mode 100644 instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/cassettes/test_chat_completion_not_suppressed_when_env_var_false.yaml
 create mode 100644 instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/cassettes/test_chat_completion_suppressed_via_env_var.yaml

diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/CHANGELOG.md b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/CHANGELOG.md
index 433736f9..6eb91dc9 100644
--- a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/CHANGELOG.md
+++ b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/CHANGELOG.md
@@ -9,6 +9,15 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
+### Added
+
+- **`SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION` env var** — Global alternative to
+  the per-request `suppress_language_model_instrumentation` OTel context key
+  (the env var is the uppercase form of the same string). When set to a truthy
+  value (`true`, `1`, `yes`, `on`; case-insensitive), the openai-v2 instrumentor
+  skips creating spans entirely. Intended for zero-code deployments alongside
+  `OTEL_PYTHON_DISABLED_INSTRUMENTATIONS=openai`.
+
 ### Fixed
 
 - Fix `AttributeError: 'StreamWrapper' object has no attribute 'headers'` when
   using `with_raw_response.create(stream=True)` (e.g. via LiteLLM's Azure provider).
   `_parse_response` was calling `.parse()` on the `LegacyAPIResponse` before wrapping
diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/patch.py b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/patch.py
index 48b38951..e2cf8b2e 100644
--- a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/patch.py
+++ b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/src/opentelemetry/instrumentation/openai_v2/patch.py
@@ -15,6 +15,7 @@
 
 import asyncio
 import inspect
+import os
 import timeit
 from typing import Any, Iterable, Optional
 
@@ -59,6 +60,25 @@
 )
 
 
+def _is_instrumentation_suppressed() -> bool:
+    """Return True when OpenAI spans should be skipped.
+
+    Checks two surfaces for the suppression signal:
+    1. OTel context key — set per-request by the LangChain instrumentor's
+       ``_OpenAITracingWrapper`` to prevent duplicate LLM spans when both
+       instrumentors are active simultaneously.
+    2. Environment variable ``SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION`` —
+       set globally (e.g. in zero-code deployments) together with
+       ``OTEL_PYTHON_DISABLED_INSTRUMENTATIONS=openai``.
+    """
+    if context_api.get_value(SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY):
+        return True
+    raw = os.environ.get(
+        SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY.upper(), ""
+    )
+    return raw.strip().lower() in ("true", "1", "yes", "on")
+
+
 def _normalize_stop_sequences(stop_values: Any) -> list[str]:
     if stop_values is None:
         return []
@@ -394,7 +414,7 @@ def chat_completions_create(capture_content: bool, handler):
 
     def traced_method(wrapped, instance, args, kwargs):
         # Check if instrumentation is suppressed (e.g., by LangChain)
-        if context_api.get_value(SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY):
+        if _is_instrumentation_suppressed():
             return wrapped(*args, **kwargs)
 
         span_attributes = {**get_llm_request_attributes(kwargs, instance)}
@@ -449,7 +469,7 @@ def async_chat_completions_create(capture_content: bool, handler):
 
     async def traced_method(wrapped, instance, args, kwargs):
         # Check if instrumentation is suppressed (e.g., by LangChain)
-        if context_api.get_value(SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY):
+        if _is_instrumentation_suppressed():
             return await wrapped(*args, **kwargs)
 
         span_attributes = {**get_llm_request_attributes(kwargs, instance)}
@@ -504,7 +524,7 @@ def embeddings_create(capture_content: bool, handler):
 
     def traced_method(wrapped, instance, args, kwargs):
         # Check if instrumentation is suppressed (e.g., by LangChain)
-        if context_api.get_value(SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY):
+        if _is_instrumentation_suppressed():
             return wrapped(*args, **kwargs)
 
         span_attributes = get_llm_request_attributes(
@@ -553,7 +573,7 @@ def async_embeddings_create(capture_content: bool, handler):
 
     async def traced_method(wrapped, instance, args, kwargs):
         # Check if instrumentation is suppressed (e.g., by LangChain)
-        if context_api.get_value(SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY):
+        if _is_instrumentation_suppressed():
             return await wrapped(*args, **kwargs)
 
         span_attributes = get_llm_request_attributes(
diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/cassettes/test_chat_completion_not_suppressed_when_env_var_false.yaml b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/cassettes/test_chat_completion_not_suppressed_when_env_var_false.yaml
new file mode 100644
index 00000000..2abb443f
--- /dev/null
+++ b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/cassettes/test_chat_completion_not_suppressed_when_env_var_false.yaml
@@ -0,0 +1,134 @@
+interactions:
+- request:
+    body: |-
+      {
+        "messages": [
+          {
+            "role": "user",
+            "content": "Say this is a test"
+          }
+        ],
+        "model": "gpt-4o-mini",
+        "stream": false
+      }
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate
+      authorization:
+      - Bearer test_openai_api_key
+      connection:
+      - keep-alive
+      content-length:
+      - '106'
+      content-type:
+      - application/json
+      host:
+      - api.openai.com
+      user-agent:
+      - OpenAI/Python 1.54.3
+      x-stainless-arch:
+      - arm64
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - MacOS
+      x-stainless-package-version:
+      - 1.54.3
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.12.6
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    body:
+      string: |-
+        {
+          "id": "chatcmpl-ASYMQRl3A3DXL9FWCK9tnGRcKIO7q",
+          "object": "chat.completion",
+          "created": 1731368630,
+          "model": "gpt-4o-mini-2024-07-18",
+          "choices": [
+            {
+              "index": 0,
+              "message": {
+                "role": "assistant",
+                "content": "This is a test.",
+                "refusal": null
+              },
+              "logprobs": null,
+              "finish_reason": "stop"
+            }
+          ],
+          "usage": {
+            "prompt_tokens": 12,
+            "completion_tokens": 5,
+            "total_tokens": 17,
+            "prompt_tokens_details": {
+              "cached_tokens": 0,
+              "audio_tokens": 0
+            },
+            "completion_tokens_details": {
+              "reasoning_tokens": 0,
+              "audio_tokens": 0,
+              "accepted_prediction_tokens": 0,
+              "rejected_prediction_tokens": 0
+            }
+          },
+          "system_fingerprint": "fp_0ba0d124f1"
+        }
+    headers:
+      CF-Cache-Status:
+      - DYNAMIC
+      CF-RAY:
+      - 8e122593ff368bc8-SIN
+      Connection:
+      - keep-alive
+      Content-Type:
+      - application/json
+      Date:
+      - Mon, 11 Nov 2024 23:43:50 GMT
+      Server:
+      - cloudflare
+      Set-Cookie: test_set_cookie
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - nosniff
+      access-control-expose-headers:
+      - X-Request-ID
+      alt-svc:
+      - h3=":443"; ma=86400
+      content-length:
+      - '765'
+      openai-organization: test_openai_org_id
+      openai-processing-ms:
+      - '287'
+      openai-version:
+      - '2020-10-01'
+      strict-transport-security:
+      - max-age=31536000; includeSubDomains; preload
+      x-ratelimit-limit-requests:
+      - '10000'
+      x-ratelimit-limit-tokens:
+      - '200000'
+      x-ratelimit-remaining-requests:
+      - '9999'
+      x-ratelimit-remaining-tokens:
+      - '199977'
+      x-ratelimit-reset-requests:
+      - 8.64s
+      x-ratelimit-reset-tokens:
+      - 6ms
+      x-request-id:
+      - req_58cff97afd0e7c0bba910ccf0b044a6f
+    status:
+      code: 200
+      message: OK
+version: 1
diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/cassettes/test_chat_completion_suppressed_via_env_var.yaml b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/cassettes/test_chat_completion_suppressed_via_env_var.yaml
new file mode 100644
index 00000000..2abb443f
--- /dev/null
+++ b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/cassettes/test_chat_completion_suppressed_via_env_var.yaml
@@ -0,0 +1,134 @@
+interactions:
+- request:
+    body: |-
+      {
+        "messages": [
+          {
+            "role": "user",
+            "content": "Say this is a test"
+          }
+        ],
+        "model": "gpt-4o-mini",
+        "stream": false
+      }
+    headers:
+      accept:
+      - application/json
+      accept-encoding:
+      - gzip, deflate
+      authorization:
+      - Bearer test_openai_api_key
+      connection:
+      - keep-alive
+      content-length:
+      - '106'
+      content-type:
+      - application/json
+      host:
+      - api.openai.com
+      user-agent:
+      - OpenAI/Python 1.54.3
+      x-stainless-arch:
+      - arm64
+      x-stainless-async:
+      - 'false'
+      x-stainless-lang:
+      - python
+      x-stainless-os:
+      - MacOS
+      x-stainless-package-version:
+      - 1.54.3
+      x-stainless-retry-count:
+      - '0'
+      x-stainless-runtime:
+      - CPython
+      x-stainless-runtime-version:
+      - 3.12.6
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    body:
+      string: |-
+        {
+          "id": "chatcmpl-ASYMQRl3A3DXL9FWCK9tnGRcKIO7q",
+          "object": "chat.completion",
+          "created": 1731368630,
+          "model": "gpt-4o-mini-2024-07-18",
+          "choices": [
+            {
+              "index": 0,
+              "message": {
+                "role": "assistant",
+                "content": "This is a test.",
+                "refusal": null
+              },
+              "logprobs": null,
+              "finish_reason": "stop"
+            }
+          ],
+          "usage": {
+            "prompt_tokens": 12,
+            "completion_tokens": 5,
+            "total_tokens": 17,
+            "prompt_tokens_details": {
+              "cached_tokens": 0,
+              "audio_tokens": 0
+            },
+            "completion_tokens_details": {
+              "reasoning_tokens": 0,
+              "audio_tokens": 0,
+              "accepted_prediction_tokens": 0,
+              "rejected_prediction_tokens": 0
+            }
+          },
+          "system_fingerprint": "fp_0ba0d124f1"
+        }
+    headers:
+      CF-Cache-Status:
+      - DYNAMIC
+      CF-RAY:
+      - 8e122593ff368bc8-SIN
+      Connection:
+      - keep-alive
+      Content-Type:
+      - application/json
+      Date:
+      - Mon, 11 Nov 2024 23:43:50 GMT
+      Server:
+      - cloudflare
+      Set-Cookie: test_set_cookie
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - nosniff
+      access-control-expose-headers:
+      - X-Request-ID
+      alt-svc:
+      - h3=":443"; ma=86400
+      content-length:
+      - '765'
+      openai-organization: test_openai_org_id
+      openai-processing-ms:
+      - '287'
+      openai-version:
+      - '2020-10-01'
+      strict-transport-security:
+      - max-age=31536000; includeSubDomains; preload
+      x-ratelimit-limit-requests:
+      - '10000'
+      x-ratelimit-limit-tokens:
+      - '200000'
+      x-ratelimit-remaining-requests:
+      - '9999'
+      x-ratelimit-remaining-tokens:
+      - '199977'
+      x-ratelimit-reset-requests:
+      - 8.64s
+      x-ratelimit-reset-tokens:
+      - 6ms
+      x-request-id:
+      - req_58cff97afd0e7c0bba910ccf0b044a6f
+    status:
+      code: 200
+      message: OK
+version: 1
diff --git a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_suppression.py b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_suppression.py
index e57001bc..221dbd17 100644
--- a/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_suppression.py
+++ b/instrumentation-genai/opentelemetry-instrumentation-openai-v2/tests/test_suppression.py
@@ -13,8 +13,14 @@
 # limitations under the License.
 
 """
-Tests for SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY functionality.
-This prevents duplicate telemetry when multiple instrumentations (e.g., LangChain + OpenAI) are active.
+Tests for suppression of OpenAI instrumentation.
+
+Covers two suppression surfaces:
+1. SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY context key — set per-request
+   by the LangChain instrumentor to prevent duplicate LLM spans when both
+   instrumentors are active simultaneously.
+2. SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION environment variable — set globally
+   for zero-code deployments alongside OTEL_PYTHON_DISABLED_INSTRUMENTATIONS=openai.
 """
 
 import pytest
@@ -24,6 +30,11 @@
     SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY,
 )
 
+# The env var name is the uppercase form of the context key string.
+SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_ENV_VAR = (
+    SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY.upper()
+)
+
 
 @pytest.mark.vcr()
 def test_chat_completion_suppressed(
@@ -144,3 +155,81 @@ def test_chat_completion_not_suppressed_by_default(
     # Should have at least the main chat completion span
     chat_spans = [s for s in spans if "chat" in s.name.lower()]
     assert len(chat_spans) > 0
+
+
+# ---------------------------------------------------------------------------
+# Environment variable suppression
+# ---------------------------------------------------------------------------
+
+
+@pytest.mark.vcr()
+def test_chat_completion_suppressed_via_env_var(
+    monkeypatch, span_exporter, openai_client, instrument_with_content
+):
+    """SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION=true suppresses spans globally."""
+    monkeypatch.setenv(SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_ENV_VAR, "true")
+
+    response = openai_client.chat.completions.create(
+        model="gpt-4o-mini",
+        messages=[{"role": "user", "content": "Say this is a test"}],
+    )
+    assert response is not None
+
+    spans = span_exporter.get_finished_spans()
+    assert len(spans) == 0
+
+
+@pytest.mark.vcr()
+def test_chat_completion_not_suppressed_when_env_var_false(
+    monkeypatch, span_exporter, openai_client, instrument_with_content
+):
+    """SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION=false leaves instrumentation active."""
+    monkeypatch.setenv(
+        SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_ENV_VAR, "false"
+    )
+
+    response = openai_client.chat.completions.create(
+        model="gpt-4o-mini",
+        messages=[{"role": "user", "content": "Say this is a test"}],
+    )
+    assert response is not None
+
+    spans = span_exporter.get_finished_spans()
+    assert len(spans) > 0
+
+
+@pytest.mark.parametrize("value", ["true", "1", "yes", "on", "TRUE"])
+def test_env_var_truthy_values_suppress(monkeypatch, value):
+    """All truthy spellings of the env var are recognised."""
+    monkeypatch.setenv(SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_ENV_VAR, value)
+
+    from opentelemetry.instrumentation.openai_v2.patch import (
+        _is_instrumentation_suppressed,
+    )
+
+    assert _is_instrumentation_suppressed() is True
+
+
+@pytest.mark.parametrize("value", ["false", "0", "no", "off", "FALSE", ""])
+def test_env_var_falsey_values_do_not_suppress(monkeypatch, value):
+    """Falsey env var spellings leave instrumentation active."""
+    monkeypatch.setenv(SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_ENV_VAR, value)
+
+    from opentelemetry.instrumentation.openai_v2.patch import (
+        _is_instrumentation_suppressed,
+    )
+
+    assert _is_instrumentation_suppressed() is False
+
+
+def test_env_var_unset_does_not_suppress(monkeypatch):
+    """When the env var is absent, instrumentation is active by default."""
+    monkeypatch.delenv(
+        SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_ENV_VAR, raising=False
+    )
+
+    from opentelemetry.instrumentation.openai_v2.patch import (
+        _is_instrumentation_suppressed,
+    )
+
+    assert _is_instrumentation_suppressed() is False
diff --git a/util/opentelemetry-util-genai/CHANGELOG.md b/util/opentelemetry-util-genai/CHANGELOG.md
index 6b6571fd..d3cecb5a 100644
--- a/util/opentelemetry-util-genai/CHANGELOG.md
+++ b/util/opentelemetry-util-genai/CHANGELOG.md
@@ -6,6 +6,12 @@ All notable changes to this repository are documented in this file.
 
 ### Added
 
+- **`SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION` env var** — The uppercase form of
+  `SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY` is now also checked as an
+  environment variable by the openai-v2 instrumentor, allowing zero-code
+  deployments to set suppression globally without the LangChain instrumentor
+  injecting the context key per-request. No new constant added.
+
 - **`OTEL_INSTRUMENTATION_GENAI_EMIT_EVENT` env var** — Explicit override for content event emission (`true`/`false`). When unset, defaults are derived from `OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT` mode.
 
 ### Changed
diff --git a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/attributes.py b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/attributes.py
index 375b7b7a..03e71e9b 100644
--- a/util/opentelemetry-util-genai/src/opentelemetry/util/genai/attributes.py
+++ b/util/opentelemetry-util-genai/src/opentelemetry/util/genai/attributes.py
@@ -95,7 +95,11 @@
 GEN_AI_SECURITY_EVENT_ID = "gen_ai.security.event_id"
 
 # Context key for suppressing instrumentation to avoid duplicate telemetry
-# when multiple instrumentations (e.g., LangChain + OpenAI) are active
+# when multiple instrumentations (e.g., LangChain + OpenAI) are active.
+# The uppercase form of this string is also checked as an environment variable
+# (SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION) so zero-code deployments can set
+# suppression globally without the LangChain instrumentor injecting it
+# per-request. Use together with OTEL_PYTHON_DISABLED_INSTRUMENTATIONS=openai.
 SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY = (
     "suppress_language_model_instrumentation"
 )