Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,20 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## Unreleased

### Added

- **`SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION` env var** — Global alternative to
the per-request `suppress_language_model_instrumentation` OTel context key
(the env var is the uppercase form of the same string). When set to a truthy
value (`true`, `1`, `yes`, `on`), the openai-v2 instrumentor skips creating
spans entirely. Intended for zero-code deployments alongside

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this an alternative instead of "alongside"?

`OTEL_PYTHON_DISABLED_INSTRUMENTATIONS=openai`.
- Add `gen_ai.tool.definitions` attribute on LLM spans when
`OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT=true` and
`OTEL_INSTRUMENTATION_GENAI_CAPTURE_TOOL_DEFINITIONS=true`
- Add `gen_ai.request.stream` attribute for streaming requests
- Add `gen_ai.response.time_to_first_chunk` attribute and metric for streaming requests

### Fixed

- Fix `AttributeError: 'StreamWrapper' object has no attribute 'headers'` when
Expand All @@ -19,17 +33,6 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
attributes to the underlying stream. Inspired by upstream fix
([opentelemetry-python-contrib#4184](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/4184),
fixes [#4113](https://github.com/open-telemetry/opentelemetry-python-contrib/issues/4113)).

### Added

- Add `gen_ai.tool.definitions` attribute on LLM spans when
`OTEL_INSTRUMENTATION_GENAI_CAPTURE_MESSAGE_CONTENT=true` and
`OTEL_INSTRUMENTATION_GENAI_CAPTURE_TOOL_DEFINITIONS=true`
- Add `gen_ai.request.stream` attribute for streaming requests
- Add `gen_ai.response.time_to_first_chunk` attribute and metric for streaming requests

### Fixed

- Fix PyPI badge, install command, and references in README.rst to use correct
`splunk-otel-instrumentation-openai` package name instead of upstream
- Fix project URLs in pyproject.toml to point to SDOT repo (`signalfx/splunk-otel-python-contrib`)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@

import asyncio
import inspect
import os
import timeit
from typing import Any, Iterable, Optional

Expand Down Expand Up @@ -59,6 +60,25 @@
)


def _is_instrumentation_suppressed() -> bool:
    """Report whether OpenAI span creation should be skipped.

    Two suppression signals are consulted, in this order:

    * The OTel context value stored under
      ``SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY``. This is the
      per-request signal, set by the LangChain instrumentor's
      ``_OpenAITracingWrapper`` so that LLM spans are not duplicated when
      both instrumentors are active at the same time.
    * The environment variable whose name is the upper-cased form of that
      same key (``SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION``). This is the
      global signal, e.g. for zero-code deployments that also set
      ``OTEL_PYTHON_DISABLED_INSTRUMENTATIONS=openai``.

    Returns:
        ``True`` when the context value is truthy, or when the environment
        variable (ignoring surrounding whitespace and letter case) is one
        of ``true``, ``1``, ``yes`` or ``on``; ``False`` otherwise.
    """
    context_flag = context_api.get_value(
        SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY
    )
    if not context_flag:
        # No per-request signal: fall back to the process-wide env var,
        # which is looked up on every call rather than cached.
        env_name = SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY.upper()
        env_value = os.environ.get(env_name, "")
        normalized = env_value.strip().lower()
        return normalized in ("true", "1", "yes", "on")
    return True


def _normalize_stop_sequences(stop_values: Any) -> list[str]:
if stop_values is None:
return []
Expand Down Expand Up @@ -394,7 +414,7 @@ def chat_completions_create(capture_content: bool, handler):

def traced_method(wrapped, instance, args, kwargs):
# Check if instrumentation is suppressed (e.g., by LangChain)
if context_api.get_value(SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY):
if _is_instrumentation_suppressed():
return wrapped(*args, **kwargs)

span_attributes = {**get_llm_request_attributes(kwargs, instance)}
Expand Down Expand Up @@ -449,7 +469,7 @@ def async_chat_completions_create(capture_content: bool, handler):

async def traced_method(wrapped, instance, args, kwargs):
# Check if instrumentation is suppressed (e.g., by LangChain)
if context_api.get_value(SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY):
if _is_instrumentation_suppressed():
return await wrapped(*args, **kwargs)

span_attributes = {**get_llm_request_attributes(kwargs, instance)}
Expand Down Expand Up @@ -504,7 +524,7 @@ def embeddings_create(capture_content: bool, handler):

def traced_method(wrapped, instance, args, kwargs):
# Check if instrumentation is suppressed (e.g., by LangChain)
if context_api.get_value(SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY):
if _is_instrumentation_suppressed():
return wrapped(*args, **kwargs)

span_attributes = get_llm_request_attributes(
Expand Down Expand Up @@ -553,7 +573,7 @@ def async_embeddings_create(capture_content: bool, handler):

async def traced_method(wrapped, instance, args, kwargs):
# Check if instrumentation is suppressed (e.g., by LangChain)
if context_api.get_value(SUPPRESS_LANGUAGE_MODEL_INSTRUMENTATION_KEY):
if _is_instrumentation_suppressed():
return await wrapped(*args, **kwargs)

span_attributes = get_llm_request_attributes(
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
interactions:
- request:
body: |-
{
"messages": [
{
"role": "user",
"content": "Say this is a test"
}
],
"model": "gpt-4o-mini",
"stream": false
}
headers:
accept:
- application/json
accept-encoding:
- gzip, deflate
authorization:
- Bearer test_openai_api_key
connection:
- keep-alive
content-length:
- '106'
content-type:
- application/json
host:
- api.openai.com
user-agent:
- OpenAI/Python 1.54.3
x-stainless-arch:
- arm64
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- MacOS
x-stainless-package-version:
- 1.54.3
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.12.6
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: |-
{
"id": "chatcmpl-ASYMQRl3A3DXL9FWCK9tnGRcKIO7q",
"object": "chat.completion",
"created": 1731368630,
"model": "gpt-4o-mini-2024-07-18",
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "This is a test.",
"refusal": null
},
"logprobs": null,
"finish_reason": "stop"
}
],
"usage": {
"prompt_tokens": 12,
"completion_tokens": 5,
"total_tokens": 17,
"prompt_tokens_details": {
"cached_tokens": 0,
"audio_tokens": 0
},
"completion_tokens_details": {
"reasoning_tokens": 0,
"audio_tokens": 0,
"accepted_prediction_tokens": 0,
"rejected_prediction_tokens": 0
}
},
"system_fingerprint": "fp_0ba0d124f1"
}
headers:
CF-Cache-Status:
- DYNAMIC
CF-RAY:
- 8e122593ff368bc8-SIN
Connection:
- keep-alive
Content-Type:
- application/json
Date:
- Mon, 11 Nov 2024 23:43:50 GMT
Server:
- cloudflare
Set-Cookie: test_set_cookie
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- nosniff
access-control-expose-headers:
- X-Request-ID
alt-svc:
- h3=":443"; ma=86400
content-length:
- '765'
openai-organization: test_openai_org_id
openai-processing-ms:
- '287'
openai-version:
- '2020-10-01'
strict-transport-security:
- max-age=31536000; includeSubDomains; preload
x-ratelimit-limit-requests:
- '10000'
x-ratelimit-limit-tokens:
- '200000'
x-ratelimit-remaining-requests:
- '9999'
x-ratelimit-remaining-tokens:
- '199977'
x-ratelimit-reset-requests:
- 8.64s
x-ratelimit-reset-tokens:
- 6ms
x-request-id:
- req_58cff97afd0e7c0bba910ccf0b044a6f
status:
code: 200
message: OK
version: 1
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
interactions:
- request:
body: |-
{
"messages": [
{
"role": "user",
"content": "Say this is a test"
}
],
"model": "gpt-4o-mini",
"stream": false
}
headers:
accept:
- application/json
accept-encoding:
- gzip, deflate
authorization:
- Bearer test_openai_api_key
connection:
- keep-alive
content-length:
- '106'
content-type:
- application/json
host:
- api.openai.com
user-agent:
- OpenAI/Python 1.54.3
x-stainless-arch:
- arm64
x-stainless-async:
- 'false'
x-stainless-lang:
- python
x-stainless-os:
- MacOS
x-stainless-package-version:
- 1.54.3
x-stainless-retry-count:
- '0'
x-stainless-runtime:
- CPython
x-stainless-runtime-version:
- 3.12.6
method: POST
uri: https://api.openai.com/v1/chat/completions
response:
body:
string: |-
{
"id": "chatcmpl-ASYMQRl3A3DXL9FWCK9tnGRcKIO7q",
"object": "chat.completion",
"created": 1731368630,
"model": "gpt-4o-mini-2024-07-18",
"choices": [
{
"index": 0,
"message": {
"role": "assistant",
"content": "This is a test.",
"refusal": null
},
"logprobs": null,
"finish_reason": "stop"
}
],
"usage": {
"prompt_tokens": 12,
"completion_tokens": 5,
"total_tokens": 17,
"prompt_tokens_details": {
"cached_tokens": 0,
"audio_tokens": 0
},
"completion_tokens_details": {
"reasoning_tokens": 0,
"audio_tokens": 0,
"accepted_prediction_tokens": 0,
"rejected_prediction_tokens": 0
}
},
"system_fingerprint": "fp_0ba0d124f1"
}
headers:
CF-Cache-Status:
- DYNAMIC
CF-RAY:
- 8e122593ff368bc8-SIN
Connection:
- keep-alive
Content-Type:
- application/json
Date:
- Mon, 11 Nov 2024 23:43:50 GMT
Server:
- cloudflare
Set-Cookie: test_set_cookie
Transfer-Encoding:
- chunked
X-Content-Type-Options:
- nosniff
access-control-expose-headers:
- X-Request-ID
alt-svc:
- h3=":443"; ma=86400
content-length:
- '765'
openai-organization: test_openai_org_id
openai-processing-ms:
- '287'
openai-version:
- '2020-10-01'
strict-transport-security:
- max-age=31536000; includeSubDomains; preload
x-ratelimit-limit-requests:
- '10000'
x-ratelimit-limit-tokens:
- '200000'
x-ratelimit-remaining-requests:
- '9999'
x-ratelimit-remaining-tokens:
- '199977'
x-ratelimit-reset-requests:
- 8.64s
x-ratelimit-reset-tokens:
- 6ms
x-request-id:
- req_58cff97afd0e7c0bba910ccf0b044a6f
status:
code: 200
message: OK
version: 1
Loading
Loading