Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
([#4907](https://github.com/open-telemetry/opentelemetry-python/issues/4907))
- Drop Python 3.9 support
([#5076](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/5076))
- `opentelemetry-semantic-conventions`: Attach spec-mandated explicit bucket boundaries to the GenAI histogram helpers (`gen_ai.client.operation.duration`, `gen_ai.server.request.duration`, `gen_ai.server.time_to_first_token`, `gen_ai.server.time_per_output_token`); without them the default SDK buckets produced unusable histograms for latency-per-token metrics
([#4946](https://github.com/open-telemetry/opentelemetry-python/issues/4946))


## Version 1.41.0/0.62b0 (2026-04-09)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -25,12 +25,34 @@
"""


# Spec-recommended explicit bucket boundaries (seconds) for
# `gen_ai.client.operation.duration` — a doubling geometric series from
# 10 ms up to ~82 s. See
# https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-metrics/#metric-gen_aiclientoperationduration
_GEN_AI_CLIENT_OPERATION_DURATION_BUCKETS: Final = (
    0.01, 0.02, 0.04, 0.08, 0.16, 0.32, 0.64,
    1.28, 2.56, 5.12, 10.24, 20.48, 40.96, 81.92,
)


def create_gen_ai_client_operation_duration(meter: Meter) -> Histogram:
    """GenAI operation duration.

    Creates the ``gen_ai.client.operation.duration`` histogram with the
    spec-recommended explicit bucket boundaries attached as advisory.
    """
    # Advisory must be a list; the module constant is an immutable tuple.
    boundaries = [*_GEN_AI_CLIENT_OPERATION_DURATION_BUCKETS]
    return meter.create_histogram(
        name=GEN_AI_CLIENT_OPERATION_DURATION,
        description="GenAI operation duration.",
        unit="s",
        explicit_bucket_boundaries_advisory=boundaries,
    )


Expand Down Expand Up @@ -61,10 +83,15 @@ def create_gen_ai_client_token_usage(meter: Meter) -> Histogram:

def create_gen_ai_server_request_duration(meter: Meter) -> Histogram:
    """Generative AI server request duration such as time-to-last byte or last output token.

    The semconv spec recommends the same latency-style bucket boundaries
    for this metric as for ``gen_ai.client.operation.duration``, so the
    shared constant is reused here.
    """
    boundaries = [*_GEN_AI_CLIENT_OPERATION_DURATION_BUCKETS]
    return meter.create_histogram(
        name=GEN_AI_SERVER_REQUEST_DURATION,
        description="Generative AI server request duration such as time-to-last byte or last output token.",
        unit="s",
        explicit_bucket_boundaries_advisory=boundaries,
    )


Expand All @@ -78,12 +105,33 @@ def create_gen_ai_server_request_duration(meter: Meter) -> Histogram:
"""


# Spec-recommended explicit bucket boundaries (seconds) for
# `gen_ai.server.time_per_output_token`. Per-token latencies are small,
# so these are much finer-grained than the whole-operation buckets. See
# https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-metrics/#metric-gen_aiservertime_per_output_token
_GEN_AI_SERVER_TIME_PER_OUTPUT_TOKEN_BUCKETS: Final = (
    0.01, 0.025, 0.05, 0.075, 0.1, 0.15, 0.2,
    0.3, 0.4, 0.5, 0.75, 1.0, 2.5,
)


def create_gen_ai_server_time_per_output_token(meter: Meter) -> Histogram:
    """Time per output token generated after the first token for successful responses.

    Creates the ``gen_ai.server.time_per_output_token`` histogram with the
    spec-recommended per-token bucket boundaries attached as advisory.
    """
    # Advisory must be a list; the module constant is an immutable tuple.
    boundaries = [*_GEN_AI_SERVER_TIME_PER_OUTPUT_TOKEN_BUCKETS]
    return meter.create_histogram(
        name=GEN_AI_SERVER_TIME_PER_OUTPUT_TOKEN,
        description="Time per output token generated after the first token for successful responses.",
        unit="s",
        explicit_bucket_boundaries_advisory=boundaries,
    )


Expand All @@ -97,8 +145,13 @@ def create_gen_ai_server_time_per_output_token(meter: Meter) -> Histogram:

# https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-metrics/#metric-gen_aiservertime_to_first_token
# Time-to-first-token needs to resolve sub-10 ms latencies, so the spec
# gives it its own boundaries (starting at 1 ms) instead of reusing the
# client operation duration buckets, whose lowest boundary is 10 ms.
# NOTE(review): verify these values against the semconv version this
# package pins before release.
_GEN_AI_SERVER_TIME_TO_FIRST_TOKEN_BUCKETS: Final = (
    0.001,
    0.005,
    0.01,
    0.02,
    0.04,
    0.06,
    0.08,
    0.1,
    0.25,
    0.5,
    0.75,
    1.0,
    2.5,
    5.0,
    7.5,
    10.0,
)


def create_gen_ai_server_time_to_first_token(meter: Meter) -> Histogram:
    """Time to generate first token for successful responses.

    Creates the ``gen_ai.server.time_to_first_token`` histogram with its
    own spec-recommended bucket boundaries. Previously this helper reused
    the ``gen_ai.client.operation.duration`` buckets, which cannot
    distinguish first-token latencies below 10 ms.
    """
    return meter.create_histogram(
        name=GEN_AI_SERVER_TIME_TO_FIRST_TOKEN,
        description="Time to generate first token for successful responses.",
        unit="s",
        explicit_bucket_boundaries_advisory=list(
            _GEN_AI_SERVER_TIME_TO_FIRST_TOKEN_BUCKETS
        ),
    )
Loading