diff --git a/CHANGELOG.md b/CHANGELOG.md
index 4537ad3f8bc..9ab2de856cc 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -19,7 +19,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 - logs: add exception support to Logger emit and LogRecord attributes
   ([#4907](https://github.com/open-telemetry/opentelemetry-python/issues/4907))
 - Drop Python 3.9 support
-  ([#5076](https://github.com/open-telemetry/opentelemetry-python-contrib/pull/5076))
+  ([#5076](https://github.com/open-telemetry/opentelemetry-python/pull/5076))
+- `opentelemetry-semantic-conventions`: Attach spec-mandated explicit bucket boundaries to the GenAI histogram helpers (`gen_ai.client.operation.duration`, `gen_ai.server.request.duration`, `gen_ai.server.time_to_first_token`, `gen_ai.server.time_per_output_token`); without them the default SDK buckets produced unusable histograms for latency-per-token metrics
+  ([#4946](https://github.com/open-telemetry/opentelemetry-python/issues/4946))
 
 ## Version 1.41.0/0.62b0 (2026-04-09)
 
diff --git a/opentelemetry-semantic-conventions/src/opentelemetry/semconv/_incubating/metrics/gen_ai_metrics.py b/opentelemetry-semantic-conventions/src/opentelemetry/semconv/_incubating/metrics/gen_ai_metrics.py
index 7a7afa33888..b8f6e5a7a91 100644
--- a/opentelemetry-semantic-conventions/src/opentelemetry/semconv/_incubating/metrics/gen_ai_metrics.py
+++ b/opentelemetry-semantic-conventions/src/opentelemetry/semconv/_incubating/metrics/gen_ai_metrics.py
@@ -25,12 +25,34 @@
 """
 
 
+# https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-metrics/#metric-gen_aiclientoperationduration
+_GEN_AI_CLIENT_OPERATION_DURATION_BUCKETS: Final = (
+    0.01,
+    0.02,
+    0.04,
+    0.08,
+    0.16,
+    0.32,
+    0.64,
+    1.28,
+    2.56,
+    5.12,
+    10.24,
+    20.48,
+    40.96,
+    81.92,
+)
+
+
 def create_gen_ai_client_operation_duration(meter: Meter) -> Histogram:
     """GenAI operation duration"""
     return meter.create_histogram(
         name=GEN_AI_CLIENT_OPERATION_DURATION,
         description="GenAI operation duration.",
         unit="s",
+        explicit_bucket_boundaries_advisory=list(
+            _GEN_AI_CLIENT_OPERATION_DURATION_BUCKETS
+        ),
     )
 
 
@@ -61,10 +83,15 @@ def create_gen_ai_client_token_usage(meter: Meter) -> Histogram:
 
 def create_gen_ai_server_request_duration(meter: Meter) -> Histogram:
     """Generative AI server request duration such as time-to-last byte or last output token"""
+    # The semconv spec gives this metric the same boundaries as
+    # gen_ai.client.operation.duration, so that constant is reused here.
     return meter.create_histogram(
         name=GEN_AI_SERVER_REQUEST_DURATION,
         description="Generative AI server request duration such as time-to-last byte or last output token.",
         unit="s",
+        explicit_bucket_boundaries_advisory=list(
+            _GEN_AI_CLIENT_OPERATION_DURATION_BUCKETS
+        ),
     )
 
 
@@ -78,12 +105,33 @@ def create_gen_ai_server_request_duration(meter: Meter) -> Histogram:
 """
 
 
+# https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-metrics/#metric-gen_aiservertime_per_output_token
+_GEN_AI_SERVER_TIME_PER_OUTPUT_TOKEN_BUCKETS: Final = (
+    0.01,
+    0.025,
+    0.05,
+    0.075,
+    0.1,
+    0.15,
+    0.2,
+    0.3,
+    0.4,
+    0.5,
+    0.75,
+    1.0,
+    2.5,
+)
+
+
 def create_gen_ai_server_time_per_output_token(meter: Meter) -> Histogram:
     """Time per output token generated after the first token for successful responses"""
     return meter.create_histogram(
         name=GEN_AI_SERVER_TIME_PER_OUTPUT_TOKEN,
         description="Time per output token generated after the first token for successful responses.",
         unit="s",
+        explicit_bucket_boundaries_advisory=list(
+            _GEN_AI_SERVER_TIME_PER_OUTPUT_TOKEN_BUCKETS
+        ),
     )
 
 
@@ -97,8 +145,33 @@ def create_gen_ai_server_time_per_output_token(meter: Meter) -> Histogram:
 
+# https://opentelemetry.io/docs/specs/semconv/gen-ai/gen-ai-metrics/#metric-gen_aiservertime_to_first_token
+# Not shared with the operation-duration boundaries: the spec starts this metric at 1 ms and caps it at 10 s.
+_GEN_AI_SERVER_TIME_TO_FIRST_TOKEN_BUCKETS: Final = (
+    0.001,
+    0.005,
+    0.01,
+    0.02,
+    0.04,
+    0.06,
+    0.08,
+    0.1,
+    0.25,
+    0.5,
+    0.75,
+    1.0,
+    2.5,
+    5.0,
+    7.5,
+    10.0,
+)
+
+
 def create_gen_ai_server_time_to_first_token(meter: Meter) -> Histogram:
     """Time to generate first token for successful responses"""
     return meter.create_histogram(
         name=GEN_AI_SERVER_TIME_TO_FIRST_TOKEN,
         description="Time to generate first token for successful responses.",
         unit="s",
+        explicit_bucket_boundaries_advisory=list(
+            _GEN_AI_SERVER_TIME_TO_FIRST_TOKEN_BUCKETS
+        ),
     )