Skip to content

Commit 50218b9

Browse files
herin049 and aabmass authored
feat: make retryable gRPC error codes configurable for gRPC exporters (open-telemetry#4917)
* feat: make retryable gRPC error codes configurable for gRPC exporters
* update CHANGELOG.md
* add ability to configure OTLP gRPC retryable error codes via environment variables
* update OTLP gRPC exporter constructor type hints
* update retryable error codes initialization logic
* fix typechecking error
* update environment variables docstring
* remove trailing whitespace
* Fix formattings

---------

Co-authored-by: Aaron Abbott <aaronabbott@google.com>
1 parent 3a10517 commit 50218b9

7 files changed

Lines changed: 115 additions & 8 deletions

File tree

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
5050
([#5145](https://github.com/open-telemetry/opentelemetry-python/pull/5145))
5151
- `opentelemetry-exporter-otlp-json-common`: add 'opentelemetry-exporter-otlp-json-common' package for OTLP JSON exporters
5252
([#4996](https://github.com/open-telemetry/opentelemetry-python/pull/4996))
53+
- `opentelemetry-exporter-otlp-proto-grpc`: make retryable gRPC error codes configurable for gRPC exporters
54+
([#4917](https://github.com/open-telemetry/opentelemetry-python/pull/4917))
5355

5456
## Version 1.41.0/0.62b0 (2026-04-09)
5557

exporter/opentelemetry-exporter-otlp-proto-grpc/src/opentelemetry/exporter/otlp/proto/grpc/_log_exporter/__init__.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,12 @@
11
# Copyright The OpenTelemetry Authors
22
# SPDX-License-Identifier: Apache-2.0
33

4-
from collections.abc import Sequence
4+
from collections.abc import Iterable, Sequence
55
from collections.abc import Sequence as TypingSequence
66
from os import environ
77
from typing import Literal
88

9-
from grpc import ChannelCredentials, Compression
9+
from grpc import ChannelCredentials, Compression, StatusCode
1010
from opentelemetry.exporter.otlp.proto.common._log_encoder import encode_logs
1111
from opentelemetry.exporter.otlp.proto.grpc.exporter import (
1212
OTLPExporterMixin,
@@ -62,6 +62,7 @@ def __init__(
6262
timeout: float | None = None,
6363
compression: Compression | None = None,
6464
channel_options: tuple[tuple[str, str]] | None = None,
65+
retryable_error_codes: Iterable[StatusCode] | None = None,
6566
*,
6667
meter_provider: MeterProvider | None = None,
6768
):
@@ -103,6 +104,7 @@ def __init__(
103104
stub=LogsServiceStub,
104105
result=LogRecordExportResult,
105106
channel_options=channel_options,
107+
retryable_error_codes=retryable_error_codes,
106108
component_type=OtelComponentTypeValues.OTLP_GRPC_LOG_EXPORTER,
107109
signal="logs",
108110
meter_provider=meter_provider,

exporter/opentelemetry-exporter-otlp-proto-grpc/src/opentelemetry/exporter/otlp/proto/grpc/exporter.py

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,13 @@
99
1010
"""
1111

12+
import os
1213
import random
1314
import threading
1415
from abc import ABC, abstractmethod
1516
from collections.abc import (
1617
Callable,
18+
Iterable,
1719
Sequence, # noqa: F401
1820
)
1921
from collections.abc import Sequence as TypingSequence
@@ -82,6 +84,7 @@
8284
from opentelemetry.sdk._shared_internal import DuplicateFilter
8385
from opentelemetry.sdk.environment_variables import (
8486
_OTEL_PYTHON_EXPORTER_OTLP_GRPC_CREDENTIAL_PROVIDER,
87+
_OTEL_PYTHON_EXPORTER_OTLP_GRPC_RETRYABLE_ERROR_CODES,
8588
OTEL_EXPORTER_OTLP_CERTIFICATE,
8689
OTEL_EXPORTER_OTLP_CLIENT_CERTIFICATE,
8790
OTEL_EXPORTER_OTLP_CLIENT_KEY,
@@ -295,6 +298,7 @@ def __init__(
295298
timeout: float | None = None,
296299
compression: Compression | None = None,
297300
channel_options: tuple[tuple[str, str]] | None = None,
301+
retryable_error_codes: Iterable[StatusCode] | None = None,
298302
*,
299303
component_type: OtelComponentTypeValues | None = None,
300304
signal: Literal["traces", "metrics", "logs"] = "traces",
@@ -357,6 +361,22 @@ def __init__(
357361
else compression
358362
) or Compression.NoCompression
359363

364+
self._retryable_error_codes = retryable_error_codes or os.environ.get(
365+
_OTEL_PYTHON_EXPORTER_OTLP_GRPC_RETRYABLE_ERROR_CODES
366+
)
367+
if isinstance(self._retryable_error_codes, str):
368+
self._retryable_error_codes = frozenset(
369+
StatusCode[code.strip().upper()]
370+
for code in self._retryable_error_codes.split(",")
371+
if code.strip()
372+
)
373+
elif self._retryable_error_codes is not None:
374+
self._retryable_error_codes = frozenset(
375+
self._retryable_error_codes
376+
)
377+
else:
378+
self._retryable_error_codes = _RETRYABLE_ERROR_CODES
379+
360380
self._channel = None
361381
self._client = None
362382

@@ -481,7 +501,7 @@ def _export(
481501
self._initialize_channel_and_stub()
482502

483503
if (
484-
error.code() not in _RETRYABLE_ERROR_CODES # type: ignore [reportAttributeAccessIssue]
504+
error.code() not in self._retryable_error_codes # type: ignore [reportAttributeAccessIssue]
485505
or retry_num + 1 == _MAX_RETRYS
486506
or backoff_seconds > (deadline_sec - time())
487507
or self._shutdown

exporter/opentelemetry-exporter-otlp-proto-grpc/src/opentelemetry/exporter/otlp/proto/grpc/metric_exporter/__init__.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
from logging import getLogger
1010
from os import environ
1111

12-
from grpc import ChannelCredentials, Compression
12+
from grpc import ChannelCredentials, Compression, StatusCode
1313
from opentelemetry.exporter.otlp.proto.common._internal.metrics_encoder import (
1414
OTLPMetricExporterMixin,
1515
)
@@ -105,6 +105,7 @@ def __init__(
105105
preferred_aggregation: dict[type, Aggregation] | None = None,
106106
max_export_batch_size: int | None = None,
107107
channel_options: tuple[tuple[str, str]] | None = None,
108+
retryable_error_codes: Iterable[StatusCode] | None = None,
108109
*,
109110
meter_provider: MeterProvider | None = None,
110111
):
@@ -151,6 +152,7 @@ def __init__(
151152
timeout=timeout or environ_timeout,
152153
compression=compression,
153154
channel_options=channel_options,
155+
retryable_error_codes=retryable_error_codes,
154156
component_type=OtelComponentTypeValues.OTLP_GRPC_METRIC_EXPORTER,
155157
signal="metrics",
156158
meter_provider=meter_provider,

exporter/opentelemetry-exporter-otlp-proto-grpc/src/opentelemetry/exporter/otlp/proto/grpc/trace_exporter/__init__.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,11 +4,11 @@
44
"""OTLP Span Exporter"""
55

66
import logging
7-
from collections.abc import Sequence
7+
from collections.abc import Iterable, Sequence
88
from collections.abc import Sequence as TypingSequence
99
from os import environ
1010

11-
from grpc import ChannelCredentials, Compression
11+
from grpc import ChannelCredentials, Compression, StatusCode
1212
from opentelemetry.exporter.otlp.proto.common.trace_encoder import (
1313
encode_spans,
1414
)
@@ -90,6 +90,7 @@ def __init__(
9090
timeout: float | None = None,
9191
compression: Compression | None = None,
9292
channel_options: tuple[tuple[str, str]] | None = None,
93+
retryable_error_codes: Iterable[StatusCode] | None = None,
9394
*,
9495
meter_provider: MeterProvider | None = None,
9596
):
@@ -132,6 +133,7 @@ def __init__(
132133
timeout=timeout or environ_timeout,
133134
compression=compression,
134135
channel_options=channel_options,
136+
retryable_error_codes=retryable_error_codes,
135137
component_type=OtelComponentTypeValues.OTLP_GRPC_SPAN_EXPORTER,
136138
signal="traces",
137139
meter_provider=meter_provider,

exporter/opentelemetry-exporter-otlp-proto-grpc/tests/test_otlp_exporter_mixin.py

Lines changed: 68 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
encode_spans,
2828
)
2929
from opentelemetry.exporter.otlp.proto.grpc.exporter import ( # noqa: F401
30+
_RETRYABLE_ERROR_CODES,
3031
InvalidCompressionValueException,
3132
OTLPExporterMixin,
3233
environ_to_compression,
@@ -159,7 +160,7 @@ def join(self, timeout: float | None = None) -> Any:
159160
return self._return
160161

161162

162-
# pylint: disable=too-many-public-methods
163+
# pylint: disable-next=too-many-public-methods
163164
class TestOTLPExporterMixin(TestCase):
164165
def setUp(self):
165166
self.server = server(ThreadPoolExecutor(max_workers=10))
@@ -695,7 +696,72 @@ def test_unavailable_reconnects(self):
695696
# Since the initial channel was created in setUp (unpatched), this call
696697
# must be from the reconnection logic.
697698
self.assertTrue(mock_insecure_channel.called)
698-
# Verify that reconnection enabled flag is set
699+
700+
def test_retryable_error_codes_initialization(self):
701+
# pylint: disable=protected-access
702+
self.assertEqual(
703+
self.exporter._retryable_error_codes, _RETRYABLE_ERROR_CODES
704+
)
705+
custom_codes = [StatusCode.INTERNAL, StatusCode.UNKNOWN]
706+
exporter = OTLPSpanExporterForTesting(
707+
insecure=True, retryable_error_codes=custom_codes
708+
)
709+
self.assertEqual(
710+
exporter._retryable_error_codes, frozenset(custom_codes)
711+
)
712+
713+
@patch.dict(
714+
"os.environ",
715+
{
716+
"OTEL_PYTHON_EXPORTER_OTLP_GRPC_RETRYABLE_ERROR_CODES": ",INTERNAL, unknown,,,dEAdline_Exceeded "
717+
},
718+
)
719+
def test_retryable_error_codes_initialization_from_env(self):
720+
expected_codes = frozenset(
721+
{
722+
StatusCode.INTERNAL,
723+
StatusCode.UNKNOWN,
724+
StatusCode.DEADLINE_EXCEEDED,
725+
}
726+
)
727+
exporter = OTLPSpanExporterForTesting()
728+
# pylint: disable=protected-access
729+
self.assertEqual(exporter._retryable_error_codes, expected_codes)
730+
731+
@unittest.skipIf(
732+
system() == "Windows",
733+
"For gRPC + windows there's some added delay in the RPCs which breaks the assertion over amount of time passed.",
734+
)
735+
def test_retryable_error_codes_custom(self):
736+
# Test that a custom error code is retried if specified
737+
custom_codes = [StatusCode.INTERNAL]
738+
mock_trace_service = TraceServiceServicerWithExportParams(
739+
StatusCode.INTERNAL,
740+
optional_retry_nanos=200000000, # .2 seconds
741+
)
742+
add_TraceServiceServicer_to_server(
743+
mock_trace_service,
744+
self.server,
745+
)
746+
exporter = OTLPSpanExporterForTesting(
747+
insecure=True, retryable_error_codes=custom_codes, timeout=10
748+
)
749+
750+
self.assertEqual(
751+
exporter.export([self.span]),
752+
SpanExportResult.FAILURE,
753+
)
754+
755+
self.assertEqual(mock_trace_service.num_requests, 6)
756+
757+
# Test that a default retryable code is NOT retried if not in custom_codes
758+
mock_trace_service.num_requests = 0
759+
mock_trace_service.export_result = StatusCode.UNAVAILABLE
760+
self.assertEqual(
761+
exporter.export([self.span]),
762+
SpanExportResult.FAILURE,
763+
)
764+
self.assertEqual(mock_trace_service.num_requests, 1)
699765

700766
def assert_standard_metric_attrs(self, attributes):
701767
self.assertEqual(

opentelemetry-sdk/src/opentelemetry/sdk/environment_variables/__init__.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -483,6 +483,19 @@ def channel_credential_provider() -> grpc.ChannelCredentials:
483483
Note: This environment variable is experimental and subject to change.
484484
"""
485485

486+
_OTEL_PYTHON_EXPORTER_OTLP_GRPC_RETRYABLE_ERROR_CODES = (
487+
"OTEL_PYTHON_EXPORTER_OTLP_GRPC_RETRYABLE_ERROR_CODES"
488+
)
489+
"""
490+
.. envvar:: OTEL_PYTHON_EXPORTER_OTLP_GRPC_RETRYABLE_ERROR_CODES
491+
492+
The :envvar:`OTEL_PYTHON_EXPORTER_OTLP_GRPC_RETRYABLE_ERROR_CODES` stores a comma-separated list of human-readable
493+
gRPC error codes that are considered retryable for the OTLP gRPC exporters (e.g. `UNAVAILABLE, DEADLINE_EXCEEDED`).
494+
Supported error codes are defined in `grpc.StatusCode` and are parsed in a case-insensitive manner.
495+
496+
Note: This environment variable is experimental and subject to change.
497+
"""
498+
486499
OTEL_EXPORTER_OTLP_TRACES_CERTIFICATE = "OTEL_EXPORTER_OTLP_TRACES_CERTIFICATE"
487500
"""
488501
.. envvar:: OTEL_EXPORTER_OTLP_TRACES_CERTIFICATE

0 commit comments

Comments
 (0)