Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

- `opentelemetry-sdk`: Add `service` resource detector support to declarative file configuration via `detection_development.detectors[].service`
([#5003](https://github.com/open-telemetry/opentelemetry-python/pull/5003))
- `opentelemetry-exporter-otlp-proto-http`: Add experimental opt-in support for handling HTTP 413 (Payload Too Large) responses in trace and log exporters by splitting the batch in half and retrying each half recursively. Enable via `OTEL_PYTHON_EXPERIMENTAL_OTLP_RETRY_ON_413=true`.
([#5032](https://github.com/open-telemetry/opentelemetry-python/pull/5032))

## Version 1.41.0/0.62b0 (2026-04-09)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,10 @@ def _is_retryable(resp: requests.Response) -> bool:
return False


def _is_payload_too_large(resp: requests.Response) -> bool:
    """Return True when the server rejected the export as HTTP 413 (Payload Too Large)."""
    # 413 is the only status that signals the request body itself must shrink;
    # callers use this to decide whether bisecting the batch is worthwhile.
    payload_too_large_status = 413
    return resp.status_code == payload_too_large_status


def _load_session_from_envvar(
cred_envvar: Literal[
"OTEL_PYTHON_EXPORTER_OTLP_HTTP_LOGS_CREDENTIAL_PROVIDER",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
Compression,
)
from opentelemetry.exporter.otlp.proto.http._common import (
_is_payload_too_large,
_is_retryable,
_load_session_from_envvar,
)
Expand All @@ -41,6 +42,7 @@
)
from opentelemetry.sdk._shared_internal import DuplicateFilter
from opentelemetry.sdk.environment_variables import (
_OTEL_PYTHON_EXPERIMENTAL_OTLP_RETRY_ON_413,
_OTEL_PYTHON_EXPORTER_OTLP_HTTP_LOGS_CREDENTIAL_PROVIDER,
OTEL_EXPORTER_OTLP_CERTIFICATE,
OTEL_EXPORTER_OTLP_CLIENT_CERTIFICATE,
Expand Down Expand Up @@ -69,6 +71,7 @@
DEFAULT_LOGS_EXPORT_PATH = "v1/logs"
DEFAULT_TIMEOUT = 10 # in seconds
_MAX_RETRYS = 6
_MAX_BISECTS = 5


class OTLPLogExporter(LogRecordExporter):
Expand Down Expand Up @@ -183,8 +186,17 @@ def export(
_logger.warning("Exporter already shutdown, ignoring batch")
return LogRecordExportResult.FAILURE

serialized_data = encode_logs(batch).SerializeToString()
deadline_sec = time() + self._timeout
return self._export_batch(batch, deadline_sec, _MAX_BISECTS)

def _export_batch(
self,
batch: Sequence[ReadableLogRecord],
deadline_sec: float,
remaining_bisects: int,
) -> LogRecordExportResult:
serialized_data = encode_logs(batch).SerializeToString()

for retry_num in range(_MAX_RETRYS):
# multiplying by a random number between .8 and 1.2 introduces a +/20% jitter to each backoff.
backoff_seconds = 2**retry_num * random.uniform(0.8, 1.2)
Expand All @@ -196,12 +208,24 @@ def export(
reason = error
retryable = isinstance(error, ConnectionError)
status_code = None
bisectable = False
else:
reason = resp.reason
retryable = _is_retryable(resp)
status_code = resp.status_code
bisectable = (
_is_payload_too_large(resp)
and len(batch) > 1
and remaining_bisects > 0
and environ.get(
_OTEL_PYTHON_EXPERIMENTAL_OTLP_RETRY_ON_413, ""
)
.strip()
.lower()
== "true"
)

if not retryable:
if not retryable and not bisectable:
_logger.error(
"Failed to export logs batch code: %s, reason: %s",
status_code,
Expand All @@ -219,6 +243,34 @@ def export(
"max retries or shutdown."
)
return LogRecordExportResult.FAILURE

if bisectable:
if time() >= deadline_sec or self._shutdown:
_logger.error(
"Payload too large but %s, dropping %d log records",
"shutdown in progress"
if self._shutdown
else "deadline expired",
len(batch),
)
return LogRecordExportResult.FAILURE
mid = len(batch) // 2
_logger.warning(
"Payload too large (%d log records), splitting into two batches",
len(batch),
)
first = self._export_batch(
list(batch[:mid]),
deadline_sec,
remaining_bisects - 1,
)
if first != LogRecordExportResult.SUCCESS:
return LogRecordExportResult.FAILURE
return self._export_batch(
list(batch[mid:]),
deadline_sec,
remaining_bisects - 1,
)
_logger.warning(
"Transient error %s encountered while exporting logs batch, retrying in %.2fs.",
reason,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,12 @@
Compression,
)
from opentelemetry.exporter.otlp.proto.http._common import (
_is_payload_too_large,
_is_retryable,
_load_session_from_envvar,
)
from opentelemetry.sdk.environment_variables import (
_OTEL_PYTHON_EXPERIMENTAL_OTLP_RETRY_ON_413,
_OTEL_PYTHON_EXPORTER_OTLP_HTTP_TRACES_CREDENTIAL_PROVIDER,
OTEL_EXPORTER_OTLP_CERTIFICATE,
OTEL_EXPORTER_OTLP_CLIENT_CERTIFICATE,
Expand Down Expand Up @@ -65,6 +67,7 @@
DEFAULT_TRACES_EXPORT_PATH = "v1/traces"
DEFAULT_TIMEOUT = 10 # in seconds
_MAX_RETRYS = 6
_MAX_BISECTS = 5


class OTLPSpanExporter(SpanExporter):
Expand Down Expand Up @@ -176,8 +179,17 @@ def export(self, spans: Sequence[ReadableSpan]) -> SpanExportResult:
_logger.warning("Exporter already shutdown, ignoring batch")
return SpanExportResult.FAILURE

serialized_data = encode_spans(spans).SerializePartialToString()
deadline_sec = time() + self._timeout
return self._export_batch(spans, deadline_sec, _MAX_BISECTS)

def _export_batch(
self,
spans: Sequence[ReadableSpan],
deadline_sec: float,
remaining_bisects: int,
) -> SpanExportResult:
serialized_data = encode_spans(spans).SerializePartialToString()

for retry_num in range(_MAX_RETRYS):
# multiplying by a random number between .8 and 1.2 introduces a +/20% jitter to each backoff.
backoff_seconds = 2**retry_num * random.uniform(0.8, 1.2)
Expand All @@ -189,12 +201,24 @@ def export(self, spans: Sequence[ReadableSpan]) -> SpanExportResult:
reason = error
retryable = isinstance(error, ConnectionError)
status_code = None
bisectable = False
else:
reason = resp.reason
retryable = _is_retryable(resp)
status_code = resp.status_code
bisectable = (
_is_payload_too_large(resp)
and len(spans) > 1
and remaining_bisects > 0
and environ.get(
_OTEL_PYTHON_EXPERIMENTAL_OTLP_RETRY_ON_413, ""
)
.strip()
.lower()
== "true"
)

if not retryable:
if not retryable and not bisectable:
_logger.error(
"Failed to export span batch code: %s, reason: %s",
status_code,
Expand All @@ -212,6 +236,34 @@ def export(self, spans: Sequence[ReadableSpan]) -> SpanExportResult:
"max retries or shutdown."
)
return SpanExportResult.FAILURE

if bisectable:
if time() >= deadline_sec or self._shutdown:
_logger.error(
"Payload too large but %s, dropping %d spans",
"shutdown in progress"
if self._shutdown
else "deadline expired",
len(spans),
)
return SpanExportResult.FAILURE
mid = len(spans) // 2
_logger.warning(
"Payload too large (%d spans), splitting into two batches",
len(spans),
)
first = self._export_batch(
list(spans[:mid]),
deadline_sec,
remaining_bisects - 1,
)
if first != SpanExportResult.SUCCESS:
return SpanExportResult.FAILURE
return self._export_batch(
list(spans[mid:]),
deadline_sec,
remaining_bisects - 1,
)
_logger.warning(
"Transient error %s encountered while exporting span batch, retrying in %.2fs.",
reason,
Expand Down
Loading
Loading