Skip to content

Commit 47a5382

Browse files
committed
Address review nits from @rambleraptor
- Drop the misleading 'when none are set, the default requests behavior is preserved' sentence in the docs (partial overrides don't reset other knobs to undefined behavior). - Initialize retries and backoff_factor to 0 instead of None so the Retry() call no longer needs conditional defaults inline. A Retry(total=0) is a no-op and is functionally equivalent to no Retry at all. - Extract the loopback HTTP server setup from the retry test into a _local_rest_server_503_then_200 context manager; the test body now shows intent (set retries=3, list, verify 4 calls) without the handler scaffold. Signed-off-by: rahulsmahadev <rahul.mahadev@databricks.com>
1 parent afb1f51 commit 47a5382

3 files changed

Lines changed: 34 additions & 28 deletions

File tree

mkdocs/docs/configuration.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -353,7 +353,6 @@ catalog:
353353
The REST Catalog uses `requests` with no retries and no timeout by default, so transient
354354
5xx / network failures bubble up immediately and slow servers can hang the client indefinitely.
355355
Set a `connection:` block on the catalog to opt in to a per-request timeout and a retry policy.
356-
Every key is optional; when none are set, the default `requests` behavior is preserved.
357356

358357
```yaml
359358
catalog:

pyiceberg/catalog/rest/__init__.py

Lines changed: 11 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -453,7 +453,9 @@ def _create_connection_adapter(properties: Properties) -> _RetryTimeoutHTTPAdapt
453453
if timeout <= 0:
454454
raise ValueError(f"`{CONNECTION}.{CONNECTION_TIMEOUT}` must be a positive number, got: {timeout}")
455455

456-
retries: int | None = None
456+
# `retries` and `backoff_factor` default to 0 (a no-op Retry) so the user can set only
457+
# one or the other without forcing the rest of the policy to be specified explicitly.
458+
retries = 0
457459
if (raw_retries := connection_config.get(CONNECTION_RETRIES)) is not None:
458460
try:
459461
retries = int(raw_retries)
@@ -462,7 +464,7 @@ def _create_connection_adapter(properties: Properties) -> _RetryTimeoutHTTPAdapt
462464
if retries < 0:
463465
raise ValueError(f"`{CONNECTION}.{CONNECTION_RETRIES}` must be non-negative, got: {retries}")
464466

465-
backoff_factor: float | None = None
467+
backoff_factor = 0.0
466468
if (raw_backoff := connection_config.get(CONNECTION_BACKOFF_FACTOR)) is not None:
467469
try:
468470
backoff_factor = float(raw_backoff)
@@ -471,19 +473,15 @@ def _create_connection_adapter(properties: Properties) -> _RetryTimeoutHTTPAdapt
471473
if backoff_factor < 0:
472474
raise ValueError(f"`{CONNECTION}.{CONNECTION_BACKOFF_FACTOR}` must be non-negative, got: {backoff_factor}")
473475

474-
max_retries: Retry | None = None
475-
if retries is not None or backoff_factor is not None:
476-
max_retries = Retry(
477-
total=retries if retries is not None else 0,
478-
backoff_factor=backoff_factor if backoff_factor is not None else 0,
476+
return _RetryTimeoutHTTPAdapter(
477+
timeout=timeout,
478+
max_retries=Retry(
479+
total=retries,
480+
backoff_factor=backoff_factor,
479481
status_forcelist=list(_CONNECTION_RETRY_STATUS_FORCELIST),
480482
allowed_methods=_CONNECTION_RETRY_ALLOWED_METHODS,
481-
)
482-
483-
if timeout is None and max_retries is None:
484-
return None
485-
486-
return _RetryTimeoutHTTPAdapter(timeout=timeout, max_retries=max_retries)
483+
),
484+
)
487485

488486

489487
class RestCatalog(Catalog):

tests/catalog/test_rest.py

Lines changed: 23 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,8 @@
1919

2020
import base64
2121
import os
22-
from collections.abc import Callable
22+
from collections.abc import Callable, Iterator
23+
from contextlib import contextmanager
2324
from typing import Any, cast
2425
from unittest import mock
2526

@@ -2065,21 +2066,23 @@ def test_session_with_connection_timeout_only(rest_mock: Mocker) -> None:
20652066
adapter = catalog._session.adapters["https://"]
20662067
assert isinstance(adapter, _RetryTimeoutHTTPAdapter)
20672068
assert adapter._timeout == 30.0
2068-
# No retry options set, so no Retry object is configured.
2069+
# Default retry policy (total=0) is a no-op when only timeout is configured.
20692070
assert adapter.max_retries.total == 0
20702071

20712072

2072-
def test_session_retries_on_transient_5xx_then_succeeds() -> None:
2073-
"""Three real 503 responses followed by a 200; the catalog should make all four attempts.
2073+
@contextmanager
2074+
def _local_rest_server_503_then_200(num_failures: int) -> Iterator[dict[str, Any]]:
2075+
"""Stand up a loopback HTTP server that returns `num_failures` 503s for `/v1/namespaces` then a 200.
2076+
2077+
Used in place of `requests_mock`, which replaces the HTTPAdapter and would bypass the retry logic.
20742078
2075-
`requests_mock` would replace our HTTPAdapter, bypassing the retry logic we want to exercise,
2076-
so this test stands up an actual `http.server` on a loopback port instead.
2079+
Yields a dict with `port` and `namespace_calls` keys (the latter is updated in-place as requests arrive).
20772080
"""
20782081
import json
20792082
import threading
20802083
from http.server import BaseHTTPRequestHandler, HTTPServer
20812084

2082-
state = {"namespace_calls": 0}
2085+
state: dict[str, Any] = {"namespace_calls": 0}
20832086
config_body = json.dumps(
20842087
{"defaults": {}, "overrides": {}, "endpoints": [str(endpoint) for endpoint in TEST_SUPPORTED_ENDPOINTS]}
20852088
).encode()
@@ -2090,7 +2093,7 @@ def do_GET(self) -> None:
20902093
self._respond(200, config_body)
20912094
elif self.path.endswith("/v1/namespaces"):
20922095
state["namespace_calls"] += 1
2093-
if state["namespace_calls"] <= 3:
2096+
if state["namespace_calls"] <= num_failures:
20942097
self._respond(503, b"")
20952098
else:
20962099
self._respond(200, json.dumps({"namespaces": [["foo"]]}).encode())
@@ -2109,24 +2112,30 @@ def log_message(self, format: str, *args: Any) -> None: # silence default acces
21092112
pass
21102113

21112114
server = HTTPServer(("127.0.0.1", 0), _Handler)
2112-
port = server.server_address[1]
2115+
state["port"] = server.server_address[1]
21132116
server_thread = threading.Thread(target=server.serve_forever, daemon=True)
21142117
server_thread.start()
21152118
try:
2119+
yield state
2120+
finally:
2121+
server.shutdown()
2122+
server.server_close()
2123+
2124+
2125+
def test_session_retries_on_transient_5xx_then_succeeds() -> None:
2126+
"""The catalog should retry on transient 5xx and succeed once the server stabilizes."""
2127+
with _local_rest_server_503_then_200(num_failures=3) as server:
21162128
catalog = RestCatalog(
21172129
"rest",
21182130
**{ # type: ignore
2119-
"uri": f"http://127.0.0.1:{port}/",
2131+
"uri": f"http://127.0.0.1:{server['port']}/",
21202132
"token": TEST_TOKEN,
21212133
# backoff-factor=0 keeps the test fast; retries=3 covers three 503s + the eventual 200.
21222134
CONNECTION: {CONNECTION_RETRIES: 3, CONNECTION_BACKOFF_FACTOR: 0},
21232135
},
21242136
)
21252137
assert catalog.list_namespaces() == [("foo",)]
2126-
assert state["namespace_calls"] == 4
2127-
finally:
2128-
server.shutdown()
2129-
server.server_close()
2138+
assert server["namespace_calls"] == 4
21302139

21312140

21322141
def test_session_with_invalid_connection_timeout_raises(rest_mock: Mocker) -> None:

0 commit comments

Comments
 (0)