Skip to content
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions mkdocs/docs/configuration.md
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,7 @@ catalog:
| rest.signing-region | us-east-1 | The region to use when SigV4 signing a request |
| rest.signing-name | execute-api | The service signing name to use when SigV4 signing a request |
| oauth2-server-uri | <https://auth-service/cc> | Authentication URL to use for client credentials authentication (default: uri + 'v1/oauth/tokens') |
| snapshot-loading-mode | refs | The snapshots to return in the body of the metadata. Setting the value to `all` would return the full set of snapshots currently valid for the table. Setting the value to `refs` would load all snapshots referenced by branches or tags. |

<!-- markdown-link-check-enable-->

Expand Down
12 changes: 11 additions & 1 deletion pyiceberg/catalog/rest/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,7 @@ class IdentifierKind(Enum):
SIGV4_REGION = "rest.signing-region"
SIGV4_SERVICE = "rest.signing-name"
OAUTH2_SERVER_URI = "oauth2-server-uri"
SNAPSHOT_LOADING_MODE = "snapshot-loading-mode"

NAMESPACE_SEPARATOR = b"\x1f".decode(UTF8)

Expand Down Expand Up @@ -678,7 +679,16 @@ def list_tables(self, namespace: Union[str, Identifier]) -> List[Identifier]:

@retry(**_RETRY_ARGS)
def load_table(self, identifier: Union[str, Identifier]) -> Table:
response = self._session.get(self.url(Endpoints.load_table, prefixed=True, **self._split_identifier_for_path(identifier)))
params = {}
if mode := self.properties.get(SNAPSHOT_LOADING_MODE):
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is it possible to add this as a parameter to load_table that takes preference over session-level properties? I guess it's not part of the base catalog interface but 🤷🏻‍♂️

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Good seeing you here @corleyma, and I share your concern here. In some situations, we want to override this as well:

  • To enable time-travel.
  • In the case of a write, we want to know about all the snapshots to see if there are any conflicts.

I'm open to adding an option to the base interface, however, it does not make a lot of sense in the case of non-REST catalogs where we have to read the JSON anyway.

Copy link
Copy Markdown

@corleyma corleyma May 30, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sorry for coming back to this late.

Maybe the base load_table interface should just change to accept kwargs, and we can add here some implementation-specific kwargs for REST catalog?

if mode in {"all", "refs"}:
params["snapshots"] = mode
else:
raise ValueError("Invalid snapshot-loading-mode: {}")
Comment on lines +683 to +687
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.


response = self._session.get(
self.url(Endpoints.load_table, prefixed=True, **self._split_identifier_for_path(identifier)), params=params
)
try:
response.raise_for_status()
except HTTPError as exc:
Expand Down
25 changes: 24 additions & 1 deletion tests/catalog/test_rest.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@

import pyiceberg
from pyiceberg.catalog import PropertiesUpdateSummary, load_catalog
from pyiceberg.catalog.rest import OAUTH2_SERVER_URI, RestCatalog
from pyiceberg.catalog.rest import OAUTH2_SERVER_URI, SNAPSHOT_LOADING_MODE, RestCatalog
from pyiceberg.exceptions import (
AuthorizationExpiredError,
NamespaceAlreadyExistsError,
Expand Down Expand Up @@ -853,6 +853,29 @@ def test_load_table_200(rest_mock: Mocker, example_table_metadata_with_snapshot_
assert actual == expected


def test_load_table_200_loading_mode(
rest_mock: Mocker, example_table_metadata_with_snapshot_v1_rest_json: Dict[str, Any]
) -> None:
rest_mock.get(
f"{TEST_URI}v1/namespaces/fokko/tables/table?snapshots=refs",
json=example_table_metadata_with_snapshot_v1_rest_json,
status_code=200,
request_headers=TEST_HEADERS,
)
catalog = RestCatalog("rest", uri=TEST_URI, token=TEST_TOKEN, **{SNAPSHOT_LOADING_MODE: "refs"})
actual = catalog.load_table(("fokko", "table"))
expected = Table(
identifier=("fokko", "table"),
metadata_location=example_table_metadata_with_snapshot_v1_rest_json["metadata-location"],
metadata=TableMetadataV1(**example_table_metadata_with_snapshot_v1_rest_json["metadata"]),
io=load_file_io(),
catalog=catalog,
)
# First compare the dicts
assert actual.metadata.model_dump() == expected.metadata.model_dump()
assert actual == expected


def test_load_table_honor_access_delegation(
rest_mock: Mocker, example_table_metadata_with_snapshot_v1_rest_json: Dict[str, Any]
) -> None:
Expand Down
Loading