Skip to content

Commit d50e5ba

Browse files
author
Jack Ye
committed
Support loading custom catalog impl
1 parent d7110fa commit d50e5ba

File tree

3 files changed

+36
-5
lines changed

3 files changed

+36
-5
lines changed

pyiceberg/catalog/__init__.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717

1818
from __future__ import annotations
1919

20+
import importlib
2021
import logging
2122
import re
2223
import uuid
@@ -77,6 +78,7 @@
7778

7879
TOKEN = "token"
7980
TYPE = "type"
81+
PY_CATALOG_IMPL = "py-catalog-impl"
8082
ICEBERG = "iceberg"
8183
TABLE_TYPE = "table_type"
8284
WAREHOUSE_LOCATION = "warehouse"
@@ -230,6 +232,13 @@ def load_catalog(name: Optional[str] = None, **properties: Optional[str]) -> Cat
230232
env = _ENV_CONFIG.get_catalog_config(name)
231233
conf: RecursiveDict = merge_config(env or {}, cast(RecursiveDict, properties))
232234

235+
if catalog_impl := properties.get(PY_CATALOG_IMPL):
236+
if catalog := _import_catalog(catalog_impl, properties):
237+
logger.info("Loaded Catalog: %s", catalog_impl)
238+
return catalog
239+
else:
240+
raise ValueError(f"Could not initialize Catalog: {catalog_impl}")
241+
233242
catalog_type: Optional[CatalogType]
234243
provided_catalog_type = conf.get(TYPE)
235244

@@ -283,6 +292,20 @@ def delete_data_files(io: FileIO, manifests_to_delete: List[ManifestFile]) -> No
283292
deleted_files[path] = True
284293

285294

295+
def _import_catalog(catalog_impl: str, properties: Properties) -> Optional[Catalog]:
296+
try:
297+
path_parts = catalog_impl.split(".")
298+
if len(path_parts) < 2:
299+
raise ValueError(f"py-catalog-impl should be full path (module.CustomCatalog), got: {catalog_impl}")
300+
module_name, class_name = ".".join(path_parts[:-1]), path_parts[-1]
301+
module = importlib.import_module(module_name)
302+
class_ = getattr(module, class_name)
303+
return class_(properties)
304+
except ModuleNotFoundError:
305+
logger.warning("Could not initialize Catalog: %s", catalog_impl)
306+
return None
307+
308+
286309
@dataclass
287310
class PropertiesUpdateSummary:
288311
removed: List[str]

tests/catalog/test_base.py

Lines changed: 8 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -32,11 +32,7 @@
3232
from pydantic_core import ValidationError
3333
from pytest_lazyfixture import lazy_fixture
3434

35-
from pyiceberg.catalog import (
36-
Catalog,
37-
MetastoreCatalog,
38-
PropertiesUpdateSummary,
39-
)
35+
from pyiceberg.catalog import PY_CATALOG_IMPL, Catalog, MetastoreCatalog, PropertiesUpdateSummary, load_catalog
4036
from pyiceberg.exceptions import (
4137
NamespaceAlreadyExistsError,
4238
NamespaceNotEmptyError,
@@ -295,6 +291,13 @@ def given_catalog_has_a_table(
295291
)
296292

297293

294+
def test_load_catalog_does_not_exist() -> None:
295+
with pytest.raises(ValueError) as exc_info:
296+
load_catalog("catalog", **{PY_CATALOG_IMPL: "pyiceberg.does.not.exist.Catalog"})
297+
298+
assert "Could not initialize Catalog: pyiceberg.does.not.exist.Catalog" in str(exc_info.value)
299+
300+
298301
def test_namespace_from_tuple() -> None:
299302
# Given
300303
identifier = ("com", "organization", "department", "my_table")

tests/catalog/test_glue.py

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import pytest
2323
from moto import mock_aws
2424

25+
from pyiceberg.catalog import PY_CATALOG_IMPL, load_catalog
2526
from pyiceberg.catalog.glue import GlueCatalog
2627
from pyiceberg.exceptions import (
2728
NamespaceAlreadyExistsError,
@@ -46,6 +47,10 @@
4647
)
4748

4849

50+
def test_load_catalog_from_impl() -> None:
51+
assert isinstance(load_catalog("catalog", **{PY_CATALOG_IMPL: "pyiceberg.catalog.glue.GlueCatalog"}), GlueCatalog)
52+
53+
4954
@mock_aws
5055
def test_create_table_with_database_location(
5156
_glue: boto3.client,

0 commit comments

Comments
 (0)