Skip to content

Commit 4ba9a8c

Browse files
authored
Add catalog properties for Catalog Tests (#2982)
<!-- Thanks for opening a pull request! --> <!-- In the case this PR will resolve an issue, please replace ${GITHUB_ISSUE_ID} below with the actual Github issue id. --> <!-- Closes #${GITHUB_ISSUE_ID} --> # Rationale for this change I've added a list of catalog-level properties to help the catalog tests. The goal is that the catalog tests work off of "features" and don't have exceptions for various catalogs. As part of this, I discovered a discrepancy where HiveCatalog throws a different error than everyone else. I'm happy to change it back. ## Are these changes tested? Mostly a test change. ## Are there any user-facing changes? <!-- In the case of user-facing changes, please add the changelog label. -->
1 parent 1eeae90 commit 4ba9a8c

File tree

5 files changed

+85
-21
lines changed

5 files changed

+85
-21
lines changed

pyiceberg/catalog/__init__.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -733,9 +733,9 @@ def namespace_to_string(identifier: str | Identifier, err: type[ValueError] | ty
733733

734734
return ".".join(segment.strip() for segment in tuple_identifier)
735735

736+
@abstractmethod
736737
def supports_server_side_planning(self) -> bool:
737738
"""Check if the catalog supports server-side scan planning."""
738-
return False
739739

740740
@staticmethod
741741
def identifier_to_database(
@@ -836,6 +836,9 @@ class MetastoreCatalog(Catalog, ABC):
836836
def __init__(self, name: str, **properties: str):
837837
super().__init__(name, **properties)
838838

839+
def supports_server_side_planning(self) -> bool:
840+
return False
841+
839842
def create_table_transaction(
840843
self,
841844
identifier: str | Identifier,

pyiceberg/catalog/hive.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -729,7 +729,7 @@ def drop_namespace(self, namespace: str | Identifier) -> None:
729729
open_client.drop_database(database_name, deleteData=False, cascade=False)
730730
except InvalidOperationException as e:
731731
raise NamespaceNotEmptyError(f"Database {database_name} is not empty") from e
732-
except MetaException as e:
732+
except (MetaException, NoSuchObjectException) as e:
733733
raise NoSuchNamespaceError(f"Database does not exists: {database_name}") from e
734734

735735
def list_tables(self, namespace: str | Identifier) -> list[Identifier]:

pyiceberg/catalog/noop.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,9 @@ def register_table(self, identifier: str | Identifier, metadata_location: str) -
8585
def drop_table(self, identifier: str | Identifier) -> None:
8686
raise NotImplementedError
8787

88+
def supports_server_side_planning(self) -> bool:
89+
raise NotImplementedError
90+
8891
def purge_table(self, identifier: str | Identifier) -> None:
8992
raise NotImplementedError
9093

tests/conftest.py

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,13 @@
4949
from pytest_lazy_fixtures import lf
5050

5151
from pyiceberg.catalog import Catalog, load_catalog
52+
from pyiceberg.catalog.bigquery_metastore import BigQueryMetastoreCatalog
53+
from pyiceberg.catalog.dynamodb import DynamoDbCatalog
54+
from pyiceberg.catalog.glue import GlueCatalog
55+
from pyiceberg.catalog.hive import HiveCatalog
5256
from pyiceberg.catalog.memory import InMemoryCatalog
5357
from pyiceberg.catalog.noop import NoopCatalog
58+
from pyiceberg.catalog.rest import RestCatalog
5459
from pyiceberg.catalog.sql import SqlCatalog
5560
from pyiceberg.expressions import BoundReference
5661
from pyiceberg.io import (
@@ -98,6 +103,7 @@
98103
UUIDType,
99104
)
100105
from pyiceberg.utils.datetime import datetime_to_millis
106+
from pyiceberg.utils.properties import property_as_bool
101107

102108
if TYPE_CHECKING:
103109
import pyarrow as pa
@@ -3143,3 +3149,51 @@ def test_table_properties() -> dict[str, str]:
31433149
"key1": "value1",
31443150
"key2": "value2",
31453151
}
3152+
3153+
3154+
def does_support_purge_table(catalog: Catalog) -> bool:
3155+
if isinstance(catalog, RestCatalog):
3156+
return property_as_bool(catalog.properties, "supports_purge_table", True)
3157+
if isinstance(catalog, (HiveCatalog, NoopCatalog)):
3158+
return False
3159+
return True
3160+
3161+
3162+
def does_support_atomic_concurrent_updates(catalog: Catalog) -> bool:
3163+
if isinstance(catalog, RestCatalog):
3164+
return property_as_bool(catalog.properties, "supports_atomic_concurrent_updates", True)
3165+
if isinstance(catalog, (HiveCatalog, NoopCatalog)):
3166+
return False
3167+
return True
3168+
3169+
3170+
def does_support_nested_namespaces(catalog: Catalog) -> bool:
3171+
if isinstance(catalog, RestCatalog):
3172+
return property_as_bool(catalog.properties, "supports_nested_namespaces", True)
3173+
if isinstance(catalog, (HiveCatalog, NoopCatalog, GlueCatalog, BigQueryMetastoreCatalog, DynamoDbCatalog)):
3174+
return False
3175+
return True
3176+
3177+
3178+
def does_support_schema_evolution(catalog: Catalog) -> bool:
3179+
if isinstance(catalog, RestCatalog):
3180+
return property_as_bool(catalog.properties, "supports_schema_evolution", True)
3181+
if isinstance(catalog, (HiveCatalog, NoopCatalog)):
3182+
return False
3183+
return True
3184+
3185+
3186+
def does_support_slash_in_identifier(catalog: Catalog) -> bool:
3187+
if isinstance(catalog, RestCatalog):
3188+
return property_as_bool(catalog.properties, "supports_slash_in_identifier", True)
3189+
if isinstance(catalog, (HiveCatalog, NoopCatalog, SqlCatalog)):
3190+
return False
3191+
return True
3192+
3193+
3194+
def does_support_dot_in_identifier(catalog: Catalog) -> bool:
3195+
if isinstance(catalog, RestCatalog):
3196+
return property_as_bool(catalog.properties, "supports_dot_in_identifier", True)
3197+
if isinstance(catalog, (HiveCatalog, NoopCatalog, SqlCatalog)):
3198+
return False
3199+
return True

tests/integration/test_catalog.py

Lines changed: 23 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,15 @@
4444
from pyiceberg.table.sorting import INITIAL_SORT_ORDER_ID, SortField, SortOrder
4545
from pyiceberg.transforms import BucketTransform, DayTransform, IdentityTransform
4646
from pyiceberg.types import IntegerType, LongType, NestedField, TimestampType, UUIDType
47-
from tests.conftest import clean_up
47+
from tests.conftest import (
48+
clean_up,
49+
does_support_atomic_concurrent_updates,
50+
does_support_dot_in_identifier,
51+
does_support_nested_namespaces,
52+
does_support_purge_table,
53+
does_support_schema_evolution,
54+
does_support_slash_in_identifier,
55+
)
4856

4957

5058
@pytest.fixture(scope="function")
@@ -247,8 +255,8 @@ def test_drop_table(test_catalog: Catalog, table_schema_nested: Schema, table_na
247255
@pytest.mark.integration
248256
@pytest.mark.parametrize("test_catalog", CATALOGS)
249257
def test_purge_table(test_catalog: Catalog, table_schema_nested: Schema, table_name: str, database_name: str) -> None:
250-
if isinstance(test_catalog, HiveCatalog):
251-
pytest.skip("HiveCatalog does not support purge_table operation yet")
258+
if not does_support_purge_table(test_catalog):
259+
pytest.skip("Catalog does not support purge_table operation")
252260

253261
identifier = (database_name, table_name)
254262
test_catalog.create_namespace(database_name)
@@ -300,8 +308,8 @@ def test_update_table_transaction(test_catalog: Catalog, test_schema: Schema, ta
300308
@pytest.mark.integration
301309
@pytest.mark.parametrize("test_catalog", CATALOGS)
302310
def test_update_schema_conflict(test_catalog: Catalog, test_schema: Schema, table_name: str, database_name: str) -> None:
303-
if isinstance(test_catalog, HiveCatalog):
304-
pytest.skip("HiveCatalog fails in this test, need to investigate")
311+
if not does_support_atomic_concurrent_updates(test_catalog):
312+
pytest.skip("Catalog does not support atomic concurrent updates")
305313

306314
identifier = (database_name, table_name)
307315

@@ -647,8 +655,8 @@ def test_rest_custom_namespace_separator(rest_catalog: RestCatalog, table_schema
647655
def test_incompatible_partitioned_schema_evolution(
648656
test_catalog: Catalog, test_schema: Schema, test_partition_spec: PartitionSpec, database_name: str, table_name: str
649657
) -> None:
650-
if isinstance(test_catalog, HiveCatalog):
651-
pytest.skip("HiveCatalog does not support schema evolution")
658+
if not does_support_schema_evolution(test_catalog):
659+
pytest.skip(f"{type(test_catalog).__name__} does not support schema evolution")
652660

653661
identifier = (database_name, table_name)
654662
test_catalog.create_namespace(database_name)
@@ -676,7 +684,7 @@ def test_incompatible_partitioned_schema_evolution(
676684
@pytest.mark.integration
677685
@pytest.mark.parametrize("test_catalog", CATALOGS)
678686
def test_namespace_with_slash(test_catalog: Catalog) -> None:
679-
if isinstance(test_catalog, HiveCatalog):
687+
if not does_support_slash_in_identifier(test_catalog):
680688
pytest.skip(f"{type(test_catalog).__name__} does not support slash in namespace")
681689

682690
namespace = ("new/db",)
@@ -701,8 +709,8 @@ def test_namespace_with_slash(test_catalog: Catalog) -> None:
701709
def test_incompatible_sorted_schema_evolution(
702710
test_catalog: Catalog, test_schema: Schema, test_sort_order: SortOrder, database_name: str, table_name: str
703711
) -> None:
704-
if isinstance(test_catalog, HiveCatalog):
705-
pytest.skip("HiveCatalog does not support schema evolution")
712+
if not does_support_schema_evolution(test_catalog):
713+
pytest.skip(f"{type(test_catalog).__name__} does not support schema evolution")
706714

707715
identifier = (database_name, table_name)
708716
test_catalog.create_namespace(database_name)
@@ -721,7 +729,7 @@ def test_incompatible_sorted_schema_evolution(
721729
@pytest.mark.integration
722730
@pytest.mark.parametrize("test_catalog", CATALOGS)
723731
def test_namespace_with_dot(test_catalog: Catalog) -> None:
724-
if isinstance(test_catalog, (HiveCatalog, SqlCatalog)):
732+
if not does_support_dot_in_identifier(test_catalog):
725733
pytest.skip(f"{type(test_catalog).__name__} does not support dot in namespace")
726734

727735
namespace = ("new.db",)
@@ -734,9 +742,8 @@ def test_namespace_with_dot(test_catalog: Catalog) -> None:
734742
test_catalog.create_namespace(namespace)
735743
assert test_catalog.namespace_exists(namespace)
736744

737-
# REST Catalog fixture treats this as a hierarchical namespace.
738-
# Calling list namespaces will get `new`, not `new.db`.
739-
if isinstance(test_catalog, RestCatalog):
745+
# Hierarchical catalogs might treat this as multiple levels.
746+
if does_support_nested_namespaces(test_catalog):
740747
namespaces = test_catalog.list_namespaces()
741748
assert ("new",) in namespaces or ("new.db",) in namespaces
742749
else:
@@ -752,7 +759,7 @@ def test_namespace_with_dot(test_catalog: Catalog) -> None:
752759
@pytest.mark.integration
753760
@pytest.mark.parametrize("test_catalog", CATALOGS)
754761
def test_table_name_with_slash(test_catalog: Catalog, table_schema_simple: Schema) -> None:
755-
if isinstance(test_catalog, (HiveCatalog, SqlCatalog)):
762+
if not does_support_slash_in_identifier(test_catalog):
756763
pytest.skip(f"{type(test_catalog).__name__} does not support slash in table name")
757764

758765
namespace = ("ns_slash",)
@@ -779,7 +786,7 @@ def test_table_name_with_slash(test_catalog: Catalog, table_schema_simple: Schem
779786
@pytest.mark.integration
780787
@pytest.mark.parametrize("test_catalog", CATALOGS)
781788
def test_table_name_with_dot(test_catalog: Catalog, table_schema_simple: Schema) -> None:
782-
if isinstance(test_catalog, (HiveCatalog, SqlCatalog)):
789+
if not does_support_dot_in_identifier(test_catalog):
783790
pytest.skip(f"{type(test_catalog).__name__} does not support dot in table name")
784791

785792
namespace = ("ns_dot",)
@@ -818,9 +825,6 @@ def test_drop_missing_table(test_catalog: Catalog, database_name: str) -> None:
818825
@pytest.mark.integration
819826
@pytest.mark.parametrize("test_catalog", CATALOGS)
820827
def test_drop_nonexistent_namespace(test_catalog: Catalog) -> None:
821-
if isinstance(test_catalog, HiveCatalog):
822-
pytest.skip("HiveCatalog raises NoSuchObjectException instead of NoSuchNamespaceError")
823-
824828
namespace = ("non_existent_namespace",)
825829
with pytest.raises(NoSuchNamespaceError):
826830
test_catalog.drop_namespace(namespace)

0 commit comments

Comments
 (0)