Skip to content

Commit 2ff0aa6

Browse files
committed
Deprecate Redundant Identifier Support in TableIdentifier, and row_filter (apache#994)
1 parent 2adbe0c commit 2ff0aa6

File tree

11 files changed: +138 additions, -83 deletions

pyiceberg/catalog/__init__.py

Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@
6767
RecursiveDict,
6868
)
6969
from pyiceberg.utils.config import Config, merge_config
70-
from pyiceberg.utils.deprecated import deprecation_message
70+
from pyiceberg.utils.deprecated import deprecated, deprecation_message
7171

7272
if TYPE_CHECKING:
7373
import pyarrow as pa
@@ -613,6 +613,11 @@ def update_namespace_properties(
613613
ValueError: If removals and updates have overlapping keys.
614614
"""
615615

616+
@deprecated(
617+
deprecated_in="0.8.0",
618+
removed_in="0.9.0",
619+
help_message="Support for parsing catalog level identifier in Catalog identifiers is deprecated. Please refer to the table using only its namespace and its table name.",
620+
)
616621
def identifier_to_tuple_without_catalog(self, identifier: Union[str, Identifier]) -> Identifier:
617622
"""Convert an identifier to a tuple and drop this catalog's name from the first element.
618623
@@ -627,6 +632,25 @@ def identifier_to_tuple_without_catalog(self, identifier: Union[str, Identifier]
627632
identifier_tuple = identifier_tuple[1:]
628633
return identifier_tuple
629634

635+
def _identifier_to_tuple_without_catalog(self, identifier: Union[str, Identifier]) -> Identifier:
636+
"""Convert an identifier to a tuple and drop this catalog's name from the first element.
637+
638+
Args:
639+
identifier (str | Identifier): Table identifier.
640+
641+
Returns:
642+
Identifier: a tuple of strings with this catalog's name removed
643+
"""
644+
identifier_tuple = Catalog.identifier_to_tuple(identifier)
645+
if len(identifier_tuple) >= 3 and identifier_tuple[0] == self.name:
646+
deprecation_message(
647+
deprecated_in="0.8.0",
648+
removed_in="0.9.0",
649+
help_message="Support for parsing catalog level identifier in Catalog identifiers is deprecated. Please refer to the table using only its namespace and its table name.",
650+
)
651+
identifier_tuple = identifier_tuple[1:]
652+
return identifier_tuple
653+
630654
@staticmethod
631655
def identifier_to_tuple(identifier: Union[str, Identifier]) -> Identifier:
632656
"""Parse an identifier to a tuple.
@@ -769,7 +793,7 @@ def table_exists(self, identifier: Union[str, Identifier]) -> bool:
769793
return False
770794

771795
def purge_table(self, identifier: Union[str, Identifier]) -> None:
772-
identifier_tuple = self.identifier_to_tuple_without_catalog(identifier)
796+
identifier_tuple = self._identifier_to_tuple_without_catalog(identifier)
773797
table = self.load_table(identifier_tuple)
774798
self.drop_table(identifier_tuple)
775799
io = load_file_io(self.properties, table.metadata_location)
@@ -823,7 +847,7 @@ def _create_staged_table(
823847
)
824848
io = self._load_file_io(properties=properties, location=metadata_location)
825849
return StagedTable(
826-
identifier=(self.name, database_name, table_name),
850+
identifier=(database_name, table_name),
827851
metadata=metadata,
828852
metadata_location=metadata_location,
829853
io=io,

pyiceberg/catalog/dynamodb.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -259,7 +259,7 @@ def drop_table(self, identifier: Union[str, Identifier]) -> None:
259259
Raises:
260260
NoSuchTableError: If a table with the name does not exist, or the identifier is invalid.
261261
"""
262-
identifier_tuple = self.identifier_to_tuple_without_catalog(identifier)
262+
identifier_tuple = self._identifier_to_tuple_without_catalog(identifier)
263263
database_name, table_name = self.identifier_to_database_and_table(identifier_tuple, NoSuchTableError)
264264

265265
try:
@@ -290,7 +290,7 @@ def rename_table(self, from_identifier: Union[str, Identifier], to_identifier: U
290290
NoSuchPropertyException: When from table miss some required properties.
291291
NoSuchNamespaceError: When the destination namespace doesn't exist.
292292
"""
293-
from_identifier_tuple = self.identifier_to_tuple_without_catalog(from_identifier)
293+
from_identifier_tuple = self._identifier_to_tuple_without_catalog(from_identifier)
294294
from_database_name, from_table_name = self.identifier_to_database_and_table(from_identifier_tuple, NoSuchTableError)
295295
to_database_name, to_table_name = self.identifier_to_database_and_table(to_identifier)
296296

pyiceberg/catalog/glue.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -462,7 +462,7 @@ def _commit_table(self, table_request: CommitTableRequest) -> CommitTableRespons
462462
NoSuchTableError: If a table with the given identifier does not exist.
463463
CommitFailedException: Requirement not met, or a conflict with a concurrent commit.
464464
"""
465-
identifier_tuple = self.identifier_to_tuple_without_catalog(
465+
identifier_tuple = self._identifier_to_tuple_without_catalog(
466466
tuple(table_request.identifier.namespace.root + [table_request.identifier.name])
467467
)
468468
database_name, table_name = self.identifier_to_database_and_table(identifier_tuple)
@@ -554,7 +554,7 @@ def drop_table(self, identifier: Union[str, Identifier]) -> None:
554554
Raises:
555555
NoSuchTableError: If a table with the name does not exist, or the identifier is invalid.
556556
"""
557-
identifier_tuple = self.identifier_to_tuple_without_catalog(identifier)
557+
identifier_tuple = self._identifier_to_tuple_without_catalog(identifier)
558558
database_name, table_name = self.identifier_to_database_and_table(identifier_tuple, NoSuchTableError)
559559
try:
560560
self.glue.delete_table(DatabaseName=database_name, Name=table_name)
@@ -580,7 +580,7 @@ def rename_table(self, from_identifier: Union[str, Identifier], to_identifier: U
580580
NoSuchPropertyException: When from table miss some required properties.
581581
NoSuchNamespaceError: When the destination namespace doesn't exist.
582582
"""
583-
from_identifier_tuple = self.identifier_to_tuple_without_catalog(from_identifier)
583+
from_identifier_tuple = self._identifier_to_tuple_without_catalog(from_identifier)
584584
from_database_name, from_table_name = self.identifier_to_database_and_table(from_identifier_tuple, NoSuchTableError)
585585
to_database_name, to_table_name = self.identifier_to_database_and_table(to_identifier)
586586
try:

pyiceberg/catalog/hive.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -297,7 +297,7 @@ def _convert_hive_into_iceberg(self, table: HiveTable) -> Table:
297297
)
298298

299299
def _convert_iceberg_into_hive(self, table: Table) -> HiveTable:
300-
identifier_tuple = self.identifier_to_tuple_without_catalog(table.identifier)
300+
identifier_tuple = self._identifier_to_tuple_without_catalog(table.identifier)
301301
database_name, table_name = self.identifier_to_database_and_table(identifier_tuple, NoSuchTableError)
302302
current_time_millis = int(time.time() * 1000)
303303

@@ -431,7 +431,7 @@ def _commit_table(self, table_request: CommitTableRequest) -> CommitTableRespons
431431
NoSuchTableError: If a table with the given identifier does not exist.
432432
CommitFailedException: Requirement not met, or a conflict with a concurrent commit.
433433
"""
434-
identifier_tuple = self.identifier_to_tuple_without_catalog(
434+
identifier_tuple = self._identifier_to_tuple_without_catalog(
435435
tuple(table_request.identifier.namespace.root + [table_request.identifier.name])
436436
)
437437
database_name, table_name = self.identifier_to_database_and_table(identifier_tuple, NoSuchTableError)
@@ -477,7 +477,7 @@ def _commit_table(self, table_request: CommitTableRequest) -> CommitTableRespons
477477
# Table does not exist, create it.
478478
hive_table = self._convert_iceberg_into_hive(
479479
StagedTable(
480-
identifier=(self.name, database_name, table_name),
480+
identifier=(database_name, table_name),
481481
metadata=updated_staged_table.metadata,
482482
metadata_location=updated_staged_table.metadata_location,
483483
io=updated_staged_table.io,
@@ -525,7 +525,7 @@ def drop_table(self, identifier: Union[str, Identifier]) -> None:
525525
Raises:
526526
NoSuchTableError: If a table with the name does not exist, or the identifier is invalid.
527527
"""
528-
identifier_tuple = self.identifier_to_tuple_without_catalog(identifier)
528+
identifier_tuple = self._identifier_to_tuple_without_catalog(identifier)
529529
database_name, table_name = self.identifier_to_database_and_table(identifier_tuple, NoSuchTableError)
530530
try:
531531
with self._client as open_client:
@@ -553,7 +553,7 @@ def rename_table(self, from_identifier: Union[str, Identifier], to_identifier: U
553553
NoSuchTableError: When a table with the name does not exist.
554554
NoSuchNamespaceError: When the destination namespace doesn't exist.
555555
"""
556-
from_identifier_tuple = self.identifier_to_tuple_without_catalog(from_identifier)
556+
from_identifier_tuple = self._identifier_to_tuple_without_catalog(from_identifier)
557557
from_database_name, from_table_name = self.identifier_to_database_and_table(from_identifier_tuple, NoSuchTableError)
558558
to_database_name, to_table_name = self.identifier_to_database_and_table(to_identifier)
559559
try:

pyiceberg/catalog/rest.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -672,7 +672,7 @@ def load_table(self, identifier: Union[str, Identifier]) -> Table:
672672

673673
@retry(**_RETRY_ARGS)
674674
def drop_table(self, identifier: Union[str, Identifier], purge_requested: bool = False) -> None:
675-
identifier_tuple = self.identifier_to_tuple_without_catalog(identifier)
675+
identifier_tuple = self._identifier_to_tuple_without_catalog(identifier)
676676
response = self._session.delete(
677677
self.url(
678678
Endpoints.drop_table, prefixed=True, purge=purge_requested, **self._split_identifier_for_path(identifier_tuple)
@@ -689,7 +689,7 @@ def purge_table(self, identifier: Union[str, Identifier]) -> None:
689689

690690
@retry(**_RETRY_ARGS)
691691
def rename_table(self, from_identifier: Union[str, Identifier], to_identifier: Union[str, Identifier]) -> Table:
692-
from_identifier_tuple = self.identifier_to_tuple_without_catalog(from_identifier)
692+
from_identifier_tuple = self._identifier_to_tuple_without_catalog(from_identifier)
693693
payload = {
694694
"source": self._split_identifier_for_json(from_identifier_tuple),
695695
"destination": self._split_identifier_for_json(to_identifier),
@@ -824,7 +824,7 @@ def table_exists(self, identifier: Union[str, Identifier]) -> bool:
824824
Returns:
825825
bool: True if the table exists, False otherwise.
826826
"""
827-
identifier_tuple = self.identifier_to_tuple_without_catalog(identifier)
827+
identifier_tuple = self._identifier_to_tuple_without_catalog(identifier)
828828
response = self._session.head(
829829
self.url(Endpoints.load_table, prefixed=True, **self._split_identifier_for_path(identifier_tuple))
830830
)

pyiceberg/catalog/sql.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -192,7 +192,7 @@ def create_table(
192192
"""
193193
schema: Schema = self._convert_schema_if_needed(schema) # type: ignore
194194

195-
identifier_nocatalog = self.identifier_to_tuple_without_catalog(identifier)
195+
identifier_nocatalog = self._identifier_to_tuple_without_catalog(identifier)
196196
namespace_identifier = Catalog.namespace_from(identifier_nocatalog)
197197
table_name = Catalog.table_name_from(identifier_nocatalog)
198198
if not self._namespace_exists(namespace_identifier):
@@ -238,7 +238,7 @@ def register_table(self, identifier: Union[str, Identifier], metadata_location:
238238
TableAlreadyExistsError: If the table already exists
239239
NoSuchNamespaceError: If namespace does not exist
240240
"""
241-
identifier_tuple = self.identifier_to_tuple_without_catalog(identifier)
241+
identifier_tuple = self._identifier_to_tuple_without_catalog(identifier)
242242
namespace_tuple = Catalog.namespace_from(identifier_tuple)
243243
namespace = Catalog.namespace_to_string(namespace_tuple)
244244
table_name = Catalog.table_name_from(identifier_tuple)
@@ -300,7 +300,7 @@ def drop_table(self, identifier: Union[str, Identifier]) -> None:
300300
Raises:
301301
NoSuchTableError: If a table with the name does not exist.
302302
"""
303-
identifier_tuple = self.identifier_to_tuple_without_catalog(identifier)
303+
identifier_tuple = self._identifier_to_tuple_without_catalog(identifier)
304304
namespace_tuple = Catalog.namespace_from(identifier_tuple)
305305
namespace = Catalog.namespace_to_string(namespace_tuple)
306306
table_name = Catalog.table_name_from(identifier_tuple)
@@ -347,8 +347,8 @@ def rename_table(self, from_identifier: Union[str, Identifier], to_identifier: U
347347
TableAlreadyExistsError: If a table with the new name already exist.
348348
NoSuchNamespaceError: If the target namespace does not exist.
349349
"""
350-
from_identifier_tuple = self.identifier_to_tuple_without_catalog(from_identifier)
351-
to_identifier_tuple = self.identifier_to_tuple_without_catalog(to_identifier)
350+
from_identifier_tuple = self._identifier_to_tuple_without_catalog(from_identifier)
351+
to_identifier_tuple = self._identifier_to_tuple_without_catalog(to_identifier)
352352
from_namespace_tuple = Catalog.namespace_from(from_identifier_tuple)
353353
from_namespace = Catalog.namespace_to_string(from_namespace_tuple)
354354
from_table_name = Catalog.table_name_from(from_identifier_tuple)
@@ -406,7 +406,7 @@ def _commit_table(self, table_request: CommitTableRequest) -> CommitTableRespons
406406
NoSuchTableError: If a table with the given identifier does not exist.
407407
CommitFailedException: Requirement not met, or a conflict with a concurrent commit.
408408
"""
409-
identifier_tuple = self.identifier_to_tuple_without_catalog(
409+
identifier_tuple = self._identifier_to_tuple_without_catalog(
410410
tuple(table_request.identifier.namespace.root + [table_request.identifier.name])
411411
)
412412
namespace_tuple = Catalog.namespace_from(identifier_tuple)

pyiceberg/expressions/parser.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@
6464
)
6565
from pyiceberg.typedef import L
6666
from pyiceberg.types import strtobool
67+
from pyiceberg.utils.deprecated import deprecation_message
6768

6869
ParserElement.enablePackrat()
6970

@@ -84,6 +85,14 @@
8485

8586
@column.set_parse_action
8687
def _(result: ParseResults) -> Reference:
88+
if len(result.column) > 1:
89+
deprecation_message(
90+
deprecated_in="0.8.0",
91+
removed_in="0.9.0",
92+
help_message="Parsing expressions with table name is deprecated. Only provide field names in the row_filter.",
93+
)
94+
# TODO: Once this is removed, we will no longer take just the last index of parsed column result
95+
# And introduce support for parsing filter expressions with nested fields.
8796
return Reference(result.column[-1])
8897

8998

pyiceberg/table/__init__.py

Lines changed: 16 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -152,7 +152,7 @@
152152
from pyiceberg.utils.concurrent import ExecutorFactory
153153
from pyiceberg.utils.config import Config
154154
from pyiceberg.utils.datetime import datetime_to_millis
155-
from pyiceberg.utils.deprecated import deprecated
155+
from pyiceberg.utils.deprecated import deprecated, deprecation_message
156156
from pyiceberg.utils.properties import property_as_bool, property_as_int
157157
from pyiceberg.utils.singleton import _convert_to_hashable_type
158158

@@ -1392,7 +1392,7 @@ class CommitTableResponse(IcebergBaseModel):
13921392

13931393

13941394
class Table:
1395-
identifier: Identifier = Field()
1395+
_identifier: Identifier = Field()
13961396
metadata: TableMetadata
13971397
metadata_location: str = Field()
13981398
io: FileIO
@@ -1401,7 +1401,7 @@ class Table:
14011401
def __init__(
14021402
self, identifier: Identifier, metadata: TableMetadata, metadata_location: str, io: FileIO, catalog: Catalog
14031403
) -> None:
1404-
self.identifier = identifier
1404+
self._identifier = identifier
14051405
self.metadata = metadata
14061406
self.metadata_location = metadata_location
14071407
self.io = io
@@ -1428,6 +1428,16 @@ def refresh(self) -> Table:
14281428
self.metadata_location = fresh.metadata_location
14291429
return self
14301430

1431+
@property
1432+
def identifier(self) -> Identifier:
1433+
"""Return the identifier of this table."""
1434+
deprecation_message(
1435+
deprecated_in="0.8.0",
1436+
removed_in="0.9.0",
1437+
help_message="Table.identifier property is deprecated. Please use Table.name() function instead.",
1438+
)
1439+
return (self.catalog.name,) + self._identifier
1440+
14311441
def name(self) -> Identifier:
14321442
"""Return the identifier of this table."""
14331443
return self.identifier
@@ -1643,7 +1653,7 @@ def refs(self) -> Dict[str, SnapshotRef]:
16431653
def _do_commit(self, updates: Tuple[TableUpdate, ...], requirements: Tuple[TableRequirement, ...]) -> None:
16441654
response = self.catalog._commit_table( # pylint: disable=W0212
16451655
CommitTableRequest(
1646-
identifier=TableIdentifier(namespace=self.identifier[:-1], name=self.identifier[-1]),
1656+
identifier=TableIdentifier(namespace=self._identifier[:-1], name=self._identifier[-1]),
16471657
updates=updates,
16481658
requirements=requirements,
16491659
)
@@ -1654,16 +1664,14 @@ def _do_commit(self, updates: Tuple[TableUpdate, ...], requirements: Tuple[Table
16541664
def __eq__(self, other: Any) -> bool:
16551665
"""Return the equality of two instances of the Table class."""
16561666
return (
1657-
self.identifier == other.identifier
1658-
and self.metadata == other.metadata
1659-
and self.metadata_location == other.metadata_location
1667+
self.name() == other.name() and self.metadata == other.metadata and self.metadata_location == other.metadata_location
16601668
if isinstance(other, Table)
16611669
else False
16621670
)
16631671

16641672
def __repr__(self) -> str:
16651673
"""Return the string representation of the Table class."""
1666-
table_name = self.catalog.table_name_from(self.identifier)
1674+
table_name = self.catalog.table_name_from(self._identifier)
16671675
schema_str = ",\n ".join(str(column) for column in self.schema().columns if self.schema())
16681676
partition_str = f"partition by: [{', '.join(field.name for field in self.spec().fields if self.spec())}]"
16691677
sort_order_str = f"sort order: [{', '.join(str(field) for field in self.sort_order().fields if self.sort_order())}]"

0 commit comments

Comments (0)