Skip to content

Commit e92e10a

Browse files
authored
Table Requirements Validation (#200)
* implement requirements validation * change the exception to CommitFailedException * add docstring * fix CI issue * make base_metadata optional and add null check
1 parent 043aba5 commit e92e10a

4 files changed

Lines changed: 210 additions & 3 deletions

File tree

pyiceberg/exceptions.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ class GenericDynamoDbError(DynamoDbError):
104104
pass
105105

106106

107-
class CommitFailedException(RESTError):
107+
class CommitFailedException(Exception):
108108
"""Commit failed, refresh and try again."""
109109

110110

pyiceberg/table/__init__.py

Lines changed: 78 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@
4444
from sortedcontainers import SortedList
4545
from typing_extensions import Annotated
4646

47-
from pyiceberg.exceptions import ResolveError, ValidationError
47+
from pyiceberg.exceptions import CommitFailedException, ResolveError, ValidationError
4848
from pyiceberg.expressions import (
4949
AlwaysTrue,
5050
And,
@@ -540,18 +540,40 @@ def update_table_metadata(base_metadata: TableMetadata, updates: Tuple[TableUpda
540540
class TableRequirement(IcebergBaseModel):
541541
type: str
542542

543+
@abstractmethod
544+
def validate(self, base_metadata: Optional[TableMetadata]) -> None:
545+
"""Validate the requirement against the base metadata.
546+
547+
Args:
548+
base_metadata: The base metadata to be validated against.
549+
550+
Raises:
551+
CommitFailedException: When the requirement is not met.
552+
"""
553+
...
554+
543555

544556
class AssertCreate(TableRequirement):
545557
"""The table must not already exist; used for create transactions."""
546558

547559
type: Literal["assert-create"] = Field(default="assert-create")
548560

561+
def validate(self, base_metadata: Optional[TableMetadata]) -> None:
562+
if base_metadata is not None:
563+
raise CommitFailedException("Table already exists")
564+
549565

550566
class AssertTableUUID(TableRequirement):
551567
"""The table UUID must match the requirement's `uuid`."""
552568

553569
type: Literal["assert-table-uuid"] = Field(default="assert-table-uuid")
554-
uuid: str
570+
uuid: uuid.UUID
571+
572+
def validate(self, base_metadata: Optional[TableMetadata]) -> None:
573+
if base_metadata is None:
574+
raise CommitFailedException("Requirement failed: current table metadata is missing")
575+
elif self.uuid != base_metadata.table_uuid:
576+
raise CommitFailedException(f"Table UUID does not match: {self.uuid} != {base_metadata.table_uuid}")
555577

556578

557579
class AssertRefSnapshotId(TableRequirement):
@@ -564,41 +586,95 @@ class AssertRefSnapshotId(TableRequirement):
564586
ref: str
565587
snapshot_id: Optional[int] = Field(default=None, alias="snapshot-id")
566588

589+
def validate(self, base_metadata: Optional[TableMetadata]) -> None:
590+
if base_metadata is None:
591+
raise CommitFailedException("Requirement failed: current table metadata is missing")
592+
elif snapshot_ref := base_metadata.refs.get(self.ref):
593+
ref_type = snapshot_ref.snapshot_ref_type
594+
if self.snapshot_id is None:
595+
raise CommitFailedException(f"Requirement failed: {ref_type} {self.ref} was created concurrently")
596+
elif self.snapshot_id != snapshot_ref.snapshot_id:
597+
raise CommitFailedException(
598+
f"Requirement failed: {ref_type} {self.ref} has changed: expected id {self.snapshot_id}, found {snapshot_ref.snapshot_id}"
599+
)
600+
elif self.snapshot_id is not None:
601+
raise CommitFailedException(f"Requirement failed: branch or tag {self.ref} is missing, expected {self.snapshot_id}")
602+
567603

568604
class AssertLastAssignedFieldId(TableRequirement):
569605
"""The table's last assigned column id must match the requirement's `last-assigned-field-id`."""
570606

571607
type: Literal["assert-last-assigned-field-id"] = Field(default="assert-last-assigned-field-id")
572608
last_assigned_field_id: int = Field(..., alias="last-assigned-field-id")
573609

610+
def validate(self, base_metadata: Optional[TableMetadata]) -> None:
611+
if base_metadata is None:
612+
raise CommitFailedException("Requirement failed: current table metadata is missing")
613+
elif base_metadata.last_column_id != self.last_assigned_field_id:
614+
raise CommitFailedException(
615+
f"Requirement failed: last assigned field id has changed: expected {self.last_assigned_field_id}, found {base_metadata.last_column_id}"
616+
)
617+
574618

575619
class AssertCurrentSchemaId(TableRequirement):
576620
"""The table's current schema id must match the requirement's `current-schema-id`."""
577621

578622
type: Literal["assert-current-schema-id"] = Field(default="assert-current-schema-id")
579623
current_schema_id: int = Field(..., alias="current-schema-id")
580624

625+
def validate(self, base_metadata: Optional[TableMetadata]) -> None:
626+
if base_metadata is None:
627+
raise CommitFailedException("Requirement failed: current table metadata is missing")
628+
elif self.current_schema_id != base_metadata.current_schema_id:
629+
raise CommitFailedException(
630+
f"Requirement failed: current schema id has changed: expected {self.current_schema_id}, found {base_metadata.current_schema_id}"
631+
)
632+
581633

582634
class AssertLastAssignedPartitionId(TableRequirement):
583635
"""The table's last assigned partition id must match the requirement's `last-assigned-partition-id`."""
584636

585637
type: Literal["assert-last-assigned-partition-id"] = Field(default="assert-last-assigned-partition-id")
586638
last_assigned_partition_id: int = Field(..., alias="last-assigned-partition-id")
587639

640+
def validate(self, base_metadata: Optional[TableMetadata]) -> None:
641+
if base_metadata is None:
642+
raise CommitFailedException("Requirement failed: current table metadata is missing")
643+
elif base_metadata.last_partition_id != self.last_assigned_partition_id:
644+
raise CommitFailedException(
645+
f"Requirement failed: last assigned partition id has changed: expected {self.last_assigned_partition_id}, found {base_metadata.last_partition_id}"
646+
)
647+
588648

589649
class AssertDefaultSpecId(TableRequirement):
590650
"""The table's default spec id must match the requirement's `default-spec-id`."""
591651

592652
type: Literal["assert-default-spec-id"] = Field(default="assert-default-spec-id")
593653
default_spec_id: int = Field(..., alias="default-spec-id")
594654

655+
def validate(self, base_metadata: Optional[TableMetadata]) -> None:
656+
if base_metadata is None:
657+
raise CommitFailedException("Requirement failed: current table metadata is missing")
658+
elif self.default_spec_id != base_metadata.default_spec_id:
659+
raise CommitFailedException(
660+
f"Requirement failed: default spec id has changed: expected {self.default_spec_id}, found {base_metadata.default_spec_id}"
661+
)
662+
595663

596664
class AssertDefaultSortOrderId(TableRequirement):
597665
"""The table's default sort order id must match the requirement's `default-sort-order-id`."""
598666

599667
type: Literal["assert-default-sort-order-id"] = Field(default="assert-default-sort-order-id")
600668
default_sort_order_id: int = Field(..., alias="default-sort-order-id")
601669

670+
def validate(self, base_metadata: Optional[TableMetadata]) -> None:
671+
if base_metadata is None:
672+
raise CommitFailedException("Requirement failed: current table metadata is missing")
673+
elif self.default_sort_order_id != base_metadata.default_sort_order_id:
674+
raise CommitFailedException(
675+
f"Requirement failed: default sort order id has changed: expected {self.default_sort_order_id}, found {base_metadata.default_sort_order_id}"
676+
)
677+
602678

603679
class Namespace(IcebergRootModel[List[str]]):
604680
"""Reference to one or more levels of a namespace."""

pyiceberg/table/refs.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,10 @@ def __repr__(self) -> str:
3434
"""Return the string representation of the SnapshotRefType class."""
3535
return f"SnapshotRefType.{self.name}"
3636

37+
def __str__(self) -> str:
38+
"""Return the string representation of the SnapshotRefType class."""
39+
return self.value
40+
3741

3842
class SnapshotRef(IcebergBaseModel):
3943
snapshot_id: int = Field(alias="snapshot-id")

tests/table/test_init.py

Lines changed: 127 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,14 @@
1515
# specific language governing permissions and limitations
1616
# under the License.
1717
# pylint:disable=redefined-outer-name
18+
import uuid
1819
from copy import copy
1920
from typing import Dict
2021

2122
import pytest
2223
from sortedcontainers import SortedList
2324

25+
from pyiceberg.exceptions import CommitFailedException
2426
from pyiceberg.expressions import (
2527
AlwaysTrue,
2628
And,
@@ -39,6 +41,14 @@
3941
from pyiceberg.schema import Schema
4042
from pyiceberg.table import (
4143
AddSnapshotUpdate,
44+
AssertCreate,
45+
AssertCurrentSchemaId,
46+
AssertDefaultSortOrderId,
47+
AssertDefaultSpecId,
48+
AssertLastAssignedFieldId,
49+
AssertLastAssignedPartitionId,
50+
AssertRefSnapshotId,
51+
AssertTableUUID,
4252
SetPropertiesUpdate,
4353
SetSnapshotRefUpdate,
4454
SnapshotRef,
@@ -721,3 +731,120 @@ def test_metadata_isolation_from_illegal_updates(table_v1: Table) -> None:
721731
def test_generate_snapshot_id(table_v2: Table) -> None:
722732
assert isinstance(_generate_snapshot_id(), int)
723733
assert isinstance(table_v2.new_snapshot_id(), int)
734+
735+
736+
def test_assert_create(table_v2: Table) -> None:
737+
AssertCreate().validate(None)
738+
739+
with pytest.raises(CommitFailedException, match="Table already exists"):
740+
AssertCreate().validate(table_v2.metadata)
741+
742+
743+
def test_assert_table_uuid(table_v2: Table) -> None:
744+
base_metadata = table_v2.metadata
745+
AssertTableUUID(uuid=base_metadata.table_uuid).validate(base_metadata)
746+
747+
with pytest.raises(CommitFailedException, match="Requirement failed: current table metadata is missing"):
748+
AssertTableUUID(uuid=uuid.UUID("9c12d441-03fe-4693-9a96-a0705ddf69c2")).validate(None)
749+
750+
with pytest.raises(
751+
CommitFailedException,
752+
match="Table UUID does not match: 9c12d441-03fe-4693-9a96-a0705ddf69c2 != 9c12d441-03fe-4693-9a96-a0705ddf69c1",
753+
):
754+
AssertTableUUID(uuid=uuid.UUID("9c12d441-03fe-4693-9a96-a0705ddf69c2")).validate(base_metadata)
755+
756+
757+
def test_assert_ref_snapshot_id(table_v2: Table) -> None:
758+
base_metadata = table_v2.metadata
759+
AssertRefSnapshotId(ref="main", snapshot_id=base_metadata.current_snapshot_id).validate(base_metadata)
760+
761+
with pytest.raises(CommitFailedException, match="Requirement failed: current table metadata is missing"):
762+
AssertRefSnapshotId(ref="main", snapshot_id=1).validate(None)
763+
764+
with pytest.raises(
765+
CommitFailedException,
766+
match="Requirement failed: branch main was created concurrently",
767+
):
768+
AssertRefSnapshotId(ref="main", snapshot_id=None).validate(base_metadata)
769+
770+
with pytest.raises(
771+
CommitFailedException,
772+
match="Requirement failed: branch main has changed: expected id 1, found 3055729675574597004",
773+
):
774+
AssertRefSnapshotId(ref="main", snapshot_id=1).validate(base_metadata)
775+
776+
with pytest.raises(
777+
CommitFailedException,
778+
match="Requirement failed: branch or tag not_exist is missing, expected 1",
779+
):
780+
AssertRefSnapshotId(ref="not_exist", snapshot_id=1).validate(base_metadata)
781+
782+
783+
def test_assert_last_assigned_field_id(table_v2: Table) -> None:
784+
base_metadata = table_v2.metadata
785+
AssertLastAssignedFieldId(last_assigned_field_id=base_metadata.last_column_id).validate(base_metadata)
786+
787+
with pytest.raises(CommitFailedException, match="Requirement failed: current table metadata is missing"):
788+
AssertLastAssignedFieldId(last_assigned_field_id=1).validate(None)
789+
790+
with pytest.raises(
791+
CommitFailedException,
792+
match="Requirement failed: last assigned field id has changed: expected 1, found 3",
793+
):
794+
AssertLastAssignedFieldId(last_assigned_field_id=1).validate(base_metadata)
795+
796+
797+
def test_assert_current_schema_id(table_v2: Table) -> None:
798+
base_metadata = table_v2.metadata
799+
AssertCurrentSchemaId(current_schema_id=base_metadata.current_schema_id).validate(base_metadata)
800+
801+
with pytest.raises(CommitFailedException, match="Requirement failed: current table metadata is missing"):
802+
AssertCurrentSchemaId(current_schema_id=1).validate(None)
803+
804+
with pytest.raises(
805+
CommitFailedException,
806+
match="Requirement failed: current schema id has changed: expected 2, found 1",
807+
):
808+
AssertCurrentSchemaId(current_schema_id=2).validate(base_metadata)
809+
810+
811+
def test_last_assigned_partition_id(table_v2: Table) -> None:
812+
base_metadata = table_v2.metadata
813+
AssertLastAssignedPartitionId(last_assigned_partition_id=base_metadata.last_partition_id).validate(base_metadata)
814+
815+
with pytest.raises(CommitFailedException, match="Requirement failed: current table metadata is missing"):
816+
AssertLastAssignedPartitionId(last_assigned_partition_id=1).validate(None)
817+
818+
with pytest.raises(
819+
CommitFailedException,
820+
match="Requirement failed: last assigned partition id has changed: expected 1, found 1000",
821+
):
822+
AssertLastAssignedPartitionId(last_assigned_partition_id=1).validate(base_metadata)
823+
824+
825+
def test_assert_default_spec_id(table_v2: Table) -> None:
826+
base_metadata = table_v2.metadata
827+
AssertDefaultSpecId(default_spec_id=base_metadata.default_spec_id).validate(base_metadata)
828+
829+
with pytest.raises(CommitFailedException, match="Requirement failed: current table metadata is missing"):
830+
AssertDefaultSpecId(default_spec_id=1).validate(None)
831+
832+
with pytest.raises(
833+
CommitFailedException,
834+
match="Requirement failed: default spec id has changed: expected 1, found 0",
835+
):
836+
AssertDefaultSpecId(default_spec_id=1).validate(base_metadata)
837+
838+
839+
def test_assert_default_sort_order_id(table_v2: Table) -> None:
840+
base_metadata = table_v2.metadata
841+
AssertDefaultSortOrderId(default_sort_order_id=base_metadata.default_sort_order_id).validate(base_metadata)
842+
843+
with pytest.raises(CommitFailedException, match="Requirement failed: current table metadata is missing"):
844+
AssertDefaultSortOrderId(default_sort_order_id=1).validate(None)
845+
846+
with pytest.raises(
847+
CommitFailedException,
848+
match="Requirement failed: default sort order id has changed: expected 1, found 3",
849+
):
850+
AssertDefaultSortOrderId(default_sort_order_id=1).validate(base_metadata)

0 commit comments

Comments
 (0)