Skip to content

Commit 596df80

Browse files
committed
feat: Update the _RewriteFiles._commit to correctly raise errors in case specifications aren't met
1 parent ef9b84f commit 596df80

File tree

1 file changed

+14
-0
lines changed

1 file changed

+14
-0
lines changed

pyiceberg/table/update/snapshot.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -676,6 +676,20 @@ def __init__(self, operation: Operation, transaction: Transaction, io: FileIO, s
676676
def _commit(self) -> UpdatesAndRequirements:
677677
# Only produce a commit when there is something to rewrite
678678
if self._deleted_data_files or self._added_data_files:
679+
# Grab the entries that we actually found in the table's manifests
680+
deleted_entries = self._deleted_entries()
681+
found_deleted_files = {entry.data_file for entry in deleted_entries}
682+
683+
# If the user asked to delete files that aren't in the table, abort.
684+
if len(found_deleted_files) != len(self._deleted_data_files):
685+
raise ValueError("Cannot delete files that are not present in the table")
686+
687+
added_records = sum(f.record_count for f in self._added_data_files)
688+
deleted_records = sum(entry.data_file.record_count for entry in deleted_entries)
689+
690+
if added_records > deleted_records:
691+
raise ValueError(f"Invalid replace: records added ({added_records}) exceeds records removed ({deleted_records})")
692+
679693
return super()._commit()
680694
else:
681695
return (), ()

0 commit comments

Comments
 (0)