Skip to content

Commit c3570d8

Browse files
committed
fix: add missing branch arg to _RewriteFiles and a corresponding test
1 parent d939b67 commit c3570d8

File tree

2 files changed

+66
-3
lines changed

2 files changed

+66
-3
lines changed

pyiceberg/table/update/snapshot.py

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -672,9 +672,6 @@ def _get_entries(manifest: ManifestFile) -> list[ManifestEntry]:
672672
class _RewriteFiles(_SnapshotProducer["_RewriteFiles"]):
673673
"""A snapshot producer that rewrites data files."""
674674

675-
def __init__(self, operation: Operation, transaction: Transaction, io: FileIO, snapshot_properties: dict[str, str]):
676-
super().__init__(operation, transaction, io, snapshot_properties=snapshot_properties)
677-
678675
def _commit(self) -> UpdatesAndRequirements:
679676
# Only produce a commit when there is something to rewrite
680677
if self._deleted_data_files or self._added_data_files:
@@ -795,6 +792,7 @@ def replace(self) -> _RewriteFiles:
795792
operation=Operation.REPLACE,
796793
transaction=self._transaction,
797794
io=self._io,
795+
branch=self._branch,
798796
snapshot_properties=self._snapshot_properties,
799797
)
800798

tests/table/test_replace.py

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -635,3 +635,68 @@ def test_replace_no_op_on_non_empty_table(catalog: Catalog) -> None:
635635
# Successive calls to current_snapshot() should yield the same snapshot
636636
assert table.current_snapshot() == initial_snapshot
637637
assert len(table.history()) == 1
638+
639+
640+
def test_replace_on_custom_branch(catalog: Catalog) -> None:
641+
# Setup a basic table using the catalog fixture
642+
catalog.create_namespace("default")
643+
table = catalog.create_table(
644+
identifier="default.test_replace_branch",
645+
schema=Schema(),
646+
)
647+
648+
# 1. File we will delete
649+
file_to_delete = DataFile.from_args(
650+
file_path="s3://bucket/test/data/deleted.parquet",
651+
file_format=FileFormat.PARQUET,
652+
partition=Record(),
653+
record_count=100,
654+
file_size_in_bytes=1024,
655+
content=DataFileContent.DATA,
656+
)
657+
file_to_delete.spec_id = 0
658+
659+
# 2. File we are adding as a replacement
660+
file_to_add = DataFile.from_args(
661+
file_path="s3://bucket/test/data/added.parquet",
662+
file_format=FileFormat.PARQUET,
663+
partition=Record(),
664+
record_count=100,
665+
file_size_in_bytes=1024,
666+
content=DataFileContent.DATA,
667+
)
668+
file_to_add.spec_id = 0
669+
670+
# Initially append to have something to replace on main
671+
with table.transaction() as tx:
672+
with tx.update_snapshot().fast_append() as append_snapshot:
673+
append_snapshot.append_data_file(file_to_delete)
674+
675+
initial_main_snapshot = cast(Snapshot, table.current_snapshot())
676+
initial_main_snapshot_id = initial_main_snapshot.snapshot_id
677+
678+
# Create a new branch called "test-branch" pointing to the initial snapshot
679+
table.manage_snapshots().create_branch(branch_name="test-branch", snapshot_id=initial_main_snapshot_id).commit()
680+
681+
# Perform a replace() operation explicitly targeting "test-branch"
682+
with table.transaction() as tx:
683+
with tx.update_snapshot(branch="test-branch").replace() as rewrite:
684+
rewrite.delete_data_file(file_to_delete)
685+
rewrite.append_data_file(file_to_add)
686+
687+
# Reload table to get updated refs
688+
table = catalog.load_table("default.test_replace_branch")
689+
690+
test_branch_ref = table.metadata.refs["test-branch"]
691+
main_branch_ref = table.metadata.refs["main"]
692+
693+
# Assert that the operation was successful on test-branch
694+
assert test_branch_ref.snapshot_id != initial_main_snapshot_id
695+
696+
# Assert that the "test-branch" reference now points to a REPLACE snapshot
697+
new_snapshot = table.snapshot_by_id(test_branch_ref.snapshot_id)
698+
assert new_snapshot is not None
699+
assert new_snapshot.summary["operation"] == Operation.REPLACE
700+
701+
# Assert that the "main" branch reference was completely untouched
702+
assert main_branch_ref.snapshot_id == initial_main_snapshot_id

0 commit comments

Comments
 (0)