Skip to content

Commit c63cc55

Browse files
committed
tests passing
1 parent 7f6bf9d commit c63cc55

3 files changed

Lines changed: 45 additions & 24 deletions

File tree

pyiceberg/table/snapshots.py

Lines changed: 4 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@
2525
from pydantic import Field, PrivateAttr, model_serializer
2626

2727
from pyiceberg.io import FileIO
28-
from pyiceberg.manifest import DataFile, DataFileContent, ManifestContent, ManifestFile, _manifests
28+
from pyiceberg.manifest import DataFile, DataFileContent, ManifestFile, _manifests
2929
from pyiceberg.partitioning import UNPARTITIONED_PARTITION_SPEC, PartitionSpec
3030
from pyiceberg.schema import Schema
3131

@@ -251,18 +251,9 @@ def __str__(self) -> str:
251251
result_str = f"{operation}id={self.snapshot_id}{parent_id}{schema_id}"
252252
return result_str
253253

254-
def manifests(self, io: FileIO, content_filter: Optional[ManifestContent] = None) -> List[ManifestFile]:
255-
"""Return the manifests for the given snapshot.
256-
257-
Args:
258-
io: The IO instance to read the manifest list.
259-
content_filter: The content filter to apply to the manifests. One of ManifestContent.DATA or ManifestContent.DELETES.
260-
"""
261-
return [
262-
manifest
263-
for manifest in _manifests(io, self.manifest_list)
264-
if content_filter is None or manifest.content == content_filter
265-
]
254+
def manifests(self, io: FileIO) -> List[ManifestFile]:
255+
"""Return the manifests for the given snapshot."""
256+
return list(_manifests(io, self.manifest_list))
266257

267258

268259
class MetadataLogEntry(IcebergBaseModel):

pyiceberg/table/update/validate.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -59,8 +59,8 @@ def validation_history(
5959
manifests_files.extend(
6060
[
6161
manifest
62-
for manifest in snapshot.manifests(table.io, manifest_content_filter)
63-
if manifest.added_snapshot_id == snapshot.snapshot_id
62+
for manifest in snapshot.manifests(table.io)
63+
if manifest.added_snapshot_id == snapshot.snapshot_id and manifest.content == manifest_content_filter
6464
]
6565
)
6666

tests/table/test_validate.py

Lines changed: 39 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -16,22 +16,52 @@
1616
# under the License.
1717
# pylint:disable=redefined-outer-name,eval-used
1818
from typing import cast
19+
from unittest.mock import patch
1920

20-
from pyiceberg.manifest import ManifestContent
21+
from pyiceberg.io import FileIO
22+
from pyiceberg.manifest import ManifestContent, ManifestFile
2123
from pyiceberg.table import Table
2224
from pyiceberg.table.snapshots import Operation, Snapshot
2325
from pyiceberg.table.update.validate import validation_history
2426

2527

2628
def test_validation_history(table_v2_with_extensive_snapshots: Table) -> None:
2729
"""Test the validation history function."""
30+
mock_manifests = {}
31+
32+
for i, snapshot in enumerate(table_v2_with_extensive_snapshots.snapshots()):
33+
mock_manifest = ManifestFile(
34+
manifest_path=f"foo/bar/{i}",
35+
manifest_length=1,
36+
partition_spec_id=1,
37+
content=ManifestContent.DATA if i % 2 == 0 else ManifestContent.DELETES,
38+
sequence_number=1,
39+
min_sequence_number=1,
40+
added_snapshot_id=snapshot.snapshot_id,
41+
)
42+
43+
# Store the manifest for this specific snapshot
44+
mock_manifests[snapshot.snapshot_id] = [mock_manifest]
45+
46+
expected_manifest_data_counts = len([m for m in mock_manifests.values() if m[0].content == ManifestContent.DATA]) - 1
47+
2848
oldest_snapshot = table_v2_with_extensive_snapshots.snapshots()[0]
2949
newest_snapshot = cast(Snapshot, table_v2_with_extensive_snapshots.current_snapshot())
30-
manifests, snapshots = validation_history(
31-
table_v2_with_extensive_snapshots,
32-
newest_snapshot,
33-
oldest_snapshot,
34-
{Operation.APPEND},
35-
ManifestContent.DATA,
36-
)
37-
assert len(snapshots) == 2
50+
51+
def mock_read_manifest_side_effect(self: Snapshot, io: FileIO) -> list[ManifestFile]:
52+
"""Mock the manifests method to use the snapshot_id for lookup."""
53+
snapshot_id = self.snapshot_id
54+
if snapshot_id in mock_manifests:
55+
return mock_manifests[snapshot_id]
56+
return []
57+
58+
with patch("pyiceberg.table.snapshots.Snapshot.manifests", new=mock_read_manifest_side_effect):
59+
manifests, snapshots = validation_history(
60+
table_v2_with_extensive_snapshots,
61+
newest_snapshot,
62+
oldest_snapshot,
63+
{Operation.APPEND},
64+
ManifestContent.DATA,
65+
)
66+
67+
assert len(manifests) == expected_manifest_data_counts

0 commit comments

Comments
 (0)