|
64 | 64 | SortField, |
65 | 65 | SortOrder, |
66 | 66 | ) |
67 | | -from pyiceberg.table.statistics import BlobMetadata, StatisticsFile |
| 67 | +from pyiceberg.table.statistics import BlobMetadata, PartitionStatisticsFile, StatisticsFile |
68 | 68 | from pyiceberg.table.update import ( |
69 | 69 | AddSnapshotUpdate, |
70 | 70 | AddSortOrderUpdate, |
|
77 | 77 | AssertRefSnapshotId, |
78 | 78 | AssertTableUUID, |
79 | 79 | RemovePropertiesUpdate, |
| 80 | + RemovePartitionStatisticsUpdate, |
| 81 | + RemovePropertiesUpdate, |
80 | 82 | RemoveSnapshotRefUpdate, |
81 | 83 | RemoveSnapshotsUpdate, |
82 | 84 | RemoveStatisticsUpdate, |
83 | 85 | SetDefaultSortOrderUpdate, |
| 86 | + SetPartitionStatisticsUpdate, |
84 | 87 | SetPropertiesUpdate, |
85 | 88 | SetSnapshotRefUpdate, |
86 | 89 | SetStatisticsUpdate, |
@@ -1359,3 +1362,80 @@ def test_remove_statistics_update(table_v2_with_statistics: Table) -> None: |
1359 | 1362 | table_v2_with_statistics.metadata, |
1360 | 1363 | (RemoveStatisticsUpdate(snapshot_id=123456789),), |
1361 | 1364 | ) |
| 1365 | + |
| 1366 | + |
def test_set_partition_statistics_update(table_v2_with_statistics: Table) -> None:
    # Build a partition statistics file bound to the table's current snapshot.
    base_metadata = table_v2_with_statistics.metadata
    snapshot_id = base_metadata.current_snapshot_id

    stats_file = PartitionStatisticsFile(
        snapshot_id=snapshot_id,
        statistics_path="s3://bucket/warehouse/stats.puffin",
        file_size_in_bytes=124,
    )

    # Apply the set-update on its own so only its effect is observed.
    new_metadata = update_table_metadata(
        base_metadata,
        (SetPartitionStatisticsUpdate(partition_statistics=stats_file),),
    )

    expected = """
    {
        "snapshot-id": 3055729675574597004,
        "statistics-path": "s3://bucket/warehouse/stats.puffin",
        "file-size-in-bytes": 124
    }"""

    assert len(new_metadata.partition_statistics) == 1

    # Exactly one entry must belong to the snapshot the file was created for.
    matching = [stat for stat in new_metadata.partition_statistics if stat.snapshot_id == snapshot_id]
    assert len(matching) == 1

    # Compare parsed JSON so whitespace and key order in the dump do not matter.
    (only_match,) = matching
    assert json.loads(only_match.model_dump_json()) == json.loads(expected)
| 1398 | + |
| 1399 | + |
def test_remove_partition_statistics_update(table_v2_with_statistics: Table) -> None:
    # Step 1: register a partition statistics file for the current snapshot.
    base_metadata = table_v2_with_statistics.metadata
    snapshot_id = base_metadata.current_snapshot_id

    stats_file = PartitionStatisticsFile(
        snapshot_id=snapshot_id,
        statistics_path="s3://bucket/warehouse/stats.puffin",
        file_size_in_bytes=124,
    )
    metadata_with_stats = update_table_metadata(
        base_metadata,
        (SetPartitionStatisticsUpdate(partition_statistics=stats_file),),
    )
    assert len(metadata_with_stats.partition_statistics) == 1

    # Step 2: remove the statistics again, addressing them by the same snapshot id.
    metadata_without_stats = update_table_metadata(
        metadata_with_stats,
        (RemovePartitionStatisticsUpdate(snapshot_id=snapshot_id),),
    )

    assert len(metadata_without_stats.partition_statistics) == 0
| 1431 | + |
def test_remove_partition_statistics_update_with_invalid_snapshot_id(table_v2_with_statistics: Table) -> None:
    # Removing partition statistics for a snapshot id that has none is expected to raise.
    missing_snapshot_removal = RemovePartitionStatisticsUpdate(snapshot_id=123456789)

    with pytest.raises(ValueError, match="Partition Statistics with snapshot id 123456789 does not exist"):
        update_table_metadata(table_v2_with_statistics.metadata, (missing_snapshot_removal,))
0 commit comments