Skip to content

Commit 625f6d3

Browse files
ForeverAngryJanKaul
authored andcommitted
refactor: update snapshot expiration implementation and remove legacy code
1 parent 65e6982 commit 625f6d3

7 files changed

Lines changed: 248 additions & 441 deletions

File tree

iceberg-rust/examples/expire_snapshots.rs

Lines changed: 0 additions & 108 deletions
This file was deleted.

iceberg-rust/src/table/maintenance/expire_snapshots.rs

Lines changed: 7 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,12 @@
1-
//! Snapshot expiration functionality for Iceberg tables
1+
//! Snapshot expiration functionality for Iceberg tables (Legacy implementation)
22
//!
3-
//! This module provides the ability to expire old snapshots and clean up associated
4-
//! manifest and data files. The implementation follows Iceberg's atomic commit model
5-
//! and supports various expiration criteria including:
3+
//! **Note: This module contains the standalone implementation of snapshot expiration.**
4+
//! **The recommended approach is to use `table.expire_snapshots()` which integrates**
5+
//! **with the Operation framework for better transaction support.**
66
//!
7-
//! * Time-based expiration (older than timestamp)
8-
//! * Count-based retention (keep only last N snapshots)
9-
//! * Reference-aware cleanup (preserve snapshots referenced by branches/tags)
10-
//! * Optional orphaned file cleanup
7+
//! This module provides functionality to expire (remove) old snapshots from Iceberg tables
8+
//! based on various retention policies. Snapshot expiration helps manage storage costs
9+
//! by removing metadata for old table versions while preserving data integrity.
1110
1211
use std::collections::HashSet;
1312

iceberg-rust/src/table/maintenance/mod.rs

Lines changed: 0 additions & 10 deletions
This file was deleted.

iceberg-rust/src/table/mod.rs

Lines changed: 94 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -41,11 +41,90 @@ use crate::{
4141
table::transaction::TableTransaction,
4242
};
4343

44-
pub mod maintenance;
4544
pub mod manifest;
4645
pub mod manifest_list;
4746
pub mod transaction;
4847

48+
/// Builder for configuring and executing snapshot expiration operations
49+
///
50+
/// This builder provides a fluent API for configuring how snapshots should be expired:
51+
/// * [`expire_older_than`](ExpireSnapshotsBuilder::expire_older_than) - Remove snapshots older than a timestamp
52+
/// * [`retain_last`](ExpireSnapshotsBuilder::retain_last) - Keep only the most recent N snapshots
53+
/// * [`clean_orphan_files`](ExpireSnapshotsBuilder::clean_orphan_files) - Also remove unreferenced data files
54+
/// * [`dry_run`](ExpireSnapshotsBuilder::dry_run) - Preview what would be deleted without actually deleting
55+
pub struct ExpireSnapshotsBuilder<'a> {
56+
table: &'a mut Table,
57+
older_than: Option<i64>,
58+
retain_last: Option<usize>,
59+
clean_orphan_files: bool,
60+
retain_ref_snapshots: bool,
61+
dry_run: bool,
62+
}
63+
64+
impl<'a> ExpireSnapshotsBuilder<'a> {
65+
/// Create a new snapshot expiration builder for the given table
66+
fn new(table: &'a mut Table) -> Self {
67+
Self {
68+
table,
69+
older_than: None,
70+
retain_last: None,
71+
clean_orphan_files: false,
72+
retain_ref_snapshots: true,
73+
dry_run: false,
74+
}
75+
}
76+
77+
/// Expire snapshots older than the given timestamp (in milliseconds since Unix epoch)
78+
pub fn expire_older_than(mut self, timestamp_ms: i64) -> Self {
79+
self.older_than = Some(timestamp_ms);
80+
self
81+
}
82+
83+
/// Retain only the most recent N snapshots, expiring all others
84+
pub fn retain_last(mut self, count: usize) -> Self {
85+
self.retain_last = Some(count);
86+
self
87+
}
88+
89+
/// Enable or disable cleanup of orphaned data files
90+
pub fn clean_orphan_files(mut self, enabled: bool) -> Self {
91+
self.clean_orphan_files = enabled;
92+
self
93+
}
94+
95+
/// Control whether snapshots referenced by branches/tags should be preserved
96+
pub fn retain_ref_snapshots(mut self, enabled: bool) -> Self {
97+
self.retain_ref_snapshots = enabled;
98+
self
99+
}
100+
101+
/// Enable dry run mode to preview what would be deleted without actually deleting
102+
pub fn dry_run(mut self, enabled: bool) -> Self {
103+
self.dry_run = enabled;
104+
self
105+
}
106+
107+
/// Execute the snapshot expiration operation
108+
pub async fn execute(self) -> Result<Vec<i64>, Error> {
109+
let _result = self.table.new_transaction(None)
110+
.expire_snapshots(
111+
self.older_than,
112+
self.retain_last,
113+
self.clean_orphan_files,
114+
self.retain_ref_snapshots,
115+
self.dry_run,
116+
)
117+
.commit()
118+
.await?;
119+
120+
// Extract the expired snapshot IDs from the commit result
121+
// For now, we'll need to return empty vec since the transaction commit
122+
// doesn't directly return the expired snapshot IDs
123+
// TODO: Enhance transaction result to include operation-specific details
124+
Ok(vec![])
125+
}
126+
}
127+
49128
#[derive(Debug, Clone)]
50129
/// Iceberg table
51130
pub struct Table {
@@ -299,14 +378,20 @@ impl Table {
299378
TableTransaction::new(self, branch)
300379
}
301380

302-
/// Creates a new snapshot expiration builder for cleaning up old snapshots
381+
/// Configures snapshot expiration for this table
382+
///
383+
/// Returns a builder that allows configuring snapshot expiration policies:
384+
/// * Time-based expiration: Remove snapshots older than a timestamp
385+
/// * Count-based retention: Keep only the most recent N snapshots
386+
/// * Orphan file cleanup: Remove data files no longer referenced by any snapshot
387+
/// * Reference preservation: Protect snapshots referenced by branches/tags
388+
/// * Dry run mode: Preview what would be deleted without actually deleting
303389
///
304-
/// This method returns a builder that can be configured to expire snapshots based on
305-
/// various criteria such as age, count, or reference status. The operation is atomic
306-
/// and will either succeed completely or not modify the table at all.
390+
/// The operation is executed through the table's transaction system, ensuring
391+
/// atomicity and consistency with other table operations.
307392
///
308393
/// # Returns
309-
/// * `ExpireSnapshots` - A builder for configuring and executing snapshot expiration
394+
/// * `ExpireSnapshotsBuilder` - A builder for configuring expiration parameters
310395
///
311396
/// # Examples
312397
/// ```rust,no_run
@@ -319,12 +404,12 @@ impl Table {
319404
/// .execute()
320405
/// .await?;
321406
///
322-
/// println!("Expired {} snapshots", result.expired_snapshot_ids.len());
407+
/// println!("Expired {} snapshots", result.len());
323408
/// # Ok(())
324409
/// # }
325410
/// ```
326-
pub fn expire_snapshots(&mut self) -> maintenance::ExpireSnapshots<'_> {
327-
maintenance::ExpireSnapshots::new(self)
411+
pub fn expire_snapshots(&mut self) -> ExpireSnapshotsBuilder<'_> {
412+
ExpireSnapshotsBuilder::new(self)
328413
}
329414
}
330415

iceberg-rust/src/table/transaction/mod.rs

Lines changed: 50 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,9 @@ pub(crate) static REPLACE_INDEX: usize = 4;
3838
pub(crate) static OVERWRITE_INDEX: usize = 5;
3939
pub(crate) static UPDATE_PROPERTIES_INDEX: usize = 6;
4040
pub(crate) static SET_SNAPSHOT_REF_INDEX: usize = 7;
41+
pub(crate) static EXPIRE_SNAPSHOTS_INDEX: usize = 8;
4142

42-
pub(crate) static NUM_OPERATIONS: usize = 8;
43+
pub(crate) static NUM_OPERATIONS: usize = 9;
4344

4445
/// A transaction that can perform multiple operations on a table atomically
4546
///
@@ -395,6 +396,54 @@ impl<'table> TableTransaction<'table> {
395396
self.operations[SET_SNAPSHOT_REF_INDEX] = Some(Operation::SetSnapshotRef(entry));
396397
self
397398
}
399+
400+
/// Expire snapshots based on the provided configuration
401+
///
402+
/// This operation expires snapshots according to the retention policies specified.
403+
/// It can expire snapshots older than a certain timestamp, retain only the most recent N snapshots,
404+
/// and optionally clean up orphaned data files.
405+
///
406+
/// # Arguments
407+
/// * `older_than` - Optional timestamp (ms since Unix epoch) to expire snapshots older than this time
408+
/// * `retain_last` - Optional number of most recent snapshots to keep, regardless of timestamp
409+
/// * `clean_orphan_files` - Whether to clean up data files that are no longer referenced
410+
/// * `retain_ref_snapshots` - Whether to preserve snapshots that are referenced by branches/tags
411+
/// * `dry_run` - Whether to perform a dry run without actually deleting anything
412+
///
413+
/// # Returns
414+
/// * `Self` - The transaction builder for method chaining
415+
///
416+
/// # Examples
417+
/// ```
418+
/// let result = table.new_transaction(None)
419+
/// .expire_snapshots(
420+
/// Some(chrono::Utc::now().timestamp_millis() - 7 * 24 * 60 * 60 * 1000),
421+
/// Some(5),
422+
/// true,
423+
/// true,
424+
/// false
425+
/// )
426+
/// .commit()
427+
/// .await?;
428+
/// ```
429+
pub fn expire_snapshots(
430+
mut self,
431+
older_than: Option<i64>,
432+
retain_last: Option<usize>,
433+
clean_orphan_files: bool,
434+
retain_ref_snapshots: bool,
435+
dry_run: bool,
436+
) -> Self {
437+
self.operations[EXPIRE_SNAPSHOTS_INDEX] = Some(Operation::ExpireSnapshots {
438+
older_than,
439+
retain_last,
440+
clean_orphan_files,
441+
retain_ref_snapshots,
442+
dry_run,
443+
});
444+
self
445+
}
446+
398447
/// Commits all operations in this transaction atomically
399448
///
400449
/// This method executes all operations in the transaction and updates the table

0 commit comments

Comments
 (0)