@@ -1025,110 +1025,71 @@ def _current_ancestors(self) -> set[int]:
10251025
10261026
class ExpireSnapshots(UpdateTableMetadata["ExpireSnapshots"]):
    """Expire snapshots and refs.

    Use table.expire_snapshots().<operation>().commit() to run a specific operation.
    Use table.expire_snapshots().<operation-one>().<operation-two>().commit() to run multiple operations.
    Pending changes are applied on commit. The older_than() cutoff is evaluated at commit
    time, so its order relative to remove_expired_refs() does not affect the result.
    """

    # Updates and requirements handed back to the caller by _commit().
    _updates: tuple[TableUpdate, ...]
    _requirements: tuple[TableRequirement, ...]
    # Snapshot IDs selected for expiration; _commit() adds age-based matches and drops protected IDs.
    _snapshot_ids_to_expire: set[int]
    # Names of branches/tags marked for removal by remove_expired_refs().
    _ref_names_to_expire: set[str]
    # Cutoff in milliseconds set by older_than() and compared against snapshot.timestamp_ms
    # in _commit(); None means no age-based expiry was requested.
    _expire_older_than_ms: int | None
10381040
10391041 def __init__ (self , transaction : Transaction ) -> None :
10401042 super ().__init__ (transaction )
10411043 self ._updates = ()
10421044 self ._requirements = ()
10431045 self ._snapshot_ids_to_expire = set ()
1046+ self ._ref_names_to_expire = set ()
1047+ self ._expire_older_than_ms = None
10441048
10451049 def _commit (self ) -> UpdatesAndRequirements :
10461050 """
10471051 Commit the staged updates and requirements.
10481052
1049- This will remove the snapshots with the given IDs, but will always skip protected snapshots (branch/tag heads).
1050- Refs (branches/tags) whose snapshot age exceeds their configured max-ref-age-ms are also removed.
1053+ Applies all pending expirations: explicit snapshot IDs, age-based snapshot expiry,
1054+ and ref removals. Protected snapshots (branch/tag heads not being expired) are always
1055+ excluded. The age threshold from older_than() is evaluated here so that call order
1056+ with remove_expired_refs() does not affect the result.
10511057
10521058 Returns:
10531059 Tuple of updates and requirements to be committed,
10541060 as required by the calling parent apply functions.
10551061 """
1056- now_ms = int (datetime .now ().timestamp () * 1000 )
1057- expired_ref_names = self ._compute_expired_refs (now_ms )
1058- protected_ids = self ._get_protected_snapshot_ids (expired_ref_names )
1062+ protected_ids = self ._get_protected_snapshot_ids ()
10591063
1060- # Snapshots exclusively referenced by expired refs are also eligible for expiration
1061- for ref_name in expired_ref_names :
1062- ref = self ._transaction .table_metadata .refs [ref_name ]
1063- if ref .snapshot_id not in protected_ids :
1064- self ._snapshot_ids_to_expire .add (ref .snapshot_id )
1064+ if self ._expire_older_than_ms is not None :
1065+ for snapshot in self ._transaction .table_metadata .snapshots :
1066+ if snapshot .timestamp_ms < self ._expire_older_than_ms and snapshot .snapshot_id not in protected_ids :
1067+ self ._snapshot_ids_to_expire .add (snapshot .snapshot_id )
10651068
10661069 snapshot_ids_to_expire = self ._snapshot_ids_to_expire - protected_ids
10671070
10681071 updates : list [TableUpdate ] = list (self ._updates )
1069- for ref_name in expired_ref_names :
1072+ for ref_name in self . _ref_names_to_expire :
10701073 updates .append (RemoveSnapshotRefUpdate (ref_name = ref_name ))
10711074 if snapshot_ids_to_expire :
10721075 updates .append (RemoveSnapshotsUpdate (snapshot_ids = snapshot_ids_to_expire ))
10731076 self ._updates = tuple (updates )
10741077 return self ._updates , self ._requirements
10751078
1076- def _compute_expired_refs (self , now_ms : int ) -> set [str ]:
1077- """
1078- Compute the set of ref names (branches/tags) that should be expired.
1079-
1080- A ref is expired when the age of its snapshot exceeds:
1081- - the ref's own max_ref_age_ms, or
1082- - the table property history.expire.max-ref-age-ms, if the ref has no per-ref setting.
1083- The main branch is never expired. Refs with no effective max-ref-age configuration are skipped.
1084-
1085- Args:
1086- now_ms: Current time in milliseconds.
1087-
1088- Returns:
1089- Set of ref names to remove.
1090- """
1091- from pyiceberg .table import TableProperties
1092-
1093- props = self ._transaction .table_metadata .properties
1094- table_max_ref_age_ms : int | None = (
1095- int (props [TableProperties .MAX_REF_AGE_MS ]) if TableProperties .MAX_REF_AGE_MS in props else None
1096- )
1097-
1098- expired : set [str ] = set ()
1099- for name , ref in self ._transaction .table_metadata .refs .items ():
1100- if name == MAIN_BRANCH :
1101- continue
1102- effective_max_ref_age_ms = ref .max_ref_age_ms if ref .max_ref_age_ms is not None else table_max_ref_age_ms
1103- if effective_max_ref_age_ms is None :
1104- continue
1105- snapshot = self ._transaction .table_metadata .snapshot_by_id (ref .snapshot_id )
1106- if snapshot is None :
1107- expired .add (name )
1108- continue
1109- if (now_ms - snapshot .timestamp_ms ) > effective_max_ref_age_ms :
1110- expired .add (name )
1111- return expired
1112-
1113- def _get_protected_snapshot_ids (self , expired_ref_names : set [str ] | None = None ) -> set [int ]:
1079+ def _get_protected_snapshot_ids (self ) -> set [int ]:
11141080 """
1115- Get the IDs of protected snapshots.
1081+ Get the IDs of snapshots that must not be expired .
11161082
1117- These are the HEAD snapshots of all branches and all tagged snapshots that are NOT being expired.
1118- Snapshots exclusively pointed to by expired refs are not protected.
1119-
1120- Args:
1121- expired_ref_names: Set of ref names that are being expired. Defaults to empty set.
1083+ These are the HEAD snapshots of all branches and tags that are not
1084+ already marked for removal via remove_expired_refs().
11221085
11231086 Returns:
11241087 Set of protected snapshot IDs to exclude from expiration.
11251088 """
1126- if expired_ref_names is None :
1127- expired_ref_names = set ()
11281089 return {
11291090 ref .snapshot_id
11301091 for name , ref in self ._transaction .table_metadata .refs .items ()
1131- if name not in expired_ref_names
1092+ if name not in self . _ref_names_to_expire
11321093 }
11331094
11341095 def by_id (self , snapshot_id : int ) -> ExpireSnapshots :
@@ -1169,17 +1130,46 @@ def by_ids(self, snapshot_ids: list[int]) -> ExpireSnapshots:
11691130
11701131 def older_than (self , dt : datetime ) -> ExpireSnapshots :
11711132 """
1172- Expire all unprotected snapshots with a timestamp older than a given value.
1133+ Expire all unprotected snapshots with a timestamp older than the given value.
1134+
1135+ The filter is evaluated at commit time so that snapshots left without a ref
1136+ by remove_expired_refs() are also considered, regardless of call order.
11731137
11741138 Args:
1175- dt (datetime): Only snapshots with datetime < this value will be expired.
1139+ dt (datetime): Only snapshots with timestamp < this value will be expired.
11761140
11771141 Returns:
11781142 This for method chaining.
11791143 """
1180- protected_ids = self ._get_protected_snapshot_ids ()
1181- expire_from = datetime_to_millis (dt )
1182- for snapshot in self ._transaction .table_metadata .snapshots :
1183- if snapshot .timestamp_ms < expire_from and snapshot .snapshot_id not in protected_ids :
1184- self ._snapshot_ids_to_expire .add (snapshot .snapshot_id )
1144+ self ._expire_older_than_ms = datetime_to_millis (dt )
1145+ return self
1146+
1147+ def remove_expired_refs (self , default_max_ref_age_ms : int | None = None ) -> ExpireSnapshots :
1148+ """
1149+ Mark stale branches and tags for removal.
1150+
1151+ A ref is expired when the age of its snapshot exceeds its own max_ref_age_ms.
1152+ If a ref has no per-ref max_ref_age_ms set, default_max_ref_age_ms is used as fallback.
1153+ The main branch is never removed.
1154+
1155+ Snapshots left without any live ref after this call are no longer protected,
1156+ so a subsequent older_than() will include them in age-based expiry.
1157+
1158+ Args:
1159+ default_max_ref_age_ms: Fallback max age in milliseconds for refs that have no
1160+ per-ref max_ref_age_ms configured. If None, such refs are skipped.
1161+
1162+ Returns:
1163+ This for method chaining.
1164+ """
1165+ now_ms = int (datetime .now ().timestamp () * 1000 )
1166+ for name , ref in self ._transaction .table_metadata .refs .items ():
1167+ if name == MAIN_BRANCH :
1168+ continue
1169+ effective_max_ref_age_ms = ref .max_ref_age_ms if ref .max_ref_age_ms is not None else default_max_ref_age_ms
1170+ if effective_max_ref_age_ms is None :
1171+ continue
1172+ snapshot = self ._transaction .table_metadata .snapshot_by_id (ref .snapshot_id )
1173+ if snapshot is None or (now_ms - snapshot .timestamp_ms ) > effective_max_ref_age_ms :
1174+ self ._ref_names_to_expire .add (name )
11851175 return self
0 commit comments