@@ -1931,6 +1931,11 @@ pub struct Coordinator {
19311931 /// The interval at which to collect storage usage information.
19321932 storage_usage_collection_interval : Duration ,
19331933
1934+ /// Set once all compute objects have been observed as hydrated, gating
1935+ /// the first write into `mz_object_arrangement_size_history`. Sticky:
1936+ /// later partial re-hydrations (e.g. replica restart) don't re-arm it.
1937+ arrangement_sizes_hydration_observed : bool ,
1938+
19341939 /// Segment analytics client.
19351940 #[ derivative( Debug = "ignore" ) ]
19361941 segment_client : Option < mz_segment:: Client > ,
@@ -4274,27 +4279,11 @@ impl Coordinator {
42744279 differential_dataflow:: consolidation:: consolidate ( & mut current_contents) ;
42754280
42764281 let cutoff_ts = u128:: from ( read_ts) . saturating_sub ( retention_period. as_millis ( ) ) ;
4277- let mut expired = Vec :: new ( ) ;
4278- for ( row, diff) in current_contents {
4279- assert_eq ! (
4280- diff, 1 ,
4281- "consolidated contents should not contain retractions: ({row:#?}, {diff:#?})"
4282- ) ;
4283- // Column 3 is `collection_timestamp`.
4284- let collection_timestamp = row
4285- . unpack ( )
4286- . get ( 3 )
4287- . expect ( "definition of mz_object_arrangement_size_history changed" )
4288- . unwrap_timestamptz ( ) ;
4289- let collection_timestamp = collection_timestamp. timestamp_millis ( ) ;
4290- let collection_timestamp: u128 = collection_timestamp
4291- . try_into ( )
4292- . expect ( "all collections happen after Jan 1 1970" ) ;
4293- if collection_timestamp < cutoff_ts {
4294- let builtin_update = BuiltinTableUpdate :: row ( item_id, row, Diff :: MINUS_ONE ) ;
4295- expired. push ( builtin_update) ;
4296- }
4297- }
4282+ let expired = arrangement_sizes_expired_retractions (
4283+ current_contents,
4284+ cutoff_ts,
4285+ item_id,
4286+ ) ;
42984287
42994288 // TODO(arrangement-sizes): when the writeable-catalog-server
43004289 // plumbing in https://github.com/MaterializeInc/materialize/pull/35436
@@ -4322,6 +4311,40 @@ impl Coordinator {
43224311 }
43234312}
43244313
4314+ /// Returns retraction updates for rows in a consolidated
4315+ /// `mz_object_arrangement_size_history` snapshot whose `collection_timestamp`
4316+ /// (column 3) is strictly before `cutoff_ts`.
4317+ ///
4318+ /// Panics if any input row has `diff != 1`: the caller must consolidate first,
4319+ /// and a consolidated history table should never contain retractions because
4320+ /// the only source of retractions is this function itself.
4321+ fn arrangement_sizes_expired_retractions (
4322+ rows : impl IntoIterator < Item = ( mz_repr:: Row , i64 ) > ,
4323+ cutoff_ts : u128 ,
4324+ item_id : CatalogItemId ,
4325+ ) -> Vec < BuiltinTableUpdate > {
4326+ let mut expired = Vec :: new ( ) ;
4327+ for ( row, diff) in rows {
4328+ assert_eq ! (
4329+ diff, 1 ,
4330+ "consolidated contents should not contain retractions: ({row:#?}, {diff:#?})"
4331+ ) ;
4332+ let collection_timestamp = row
4333+ . unpack ( )
4334+ . get ( 3 )
4335+ . expect ( "definition of mz_object_arrangement_size_history changed" )
4336+ . unwrap_timestamptz ( )
4337+ . timestamp_millis ( ) ;
4338+ let collection_timestamp: u128 = collection_timestamp
4339+ . try_into ( )
4340+ . expect ( "all collections happen after Jan 1 1970" ) ;
4341+ if collection_timestamp < cutoff_ts {
4342+ expired. push ( BuiltinTableUpdate :: row ( item_id, row, Diff :: MINUS_ONE ) ) ;
4343+ }
4344+ }
4345+ expired
4346+ }
4347+
43254348#[ cfg( test) ]
43264349impl Coordinator {
43274350 #[ allow( dead_code) ]
@@ -4783,6 +4806,7 @@ pub fn serve(
47834806 cloud_resource_controller,
47844807 storage_usage_client,
47854808 storage_usage_collection_interval,
4809+ arrangement_sizes_hydration_observed : false ,
47864810 segment_client,
47874811 metrics,
47884812 optimizer_metrics,
@@ -5248,3 +5272,56 @@ mod id_pool_tests {
52485272 pool. refill ( 10 , 5 ) ;
52495273 }
52505274}
5275+
#[cfg(test)]
mod arrangement_sizes_pruner_tests {
    use mz_repr::catalog_item_id::CatalogItemId;
    use mz_repr::{Datum, Row};

    use super::arrangement_sizes_expired_retractions;

    /// Builds a row laid out like `mz_object_arrangement_size_history` whose
    /// `collection_timestamp` is `ts_ms` milliseconds after the Unix epoch.
    ///
    /// The pruner only inspects column 3, but the remaining three columns are
    /// filled with realistic values so that a change in the expected row shape
    /// makes these tests fail.
    fn history_row(ts_ms: i64) -> Row {
        let collected_at = mz_ore::now::to_datetime(ts_ms.try_into().expect("non-negative"));
        let collection_timestamp = collected_at.try_into().expect("fits in TimestampTz");
        Row::pack_slice(&[
            Datum::String("r1"),
            Datum::String("u1"),
            Datum::Int64(123),
            Datum::TimestampTz(collection_timestamp),
        ])
    }

    /// An arbitrary id: the function under test only forwards it into the
    /// updates it builds and the tests never dispatch on it.
    fn item_id() -> CatalogItemId {
        CatalogItemId::User(42)
    }

    #[mz_ore::test]
    fn empty_input_produces_no_retractions() {
        let no_rows = Vec::<(Row, i64)>::new();
        let retractions = arrangement_sizes_expired_retractions(no_rows, 1_000, item_id());
        assert!(retractions.is_empty());
    }

    #[mz_ore::test]
    fn retracts_only_rows_strictly_before_cutoff() {
        // Timestamps on both sides of the cutoff, plus one exactly at the
        // cutoff (1_000) to pin down the strict-less-than boundary: only 100
        // and 500 are expected to be retracted.
        let rows: Vec<(Row, i64)> = [100, 500, 1_000, 5_000]
            .into_iter()
            .map(|ts_ms| (history_row(ts_ms), 1))
            .collect();
        let retractions = arrangement_sizes_expired_retractions(rows, 1_000, item_id());
        assert_eq!(retractions.len(), 2);
    }

    #[mz_ore::test]
    #[should_panic(expected = "consolidated contents should not contain retractions")]
    fn retraction_in_input_panics() {
        // A negative diff violates the "consolidated, insert-only" precondition.
        let rows: Vec<(Row, i64)> = vec![(history_row(100), -1)];
        let _ = arrangement_sizes_expired_retractions(rows, 1_000, item_id());
    }
}
0 commit comments