@@ -46,7 +46,7 @@ pub struct GcMetrics {
4646 pub failed_splits : IntCounter ,
4747}
4848
49- trait RecordGcMetrics {
/// Sink for the counters produced by a garbage-collection pass.
///
/// NOTE(review): no implementor is visible in this excerpt, so the argument descriptions on
/// `record` are inferred from the parameter names only — confirm against the implementation.
49+ pub ( crate ) trait RecordGcMetrics {
/// Reports one batch of results; presumably `num_delete_splits` is the number of deleted splits,
/// `num_deleted_bytes` the bytes they occupied, and `num_failed_splits` the number of splits
/// that could not be deleted.
5050 fn record ( & self , num_delete_splits : usize , num_deleted_bytes : u64 , num_failed_splits : usize ) ;
5151}
5252
@@ -72,7 +72,7 @@ pub struct DeleteSplitsError {
7272 metastore_failures : Vec < SplitInfo > ,
7373}
7474
75- async fn protect_future < Fut , T > ( progress : Option < & Progress > , future : Fut ) -> T
75+ pub ( crate ) async fn protect_future < Fut , T > ( progress : Option < & Progress > , future : Fut ) -> T
7676where Fut : Future < Output = T > {
7777 match progress {
7878 None => future. await ,
@@ -289,7 +289,7 @@ async fn list_splits_metadata(
289289
290290/// To avoid overloading the metastore, the rate of split deletion can be throttled by setting
291291/// the `QW_MAX_SPLIT_DELETION_RATE_PER_SEC` environment variable.
292- fn get_maximum_split_deletion_rate_per_sec ( ) -> Option < usize > {
292+ pub ( crate ) fn get_maximum_split_deletion_rate_per_sec ( ) -> Option < usize > {
293293 static MAX_SPLIT_DELETION_RATE_PER_SEC : OnceLock < Option < usize > > = OnceLock :: new ( ) ;
294294 * MAX_SPLIT_DELETION_RATE_PER_SEC . get_or_init ( || {
295295 quickwit_common:: get_from_env_opt :: < usize > ( "QW_MAX_SPLIT_DELETION_RATE_PER_SEC" , false )
@@ -408,6 +408,43 @@ async fn delete_splits_marked_for_deletion_several_indexes(
408408 split_removal_info
409409}
410410
/// A split normalized for storage deletion: just the id, path, and size.
///
/// Used as the common currency between the tantivy and parquet GC paths.
/// The common traits are derived so values can be logged, duplicated, and
/// compared in tests.
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) struct SplitToDelete {
    /// Identifier of the split.
    pub split_id: String,
    /// Path of the split file that is handed to the storage for deletion.
    pub path: PathBuf,
    /// Size of the split file, in bytes.
    pub size_bytes: u64,
}
418+
419+ /// Deletes split files from storage and partitions into (succeeded, failed).
420+ ///
421+ /// Returns the `BulkDeleteError` if there was a partial failure, so the caller
422+ /// can log it with index-specific context. Does NOT touch the metastore.
423+ pub ( crate ) async fn delete_split_files (
424+ storage : & dyn Storage ,
425+ splits : Vec < SplitToDelete > ,
426+ progress_opt : Option < & Progress > ,
427+ ) -> ( Vec < SplitToDelete > , Vec < SplitToDelete > , Option < BulkDeleteError > ) {
428+ if splits. is_empty ( ) {
429+ return ( Vec :: new ( ) , Vec :: new ( ) , None ) ;
430+ }
431+ let paths: Vec < & Path > = splits. iter ( ) . map ( |s| s. path . as_path ( ) ) . collect ( ) ;
432+ let result = protect_future ( progress_opt, storage. bulk_delete ( & paths) ) . await ;
433+
434+ if let Some ( progress) = progress_opt {
435+ progress. record_progress ( ) ;
436+ }
437+ match result {
438+ Ok ( ( ) ) => ( splits, Vec :: new ( ) , None ) ,
439+ Err ( bulk_err) => {
440+ let success_paths: HashSet < & PathBuf > = bulk_err. successes . iter ( ) . collect ( ) ;
441+ let ( succeeded, failed) =
442+ splits. into_iter ( ) . partition ( |s| success_paths. contains ( & s. path ) ) ;
443+ ( succeeded, failed, Some ( bulk_err) )
444+ }
445+ }
446+ }
447+
411448/// Delete a list of splits from the storage and the metastore.
412449/// It should leave the index and the metastore in good state.
413450///
@@ -424,49 +461,41 @@ pub async fn delete_splits_from_storage_and_metastore(
424461 progress_opt : Option < & Progress > ,
425462) -> Result < Vec < SplitInfo > , DeleteSplitsError > {
426463 let mut split_infos: HashMap < PathBuf , SplitInfo > = HashMap :: with_capacity ( splits. len ( ) ) ;
427-
428464 for split in splits {
429465 let split_info = split. as_split_info ( ) ;
430466 split_infos. insert ( split_info. file_name . clone ( ) , split_info) ;
431467 }
432- let split_paths = split_infos
433- . keys ( )
434- . map ( |split_path_buf| split_path_buf. as_path ( ) )
435- . collect :: < Vec < & Path > > ( ) ;
436- let delete_result = protect_future ( progress_opt, storage. bulk_delete ( & split_paths) ) . await ;
437468
438- if let Some ( progress) = progress_opt {
439- progress. record_progress ( ) ;
440- }
441- let mut successes = Vec :: with_capacity ( split_infos. len ( ) ) ;
469+ let splits_to_delete: Vec < SplitToDelete > = split_infos
470+ . values ( )
471+ . map ( |info| SplitToDelete {
472+ split_id : info. split_id . clone ( ) ,
473+ path : info. file_name . clone ( ) ,
474+ size_bytes : info. file_size_bytes . as_u64 ( ) ,
475+ } )
476+ . collect ( ) ;
477+
478+ let ( succeeded_stds, failed_stds, storage_err) =
479+ delete_split_files ( & * storage, splits_to_delete, progress_opt) . await ;
480+
481+ let successes: Vec < SplitInfo > = succeeded_stds. iter ( ) . map ( |s| split_infos[ & s. path ] . clone ( ) ) . collect ( ) ;
482+ let storage_failures: Vec < SplitInfo > = failed_stds. iter ( ) . map ( |s| split_infos[ & s. path ] . clone ( ) ) . collect ( ) ;
483+
442484 let mut storage_error: Option < BulkDeleteError > = None ;
443- let mut storage_failures = Vec :: new ( ) ;
444-
445- match delete_result {
446- Ok ( _) => successes. extend ( split_infos. into_values ( ) ) ,
447- Err ( bulk_delete_error) => {
448- let success_split_paths: HashSet < & PathBuf > =
449- bulk_delete_error. successes . iter ( ) . collect ( ) ;
450- for ( split_path, split_info) in split_infos {
451- if success_split_paths. contains ( & split_path) {
452- successes. push ( split_info) ;
453- } else {
454- storage_failures. push ( split_info) ;
455- }
456- }
457- let failed_split_paths = storage_failures
458- . iter ( )
459- . map ( |split_info| split_info. file_name . as_path ( ) )
460- . collect :: < Vec < _ > > ( ) ;
461- error ! (
462- error=?bulk_delete_error. error,
463- index_id=index_uid. index_id,
464- "failed to delete split file(s) {:?} from storage" ,
465- PrettySample :: new( & failed_split_paths, 5 ) ,
466- ) ;
467- storage_error = Some ( bulk_delete_error) ;
468- }
469- } ;
485+ if let Some ( bulk_delete_error) = storage_err {
486+ let failed_split_paths = storage_failures
487+ . iter ( )
488+ . map ( |split_info| split_info. file_name . as_path ( ) )
489+ . collect :: < Vec < _ > > ( ) ;
490+ error ! (
491+ error=?bulk_delete_error. error,
492+ index_id=index_uid. index_id,
493+ "failed to delete split file(s) {:?} from storage" ,
494+ PrettySample :: new( & failed_split_paths, 5 ) ,
495+ ) ;
496+ storage_error = Some ( bulk_delete_error) ;
497+ }
498+
470499 if !successes. is_empty ( ) {
471500 let split_ids: Vec < SplitId > = successes
472501 . iter ( )
@@ -486,25 +515,23 @@ pub async fn delete_splits_from_storage_and_metastore(
486515 "failed to delete split(s) {:?} from metastore" ,
487516 PrettySample :: new( & split_ids, 5 ) ,
488517 ) ;
489- let delete_splits_error = DeleteSplitsError {
518+ return Err ( DeleteSplitsError {
490519 successes : Vec :: new ( ) ,
491520 storage_error,
492521 storage_failures,
493522 metastore_error : Some ( metastore_error) ,
494523 metastore_failures : successes,
495- } ;
496- return Err ( delete_splits_error) ;
524+ } ) ;
497525 }
498526 }
499527 if !storage_failures. is_empty ( ) {
500- let delete_splits_error = DeleteSplitsError {
528+ return Err ( DeleteSplitsError {
501529 successes,
502530 storage_error,
503531 storage_failures,
504532 metastore_error : None ,
505533 metastore_failures : Vec :: new ( ) ,
506- } ;
507- return Err ( delete_splits_error) ;
534+ } ) ;
508535 }
509536 Ok ( successes)
510537}
0 commit comments