Skip to content

Commit 7387620

Browse files
g-talbotclaude
andcommitted
refactor: extract shared score_merge() function for scheduler
Both Tantivy and Parquet merge scheduling used identical score logic (prefer merges that reduce more splits for less total bytes). Extracted the core arithmetic into score_merge(num_splits, total_bytes) and have both score_merge_operation() and score_parquet_merge_operation() call it. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 6a93427 commit 7387620

1 file changed

Lines changed: 20 additions & 18 deletions

File tree

quickwit/quickwit-indexing/src/actors/merge_scheduler_service.rs

Lines changed: 20 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -318,24 +318,30 @@ struct ScheduleMerge {
318318
split_downloader_mailbox: Mailbox<MergeSplitDownloader>,
319319
}
320320

321-
/// The higher, the sooner we will execute the merge operation.
322-
/// A good merge operation
323-
/// - strongly reduces the number splits
324-
/// - is light.
325-
fn score_merge_operation(merge_operation: &MergeOperation) -> u64 {
326-
let total_num_bytes: u64 = merge_operation.total_num_bytes();
321+
/// Scores a merge operation for priority scheduling.
322+
///
323+
/// Higher score = scheduled sooner. Prefers merges that strongly reduce
324+
/// split count relative to their total byte cost. Used by both Tantivy
325+
/// and Parquet merge scheduling.
326+
fn score_merge(num_splits: usize, total_num_bytes: u64) -> u64 {
327327
if total_num_bytes == 0 {
328-
// Silly corner case that should never happen.
329328
return u64::MAX;
330329
}
331-
// We will remove splits.len() and add 1 merge splits.
332-
let delta_num_splits = (merge_operation.splits.len() - 1) as u64;
333-
// We use integer arithmetic to avoid `f64 are not ordered` silliness.
330+
// We will remove num_splits and add 1 merged split.
331+
let delta_num_splits = (num_splits - 1) as u64;
332+
// Integer arithmetic to avoid `f64 are not ordered` silliness.
334333
(delta_num_splits << 48)
335334
.checked_div(total_num_bytes)
336335
.unwrap_or(1u64)
337336
}
338337

338+
fn score_merge_operation(merge_operation: &MergeOperation) -> u64 {
339+
score_merge(
340+
merge_operation.splits.len(),
341+
merge_operation.total_num_bytes(),
342+
)
343+
}
344+
339345
impl ScheduleMerge {
340346
pub fn new(
341347
merge_operation: TrackedObject<MergeOperation>,
@@ -406,14 +412,10 @@ impl Handler<PermitReleased> for MergeSchedulerService {
406412

407413
#[cfg(feature = "metrics")]
408414
fn score_parquet_merge_operation(merge_operation: &ParquetMergeOperation) -> u64 {
409-
let total_num_bytes = merge_operation.total_size_bytes();
410-
if total_num_bytes == 0 {
411-
return u64::MAX;
412-
}
413-
let delta_num_splits = (merge_operation.splits.len() - 1) as u64;
414-
(delta_num_splits << 48)
415-
.checked_div(total_num_bytes)
416-
.unwrap_or(1u64)
415+
score_merge(
416+
merge_operation.splits.len(),
417+
merge_operation.total_size_bytes(),
418+
)
417419
}
418420

419421
#[cfg(feature = "metrics")]

0 commit comments

Comments
 (0)