Skip to content

Commit 11766e6

Browse files
g-talbotclaude
andcommitted
docs: add missing why-comments across all merge pipeline files
Per CODE_STYLE.md: comments should convey intent, not implementation. Added explanations for num_merge_ops lineage, known_split_ids rebuild heuristic, output dir isolation, empty merge handling, scratch dir lifetime, permit Drop safety, publisher setter ordering, and feedback loop guard conditions. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 74ebf40 commit 11766e6

5 files changed

Lines changed: 11 additions & 3 deletions

File tree

quickwit/quickwit-indexing/src/actors/merge_scheduler_service.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -267,6 +267,7 @@ impl MergeSchedulerService {
267267
} = PeekMut::pop(next_merge);
268268
// The permit is owned by the task and released via Drop when
269269
// the executor finishes, triggering PermitReleased back here.
270+
// Drop-based release ensures the semaphore is freed even on panic.
270271
let parquet_merge_task = ParquetMergeTask {
271272
merge_operation,
272273
merge_permit,

quickwit/quickwit-indexing/src/actors/metrics_pipeline/parquet_merge_executor.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -96,7 +96,8 @@ impl Handler<ParquetMergeScratch> for ParquetMergeExecutor {
9696
"executing parquet merge"
9797
);
9898

99-
// Create output directory for the merged files.
99+
// Separate output subdirectory so the merge engine's temp files
100+
// don't collide with the downloaded inputs in scratch_directory.
100101
let output_dir = scratch.scratch_directory.path().join("merged_output");
101102
std::fs::create_dir_all(&output_dir)
102103
.context("failed to create merge output directory")
@@ -142,6 +143,7 @@ impl Handler<ParquetMergeScratch> for ParquetMergeExecutor {
142143
}
143144
};
144145

146+
// Empty output is valid (all input rows were empty). Nothing to publish.
145147
if outputs.is_empty() {
146148
info!(
147149
merge_split_id = %merge_split_id,

quickwit/quickwit-indexing/src/actors/metrics_pipeline/parquet_merge_planner.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,8 @@ impl ParquetMergePlanner {
265265
known.insert(split.split_id.as_str().to_string());
266266
}
267267
}
268+
// If rebuild didn't shrink the set by at least half, something may be
269+
// leaking IDs (splits not being dropped from the inventory).
268270
if known.len() * 2 >= self.known_split_ids.len() {
269271
warn!(
270272
known_split_ids_len_after = known.len(),

quickwit/quickwit-indexing/src/actors/publisher.rs

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,9 @@ impl Publisher {
6767
}
6868
}
6969

70-
/// Set the Parquet merge planner mailbox for merge feedback.
70+
/// Sets the Parquet merge planner mailbox for merge feedback.
71+
/// Post-construction setter because the Publisher is created before the
72+
/// planner mailbox is available (bottom-up actor spawn order).
7173
#[cfg(feature = "metrics")]
7274
pub fn set_parquet_merge_planner_mailbox(
7375
mut self,

quickwit/quickwit-parquet-engine/src/merge/metadata_aggregation.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,8 @@ pub fn merge_parquet_split_metadata(
9595
}
9696
}
9797

98-
// num_merge_ops: max(inputs) + 1
98+
// Each merge adds one to the lineage depth. The policy uses this to
99+
// decide when a split is "mature" (reached max_merge_ops).
99100
let num_merge_ops = inputs
100101
.iter()
101102
.map(|s| s.num_merge_ops)

0 commit comments

Comments
 (0)