Skip to content

Commit 909a95d

Browse files
committed
perf: Improve macos indexing wall time
1 parent ff81719 commit 909a95d

7 files changed

Lines changed: 323 additions & 62 deletions

File tree

autoresearch.jsonl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{"type":"config","name":"Reduce bigram build time by grouping files per parent dir for openat","metricName":"post_scan_s","metricUnit":"s","bestDirection":"lower"}

crates/fff-core/src/bigram_filter.rs

Lines changed: 84 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -631,34 +631,76 @@ pub(crate) fn build_bigram_index(
631631
budget: &crate::types::ContentCacheBudget,
632632
base_path: &std::path::Path,
633633
arena: crate::simd_path::ArenaPtr,
634+
warmup: bool,
634635
) -> (BigramFilter, Vec<usize>) {
635636
let start = std::time::Instant::now();
636-
tracing::info!("Building bigram index for {} files...", files.len());
637+
tracing::info!(
638+
"Building bigram index for {} files (warmup={})",
639+
files.len(),
640+
warmup,
641+
);
637642

638643
let builder = BigramIndexBuilder::new(files.len());
639644
let skip_builder = BigramIndexBuilder::new(files.len());
640645

641-
// this does remove a memcpy for every single file + actually reducing open time on macos
642646
#[cfg(unix)]
643647
let base_fd: libc::c_int = open_base_dir_fd(base_path);
644648
#[cfg(not(unix))]
645649
let base_fd: i32 = -1;
646650

647-
// `content_binary` is only touched from the Binary branch below, so
648-
// the mutex is cold in practice. A lock-free collector wasn't worth
649-
// the complexity.
650651
let content_binary: std::sync::Mutex<Vec<usize>> = std::sync::Mutex::new(Vec::new());
651652

652-
crate::file_picker::BACKGROUND_THREAD_POOL.install(|| {
653-
files
654-
.par_chunks(BIGRAM_CHUNK_FILES)
655-
.enumerate()
656-
.for_each(|(chunk_idx, chunk)| {
657-
let base_idx = chunk_idx * BIGRAM_CHUNK_FILES;
658-
for (offset, file) in chunk.iter().enumerate() {
659-
let file_idx = base_idx + offset;
653+
// When warmup is enabled, process high-frecency files first so they
654+
// fill the limited cache budget before lower-priority files consume it.
655+
// This replaces the separate warmup_mmaps pass with zero extra syscalls.
656+
if warmup {
657+
let max_files = budget.max_files;
658+
// Partition indices: top `max_files` by frecency go first.
659+
let mut indices: Vec<usize> = (0..files.len()).collect();
660+
if indices.len() > max_files {
661+
indices.select_nth_unstable_by(max_files, |&a, &b| {
662+
let fa = &files[a];
663+
let fb = &files[b];
664+
let a_ok = !fa.is_binary() && fa.size > 0;
665+
let b_ok = !fb.is_binary() && fb.size > 0;
666+
match (a_ok, b_ok) {
667+
(true, false) => std::cmp::Ordering::Less,
668+
(false, true) => std::cmp::Ordering::Greater,
669+
(false, false) => std::cmp::Ordering::Equal,
670+
(true, true) => fb.total_frecency_score().cmp(&fa.total_frecency_score()),
671+
}
672+
});
673+
}
674+
675+
// Process priority files first (fills cache), then the rest.
676+
let priority_count = max_files.min(indices.len());
677+
let (priority, rest) = indices.split_at(priority_count);
678+
679+
crate::file_picker::BACKGROUND_THREAD_POOL.install(|| {
680+
// Phase 1: high-frecency files fill the cache budget.
681+
priority.par_chunks(BIGRAM_CHUNK_FILES).for_each(|chunk| {
682+
for &file_idx in chunk {
683+
let outcome = process_file(
684+
&files[file_idx],
685+
file_idx,
686+
&builder,
687+
&skip_builder,
688+
base_fd,
689+
base_path,
690+
arena,
691+
budget,
692+
);
693+
if matches!(outcome, FileOutcome::Binary) {
694+
content_binary.lock().unwrap().push(file_idx);
695+
}
696+
}
697+
});
698+
699+
// Phase 2: remaining files (cache budget likely exhausted, uses openat).
700+
rest.par_chunks(BIGRAM_CHUNK_FILES).for_each(|chunk| {
701+
for &file_idx in chunk {
660702
let outcome = process_file(
661-
file,
703+
&files[file_idx],
662704
file_idx,
663705
&builder,
664706
&skip_builder,
@@ -672,12 +714,37 @@ pub(crate) fn build_bigram_index(
672714
}
673715
}
674716
});
675-
});
717+
});
718+
} else {
719+
// No warmup: process in natural order (no cache priority needed).
720+
crate::file_picker::BACKGROUND_THREAD_POOL.install(|| {
721+
files
722+
.par_chunks(BIGRAM_CHUNK_FILES)
723+
.enumerate()
724+
.for_each(|(chunk_idx, chunk)| {
725+
let base_idx = chunk_idx * BIGRAM_CHUNK_FILES;
726+
for (offset, file) in chunk.iter().enumerate() {
727+
let file_idx = base_idx + offset;
728+
let outcome = process_file(
729+
file,
730+
file_idx,
731+
&builder,
732+
&skip_builder,
733+
base_fd,
734+
base_path,
735+
arena,
736+
budget,
737+
);
738+
if matches!(outcome, FileOutcome::Binary) {
739+
content_binary.lock().unwrap().push(file_idx);
740+
}
741+
}
742+
});
743+
});
744+
}
676745

677746
#[cfg(unix)]
678747
if base_fd >= 0 {
679-
// SAFETY: we opened `base_fd` at the top of this function and
680-
// no worker still references it once the rayon pool joined.
681748
unsafe { libc::close(base_fd) };
682749
}
683750

@@ -688,9 +755,6 @@ pub(crate) fn build_bigram_index(
688755
let skip_index = skip_builder.compress(Some(SKIP_INDEX_MIN_DENSITY_PCT));
689756
index.set_skip_index(skip_index);
690757

691-
// Builder buffers were freed by `compress()` above (one deallocation
692-
// each); nudge mimalloc to return them (and any transient allocs)
693-
// to the OS.
694758
crate::file_picker::hint_allocator_collect();
695759

696760
tracing::info!(

crates/fff-core/src/file_picker.rs

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -74,11 +74,10 @@ pub(crate) static BACKGROUND_THREAD_POOL: LazyLock<rayon::ThreadPool> = LazyLock
7474
let total = std::thread::available_parallelism()
7575
.map(|p| p.get())
7676
.unwrap_or(4);
77-
let bg_threads = total.saturating_sub(2).max(1);
78-
info!(
79-
"Background pool: {} threads (system has {})",
80-
bg_threads, total
81-
);
77+
78+
// Benchmarks show that background tasks spend most of their time waiting on syscalls,
79+
// so halving the available parallelism costs some performance, but the loss is negligible.
80+
let bg_threads = (total / 2).max(2);
8281
rayon::ThreadPoolBuilder::new()
8382
.num_threads(bg_threads)
8483
.thread_name(|i| format!("fff-bg-{i}"))
@@ -745,6 +744,17 @@ impl FilePicker {
745744

746745
{
747746
let mut guard = shared_picker.write()?;
747+
// If the old picker has a post-scan in flight, wait for it to
748+
// finish. cancel() was already called so the rayon loop exits
749+
// within microseconds (each worker checks cancelled per item).
750+
if let Some(ref old_picker) = *guard {
751+
let flag = Arc::clone(&old_picker.signals.post_scan_indexing_active);
752+
drop(guard);
753+
while flag.load(Ordering::Acquire) {
754+
std::thread::sleep(std::time::Duration::from_millis(1));
755+
}
756+
guard = shared_picker.write()?;
757+
}
748758
*guard = Some(picker);
749759
}
750760

@@ -1288,6 +1298,7 @@ impl FilePicker {
12881298
arena: self.sync_data.arena_base_ptr(),
12891299
budget: &*self.cache_budget as *const _,
12901300
base_path: self.base_path.clone(),
1301+
cancelled: Arc::clone(&self.signals.cancelled),
12911302
post_scan_flag: Arc::clone(&self.signals.post_scan_indexing_active),
12921303
})
12931304
}
@@ -1655,8 +1666,7 @@ pub(crate) struct PostScanUnsafeSnapshot {
16551666
pub arena: ArenaPtr,
16561667
pub budget: *const crate::types::ContentCacheBudget,
16571668
pub base_path: PathBuf,
1658-
/// Holds the flag reference so it is automatically flips
1659-
/// when the pointsr
1669+
pub cancelled: Arc<AtomicBool>,
16601670
post_scan_flag: Arc<AtomicBool>,
16611671
}
16621672

crates/fff-core/src/scan.rs

Lines changed: 24 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -217,14 +217,24 @@ impl ScanJob {
217217
rescubscribe_watcher_post_scan(&shared_picker);
218218
}
219219

220-
// 3. Apply git status + frecency off-lock.
220+
// 3. Post-scan warmup + bigram build.
221+
// Run BEFORE git status join so bigram I/O overlaps with the
222+
// git-status thread (which can take 10+ seconds on huge repos).
223+
// Frecency scores are already populated during walk_filesystem,
224+
// so warmup cache priority works correctly without git status.
225+
if (config.warmup || config.content_indexing) && !signals.cancelled.load(Ordering::Acquire)
226+
{
227+
Self::run_post_scan(&shared_picker, &signals, &config);
228+
}
229+
230+
// 4. Apply git status + frecency off-lock (joins the git thread).
221231
if !signals.cancelled.load(Ordering::Acquire)
222232
&& let Some(status_handle) = status_handle
223233
{
224234
apply_git_status_and_frecency(&shared_picker, &shared_frecency, status_handle, mode);
225235
}
226236

227-
// 4. Install filesystem watcher (initial scan only).
237+
// 5. Install filesystem watcher (initial scan only).
228238
if config.install_watcher && config.watch && !signals.cancelled.load(Ordering::Acquire) {
229239
let shared_picker: &SharedFilePicker = &shared_picker;
230240
let shared_frecency: &SharedFrecency = &shared_frecency;
@@ -248,12 +258,6 @@ impl ScanJob {
248258
};
249259
}
250260

251-
// 5. Post-scan warmup + bigram build.
252-
if (config.warmup || config.content_indexing) && !signals.cancelled.load(Ordering::Acquire)
253-
{
254-
Self::run_post_scan(&shared_picker, &signals, &config);
255-
}
256-
257261
// 6. Drain any rescan that arrived while we were busy.
258262
//
259263
// `trigger_full_rescan_async` sets `rescan_pending` whenever a
@@ -312,22 +316,17 @@ impl ScanJob {
312316
};
313317
let budget: &ContentCacheBudget = unsafe { &*unsafe_snapshot.budget };
314318

315-
if config.warmup && !signals.cancelled.load(Ordering::Acquire) {
316-
warmup_mmaps(
317-
files,
318-
budget,
319-
&unsafe_snapshot.base_path,
320-
unsafe_snapshot.arena,
321-
);
322-
}
323-
324319
if config.content_indexing && !signals.cancelled.load(Ordering::Acquire) {
320+
// Unified pass: bigram indexing + warmup cache fill in one sweep.
321+
// When warmup is enabled, high-frecency files are processed first
322+
// so they fill the cache budget before it's exhausted.
325323
let indexable_files = &files[..unsafe_snapshot.indexable_count.min(files.len())];
326324
let (index, content_binary) = build_bigram_index(
327325
indexable_files,
328326
budget,
329327
&unsafe_snapshot.base_path,
330328
unsafe_snapshot.arena,
329+
config.warmup,
331330
);
332331

333332
if let Ok(mut guard) = shared_picker.write()
@@ -340,28 +339,14 @@ impl ScanJob {
340339
}
341340
picker.set_bigram_index(index, BigramOverlay::new(unsafe_snapshot.indexable_count));
342341
}
343-
}
344-
345-
if config.content_indexing && !signals.cancelled.load(Ordering::Acquire) {
346-
let indexable_files = &files[..unsafe_snapshot.indexable_count.min(files.len())];
347-
let (index, content_binary) = build_bigram_index(
348-
indexable_files,
342+
} else if config.warmup && !signals.cancelled.load(Ordering::Acquire) {
343+
// Warmup-only: no bigram indexing, just fill the mmap cache.
344+
warmup_mmaps(
345+
files,
349346
budget,
350347
&unsafe_snapshot.base_path,
351348
unsafe_snapshot.arena,
352349
);
353-
354-
if let Ok(mut guard) = shared_picker.write()
355-
&& let Some(picker) = guard.as_mut()
356-
{
357-
for &idx in &content_binary {
358-
if let Some(file) = picker.get_file_mut(idx) {
359-
file.set_binary(true);
360-
}
361-
}
362-
363-
picker.set_bigram_index(index, BigramOverlay::new(unsafe_snapshot.indexable_count));
364-
}
365350
}
366351
}
367352
}
@@ -453,6 +438,10 @@ fn apply_git_status_and_frecency(
453438

454439
BACKGROUND_THREAD_POOL.install(|| {
455440
files.par_iter_mut().for_each(|file| {
441+
if unsafe_snapshot.cancelled.load(Ordering::Relaxed) {
442+
return;
443+
}
444+
456445
let mut buf = [0u8; crate::simd_path::PATH_BUF_SIZE];
457446
let absolute_path = file.write_absolute_path(
458447
unsafe_snapshot.arena,

crates/fff-core/src/shared.rs

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,44 @@ impl SharedFilePicker {
113113
.is_some_and(|p| p.has_mmap_cache() || p.has_content_indexing())
114114
}
115115

116+
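/// Block until post-scan indexing (bigram + warmup) finishes. Returns `true` once the indexing flag has been seen active and then stayed inactive (or when no picker exists), `false` on timeout.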
117+
pub fn wait_for_post_scan(&self, timeout: Duration) -> bool {
118+
let signal = {
119+
let guard = self.0.picker.read();
120+
match &*guard {
121+
Some(picker) => Arc::clone(&picker.signals.post_scan_indexing_active),
122+
None => return true,
123+
}
124+
};
125+
126+
let start = std::time::Instant::now();
127+
// The post_scan_indexing_active flag cycles twice:
128+
// once for git+frecency, once for warmup+bigram. We need to wait for
129+
// both cycles. Strategy: keep waiting until it's been inactive for a
130+
// sustained period indicating no more work is coming.
131+
let mut saw_active = false;
132+
let mut last_inactive_at: Option<std::time::Instant> = None;
133+
134+
loop {
135+
if start.elapsed() >= timeout {
136+
return false;
137+
}
138+
139+
let active = signal.load(std::sync::atomic::Ordering::Acquire);
140+
if active {
141+
saw_active = true;
142+
last_inactive_at = None;
143+
} else if saw_active {
144+
// Flag went inactive. Wait 200ms to confirm it doesn't reactivate.
145+
let inactive_since = last_inactive_at.get_or_insert_with(std::time::Instant::now);
146+
if inactive_since.elapsed() > Duration::from_millis(200) {
147+
return true;
148+
}
149+
}
150+
std::thread::sleep(Duration::from_millis(10));
151+
}
152+
}
153+
116154
/// Block until the background filesystem scan finishes.
117155
/// Returns `true` if scan completed, `false` on timeout.
118156
pub fn wait_for_scan(&self, timeout: Duration) -> bool {

crates/fff-nvim/Cargo.toml

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,3 +105,7 @@ harness = false
105105
[[bench]]
106106
name = "query_tracker_bench"
107107
harness = false
108+
109+
[[bench]]
110+
name = "post_scan_bench"
111+
harness = false

0 commit comments

Comments
 (0)