Skip to content

Commit 8f8df87

Browse files
committed
fix: Parallelize git & indexing
1 parent 6314d85 commit 8f8df87

6 files changed

Lines changed: 80 additions & 40 deletions

File tree

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/fff-core/src/file_picker.rs

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1233,9 +1233,12 @@ impl FilePicker {
12331233
}
12341234
}
12351235

1236-
pub(crate) fn set_bigram_index(&mut self, index: BigramFilter, overlay: BigramOverlay) {
1236+
pub(crate) fn set_bigram_index(&mut self, index: BigramFilter) {
12371237
self.sync_data.bigram_index = Some(Arc::new(index));
1238-
self.sync_data.bigram_overlay = Some(Arc::new(parking_lot::RwLock::new(overlay)));
1238+
// Once the index is replaced, automatically reset the overlay as well.
1239+
self.sync_data.bigram_overlay = Some(Arc::new(parking_lot::RwLock::new(
1240+
BigramOverlay::new(self.sync_data.indexable_count),
1241+
)));
12391242
}
12401243

12411244
pub(crate) fn scan_signals(&self) -> crate::scan::ScanSignals {
@@ -1720,14 +1723,15 @@ impl FileSync {
17201723
std::thread::spawn(move || {
17211724
GitStatusCache::read_git_status(
17221725
Some(git_workdir.as_path()),
1723-
&mut crate::git::default_status_options(),
1726+
&mut crate::git::initial_scan_status_options(),
17241727
)
17251728
})
17261729
}
17271730

17281731
/// Returns files immediately (searchable) and a handle to the in-progress
17291732
/// git status computation. This avoids blocking on `git status` which can
17301733
/// take 10+ seconds on very large repos (e.g. chromium).
1734+
#[tracing::instrument(skip_all, name = "walk_filesystem", level = Level::INFO)]
17311735
pub(crate) fn walk_filesystem(
17321736
base_path: &Path,
17331737
git_workdir: Option<PathBuf>,
@@ -1767,6 +1771,7 @@ impl FileSync {
17671771
// no chunking, no HashMap, just Vec::push under the Mutex.
17681772
let pairs = parking_lot::Mutex::new(Vec::<(FileItem, String)>::new());
17691773

1774+
let walker_span = tracing::info_span!("walker_run").entered();
17701775
walker.run(|| {
17711776
let pairs = &pairs;
17721777
let counter = Arc::clone(synced_files_count);
@@ -1800,6 +1805,7 @@ impl FileSync {
18001805
ignore::WalkState::Continue
18011806
})
18021807
});
1808+
drop(walker_span);
18031809

18041810
let mut pairs = pairs.into_inner();
18051811
info!(

crates/fff-core/src/git.rs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,20 @@ pub(crate) fn default_status_options() -> StatusOptions {
1515
opts
1616
}
1717

18+
/// Status options for the initial scan / rescan.
19+
///
20+
/// Skips `include_unmodified` because every `FileItem` starts with
21+
/// `git_status: None` (== clean), so a missing cache entry already means
22+
/// "clean" — no need to ask libgit2 to enumerate every tracked path.
23+
/// Saves seconds on huge dirty trees (e.g. chromium with 400k+ entries).
24+
pub(crate) fn initial_scan_status_options() -> StatusOptions {
25+
let mut opts = StatusOptions::new();
26+
opts.include_untracked(true)
27+
.recurse_untracked_dirs(true)
28+
.exclude_submodules(true);
29+
opts
30+
}
31+
1832
#[derive(Debug, Clone, Default)]
1933
pub(crate) struct GitStatusCache(AHashMap<PathBuf, Status>);
2034

crates/fff-core/src/scan.rs

Lines changed: 31 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ use std::path::PathBuf;
2727
use std::sync::Arc;
2828
use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering};
2929

30+
use libc::unsetenv;
3031
use tracing::{error, info};
3132

3233
use crate::FileSync;
@@ -58,7 +59,7 @@ pub(crate) struct ScanSignals {
5859
}
5960

6061
/// Which optional phases a scan should run.
61-
#[derive(Clone, Copy, Default)]
62+
#[derive(Clone, Copy, Default, Debug)]
6263
pub(crate) struct ScanConfig {
6364
pub(crate) warmup: bool,
6465
pub(crate) content_indexing: bool,
@@ -232,26 +233,26 @@ impl ScanJob {
232233
None
233234
};
234235

235-
// 3. Apply git status + frecency off-lock (joins the git thread).
236-
// Done BEFORE the bigram/warmup pass so `FileItem::git_status` is
237-
// populated when the indexer decides which files to pin in the
238-
// mmap cache (dirty files stay hot in the page cache on their own).
239-
if !signals.cancelled.load(Ordering::Acquire)
240-
&& let Some(status_handle) = status_handle
241-
&& let Some(snap) = snapshot.as_mut()
242-
{
243-
apply_git_status_and_frecency(&shared_frecency, status_handle, mode, snap);
244-
}
245-
246-
// 4. Post-scan warmup + bigram build.
236+
// 3. Post-scan warmup + bigram build — runs in parallel with the
237+
// git-status thread to overlap the two expensive phases.
247238
if (config.warmup || config.content_indexing)
248239
&& !signals.cancelled.load(Ordering::Acquire)
249240
&& let Some(snap) = snapshot.as_ref()
250241
{
251242
Self::run_post_scan(&shared_picker, &signals, &config, snap);
252243
}
253244

254-
drop(snapshot);
245+
// 4. Join the git-status thread and apply its result; this HAS to be done after the post scan
246+
if !signals.cancelled.load(Ordering::Acquire)
247+
&& let Some(status_handle) = status_handle
248+
&& let Some(snapshot) = snapshot.as_mut()
249+
{
250+
if let Ok(Some(git_status)) = status_handle.join() {
251+
apply_git_status_and_frecency(git_status, &shared_frecency, mode, snapshot);
252+
}
253+
}
254+
255+
drop(snapshot); // SNAPSHOT SHOULD NOT BE USED AFTER THIS POINT
255256

256257
// 5. Install filesystem watcher (initial scan only).
257258
if config.install_watcher && config.watch && !signals.cancelled.load(Ordering::Acquire) {
@@ -300,7 +301,7 @@ impl ScanJob {
300301
}
301302
}
302303

303-
#[tracing::instrument(skip_all)]
304+
#[tracing::instrument(skip_all, fields(warmup = ?config.warmup, indexing = ?config.content_indexing))]
304305
fn run_post_scan(
305306
shared_picker: &SharedFilePicker,
306307
signals: &ScanSignals,
@@ -314,24 +315,27 @@ impl ScanJob {
314315
.unwrap_or(ArenaPtr::null());
315316
let budget: &ContentCacheBudget = &unsafe_snapshot.budget;
316317
let files: &[crate::types::FileItem] = &unsafe_snapshot.files[..unsafe_snapshot.base_count];
318+
if signals.cancelled.load(Ordering::Acquire) { // bail out only when the scan has been cancelled
319+
return;
320+
}
317321

318322
// Unified bigram build and warmup_mmaps in one pass; it's important to reuse open files as much as possible.
319-
if config.content_indexing && !signals.cancelled.load(Ordering::Acquire) {
323+
if config.content_indexing {
320324
let indexable_files = &files[..unsafe_snapshot.indexable_count.min(files.len())];
321325
let index = build_bigram_index(
322326
indexable_files,
323327
budget,
324328
&unsafe_snapshot.base_path,
325329
arena,
326-
config.warmup,
330+
config.warmup, // can be optionally skipped
327331
);
328332

329333
if let Ok(mut guard) = shared_picker.write()
330334
&& let Some(picker) = guard.as_mut()
331335
{
332-
picker.set_bigram_index(index, BigramOverlay::new(unsafe_snapshot.indexable_count));
336+
picker.set_bigram_index(index);
333337
}
334-
} else if config.warmup && !signals.cancelled.load(Ordering::Acquire) {
338+
} else if config.warmup {
335339
// Warmup-only: no bigram indexing, just fill the mmap cache.
336340
warmup_mmaps(files, budget, &unsafe_snapshot.base_path, arena);
337341
}
@@ -386,21 +390,17 @@ fn rescubscribe_watcher_post_scan(shared_picker: &SharedFilePicker) {
386390
});
387391
}
388392

393+
#[tracing::instrument(
394+
level = "debug",
395+
skip_all,
396+
fields(file_count = tracing::field::Empty, dirty_count = tracing::field::Empty),
397+
)]
389398
fn apply_git_status_and_frecency(
399+
git_cache: GitStatusCache,
390400
shared_frecency: &SharedFrecency,
391-
git_handle: std::thread::JoinHandle<Option<GitStatusCache>>,
392401
mode: FFFMode,
393402
unsafe_snapshot: &mut crate::file_picker::PostScanUnsafeSnapshot,
394403
) {
395-
let git_cache = match git_handle.join() {
396-
Ok(Some(cache)) => cache,
397-
Ok(None) => return,
398-
Err(_) => {
399-
error!("Git status thread panicked");
400-
return;
401-
}
402-
};
403-
404404
let frecency = shared_frecency.read().ok();
405405
let frecency_ref = frecency.as_ref().and_then(|f| f.as_ref());
406406

@@ -445,11 +445,7 @@ fn apply_git_status_and_frecency(
445445
}
446446
});
447447
});
448-
drop(frecency);
449448

450-
info!(
451-
"SCAN: Applied git status to {} files ({} dirty)",
452-
unsafe_snapshot.base_count,
453-
git_cache.statuses_len(),
454-
);
449+
let span = tracing::Span::current();
450+
span.record("dirty_count", git_cache.statuses_len());
455451
}

crates/fff-nvim/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@ smartstring = { version = "1.0.1", features = ["serde"] }
9393
criterion = { version = "0.5", features = ["html_reports"] }
9494
rand = { version = "0.8", features = ["small_rng"] }
9595
tempfile = "3.8"
96+
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
9697

9798
[[bench]]
9899
name = "fuzzy_search"

crates/fff-nvim/benches/scan_bench.rs

Lines changed: 24 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,30 @@ use criterion::{Criterion, criterion_group, criterion_main};
33
use fff::file_picker::{FFFMode, FilePicker};
44
use fff::{FilePickerOptions, SharedFilePicker, SharedFrecency};
55
use std::path::PathBuf;
6+
use std::sync::Once;
67
use std::time::{Duration, Instant};
78

89
const WAIT_TIMEOUT: Duration = Duration::from_secs(300);
910

11+
static TRACING_INIT: Once = Once::new();
12+
13+
fn init_tracing() {
14+
TRACING_INIT.call_once(|| {
15+
use tracing_subscriber::EnvFilter;
16+
use tracing_subscriber::fmt::format::FmtSpan;
17+
18+
let _ = tracing_subscriber::fmt()
19+
.with_env_filter(
20+
EnvFilter::try_from_default_env()
21+
.unwrap_or_else(|_| EnvFilter::new("warn,fff_search=info")),
22+
)
23+
.with_span_events(FmtSpan::CLOSE)
24+
.with_target(true)
25+
.with_writer(std::io::stderr)
26+
.try_init();
27+
});
28+
}
29+
1030
fn resolve_repo() -> Option<PathBuf> {
1131
if let Ok(env_path) = std::env::var("FFF_BENCH_REPO") {
1232
let p = PathBuf::from(env_path);
@@ -15,8 +35,7 @@ fn resolve_repo() -> Option<PathBuf> {
1535
}
1636
}
1737
// Resolve relative to the workspace root (two levels up from this crate).
18-
let workspace_root = PathBuf::from(env!("CARGO_MANIFEST_DIR"))
19-
.join("../..");
38+
let workspace_root = PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("../..");
2039
let default = workspace_root.join("big-repo");
2140
if default.exists() {
2241
return fff::path_utils::canonicalize(&default).ok();
@@ -107,6 +126,7 @@ fn cleanup(sp: SharedFilePicker) {
107126
}
108127

109128
fn bench_full_init(c: &mut Criterion) {
129+
init_tracing();
110130
let Some(repo) = resolve_repo() else {
111131
eprintln!("skip: set FFF_BENCH_REPO or clone a repo to ./big-repo");
112132
return;
@@ -136,6 +156,7 @@ fn bench_full_init(c: &mut Criterion) {
136156
}
137157

138158
fn bench_post_scan_only(c: &mut Criterion) {
159+
init_tracing();
139160
let Some(repo) = resolve_repo() else {
140161
return;
141162
};
@@ -164,6 +185,7 @@ fn bench_post_scan_only(c: &mut Criterion) {
164185
}
165186

166187
fn bench_walk_only(c: &mut Criterion) {
188+
init_tracing();
167189
let Some(repo) = resolve_repo() else {
168190
return;
169191
};

0 commit comments

Comments
 (0)