Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 11 additions & 7 deletions crates/fff-c/include/fff.h
Original file line number Diff line number Diff line change
Expand Up @@ -340,12 +340,14 @@ typedef struct FffMixedSearchResult {
*
* # Parameters
*
* * `base_path` – directory to index (required)
* * `frecency_db_path` – path to frecency LMDB database (NULL/empty to skip)
* * `history_db_path` – path to query history LMDB database (NULL/empty to skip)
* * `use_unsafe_no_lock` – use MDB_NOLOCK for LMDB (useful in single-process setups)
* * `warmup_mmap_cache` – pre-populate mmap caches after the initial scan
* * `ai_mode` – enable AI-agent optimizations (auto-track frecency on modifications)
* * `base_path` – directory to index (required)
* * `frecency_db_path` – path to frecency LMDB database (NULL/empty to skip)
* * `history_db_path` – path to query history LMDB database (NULL/empty to skip)
* * `use_unsafe_no_lock` – use MDB_NOLOCK for LMDB (useful in single-process setups)
* * `enable_mmap_cache` – pre-populate mmap caches after the initial scan
* * `enable_content_indexing` – build content index after the initial scan
* * `watch` – start a background file-system watcher for live updates
* * `ai_mode` – enable AI-agent optimizations (auto-track frecency on modifications)
*
* ## Safety
* String parameters must be valid null-terminated UTF-8 or NULL.
Expand All @@ -354,7 +356,9 @@ struct FffResult *fff_create_instance(const char *base_path,
const char *frecency_db_path,
const char *history_db_path,
bool use_unsafe_no_lock,
bool warmup_mmap_cache,
bool enable_mmap_cache,
bool enable_content_indexing,
bool watch,
bool ai_mode);

/**
Expand Down
38 changes: 23 additions & 15 deletions crates/fff-c/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -110,12 +110,14 @@ fn default_i32(val: i32, default: i32) -> i32 {
///
/// # Parameters
///
/// * `base_path` – directory to index (required)
/// * `frecency_db_path` – path to frecency LMDB database (NULL/empty to skip)
/// * `history_db_path` – path to query history LMDB database (NULL/empty to skip)
/// * `use_unsafe_no_lock` – use MDB_NOLOCK for LMDB (useful in single-process setups)
/// * `warmup_mmap_cache` – pre-populate mmap caches after the initial scan
/// * `ai_mode` – enable AI-agent optimizations (auto-track frecency on modifications)
/// * `base_path` – directory to index (required)
/// * `frecency_db_path` – path to frecency LMDB database (NULL/empty to skip)
/// * `history_db_path` – path to query history LMDB database (NULL/empty to skip)
/// * `use_unsafe_no_lock` – use MDB_NOLOCK for LMDB (useful in single-process setups)
/// * `enable_mmap_cache` – pre-populate mmap caches after the initial scan
/// * `enable_content_indexing` – build content index after the initial scan
/// * `watch` – start a background file-system watcher for live updates
/// * `ai_mode` – enable AI-agent optimizations (auto-track frecency on modifications)
///
/// ## Safety
/// String parameters must be valid null-terminated UTF-8 or NULL.
Expand All @@ -125,7 +127,9 @@ pub unsafe extern "C" fn fff_create_instance(
frecency_db_path: *const c_char,
history_db_path: *const c_char,
use_unsafe_no_lock: bool,
warmup_mmap_cache: bool,
enable_mmap_cache: bool,
enable_content_indexing: bool,
watch: bool,
ai_mode: bool,
) -> *mut FffResult {
let base_path_str = match unsafe { cstr_to_str(base_path) } {
Expand Down Expand Up @@ -186,10 +190,11 @@ pub unsafe extern "C" fn fff_create_instance(
shared_frecency.clone(),
fff::FilePickerOptions {
base_path: base_path_str,
warmup_mmap_cache,
enable_mmap_cache,
enable_content_indexing,
watch,
mode,
cache_budget: None,
..Default::default()
},
) {
return FffResult::err(&format!("Failed to init file picker: {}", e));
Expand Down Expand Up @@ -829,13 +834,15 @@ pub unsafe extern "C" fn fff_restart_index(
Err(e) => return FffResult::err(&format!("Failed to acquire file picker lock: {}", e)),
};

let (warmup_caches, mode) = if let Some(mut picker) = guard.take() {
let warmup = picker.need_warmup_mmap_cache();
let (warmup_caches, content_indexing, watch, mode) = if let Some(mut picker) = guard.take() {
let warmup = picker.need_enable_mmap_cache();
let ci = picker.need_enable_content_indexing();
let w = picker.need_watch();
let mode = picker.mode();
picker.stop_background_monitor();
(warmup, mode)
(warmup, ci, w, mode)
} else {
(false, FFFMode::default())
(false, false, true, FFFMode::default())
};

drop(guard);
Expand All @@ -845,10 +852,11 @@ pub unsafe extern "C" fn fff_restart_index(
inst.frecency.clone(),
fff::FilePickerOptions {
base_path: canonical_path.to_string_lossy().to_string(),
warmup_mmap_cache: warmup_caches,
enable_mmap_cache: warmup_caches,
enable_content_indexing: content_indexing,
watch,
mode,
cache_budget: None,
..Default::default()
},
) {
Ok(()) => FffResult::ok_empty(),
Expand Down
4 changes: 3 additions & 1 deletion crates/fff-core/src/background_watcher.rs
Original file line number Diff line number Diff line change
Expand Up @@ -560,7 +560,9 @@ fn trigger_full_rescan(shared_picker: &SharedPicker, shared_frecency: &SharedFre
// Spawn background warmup + bigram rebuild (mirrors the initial scan's
// post-scan phase). The write lock is still held here but the spawned
// thread re-acquires it later — safe because the guard drops at function end.
picker.spawn_post_rescan_rebuild(shared_picker.clone());
// The write guard on the shared picker is still alive at this point, so the
// flags are read from `picker` directly. Going through
// `shared_picker.need_complex_rebuild()` would take a read lock on the same
// RwLock from the thread that already holds its write lock, which blocks
// forever on a non-reentrant lock.
if picker.need_enable_mmap_cache() || picker.need_enable_content_indexing() {
    picker.spawn_post_rescan_rebuild(shared_picker.clone());
}
}

/// After registering a watch on a newly created directory, list its
Expand Down
77 changes: 50 additions & 27 deletions crates/fff-core/src/file_picker.rs
Original file line number Diff line number Diff line change
Expand Up @@ -389,21 +389,25 @@ impl FileItem {
/// Options for creating a [`FilePicker`].
pub struct FilePickerOptions {
/// Directory to index.
pub base_path: String,
// NOTE(review): this looks like the pre-rename spelling of `enable_mmap_cache`
// left over from the diff view — confirm it should not coexist with the field below.
pub warmup_mmap_cache: bool,
/// Pre-populate mmap caches for top-frecency files after the initial scan.
pub enable_mmap_cache: bool,
/// Build the content index after the initial scan for faster content-aware filtering.
pub enable_content_indexing: bool,
/// Picker mode. It affects how file watcher events are handled and how results are scored.
pub mode: FFFMode,
/// Explicit cache budget. When `None`, the budget is auto-computed from
/// the repo size after the initial scan completes.
pub cache_budget: Option<ContentCacheBudget>,
/// When `false`, `new_with_shared_state` skips the background file watcher.
/// Files are still scanned; mmap warmup and content indexing run only when
/// `enable_mmap_cache` / `enable_content_indexing` are set.
pub watch: bool,
}

impl Default for FilePickerOptions {
fn default() -> Self {
Self {
base_path: ".".into(),
warmup_mmap_cache: false,
enable_mmap_cache: false,
enable_content_indexing: false,
mode: FFFMode::default(),
cache_budget: None,
watch: true,
Expand All @@ -421,7 +425,8 @@ pub struct FilePicker {
watcher_ready: Arc<AtomicBool>,
scanned_files_count: Arc<AtomicUsize>,
background_watcher: Option<BackgroundWatcher>,
warmup_mmap_cache: bool,
enable_mmap_cache: bool,
enable_content_indexing: bool,
watch: bool,
cancelled: Arc<AtomicBool>,
// This is a soft lock that we use to prevent rescan be triggered while the
Expand Down Expand Up @@ -479,8 +484,16 @@ impl FilePicker {
.and_then(|p| p.to_str())
}

pub fn need_warmup_mmap_cache(&self) -> bool {
self.warmup_mmap_cache
pub fn need_enable_mmap_cache(&self) -> bool {
self.enable_mmap_cache
}

pub fn need_enable_content_indexing(&self) -> bool {
self.enable_content_indexing
}

pub fn need_watch(&self) -> bool {
self.watch
}

pub fn mode(&self) -> FFFMode {
Expand Down Expand Up @@ -632,7 +645,8 @@ impl FilePicker {
post_scan_busy: Arc::new(AtomicBool::new(false)),
scanned_files_count: Arc::new(AtomicUsize::new(0)),
sync_data: FileSync::new(),
warmup_mmap_cache: options.warmup_mmap_cache,
enable_mmap_cache: options.enable_mmap_cache,
enable_content_indexing: options.enable_content_indexing,
watch: options.watch,
watcher_ready: Arc::new(AtomicBool::new(false)),
})
Expand All @@ -648,13 +662,15 @@ impl FilePicker {
let picker = Self::new(options)?;

info!(
"Spawning background threads: base_path={}, warmup={}, mode={:?}",
"Spawning background threads: base_path={}, warmup={}, content_indexing={}, mode={:?}",
picker.base_path.display(),
picker.warmup_mmap_cache,
picker.enable_mmap_cache,
picker.enable_content_indexing,
picker.mode,
);

let warmup = picker.warmup_mmap_cache;
let warmup = picker.enable_mmap_cache;
let content_indexing = picker.enable_content_indexing;
let watch = picker.watch;
let mode = picker.mode;

Expand All @@ -678,6 +694,7 @@ impl FilePicker {
watcher_ready,
synced_files_count,
warmup,
content_indexing,
watch,
mode,
shared_picker,
Expand Down Expand Up @@ -1400,13 +1417,15 @@ impl FilePicker {

/// Spawn a background thread that, after a rescan, warms up the mmap caches and
/// rebuilds the bigram index — each step only when its corresponding option is enabled.
pub(crate) fn spawn_post_rescan_rebuild(&self, shared_picker: SharedPicker) -> bool {
if !self.warmup_mmap_cache || self.cancelled.load(Ordering::Relaxed) {
if self.cancelled.load(Ordering::Relaxed) {
return false;
}

let post_scan_busy = Arc::clone(&self.post_scan_busy);
let cancelled = Arc::clone(&self.cancelled);
let auto_budget = !self.has_explicit_cache_budget;
let do_warmup = self.enable_mmap_cache;
let do_content_indexing = self.enable_content_indexing;

post_scan_busy.store(true, Ordering::Release);

Expand Down Expand Up @@ -1450,7 +1469,7 @@ impl FilePicker {

if let Some((files, budget, bp, arena)) = files_snapshot {
// Warmup mmap caches.
if !cancelled.load(Ordering::Acquire) {
if do_warmup && !cancelled.load(Ordering::Acquire) {
let t = std::time::Instant::now();
warmup_mmaps(files, &budget, &bp, arena);
info!(
Expand All @@ -1462,7 +1481,7 @@ impl FilePicker {
}

// Build bigram index (lock-free).
if !cancelled.load(Ordering::Acquire) {
if do_content_indexing && !cancelled.load(Ordering::Acquire) {
let t = std::time::Instant::now();
info!(
"Rescan: starting bigram index build for {} files...",
Expand Down Expand Up @@ -1495,8 +1514,10 @@ impl FilePicker {

post_scan_busy.store(false, Ordering::Release);
info!(
"Rescan post-scan warmup + bigram total: {:.2}s",
"Rescan post-scan phase total: {:.2}s (warmup={}, content_indexing={})",
phase_start.elapsed().as_secs_f64(),
do_warmup,
do_content_indexing,
);
});

Expand Down Expand Up @@ -1617,7 +1638,8 @@ fn spawn_scan_and_watcher(
scan_signal: Arc<AtomicBool>,
watcher_ready: Arc<AtomicBool>,
synced_files_count: Arc<AtomicUsize>,
warmup_mmap_cache: bool,
enable_mmap_cache: bool,
enable_content_indexing: bool,
watch: bool,
mode: FFFMode,
shared_picker: SharedPicker,
Expand Down Expand Up @@ -1725,7 +1747,10 @@ fn spawn_scan_and_watcher(

watcher_ready.store(true, Ordering::Release);

if warmup_mmap_cache && !cancelled.load(Ordering::Acquire) {
let need_post_scan =
(enable_mmap_cache || enable_content_indexing) && !cancelled.load(Ordering::Acquire);

if need_post_scan {
post_scan_busy.store(true, Ordering::Release);
let phase_start = std::time::Instant::now();

Expand Down Expand Up @@ -1768,9 +1793,11 @@ fn spawn_scan_and_watcher(
None
};

// Both of these steps rely on a custom soft lock that the compiler does not enforce.
// This is required to keep the picker functioning when someone opens a really huge
// directory (e.g. 10M files), but it is potentially unsafe.
if let Some((files, budget, arena)) = files_snapshot {
// Warmup: populate mmap caches for top-frecency files.
if !cancelled.load(Ordering::Acquire) {
if enable_mmap_cache && !cancelled.load(Ordering::Acquire) {
let warmup_start = std::time::Instant::now();
warmup_mmaps(files, &budget, &base_path, arena);
info!(
Expand All @@ -1781,16 +1808,9 @@ fn spawn_scan_and_watcher(
);
}

// Build bigram index — entirely lock-free.
if !cancelled.load(Ordering::Acquire) {
let bigram_start = std::time::Instant::now();
info!("Starting bigram index build for {} files...", files.len());
if enable_content_indexing && !cancelled.load(Ordering::Acquire) {
let (index, content_binary) =
build_bigram_index(files, &budget, &base_path, arena);
info!(
"Bigram index ready in {:.2}s",
bigram_start.elapsed().as_secs_f64(),
);

if let Ok(mut guard) = shared_picker.write()
&& let Some(ref mut picker) = *guard
Expand All @@ -1813,8 +1833,10 @@ fn spawn_scan_and_watcher(
post_scan_busy.store(false, Ordering::Release);

info!(
"Post-scan warmup + bigram total: {:.2}s",
"Post-scan phase total: {:.2}s (warmup={}, content_indexing={})",
phase_start.elapsed().as_secs_f64(),
enable_mmap_cache,
enable_content_indexing,
);
}

Expand Down Expand Up @@ -1897,6 +1919,7 @@ pub(crate) fn warmup_mmaps(
/// so reading further adds no new information to the index.
pub const BIGRAM_CONTENT_CAP: usize = 64 * 1024;

#[tracing::instrument(skip_all, name = "Building Bigram Index", level = Level::DEBUG)]
pub(crate) fn build_bigram_index(
files: &[FileItem],
budget: &ContentCacheBudget,
Expand Down
9 changes: 9 additions & 0 deletions crates/fff-core/src/shared.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,15 @@ impl SharedPicker {
Ok(self.0.write())
}

/// Returns `true` when the wrapped picker exists and has mmap-cache warmup or
/// content indexing enabled, i.e. when a rescan must be followed by the slower
/// post-scan rebuild job. Returns `false` when no picker is currently stored.
///
/// NOTE(review): this acquires a read lock on the shared picker; presumably it
/// must not be called from a thread that already holds the write guard —
/// confirm against the lock type in use.
pub fn need_complex_rebuild(&self) -> bool {
    let guard = self.0.read();
    match guard.as_ref() {
        Some(picker) => {
            picker.need_enable_mmap_cache() || picker.need_enable_content_indexing()
        }
        None => false,
    }
}

/// Block until the background filesystem scan finishes.
/// Returns `true` if scan completed, `false` on timeout.
pub fn wait_for_scan(&self, timeout: Duration) -> bool {
Expand Down
3 changes: 2 additions & 1 deletion crates/fff-core/tests/bigram_overlay_coherence_test.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1435,7 +1435,8 @@ fn make_picker(base: &Path) -> (SharedPicker, SharedFrecency) {
shared_frecency.clone(),
FilePickerOptions {
base_path: base.to_string_lossy().to_string(),
warmup_mmap_cache: true,
enable_mmap_cache: true,
enable_content_indexing: true,
mode: FFFMode::Neovim,
watch: false, // we drive events manually
..Default::default()
Expand Down
Loading