Skip to content

Commit b005c0a

Browse files
fix: Add reasonable memap cache limits (#301)
* fix: Allow fff to work outside of git repo * chore: Update docs for - fix: Allow fff to work outside of git repo
1 parent 2ba8415 commit b005c0a

14 files changed

Lines changed: 825 additions & 130 deletions

File tree

crates/fff-c/src/lib.rs

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -403,7 +403,8 @@ pub unsafe extern "C" fn fff_live_grep(
403403
classify_definitions,
404404
};
405405

406-
let result = fff::grep::grep_search(picker.get_files(), &parsed, &options);
406+
let result =
407+
fff::grep::grep_search(picker.get_files(), &parsed, &options, picker.cache_budget());
407408
let grep_result = FffGrepResult::from_core(&result);
408409
FffResult::ok_handle(grep_result as *mut c_void)
409410
}
@@ -504,7 +505,13 @@ pub unsafe extern "C" fn fff_multi_grep(
504505
classify_definitions,
505506
};
506507

507-
let result = fff::multi_grep_search(picker.get_files(), &patterns, constraint_refs, &options);
508+
let result = fff::multi_grep_search(
509+
picker.get_files(),
510+
&patterns,
511+
constraint_refs,
512+
&options,
513+
picker.cache_budget(),
514+
);
508515
let grep_result = FffGrepResult::from_core(&result);
509516
FffResult::ok_handle(grep_result as *mut c_void)
510517
}
@@ -661,8 +668,8 @@ pub unsafe extern "C" fn fff_restart_index(
661668
Err(e) => return FffResult::err(&format!("Failed to acquire file picker lock: {}", e)),
662669
};
663670

664-
let (warmup, mode) = if let Some(mut picker) = guard.take() {
665-
let warmup = picker.warmup_mmap_cache();
671+
let (warmup_caches, mode) = if let Some(mut picker) = guard.take() {
672+
let warmup = picker.need_warmup_mmap_cache();
666673
let mode = picker.mode();
667674
picker.stop_background_monitor();
668675
(warmup, mode)
@@ -674,7 +681,7 @@ pub unsafe extern "C" fn fff_restart_index(
674681

675682
match FilePicker::new_with_shared_state(
676683
canonical_path.to_string_lossy().to_string(),
677-
warmup,
684+
warmup_caches,
678685
mode,
679686
Arc::clone(&inst.picker),
680687
Arc::clone(&inst.frecency),

crates/fff-core/src/file_picker.rs

Lines changed: 70 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ use crate::git::GitStatusCache;
3737
use crate::grep::{GrepResult, GrepSearchOptions, grep_search};
3838
use crate::query_tracker::QueryTracker;
3939
use crate::score::match_and_score_files;
40-
use crate::types::{FileItem, PaginationArgs, ScoringContext, SearchResult};
40+
use crate::types::{ContentCacheBudget, FileItem, PaginationArgs, ScoringContext, SearchResult};
4141
use crate::{SharedFrecency, SharedPicker};
4242
use fff_query_parser::FFFQuery;
4343
use git2::{Repository, Status, StatusOptions};
@@ -47,7 +47,7 @@ use std::io::Read;
4747
use std::path::{Path, PathBuf};
4848
use std::sync::{
4949
Arc,
50-
atomic::{AtomicBool, AtomicUsize, Ordering},
50+
atomic::{AtomicBool, AtomicU64, AtomicUsize, Ordering},
5151
};
5252
use std::time::SystemTime;
5353
use tracing::{Level, debug, error, info, warn};
@@ -226,6 +226,7 @@ pub struct FilePicker {
226226
warmup_mmap_cache: bool,
227227
cancelled: Arc<AtomicBool>,
228228
mode: FFFMode,
229+
pub cache_budget: Arc<ContentCacheBudget>,
229230
}
230231

231232
impl std::fmt::Debug for FilePicker {
@@ -247,14 +248,18 @@ impl FilePicker {
247248
&self.base_path
248249
}
249250

250-
pub fn warmup_mmap_cache(&self) -> bool {
251+
pub fn need_warmup_mmap_cache(&self) -> bool {
251252
self.warmup_mmap_cache
252253
}
253254

254255
pub fn mode(&self) -> FFFMode {
255256
self.mode
256257
}
257258

259+
pub fn cache_budget(&self) -> &ContentCacheBudget {
260+
&self.cache_budget
261+
}
262+
258263
pub fn git_root(&self) -> Option<&Path> {
259264
self.sync_data.git_workdir.as_deref()
260265
}
@@ -306,6 +311,7 @@ impl FilePicker {
306311
warmup_mmap_cache,
307312
cancelled: Arc::clone(&cancelled),
308313
mode,
314+
cache_budget: Arc::new(ContentCacheBudget::default()),
309315
};
310316

311317
// Place the picker into the shared handle before spawning the
@@ -423,8 +429,9 @@ impl FilePicker {
423429
files: &'a [FileItem],
424430
query: &FFFQuery<'_>,
425431
options: &GrepSearchOptions,
432+
budget: &ContentCacheBudget,
426433
) -> GrepResult<'a> {
427-
grep_search(files, query, options)
434+
grep_search(files, query, options, budget)
428435
}
429436

430437
// Returns an ongoing or finisshed scan progress
@@ -594,7 +601,7 @@ impl FilePicker {
594601
// mapping here because on linux and macos with the shared map opening it
595602
// should be automatically available everywhere automatically which saves
596603
// some time from doing extra remapping on every search
597-
file.invalidate_mmap();
604+
file.invalidate_mmap(&self.cache_budget);
598605
}
599606
}
600607

@@ -680,12 +687,15 @@ impl FilePicker {
680687
);
681688

682689
self.sync_data = sync;
690+
// Old FileItems (and their mmaps) were dropped — reset the budget.
691+
self.cache_budget.reset();
683692

684693
if self.warmup_mmap_cache {
685694
// Warmup in background to avoid blocking
686695
let files = self.sync_data.files().to_vec(); // Clone all files
696+
let budget = Arc::clone(&self.cache_budget);
687697
std::thread::spawn(move || {
688-
warmup_mmaps(&files);
698+
warmup_mmaps(&files, &budget);
689699
});
690700
}
691701
}
@@ -778,6 +788,8 @@ fn spawn_scan_and_watcher(
778788
let write_result = shared_picker.write().ok().map(|mut guard| {
779789
if let Some(ref mut picker) = *guard {
780790
picker.sync_data = sync;
791+
// Old FileItems (and their mmaps) were dropped — reset the budget.
792+
picker.cache_budget.reset();
781793
}
782794
});
783795

@@ -791,7 +803,7 @@ fn spawn_scan_and_watcher(
791803
&& let Ok(guard) = shared_picker.read()
792804
&& let Some(ref picker) = *guard
793805
{
794-
warmup_mmaps(picker.sync_data.files());
806+
warmup_mmaps(picker.sync_data.files(), &picker.cache_budget);
795807
}
796808
}
797809
Err(e) => {
@@ -844,26 +856,64 @@ fn spawn_scan_and_watcher(
844856
});
845857
}
846858

847-
/// Pre-populate mmap caches for all eligible files so the first grep search
848-
/// doesn't pay the mmap creation + page fault cost.
859+
/// Pre-populate mmap caches for the most valuable files so the first grep
860+
/// search doesn't pay the mmap creation + page fault cost.
849861
///
850-
/// Each file is mmap'd and a single byte is read to trigger the page fault.
851-
/// This runs in parallel using rayon.
862+
/// All files are collected once, then an O(n) `select_nth_unstable_by`
863+
/// partitions the top [`MAX_CACHED_CONTENT_FILES`] highest-frecency eligible
864+
/// files to the front (binary / empty files are pushed to the end by the
865+
/// comparator). The selected prefix is warmed in parallel via rayon.
866+
///
867+
/// Files beyond the budget are still available via temporary mmaps on first
868+
/// grep access, so correctness is unaffected.
852869
#[tracing::instrument(skip(files), name = "warmup_mmaps", level = Level::DEBUG)]
853-
fn warmup_mmaps(files: &[FileItem]) {
854-
let warmed = std::sync::atomic::AtomicUsize::new(0);
870+
fn warmup_mmaps(files: &[FileItem], budget: &ContentCacheBudget) {
871+
let max_files = budget.max_files;
872+
let max_bytes: u64 = 512 * 1024 * 1024;
873+
874+
// Single collect — no pre-filter. The comparator in select_nth pushes
875+
// ineligible files (binary, empty) to the tail automatically.
876+
let mut all: Vec<&FileItem> = files.iter().collect();
877+
878+
// O(n) partial sort: top max_files eligible-by-frecency files land in
879+
// all[..max_files]. Ineligible files compare as "lowest priority" so
880+
// they naturally sink past the partition boundary.
881+
if all.len() > max_files {
882+
all.select_nth_unstable_by(max_files, |a, b| {
883+
let a_ok = !a.is_binary && a.size > 0;
884+
let b_ok = !b.is_binary && b.size > 0;
885+
match (a_ok, b_ok) {
886+
(true, false) => std::cmp::Ordering::Less,
887+
(false, true) => std::cmp::Ordering::Greater,
888+
(false, false) => std::cmp::Ordering::Equal,
889+
(true, true) => b.total_frecency_score.cmp(&a.total_frecency_score),
890+
}
891+
});
892+
}
893+
894+
let to_warm = &all[..all.len().min(max_files)];
895+
896+
let warmed_bytes = AtomicU64::new(0);
897+
let budget_exhausted = AtomicBool::new(false);
855898

856-
files.par_iter().for_each(|file| {
857-
if file.is_binary || file.size == 0 {
899+
to_warm.par_iter().for_each(|file| {
900+
if budget_exhausted.load(Ordering::Relaxed) {
858901
return;
859902
}
860903

861-
if let Some(content) = file.get_mmap() {
862-
// Read the first byte to trigger the initial page fault (mmap)
863-
// or ensure the content is cached (Windows buffer).
864-
let _ = std::hint::black_box(content.first());
904+
if file.is_binary || file.size == 0 || file.size > 5 * 1024 * 1024 {
905+
return;
906+
}
865907

866-
warmed.fetch_add(1, Ordering::Relaxed);
908+
// Byte budget.
909+
let prev_bytes = warmed_bytes.fetch_add(file.size, Ordering::Relaxed);
910+
if prev_bytes + file.size > max_bytes {
911+
budget_exhausted.store(true, Ordering::Relaxed);
912+
return;
913+
}
914+
915+
if let Some(content) = file.get_mmap(budget) {
916+
let _ = std::hint::black_box(content.first());
867917
}
868918
});
869919
}

crates/fff-core/src/grep.rs

Lines changed: 22 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
88
use crate::constraints::apply_constraints;
99
use crate::sort_buffer::sort_with_buffer;
10-
use crate::types::FileItem;
10+
use crate::types::{ContentCacheBudget, FileItem};
1111
use aho_corasick::AhoCorasick;
1212
use fff_grep::lines::{self, LineStep};
1313
use fff_grep::{Searcher, SearcherBuilder, Sink, SinkMatch};
@@ -879,6 +879,7 @@ pub fn multi_grep_search<'a>(
879879
patterns: &[&str],
880880
constraints: &[fff_query_parser::Constraint<'_>],
881881
options: &GrepSearchOptions,
882+
budget: &ContentCacheBudget,
882883
) -> GrepResult<'a> {
883884
let total_files = files.len();
884885

@@ -937,6 +938,7 @@ pub fn multi_grep_search<'a>(
937938
total_files,
938939
filtered_file_count,
939940
None,
941+
budget,
940942
|file_bytes: &[u8], max_matches: usize| {
941943
let state = SinkState {
942944
file_index: 0,
@@ -1049,6 +1051,7 @@ fn run_file_search<'a, F>(
10491051
total_files: usize,
10501052
filtered_file_count: usize,
10511053
regex_fallback_error: Option<String>,
1054+
budget: &ContentCacheBudget,
10521055
search_file: F,
10531056
) -> GrepResult<'a>
10541057
where
@@ -1079,8 +1082,8 @@ where
10791082
return None;
10801083
}
10811084

1082-
let content = file.get_mmap()?;
1083-
let file_matches = search_file(content, options.max_matches_per_file);
1085+
let content = file.get_content_for_search(budget)?;
1086+
let file_matches = search_file(&content, options.max_matches_per_file);
10841087

10851088
if file_matches.is_empty() {
10861089
return None;
@@ -1261,6 +1264,7 @@ fn fuzzy_grep_search<'a>(
12611264
total_files: usize,
12621265
filtered_file_count: usize,
12631266
case_insensitive: bool,
1267+
budget: &ContentCacheBudget,
12641268
) -> GrepResult<'a> {
12651269
// max_typos controls how many *needle* characters can be unmatched.
12661270
// A transposition (e.g. "shcema" → "schema") costs ~1 typo with
@@ -1364,7 +1368,8 @@ fn fuzzy_grep_search<'a>(
13641368
return None;
13651369
}
13661370

1367-
let file_bytes = file.get_mmap()?;
1371+
let file_content = file.get_content_for_search(budget)?;
1372+
let file_bytes: &[u8] = &file_content;
13681373

13691374
// File-level prefilter: check if enough distinct needle chars
13701375
// exist anywhere in the file bytes. Uses memchr for speed.
@@ -1558,6 +1563,7 @@ pub fn grep_search<'a>(
15581563
files: &'a [FileItem],
15591564
query: &FFFQuery<'_>,
15601565
options: &GrepSearchOptions,
1566+
budget: &ContentCacheBudget,
15611567
) -> GrepResult<'a> {
15621568
let total_files = files.len();
15631569

@@ -1640,6 +1646,7 @@ pub fn grep_search<'a>(
16401646
total_files,
16411647
filtered_file_count,
16421648
case_insensitive,
1649+
budget,
16431650
);
16441651
}
16451652
GrepMode::Regex => build_regex(&grep_text, options.smart_case)
@@ -1691,6 +1698,7 @@ pub fn grep_search<'a>(
16911698
total_files,
16921699
filtered_file_count,
16931700
regex_fallback_error,
1701+
budget,
16941702
|file_bytes: &[u8], max_matches: usize| {
16951703
let state = SinkState {
16961704
file_index: 0, // set by run_file_search
@@ -1938,6 +1946,7 @@ mod tests {
19381946
&["GrepMode", "GrepMatch", "PlainTextMatcher"],
19391947
&[],
19401948
&options,
1949+
&ContentCacheBudget::zero(),
19411950
);
19421951

19431952
// Should find matches from file1 (GrepMode, GrepMatch) and file2 (PlainTextMatcher)
@@ -1969,15 +1978,22 @@ mod tests {
19691978
assert_eq!(result.files.len(), 2, "Should match exactly 2 files");
19701979

19711980
// Test with single pattern
1972-
let result2 = super::multi_grep_search(&files, &["PlainTextMatcher"], &[], &options);
1981+
let result2 = super::multi_grep_search(
1982+
&files,
1983+
&["PlainTextMatcher"],
1984+
&[],
1985+
&options,
1986+
&ContentCacheBudget::unlimited(),
1987+
);
19731988
assert_eq!(
19741989
result2.matches.len(),
19751990
1,
19761991
"Single pattern should find 1 match"
19771992
);
19781993

19791994
// Test with empty patterns
1980-
let result3 = super::multi_grep_search(&files, &[], &[], &options);
1995+
let result3 =
1996+
super::multi_grep_search(&files, &[], &[], &options, &ContentCacheBudget::unlimited());
19811997
assert_eq!(
19821998
result3.matches.len(),
19831999
0,

0 commit comments

Comments
 (0)