Skip to content

Commit eb577ea

Browse files
fix: Crash on large repos (#309)
* fix: Crash on large repos
* chore: Update docs for - fix: Crash on large repos
1 parent dd56a3a commit eb577ea

30 files changed

Lines changed: 2353 additions & 464 deletions

Cargo.lock

Lines changed: 8 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

crates/fff-c/src/ffi_types.rs

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -85,9 +85,9 @@ impl From<&FileItem> for FffFileItem {
8585
git_status: cstring_new(format_git_status(item.git_status)),
8686
size: item.size,
8787
modified: item.modified,
88-
access_frecency_score: item.access_frecency_score,
89-
modification_frecency_score: item.modification_frecency_score,
90-
total_frecency_score: item.total_frecency_score,
88+
access_frecency_score: item.access_frecency_score as i64,
89+
modification_frecency_score: item.modification_frecency_score as i64,
90+
total_frecency_score: item.total_frecency_score as i64,
9191
is_binary: item.is_binary,
9292
}
9393
}
@@ -322,9 +322,9 @@ impl FffGrepMatch {
322322
context_after,
323323
size: file.size,
324324
modified: file.modified,
325-
total_frecency_score: file.total_frecency_score,
326-
access_frecency_score: file.access_frecency_score,
327-
modification_frecency_score: file.modification_frecency_score,
325+
total_frecency_score: file.total_frecency_score as i64,
326+
access_frecency_score: file.access_frecency_score as i64,
327+
modification_frecency_score: file.modification_frecency_score as i64,
328328
line_number: m.line_number,
329329
byte_offset: m.byte_offset,
330330
col: m.col as u32,

crates/fff-c/src/lib.rs

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -403,8 +403,14 @@ pub unsafe extern "C" fn fff_live_grep(
403403
classify_definitions,
404404
};
405405

406-
let result =
407-
fff::grep::grep_search(picker.get_files(), &parsed, &options, picker.cache_budget());
406+
let result = fff::grep::grep_search(
407+
picker.get_files(),
408+
&parsed,
409+
&options,
410+
picker.cache_budget(),
411+
None,
412+
None,
413+
);
408414
let grep_result = FffGrepResult::from_core(&result);
409415
FffResult::ok_handle(grep_result as *mut c_void)
410416
}

crates/fff-core/Cargo.toml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,9 @@ crate-type = ["rlib", "staticlib", "cdylib"]
1414
default = []
1515
# Enable C FFI exports
1616
ffi = []
17+
# Call mi_collect(true) after large allocator churn (bigram build).
18+
# Requires mimalloc to be the global allocator (linked by fff-nvim).
19+
mimalloc-collect = ["dep:libmimalloc-sys"]
1720
# Use zlob (Zig-compiled C globbing library) for glob matching.
1821
# Requires Zig to be installed. When disabled, falls back to globset (pure Rust).
1922
zlob = ["dep:zlob", "fff-query-parser/zlob"]
@@ -55,6 +58,7 @@ toml = "0.8"
5558
tracing-appender = "0.2"
5659
tracing-subscriber = { version = "0.3", features = ["env-filter"] }
5760
zlob = { workspace = true, optional = true }
61+
libmimalloc-sys = { version = "0.1", optional = true, features = ["extended"] }
5862
# Platform-specific: dunce for Windows to avoid \\?\ extended path prefix
5963
[target.'cfg(windows)'.dependencies]
6064
dunce = { workspace = true }
@@ -63,3 +67,11 @@ dunce = { workspace = true }
6367
criterion = { version = "0.5", features = ["html_reports"] }
6468
rand = { version = "0.8", features = ["small_rng"] }
6569
tempfile = "3.8"
70+
71+
[[bench]]
72+
name = "parse_bench"
73+
harness = false
74+
75+
[[bench]]
76+
name = "bigram_bench"
77+
harness = false
crates/fff-core/benches/bigram_bench.rs

Lines changed: 119 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,119 @@
1+
use criterion::{BenchmarkId, Criterion, black_box, criterion_group, criterion_main};
2+
use fff_search::types::{BigramFilter, BigramIndexBuilder};
3+
4+
/// Build a realistic bigram index for benchmarking.
5+
/// Simulates a large repo by generating varied content per file.
6+
fn build_test_index(file_count: usize) -> BigramFilter {
7+
let builder = BigramIndexBuilder::new(file_count);
8+
9+
for i in 0..file_count {
10+
// Generate varied content so we get a mix of sparse and dense columns
11+
let content = format!(
12+
"struct File{i} {{ fn process() {{ let controller = read(path); }} }} // module {i}"
13+
);
14+
builder.add_file_content(i, content.as_bytes());
15+
}
16+
17+
builder.compress()
18+
}
19+
20+
fn bench_bigram_query(c: &mut Criterion) {
21+
let file_counts = [10_000, 100_000, 500_000];
22+
23+
for &file_count in &file_counts {
24+
let index = build_test_index(file_count);
25+
eprintln!(
26+
"Index ({} files): {} columns ({} dense, {} sparse)",
27+
file_count,
28+
index.columns_used(),
29+
index.dense_columns(),
30+
index.sparse_columns(),
31+
);
32+
33+
let mut group = c.benchmark_group(format!("bigram_query_{file_count}"));
34+
group.sample_size(500);
35+
36+
let queries: &[(&str, &[u8])] = &[
37+
("short_2char", b"st"),
38+
("medium_6char", b"struct"),
39+
("long_14char", b"let controller"),
40+
("multi_word", b"fn process"),
41+
];
42+
43+
for (name, query) in queries {
44+
group.bench_with_input(BenchmarkId::from_parameter(name), query, |b, q| {
45+
b.iter(|| {
46+
let result = index.query(black_box(q));
47+
black_box(&result);
48+
});
49+
});
50+
}
51+
52+
group.finish();
53+
}
54+
}
55+
56+
fn bench_bigram_is_candidate(c: &mut Criterion) {
57+
let index = build_test_index(500_000);
58+
let candidates = index.query(b"struct").unwrap();
59+
60+
c.bench_function("is_candidate_500k", |b| {
61+
b.iter(|| {
62+
let mut count = 0u32;
63+
for i in 0..500_000 {
64+
if BigramFilter::is_candidate(black_box(&candidates), i) {
65+
count += 1;
66+
}
67+
}
68+
black_box(count)
69+
});
70+
});
71+
72+
c.bench_function("count_candidates_500k", |b| {
73+
b.iter(|| BigramFilter::count_candidates(black_box(&candidates)));
74+
});
75+
}
76+
77+
fn bench_bigram_build(c: &mut Criterion) {
78+
let mut group = c.benchmark_group("bigram_build");
79+
group.sample_size(10);
80+
81+
let file_counts = [10_000, 100_000];
82+
83+
for &file_count in &file_counts {
84+
// Pre-generate content so we only measure index building
85+
let contents: Vec<String> = (0..file_count)
86+
.map(|i| {
87+
format!(
88+
"struct File{i} {{ fn process() {{ let controller = read(path); }} }} // mod {i}"
89+
)
90+
})
91+
.collect();
92+
93+
group.bench_with_input(
94+
BenchmarkId::new("build_and_compress", file_count),
95+
&file_count,
96+
|b, &fc| {
97+
b.iter(|| {
98+
let builder = BigramIndexBuilder::new(fc);
99+
for (i, content) in contents.iter().enumerate() {
100+
builder.add_file_content(i, content.as_bytes());
101+
}
102+
let index = builder.compress();
103+
black_box(index.columns_used())
104+
});
105+
},
106+
);
107+
}
108+
109+
group.finish();
110+
}
111+
112+
criterion_group!(
113+
benches,
114+
bench_bigram_query,
115+
bench_bigram_is_candidate,
116+
bench_bigram_build,
117+
);
118+
119+
criterion_main!(benches);
File renamed without changes.

crates/fff-core/src/constraints.rs

Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,32 @@ use smallvec::SmallVec;
1212

1313
use crate::git::is_modified_status;
1414

/// Reports whether `needle` occurs in `haystack`, ignoring ASCII case, without allocating.
///
/// The comparison is byte-wise: ASCII letters match regardless of case, while every
/// other byte (including the bytes of multi-byte UTF-8 sequences) must match exactly.
/// `needle` may be in any case; a needle that is already lowercase yields the same
/// result as a plain lowercase comparison of the haystack.
///
/// An empty `needle` matches every haystack, mirroring `str::contains("")`.
#[inline]
fn contains_ascii_ci(haystack: &str, needle: &str) -> bool {
    let needle = needle.as_bytes();

    // `slice::windows` panics on a zero window size, so answer the empty case up front.
    if needle.is_empty() {
        return true;
    }

    // A needle longer than the haystack produces no windows, so `any` returns `false`.
    haystack
        .as_bytes()
        .windows(needle.len())
        .any(|window| window.eq_ignore_ascii_case(needle))
}
40+
1541
/// Minimum item count before switching to parallel iteration with rayon.
1642
/// Below this threshold, the overhead of thread pool dispatch outweighs the benefit.
1743
const PAR_THRESHOLD: usize = 10_000;
@@ -22,9 +48,6 @@ pub trait Constrainable {
2248
/// The file's relative path (e.g. "src/main.rs")
2349
fn relative_path(&self) -> &str;
2450

25-
/// The file's lowercased relative path for case-insensitive matching
26-
fn relative_path_lower(&self) -> &str;
27-
2851
/// The file name component (e.g. "main.rs")
2952
fn file_name(&self) -> &str;
3053

@@ -152,7 +175,7 @@ fn item_matches_constraint_at_index<T: Constrainable>(
152175
}
153176

154177
// only works with negation
155-
Constraint::Text(text) => item.relative_path_lower().contains(text),
178+
Constraint::Text(text) => contains_ascii_ci(item.relative_path(), text),
156179

157180
// Parts and Exclude are handled at a higher level
158181
Constraint::Parts(_) | Constraint::Exclude(_) | Constraint::FileType(_) => true,

0 commit comments

Comments
 (0)