Skip to content

Commit 3a23558

Browse files
committed
perf: Optimize filename bonus scoring
1 parent e83b137 commit 3a23558

5 files changed

Lines changed: 48 additions & 133 deletions

File tree

Cargo.lock

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -35,7 +35,7 @@ mimalloc = "0.1.47"
3535
zlob = "1.3.0"
3636

3737
mlua = { version = "0.11.1", features = ["module", "luajit"] }
38-
neo_frizbee = "0.8.2"
38+
neo_frizbee = { version = "0.8.3", features = ["match_end_col"] }
3939
notify = "8.1.0"
4040
notify-debouncer-full = "0.7"
4141
once_cell = "1.20.2"

big-repo

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1 @@
1+
/Users/neogoose/dev/chromium

crates/fff-core/src/score.rs

Lines changed: 44 additions & 94 deletions
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,7 @@ use crate::{
22
constraints::apply_constraints,
33
git::is_modified_status,
44
path_utils::calculate_distance_penalty,
5-
sort_buffer::{sort_by_key_with_buffer, sort_with_buffer},
5+
sort_buffer::sort_with_buffer,
66
types::{FileItem, Score, ScoringContext},
77
};
88
use fff_query_parser::FuzzyQuery;
@@ -34,14 +34,6 @@ impl<'a> FileItems<'a> {
3434
self.len() == 0
3535
}
3636

37-
#[inline]
38-
fn get(&self, index: usize) -> Option<&'a FileItem> {
39-
match self {
40-
FileItems::All(s) => s.get(index),
41-
FileItems::Filtered(v) => v.get(index).copied(),
42-
}
43-
}
44-
4537
/// Build the haystack of relative paths (original casing) for fuzzy matching.
4638
/// neo_frizbee lowercases internally for comparison but preserves original casing
4739
/// for capitalization_bonus and matching_case_bonus scoring.
@@ -70,6 +62,7 @@ fn match_fuzzy_parts(
7062
fuzzy_parts: &[&str],
7163
working_files: &FileItems<'_>,
7264
options: &neo_frizbee::Config,
65+
max_threads: usize,
7366
) -> Vec<neo_frizbee::Match> {
7467
if fuzzy_parts.is_empty() {
7568
return vec![];
@@ -90,13 +83,15 @@ fn match_fuzzy_parts(
9083
}
9184

9285
if valid_parts.len() == 1 {
93-
let matches = neo_frizbee::match_list(valid_parts[0], &haystack, options);
86+
let matches =
87+
neo_frizbee::match_list_parallel(valid_parts[0], &haystack, options, max_threads);
9488
return matches;
9589
}
9690

9791
// Multiple parts - match first part, then filter by remaining parts
9892
// TODO figure out if we can move this logic to my frizbee fork at least
99-
let mut matches = neo_frizbee::match_list(valid_parts[0], &haystack, options);
93+
let mut matches =
94+
neo_frizbee::match_list_parallel(valid_parts[0], &haystack, options, max_threads);
10095
for part in valid_parts[1..].iter() {
10196
let mut part_options = *options;
10297
part_options.max_typos = options.max_typos.map(|t| t.min(part.len() as u16));
@@ -176,53 +171,17 @@ pub fn match_and_score_files<'a>(
176171
},
177172
};
178173

179-
let path_matches = match_fuzzy_parts(fuzzy_parts, &working_files, &options);
180-
let primary_text = fuzzy_parts[0]; // Use first part for filename matching
181-
let haystack_of_filenames: Vec<&str> = path_matches
182-
.iter()
183-
.filter_map(|m| {
184-
working_files
185-
.get(m.index as usize)
186-
.map(|f| f.file_name.as_str())
187-
})
188-
.collect();
189-
190-
// if there is a / in the query we don't even match filenames
191-
let filename_matches = if query_contains_path_separator {
192-
vec![]
193-
} else {
194-
// Use parallel matching only if we have enough filenames to justify overhead
195-
// Sequential matching is faster for small result sets (< 1000 matches)
196-
let mut list = if haystack_of_filenames.len() > 1000 {
197-
neo_frizbee::match_list_parallel(
198-
primary_text,
199-
&haystack_of_filenames,
200-
&options,
201-
context.max_threads,
202-
)
203-
} else {
204-
neo_frizbee::match_list(primary_text, &haystack_of_filenames, &options)
205-
};
206-
207-
// Sequential sort is faster for small lists
208-
if list.len() > 1000 {
209-
list.par_sort_unstable_by_key(|m| m.index);
210-
} else {
211-
sort_by_key_with_buffer(&mut list, |m| m.index);
212-
}
213-
214-
list
215-
};
174+
let path_matches =
175+
match_fuzzy_parts(fuzzy_parts, &working_files, &options, context.max_threads);
176+
let needle_len = fuzzy_parts[0].len() as u16;
216177

217-
let mut next_filename_match_index = 0;
218178
let results: Vec<_> = path_matches
219179
.into_iter()
220-
.enumerate()
221-
.map(|(index, path_match)| {
180+
.map(|path_match| {
222181
let file_idx = path_match.index as usize;
223182
let file = working_files.index(file_idx);
224183

225-
let mut base_score = path_match.score as i32;
184+
let base_score = path_match.score as i32;
226185
let frecency_boost = base_score.saturating_mul(file.total_frecency_score) / 100;
227186

228187
// Give modified/dirty files a 15% boost to make them appear higher in results
@@ -235,45 +194,33 @@ pub fn match_and_score_files<'a>(
235194
let distance_penalty =
236195
calculate_distance_penalty(context.current_file, &file.relative_path);
237196

238-
let filename_match = filename_matches
239-
.get(next_filename_match_index)
240-
.and_then(|m| {
241-
if m.index == index as u32 {
242-
next_filename_match_index += 1;
243-
Some(m)
244-
} else {
245-
None
246-
}
247-
});
197+
// Detect filename match using match_end_col from the SIMD pass.
198+
// Approximate match start = end_col - needle_len + 1.
199+
// If this falls within the filename region, it's a filename match.
200+
let filename_start = (file.relative_path.len() - file.file_name.len()) as u16;
201+
let match_start_approx = path_match.match_end_col.saturating_sub(needle_len - 1);
202+
let is_filename_match =
203+
!query_contains_path_separator && match_start_approx >= filename_start;
204+
let is_exact_filename = path_match.exact && is_filename_match;
248205

249206
let mut has_special_filename_bonus = false;
250-
let filename_bonus = match filename_match {
251-
Some(filename_match) if filename_match.exact => {
252-
filename_match.score as i32 / 5 * 2 // 40% bonus for exact filename match
253-
}
254-
// 16% bonus for fuzzy filename match but only if the score of matched path is
255-
// equal or greater than the score of matched filename, thus we are not allowing
256-
// typoed filename to score higher than the path match
257-
Some(filename_match)
258-
if filename_match.score >= path_match.score
259-
&& !query_contains_path_separator =>
260-
{
261-
base_score = filename_match.score as i32;
262-
263-
(base_score / 6)
264-
// for large queries around ~300 score the bonus is too big
265-
// it might lead to situations when much more fitting path with a larger
266-
// base score getting filtered out by combination of score + filename bonus
267-
// so we cap it at 10% of the roughly largest score you can get
268-
.min(30)
269-
}
270-
// 5% bonus for special file but not as much as file name to avoid sitatuions
207+
let filename_bonus = if is_exact_filename {
208+
base_score / 5 * 2 // 40% bonus for exact filename match
209+
} else if is_filename_match {
210+
// 16% bonus for fuzzy filename match that landed in the filename region
211+
(base_score / 6)
212+
// for large queries around ~300 score the bonus is too big
213+
// it might lead to situations when much more fitting path with a larger
214+
// base score getting filtered out by combination of score + filename bonus
215+
// so we cap it at 10% of the roughly largest score you can get
216+
.min(30)
217+
} else if !is_filename_match && is_special_entry_point_file(&file.file_name) {
218+
// 5% bonus for special file but not as much as file name to avoid situations
271219
// when you have /user_service/server.rs and /user_service/server/mod.rs
272-
None if is_special_entry_point_file(&file.file_name) => {
273-
has_special_filename_bonus = true;
274-
base_score * 5 / 100
275-
}
276-
_ => 0,
220+
has_special_filename_bonus = true;
221+
base_score * 5 / 100
222+
} else {
223+
0
277224
};
278225

279226
let current_file_penalty = calculate_current_file_penalty(file, base_score, context);
@@ -319,12 +266,15 @@ pub fn match_and_score_files<'a>(
319266
git_status_boost,
320267
distance_penalty,
321268
combo_match_boost,
322-
exact_match: path_match.exact || filename_match.is_some_and(|m| m.exact),
323-
match_type: match filename_match {
324-
Some(filename_match) if filename_match.exact => "exact_filename",
325-
Some(_) => "fuzzy_filename",
326-
None if path_match.exact => "exact_path",
327-
None => "fuzzy_path",
269+
exact_match: path_match.exact || is_exact_filename,
270+
match_type: if is_exact_filename {
271+
"exact_filename"
272+
} else if is_filename_match {
273+
"fuzzy_filename"
274+
} else if path_match.exact {
275+
"exact_path"
276+
} else {
277+
"fuzzy_path"
328278
},
329279
};
330280

crates/fff-core/src/sort_buffer.rs

Lines changed: 0 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -40,35 +40,6 @@ where
4040
});
4141
}
4242

43-
pub fn sort_by_key_with_buffer<T, K, F>(slice: &mut [T], key_fn: F)
44-
where
45-
K: Ord,
46-
F: FnMut(&T) -> K,
47-
{
48-
SORT_BUFFER.with(|buffer| {
49-
let mut buffer = buffer.borrow_mut();
50-
51-
// Calculate required buffer size in u8 units
52-
let size_of_t = std::mem::size_of::<MaybeUninit<T>>();
53-
let size_of_usize = std::mem::size_of::<u8>();
54-
let required_usizes = (slice.len() * size_of_t).div_ceil(size_of_usize);
55-
56-
// Ensure buffer has enough capacity
57-
if buffer.len() < required_usizes {
58-
buffer.resize(required_usizes, 0);
59-
}
60-
61-
// Cast u8 buffer to MaybeUninit<T> slice
62-
// SAFETY: u8 provides sufficient alignment for most types, and we've ensured
63-
// the buffer is large enough
64-
let typed_buffer = unsafe {
65-
std::slice::from_raw_parts_mut(buffer.as_mut_ptr() as *mut MaybeUninit<T>, slice.len())
66-
};
67-
68-
glidesort::sort_with_buffer_by_key(slice, typed_buffer, key_fn);
69-
});
70-
}
71-
7243
#[cfg(test)]
7344
mod tests {
7445
use super::*;
@@ -80,13 +51,6 @@ mod tests {
8051
assert_eq!(data, vec![1, 2, 5, 8, 9]);
8152
}
8253

83-
#[test]
84-
fn test_sort_by_key_with_buffer() {
85-
let mut data = vec![(2, "b"), (1, "a"), (3, "c")];
86-
sort_by_key_with_buffer(&mut data, |item| item.0);
87-
assert_eq!(data, vec![(1, "a"), (2, "b"), (3, "c")]);
88-
}
89-
9054
#[test]
9155
fn test_reverse_sort() {
9256
let mut data = vec![1, 2, 3, 4, 5];

0 commit comments

Comments
 (0)