Skip to content

Commit 1db36c7

Browse files
committed
gix-blame: prefilter blame walk with changed-path bloom cache
Use the new changed-path bloom filters from the commit graph to greatly speed up blame our implementation. Whenever we find a rejection on the bloom filter for the current path, we skip it altogether and pass the blame without diffing the trees.
1 parent 0b9280f commit 1db36c7

3 files changed

Lines changed: 135 additions & 1 deletion

File tree

gix-blame/src/file/function.rs

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -173,6 +173,20 @@ pub fn incremental(
173173
continue;
174174
}
175175

176+
let first_parent_bloom_result =
177+
maybe_changed_path_in_bloom_filter(cache, suspect, current_file_path.as_ref(), &mut stats);
178+
179+
if first_parent_bloom_result == Some(false) {
180+
let (first_parent_id, first_parent_commit_time) = parent_ids[0];
181+
pass_blame_from_to(suspect, first_parent_id, &mut hunks_to_blame);
182+
queue.insert(first_parent_commit_time, first_parent_id);
183+
previous_entry = previous_entry
184+
.take()
185+
.filter(|(id, _)| *id == suspect)
186+
.map(|(_, entry)| (first_parent_id, entry));
187+
continue 'outer;
188+
}
189+
176190
let mut entry = previous_entry
177191
.take()
178192
.filter(|(id, _)| *id == suspect)
@@ -269,6 +283,9 @@ pub fn incremental(
269283
options.rewrites,
270284
)?;
271285
let Some(modification) = changes_for_file_path else {
286+
if index == 0 && first_parent_bloom_result == Some(true) {
287+
stats.bloom_false_positive += 1;
288+
}
272289
if more_than_one_parent {
273290
// None of the changes affected the file we’re currently blaming.
274291
// Copy blame to parent.
@@ -910,6 +927,33 @@ fn collect_parents(
910927
Ok(parent_ids)
911928
}
912929

930+
fn maybe_changed_path_in_bloom_filter(
931+
cache: Option<&gix_commitgraph::Graph>,
932+
commit_id: ObjectId,
933+
path: &BStr,
934+
stats: &mut Statistics,
935+
) -> Option<bool> {
936+
if let Some(cache) = cache {
937+
let result = cache.maybe_contains_path_by_id(commit_id, path);
938+
match result {
939+
Some(false) => {
940+
stats.bloom_queries += 1;
941+
stats.bloom_definitely_not += 1;
942+
}
943+
Some(true) => {
944+
stats.bloom_queries += 1;
945+
stats.bloom_maybe += 1;
946+
}
947+
None => {
948+
stats.bloom_filter_not_present += 1;
949+
}
950+
}
951+
result
952+
} else {
953+
None
954+
}
955+
}
956+
913957
/// Return an iterator over tokens for use in diffing. These are usually lines, but it's important
914958
/// to unify them so the later access shows the right thing.
915959
pub(crate) fn tokens_for_diffing(data: &[u8]) -> impl TokenSource<Token = &[u8]> {

gix-blame/src/types.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -250,6 +250,16 @@ pub struct Statistics {
250250
/// The amount of blobs there were compared to each other to learn what changed between commits.
251251
/// Note that in order to diff a blob, one needs to load both versions from the database.
252252
pub blobs_diffed: usize,
253+
/// Number of times changed-path Bloom filters were queried successfully.
254+
pub bloom_queries: usize,
255+
/// Number of queries where Bloom filters were unavailable for the commit.
256+
pub bloom_filter_not_present: usize,
257+
/// Number of Bloom answers that were `maybe`.
258+
pub bloom_maybe: usize,
259+
/// Number of Bloom answers that were `definitely not`.
260+
pub bloom_definitely_not: usize,
261+
/// Number of `maybe` Bloom answers that turned out not to affect the path.
262+
pub bloom_false_positive: usize,
253263
}
254264

255265
impl Outcome {

gix-blame/tests/blame.rs

Lines changed: 81 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,8 @@
1-
use std::{collections::BTreeMap, path::PathBuf};
1+
use std::{
2+
collections::BTreeMap,
3+
path::{Path, PathBuf},
4+
process::Command,
5+
};
26

37
use gix_blame::BlameRanges;
48
use gix_hash::ObjectId;
@@ -351,6 +355,56 @@ fn diff_algorithm_parity() {
351355
}
352356
}
353357

358+
#[test]
359+
fn changed_path_bloom_prefilter_keeps_blame_output_identical() -> gix_testtools::Result {
360+
let worktree_path = fixture_path()?;
361+
write_changed_path_commit_graph(&worktree_path)?;
362+
363+
let source_file_name: gix_object::bstr::BString = "simple.txt".into();
364+
let options = gix_blame::Options {
365+
diff_algorithm: gix_diff::blob::Algorithm::Histogram,
366+
ranges: BlameRanges::default(),
367+
since: None,
368+
rewrites: Some(gix_diff::Rewrites::default()),
369+
debug_track_path: false,
370+
};
371+
372+
let Fixture {
373+
odb,
374+
mut resource_cache,
375+
suspect,
376+
} = Fixture::for_worktree_path(worktree_path.clone())?;
377+
let without_bloom = gix_blame::file(
378+
&odb,
379+
suspect,
380+
None,
381+
&mut resource_cache,
382+
source_file_name.as_ref(),
383+
options.clone(),
384+
)?;
385+
386+
let Fixture {
387+
odb,
388+
mut resource_cache,
389+
suspect,
390+
} = Fixture::for_worktree_path(worktree_path.clone())?;
391+
let cache = gix_commitgraph::at(worktree_path.join(".git/objects/info"))
392+
.map_err(|err| std::io::Error::other(format!("loading commit-graph failed: {err}")))?;
393+
let with_bloom = gix_blame::file(
394+
&odb,
395+
suspect,
396+
Some(cache),
397+
&mut resource_cache,
398+
source_file_name.as_ref(),
399+
options,
400+
)?;
401+
402+
pretty_assertions::assert_eq!(with_bloom.entries, without_bloom.entries);
403+
assert!(with_bloom.statistics.bloom_queries > 0);
404+
assert!(with_bloom.statistics.trees_diffed <= without_bloom.statistics.trees_diffed);
405+
Ok(())
406+
}
407+
354408
#[test]
355409
fn file_that_was_added_in_two_branches() -> gix_testtools::Result {
356410
let worktree_path = gix_testtools::scripted_fixture_read_only("make_blame_two_roots_repo.sh")?;
@@ -754,6 +808,32 @@ mod incremental {
754808
}
755809
}
756810

811+
fn write_changed_path_commit_graph(worktree_path: &Path) -> gix_testtools::Result {
812+
let config_status = Command::new("git")
813+
.args(["config", "commitGraph.changedPathsVersion", "2"])
814+
.current_dir(worktree_path)
815+
.status()?;
816+
assert!(
817+
config_status.success(),
818+
"setting commitGraph.changedPathsVersion should succeed"
819+
);
820+
821+
let write_status = Command::new("git")
822+
.args([
823+
"commit-graph",
824+
"write",
825+
"--no-progress",
826+
"--reachable",
827+
"--changed-paths",
828+
])
829+
.current_dir(worktree_path)
830+
.status()?;
831+
assert!(
832+
write_status.success(),
833+
"writing changed-path commit-graph should succeed"
834+
);
835+
Ok(())
836+
}
757837
fn fixture_path() -> gix_testtools::Result<PathBuf> {
758838
gix_testtools::scripted_fixture_read_only("make_blame_repo.sh")
759839
}

0 commit comments

Comments
 (0)