Skip to content

Commit 0b9280f

Browse files
committed
gix-blame: add incremental blame API
Implement a gix_blame::incremental API that yields the blame entries as they're discovered, similarly to Git's `git blame --incremental`. The implementation simply takes the original gix_blame::file and replaces the Vec of blame entries with a generic BlameSink trait. The original gix_blame::file is now implemented as a wrapper for gix_blame::incremental, by implementing the BlameSink trait on Vec<BlameEntry> and sorting + coalescing the entries before returning.
1 parent 85e36cb commit 0b9280f

4 files changed

Lines changed: 231 additions & 39 deletions

File tree

gix-blame/src/file/function.rs

Lines changed: 68 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@ use gix_traverse::commit::find as find_commit;
1010
use smallvec::SmallVec;
1111

1212
use super::{process_changes, Change, UnblamedHunk};
13-
use crate::{types::BlamePathEntry, BlameEntry, Error, Options, Outcome, Statistics};
13+
use crate::{types::BlamePathEntry, BlameEntry, BlameSink, Error, IncrementalOutcome, Options, Outcome, Statistics};
1414

1515
/// Produce a list of consecutive [`BlameEntry`] instances to indicate in which commits the ranges of the file
1616
/// at `suspect:<file_path>` originated in.
@@ -60,26 +60,21 @@ use crate::{types::BlamePathEntry, BlameEntry, Error, Options, Outcome, Statisti
6060
/// <---><----------><-------><-----><------->
6161
/// <---><---><-----><-------><-----><------->
6262
/// <---><---><-----><-------><-----><-><-><->
63-
pub fn file(
63+
pub fn incremental(
6464
odb: impl gix_object::Find + gix_object::FindHeader,
6565
suspect: ObjectId,
66-
cache: Option<gix_commitgraph::Graph>,
66+
cache: Option<&gix_commitgraph::Graph>,
6767
resource_cache: &mut gix_diff::blob::Platform,
6868
file_path: &BStr,
69+
sink: &mut impl BlameSink,
6970
options: Options,
70-
) -> Result<Outcome, Error> {
71-
let _span = gix_trace::coarse!("gix_blame::file()", ?file_path, ?suspect);
71+
) -> Result<IncrementalOutcome, Error> {
72+
let _span = gix_trace::coarse!("gix_blame::incremental()", ?file_path, ?suspect);
7273

7374
let mut stats = Statistics::default();
7475
let (mut buf, mut buf2, mut buf3) = (Vec::new(), Vec::new(), Vec::new());
7576
let blamed_file_entry_id = find_path_entry_in_commit(
76-
&odb,
77-
&suspect,
78-
file_path,
79-
cache.as_ref(),
80-
&mut buf,
81-
&mut buf2,
82-
&mut stats,
77+
&odb, &suspect, file_path, cache, &mut buf, &mut buf2, &mut stats,
8378
)?
8479
.ok_or_else(|| Error::FileMissing {
8580
file_path: file_path.to_owned(),
@@ -90,7 +85,7 @@ pub fn file(
9085

9186
// Binary or otherwise empty?
9287
if num_lines_in_blamed == 0 {
93-
return Ok(Outcome::default());
88+
return Ok(IncrementalOutcome::default());
9489
}
9590

9691
let ranges_to_blame = options.ranges.to_zero_based_exclusive_ranges(num_lines_in_blamed);
@@ -100,12 +95,11 @@ pub fn file(
10095
.collect::<Vec<_>>();
10196

10297
let (mut buf, mut buf2) = (Vec::new(), Vec::new());
103-
let commit = find_commit(cache.as_ref(), &odb, &suspect, &mut buf)?;
98+
let commit = find_commit(cache, &odb, &suspect, &mut buf)?;
10499
let mut queue: gix_revwalk::PriorityQueue<gix_date::SecondsSinceUnixEpoch, ObjectId> =
105100
gix_revwalk::PriorityQueue::new();
106101
queue.insert(commit.commit_time()?, suspect);
107102

108-
let mut out = Vec::new();
109103
let mut diff_state = gix_diff::tree::State::default();
110104
let mut previous_entry: Option<(ObjectId, ObjectId)> = None;
111105
let mut blame_path = if options.debug_track_path {
@@ -132,20 +126,20 @@ pub fn file(
132126
.clone()
133127
.unwrap_or_else(|| file_path.to_owned());
134128

135-
let commit = find_commit(cache.as_ref(), &odb, &suspect, &mut buf)?;
129+
let commit = find_commit(cache, &odb, &suspect, &mut buf)?;
136130
let commit_time = commit.commit_time()?;
137131

138132
if let Some(since) = options.since {
139133
if commit_time < since.seconds {
140-
if unblamed_to_out_is_done(&mut hunks_to_blame, &mut out, suspect) {
134+
if unblamed_to_out_is_done(&mut hunks_to_blame, sink, suspect) {
141135
break 'outer;
142136
}
143137

144138
continue;
145139
}
146140
}
147141

148-
let parent_ids: ParentIds = collect_parents(commit, &odb, cache.as_ref(), &mut buf2)?;
142+
let parent_ids: ParentIds = collect_parents(commit, &odb, cache, &mut buf2)?;
149143

150144
if parent_ids.is_empty() {
151145
if queue.is_empty() {
@@ -154,7 +148,7 @@ pub fn file(
154148
// the remaining lines to it, even though we don’t explicitly check whether that is
155149
// true here. We could perhaps use diff-tree-to-tree to compare `suspect` against
156150
// an empty tree to validate this assumption.
157-
if unblamed_to_out_is_done(&mut hunks_to_blame, &mut out, suspect) {
151+
if unblamed_to_out_is_done(&mut hunks_to_blame, sink, suspect) {
158152
if let Some(ref mut blame_path) = blame_path {
159153
let entry = previous_entry
160154
.take()
@@ -188,7 +182,7 @@ pub fn file(
188182
&odb,
189183
&suspect,
190184
current_file_path.as_ref(),
191-
cache.as_ref(),
185+
cache,
192186
&mut buf,
193187
&mut buf2,
194188
&mut stats,
@@ -239,7 +233,7 @@ pub fn file(
239233
&odb,
240234
parent_id,
241235
current_file_path.as_ref(),
242-
cache.as_ref(),
236+
cache,
243237
&mut buf,
244238
&mut buf2,
245239
&mut stats,
@@ -259,12 +253,13 @@ pub fn file(
259253
let more_than_one_parent = parent_ids.len() > 1;
260254
for (index, (parent_id, parent_commit_time)) in parent_ids.iter().enumerate() {
261255
queue.insert(*parent_commit_time, *parent_id);
256+
262257
let changes_for_file_path = tree_diff_at_file_path(
263258
&odb,
264259
current_file_path.as_ref(),
265260
suspect,
266261
*parent_id,
267-
cache.as_ref(),
262+
cache,
268263
&mut stats,
269264
&mut diff_state,
270265
resource_cache,
@@ -292,7 +287,7 @@ pub fn file(
292287
// Do nothing under the assumption that this always (or almost always)
293288
// implies that the file comes from a different parent, compared to which
294289
// it was modified, not added.
295-
} else if unblamed_to_out_is_done(&mut hunks_to_blame, &mut out, suspect) {
290+
} else if unblamed_to_out_is_done(&mut hunks_to_blame, sink, suspect) {
296291
if let Some(ref mut blame_path) = blame_path {
297292
let blame_path_entry = BlamePathEntry {
298293
source_file_path: current_file_path.clone(),
@@ -389,8 +384,8 @@ pub fn file(
389384
// At this point, we have copied blame for every hunk to a parent. Hunks
390385
// that have only `suspect` left in `suspects` have not passed blame to any
391386
// parent, and so they can be converted to a `BlameEntry` and moved to
392-
// `out`.
393-
out.push(entry);
387+
// the sink.
388+
sink.push(entry);
394389
return false;
395390
}
396391
}
@@ -405,17 +400,51 @@ pub fn file(
405400
"only if there is no portion of the file left we have completed the blame"
406401
);
407402

408-
// I don’t know yet whether it would make sense to use a data structure instead that preserves
409-
// order on insertion.
410-
out.sort_by(|a, b| a.start_in_blamed_file.cmp(&b.start_in_blamed_file));
411-
Ok(Outcome {
412-
entries: coalesce_blame_entries(out),
403+
Ok(IncrementalOutcome {
413404
blob: blamed_file_blob,
414405
statistics: stats,
415406
blame_path,
416407
})
417408
}
418409

410+
/// Produce a list of consecutive [`BlameEntry`] instances to indicate in which commits the ranges of the file
411+
/// at `suspect:<file_path>` originated in.
412+
///
413+
/// This is built on top of [`incremental()`], collecting entries into a [`Vec`] sink.
414+
pub fn file(
415+
odb: impl gix_object::Find + gix_object::FindHeader,
416+
suspect: ObjectId,
417+
cache: Option<gix_commitgraph::Graph>,
418+
resource_cache: &mut gix_diff::blob::Platform,
419+
file_path: &BStr,
420+
options: Options,
421+
) -> Result<Outcome, Error> {
422+
let mut entries = Vec::new();
423+
let IncrementalOutcome {
424+
blob,
425+
statistics,
426+
blame_path,
427+
} = incremental(
428+
odb,
429+
suspect,
430+
cache.as_ref(),
431+
resource_cache,
432+
file_path,
433+
&mut entries,
434+
options,
435+
)?;
436+
437+
// Keep the stable output semantics of `file()` even though `incremental()` emits in generation order.
438+
entries.sort_by(|a, b| a.start_in_blamed_file.cmp(&b.start_in_blamed_file));
439+
440+
Ok(Outcome {
441+
entries: coalesce_blame_entries(entries),
442+
blob,
443+
statistics,
444+
blame_path,
445+
})
446+
}
447+
419448
/// Pass ownership of each unblamed hunk of `from` to `to`.
420449
///
421450
/// This happens when `from` didn't actually change anything in the blamed file.
@@ -425,21 +454,23 @@ fn pass_blame_from_to(from: ObjectId, to: ObjectId, hunks_to_blame: &mut Vec<Unb
425454
}
426455
}
427456

428-
/// Convert each of the unblamed hunk in `hunks_to_blame` into a [`BlameEntry`], consuming them in the process.
457+
/// Convert each of the unblamed hunks in `hunks_to_blame` into a [`BlameEntry`], consuming them in the process,
458+
/// and emit each entry to `sink`.
429459
///
430460
/// Return `true` if we are done because `hunks_to_blame` is empty.
431461
fn unblamed_to_out_is_done(
432462
hunks_to_blame: &mut Vec<UnblamedHunk>,
433-
out: &mut Vec<BlameEntry>,
463+
sink: &mut impl BlameSink,
434464
suspect: ObjectId,
435465
) -> bool {
436466
let mut without_suspect = Vec::new();
437-
out.extend(hunks_to_blame.drain(..).filter_map(|hunk| {
438-
BlameEntry::from_unblamed_hunk(&hunk, suspect).or_else(|| {
467+
for hunk in hunks_to_blame.drain(..) {
468+
if let Some(entry) = BlameEntry::from_unblamed_hunk(&hunk, suspect) {
469+
sink.push(entry);
470+
} else {
439471
without_suspect.push(hunk);
440-
None
441-
})
442-
}));
472+
}
473+
}
443474
*hunks_to_blame = without_suspect;
444475
hunks_to_blame.is_empty()
445476
}

gix-blame/src/lib.rs

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@
1717
mod error;
1818
pub use error::Error;
1919
mod types;
20-
pub use types::{BlameEntry, BlamePathEntry, BlameRanges, Options, Outcome, Statistics};
20+
pub use types::{BlameEntry, BlamePathEntry, BlameRanges, BlameSink, IncrementalOutcome, Options, Outcome, Statistics};
2121

2222
mod file;
23-
pub use file::function::file;
23+
pub use file::function::{file, incremental};

gix-blame/src/types.rs

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,19 @@ use std::{
1010
use crate::file::function::tokens_for_diffing;
1111
use crate::Error;
1212

13+
/// Receives [`BlameEntry`] values incrementally as they are discovered by
14+
/// [`incremental()`](crate::incremental()).
15+
pub trait BlameSink {
16+
/// Receive a single blame chunk in generation order.
17+
fn push(&mut self, entry: BlameEntry);
18+
}
19+
20+
impl BlameSink for Vec<BlameEntry> {
21+
fn push(&mut self, entry: BlameEntry) {
22+
Vec::push(self, entry);
23+
}
24+
}
25+
1326
/// A type to represent one or more line ranges to blame in a file.
1427
///
1528
/// It handles the conversion between git's 1-based inclusive ranges and the internal
@@ -204,6 +217,21 @@ pub struct Outcome {
204217
pub blame_path: Option<Vec<BlamePathEntry>>,
205218
}
206219

220+
/// The outcome of [`incremental()`](crate::incremental()).
221+
///
222+
/// It contains all non-entry information so callers can process [`BlameEntry`] instances
223+
/// incrementally through a [`BlameSink`] while still receiving the metadata that was
224+
/// previously available through [`Outcome`].
225+
#[derive(Debug, Default, Clone)]
226+
pub struct IncrementalOutcome {
227+
/// A buffer with the file content of the *Blamed File*, ready for tokenization.
228+
pub blob: Vec<u8>,
229+
/// Additional information about the amount of work performed to produce the blame.
230+
pub statistics: Statistics,
231+
/// Contains a log of all changes that affected the outcome of this blame.
232+
pub blame_path: Option<Vec<BlamePathEntry>>,
233+
}
234+
207235
/// Additional information about the performed operations.
208236
#[derive(Debug, Default, Copy, Clone)]
209237
pub struct Statistics {

0 commit comments

Comments
 (0)