Skip to content

Commit f031275

Browse files
committed
fix: improve perf of count with smarter handling of half-open range
The BITS paper uses fully inclusive ranges. When this was written I'm not sure I understood that. To get things to match the naive version of count (find -> count) I had a while loop in the bits count method to advance the cursor past the matched start/stop index. This change was found while porting mojo-lapper and removes the while loop. It also has a more efficient branchless binary search. Benchmarks for count improve 24-30% (see PR).
1 parent 828fec7 commit f031275

3 files changed

Lines changed: 16 additions & 31 deletions

File tree

Cargo.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[package]
22
name = "rust-lapper"
3-
version = "1.1.0"
3+
version = "1.2.0"
44
authors = ["Seth Stadick <sstadick@gmail.com>"]
55
edition = "2018"
66
license = "MIT"

src/lib.rs

Lines changed: 14 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -350,7 +350,7 @@ where
350350
if let Some(first) = ivs.next() {
351351
stack.push_back(first);
352352
for interval in ivs {
353-
let mut top = stack.pop_back().unwrap();
353+
let top = stack.pop_back().unwrap();
354354
if top.stop < interval.start {
355355
stack.push_back(top);
356356
stack.push_back(interval);
@@ -422,24 +422,20 @@ where
422422
where
423423
K: PartialEq + PartialOrd,
424424
{
425-
if elems.is_empty() {
425+
if elems.is_empty() || elems[0] >= *key {
426426
return 0;
427+
} else if elems[elems.len() - 1] < *key {
428+
return elems.len();
427429
}
428-
if elems[0] > *key {
429-
return 0;
430-
}
431-
let mut high = elems.len();
432-
let mut low = 0;
433430

434-
while high - low > 1 {
435-
let mid = (high + low) / 2;
436-
if elems[mid] < *key {
437-
low = mid;
438-
} else {
439-
high = mid;
440-
}
431+
let mut cursor = 0;
432+
let mut length = elems.len();
433+
while length > 1 {
434+
let half = length >> 1;
435+
length -= half;
436+
cursor += (usize::from(elems[usize::from(cursor + half - 1)] < *key)) * half;
441437
}
442-
high
438+
cursor
443439
}
444440

445441
/// Find the union and the intersect of two lapper objects.
@@ -587,23 +583,11 @@ where
587583
#[inline]
588584
pub fn count(&self, start: I, stop: I) -> usize {
589585
let len = self.intervals.len();
590-
let mut first = Self::bsearch_seq(start, &self.stops);
586+
// Plus one to account for half-openness of lapper intervals compared to BITS paper
587+
let first = Self::bsearch_seq(start + one::<I>(), &self.stops);
591588
let last = Self::bsearch_seq(stop, &self.starts);
592-
//println!("{}/{}", start, stop);
593-
//println!("pre start found in stops: {}: {}", first, self.stops[first]);
594-
//println!("pre stop found in starts: {}", last);
595-
//while last < len && self.starts[last] == stop {
596-
//last += 1;
597-
//}
598-
while first < len && self.stops[first] == start {
599-
first += 1;
600-
}
601589
let num_cant_after = len - last;
602590
len - first - num_cant_after
603-
//println!("{:#?}", self.starts);
604-
//println!("{:#?}", self.stops);
605-
//println!("start found in stops: {}", first);
606-
//println!("stop found in starts: {}", last);
607591
}
608592

609593
/// Find all intervals that overlap start .. stop
@@ -1022,6 +1006,7 @@ mod tests {
10221006
fn test_query_start_interval_stop() {
10231007
let lapper = setup_nonoverlapping();
10241008
let mut cursor = 0;
1009+
println!("{:?}",lapper);
10251010
assert_eq!(None, lapper.find(30, 35).next());
10261011
assert_eq!(None, lapper.seek(30, 35, &mut cursor).next());
10271012
assert_eq!(lapper.find(30, 35).count(), lapper.count(30, 35));

0 commit comments

Comments
 (0)