Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions crates/geo_filters/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,7 @@ c2.push(2);
c2.push(3);

let estimated_size = c1.size_with_sketch(&c2);
assert!(estimated_size >= 3.0_f32 * 0.9 &&
estimated_size <= 3.0_f32 * 1.1);
assert_eq!(estimated_size, 3);
```

## Background
Expand Down
2 changes: 1 addition & 1 deletion crates/geo_filters/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -374,7 +374,7 @@ pub(crate) mod tests {
m.push_hash(rnd.next_u64());
}
// Compute the relative error between estimate and actually inserted items.
let high_precision = m.size() / cnt as f32 - 1.0;
let high_precision = m.size_f32() / cnt as f32 - 1.0;
// Take the average over trials many attempts.
avg_precision += high_precision / trials as f32;
avg_var += high_precision.powf(2.0) / trials as f32;
Expand Down
6 changes: 3 additions & 3 deletions crates/geo_filters/src/diff_count.rs
Original file line number Diff line number Diff line change
Expand Up @@ -443,11 +443,11 @@ impl<C: GeoConfig<Diff>> Count<Diff> for GeoDiffCount<'_, C> {
*self = xor(self, other);
}

fn size(&self) -> f32 {
fn size_f32(&self) -> f32 {
self.estimate_size()
}

fn size_with_sketch(&self, other: &Self) -> f32 {
fn size_with_sketch_f32(&self, other: &Self) -> f32 {
assert!(
self.config == other.config,
"combined filters must have the same configuration"
Expand Down Expand Up @@ -501,7 +501,7 @@ mod tests {
let mut geo_count = GeoDiffCount13::default();

(0..n).for_each(|i| geo_count.push(i));
assert_eq!(result, geo_count.size());
assert_eq!(result, geo_count.size_f32());
}
}

Expand Down
6 changes: 3 additions & 3 deletions crates/geo_filters/src/distinct_count.rs
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ impl<C: GeoConfig<Distinct>> Count<Distinct> for GeoDistinctCount<'_, C> {
*self = or(self, other)
}

fn size(&self) -> f32 {
fn size_f32(&self) -> f32 {
let lowest_bucket = self.lsb.bit_range().start;
let total = self.msb.len()
+ self
Expand All @@ -159,7 +159,7 @@ impl<C: GeoConfig<Distinct>> Count<Distinct> for GeoDistinctCount<'_, C> {
}
}

fn size_with_sketch(&self, other: &Self) -> f32 {
fn size_with_sketch_f32(&self, other: &Self) -> f32 {
assert!(
self.config == other.config,
"combined filters must have the same configuration"
Expand Down Expand Up @@ -272,7 +272,7 @@ mod tests {
] {
let mut geo_count = GeoDistinctCount13::default();
(0..n).for_each(|i| geo_count.push(i));
assert_eq!(result, geo_count.size());
assert_eq!(result, geo_count.size_f32());
}
}

Expand Down
4 changes: 2 additions & 2 deletions crates/geo_filters/src/evaluation/hll.rs
Original file line number Diff line number Diff line change
Expand Up @@ -126,11 +126,11 @@ impl<C: HllConfig> Count<Distinct> for Hll<C> {
unimplemented!()
}

fn size(&self) -> f32 {
fn size_f32(&self) -> f32 {
self.inner.borrow_mut().count() as f32
}

fn size_with_sketch(&self, _other: &Self) -> f32 {
fn size_with_sketch_f32(&self, _other: &Self) -> f32 {
unimplemented!()
}

Expand Down
6 changes: 3 additions & 3 deletions crates/geo_filters/src/evaluation/simulation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ impl<C: GeoConfig<Diff> + Clone> SimulationCount for GeoDiffCount<'_, C> {
<Self as Count<_>>::push_hash(self, hash)
}
fn size(&self) -> f32 {
<Self as Count<_>>::size(self)
<Self as Count<_>>::size_f32(self)
}
fn bytes_in_memory(&self) -> usize {
<Self as Count<_>>::bytes_in_memory(self)
Expand All @@ -36,7 +36,7 @@ impl<C: GeoConfig<Distinct>> SimulationCount for GeoDistinctCount<'_, C> {
<Self as Count<_>>::push_hash(self, hash)
}
fn size(&self) -> f32 {
<Self as Count<_>>::size(self)
<Self as Count<_>>::size_f32(self)
}
fn bytes_in_memory(&self) -> usize {
<Self as Count<_>>::bytes_in_memory(self)
Expand All @@ -47,7 +47,7 @@ impl<C: HllConfig> SimulationCount for Hll<C> {
<Self as Count<_>>::push_hash(self, hash)
}
fn size(&self) -> f32 {
<Self as Count<_>>::size(self)
<Self as Count<_>>::size_f32(self)
}
fn bytes_in_memory(&self) -> usize {
<Self as Count<_>>::bytes_in_memory(self)
Expand Down
35 changes: 31 additions & 4 deletions crates/geo_filters/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,17 +43,44 @@ pub trait Count<M: Method> {
/// If only the size of the combined set is needed, [`Self::size_with_sketch`] is more efficient and should be used.
fn push_sketch(&mut self, other: &Self);

/// Return the estimated set size.
fn size(&self) -> f32;
/// Return the estimated set size rounded to the nearest unsigned integer.
fn size(&self) -> usize {
let size = self.size_f32().round();
debug_assert_f32s_in_range(size);
size as usize
Comment thread
itsibitzi marked this conversation as resolved.
}

/// Return the estimated set size when combined with the given sketch.
/// Return the estimated set size as a real number.
fn size_f32(&self) -> f32;

/// Return the estimated set size when combined with the given sketch rounded to the nearest unsigned integer.
/// If the combined set itself is not going to be used, this method is more efficient than using [`Self::push_sketch`] and [`Self::size`].
fn size_with_sketch(&self, other: &Self) -> usize {
let size = self.size_with_sketch_f32(other).round();
debug_assert_f32s_in_range(size);
size as usize
Comment thread
itsibitzi marked this conversation as resolved.
}

/// Return the estimated set size when combined with the given sketch as a real number.
/// If the combined set itself is not going to be used, this method is more efficient than using [`Self::push_sketch`] and [`Self::size`].
fn size_with_sketch(&self, other: &Self) -> f32;
fn size_with_sketch_f32(&self, other: &Self) -> f32;

/// Returns the number of bytes in memory used to represent this filter.
fn bytes_in_memory(&self) -> usize;
}

#[inline]
fn debug_assert_f32s_in_range(v: f32) {
// The geometric filter should never produce these values.
// These assertions failing indicates that there is a bug.
debug_assert!(v.is_finite(), "Estimated size must be finite, got {v}");
debug_assert!(v >= 0.0, "Estimated size must be non-negative, got {v}");
debug_assert!(
v <= usize::MAX as f32,
"Estimated size {v} exceeds usize::MAX",
);
}

#[doc = include_str!("../README.md")]
#[cfg(doctest)]
pub struct ReadmeDocTests;