Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions crates/geo_filters/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,7 @@ c2.push(2);
c2.push(3);

let estimated_size = c1.size_with_sketch(&c2);
assert!(estimated_size >= 3.0_f32 * 0.9 &&
estimated_size <= 3.0_f32 * 1.1);
assert_eq!(estimated_size, 3);
```

## Background
Expand Down
2 changes: 1 addition & 1 deletion crates/geo_filters/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -374,7 +374,7 @@ pub(crate) mod tests {
m.push_hash(rnd.next_u64());
}
// Compute the relative error between estimate and actually inserted items.
let high_precision = m.size() / cnt as f32 - 1.0;
let high_precision = m.size_real() / cnt as f32 - 1.0;
// Take the average over trials many attempts.
avg_precision += high_precision / trials as f32;
avg_var += high_precision.powf(2.0) / trials as f32;
Expand Down
6 changes: 3 additions & 3 deletions crates/geo_filters/src/diff_count.rs
Original file line number Diff line number Diff line change
Expand Up @@ -443,11 +443,11 @@ impl<C: GeoConfig<Diff>> Count<Diff> for GeoDiffCount<'_, C> {
*self = xor(self, other);
}

fn size(&self) -> f32 {
fn size_real(&self) -> f32 {
self.estimate_size()
}

fn size_with_sketch(&self, other: &Self) -> f32 {
fn size_with_sketch_real(&self, other: &Self) -> f32 {
assert!(
self.config == other.config,
"combined filters must have the same configuration"
Expand Down Expand Up @@ -501,7 +501,7 @@ mod tests {
let mut geo_count = GeoDiffCount13::default();

(0..n).for_each(|i| geo_count.push(i));
assert_eq!(result, geo_count.size());
assert_eq!(result, geo_count.size_real());
}
}

Expand Down
6 changes: 3 additions & 3 deletions crates/geo_filters/src/distinct_count.rs
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ impl<C: GeoConfig<Distinct>> Count<Distinct> for GeoDistinctCount<'_, C> {
*self = or(self, other)
}

fn size(&self) -> f32 {
fn size_real(&self) -> f32 {
let lowest_bucket = self.lsb.bit_range().start;
let total = self.msb.len()
+ self
Expand All @@ -159,7 +159,7 @@ impl<C: GeoConfig<Distinct>> Count<Distinct> for GeoDistinctCount<'_, C> {
}
}

fn size_with_sketch(&self, other: &Self) -> f32 {
fn size_with_sketch_real(&self, other: &Self) -> f32 {
assert!(
self.config == other.config,
"combined filters must have the same configuration"
Expand Down Expand Up @@ -272,7 +272,7 @@ mod tests {
] {
let mut geo_count = GeoDistinctCount13::default();
(0..n).for_each(|i| geo_count.push(i));
assert_eq!(result, geo_count.size());
assert_eq!(result, geo_count.size_real());
}
}

Expand Down
4 changes: 2 additions & 2 deletions crates/geo_filters/src/evaluation/hll.rs
Original file line number Diff line number Diff line change
Expand Up @@ -126,11 +126,11 @@ impl<C: HllConfig> Count<Distinct> for Hll<C> {
unimplemented!()
}

fn size(&self) -> f32 {
fn size_real(&self) -> f32 {
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

why the suffix real?
Isn't a f32 suffix usually used in rust?

Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

while size_f32 is technical, it is also pretty clear what you will get :)

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess I wanted to encode the semantics relating to the use of the function rather than the technical implementation.

I'll just go with size_f32 so we can sidestep the bike shedding :)

self.inner.borrow_mut().count() as f32
}

fn size_with_sketch(&self, _other: &Self) -> f32 {
fn size_with_sketch_real(&self, _other: &Self) -> f32 {
unimplemented!()
}

Expand Down
6 changes: 3 additions & 3 deletions crates/geo_filters/src/evaluation/simulation.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ impl<C: GeoConfig<Diff> + Clone> SimulationCount for GeoDiffCount<'_, C> {
<Self as Count<_>>::push_hash(self, hash)
}
fn size(&self) -> f32 {
<Self as Count<_>>::size(self)
<Self as Count<_>>::size_real(self)
}
fn bytes_in_memory(&self) -> usize {
<Self as Count<_>>::bytes_in_memory(self)
Expand All @@ -36,7 +36,7 @@ impl<C: GeoConfig<Distinct>> SimulationCount for GeoDistinctCount<'_, C> {
<Self as Count<_>>::push_hash(self, hash)
}
fn size(&self) -> f32 {
<Self as Count<_>>::size(self)
<Self as Count<_>>::size_real(self)
}
fn bytes_in_memory(&self) -> usize {
<Self as Count<_>>::bytes_in_memory(self)
Expand All @@ -47,7 +47,7 @@ impl<C: HllConfig> SimulationCount for Hll<C> {
<Self as Count<_>>::push_hash(self, hash)
}
fn size(&self) -> f32 {
<Self as Count<_>>::size(self)
<Self as Count<_>>::size_real(self)
}
fn bytes_in_memory(&self) -> usize {
<Self as Count<_>>::bytes_in_memory(self)
Expand Down
35 changes: 31 additions & 4 deletions crates/geo_filters/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -43,17 +43,44 @@ pub trait Count<M: Method> {
/// If only the size of the combined set is needed, [`Self::size_with_sketch`] is more efficient and should be used.
fn push_sketch(&mut self, other: &Self);

/// Return the estimated set size.
fn size(&self) -> f32;
/// Return the estimated set size rounded to the nearest unsigned integer.
fn size(&self) -> usize {
let size = self.size_real().round();
debug_assert_f32s_in_range(size);
size as usize
Comment thread
itsibitzi marked this conversation as resolved.
}

/// Return the estimated set size when combined with the given sketch.
/// Return the estimated set size as a real number.
fn size_real(&self) -> f32;

/// Return the estimated set size when combined with the given sketch rounded to the nearest unsigned integer.
/// If the combined set itself is not going to be used, this method is more efficient than using [`Self::push_sketch`] and [`Self::size`].
fn size_with_sketch(&self, other: &Self) -> usize {
let size = self.size_with_sketch_real(other).round();
debug_assert_f32s_in_range(size);
size as usize
Comment thread
itsibitzi marked this conversation as resolved.
}

/// Return the estimated set size when combined with the given sketch as a real number.
/// If the combined set itself is not going to be used, this method is more efficient than using [`Self::push_sketch`] and [`Self::size`].
fn size_with_sketch(&self, other: &Self) -> f32;
fn size_with_sketch_real(&self, other: &Self) -> f32;

/// Returns the number of bytes in memory used to represent this filter.
fn bytes_in_memory(&self) -> usize;
}

#[inline]
fn debug_assert_f32s_in_range(v: f32) {
// The geometric filter should never produce these values.
// These assertions failing indicates that there is a bug.
debug_assert!(v.is_finite(), "Estimated size must be finite, got {v}");
debug_assert!(v >= 0.0, "Estimated size must be non-negative, got {v}");
debug_assert!(
v <= usize::MAX as f32,
"Estimated size {v} exceeds usize::MAX",
);
}

#[doc = include_str!("../README.md")]
#[cfg(doctest)]
pub struct ReadmeDocTests;