@@ -43,6 +43,30 @@ impl<P: Precision, B: Bits, R: Registers<P, B>, H: HasherType> HyperLogLog<P, B,
4343 self . mle_union_from_registers ( other)
4444 }
4545
46+ /// Returns the cardinality estimated with the single-counter Maximum Likelihood Estimation.
47+ ///
48+ /// # Implementative details
49+ /// This is Ertl's secant-method maximum-likelihood estimator over the register
50+ /// multiplicities. It operates on the HyperLogLog register representation, so a hash-list
51+ /// operand is materialized into registers first. It is provided for completeness and
52+ /// comparison: it is less accurate, and substantially slower, than the default
53+ /// [`HyperLogLog::estimate_cardinality`] (HyperLogLog++ corrected) estimate.
54+ #[ inline]
55+ pub fn estimate_cardinality_mle ( & self ) -> f64 {
56+ if self . is_hash_list ( ) {
57+ let mut counter = self . clone ( ) ;
58+ counter. convert_hash_list_to_hyperloglog ( ) . unwrap ( ) ;
59+ return counter. estimate_cardinality_mle ( ) ;
60+ }
61+
62+ mle_cardinality :: < P , B > (
63+ self . registers . iter_registers ( ) ,
64+ self . harmonic_sum ,
65+ self . is_full ( ) ,
66+ 2 ,
67+ )
68+ }
69+
4670 /// Joint MLE union estimate assuming both counters are in HyperLogLog (register) mode.
4771 fn mle_union_from_registers ( & self , other : & Self ) -> f64 {
4872 // Maps a union harmonic sum to the HyperLogLog++ corrected cardinality, exactly as the
@@ -255,6 +279,117 @@ fn mle_union_cardinality<P: Precision, B: Bits, I: ExactSizeIterator<Item = [u8;
255279 phis[ 0 ] . exp ( ) + phis[ 1 ] . exp ( ) + phis[ 2 ] . exp ( )
256280}
257281
282+ #[ allow( clippy:: too_many_lines) ]
283+ /// Single-counter cardinality via Ertl's secant-method Maximum Likelihood Estimation.
284+ ///
285+ /// # Arguments
286+ /// * `registers` - Iterator over the counter's register values.
287+ /// * `harmonic_sum` - The counter's harmonic sum (sum of `2^-register`).
288+ /// * `is_full` - Whether the counter is saturated (returns infinity).
289+ /// * `error_exponent` - The secant method stops once the relative step is below
290+ /// `10^-error_exponent` scaled by the precision.
291+ fn mle_cardinality < P : Precision , B : Bits > (
292+ registers : impl Iterator < Item = u8 > ,
293+ harmonic_sum : f64 ,
294+ is_full : bool ,
295+ error_exponent : i32 ,
296+ ) -> f64 {
297+ if is_full {
298+ return f64:: INFINITY ;
299+ }
300+
301+ let multiplicities_len = 1_usize << B :: NUMBER_OF_BITS ;
302+ let mut multiplicities = vec ! [ f64 :: ZERO ; multiplicities_len] ;
303+ let q = multiplicities_len as u32 - 2 ;
304+
305+ let mut smallest_register_value: u32 = q;
306+ let mut largest_register_value: u32 = 0 ;
307+
308+ for register in registers {
309+ let register = u32:: from ( register) ;
310+ if register > 0 {
311+ smallest_register_value = smallest_register_value. min ( register) ;
312+ }
313+ largest_register_value = largest_register_value. max ( register) ;
314+ multiplicities[ register as usize ] += 1.0 ;
315+ }
316+
317+ smallest_register_value = smallest_register_value. max ( 1 ) ;
318+ largest_register_value = largest_register_value. min ( q) . max ( 1 ) ;
319+
320+ let number_of_registers = f64:: integer_exp2 ( P :: EXPONENT ) ;
321+
322+ let c =
323+ multiplicities[ multiplicities_len - 1 ] + multiplicities[ largest_register_value as usize ] ;
324+
325+ let mut g_prev: f64 = 0.0 ;
326+ let number_of_zero_registers = multiplicities[ 0 ] ;
327+ let reciprocal_saturated_registers = multiplicities[ multiplicities_len - 1 ]
328+ * f64:: integer_exp2_minus ( ( multiplicities_len - 1 ) as u8 ) ;
329+
330+ let harmonic_sum_minus_zero_and_saturated =
331+ harmonic_sum - ( number_of_zero_registers + reciprocal_saturated_registers) ;
332+
333+ let a = harmonic_sum_minus_zero_and_saturated + number_of_zero_registers;
334+ let b = harmonic_sum_minus_zero_and_saturated
335+ + multiplicities[ multiplicities_len - 1 ] * f64:: integer_exp2_minus ( q as u8 ) ;
336+
337+ let number_of_non_zero_registers = number_of_registers - number_of_zero_registers;
338+
339+ let mut x = if b <= 1.5 * a {
340+ number_of_non_zero_registers / ( 0.5 * b + a)
341+ } else {
342+ ( number_of_non_zero_registers / b) * ( b / a) . ln_1p ( )
343+ } ;
344+
345+ // We begin the secant method iterations.
346+ let mut delta_x = x;
347+ let relative_error_limit = 10.0_f64 . powi ( -error_exponent) / number_of_registers. sqrt ( ) ;
348+
349+ let forty_five_recip = 1.0 / 45.0 ;
350+ let four_seventy_two_point_five_recip = 1.0 / 472.5 ;
351+
352+ let taylor = |x_first : f64 , h : f64 | -> f64 { ( x_first + h * ( 1.0 - h) ) / ( x_first + 1.0 - h) } ;
353+
354+ while delta_x > x * relative_error_limit {
355+ // Equivalent to `2 + floor(log2(x))`, saturating non-positive exponents to 0.
356+ let k: u32 = 2 + ( x. log2 ( ) . floor ( ) . max ( 0.0 ) as u32 ) ;
357+
358+ let maximal = largest_register_value. max ( k) ;
359+ let mut x_first = x * f64:: integer_exp2_minus ( ( maximal + 1 ) as u8 ) ;
360+ let x_second = x_first * x_first;
361+ let x_forth = x_second * x_second;
362+ let mut taylor_series_approximation = x_first - x_second / 3.0
363+ + x_forth * ( forty_five_recip - x_second * four_seventy_two_point_five_recip) ;
364+
365+ for _ in largest_register_value..k {
366+ taylor_series_approximation = taylor ( x_first, taylor_series_approximation) ;
367+ x_first *= 2.0 ;
368+ }
369+
370+ let mut g = c * taylor_series_approximation;
371+
372+ for register_value in ( smallest_register_value..largest_register_value) . rev ( ) {
373+ taylor_series_approximation = taylor ( x_first, taylor_series_approximation) ;
374+ g += multiplicities[ register_value as usize ] * taylor_series_approximation;
375+ x_first *= 2.0 ;
376+ }
377+
378+ g += x * a;
379+
380+ if g > g_prev && number_of_non_zero_registers >= g {
381+ delta_x *= ( number_of_non_zero_registers - g) / ( g - g_prev) ;
382+ } else {
383+ delta_x = 0.0 ;
384+ }
385+
386+ x += delta_x;
387+ g_prev = g;
388+ }
389+
390+ number_of_registers * x
391+ }
392+
258393/// Trait for element-wise multiplication.
259394trait ElementWiseMultiplication < Rhs = Self > {
260395 /// Element-wise multiplication.
0 commit comments