Skip to content

Commit 277ef8b

Browse files
committed
fix(approx_fns): use exact percentile when no compression
1 parent 0b60c58 commit 277ef8b

File tree

1 file changed

+18
-0
lines changed
  • datafusion/functions-aggregate-common/src

1 file changed

+18
-0
lines changed

datafusion/functions-aggregate-common/src/tdigest.rs

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -440,6 +440,10 @@ impl TDigest {
440440
return 0.0;
441441
}
442442

443+
// No compression happened since each centroid = one data point, so use exact percentile instead of interpolation
444+
if self.count as usize == self.centroids.len() {
445+
return self.exact_quantile(q);
446+
}
443447
let rank = q * self.count as f64;
444448

445449
let mut pos: usize;
@@ -515,6 +519,20 @@ impl TDigest {
515519
Self::clamp(value, min, max)
516520
}
517521

522+
fn exact_quantile(&self, q: f64) -> f64 {
523+
if q <= 0.0 {
524+
return self.min();
525+
}
526+
if q >= 1.0 {
527+
return self.max();
528+
}
529+
530+
let n = self.centroids.len();
531+
let idx = (q * n as f64).ceil() as usize;
532+
let idx = idx.saturating_sub(1).min(n - 1);
533+
self.centroids[idx].mean()
534+
}
535+
518536
/// This method decomposes the [`TDigest`] and its [`Centroid`] instances
519537
/// into a series of primitive scalar values.
520538
///

0 commit comments

Comments
 (0)