Skip to content

Commit dad9ce1

Browse files
committed
fix(approx_fns): use exact percentile when no compression
1 parent c17c87c commit dad9ce1

File tree

1 file changed

+18
-0
lines changed
  • datafusion/functions-aggregate-common/src

1 file changed

+18
-0
lines changed

datafusion/functions-aggregate-common/src/tdigest.rs

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -434,6 +434,10 @@ impl TDigest {
434434
return 0.0;
435435
}
436436

437+
// If the total count equals the number of centroids, no compression has happened (each centroid holds a single data point), so return the exact percentile instead of interpolating.
438+
if self.count as usize == self.centroids.len() {
439+
return self.exact_quantile(q);
440+
}
437441
let rank = q * self.count;
438442

439443
let mut pos: usize;
@@ -509,6 +513,20 @@ impl TDigest {
509513
Self::clamp(value, min, max)
510514
}
511515

516+
fn exact_quantile(&self, q: f64) -> f64 {
517+
if q <= 0.0 {
518+
return self.min();
519+
}
520+
if q >= 1.0 {
521+
return self.max();
522+
}
523+
524+
let n = self.centroids.len();
525+
let idx = (q * n as f64).ceil() as usize;
526+
let idx = idx.saturating_sub(1).min(n - 1);
527+
self.centroids[idx].mean()
528+
}
529+
512530
/// This method decomposes the [`TDigest`] and its [`Centroid`] instances
513531
/// into a series of primitive scalar values.
514532
///

0 commit comments

Comments
 (0)