@@ -51,7 +51,7 @@ trait AggregateFunctions {
/** Cellwise/local aggregate: count the NoData cells at each cell position across all Tiles in a column. */
def rf_agg_local_no_data_cells(tile: Column): TypedColumn[Any, Tile] = {
  val noDataCounts: TypedColumn[Any, Tile] = LocalCountAggregate.LocalNoDataCellsUDAF(tile)
  noDataCounts
}
5353
54- /** Compute the full column aggregate floating point histogram. */
/** Approximate aggregate floating point histogram, computed with a streaming algorithm using the default of 80 buckets. */
def rf_agg_approx_histogram(tile: Column): TypedColumn[Any, CellHistogram] = {
  val histogram: TypedColumn[Any, CellHistogram] = HistogramAggregate(tile)
  histogram
}
5656
5757 /** Compute the approximate aggregate floating point histogram using a streaming algorithm, with the given number of buckets. */
@@ -60,6 +60,23 @@ trait AggregateFunctions {
6060 HistogramAggregate (col, numBuckets)
6161 }
6262
/**
 * Calculates the approximate quantiles of the cells of a tile column of a DataFrame.
 *
 * Arguments are validated eagerly, when the aggregate column is constructed, so that a bad
 * probability or error bound is reported at the call site.
 *
 * @param tile tile column to extract cells from.
 * @param probabilities a non-empty list of quantile probabilities.
 *   Each number must belong to [0, 1].
 *   For example 0 is the minimum, 0.5 is the median, 1 is the maximum.
 * @param relativeError The relative target precision to achieve (greater than or equal to 0).
 *   Defaults to 0.00001.
 * @return the approximate quantiles of the tile column at the given probabilities
 * @throws IllegalArgumentException if `probabilities` is empty, if any probability lies outside
 *   [0, 1] (NaN included), or if `relativeError` is negative or NaN.
 */
def rf_agg_approx_quantiles(
  tile: Column,
  probabilities: Seq[Double],
  relativeError: Double = 0.00001): TypedColumn[Any, Seq[Double]] = {
  require(probabilities.nonEmpty, " at least one quantile probability is required")
  // Comparisons with NaN are always false, so NaN probabilities are rejected here as well.
  require(
    probabilities.forall(p => p >= 0.0 && p <= 1.0),
    "quantile probabilities must be in the range [0, 1]")
  require(relativeError >= 0.0, "relativeError must be greater than or equal to 0")
  ApproxCellQuantilesAggregate(tile, probabilities, relativeError)
}
79+
/** Aggregate floating point statistics computed over the full column. */
def rf_agg_stats(tile: Column): TypedColumn[Any, CellStatistics] = {
  val stats: TypedColumn[Any, CellStatistics] = CellStatsAggregate(tile)
  stats
}
6582
0 commit comments