Skip to content

Commit 3a9475f

Browse files
authored
Switch percentile implementation to MergingDigest to align with OpenSearch (opensearch-project#3698)
* Switch percentile implementation to MergingDigest to align with OpenSearch Signed-off-by: Lantao Jin <ltjin@amazon.com> * fix IT Signed-off-by: Lantao Jin <ltjin@amazon.com> --------- Signed-off-by: Lantao Jin <ltjin@amazon.com>
1 parent 3d2c261 commit 3a9475f

6 files changed

Lines changed: 24 additions & 12 deletions

File tree

core/src/main/java/org/opensearch/sql/calcite/udf/udaf/PercentileApproxFunction.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
package org.opensearch.sql.calcite.udf.udaf;
77

8-
import com.tdunning.math.stats.AVLTreeDigest;
8+
import com.tdunning.math.stats.MergingDigest;
99
import java.util.ArrayList;
1010
import java.util.List;
1111
import java.util.Objects;
@@ -85,7 +85,7 @@ public void evaluate(double value) {
8585
public Object value(Object... argList) {
8686
double percent = (double) argList[1];
8787
double compression = (double) argList[0];
88-
AVLTreeDigest tree = new AVLTreeDigest(compression);
88+
MergingDigest tree = new MergingDigest(compression);
8989
for (Number num : candidate) {
9090
tree.add(num.doubleValue());
9191
}

core/src/main/java/org/opensearch/sql/expression/aggregation/PercentileApproximateAggregator.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@
88
import static org.opensearch.sql.data.model.ExprValueUtils.doubleValue;
99
import static org.opensearch.sql.utils.ExpressionUtils.format;
1010

11-
import com.tdunning.math.stats.AVLTreeDigest;
11+
import com.tdunning.math.stats.MergingDigest;
1212
import java.util.List;
1313
import org.opensearch.sql.common.utils.StringUtils;
1414
import org.opensearch.sql.data.model.ExprNullValue;
@@ -56,11 +56,11 @@ public String toString() {
5656
}
5757

5858
/**
59-
* PercentileApproximateState is used to store the AVLTreeDigest state for percentile estimation.
59+
* PercentileApproximateState is used to store the MergingDigest state for percentile estimation.
6060
*/
61-
protected static class PercentileApproximateState extends AVLTreeDigest
61+
protected static class PercentileApproximateState extends MergingDigest
6262
implements AggregationState {
63-
// The compression level for the AVLTreeDigest, keep the same default value as OpenSearch core.
63+
// The compression level for the MergingDigest, keep the same default value as OpenSearch core.
6464
public static final double DEFAULT_COMPRESSION = 100.0;
6565
private final double percent;
6666

core/src/test/java/org/opensearch/sql/expression/aggregation/PercentileApproxAggregatorTest.java

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -68,21 +68,21 @@ public void test_percentile_field_expression_with_user_defined_compression() {
6868
aggregation(
6969
DSL.percentile(DSL.ref("integer_value", INTEGER), DSL.literal(50), DSL.literal(0.1)),
7070
tuples);
71-
assertEquals(2.5, result.value());
71+
assertEquals(3.0, result.value());
7272
result =
7373
aggregation(
7474
DSL.percentile(DSL.ref("long_value", LONG), DSL.literal(50), DSL.literal(0.1)), tuples);
75-
assertEquals(2.5, result.value());
75+
assertEquals(3.0, result.value());
7676
result =
7777
aggregation(
7878
DSL.percentile(DSL.ref("double_value", DOUBLE), DSL.literal(50), DSL.literal(0.1)),
7979
tuples);
80-
assertEquals(2.5, result.value());
80+
assertEquals(3.0, result.value());
8181
result =
8282
aggregation(
8383
DSL.percentile(DSL.ref("float_value", FLOAT), DSL.literal(50), DSL.literal(0.1)),
8484
tuples);
85-
assertEquals(2.5, result.value());
85+
assertEquals(3.0, result.value());
8686
}
8787

8888
@Test

docs/user/dql/aggregations.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -378,6 +378,8 @@ Description
378378

379379
Usage: PERCENTILE(expr, percent) or PERCENTILE_APPROX(expr, percent). Returns the approximate percentile value of `expr` at the specified percentage. `percent` must be a constant between 0 and 100.
380380

381+
Note: From 3.1.0, the percentile implementation is switched to MergingDigest from AVLTreeDigest. Ref `issue link <https://github.com/opensearch-project/OpenSearch/issues/18122>`_.
382+
381383
Example::
382384

383385
os> SELECT gender, percentile(age, 90) as p90 FROM accounts GROUP BY gender;

docs/user/ppl/cmd/stats.rst

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -269,6 +269,8 @@ Usage: PERCENTILE(expr, percent) or PERCENTILE_APPROX(expr, percent). Return the
269269

270270
* percent: The number must be a constant between 0 and 100.
271271

272+
Note: From 3.1.0, the percentile implementation is switched to MergingDigest from AVLTreeDigest. Ref `issue link <https://github.com/opensearch-project/OpenSearch/issues/18122>`_.
273+
272274
Example::
273275

274276
os> source=accounts | stats percentile(age, 90) by gender;

integ-test/src/test/java/org/opensearch/sql/ppl/StatsCommandIT.java

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -436,9 +436,17 @@ public void testStatsPercentileWithNull() throws IOException {
436436
public void testStatsPercentileWithCompression() throws IOException {
437437
JSONObject response =
438438
executeQuery(
439-
String.format("source=%s | stats percentile(balance, 50, 1)", TEST_INDEX_BANK));
440-
verifySchema(response, schema("percentile(balance, 50, 1)", null, "bigint"));
439+
String.format("source=%s | stats percentile(balance, 50, 20)", TEST_INDEX_BANK));
440+
verifySchema(response, schema("percentile(balance, 50, 20)", null, "bigint"));
441441
verifyDataRows(response, rows(32838));
442+
443+
// disable pushdown by adding a eval command
444+
JSONObject responsePushdownDisabled =
445+
executeQuery(
446+
String.format(
447+
"source=%s | eval a = 1 | stats percentile(balance, 50, 20)", TEST_INDEX_BANK));
448+
verifySchema(responsePushdownDisabled, schema("percentile(balance, 50, 20)", null, "bigint"));
449+
verifyDataRows(responsePushdownDisabled, rows(32838));
442450
}
443451

444452
@Test

0 commit comments

Comments
 (0)