Skip to content

Commit 1841139

Browse files
authored
Add DISTINCT_COUNT_APPROX function (opensearch-project#3654)
* add approx distinct count Signed-off-by: xinyual <xinyual@amazon.com> * add IT Signed-off-by: xinyual <xinyual@amazon.com> * fix IT name Signed-off-by: xinyual <xinyual@amazon.com> * change parser logic Signed-off-by: xinyual <xinyual@amazon.com> * apply spotless Signed-off-by: xinyual <xinyual@amazon.com> * add doc Signed-off-by: xinyual <xinyual@amazon.com> * add java doc Signed-off-by: xinyual <xinyual@amazon.com> * fix doc test Signed-off-by: xinyual <xinyual@amazon.com> * revert useless change Signed-off-by: xinyual <xinyual@amazon.com> * add version to doc Signed-off-by: xinyual <xinyual@amazon.com> * change to use opensearch core code Signed-off-by: xinyual <xinyual@amazon.com> * remove dependency Signed-off-by: xinyual <xinyual@amazon.com> * refactor code to move function to opensearch package Signed-off-by: xinyual <xinyual@amazon.com> * refactor code Signed-off-by: xinyual <xinyual@amazon.com> * add Doc and change to concurrency map Signed-off-by: xinyual <xinyual@amazon.com> --------- Signed-off-by: xinyual <xinyual@amazon.com>
1 parent 313dc0f commit 1841139

11 files changed

Lines changed: 306 additions & 76 deletions

File tree

core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java

Lines changed: 2 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,6 @@
1010
import static org.apache.calcite.rex.RexWindowBounds.UNBOUNDED_PRECEDING;
1111
import static org.apache.calcite.rex.RexWindowBounds.following;
1212
import static org.apache.calcite.rex.RexWindowBounds.preceding;
13-
import static org.opensearch.sql.calcite.utils.CalciteToolsHelper.STDDEV_POP_NULLABLE;
14-
import static org.opensearch.sql.calcite.utils.CalciteToolsHelper.STDDEV_SAMP_NULLABLE;
15-
import static org.opensearch.sql.calcite.utils.CalciteToolsHelper.VAR_POP_NULLABLE;
16-
import static org.opensearch.sql.calcite.utils.CalciteToolsHelper.VAR_SAMP_NULLABLE;
17-
import static org.opensearch.sql.calcite.utils.UserDefinedFunctionUtils.TransferUserDefinedAggFunction;
1813

1914
import com.google.common.collect.ImmutableList;
2015
import java.util.ArrayList;
@@ -25,7 +20,6 @@
2520
import org.apache.calcite.rex.RexVisitorImpl;
2621
import org.apache.calcite.rex.RexWindowBound;
2722
import org.apache.calcite.sql.fun.SqlStdOperatorTable;
28-
import org.apache.calcite.sql.type.ReturnTypes;
2923
import org.apache.calcite.sql.type.SqlTypeName;
3024
import org.apache.calcite.tools.RelBuilder;
3125
import org.opensearch.sql.ast.AbstractNodeVisitor;
@@ -37,9 +31,8 @@
3731
import org.opensearch.sql.ast.tree.Relation;
3832
import org.opensearch.sql.ast.tree.UnresolvedPlan;
3933
import org.opensearch.sql.calcite.CalcitePlanContext;
40-
import org.opensearch.sql.calcite.udf.udaf.PercentileApproxFunction;
41-
import org.opensearch.sql.calcite.udf.udaf.TakeAggFunction;
4234
import org.opensearch.sql.expression.function.BuiltinFunctionName;
35+
import org.opensearch.sql.expression.function.PPLFuncImpTable;
4336

4437
public interface PlanUtils {
4538

@@ -240,56 +233,7 @@ static RelBuilder.AggCall makeAggCall(
240233
boolean distinct,
241234
RexNode field,
242235
List<RexNode> argList) {
243-
switch (functionName) {
244-
case MAX:
245-
return context.relBuilder.max(field);
246-
case MIN:
247-
return context.relBuilder.min(field);
248-
case AVG:
249-
return context.relBuilder.avg(distinct, null, field);
250-
case COUNT:
251-
return context.relBuilder.count(
252-
distinct, null, field == null ? ImmutableList.of() : ImmutableList.of(field));
253-
case SUM:
254-
return context.relBuilder.sum(distinct, null, field);
255-
// case MEAN:
256-
// throw new UnsupportedOperationException("MEAN is not supported in PPL");
257-
// case STDDEV:
258-
// return context.relBuilder.aggregateCall(SqlStdOperatorTable.STDDEV,
259-
// field);
260-
case VARSAMP:
261-
return context.relBuilder.aggregateCall(VAR_SAMP_NULLABLE, field);
262-
case VARPOP:
263-
return context.relBuilder.aggregateCall(VAR_POP_NULLABLE, field);
264-
case STDDEV_POP:
265-
return context.relBuilder.aggregateCall(STDDEV_POP_NULLABLE, field);
266-
case STDDEV_SAMP:
267-
return context.relBuilder.aggregateCall(STDDEV_SAMP_NULLABLE, field);
268-
// case PERCENTILE_APPROX:
269-
// return
270-
// context.relBuilder.aggregateCall(SqlStdOperatorTable.PERCENTILE_CONT, field);
271-
case TAKE:
272-
return TransferUserDefinedAggFunction(
273-
TakeAggFunction.class,
274-
"TAKE",
275-
UserDefinedFunctionUtils.getReturnTypeInferenceForArray(),
276-
List.of(field),
277-
argList,
278-
context.relBuilder);
279-
case PERCENTILE_APPROX:
280-
List<RexNode> newArgList = new ArrayList<>(argList);
281-
newArgList.add(context.rexBuilder.makeFlag(field.getType().getSqlTypeName()));
282-
return TransferUserDefinedAggFunction(
283-
PercentileApproxFunction.class,
284-
"percentile_approx",
285-
ReturnTypes.ARG0_FORCE_NULLABLE,
286-
List.of(field),
287-
newArgList,
288-
context.relBuilder);
289-
default:
290-
throw new UnsupportedOperationException(
291-
"Unexpected aggregation: " + functionName.getName().getFunctionName());
292-
}
236+
return PPLFuncImpTable.INSTANCE.resolveAgg(functionName, distinct, field, argList, context);
293237
}
294238

295239
/** Get all uniq input references from a RexNode. */

core/src/main/java/org/opensearch/sql/calcite/utils/UserDefinedFunctionUtils.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ public static RelBuilder.AggCall TransferUserDefinedAggFunction(
7676
return relBuilder.aggregateCall(sqlUDAF, addArgList);
7777
}
7878

79-
static SqlReturnTypeInference getReturnTypeInferenceForArray() {
79+
public static SqlReturnTypeInference getReturnTypeInferenceForArray() {
8080
return opBinding -> {
8181
RelDataTypeFactory typeFactory = opBinding.getTypeFactory();
8282

core/src/main/java/org/opensearch/sql/expression/function/BuiltinFunctionName.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,7 @@ public enum BuiltinFunctionName {
195195
TAKE(FunctionName.of("take")),
196196
// t-digest percentile which is used in OpenSearch core by default.
197197
PERCENTILE_APPROX(FunctionName.of("percentile_approx")),
198+
DISTINCT_COUNT_APPROX(FunctionName.of("distinct_count_approx")),
198199
// Not always an aggregation query
199200
NESTED(FunctionName.of("nested")),
200201

@@ -337,6 +338,7 @@ public enum BuiltinFunctionName {
337338
.put("take", BuiltinFunctionName.TAKE)
338339
.put("percentile", BuiltinFunctionName.PERCENTILE_APPROX)
339340
.put("percentile_approx", BuiltinFunctionName.PERCENTILE_APPROX)
341+
.put("distinct_count_approx", BuiltinFunctionName.DISTINCT_COUNT_APPROX)
340342
.build();
341343

342344
private static final Map<String, BuiltinFunctionName> WINDOW_FUNC_MAPPING =

0 commit comments

Comments
 (0)