Skip to content

Commit 0d80347

Browse files
authored
Register DISTINCT_COUNT_APPROX logical marker in PPLFuncImpTable (opensearch-project#5466)
PPL parser fails with "Cannot resolve function: DISTINCT_COUNT_APPROX" on any execution path that does not construct OpenSearchExecutionEngine (unified-query / analytics-engine / DataFusion), because the UDAF was only registered to PPLFuncImpTable.aggExternalFunctionRegistry as a side effect of that constructor. Add a logical marker class DistinctCountApproxLogicalAggFunction in core, expose it as PPLBuiltinOperators.DISTINCT_COUNT_APPROX, and register it inside PPLFuncImpTable.AggBuilder.populate() alongside other built-in aggregates. The marker has no JVM execution: init / add / result throw UnsupportedOperationException, mirroring the pattern already used by RelevanceQueryFunction.RelevanceQueryImplementor for match-family functions which similarly have no JVM semantics. Behavior on V3 path is preserved: OpenSearchExecutionEngine still registers the real HyperLogLog++ DistinctCountApproxAggFunction in aggExternalFunctionRegistry, and getImplementation() consults that external registry first, so the marker is overridden whenever the V3 constructor has run. AggregateAnalyzer continues to translate the operator to OpenSearch cardinality DSL via the BuiltinFunctionName switch which is independent of the wrapped SqlAggFunction instance. Operand metadata for the marker is null to match the existing external registration's permissive type policy and avoid introducing new type rejections that would surface as regressions in existing dc IT. Signed-off-by: Songkan Tang <songkant@amazon.com>
1 parent d5ee441 commit 0d80347

3 files changed

Lines changed: 86 additions & 0 deletions

File tree

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package org.opensearch.sql.calcite.udf.udaf;
7+
8+
import org.opensearch.sql.calcite.udf.UserDefinedAggFunction;
9+
10+
/**
11+
* Logical marker UDAF for {@code DISTINCT_COUNT_APPROX}. Lets the PPL parser produce a RelNode that
12+
* contains the operator without committing to a JVM execution path; backends are expected to push
13+
* it down or rewrite it before execution:
14+
*
15+
* <ul>
16+
* <li>OpenSearch V3 path: {@code OpenSearchExecutionEngine#registerOpenSearchFunctions} registers
17+
* a real HyperLogLog++ implementation in {@code PPLFuncImpTable.aggExternalFunctionRegistry},
18+
* which overrides this marker (external registry has lookup precedence in {@code
19+
* getImplementation}). {@code AggregateAnalyzer} then translates the operator to OpenSearch
20+
* cardinality DSL.
21+
* <li>Unified-query / DataFusion / analytics-engine path: backend planner rewrites the RexOver to
22+
* {@code APPROX_COUNT_DISTINCT} (Calcite stdop) before substrait emission; the DataFusion
23+
* substrait reader consumes that natively.
24+
* </ul>
25+
*
26+
* <p>This class deliberately throws on every method. Reaching a method body means a backend either
27+
* failed to push down or did not register an adapter — that is a configuration bug, not a runtime
28+
* fallback. {@code RelevanceQueryFunction.RelevanceQueryImplementor} (used by {@code match}, {@code
29+
* match_phrase}, etc.) follows the same pattern for relevance search functions that have no JVM
30+
* execution semantics.
31+
*/
32+
public class DistinctCountApproxLogicalAggFunction
33+
implements UserDefinedAggFunction<DistinctCountApproxLogicalAggFunction.MarkerAccumulator> {
34+
35+
private static final String NOT_EXECUTABLE =
36+
"DISTINCT_COUNT_APPROX logical marker reached Enumerable execution; "
37+
+ "an engine-specific implementation must be registered or rewritten before execution.";
38+
39+
@Override
40+
public MarkerAccumulator init() {
41+
throw new UnsupportedOperationException(NOT_EXECUTABLE);
42+
}
43+
44+
@Override
45+
public MarkerAccumulator add(MarkerAccumulator acc, Object... values) {
46+
throw new UnsupportedOperationException(NOT_EXECUTABLE);
47+
}
48+
49+
@Override
50+
public Object result(MarkerAccumulator accumulator) {
51+
throw new UnsupportedOperationException(NOT_EXECUTABLE);
52+
}
53+
54+
/** Placeholder accumulator. Never actually constructed because {@link #init()} throws. */
55+
public static class MarkerAccumulator implements Accumulator {
56+
@Override
57+
public Object value(Object... argList) {
58+
throw new UnsupportedOperationException(NOT_EXECUTABLE);
59+
}
60+
}
61+
}

core/src/main/java/org/opensearch/sql/expression/function/PPLBuiltinOperators.java

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
import org.apache.calcite.sql.type.SqlTypeTransforms;
3030
import org.apache.calcite.sql.util.ReflectiveSqlOperatorTable;
3131
import org.apache.calcite.util.BuiltInMethod;
32+
import org.opensearch.sql.calcite.udf.udaf.DistinctCountApproxLogicalAggFunction;
3233
import org.opensearch.sql.calcite.udf.udaf.FirstAggFunction;
3334
import org.opensearch.sql.calcite.udf.udaf.LastAggFunction;
3435
import org.opensearch.sql.calcite.udf.udaf.ListAggFunction;
@@ -508,6 +509,22 @@ public class PPLBuiltinOperators extends ReflectiveSqlOperatorTable {
508509
PPLReturnTypes.STRING_ARRAY,
509510
PPLOperandTypes.ANY_SCALAR_OPTIONAL_INTEGER);
510511

512+
/**
513+
* Logical marker for {@code DISTINCT_COUNT_APPROX} (also exposed as {@code dc} and {@code
514+
* distinct_count} aliases). PPL parser uses this to produce a RelNode; backends override or
515+
* rewrite it before execution. {@code OpenSearchExecutionEngine} registers a real HyperLogLog++
516+
* implementation in the external registry of {@code PPLFuncImpTable}, which has lookup precedence
517+
* and serves the OpenSearch V3 path. Other backends (DataFusion / analytics-engine) rewrite the
518+
* operator on their own. Operand metadata is {@code null} to match the existing external
519+
* registration's permissive policy and avoid introducing new type rejections.
520+
*/
521+
public static final SqlAggFunction DISTINCT_COUNT_APPROX =
522+
createUserDefinedAggFunction(
523+
DistinctCountApproxLogicalAggFunction.class,
524+
"DISTINCT_COUNT_APPROX",
525+
ReturnTypes.BIGINT_FORCE_NULLABLE,
526+
null);
527+
511528
public static final SqlOperator ENHANCED_COALESCE =
512529
new EnhancedCoalesceFunction().toUDF("COALESCE");
513530

core/src/main/java/org/opensearch/sql/expression/function/PPLFuncImpTable.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@
6060
import static org.opensearch.sql.expression.function.BuiltinFunctionName.DAY_OF_WEEK;
6161
import static org.opensearch.sql.expression.function.BuiltinFunctionName.DAY_OF_YEAR;
6262
import static org.opensearch.sql.expression.function.BuiltinFunctionName.DEGREES;
63+
import static org.opensearch.sql.expression.function.BuiltinFunctionName.DISTINCT_COUNT_APPROX;
6364
import static org.opensearch.sql.expression.function.BuiltinFunctionName.DIVIDE;
6465
import static org.opensearch.sql.expression.function.BuiltinFunctionName.DIVIDEFUNCTION;
6566
import static org.opensearch.sql.expression.function.BuiltinFunctionName.DUR2SEC;
@@ -1426,6 +1427,13 @@ void populate() {
14261427
registerOperator(INTERNAL_PATTERN, PPLBuiltinOperators.INTERNAL_PATTERN);
14271428
registerOperator(LIST, PPLBuiltinOperators.LIST);
14281429
registerOperator(VALUES, PPLBuiltinOperators.VALUES);
1430+
// Logical marker so PPL parser succeeds on dc()/distinct_count()/distinct_count_approx()
1431+
// regardless of which execution path the query takes. OpenSearchExecutionEngine registers
1432+
// a real HyperLogLog++ implementation in aggExternalFunctionRegistry which overrides this
1433+
// marker via the external-first lookup precedence in getImplementation(). Other backends
1434+
// (DataFusion / analytics-engine) rewrite the operator before substrait emission and never
1435+
// execute the marker.
1436+
registerOperator(DISTINCT_COUNT_APPROX, PPLBuiltinOperators.DISTINCT_COUNT_APPROX);
14291437

14301438
register(
14311439
AVG,

0 commit comments

Comments
 (0)