Skip to content

Commit f7bd4f4

Browse files
mch2claude
andauthored
Adding sweep of MATH scalar functions to analytics-engine. (opensearch-project#21520)
* Adding sweep of MATH scalar functions. math scalar functions — 32 ITs covering ABS, ACOS, ASIN, ATAN, ATAN2, CBRT, CEIL, COS, COSH, COT, DEGREES, E, EXP, EXPM1, FLOOR, LN, LOG, LOG10, LOG2, PI, POWER, RADIANS, RAND, ROUND, SCALAR_MAX, SCALAR_MIN, SIGN, SIN, SINH, TAN, TRUNCATE — pushed down through analytics-engine → Substrait → DataFusion. Rebased on upstream/main with opensearch-project#21476 landed (AbstractNameMappingAdapter + YEAR / CONVERT_TZ / UNIX_TIMESTAMP). Conflicts resolved by union in: - ScalarFunction.java (math enum entries) - DataFusionAnalyticsBackendPlugin.java (STANDARD_PROJECT_OPS + scalarFunctionAdapters map) - DataFusionFragmentConvertor.java (ADDITIONAL_SCALAR_SIGS) - opensearch_scalar_functions.yaml (cbrt / cot / pi / random / round / signum / trunc signatures) Migrated SCALAR_MAX / SCALAR_MIN / SIGN off the local RewriteOperatorAdapter onto the shared AbstractNameMappingAdapter from opensearch-project#21476. Signed-off-by: Marc Handalian <marc.handalian@gmail.com> Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com> * fix compile Signed-off-by: Marc Handalian <marc.handalian@gmail.com> --------- Signed-off-by: Marc Handalian <marc.handalian@gmail.com> Co-authored-by: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 65bfc20 commit f7bd4f4

18 files changed

Lines changed: 1429 additions & 7 deletions

File tree

sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/spi/AbstractNameMappingAdapter.java

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,16 @@ public RexNode adapt(RexCall original, List<FieldStorageInfo> fieldStorage, RelO
8181
// LogicalProject.create together with the cached rowType, and
8282
// Project.isValid's compatibleTypes check throws an AssertionError that
8383
// breaks fragment conversion.
84+
//
85+
// Exception: polymorphic PPL UDFs (e.g. SCALAR_MAX, SCALAR_MIN) declare
86+
// their return type as SqlTypeName.ANY because they accept heterogeneous
87+
// operand shapes. Substrait cannot serialise ANY, so fall back to the
88+
// target operator's own return-type inference — the result will be a
89+
// concrete type derived from operands (DOUBLE for GREATEST(DOUBLE, DOUBLE),
90+
// etc.) which Substrait can serialise.
91+
if (original.getType().getSqlTypeName() == SqlTypeName.ANY) {
92+
return rexBuilder.makeCall(targetOperator, operands);
93+
}
8494
return rexBuilder.makeCall(original.getType(), targetOperator, operands);
8595
}
8696

sandbox/libs/analytics-framework/src/main/java/org/opensearch/analytics/spi/ScalarFunction.java

Lines changed: 28 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,9 +77,36 @@ public enum ScalarFunction {
7777
DIVIDE(Category.MATH, SqlKind.DIVIDE),
7878
MOD(Category.MATH, SqlKind.MOD),
7979
ABS(Category.MATH, SqlKind.OTHER_FUNCTION),
80-
SIN(Category.MATH, SqlKind.OTHER_FUNCTION),
80+
ACOS(Category.MATH, SqlKind.OTHER_FUNCTION),
81+
ASIN(Category.MATH, SqlKind.OTHER_FUNCTION),
82+
ATAN(Category.MATH, SqlKind.OTHER_FUNCTION),
83+
ATAN2(Category.MATH, SqlKind.OTHER_FUNCTION),
84+
CBRT(Category.MATH, SqlKind.OTHER_FUNCTION),
8185
CEIL(Category.MATH, SqlKind.CEIL),
86+
COS(Category.MATH, SqlKind.OTHER_FUNCTION),
87+
COSH(Category.MATH, SqlKind.OTHER_FUNCTION),
88+
COT(Category.MATH, SqlKind.OTHER_FUNCTION),
89+
DEGREES(Category.MATH, SqlKind.OTHER_FUNCTION),
90+
E(Category.MATH, SqlKind.OTHER_FUNCTION),
91+
EXP(Category.MATH, SqlKind.OTHER_FUNCTION),
92+
EXPM1(Category.MATH, SqlKind.OTHER_FUNCTION),
8293
FLOOR(Category.MATH, SqlKind.FLOOR),
94+
LN(Category.MATH, SqlKind.OTHER_FUNCTION),
95+
LOG(Category.MATH, SqlKind.OTHER_FUNCTION),
96+
LOG10(Category.MATH, SqlKind.OTHER_FUNCTION),
97+
LOG2(Category.MATH, SqlKind.OTHER_FUNCTION),
98+
PI(Category.MATH, SqlKind.OTHER_FUNCTION),
99+
POWER(Category.MATH, SqlKind.OTHER_FUNCTION),
100+
RADIANS(Category.MATH, SqlKind.OTHER_FUNCTION),
101+
RAND(Category.MATH, SqlKind.OTHER_FUNCTION),
102+
ROUND(Category.MATH, SqlKind.OTHER_FUNCTION),
103+
SCALAR_MAX(Category.MATH, SqlKind.OTHER_FUNCTION),
104+
SCALAR_MIN(Category.MATH, SqlKind.OTHER_FUNCTION),
105+
SIGN(Category.MATH, SqlKind.OTHER_FUNCTION),
106+
SIN(Category.MATH, SqlKind.OTHER_FUNCTION),
107+
SINH(Category.MATH, SqlKind.OTHER_FUNCTION),
108+
TAN(Category.MATH, SqlKind.OTHER_FUNCTION),
109+
TRUNCATE(Category.MATH, SqlKind.OTHER_FUNCTION),
83110

84111
// ── Cast / type ──────────────────────────────────────────────────
85112
CAST(Category.SCALAR, SqlKind.CAST),
Lines changed: 153 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,153 @@
1+
/*
2+
* SPDX-License-Identifier: Apache-2.0
3+
*
4+
* The OpenSearch Contributors require contributions made to
5+
* this file be licensed under the Apache-2.0 license or a
6+
* compatible open source license.
7+
*/
8+
9+
package org.opensearch.analytics.spi;
10+
11+
import org.apache.calcite.jdbc.JavaTypeFactoryImpl;
12+
import org.apache.calcite.plan.RelOptCluster;
13+
import org.apache.calcite.plan.volcano.VolcanoPlanner;
14+
import org.apache.calcite.rel.type.RelDataType;
15+
import org.apache.calcite.rel.type.RelDataTypeFactory;
16+
import org.apache.calcite.rex.RexBuilder;
17+
import org.apache.calcite.rex.RexCall;
18+
import org.apache.calcite.rex.RexLiteral;
19+
import org.apache.calcite.rex.RexNode;
20+
import org.apache.calcite.sql.SqlFunction;
21+
import org.apache.calcite.sql.SqlFunctionCategory;
22+
import org.apache.calcite.sql.SqlIdentifier;
23+
import org.apache.calcite.sql.SqlKind;
24+
import org.apache.calcite.sql.fun.SqlLibraryOperators;
25+
import org.apache.calcite.sql.fun.SqlStdOperatorTable;
26+
import org.apache.calcite.sql.parser.SqlParserPos;
27+
import org.apache.calcite.sql.type.OperandTypes;
28+
import org.apache.calcite.sql.type.ReturnTypes;
29+
import org.apache.calcite.sql.type.SqlTypeName;
30+
import org.apache.calcite.sql.validate.SqlUserDefinedFunction;
31+
import org.opensearch.test.OpenSearchTestCase;
32+
33+
import java.util.List;
34+
35+
/**
36+
* Unit tests for {@link AbstractNameMappingAdapter}. Covers the basic rename path, the
37+
* prepend-literal form, and — most importantly — the {@link SqlTypeName#ANY} fallback
38+
* that kicks in when the incoming PPL UDF declares an indeterminate return type (e.g.
39+
* PPL's {@code SCALAR_MAX} / {@code SCALAR_MIN}). Without the fallback, Substrait
40+
* serialisation fails with {@code Unable to convert the type ANY}.
41+
*/
42+
public class AbstractNameMappingAdapterTests extends OpenSearchTestCase {
43+
44+
private final RelDataTypeFactory typeFactory = new JavaTypeFactoryImpl();
45+
private final RexBuilder rexBuilder = new RexBuilder(typeFactory);
46+
private final RelOptCluster cluster = RelOptCluster.create(new VolcanoPlanner(), rexBuilder);
47+
private final RelDataType doubleType = typeFactory.createSqlType(SqlTypeName.DOUBLE);
48+
49+
/** Minimal concrete subclass for tests — pure rename, no prepend/append. */
50+
private static final class TestRenameAdapter extends AbstractNameMappingAdapter {
51+
TestRenameAdapter() {
52+
super(SqlLibraryOperators.GREATEST, List.of(), List.of());
53+
}
54+
}
55+
56+
private SqlUserDefinedFunction pplUdf(String name, RelDataType returnType) {
57+
return new SqlUserDefinedFunction(
58+
new SqlIdentifier(name, SqlParserPos.ZERO),
59+
SqlKind.OTHER_FUNCTION,
60+
opBinding -> returnType,
61+
null,
62+
null,
63+
null
64+
);
65+
}
66+
67+
public void testBasicRename() {
68+
SqlUserDefinedFunction udf = pplUdf("SCALAR_MAX", doubleType);
69+
RexNode a = rexBuilder.makeInputRef(doubleType, 0);
70+
RexNode b = rexBuilder.makeInputRef(doubleType, 1);
71+
RexCall original = (RexCall) rexBuilder.makeCall(udf, List.of(a, b));
72+
73+
RexNode adapted = new TestRenameAdapter().adapt(original, List.of(), cluster);
74+
75+
assertTrue(adapted instanceof RexCall);
76+
RexCall adaptedCall = (RexCall) adapted;
77+
assertSame(SqlLibraryOperators.GREATEST, adaptedCall.getOperator());
78+
assertEquals(2, adaptedCall.getOperands().size());
79+
assertSame(a, adaptedCall.getOperands().get(0));
80+
assertSame(b, adaptedCall.getOperands().get(1));
81+
assertSame("DOUBLE return type must be preserved", SqlTypeName.DOUBLE, adaptedCall.getType().getSqlTypeName());
82+
}
83+
84+
public void testPrependLiteralOperand() {
85+
SqlFunction yearUdf = new SqlFunction(
86+
"YEAR",
87+
SqlKind.OTHER_FUNCTION,
88+
ReturnTypes.BIGINT_NULLABLE,
89+
null,
90+
OperandTypes.ANY,
91+
SqlFunctionCategory.TIMEDATE
92+
);
93+
RexNode ts = rexBuilder.makeInputRef(typeFactory.createSqlType(SqlTypeName.TIMESTAMP), 0);
94+
RexCall original = (RexCall) rexBuilder.makeCall(yearUdf, List.of(ts));
95+
96+
AbstractNameMappingAdapter adapter = new AbstractNameMappingAdapter(SqlLibraryOperators.DATE_PART, List.of("year"), List.of()) {
97+
};
98+
RexNode adapted = adapter.adapt(original, List.of(), cluster);
99+
100+
RexCall adaptedCall = (RexCall) adapted;
101+
assertSame(SqlLibraryOperators.DATE_PART, adaptedCall.getOperator());
102+
assertEquals(2, adaptedCall.getOperands().size());
103+
assertTrue(adaptedCall.getOperands().get(0) instanceof RexLiteral);
104+
assertEquals("year", ((RexLiteral) adaptedCall.getOperands().get(0)).getValueAs(String.class));
105+
assertSame(ts, adaptedCall.getOperands().get(1));
106+
}
107+
108+
/**
109+
* PPL's {@code SCALAR_MAX} / {@code SCALAR_MIN} declare their return type as
110+
* {@link SqlTypeName#ANY}. Substrait cannot serialise ANY; the adapter must fall back to
111+
* letting the target operator's own return-type inference run so the rewritten call
112+
* carries a concrete type derived from the operands.
113+
*/
114+
public void testAdaptFallsBackToTargetInferenceForAnyReturnType() {
115+
RelDataType anyType = typeFactory.createSqlType(SqlTypeName.ANY);
116+
SqlUserDefinedFunction udf = pplUdf("SCALAR_MAX", anyType);
117+
RexNode a = rexBuilder.makeInputRef(doubleType, 0);
118+
RexNode b = rexBuilder.makeInputRef(doubleType, 1);
119+
RexNode c = rexBuilder.makeInputRef(doubleType, 2);
120+
RexCall original = (RexCall) rexBuilder.makeCall(udf, List.of(a, b, c));
121+
assertSame("precondition: UDF return type must be ANY", SqlTypeName.ANY, original.getType().getSqlTypeName());
122+
123+
RexNode adapted = new TestRenameAdapter().adapt(original, List.of(), cluster);
124+
125+
assertTrue(adapted instanceof RexCall);
126+
RexCall adaptedCall = (RexCall) adapted;
127+
assertSame(SqlLibraryOperators.GREATEST, adaptedCall.getOperator());
128+
assertSame(
129+
"ANY return type must be replaced with a concrete operand-derived type after rewrite",
130+
SqlTypeName.DOUBLE,
131+
adaptedCall.getType().getSqlTypeName()
132+
);
133+
}
134+
135+
/**
136+
* Pass-through for SIGN — a standard Calcite operator whose return type is already
137+
* concrete. The adapter still rewrites to the target operator (SignumFunction lives in
138+
* the backend; here we use SqlStdOperatorTable.SQRT as a stand-in target with a
139+
* concrete return type inferrer) and the preserved DOUBLE type proves the happy path.
140+
*/
141+
public void testSignLikeRewritePreservesConcreteType() {
142+
RexNode arg = rexBuilder.makeInputRef(doubleType, 0);
143+
RexCall original = (RexCall) rexBuilder.makeCall(SqlStdOperatorTable.SIGN, List.of(arg));
144+
145+
AbstractNameMappingAdapter adapter = new AbstractNameMappingAdapter(SqlStdOperatorTable.SQRT, List.of(), List.of()) {
146+
};
147+
RexNode adapted = adapter.adapt(original, List.of(), cluster);
148+
149+
RexCall adaptedCall = (RexCall) adapted;
150+
assertSame(SqlStdOperatorTable.SQRT, adaptedCall.getOperator());
151+
assertSame(SqlTypeName.DOUBLE, adaptedCall.getType().getSqlTypeName());
152+
}
153+
}

sandbox/libs/analytics-framework/src/test/java/org/opensearch/analytics/spi/ScalarFunctionTests.java

Lines changed: 98 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,12 @@
99
package org.opensearch.analytics.spi;
1010

1111
import org.apache.calcite.sql.SqlKind;
12+
import org.apache.calcite.sql.fun.SqlLibraryOperators;
1213
import org.apache.calcite.sql.fun.SqlStdOperatorTable;
1314
import org.opensearch.test.OpenSearchTestCase;
1415

1516
import java.util.EnumMap;
17+
import java.util.List;
1618
import java.util.Map;
1719

1820
/**
@@ -101,4 +103,100 @@ public void testFromSqlOperatorReturnsNullForUnknownFunction() {
101103
// both resolution paths miss and the resolver returns null instead of throwing.
102104
assertNull(ScalarFunction.fromSqlOperatorWithFallback(SqlStdOperatorTable.UNARY_MINUS));
103105
}
106+
107+
// ── Group G math functions: name-based lookup via fromSqlFunction ──────────
108+
// PPL emits these as Calcite SqlBasicFunction calls whose name matches the
109+
// enum constant. STANDARD_PROJECT_OPS registration (and adapter dispatch)
110+
// depends on fromSqlFunction resolving them by name, so guard every entry.
111+
112+
public void testMathFunctionsResolveByName() {
113+
assertSame(ScalarFunction.ABS, ScalarFunction.fromSqlFunction(SqlStdOperatorTable.ABS));
114+
assertSame(ScalarFunction.ACOS, ScalarFunction.fromSqlFunction(SqlStdOperatorTable.ACOS));
115+
assertSame(ScalarFunction.ASIN, ScalarFunction.fromSqlFunction(SqlStdOperatorTable.ASIN));
116+
assertSame(ScalarFunction.ATAN, ScalarFunction.fromSqlFunction(SqlStdOperatorTable.ATAN));
117+
assertSame(ScalarFunction.ATAN2, ScalarFunction.fromSqlFunction(SqlStdOperatorTable.ATAN2));
118+
assertSame(ScalarFunction.CBRT, ScalarFunction.fromSqlFunction(SqlStdOperatorTable.CBRT));
119+
assertSame(ScalarFunction.COS, ScalarFunction.fromSqlFunction(SqlStdOperatorTable.COS));
120+
assertSame(ScalarFunction.COT, ScalarFunction.fromSqlFunction(SqlStdOperatorTable.COT));
121+
assertSame(ScalarFunction.DEGREES, ScalarFunction.fromSqlFunction(SqlStdOperatorTable.DEGREES));
122+
assertSame(ScalarFunction.EXP, ScalarFunction.fromSqlFunction(SqlStdOperatorTable.EXP));
123+
assertSame(ScalarFunction.LN, ScalarFunction.fromSqlFunction(SqlStdOperatorTable.LN));
124+
// 2-arg log: PPL emits SqlLibraryOperators.LOG(x, base); 1-arg log(x) is pre-lowered to
125+
// LOG(x, e) by PPLFuncImpTable, so this single LOG entry covers both arities.
126+
assertSame(ScalarFunction.LOG, ScalarFunction.fromSqlFunction(SqlLibraryOperators.LOG));
127+
assertSame(ScalarFunction.LOG10, ScalarFunction.fromSqlFunction(SqlStdOperatorTable.LOG10));
128+
assertSame(ScalarFunction.LOG2, ScalarFunction.fromSqlFunction(SqlLibraryOperators.LOG2));
129+
assertSame(ScalarFunction.PI, ScalarFunction.fromSqlFunction(SqlStdOperatorTable.PI));
130+
assertSame(ScalarFunction.POWER, ScalarFunction.fromSqlFunction(SqlStdOperatorTable.POWER));
131+
assertSame(ScalarFunction.RADIANS, ScalarFunction.fromSqlFunction(SqlStdOperatorTable.RADIANS));
132+
assertSame(ScalarFunction.RAND, ScalarFunction.fromSqlFunction(SqlStdOperatorTable.RAND));
133+
assertSame(ScalarFunction.ROUND, ScalarFunction.fromSqlFunction(SqlStdOperatorTable.ROUND));
134+
assertSame(ScalarFunction.SIGN, ScalarFunction.fromSqlFunction(SqlStdOperatorTable.SIGN));
135+
assertSame(ScalarFunction.TAN, ScalarFunction.fromSqlFunction(SqlStdOperatorTable.TAN));
136+
assertSame(ScalarFunction.TRUNCATE, ScalarFunction.fromSqlFunction(SqlStdOperatorTable.TRUNCATE));
137+
}
138+
139+
/** PPL's SCALAR_MAX / SCALAR_MIN UDFs resolve by the UDF's declared name — these are the
140+
* PPLBuiltinOperators variants that DataFusionAnalyticsBackendPlugin binds to
141+
* AbstractNameMappingAdapter instances targeting SqlLibraryOperators.GREATEST / LEAST. */
142+
public void testScalarMaxMinResolveByName() {
143+
assertSame(ScalarFunction.SCALAR_MAX, ScalarFunction.valueOf("SCALAR_MAX"));
144+
assertSame(ScalarFunction.SCALAR_MIN, ScalarFunction.valueOf("SCALAR_MIN"));
145+
}
146+
147+
/**
148+
* Tier-2 adapter targets: enum entries exist for PPL UDFs even though the
149+
* upstream isthmus SCALAR_SIGS only recognises SqlLibraryOperators variants.
150+
* The DataFusion adapter rewrites the UDF call to the Calcite-library
151+
* operator before Substrait conversion, but the name-based lookup here
152+
* must still succeed so STANDARD_PROJECT_OPS and adapter dispatch can run.
153+
*/
154+
public void testTier2AdapterTargetFunctionsExistByName() {
155+
// PPL's COSH/SINH UDFs have getName() = "COSH"/"SINH"; valueOf succeeds.
156+
assertSame(ScalarFunction.COSH, ScalarFunction.valueOf("COSH"));
157+
assertSame(ScalarFunction.SINH, ScalarFunction.valueOf("SINH"));
158+
// PPL's E() and EXPM1 UDFs likewise resolve by name.
159+
assertSame(ScalarFunction.E, ScalarFunction.valueOf("E"));
160+
assertSame(ScalarFunction.EXPM1, ScalarFunction.valueOf("EXPM1"));
161+
}
162+
163+
/** Category hygiene: every math enum constant belongs to the MATH category. */
164+
public void testMathFunctionsHaveMathCategory() {
165+
List<ScalarFunction> mathFuncs = List.of(
166+
ScalarFunction.ABS,
167+
ScalarFunction.ACOS,
168+
ScalarFunction.ASIN,
169+
ScalarFunction.ATAN,
170+
ScalarFunction.ATAN2,
171+
ScalarFunction.CBRT,
172+
ScalarFunction.CEIL,
173+
ScalarFunction.COS,
174+
ScalarFunction.COSH,
175+
ScalarFunction.COT,
176+
ScalarFunction.DEGREES,
177+
ScalarFunction.E,
178+
ScalarFunction.EXP,
179+
ScalarFunction.EXPM1,
180+
ScalarFunction.FLOOR,
181+
ScalarFunction.LN,
182+
ScalarFunction.LOG,
183+
ScalarFunction.LOG10,
184+
ScalarFunction.LOG2,
185+
ScalarFunction.PI,
186+
ScalarFunction.POWER,
187+
ScalarFunction.RADIANS,
188+
ScalarFunction.RAND,
189+
ScalarFunction.ROUND,
190+
ScalarFunction.SCALAR_MAX,
191+
ScalarFunction.SCALAR_MIN,
192+
ScalarFunction.SIGN,
193+
ScalarFunction.SIN,
194+
ScalarFunction.SINH,
195+
ScalarFunction.TAN,
196+
ScalarFunction.TRUNCATE
197+
);
198+
for (ScalarFunction func : mathFuncs) {
199+
assertSame("expected MATH category for " + func, ScalarFunction.Category.MATH, func.getCategory());
200+
}
201+
}
104202
}

0 commit comments

Comments
 (0)