Skip to content

Commit 4362926

Browse files
committed
Skip the residual eval max/min divergences on the analytics route
With the ANY-rescue fix, five of CalcitePPLEvalMaxMinFunctionIT's eight tests now pass on the analytics route. Skip the three that still diverge via the assumeNotAnalytics(...) registry plus matching excludeTestsMatching entries: - EVAL_MAX_MIN_MIXED_TYPES: mixed numeric+string operands throw 'Cannot infer return type for GREATEST' (the DataFusion GREATEST/LEAST can't unify heterogeneous operand types) — testEvalMax/MinNumericAndString. - EVAL_MAX_MIN_INT_WIDENING: max() over int operands reports bigint on the route (DataFusion widens integers to Int64) where v2/Calcite reports int — testEvalMaxNumeric. Results (-Dtests.analytics.parquet_indices=true against the analytics route): CalcitePPLEvalMaxMinFunctionIT: 0/8 -> 5/8 pass, 3 excluded, 0 fail v2/Calcite route unchanged: 8/8 pass. Signed-off-by: Kai Huang <ahkcs@amazon.com>
1 parent df3f6cf commit 4362926

3 files changed

Lines changed: 37 additions & 30 deletions

File tree

integ-test/build.gradle

Lines changed: 8 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1149,19 +1149,16 @@ task integTestRemote(type: RestIntegTestTask) {
11491149
excludeTestsMatching '*CalcitePPLInSubqueryIT.testSubsearchMaxOut'
11501150

11511151
// === Excludes: CalcitePPLEvalMaxMinFunctionIT route divergences ===
1152-
// Every test exercises eval max()/min() (lowered to GREATEST/LEAST), which has no
1153-
// return-type inference on the AE route: homogeneous operands type the result column
1154-
// 'undefined' (values are correct) and mixed numeric+string operands throw
1155-
// 'Cannot infer return type for GREATEST'. Each test also carries an in-test
1156-
// assumeNotAnalytics(EVAL_MAX_MIN_RETURN_TYPE) (see AnalyticsRouteLimitation).
1157-
excludeTestsMatching '*CalcitePPLEvalMaxMinFunctionIT.testEvalMaxNumeric'
1158-
excludeTestsMatching '*CalcitePPLEvalMaxMinFunctionIT.testEvalMaxString'
1152+
// The AnalyticsExecutionEngine ANY-rescue (this PR) fixes the homogeneous max()/min()
1153+
// schema types, so most of the class now passes on the AE route. Three tests still
1154+
// diverge; each carries an in-test assumeNotAnalytics(...) (see AnalyticsRouteLimitation).
1155+
// - Mixed numeric+string operands throw 'Cannot infer return type for GREATEST' (the
1156+
// DataFusion GREATEST/LEAST can't unify heterogeneous operand types).
11591157
excludeTestsMatching '*CalcitePPLEvalMaxMinFunctionIT.testEvalMaxNumericAndString'
1160-
excludeTestsMatching '*CalcitePPLEvalMaxMinFunctionIT.testEvalMinNumeric'
1161-
excludeTestsMatching '*CalcitePPLEvalMaxMinFunctionIT.testEvalMinString'
11621158
excludeTestsMatching '*CalcitePPLEvalMaxMinFunctionIT.testEvalMinNumericAndString'
1163-
excludeTestsMatching '*CalcitePPLEvalMaxMinFunctionIT.testEvalMaxIgnoresNulls'
1164-
excludeTestsMatching '*CalcitePPLEvalMaxMinFunctionIT.testEvalMinIgnoresNulls'
1159+
// - max() over int operands reports bigint on the AE route (DataFusion widens integers
1160+
// to Int64) where the v2/Calcite path reports int.
1161+
excludeTestsMatching '*CalcitePPLEvalMaxMinFunctionIT.testEvalMaxNumeric'
11651162
}
11661163
}
11671164

integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLEvalMaxMinFunctionIT.java

Lines changed: 8 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,8 @@
77

88
import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_DOG;
99
import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_NULL_MISSING;
10-
import static org.opensearch.sql.util.AnalyticsRouteLimitation.EVAL_MAX_MIN_RETURN_TYPE;
10+
import static org.opensearch.sql.util.AnalyticsRouteLimitation.EVAL_MAX_MIN_INT_WIDENING;
11+
import static org.opensearch.sql.util.AnalyticsRouteLimitation.EVAL_MAX_MIN_MIXED_TYPES;
1112
import static org.opensearch.sql.util.MatcherUtils.rows;
1213
import static org.opensearch.sql.util.MatcherUtils.schema;
1314
import static org.opensearch.sql.util.MatcherUtils.verifyDataRows;
@@ -29,7 +30,8 @@ public void init() throws Exception {
2930

3031
@Test
3132
public void testEvalMaxNumeric() throws Exception {
32-
assumeNotAnalytics(EVAL_MAX_MIN_RETURN_TYPE);
33+
// max(1, 3, age) selects an int-valued result; the AE route reports it as bigint.
34+
assumeNotAnalytics(EVAL_MAX_MIN_INT_WIDENING);
3335
JSONObject result =
3436
executeQuery(
3537
String.format(
@@ -40,7 +42,6 @@ public void testEvalMaxNumeric() throws Exception {
4042

4143
@Test
4244
public void testEvalMaxString() throws Exception {
43-
assumeNotAnalytics(EVAL_MAX_MIN_RETURN_TYPE);
4445
JSONObject result =
4546
executeQuery(
4647
String.format(
@@ -52,7 +53,8 @@ public void testEvalMaxString() throws Exception {
5253

5354
@Test
5455
public void testEvalMaxNumericAndString() throws Exception {
55-
assumeNotAnalytics(EVAL_MAX_MIN_RETURN_TYPE);
56+
// Mixed numeric+string operands -> 'Cannot infer return type for GREATEST' on the AE route.
57+
assumeNotAnalytics(EVAL_MAX_MIN_MIXED_TYPES);
5658
JSONObject result =
5759
executeQuery(
5860
String.format(
@@ -66,7 +68,6 @@ public void testEvalMaxNumericAndString() throws Exception {
6668

6769
@Test
6870
public void testEvalMinNumeric() throws Exception {
69-
assumeNotAnalytics(EVAL_MAX_MIN_RETURN_TYPE);
7071
JSONObject result =
7172
executeQuery(
7273
String.format(
@@ -77,7 +78,6 @@ public void testEvalMinNumeric() throws Exception {
7778

7879
@Test
7980
public void testEvalMinString() throws Exception {
80-
assumeNotAnalytics(EVAL_MAX_MIN_RETURN_TYPE);
8181
JSONObject result =
8282
executeQuery(
8383
String.format(
@@ -89,7 +89,8 @@ public void testEvalMinString() throws Exception {
8989

9090
@Test
9191
public void testEvalMinNumericAndString() throws Exception {
92-
assumeNotAnalytics(EVAL_MAX_MIN_RETURN_TYPE);
92+
// Mixed numeric+string operands -> 'Cannot infer return type for GREATEST' on the AE route.
93+
assumeNotAnalytics(EVAL_MAX_MIN_MIXED_TYPES);
9394
JSONObject result =
9495
executeQuery(
9596
String.format(
@@ -103,7 +104,6 @@ public void testEvalMinNumericAndString() throws Exception {
103104

104105
@Test
105106
public void testEvalMaxIgnoresNulls() throws Exception {
106-
assumeNotAnalytics(EVAL_MAX_MIN_RETURN_TYPE);
107107
JSONObject result =
108108
executeQuery(
109109
String.format(
@@ -125,7 +125,6 @@ public void testEvalMaxIgnoresNulls() throws Exception {
125125

126126
@Test
127127
public void testEvalMinIgnoresNulls() throws Exception {
128-
assumeNotAnalytics(EVAL_MAX_MIN_RETURN_TYPE);
129128
JSONObject result =
130129
executeQuery(
131130
String.format(

integ-test/src/test/java/org/opensearch/sql/util/AnalyticsRouteLimitation.java

Lines changed: 21 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -136,17 +136,28 @@ public enum AnalyticsRouteLimitation {
136136
+ " regardless of the cap."),
137137

138138
/**
139-
* The PPL {@code max(...)} / {@code min(...)} eval functions (lowered to {@code GREATEST} /
140-
* {@code LEAST}) diverge on the analytics-engine route. With homogeneous operands the result
141-
* column comes back typed {@code undefined} instead of the concrete type (the values are
142-
* correct), so schema assertions fail. With mixed numeric+string operands the route throws {@code
143-
* SqlValidatorException: Cannot infer return type for GREATEST}. Both stem from the route not
144-
* inferring a return type for GREATEST/LEAST; the v2/Calcite path types them correctly.
139+
* eval {@code max(...)} / {@code min(...)} over operands that mix numeric and string types (e.g.
140+
* {@code max(14, age, 'Fred', holdersName)}) throws {@code SqlValidatorException: Cannot infer
141+
* return type for GREATEST} on the analytics-engine route. The SCALAR_MAX/SCALAR_MIN UDF is
142+
* converted to a DataFusion {@code GREATEST}/{@code LEAST} during Substrait conversion, and that
143+
* operator can't unify heterogeneous operand types. The v2/Calcite path applies OpenSearch's
144+
* mixed-type comparison (strings sort above numbers) and returns a result.
145145
*/
146-
EVAL_MAX_MIN_RETURN_TYPE(
147-
"eval max()/min() (lowered to GREATEST/LEAST) has no return-type inference on the"
148-
+ " analytics-engine route: the result column is typed undefined for homogeneous operands"
149-
+ " and mixed numeric+string operands throw 'Cannot infer return type for GREATEST'.");
146+
EVAL_MAX_MIN_MIXED_TYPES(
147+
"eval max()/min() over mixed numeric+string operands throws 'Cannot infer return type for"
148+
+ " GREATEST' on the analytics-engine route: the DataFusion GREATEST/LEAST can't unify"
149+
+ " heterogeneous operand types."),
150+
151+
/**
152+
* eval {@code max(...)} / {@code min(...)} over all-integer operands reports the result column as
153+
* {@code bigint} on the analytics-engine route — DataFusion widens integer results to Int64 —
154+
* whereas the v2/Calcite path reports {@code int} when the selected value fits in an int. The
155+
* values match; only the schema's integer width differs.
156+
*/
157+
EVAL_MAX_MIN_INT_WIDENING(
158+
"eval max()/min() over integer operands reports the result column as bigint on the"
159+
+ " analytics-engine route (DataFusion widens integers to Int64), whereas the v2/Calcite"
160+
+ " path reports int.");
150161

151162
private final String reason;
152163

0 commit comments

Comments
 (0)