Skip to content

Commit 9ed9b1a

Browse files
authored
Recover ANY-typed column schema on the analytics route (fixes eval max/min) (#5557)
* Recover concrete schema type for ANY-typed columns on the analytics route The analytics-engine response schema was built purely from the planned RelDataType, so a column whose planned type is ANY (e.g. the SCALAR_MAX / SCALAR_MIN UDFs behind PPL eval max()/min(), which declare ANY because they accept mixed numeric/string operands) was reported as 'undefined'. The v2/Calcite path already rescues this in OpenSearchExecutionEngine.buildResultSet by reading the actual runtime ExprValue type. Mirror that on the analytics path: when a column's converted type is UNDEFINED and there is a result row, take the concrete type from the first row's value. The rows are already converted before the schema is built, so no extra work is needed. Behavior is unchanged on the v2 path. This fixes eval max()/min() over homogeneous operands on the analytics route (e.g. max('apple','sam',dog_name) now reports 'string' instead of 'undefined'). Signed-off-by: Kai Huang <ahkcs@amazon.com> * Skip the residual eval max/min divergences on the analytics route With the ANY-rescue fix, five of CalcitePPLEvalMaxMinFunctionIT's eight tests now pass on the analytics route. Skip the three that still diverge via the assumeNotAnalytics(...) registry plus matching excludeTestsMatching entries: - EVAL_MAX_MIN_MIXED_TYPES: mixed numeric+string operands throw 'Cannot infer return type for GREATEST' (the DataFusion GREATEST/LEAST can't unify heterogeneous operand types) — testEvalMax/MinNumericAndString. - EVAL_MAX_MIN_INT_WIDENING: max() over int operands reports bigint on the route (DataFusion widens integers to Int64) where v2/Calcite reports int — testEvalMaxNumeric. Results (-Dtests.analytics.parquet_indices=true against the analytics route): CalcitePPLEvalMaxMinFunctionIT: 0/8 -> 5/8 pass, 3 excluded, 0 fail v2/Calcite route unchanged: 8/8 pass. Signed-off-by: Kai Huang <ahkcs@amazon.com> --------- Signed-off-by: Kai Huang <ahkcs@amazon.com>
1 parent 4c1165a commit 9ed9b1a

5 files changed

Lines changed: 92 additions & 4 deletions

File tree

core/src/main/java/org/opensearch/sql/executor/analytics/AnalyticsExecutionEngine.java

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
import org.opensearch.sql.data.model.ExprTupleValue;
3333
import org.opensearch.sql.data.model.ExprValue;
3434
import org.opensearch.sql.data.model.ExprValueUtils;
35+
import org.opensearch.sql.data.type.ExprCoreType;
3536
import org.opensearch.sql.data.type.ExprType;
3637
import org.opensearch.sql.executor.ExecutionContext;
3738
import org.opensearch.sql.executor.ExecutionEngine;
@@ -123,7 +124,7 @@ public void onResponse(Iterable<Object[]> rows) {
123124
try {
124125
List<RelDataTypeField> fields = plan.getRowType().getFieldList();
125126
List<ExprValue> results = convertRows(rows, fields);
126-
Schema schema = buildSchema(fields);
127+
Schema schema = buildSchema(fields, results);
127128
profileCtx
128129
.getOrCreateMetric(MetricName.EXECUTE)
129130
.set(System.nanoTime() - execStart);
@@ -193,9 +194,9 @@ public void onFailure(Exception e) {
193194

194195
private QueryResponse buildProfiledResponse(RelNode plan, ProfiledResult result) {
195196
List<RelDataTypeField> fields = plan.getRowType().getFieldList();
196-
Schema schema = buildSchema(fields);
197197
List<ExprValue> results =
198198
result.rows() != null ? convertRows(result.rows(), fields) : List.of();
199+
Schema schema = buildSchema(fields, results);
199200
QueryResponse response = new QueryResponse(schema, results, Cursor.None);
200201
response.setProfile(result.profile());
201202
if (!result.isSuccess()) {
@@ -265,10 +266,20 @@ private static List<Object> stripEpochDatePrefixInList(List<?> list) {
265266
return out;
266267
}
267268

268-
private Schema buildSchema(List<RelDataTypeField> fields) {
269+
private Schema buildSchema(List<RelDataTypeField> fields, List<ExprValue> results) {
269270
List<Schema.Column> columns = new ArrayList<>();
270271
for (RelDataTypeField field : fields) {
271272
ExprType exprType = convertType(field.getType());
273+
// A column whose planned type is ANY (e.g. the SCALAR_MAX/SCALAR_MIN UDFs declare ANY since
274+
// they accept mixed numeric/string operands) maps to UNDEFINED. Recover the concrete type
275+
// from the first row's runtime value, mirroring OpenSearchExecutionEngine.buildResultSet on
276+
// the Calcite path so both routes report the same schema type.
277+
if (exprType == ExprCoreType.UNDEFINED && !results.isEmpty()) {
278+
ExprValue cell = results.getFirst().tupleValue().get(field.getName());
279+
if (cell != null && !cell.isNull() && !cell.isMissing()) {
280+
exprType = cell.type();
281+
}
282+
}
272283
columns.add(new Schema.Column(field.getName(), null, exprType));
273284
}
274285
return new Schema(columns);

core/src/test/java/org/opensearch/sql/executor/analytics/AnalyticsExecutionEngineTest.java

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,39 @@ void executeRelNode_numericTypes() {
167167
6.0, response.getResults().get(0).tupleValue().get("d").value(), "double value. " + dump);
168168
}
169169

170+
/**
171+
* A column whose planned type is ANY (e.g. the SCALAR_MAX/SCALAR_MIN UDFs declare ANY) maps to
172+
* UNDEFINED; recover the concrete type from the first row's runtime value so the schema matches
173+
* the Calcite path instead of reporting "undefined".
174+
*/
175+
@Test
176+
void executeRelNode_anyColumnRecoversTypeFromFirstRow() {
177+
RelNode relNode = mockRelNode("age", SqlTypeName.BIGINT, "new", SqlTypeName.ANY);
178+
Iterable<Object[]> rows = Arrays.asList(new Object[] {2L, 3}, new Object[] {4L, 4});
179+
stubExecutorWith(relNode, rows);
180+
181+
QueryResponse response = executeAndCapture(relNode);
182+
String dump = dumpResponse(response);
183+
184+
// 'new' is ANY in the plan but the first row's value is an Integer → schema reports INTEGER.
185+
assertEquals(ExprCoreType.LONG, response.getSchema().getColumns().get(0).getExprType(), dump);
186+
assertEquals(
187+
ExprCoreType.INTEGER, response.getSchema().getColumns().get(1).getExprType(), dump);
188+
}
189+
190+
/** ANY column with no rows to derive from stays UNDEFINED (mirrors the Calcite path fallback). */
191+
@Test
192+
void executeRelNode_anyColumnEmptyResultsStaysUndefined() {
193+
RelNode relNode = mockRelNode("new", SqlTypeName.ANY);
194+
stubExecutorWith(relNode, Collections.emptyList());
195+
196+
QueryResponse response = executeAndCapture(relNode);
197+
String dump = dumpResponse(response);
198+
199+
assertEquals(
200+
ExprCoreType.UNDEFINED, response.getSchema().getColumns().get(0).getExprType(), dump);
201+
}
202+
170203
@Test
171204
void executeRelNode_temporalTypes() {
172205
RelNode relNode =

integ-test/build.gradle

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1164,6 +1164,18 @@ task integTestRemote(type: RestIntegTestTask) {
11641164
// - earliest('now', utc_timestamp()): 'now' and utc_timestamp() resolve to the same
11651165
// instant on the route (true) but differ on v2 (false).
11661166
excludeTestsMatching '*CalcitePPLConditionBuiltinFunctionIT.testEarliestWithEval'
1167+
1168+
// === Excludes: CalcitePPLEvalMaxMinFunctionIT route divergences ===
1169+
// The AnalyticsExecutionEngine ANY-rescue (this PR) fixes the homogeneous max()/min()
1170+
// schema types, so most of the class now passes on the AE route. Three tests still
1171+
// diverge; each carries an in-test assumeNotAnalytics(...) (see AnalyticsRouteLimitation).
1172+
// - Mixed numeric+string operands throw 'Cannot infer return type for GREATEST' (the
1173+
// DataFusion GREATEST/LEAST can't unify heterogeneous operand types).
1174+
excludeTestsMatching '*CalcitePPLEvalMaxMinFunctionIT.testEvalMaxNumericAndString'
1175+
excludeTestsMatching '*CalcitePPLEvalMaxMinFunctionIT.testEvalMinNumericAndString'
1176+
// - max() over int operands reports bigint on the AE route (DataFusion widens integers
1177+
// to Int64) where the v2/Calcite path reports int.
1178+
excludeTestsMatching '*CalcitePPLEvalMaxMinFunctionIT.testEvalMaxNumeric'
11671179
}
11681180
}
11691181

integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLEvalMaxMinFunctionIT.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,8 @@
77

88
import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_DOG;
99
import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_NULL_MISSING;
10+
import static org.opensearch.sql.util.AnalyticsRouteLimitation.EVAL_MAX_MIN_INT_WIDENING;
11+
import static org.opensearch.sql.util.AnalyticsRouteLimitation.EVAL_MAX_MIN_MIXED_TYPES;
1012
import static org.opensearch.sql.util.MatcherUtils.rows;
1113
import static org.opensearch.sql.util.MatcherUtils.schema;
1214
import static org.opensearch.sql.util.MatcherUtils.verifyDataRows;
@@ -28,6 +30,8 @@ public void init() throws Exception {
2830

2931
@Test
3032
public void testEvalMaxNumeric() throws Exception {
33+
// max(1, 3, age) selects an int-valued result; the AE route reports it as bigint.
34+
assumeNotAnalytics(EVAL_MAX_MIN_INT_WIDENING);
3135
JSONObject result =
3236
executeQuery(
3337
String.format(
@@ -49,6 +53,8 @@ public void testEvalMaxString() throws Exception {
4953

5054
@Test
5155
public void testEvalMaxNumericAndString() throws Exception {
56+
// Mixed numeric+string operands -> 'Cannot infer return type for GREATEST' on the AE route.
57+
assumeNotAnalytics(EVAL_MAX_MIN_MIXED_TYPES);
5258
JSONObject result =
5359
executeQuery(
5460
String.format(
@@ -83,6 +89,8 @@ public void testEvalMinString() throws Exception {
8389

8490
@Test
8591
public void testEvalMinNumericAndString() throws Exception {
92+
// Mixed numeric+string operands -> 'Cannot infer return type for GREATEST' on the AE route.
93+
assumeNotAnalytics(EVAL_MAX_MIN_MIXED_TYPES);
8694
JSONObject result =
8795
executeQuery(
8896
String.format(

integ-test/src/test/java/org/opensearch/sql/util/AnalyticsRouteLimitation.java

Lines changed: 25 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,31 @@ public enum AnalyticsRouteLimitation {
168168
EARLIEST_LATEST_NOW_CLOCK(
169169
"earliest/latest with relative-time 'now' against utc_timestamp() diverges on the"
170170
+ " analytics-engine route: 'now' and utc_timestamp() resolve to the same instant"
171-
+ " (earliest('now', now) is true), but differ on the v2/Calcite path (false).");
171+
+ " (earliest('now', now) is true), but differ on the v2/Calcite path (false)."),
172+
173+
/**
174+
* eval {@code max(...)} / {@code min(...)} over operands that mix numeric and string types (e.g.
175+
* {@code max(14, age, 'Fred', holdersName)}) throws {@code SqlValidatorException: Cannot infer
176+
* return type for GREATEST} on the analytics-engine route. The SCALAR_MAX/SCALAR_MIN UDF is
177+
* converted to a DataFusion {@code GREATEST}/{@code LEAST} during Substrait conversion, and that
178+
* operator can't unify heterogeneous operand types. The v2/Calcite path applies OpenSearch's
179+
* mixed-type comparison (strings sort above numbers) and returns a result.
180+
*/
181+
EVAL_MAX_MIN_MIXED_TYPES(
182+
"eval max()/min() over mixed numeric+string operands throws 'Cannot infer return type for"
183+
+ " GREATEST' on the analytics-engine route: the DataFusion GREATEST/LEAST can't unify"
184+
+ " heterogeneous operand types."),
185+
186+
/**
187+
* eval {@code max(...)} / {@code min(...)} over all-integer operands reports the result column as
188+
* {@code bigint} on the analytics-engine route — DataFusion widens integer results to Int64 —
189+
* whereas the v2/Calcite path reports {@code int} when the selected value fits in an int. The
190+
* values match; only the schema's integer width differs.
191+
*/
192+
EVAL_MAX_MIN_INT_WIDENING(
193+
"eval max()/min() over integer operands reports the result column as bigint on the"
194+
+ " analytics-engine route (DataFusion widens integers to Int64), whereas the v2/Calcite"
195+
+ " path reports int.");
172196

173197
private final String reason;
174198

0 commit comments

Comments
 (0)