Skip to content

Commit df3f6cf

Browse files
committed
Recover concrete schema type for ANY-typed columns on the analytics route
The analytics-engine response schema was built purely from the planned RelDataType, so a column whose planned type is ANY (e.g. the SCALAR_MAX / SCALAR_MIN UDFs behind PPL eval max()/min(), which declare ANY because they accept mixed numeric/string operands) was reported as 'undefined'. The v2/Calcite path already rescues this in OpenSearchExecutionEngine.buildResultSet by reading the actual runtime ExprValue type. Mirror that on the analytics path: when a column's converted type is UNDEFINED and there is a result row, take the concrete type from the first row's value. The rows are already converted before the schema is built, so no extra work is needed. Behavior is unchanged on the v2 path. This fixes eval max()/min() over homogeneous operands on the analytics route (e.g. max('apple','sam',dog_name) now reports 'string' instead of 'undefined'). Signed-off-by: Kai Huang <ahkcs@amazon.com>
1 parent 3829dfa commit df3f6cf

5 files changed

Lines changed: 85 additions & 4 deletions

File tree

core/src/main/java/org/opensearch/sql/executor/analytics/AnalyticsExecutionEngine.java

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@
3232
import org.opensearch.sql.data.model.ExprTupleValue;
3333
import org.opensearch.sql.data.model.ExprValue;
3434
import org.opensearch.sql.data.model.ExprValueUtils;
35+
import org.opensearch.sql.data.type.ExprCoreType;
3536
import org.opensearch.sql.data.type.ExprType;
3637
import org.opensearch.sql.executor.ExecutionContext;
3738
import org.opensearch.sql.executor.ExecutionEngine;
@@ -123,7 +124,7 @@ public void onResponse(Iterable<Object[]> rows) {
123124
try {
124125
List<RelDataTypeField> fields = plan.getRowType().getFieldList();
125126
List<ExprValue> results = convertRows(rows, fields);
126-
Schema schema = buildSchema(fields);
127+
Schema schema = buildSchema(fields, results);
127128
profileCtx
128129
.getOrCreateMetric(MetricName.EXECUTE)
129130
.set(System.nanoTime() - execStart);
@@ -193,9 +194,9 @@ public void onFailure(Exception e) {
193194

194195
private QueryResponse buildProfiledResponse(RelNode plan, ProfiledResult result) {
195196
List<RelDataTypeField> fields = plan.getRowType().getFieldList();
196-
Schema schema = buildSchema(fields);
197197
List<ExprValue> results =
198198
result.rows() != null ? convertRows(result.rows(), fields) : List.of();
199+
Schema schema = buildSchema(fields, results);
199200
QueryResponse response = new QueryResponse(schema, results, Cursor.None);
200201
response.setProfile(result.profile());
201202
if (!result.isSuccess()) {
@@ -265,10 +266,20 @@ private static List<Object> stripEpochDatePrefixInList(List<?> list) {
265266
return out;
266267
}
267268

268-
private Schema buildSchema(List<RelDataTypeField> fields) {
269+
private Schema buildSchema(List<RelDataTypeField> fields, List<ExprValue> results) {
269270
List<Schema.Column> columns = new ArrayList<>();
270271
for (RelDataTypeField field : fields) {
271272
ExprType exprType = convertType(field.getType());
273+
// A column whose planned type is ANY (e.g. the SCALAR_MAX/SCALAR_MIN UDFs declare ANY since
274+
// they accept mixed numeric/string operands) maps to UNDEFINED. Recover the concrete type
275+
// from the first row's runtime value, mirroring OpenSearchExecutionEngine.buildResultSet on
276+
// the Calcite path so both routes report the same schema type.
277+
if (exprType == ExprCoreType.UNDEFINED && !results.isEmpty()) {
278+
ExprValue cell = results.getFirst().tupleValue().get(field.getName());
279+
if (cell != null && !cell.isNull() && !cell.isMissing()) {
280+
exprType = cell.type();
281+
}
282+
}
272283
columns.add(new Schema.Column(field.getName(), null, exprType));
273284
}
274285
return new Schema(columns);

core/src/test/java/org/opensearch/sql/executor/analytics/AnalyticsExecutionEngineTest.java

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,39 @@ void executeRelNode_numericTypes() {
167167
6.0, response.getResults().get(0).tupleValue().get("d").value(), "double value. " + dump);
168168
}
169169

170+
/**
171+
* A column whose planned type is ANY (e.g. the SCALAR_MAX/SCALAR_MIN UDFs declare ANY) maps to
172+
* UNDEFINED; recover the concrete type from the first row's runtime value so the schema matches
173+
* the Calcite path instead of reporting "undefined".
174+
*/
175+
@Test
176+
void executeRelNode_anyColumnRecoversTypeFromFirstRow() {
177+
RelNode relNode = mockRelNode("age", SqlTypeName.BIGINT, "new", SqlTypeName.ANY);
178+
Iterable<Object[]> rows = Arrays.asList(new Object[] {2L, 3}, new Object[] {4L, 4});
179+
stubExecutorWith(relNode, rows);
180+
181+
QueryResponse response = executeAndCapture(relNode);
182+
String dump = dumpResponse(response);
183+
184+
// 'new' is ANY in the plan but the first row's value is an Integer → schema reports INTEGER.
185+
assertEquals(ExprCoreType.LONG, response.getSchema().getColumns().get(0).getExprType(), dump);
186+
assertEquals(
187+
ExprCoreType.INTEGER, response.getSchema().getColumns().get(1).getExprType(), dump);
188+
}
189+
190+
/** ANY column with no rows to derive from stays UNDEFINED (mirrors the Calcite path fallback). */
191+
@Test
192+
void executeRelNode_anyColumnEmptyResultsStaysUndefined() {
193+
RelNode relNode = mockRelNode("new", SqlTypeName.ANY);
194+
stubExecutorWith(relNode, Collections.emptyList());
195+
196+
QueryResponse response = executeAndCapture(relNode);
197+
String dump = dumpResponse(response);
198+
199+
assertEquals(
200+
ExprCoreType.UNDEFINED, response.getSchema().getColumns().get(0).getExprType(), dump);
201+
}
202+
170203
@Test
171204
void executeRelNode_temporalTypes() {
172205
RelNode relNode =

integ-test/build.gradle

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1147,6 +1147,21 @@ task integTestRemote(type: RestIntegTestTask) {
11471147
// - subsearch.maxout is lowered as a LIMIT on the in-subquery semi-join's right side,
11481148
// which the AE route does not honor, so the subsearch returns all rows.
11491149
excludeTestsMatching '*CalcitePPLInSubqueryIT.testSubsearchMaxOut'
1150+
1151+
// === Excludes: CalcitePPLEvalMaxMinFunctionIT route divergences ===
1152+
// Every test exercises eval max()/min() (lowered to GREATEST/LEAST), which has no
1153+
// return-type inference on the AE route: homogeneous operands type the result column
1154+
// 'undefined' (values are correct) and mixed numeric+string operands throw
1155+
// 'Cannot infer return type for GREATEST'. Each test also carries an in-test
1156+
// assumeNotAnalytics(EVAL_MAX_MIN_RETURN_TYPE) (see AnalyticsRouteLimitation).
1157+
excludeTestsMatching '*CalcitePPLEvalMaxMinFunctionIT.testEvalMaxNumeric'
1158+
excludeTestsMatching '*CalcitePPLEvalMaxMinFunctionIT.testEvalMaxString'
1159+
excludeTestsMatching '*CalcitePPLEvalMaxMinFunctionIT.testEvalMaxNumericAndString'
1160+
excludeTestsMatching '*CalcitePPLEvalMaxMinFunctionIT.testEvalMinNumeric'
1161+
excludeTestsMatching '*CalcitePPLEvalMaxMinFunctionIT.testEvalMinString'
1162+
excludeTestsMatching '*CalcitePPLEvalMaxMinFunctionIT.testEvalMinNumericAndString'
1163+
excludeTestsMatching '*CalcitePPLEvalMaxMinFunctionIT.testEvalMaxIgnoresNulls'
1164+
excludeTestsMatching '*CalcitePPLEvalMaxMinFunctionIT.testEvalMinIgnoresNulls'
11501165
}
11511166
}
11521167

integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLEvalMaxMinFunctionIT.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_DOG;
99
import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_NULL_MISSING;
10+
import static org.opensearch.sql.util.AnalyticsRouteLimitation.EVAL_MAX_MIN_RETURN_TYPE;
1011
import static org.opensearch.sql.util.MatcherUtils.rows;
1112
import static org.opensearch.sql.util.MatcherUtils.schema;
1213
import static org.opensearch.sql.util.MatcherUtils.verifyDataRows;
@@ -28,6 +29,7 @@ public void init() throws Exception {
2829

2930
@Test
3031
public void testEvalMaxNumeric() throws Exception {
32+
assumeNotAnalytics(EVAL_MAX_MIN_RETURN_TYPE);
3133
JSONObject result =
3234
executeQuery(
3335
String.format(
@@ -38,6 +40,7 @@ public void testEvalMaxNumeric() throws Exception {
3840

3941
@Test
4042
public void testEvalMaxString() throws Exception {
43+
assumeNotAnalytics(EVAL_MAX_MIN_RETURN_TYPE);
4144
JSONObject result =
4245
executeQuery(
4346
String.format(
@@ -49,6 +52,7 @@ public void testEvalMaxString() throws Exception {
4952

5053
@Test
5154
public void testEvalMaxNumericAndString() throws Exception {
55+
assumeNotAnalytics(EVAL_MAX_MIN_RETURN_TYPE);
5256
JSONObject result =
5357
executeQuery(
5458
String.format(
@@ -62,6 +66,7 @@ public void testEvalMaxNumericAndString() throws Exception {
6266

6367
@Test
6468
public void testEvalMinNumeric() throws Exception {
69+
assumeNotAnalytics(EVAL_MAX_MIN_RETURN_TYPE);
6570
JSONObject result =
6671
executeQuery(
6772
String.format(
@@ -72,6 +77,7 @@ public void testEvalMinNumeric() throws Exception {
7277

7378
@Test
7479
public void testEvalMinString() throws Exception {
80+
assumeNotAnalytics(EVAL_MAX_MIN_RETURN_TYPE);
7581
JSONObject result =
7682
executeQuery(
7783
String.format(
@@ -83,6 +89,7 @@ public void testEvalMinString() throws Exception {
8389

8490
@Test
8591
public void testEvalMinNumericAndString() throws Exception {
92+
assumeNotAnalytics(EVAL_MAX_MIN_RETURN_TYPE);
8693
JSONObject result =
8794
executeQuery(
8895
String.format(
@@ -96,6 +103,7 @@ public void testEvalMinNumericAndString() throws Exception {
96103

97104
@Test
98105
public void testEvalMaxIgnoresNulls() throws Exception {
106+
assumeNotAnalytics(EVAL_MAX_MIN_RETURN_TYPE);
99107
JSONObject result =
100108
executeQuery(
101109
String.format(
@@ -117,6 +125,7 @@ public void testEvalMaxIgnoresNulls() throws Exception {
117125

118126
@Test
119127
public void testEvalMinIgnoresNulls() throws Exception {
128+
assumeNotAnalytics(EVAL_MAX_MIN_RETURN_TYPE);
120129
JSONObject result =
121130
executeQuery(
122131
String.format(

integ-test/src/test/java/org/opensearch/sql/util/AnalyticsRouteLimitation.java

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -133,7 +133,20 @@ public enum AnalyticsRouteLimitation {
133133
SUBSEARCH_MAXOUT_IN_SUBQUERY(
134134
"subsearch.maxout is not honored on the analytics-engine route: the LIMIT lowered onto the"
135135
+ " in-subquery semi-join's right side is dropped, so the subsearch returns all rows"
136-
+ " regardless of the cap.");
136+
+ " regardless of the cap."),
137+
138+
/**
139+
* The PPL {@code max(...)} / {@code min(...)} eval functions (lowered to {@code GREATEST} /
140+
* {@code LEAST}) diverge on the analytics-engine route. With homogeneous operands the result
141+
* column comes back typed {@code undefined} instead of the concrete type (the values are
142+
* correct), so schema assertions fail. With mixed numeric+string operands the route throws {@code
143+
* SqlValidatorException: Cannot infer return type for GREATEST}. Both stem from the route not
144+
* inferring a return type for GREATEST/LEAST; the v2/Calcite path types them correctly.
145+
*/
146+
EVAL_MAX_MIN_RETURN_TYPE(
147+
"eval max()/min() (lowered to GREATEST/LEAST) has no return-type inference on the"
148+
+ " analytics-engine route: the result column is typed undefined for homogeneous operands"
149+
+ " and mixed numeric+string operands throw 'Cannot infer return type for GREATEST'.");
137150

138151
private final String reason;
139152

0 commit comments

Comments
 (0)