Skip to content

Commit cf14aba

Browse files
authored
Coerce temporal operands in IN and BETWEEN when leastRestrictive finds no common type (#5513)
* Coerce temporal operands in IN and BETWEEN when leastRestrictive finds no common type visitIn and visitBetween call leastRestrictive directly instead of going through the CoercionUtils path that comparison operators use. leastRestrictive returns no common type when temporal operands are represented differently — e.g. a standard Calcite TIMESTAMP field against EXPR_DATE UDT bounds/values (`ts between date('...') and date('...')`, `ts in (DATE '...', ...)`) — so both predicates rejected such queries with "expression types are incompatible" even though comparisons coerce the same mix. Both now fall back to CoercionUtils.widenArguments (the comparison-operator path) when leastRestrictive yields null, scoped to all-temporal operands so genuinely incompatible mixes (e.g. `age between '35' and 38.5`) still raise SemanticCheckException. Enables the previously-undiscovered testDateBetween (was missing its @test annotation) and adds testDateIn plus a CoercionUtils unit test covering the plain-TIMESTAMP + EXPR_DATE mix. Signed-off-by: Lantao Jin <ltjin@amazon.com> * Compare temporal IN/NOT IN values in the field's timestamp domain instead of string-collapsing them Signed-off-by: Lantao Jin <ltjin@amazon.com> --------- Signed-off-by: Lantao Jin <ltjin@amazon.com>
1 parent 2c4215f commit cf14aba

4 files changed

Lines changed: 193 additions & 17 deletions

File tree

core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java

Lines changed: 66 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
import java.util.List;
1919
import java.util.Locale;
2020
import java.util.Map;
21+
import java.util.Set;
2122
import java.util.stream.Collectors;
2223
import java.util.stream.IntStream;
2324
import javax.annotation.Nullable;
@@ -84,11 +85,13 @@
8485
import org.opensearch.sql.calcite.utils.PlanUtils;
8586
import org.opensearch.sql.calcite.utils.SubsearchUtils;
8687
import org.opensearch.sql.common.utils.StringUtils;
88+
import org.opensearch.sql.data.type.ExprCoreType;
8789
import org.opensearch.sql.data.type.ExprType;
8890
import org.opensearch.sql.exception.CalciteUnsupportedException;
8991
import org.opensearch.sql.exception.ExpressionEvaluationException;
9092
import org.opensearch.sql.exception.SemanticCheckException;
9193
import org.opensearch.sql.expression.function.BuiltinFunctionName;
94+
import org.opensearch.sql.expression.function.CoercionUtils;
9295
import org.opensearch.sql.expression.function.PPLFuncImpTable;
9396

9497
@RequiredArgsConstructor
@@ -225,24 +228,41 @@ public RexNode visitIn(In node, CalcitePlanContext context) {
225228
final RexNode field = analyze(node.getField(), context);
226229
final List<RexNode> valueList =
227230
node.getValueList().stream().map(value -> analyze(value, context)).toList();
231+
// When the field is a temporal type, do NOT use leastRestrictive + rexBuilder.makeIn. For a
232+
// temporal field tested against string/date literals, leastRestrictive collapses the common
233+
// type to VARCHAR (the EXPR_DATE / EXPR_TIMESTAMP UDTs are VARCHAR-backed), so makeIn casts the
234+
// field DOWN to VARCHAR and string-compares mismatched renderings (e.g. '2018-06-23 00:00:00'
235+
// against '2018-06-23') — silently matching nothing. Rewrite the membership test as an OR of
236+
// PPL `=` comparisons, the same temporal-aware comparison path visitCompare takes for `=`, so
237+
// each value is coerced to the field's timestamp domain before comparison.
238+
ExprType fieldExprType = OpenSearchTypeFactory.convertRelDataTypeToExprType(field.getType());
239+
if (TEMPORAL_TYPES.contains(fieldExprType)) {
240+
List<RexNode> equalities =
241+
valueList.stream()
242+
.map(value -> PPLFuncImpTable.INSTANCE.resolve(context.rexBuilder, "=", field, value))
243+
.toList();
244+
return context.relBuilder.or(equalities);
245+
}
228246
final List<RelDataType> dataTypes =
229-
new java.util.ArrayList<>(valueList.stream().map(RexNode::getType).toList());
247+
new ArrayList<>(valueList.stream().map(RexNode::getType).toList());
230248
dataTypes.add(field.getType());
231249
RelDataType commonType = context.rexBuilder.getTypeFactory().leastRestrictive(dataTypes);
232250
if (commonType != null) {
233251
List<RexNode> newValueList =
234252
valueList.stream().map(value -> context.rexBuilder.makeCast(commonType, value)).toList();
235253
return context.rexBuilder.makeIn(field, newValueList);
236-
} else {
237-
List<ExprType> exprTypes =
238-
dataTypes.stream().map(OpenSearchTypeFactory::convertRelDataTypeToExprType).toList();
239-
throw new SemanticCheckException(
240-
StringUtils.format(
241-
"In expression types are incompatible: fields type %s, values type %s",
242-
exprTypes.getLast(), exprTypes.subList(0, exprTypes.size() - 1)));
243254
}
255+
List<ExprType> exprTypes =
256+
dataTypes.stream().map(OpenSearchTypeFactory::convertRelDataTypeToExprType).toList();
257+
throw new SemanticCheckException(
258+
StringUtils.format(
259+
"In expression types are incompatible: fields type %s, values type %s",
260+
exprTypes.getLast(), exprTypes.subList(0, exprTypes.size() - 1)));
244261
}
245262

263+
private static final Set<ExprType> TEMPORAL_TYPES =
264+
Set.of(ExprCoreType.DATE, ExprCoreType.TIME, ExprCoreType.TIMESTAMP);
265+
246266
@Override
247267
public RexNode visitCompare(Compare node, CalcitePlanContext context) {
248268
RexNode left = analyze(node.getLeft(), context);
@@ -258,6 +278,25 @@ public RexNode visitCompare(Compare node, CalcitePlanContext context) {
258278
return PPLFuncImpTable.INSTANCE.resolve(context.rexBuilder, op, left, right);
259279
}
260280

281+
/**
282+
* Widens a set of operands to a common temporal type when, and only when, every operand is a
283+
* temporal type (DATE / TIME / TIMESTAMP), including the EXPR_DATE / EXPR_TIME / EXPR_TIMESTAMP
284+
* UDTs. Returns {@code null} otherwise so non-temporal incompatible mixes still fail the type
285+
* check. The widening reuses {@link CoercionUtils#widenArguments} — the same path comparison
286+
* operators take — which resolves DATE / TIME to TIMESTAMP via the shared widening graph.
287+
*/
288+
private static @Nullable List<RexNode> widenTemporalOperands(
289+
CalcitePlanContext context, List<RexNode> operands) {
290+
boolean allTemporal =
291+
operands.stream()
292+
.map(node -> OpenSearchTypeFactory.convertRelDataTypeToExprType(node.getType()))
293+
.allMatch(TEMPORAL_TYPES::contains);
294+
if (!allTemporal) {
295+
return null;
296+
}
297+
return CoercionUtils.widenArguments(context.rexBuilder, operands);
298+
}
299+
261300
@Override
262301
public RexNode visitBetween(Between node, CalcitePlanContext context) {
263302
RexNode value = analyze(node.getValue(), context);
@@ -268,12 +307,25 @@ public RexNode visitBetween(Between node, CalcitePlanContext context) {
268307
lowerBound = context.rexBuilder.makeCast(commonType, lowerBound);
269308
upperBound = context.rexBuilder.makeCast(commonType, upperBound);
270309
} else {
271-
throw new SemanticCheckException(
272-
StringUtils.format(
273-
"BETWEEN expression types are incompatible: [%s, %s, %s]",
274-
OpenSearchTypeFactory.convertRelDataTypeToExprType(value.getType()),
275-
OpenSearchTypeFactory.convertRelDataTypeToExprType(lowerBound.getType()),
276-
OpenSearchTypeFactory.convertRelDataTypeToExprType(upperBound.getType())));
310+
// leastRestrictive() has no common type for mixed temporal representations — e.g. a standard
311+
// Calcite TIMESTAMP field compared against EXPR_DATE UDT bounds (`ts between date('...') and
312+
// date('...')`). Comparison operators coerce these through CoercionUtils; BETWEEN calls
313+
// leastRestrictive directly and would otherwise reject them. Fall back to the same temporal
314+
// widening, scoped to all-temporal operands so genuinely incompatible mixes (e.g.
315+
// `age between '35' and 38.5`) still raise SemanticCheckException.
316+
List<RexNode> widened =
317+
widenTemporalOperands(context, List.of(value, lowerBound, upperBound));
318+
if (widened == null) {
319+
throw new SemanticCheckException(
320+
StringUtils.format(
321+
"BETWEEN expression types are incompatible: [%s, %s, %s]",
322+
OpenSearchTypeFactory.convertRelDataTypeToExprType(value.getType()),
323+
OpenSearchTypeFactory.convertRelDataTypeToExprType(lowerBound.getType()),
324+
OpenSearchTypeFactory.convertRelDataTypeToExprType(upperBound.getType())));
325+
}
326+
value = widened.get(0);
327+
lowerBound = widened.get(1);
328+
upperBound = widened.get(2);
277329
}
278330
return context.relBuilder.between(value, lowerBound, upperBound);
279331
}

core/src/test/java/org/opensearch/sql/expression/function/CoercionUtilsTest.java

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
import org.apache.calcite.rel.type.RelDataType;
1818
import org.apache.calcite.rex.RexBuilder;
1919
import org.apache.calcite.rex.RexNode;
20+
import org.apache.calcite.sql.type.SqlTypeName;
2021
import org.junit.jupiter.api.Test;
2122
import org.junit.jupiter.params.ParameterizedTest;
2223
import org.junit.jupiter.params.provider.Arguments;
@@ -51,6 +52,40 @@ public void findCommonWidestType(
5152
expectedCommonType, CoercionUtils.resolveCommonType(left, right).orElseGet(() -> null));
5253
}
5354

55+
@Test
56+
void widenArgumentsUnifiesPlainTimestampWithDateUdtBounds() {
57+
// Reproduces the BETWEEN/IN operand set seen on a non-UDT path (e.g. the analytics engine's
58+
// parquet scan): a standard Calcite TIMESTAMP field against EXPR_DATE UDT bounds.
59+
// leastRestrictive
60+
// returns no common type for this mix, but the temporal widening used by comparison operators
61+
// must resolve all three to a single timestamp type so the predicate can run.
62+
RexNode plainTimestampField =
63+
REX_BUILDER.makeInputRef(
64+
OpenSearchTypeFactory.TYPE_FACTORY.createTypeWithNullability(
65+
OpenSearchTypeFactory.TYPE_FACTORY.createSqlType(SqlTypeName.TIMESTAMP), true),
66+
0);
67+
RexNode dateLower =
68+
REX_BUILDER.makeNullLiteral(
69+
OpenSearchTypeFactory.TYPE_FACTORY.createUDT(
70+
OpenSearchTypeFactory.ExprUDT.EXPR_DATE, true));
71+
RexNode dateUpper =
72+
REX_BUILDER.makeNullLiteral(
73+
OpenSearchTypeFactory.TYPE_FACTORY.createUDT(
74+
OpenSearchTypeFactory.ExprUDT.EXPR_DATE, true));
75+
76+
List<RexNode> widened =
77+
CoercionUtils.widenArguments(
78+
REX_BUILDER, List.of(plainTimestampField, dateLower, dateUpper));
79+
80+
assertNotNull(widened);
81+
assertEquals(3, widened.size());
82+
for (RexNode node : widened) {
83+
assertEquals(
84+
ExprCoreType.TIMESTAMP,
85+
OpenSearchTypeFactory.convertRelDataTypeToExprType(node.getType()));
86+
}
87+
}
88+
5489
@Test
5590
void castArgumentsReturnsExactMatchWhenAvailable() {
5691
PPLTypeChecker typeChecker = new StubTypeChecker(List.of(List.of(INTEGER), List.of(DOUBLE)));

integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLBasicIT.java

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -500,7 +500,10 @@ public void testNotBetween3() throws IOException {
500500
verifyDataRows(actual, rows("Hattie", 36), rows("Elinor", 36));
501501
}
502502

503+
@Test
503504
public void testDateBetween() throws IOException {
505+
// birthdate is a TIMESTAMP-typed field; the bounds are DATE literals. BETWEEN must coerce the
506+
// mixed temporal operands rather than reject them with "expression types are incompatible".
504507
JSONObject actual =
505508
executeQuery(
506509
String.format(
@@ -512,6 +515,55 @@ public void testDateBetween() throws IOException {
512515
actual, rows("Nanette", "2018-06-23 00:00:00"), rows("Elinor", "2018-06-27 00:00:00"));
513516
}
514517

518+
@Test
519+
public void testDateIn() throws IOException {
520+
// birthdate is a TIMESTAMP-typed field; the IN values are DATE literals. visitIn must compare
521+
// each value in the field's temporal domain (via PPL `=`) rather than letting leastRestrictive
522+
// collapse the common type to VARCHAR and string-compare mismatched renderings — which silently
523+
// matched nothing without pushdown. See visitIn in CalciteRexNodeVisitor.
524+
JSONObject actual =
525+
executeQuery(
526+
String.format(
527+
"source=%s | where birthdate in (DATE '2018-06-23', DATE '2018-06-27') |"
528+
+ " fields firstname, birthdate",
529+
TEST_INDEX_BANK));
530+
verifySchema(actual, schema("firstname", "string"), schema("birthdate", "timestamp"));
531+
verifyDataRows(
532+
actual, rows("Nanette", "2018-06-23 00:00:00"), rows("Elinor", "2018-06-27 00:00:00"));
533+
}
534+
535+
@Test
536+
public void testDateNotIn() throws IOException {
537+
// Complement of testDateIn: NOT IN over a temporal field. Exercises the complemented-points
538+
// pushdown branch, which must also format timestamp values rather than emit a flat terms query.
539+
JSONObject actual =
540+
executeQuery(
541+
String.format(
542+
"source=%s | where birthdate not in (DATE '2018-06-23', DATE '2018-06-27') |"
543+
+ " fields firstname | sort firstname",
544+
TEST_INDEX_BANK));
545+
verifySchema(actual, schema("firstname", "string"));
546+
verifyDataRows(
547+
actual,
548+
rows("Amber JOHnny"),
549+
rows("Dale"),
550+
rows("Dillard"),
551+
rows("Hattie"),
552+
rows("Virginia"));
553+
}
554+
555+
@Test
556+
public void testIn() throws IOException {
557+
// Non-temporal IN keeps the leastRestrictive + makeIn path; guards against the temporal-field
558+
// special-case in visitIn regressing membership tests over ordinary columns.
559+
JSONObject actual =
560+
executeQuery(
561+
String.format(
562+
"source=%s | where age in (32, 36) | fields firstname, age", TEST_INDEX_BANK));
563+
verifySchema(actual, schema("firstname", "string"), schema("age", "int"));
564+
verifyDataRows(actual, rows("Amber JOHnny", 32), rows("Hattie", 36), rows("Elinor", 36));
565+
}
566+
515567
@Test
516568
public void testXor() throws IOException {
517569
JSONObject result =

opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java

Lines changed: 40 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -784,16 +784,29 @@ private QueryExpression like(RexCall call) {
784784

785785
private static QueryExpression constructQueryExpressionForSearch(
786786
RexCall call, SwapResult pair) {
787+
boolean isTimeStamp =
788+
(pair.getKey() instanceof NamedFieldExpression namedField)
789+
&& namedField.isTimeStampType();
787790
if (isSearchWithComplementedPoints(call)) {
791+
// A NOT IN over a timestamp field cannot use a flat terms query: timestamp values must be
792+
// normalized + formatted (see in()/termsQuery, which does neither). Decompose into the
793+
// complement of an OR of formatted per-point range queries, mirroring the range branch.
794+
if (isTimeStamp) {
795+
return pointsAsTimestampOr(call, pair).not();
796+
}
788797
return QueryExpression.create(pair.getKey()).notIn(pair.getValue());
789798
} else if (isSearchWithPoints(call)) {
799+
// IN over a timestamp field: the flat terms query in in() emits unformatted values like
800+
// '2018-06-23 00:00:00' that the date field cannot parse. Decompose into an OR of formatted
801+
// per-point range queries (equals(point, true) builds gte/lte + format="date_time"), the
802+
// same path the range branch below and visitCompare's `=` already take.
803+
if (isTimeStamp) {
804+
return pointsAsTimestampOr(call, pair);
805+
}
790806
return QueryExpression.create(pair.getKey()).in(pair.getValue());
791807
} else {
792808
Sarg<?> sarg = pair.getValue().literal.getValueAs(Sarg.class);
793809
Set<? extends Range<?>> rangeSet = requireNonNull(sarg).rangeSet.asRanges();
794-
boolean isTimeStamp =
795-
(pair.getKey() instanceof NamedFieldExpression namedField)
796-
&& namedField.isTimeStampType();
797810
List<QueryExpression> queryExpressions =
798811
rangeSet.stream()
799812
.map(
@@ -811,6 +824,30 @@ private static QueryExpression constructQueryExpressionForSearch(
811824
}
812825
}
813826

827+
/**
828+
* Builds an OR of per-point {@code equals(point, true)} query expressions for a points-only
829+
* Sarg over a timestamp field. Each point becomes a {@code gte/lte} range query with {@code
830+
* format="date_time"}, so the timestamp value is normalized to the date field's accepted format
831+
* — unlike a flat terms query, which emits unformatted values the date field rejects.
832+
*/
833+
private static QueryExpression pointsAsTimestampOr(RexCall call, SwapResult pair) {
834+
RexLiteral literal = (RexLiteral) call.getOperands().get(1);
835+
Sarg<?> sarg = requireNonNull(literal.getValueAs(Sarg.class), "Sarg");
836+
Set<? extends Range<?>> ranges =
837+
(sarg.isComplementedPoints() ? sarg.negate() : sarg).rangeSet.asRanges();
838+
List<QueryExpression> queryExpressions =
839+
ranges.stream()
840+
.map(
841+
range ->
842+
QueryExpression.create(pair.getKey())
843+
.equals(sargPointValue(range.lowerEndpoint()), true))
844+
.toList();
845+
if (queryExpressions.size() == 1) {
846+
return queryExpressions.getFirst();
847+
}
848+
return CompoundQueryExpression.or(queryExpressions.toArray(new QueryExpression[0]));
849+
}
850+
814851
private QueryExpression andOr(RexCall call) {
815852
QueryExpression[] expressions = new QueryExpression[call.getOperands().size()];
816853
PredicateAnalyzerException firstError = null;

0 commit comments

Comments
 (0)