Skip to content

Commit d0faf70

Browse files
committed
Compare temporal IN/NOT IN values in the field's timestamp domain instead of string-collapsing them
Signed-off-by: Lantao Jin <ltjin@amazon.com>
1 parent b0a75e2 commit d0faf70

3 files changed

Lines changed: 110 additions & 36 deletions

File tree

core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java

Lines changed: 34 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -228,27 +228,30 @@ public RexNode visitIn(In node, CalcitePlanContext context) {
228228
final RexNode field = analyze(node.getField(), context);
229229
final List<RexNode> valueList =
230230
node.getValueList().stream().map(value -> analyze(value, context)).toList();
231+
// When the field is a temporal type, do NOT use leastRestrictive + rexBuilder.makeIn. For a
232+
// temporal field tested against string/date literals, leastRestrictive collapses the common
233+
// type to VARCHAR (the EXPR_DATE / EXPR_TIMESTAMP UDTs are VARCHAR-backed), so makeIn casts the
234+
// field DOWN to VARCHAR and string-compares mismatched renderings (e.g. '2018-06-23 00:00:00'
235+
// against '2018-06-23') — silently matching nothing. Rewrite the membership test as an OR of
236+
// PPL `=` comparisons, the same temporal-aware comparison path visitCompare takes for `=`, so
237+
// each value is coerced to the field's timestamp domain before comparison.
238+
ExprType fieldExprType = OpenSearchTypeFactory.convertRelDataTypeToExprType(field.getType());
239+
if (TEMPORAL_TYPES.contains(fieldExprType)) {
240+
List<RexNode> equalities =
241+
valueList.stream()
242+
.map(value -> PPLFuncImpTable.INSTANCE.resolve(context.rexBuilder, "=", field, value))
243+
.toList();
244+
return context.relBuilder.or(equalities);
245+
}
231246
final List<RelDataType> dataTypes =
232-
new java.util.ArrayList<>(valueList.stream().map(RexNode::getType).toList());
247+
new ArrayList<>(valueList.stream().map(RexNode::getType).toList());
233248
dataTypes.add(field.getType());
234249
RelDataType commonType = context.rexBuilder.getTypeFactory().leastRestrictive(dataTypes);
235250
if (commonType != null) {
236251
List<RexNode> newValueList =
237252
valueList.stream().map(value -> context.rexBuilder.makeCast(commonType, value)).toList();
238253
return context.rexBuilder.makeIn(field, newValueList);
239254
}
240-
// leastRestrictive() has no common type for mixed temporal representations — e.g. a standard
241-
// Calcite TIMESTAMP field tested against EXPR_DATE UDT values (`ts in (date('...'), ...)`).
242-
// Mirror visitBetween: widen all-temporal operands through CoercionUtils so the comparison
243-
// runs, while leaving genuinely incompatible mixes to raise SemanticCheckException below.
244-
List<RexNode> operands = new ArrayList<>(valueList);
245-
operands.add(field);
246-
List<RexNode> widened = widenTemporalOperands(context, operands);
247-
if (widened != null) {
248-
RexNode widenedField = widened.get(widened.size() - 1);
249-
List<RexNode> widenedValues = widened.subList(0, widened.size() - 1);
250-
return context.rexBuilder.makeIn(widenedField, widenedValues);
251-
}
252255
List<ExprType> exprTypes =
253256
dataTypes.stream().map(OpenSearchTypeFactory::convertRelDataTypeToExprType).toList();
254257
throw new SemanticCheckException(
@@ -257,6 +260,24 @@ public RexNode visitIn(In node, CalcitePlanContext context) {
257260
exprTypes.getLast(), exprTypes.subList(0, exprTypes.size() - 1)));
258261
}
259262

263+
private static final Set<ExprType> TEMPORAL_TYPES =
264+
Set.of(ExprCoreType.DATE, ExprCoreType.TIME, ExprCoreType.TIMESTAMP);
265+
266+
@Override
267+
public RexNode visitCompare(Compare node, CalcitePlanContext context) {
268+
RexNode left = analyze(node.getLeft(), context);
269+
RexNode right = analyze(node.getRight(), context);
270+
String op = node.getOperator();
271+
// Handle boolean_field != literal -> IS_NOT_TRUE/IS_NOT_FALSE
272+
if ("!=".equals(op) || "<>".equals(op)) {
273+
RexNode result = tryMakeBooleanNotEquals(left, right, context);
274+
if (result != null) {
275+
return result;
276+
}
277+
}
278+
return PPLFuncImpTable.INSTANCE.resolve(context.rexBuilder, op, left, right);
279+
}
280+
260281
/**
261282
* Widens a set of operands to a common temporal type when, and only when, every operand is a
262283
* temporal type (DATE / TIME / TIMESTAMP), including the EXPR_DATE / EXPR_TIME / EXPR_TIMESTAMP
@@ -276,24 +297,6 @@ public RexNode visitIn(In node, CalcitePlanContext context) {
276297
return CoercionUtils.widenArguments(context.rexBuilder, operands);
277298
}
278299

279-
private static final Set<ExprType> TEMPORAL_TYPES =
280-
Set.of(ExprCoreType.DATE, ExprCoreType.TIME, ExprCoreType.TIMESTAMP);
281-
282-
@Override
283-
public RexNode visitCompare(Compare node, CalcitePlanContext context) {
284-
RexNode left = analyze(node.getLeft(), context);
285-
RexNode right = analyze(node.getRight(), context);
286-
String op = node.getOperator();
287-
// Handle boolean_field != literal -> IS_NOT_TRUE/IS_NOT_FALSE
288-
if ("!=".equals(op) || "<>".equals(op)) {
289-
RexNode result = tryMakeBooleanNotEquals(left, right, context);
290-
if (result != null) {
291-
return result;
292-
}
293-
}
294-
return PPLFuncImpTable.INSTANCE.resolve(context.rexBuilder, op, left, right);
295-
}
296-
297300
@Override
298301
public RexNode visitBetween(Between node, CalcitePlanContext context) {
299302
RexNode value = analyze(node.getValue(), context);

integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLBasicIT.java

Lines changed: 36 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -517,8 +517,10 @@ public void testDateBetween() throws IOException {
517517

518518
@Test
519519
public void testDateIn() throws IOException {
520-
// birthdate is a TIMESTAMP-typed field; the IN values are DATE literals — same mixed-temporal
521-
// coercion path as testDateBetween, exercised through visitIn.
520+
// birthdate is a TIMESTAMP-typed field; the IN values are DATE literals. visitIn must compare
521+
// each value in the field's temporal domain (via PPL `=`) rather than letting leastRestrictive
522+
// collapse the common type to VARCHAR and string-compare mismatched renderings — which silently
523+
// matched nothing without pushdown. See visitIn in CalciteRexNodeVisitor.
522524
JSONObject actual =
523525
executeQuery(
524526
String.format(
@@ -530,6 +532,38 @@ public void testDateIn() throws IOException {
530532
actual, rows("Nanette", "2018-06-23 00:00:00"), rows("Elinor", "2018-06-27 00:00:00"));
531533
}
532534

535+
@Test
536+
public void testDateNotIn() throws IOException {
537+
// Complement of testDateIn: NOT IN over a temporal field. Exercises the complemented-points
538+
// pushdown branch, which must also format timestamp values rather than emit a flat terms query.
539+
JSONObject actual =
540+
executeQuery(
541+
String.format(
542+
"source=%s | where birthdate not in (DATE '2018-06-23', DATE '2018-06-27') |"
543+
+ " fields firstname | sort firstname",
544+
TEST_INDEX_BANK));
545+
verifySchema(actual, schema("firstname", "string"));
546+
verifyDataRows(
547+
actual,
548+
rows("Amber JOHnny"),
549+
rows("Dale"),
550+
rows("Dillard"),
551+
rows("Hattie"),
552+
rows("Virginia"));
553+
}
554+
555+
@Test
556+
public void testIn() throws IOException {
557+
// Non-temporal IN keeps the leastRestrictive + makeIn path; guards against the temporal-field
558+
// special-case in visitIn regressing membership tests over ordinary columns.
559+
JSONObject actual =
560+
executeQuery(
561+
String.format(
562+
"source=%s | where age in (32, 36) | fields firstname, age", TEST_INDEX_BANK));
563+
verifySchema(actual, schema("firstname", "string"), schema("age", "int"));
564+
verifyDataRows(actual, rows("Amber JOHnny", 32), rows("Hattie", 36), rows("Elinor", 36));
565+
}
566+
533567
@Test
534568
public void testXor() throws IOException {
535569
JSONObject result =

opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java

Lines changed: 40 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -784,16 +784,29 @@ private QueryExpression like(RexCall call) {
784784

785785
private static QueryExpression constructQueryExpressionForSearch(
786786
RexCall call, SwapResult pair) {
787+
boolean isTimeStamp =
788+
(pair.getKey() instanceof NamedFieldExpression namedField)
789+
&& namedField.isTimeStampType();
787790
if (isSearchWithComplementedPoints(call)) {
791+
// A NOT IN over a timestamp field cannot use a flat terms query: timestamp values must be
792+
// normalized + formatted (see in()/termsQuery, which does neither). Decompose into the
793+
// complement of an OR of formatted per-point range queries, mirroring the range branch.
794+
if (isTimeStamp) {
795+
return pointsAsTimestampOr(call, pair).not();
796+
}
788797
return QueryExpression.create(pair.getKey()).notIn(pair.getValue());
789798
} else if (isSearchWithPoints(call)) {
799+
// IN over a timestamp field: the flat terms query in in() emits unformatted values like
800+
// '2018-06-23 00:00:00' that the date field cannot parse. Decompose into an OR of formatted
801+
// per-point range queries (equals(point, true) builds gte/lte + format="date_time"), the
802+
// same path the range branch below and visitCompare's `=` already take.
803+
if (isTimeStamp) {
804+
return pointsAsTimestampOr(call, pair);
805+
}
790806
return QueryExpression.create(pair.getKey()).in(pair.getValue());
791807
} else {
792808
Sarg<?> sarg = pair.getValue().literal.getValueAs(Sarg.class);
793809
Set<? extends Range<?>> rangeSet = requireNonNull(sarg).rangeSet.asRanges();
794-
boolean isTimeStamp =
795-
(pair.getKey() instanceof NamedFieldExpression namedField)
796-
&& namedField.isTimeStampType();
797810
List<QueryExpression> queryExpressions =
798811
rangeSet.stream()
799812
.map(
@@ -811,6 +824,30 @@ private static QueryExpression constructQueryExpressionForSearch(
811824
}
812825
}
813826

827+
/**
828+
* Builds an OR of per-point {@code equals(point, true)} query expressions for a points-only
829+
* Sarg over a timestamp field. Each point becomes a {@code gte/lte} range query with {@code
830+
* format="date_time"}, so the timestamp value is normalized to the date field's accepted format
831+
* — unlike a flat terms query, which emits unformatted values the date field rejects.
832+
*/
833+
private static QueryExpression pointsAsTimestampOr(RexCall call, SwapResult pair) {
834+
RexLiteral literal = (RexLiteral) call.getOperands().get(1);
835+
Sarg<?> sarg = requireNonNull(literal.getValueAs(Sarg.class), "Sarg");
836+
Set<? extends Range<?>> ranges =
837+
(sarg.isComplementedPoints() ? sarg.negate() : sarg).rangeSet.asRanges();
838+
List<QueryExpression> queryExpressions =
839+
ranges.stream()
840+
.map(
841+
range ->
842+
QueryExpression.create(pair.getKey())
843+
.equals(sargPointValue(range.lowerEndpoint()), true))
844+
.toList();
845+
if (queryExpressions.size() == 1) {
846+
return queryExpressions.getFirst();
847+
}
848+
return CompoundQueryExpression.or(queryExpressions.toArray(new QueryExpression[0]));
849+
}
850+
814851
private QueryExpression andOr(RexCall call) {
815852
QueryExpression[] expressions = new QueryExpression[call.getOperands().size()];
816853
PredicateAnalyzerException firstError = null;

0 commit comments

Comments
 (0)