Skip to content

Commit 9a32644

Browse files
authored
[BugFix] Key date_time pushdown on field type, not literal UDT (#5481) (#5515)
Signed-off-by: Jialiang Liang <jiallian@amazon.com>
1 parent c6360e9 commit 9a32644

4 files changed

Lines changed: 271 additions & 22 deletions

File tree

integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLBasicIT.java

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -515,6 +515,25 @@ public void testDateBetween() throws IOException {
515515
actual, rows("Nanette", "2018-06-23 00:00:00"), rows("Elinor", "2018-06-27 00:00:00"));
516516
}
517517

518+
/**
519+
* A timestamp range comparison AND'd with an {@code IN} clause must push down and return rows.
520+
*/
521+
@Test
522+
public void testTimestampRangeWithInClausePushDown() throws IOException {
523+
JSONObject actual =
524+
executeQuery(
525+
String.format(
526+
"source=%s | where birthdate > timestamp('2018-06-01 00:00:00') | where state in"
527+
+ " ('IL', 'TN', 'WA') | fields firstname, state, birthdate",
528+
TEST_INDEX_BANK));
529+
verifySchema(
530+
actual,
531+
schema("firstname", "string"),
532+
schema("state", "string"),
533+
schema("birthdate", "timestamp"));
534+
verifyDataRows(actual, rows("Elinor", "WA", "2018-06-27 00:00:00"));
535+
}
536+
518537
@Test
519538
public void testDateIn() throws IOException {
520539
// birthdate is a TIMESTAMP-typed field; the IN values are DATE literals. visitIn must compare
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
# Issue: https://github.com/opensearch-project/sql/issues/5481
2+
# A timestamp range comparison AND'd with an IN clause on another field must push down and
3+
# return rows. Calcite folds the IN into a Sarg and strips the timestamp literal's UDT; without
4+
# the field-type-keyed fix the range query ships an unformatted date and the shard rejects it.
5+
setup:
6+
- do:
7+
query.settings:
8+
body:
9+
transient:
10+
plugins.calcite.enabled: true
11+
12+
- do:
13+
indices.create:
14+
index: issue5481
15+
body:
16+
settings:
17+
number_of_shards: 1
18+
number_of_replicas: 0
19+
mappings:
20+
properties:
21+
event_time:
22+
type: date
23+
severity:
24+
type: keyword
25+
26+
- do:
27+
bulk:
28+
refresh: true
29+
body:
30+
- '{"index": {"_index": "issue5481", "_id": "1"}}'
31+
- '{"event_time": "2026-05-28T10:00:00Z", "severity": "ERROR"}'
32+
- '{"index": {"_index": "issue5481", "_id": "2"}}'
33+
- '{"event_time": "2026-05-28T10:05:00Z", "severity": "WARN"}'
34+
- '{"index": {"_index": "issue5481", "_id": "3"}}'
35+
- '{"event_time": "2026-05-28T10:10:00Z", "severity": "INFO"}'
36+
- '{"index": {"_index": "issue5481", "_id": "4"}}'
37+
- '{"event_time": "2026-05-28T10:15:00Z", "severity": "ERROR"}'
38+
- '{"index": {"_index": "issue5481", "_id": "5"}}'
39+
- '{"event_time": "2026-05-28T10:20:00Z", "severity": "WARN"}'
40+
- '{"index": {"_index": "issue5481", "_id": "6"}}'
41+
- '{"event_time": "2026-05-28T10:25:00Z", "severity": "DEBUG"}'
42+
43+
---
44+
teardown:
45+
- do:
46+
indices.delete:
47+
index: issue5481
48+
ignore_unavailable: true
49+
- do:
50+
query.settings:
51+
body:
52+
transient:
53+
plugins.calcite.enabled: false
54+
55+
---
56+
"Issue 5481: timestamp range AND keyword IN pushes down and returns rows":
57+
- skip:
58+
features:
59+
- headers
60+
- do:
61+
headers:
62+
Content-Type: 'application/json'
63+
ppl:
64+
body:
65+
query: source=issue5481 | where event_time > timestamp('2026-05-28 10:08:00') | where severity in ('ERROR', 'WARN') | fields severity, event_time | sort event_time
66+
67+
- match: { total: 2 }
68+
- match: { schema: [ { name: severity, type: "string" }, { name: event_time, type: "timestamp" } ] }
69+
- match: { datarows: [ [ "ERROR", "2026-05-28 10:15:00" ], [ "WARN", "2026-05-28 10:20:00" ] ] }

opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java

Lines changed: 51 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1415,10 +1415,12 @@ public QueryExpression notLike(LiteralExpression literal) {
14151415

14161416
@Override
14171417
public QueryExpression equals(LiteralExpression literal) {
1418-
Object value = literal.value();
1419-
if (literal.isDateTime()) {
1418+
boolean isTimeStamp = isFieldOrLiteralDateTime(literal);
1419+
Object value = convertEndpointValue(literal.value(), isTimeStamp);
1420+
if (isTimeStamp) {
14201421
builder =
1421-
addFormatIfNecessary(literal, rangeQuery(getFieldReference()).gte(value).lte(value));
1422+
addFormatIfNecessary(
1423+
isTimeStamp, rangeQuery(getFieldReference()).gte(value).lte(value));
14221424
} else {
14231425
builder = termQuery(getFieldReferenceForTermQuery(), value);
14241426
}
@@ -1427,12 +1429,15 @@ public QueryExpression equals(LiteralExpression literal) {
14271429

14281430
@Override
14291431
public QueryExpression notEquals(LiteralExpression literal) {
1430-
Object value = literal.value();
1431-
if (literal.isDateTime()) {
1432+
boolean isTimeStamp = isFieldOrLiteralDateTime(literal);
1433+
Object value = convertEndpointValue(literal.value(), isTimeStamp);
1434+
if (isTimeStamp) {
14321435
builder =
14331436
boolQuery()
1434-
.should(addFormatIfNecessary(literal, rangeQuery(getFieldReference()).gt(value)))
1435-
.should(addFormatIfNecessary(literal, rangeQuery(getFieldReference()).lt(value)));
1437+
.should(
1438+
addFormatIfNecessary(isTimeStamp, rangeQuery(getFieldReference()).gt(value)))
1439+
.should(
1440+
addFormatIfNecessary(isTimeStamp, rangeQuery(getFieldReference()).lt(value)));
14361441
} else {
14371442
builder =
14381443
boolQuery()
@@ -1445,32 +1450,48 @@ public QueryExpression notEquals(LiteralExpression literal) {
14451450

14461451
@Override
14471452
public QueryExpression gt(LiteralExpression literal) {
1448-
Object value = literal.value();
1449-
builder = addFormatIfNecessary(literal, rangeQuery(getFieldReference()).gt(value));
1453+
boolean isTimeStamp = isFieldOrLiteralDateTime(literal);
1454+
Object value = convertEndpointValue(literal.value(), isTimeStamp);
1455+
builder = addFormatIfNecessary(isTimeStamp, rangeQuery(getFieldReference()).gt(value));
14501456
return this;
14511457
}
14521458

14531459
@Override
14541460
public QueryExpression gte(LiteralExpression literal) {
1455-
Object value = literal.value();
1456-
builder = addFormatIfNecessary(literal, rangeQuery(getFieldReference()).gte(value));
1461+
boolean isTimeStamp = isFieldOrLiteralDateTime(literal);
1462+
Object value = convertEndpointValue(literal.value(), isTimeStamp);
1463+
builder = addFormatIfNecessary(isTimeStamp, rangeQuery(getFieldReference()).gte(value));
14571464
return this;
14581465
}
14591466

14601467
@Override
14611468
public QueryExpression lt(LiteralExpression literal) {
1462-
Object value = literal.value();
1463-
builder = addFormatIfNecessary(literal, rangeQuery(getFieldReference()).lt(value));
1469+
boolean isTimeStamp = isFieldOrLiteralDateTime(literal);
1470+
Object value = convertEndpointValue(literal.value(), isTimeStamp);
1471+
builder = addFormatIfNecessary(isTimeStamp, rangeQuery(getFieldReference()).lt(value));
14641472
return this;
14651473
}
14661474

14671475
@Override
14681476
public QueryExpression lte(LiteralExpression literal) {
1469-
Object value = literal.value();
1470-
builder = addFormatIfNecessary(literal, rangeQuery(getFieldReference()).lte(value));
1477+
boolean isTimeStamp = isFieldOrLiteralDateTime(literal);
1478+
Object value = convertEndpointValue(literal.value(), isTimeStamp);
1479+
builder = addFormatIfNecessary(isTimeStamp, rangeQuery(getFieldReference()).lte(value));
14711480
return this;
14721481
}
14731482

1483+
/**
1484+
* Whether the comparison is a timestamp/date range. The field type is the reliable signal:
1485+
* {@code literal.isDateTime()} reads the literal's UDT, which {@link
1486+
* org.apache.calcite.rex.RexSimplify} can strip (to VARCHAR) when a sibling clause is folded
1487+
* into a {@code Sarg}, e.g. {@code @timestamp > X AND severityText IN (...)}. Falling back to
1488+
* {@code rel.isTimeStampType()} keeps ISO-8601 normalization and the {@code "date_time"} format
1489+
* hint on the range query.
1490+
*/
1491+
private boolean isFieldOrLiteralDateTime(LiteralExpression literal) {
1492+
return literal.isDateTime() || (rel != null && rel.isTimeStampType());
1493+
}
1494+
14741495
@Override
14751496
public QueryExpression match(String query, Map<String, String> optionalArguments) {
14761497
builder = new MatchQuery().build(getFieldReference(), query, optionalArguments);
@@ -1617,6 +1638,11 @@ public QueryExpression between(Range<?> range, boolean isTimeStamp) {
16171638
}
16181639

16191640
private Object convertEndpointValue(Object value, boolean isTimeStamp) {
1641+
// Shared normalization entry point: guard a null endpoint so the timestamp branch's
1642+
// value.toString() cannot NPE. sargPointValue never produces null from a non-null input.
1643+
if (value == null) {
1644+
return null;
1645+
}
16201646
value = sargPointValue(value);
16211647
return isTimeStamp ? timestampValueForPushDown(value.toString()) : value;
16221648
}
@@ -1749,16 +1775,19 @@ public static ScriptSortBuilder.ScriptSortType getScriptSortType(RelDataType rel
17491775
}
17501776

17511777
/**
1752-
* By default, range queries on date/time need use the format of the source to parse the literal.
1753-
* So we need to specify that the literal has "date_time" format
1778+
* By default a range query parses its date literal with the field's mapped format, so we attach
1779+
* the {@code "date_time"} format. The caller resolves whether the comparison is a timestamp range
1780+
* from the field type (see {@link SimpleQueryExpression#isFieldOrLiteralDateTime}) rather than
1781+
* the literal's UDT, which {@link org.apache.calcite.rex.RexSimplify} can strip when a sibling
1782+
* clause is folded into a {@code Sarg}.
17541783
*
1755-
* @param literal literal value
1756-
* @param rangeQueryBuilder query builder to optionally add {@code format} expression
1757-
* @return existing builder with possible {@code format} attribute
1784+
* @param isTimeStamp whether the compared field or literal is a date/time type
1785+
* @param rangeQueryBuilder query builder to optionally add the {@code format} attribute
1786+
* @return the same builder, with {@code format("date_time")} added when {@code isTimeStamp}
17581787
*/
17591788
private static RangeQueryBuilder addFormatIfNecessary(
1760-
LiteralExpression literal, RangeQueryBuilder rangeQueryBuilder) {
1761-
if (literal.isDateTime()) {
1789+
boolean isTimeStamp, RangeQueryBuilder rangeQueryBuilder) {
1790+
if (isTimeStamp) {
17621791
rangeQueryBuilder.format("date_time");
17631792
}
17641793
return rangeQueryBuilder;

opensearch/src/test/java/org/opensearch/sql/opensearch/request/PredicateAnalyzerTest.java

Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1147,6 +1147,138 @@ void notEquals_generatesBoolQueryForDateTime() throws ExpressionNotAnalyzableExc
11471147
result.toString());
11481148
}
11491149

1150+
/**
1151+
* RexSimplify can strip the EXPR_TIMESTAMP UDT off a literal when a sibling clause is folded into
1152+
* a Sarg (e.g. {@code @timestamp > X AND severityText IN (...)}), leaving the literal as plain
1153+
* VARCHAR. The comparison must still emit a {@code format("date_time")} range query keyed off the
1154+
* field's type so the shard's default date parser accepts the value.
1155+
*/
1156+
@Test
1157+
void gt_normalizesVarcharLiteralAgainstTimestampField() throws ExpressionNotAnalyzableException {
1158+
RexLiteral varcharLiteral = (RexLiteral) builder.makeLiteral("1987-02-03 04:34:56");
1159+
RexNode call = builder.makeCall(SqlStdOperatorTable.GREATER_THAN, field4, varcharLiteral);
1160+
QueryBuilder result = PredicateAnalyzer.analyze(call, schema, fieldTypes);
1161+
1162+
assertInstanceOf(RangeQueryBuilder.class, result);
1163+
assertEquals(
1164+
"""
1165+
{
1166+
"range" : {
1167+
"d" : {
1168+
"from" : "1987-02-03T04:34:56.000Z",
1169+
"to" : null,
1170+
"include_lower" : false,
1171+
"include_upper" : true,
1172+
"format" : "date_time",
1173+
"boost" : 1.0
1174+
}
1175+
}
1176+
}\
1177+
""",
1178+
result.toString());
1179+
}
1180+
1181+
// Companion stripped-VARCHAR-literal tests for the remaining range shapes (equals -> gte+lte,
1182+
// notEquals -> two-should bool, lte -> single range). Each must produce the same DSL as its
1183+
// intact-UDT counterpart, proving the field-type fallback in isFieldOrLiteralDateTime keeps
1184+
// ISO-8601 normalization + format("date_time") on these comparison ops too, not just gt. See #5481.
1185+
@Test
1186+
void equals_normalizesVarcharLiteralAgainstTimestampField()
1187+
throws ExpressionNotAnalyzableException {
1188+
RexLiteral varcharLiteral = (RexLiteral) builder.makeLiteral("1987-02-03 04:34:56");
1189+
RexNode call = builder.makeCall(SqlStdOperatorTable.EQUALS, field4, varcharLiteral);
1190+
QueryBuilder result = PredicateAnalyzer.analyze(call, schema, fieldTypes);
1191+
1192+
assertInstanceOf(RangeQueryBuilder.class, result);
1193+
assertEquals(
1194+
"""
1195+
{
1196+
"range" : {
1197+
"d" : {
1198+
"from" : "1987-02-03T04:34:56.000Z",
1199+
"to" : "1987-02-03T04:34:56.000Z",
1200+
"include_lower" : true,
1201+
"include_upper" : true,
1202+
"format" : "date_time",
1203+
"boost" : 1.0
1204+
}
1205+
}
1206+
}\
1207+
""",
1208+
result.toString());
1209+
}
1210+
1211+
@Test
1212+
void notEquals_normalizesVarcharLiteralAgainstTimestampField()
1213+
throws ExpressionNotAnalyzableException {
1214+
RexLiteral varcharLiteral = (RexLiteral) builder.makeLiteral("1987-02-03 04:34:56");
1215+
RexNode call = builder.makeCall(SqlStdOperatorTable.NOT_EQUALS, field4, varcharLiteral);
1216+
QueryBuilder result = PredicateAnalyzer.analyze(call, schema, fieldTypes);
1217+
1218+
assertInstanceOf(BoolQueryBuilder.class, result);
1219+
assertEquals(
1220+
"""
1221+
{
1222+
"bool" : {
1223+
"should" : [
1224+
{
1225+
"range" : {
1226+
"d" : {
1227+
"from" : "1987-02-03T04:34:56.000Z",
1228+
"to" : null,
1229+
"include_lower" : false,
1230+
"include_upper" : true,
1231+
"format" : "date_time",
1232+
"boost" : 1.0
1233+
}
1234+
}
1235+
},
1236+
{
1237+
"range" : {
1238+
"d" : {
1239+
"from" : null,
1240+
"to" : "1987-02-03T04:34:56.000Z",
1241+
"include_lower" : true,
1242+
"include_upper" : false,
1243+
"format" : "date_time",
1244+
"boost" : 1.0
1245+
}
1246+
}
1247+
}
1248+
],
1249+
"adjust_pure_negative" : true,
1250+
"boost" : 1.0
1251+
}
1252+
}\
1253+
""",
1254+
result.toString());
1255+
}
1256+
1257+
@Test
1258+
void lte_normalizesVarcharLiteralAgainstTimestampField() throws ExpressionNotAnalyzableException {
1259+
RexLiteral varcharLiteral = (RexLiteral) builder.makeLiteral("1987-02-03 04:34:56");
1260+
RexNode call = builder.makeCall(SqlStdOperatorTable.LESS_THAN_OR_EQUAL, field4, varcharLiteral);
1261+
QueryBuilder result = PredicateAnalyzer.analyze(call, schema, fieldTypes);
1262+
1263+
assertInstanceOf(RangeQueryBuilder.class, result);
1264+
assertEquals(
1265+
"""
1266+
{
1267+
"range" : {
1268+
"d" : {
1269+
"from" : null,
1270+
"to" : "1987-02-03T04:34:56.000Z",
1271+
"include_lower" : true,
1272+
"include_upper" : true,
1273+
"format" : "date_time",
1274+
"boost" : 1.0
1275+
}
1276+
}
1277+
}\
1278+
""",
1279+
result.toString());
1280+
}
1281+
11501282
@Test
11511283
void gte_generatesRangeQueryWithFormatForDateTime() throws ExpressionNotAnalyzableException {
11521284
RexNode call =

0 commit comments

Comments
 (0)