Skip to content

Commit 311d38f

Browse files
committed
Address review: reuse addFormatIfNecessary + value(), add e2e/yaml tests
Refactor the opensearch-project#5481 fix per review: - Reuse the existing addFormatIfNecessary helper (re-keyed on the resolved isTimeStamp boolean) instead of inlining format("date_time") across the six comparison methods; the helper was orphaned by the prior commit. - Reuse LiteralExpression.value() + the existing Sarg-path convertEndpointValue for endpoint normalization, removing the parallel endpointValue helper. - Guard convertEndpointValue against a null endpoint (shared entry point). Tests: - Add stripped-VARCHAR unit cases for equals/notEquals/lte (gt already covered) so every range shape pins the field-type fallback, not just gt. - Add end-to-end IT CalcitePPLBasicIT.testTimestampRangeWithInClausePushDown (timestamp range AND keyword IN), confirmed to reproduce the shard date-parse error when the fix is reverted. - Add yamlRest test issues/5481.yml for the same shape. Signed-off-by: Jialiang Liang <jiallian@amazon.com>
1 parent 1dfe1b7 commit 311d38f

4 files changed

Lines changed: 232 additions & 37 deletions

File tree

integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLBasicIT.java

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -512,6 +512,28 @@ public void testDateBetween() throws IOException {
512512
actual, rows("Nanette", "2018-06-23 00:00:00"), rows("Elinor", "2018-06-27 00:00:00"));
513513
}
514514

515+
/**
516+
* Issue #5481: a timestamp range comparison AND'd with an {@code IN} clause on another field must
517+
* push down and return rows. Calcite folds the {@code IN} into a {@code Sarg} and strips the
518+
* timestamp literal's UDT; without the field-type-keyed fix the range query ships an unformatted
519+
* date to the shard, which rejects it with a date-parse error (HTTP 500).
520+
*/
521+
@Test
522+
public void testTimestampRangeWithInClausePushDown() throws IOException {
523+
JSONObject actual =
524+
executeQuery(
525+
String.format(
526+
"source=%s | where birthdate > timestamp('2018-06-01 00:00:00') | where state in"
527+
+ " ('IL', 'TN', 'WA') | fields firstname, state, birthdate",
528+
TEST_INDEX_BANK));
529+
verifySchema(
530+
actual,
531+
schema("firstname", "string"),
532+
schema("state", "string"),
533+
schema("birthdate", "timestamp"));
534+
verifyDataRows(actual, rows("Elinor", "WA", "2018-06-27 00:00:00"));
535+
}
536+
515537
@Test
516538
public void testXor() throws IOException {
517539
JSONObject result =
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
# Issue: https://github.com/opensearch-project/sql/issues/5481
2+
# A timestamp range comparison AND'd with an IN clause on another field must push down and
3+
# return rows. Calcite folds the IN into a Sarg and strips the timestamp literal's UDT; without
4+
# the field-type-keyed fix the range query ships an unformatted date and the shard rejects it.
5+
setup:
6+
- do:
7+
query.settings:
8+
body:
9+
transient:
10+
plugins.calcite.enabled: true
11+
12+
- do:
13+
indices.create:
14+
index: issue5481
15+
body:
16+
settings:
17+
number_of_shards: 1
18+
number_of_replicas: 0
19+
mappings:
20+
properties:
21+
event_time:
22+
type: date
23+
severity:
24+
type: keyword
25+
26+
- do:
27+
bulk:
28+
refresh: true
29+
body:
30+
- '{"index": {"_index": "issue5481", "_id": "1"}}'
31+
- '{"event_time": "2026-05-28T10:00:00Z", "severity": "ERROR"}'
32+
- '{"index": {"_index": "issue5481", "_id": "2"}}'
33+
- '{"event_time": "2026-05-28T10:05:00Z", "severity": "WARN"}'
34+
- '{"index": {"_index": "issue5481", "_id": "3"}}'
35+
- '{"event_time": "2026-05-28T10:10:00Z", "severity": "INFO"}'
36+
- '{"index": {"_index": "issue5481", "_id": "4"}}'
37+
- '{"event_time": "2026-05-28T10:15:00Z", "severity": "ERROR"}'
38+
- '{"index": {"_index": "issue5481", "_id": "5"}}'
39+
- '{"event_time": "2026-05-28T10:20:00Z", "severity": "WARN"}'
40+
- '{"index": {"_index": "issue5481", "_id": "6"}}'
41+
- '{"event_time": "2026-05-28T10:25:00Z", "severity": "DEBUG"}'
42+
43+
---
44+
teardown:
45+
- do:
46+
indices.delete:
47+
index: issue5481
48+
ignore_unavailable: true
49+
- do:
50+
query.settings:
51+
body:
52+
transient:
53+
plugins.calcite.enabled: false
54+
55+
---
56+
"Issue 5481: timestamp range AND keyword IN pushes down and returns rows":
57+
- skip:
58+
features:
59+
- headers
60+
- do:
61+
headers:
62+
Content-Type: 'application/json'
63+
ppl:
64+
body:
65+
query: source=issue5481 | where event_time > timestamp('2026-05-28 10:08:00') | where severity in ('ERROR', 'WARN') | fields severity, event_time | sort event_time
66+
67+
- match: { total: 2 }
68+
- match: { schema: [ { name: severity, type: "string" }, { name: event_time, type: "timestamp" } ] }
69+
- match: { datarows: [ [ "ERROR", "2026-05-28 10:15:00" ], [ "WARN", "2026-05-28 10:20:00" ] ] }

opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java

Lines changed: 40 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1379,9 +1379,11 @@ public QueryExpression notLike(LiteralExpression literal) {
13791379
@Override
13801380
public QueryExpression equals(LiteralExpression literal) {
13811381
boolean isTimeStamp = isFieldOrLiteralDateTime(literal);
1382-
Object value = endpointValue(literal, isTimeStamp);
1382+
Object value = convertEndpointValue(literal.value(), isTimeStamp);
13831383
if (isTimeStamp) {
1384-
builder = rangeQuery(getFieldReference()).gte(value).lte(value).format("date_time");
1384+
builder =
1385+
addFormatIfNecessary(
1386+
isTimeStamp, rangeQuery(getFieldReference()).gte(value).lte(value));
13851387
} else {
13861388
builder = termQuery(getFieldReferenceForTermQuery(), value);
13871389
}
@@ -1391,12 +1393,14 @@ public QueryExpression equals(LiteralExpression literal) {
13911393
@Override
13921394
public QueryExpression notEquals(LiteralExpression literal) {
13931395
boolean isTimeStamp = isFieldOrLiteralDateTime(literal);
1394-
Object value = endpointValue(literal, isTimeStamp);
1396+
Object value = convertEndpointValue(literal.value(), isTimeStamp);
13951397
if (isTimeStamp) {
13961398
builder =
13971399
boolQuery()
1398-
.should(rangeQuery(getFieldReference()).gt(value).format("date_time"))
1399-
.should(rangeQuery(getFieldReference()).lt(value).format("date_time"));
1400+
.should(
1401+
addFormatIfNecessary(isTimeStamp, rangeQuery(getFieldReference()).gt(value)))
1402+
.should(
1403+
addFormatIfNecessary(isTimeStamp, rangeQuery(getFieldReference()).lt(value)));
14001404
} else {
14011405
builder =
14021406
boolQuery()
@@ -1410,56 +1414,47 @@ public QueryExpression notEquals(LiteralExpression literal) {
14101414
@Override
14111415
public QueryExpression gt(LiteralExpression literal) {
14121416
boolean isTimeStamp = isFieldOrLiteralDateTime(literal);
1413-
Object value = endpointValue(literal, isTimeStamp);
1414-
RangeQueryBuilder rq = rangeQuery(getFieldReference()).gt(value);
1415-
if (isTimeStamp) rq.format("date_time");
1416-
builder = rq;
1417+
Object value = convertEndpointValue(literal.value(), isTimeStamp);
1418+
builder = addFormatIfNecessary(isTimeStamp, rangeQuery(getFieldReference()).gt(value));
14171419
return this;
14181420
}
14191421

14201422
@Override
14211423
public QueryExpression gte(LiteralExpression literal) {
14221424
boolean isTimeStamp = isFieldOrLiteralDateTime(literal);
1423-
Object value = endpointValue(literal, isTimeStamp);
1424-
RangeQueryBuilder rq = rangeQuery(getFieldReference()).gte(value);
1425-
if (isTimeStamp) rq.format("date_time");
1426-
builder = rq;
1425+
Object value = convertEndpointValue(literal.value(), isTimeStamp);
1426+
builder = addFormatIfNecessary(isTimeStamp, rangeQuery(getFieldReference()).gte(value));
14271427
return this;
14281428
}
14291429

14301430
@Override
14311431
public QueryExpression lt(LiteralExpression literal) {
14321432
boolean isTimeStamp = isFieldOrLiteralDateTime(literal);
1433-
Object value = endpointValue(literal, isTimeStamp);
1434-
RangeQueryBuilder rq = rangeQuery(getFieldReference()).lt(value);
1435-
if (isTimeStamp) rq.format("date_time");
1436-
builder = rq;
1433+
Object value = convertEndpointValue(literal.value(), isTimeStamp);
1434+
builder = addFormatIfNecessary(isTimeStamp, rangeQuery(getFieldReference()).lt(value));
14371435
return this;
14381436
}
14391437

14401438
@Override
14411439
public QueryExpression lte(LiteralExpression literal) {
14421440
boolean isTimeStamp = isFieldOrLiteralDateTime(literal);
1443-
Object value = endpointValue(literal, isTimeStamp);
1444-
RangeQueryBuilder rq = rangeQuery(getFieldReference()).lte(value);
1445-
if (isTimeStamp) rq.format("date_time");
1446-
builder = rq;
1441+
Object value = convertEndpointValue(literal.value(), isTimeStamp);
1442+
builder = addFormatIfNecessary(isTimeStamp, rangeQuery(getFieldReference()).lte(value));
14471443
return this;
14481444
}
14491445

1450-
// Field type is the reliable source: RexSimplify can strip the literal's UDT when a sibling
1451-
// clause is folded into a Sarg, leaving it as VARCHAR.
1446+
/**
1447+
* Whether the comparison is a timestamp/date range. The field type is the reliable signal:
1448+
* {@code literal.isDateTime()} reads the literal's UDT, which {@link
1449+
* org.apache.calcite.rex.RexSimplify} can strip (to VARCHAR) when a sibling clause is folded
1450+
* into a {@code Sarg}, e.g. {@code @timestamp > X AND severityText IN (...)}. Falling back to
1451+
* {@code rel.isTimeStampType()} keeps ISO-8601 normalization and the {@code "date_time"} format
1452+
* hint on the range query.
1453+
*/
14521454
private boolean isFieldOrLiteralDateTime(LiteralExpression literal) {
14531455
return literal.isDateTime() || (rel != null && rel.isTimeStampType());
14541456
}
14551457

1456-
private Object endpointValue(LiteralExpression literal, boolean isTimeStamp) {
1457-
if (!isTimeStamp || literal.isDateTime()) {
1458-
return literal.value();
1459-
}
1460-
return timestampValueForPushDown(literal.value().toString());
1461-
}
1462-
14631458
@Override
14641459
public QueryExpression match(String query, Map<String, String> optionalArguments) {
14651460
builder = new MatchQuery().build(getFieldReference(), query, optionalArguments);
@@ -1607,6 +1602,11 @@ public QueryExpression between(Range<?> range, boolean isTimeStamp) {
16071602

16081603
private Object convertEndpointValue(Object value, boolean isTimeStamp) {
16091604
value = sargPointValue(value);
1605+
// Shared normalization entry point: a Sarg endpoint can normalize to null, and
1606+
// timestampValueForPushDown(value.toString()) would otherwise NPE.
1607+
if (value == null) {
1608+
return null;
1609+
}
16101610
return isTimeStamp ? timestampValueForPushDown(value.toString()) : value;
16111611
}
16121612
}
@@ -1738,16 +1738,19 @@ public static ScriptSortBuilder.ScriptSortType getScriptSortType(RelDataType rel
17381738
}
17391739

17401740
/**
1741-
* By default, range queries on date/time need use the format of the source to parse the literal.
1742-
* So we need to specify that the literal has "date_time" format
1741+
* Range queries on date/time fields need the source format to parse the literal, so we attach the
1742+
* {@code "date_time"} format. The caller resolves whether the comparison is a timestamp range
1743+
* from the field type (see {@link SimpleQueryExpression#isFieldOrLiteralDateTime}) rather than
1744+
* the literal's UDT, which {@link org.apache.calcite.rex.RexSimplify} can strip when a sibling
1745+
* clause is folded into a {@code Sarg}.
17431746
*
1744-
* @param literal literal value
1745-
* @param rangeQueryBuilder query builder to optionally add {@code format} expression
1746-
* @return existing builder with possible {@code format} attribute
1747+
* @param isTimeStamp whether the comparison endpoint is a timestamp/date range endpoint
1748+
* @param rangeQueryBuilder query builder to optionally add the {@code format} attribute
1749+
* @return the same builder, with {@code format("date_time")} added when {@code isTimeStamp}
17471750
*/
17481751
private static RangeQueryBuilder addFormatIfNecessary(
1749-
LiteralExpression literal, RangeQueryBuilder rangeQueryBuilder) {
1750-
if (literal.isDateTime()) {
1752+
boolean isTimeStamp, RangeQueryBuilder rangeQueryBuilder) {
1753+
if (isTimeStamp) {
17511754
rangeQueryBuilder.format("date_time");
17521755
}
17531756
return rangeQueryBuilder;

opensearch/src/test/java/org/opensearch/sql/opensearch/request/PredicateAnalyzerTest.java

Lines changed: 101 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1178,6 +1178,107 @@ void gt_normalizesVarcharLiteralAgainstTimestampField() throws ExpressionNotAnal
11781178
result.toString());
11791179
}
11801180

1181+
// Companion stripped-VARCHAR-literal tests for the remaining range shapes (equals -> gte+lte,
1182+
// notEquals -> two-should bool, lte -> single range). Each must produce the same DSL as its
1183+
// intact-UDT counterpart, proving the field-type fallback in isFieldOrLiteralDateTime keeps
1184+
// ISO-8601 normalization + format("date_time") on every comparison op, not just gt. See #5481.
1185+
@Test
1186+
void equals_normalizesVarcharLiteralAgainstTimestampField()
1187+
throws ExpressionNotAnalyzableException {
1188+
RexLiteral varcharLiteral = (RexLiteral) builder.makeLiteral("1987-02-03 04:34:56");
1189+
RexNode call = builder.makeCall(SqlStdOperatorTable.EQUALS, field4, varcharLiteral);
1190+
QueryBuilder result = PredicateAnalyzer.analyze(call, schema, fieldTypes);
1191+
1192+
assertInstanceOf(RangeQueryBuilder.class, result);
1193+
assertEquals(
1194+
"""
1195+
{
1196+
"range" : {
1197+
"d" : {
1198+
"from" : "1987-02-03T04:34:56.000Z",
1199+
"to" : "1987-02-03T04:34:56.000Z",
1200+
"include_lower" : true,
1201+
"include_upper" : true,
1202+
"format" : "date_time",
1203+
"boost" : 1.0
1204+
}
1205+
}
1206+
}\
1207+
""",
1208+
result.toString());
1209+
}
1210+
1211+
@Test
1212+
void notEquals_normalizesVarcharLiteralAgainstTimestampField()
1213+
throws ExpressionNotAnalyzableException {
1214+
RexLiteral varcharLiteral = (RexLiteral) builder.makeLiteral("1987-02-03 04:34:56");
1215+
RexNode call = builder.makeCall(SqlStdOperatorTable.NOT_EQUALS, field4, varcharLiteral);
1216+
QueryBuilder result = PredicateAnalyzer.analyze(call, schema, fieldTypes);
1217+
1218+
assertInstanceOf(BoolQueryBuilder.class, result);
1219+
assertEquals(
1220+
"""
1221+
{
1222+
"bool" : {
1223+
"should" : [
1224+
{
1225+
"range" : {
1226+
"d" : {
1227+
"from" : "1987-02-03T04:34:56.000Z",
1228+
"to" : null,
1229+
"include_lower" : false,
1230+
"include_upper" : true,
1231+
"format" : "date_time",
1232+
"boost" : 1.0
1233+
}
1234+
}
1235+
},
1236+
{
1237+
"range" : {
1238+
"d" : {
1239+
"from" : null,
1240+
"to" : "1987-02-03T04:34:56.000Z",
1241+
"include_lower" : true,
1242+
"include_upper" : false,
1243+
"format" : "date_time",
1244+
"boost" : 1.0
1245+
}
1246+
}
1247+
}
1248+
],
1249+
"adjust_pure_negative" : true,
1250+
"boost" : 1.0
1251+
}
1252+
}\
1253+
""",
1254+
result.toString());
1255+
}
1256+
1257+
@Test
1258+
void lte_normalizesVarcharLiteralAgainstTimestampField() throws ExpressionNotAnalyzableException {
1259+
RexLiteral varcharLiteral = (RexLiteral) builder.makeLiteral("1987-02-03 04:34:56");
1260+
RexNode call = builder.makeCall(SqlStdOperatorTable.LESS_THAN_OR_EQUAL, field4, varcharLiteral);
1261+
QueryBuilder result = PredicateAnalyzer.analyze(call, schema, fieldTypes);
1262+
1263+
assertInstanceOf(RangeQueryBuilder.class, result);
1264+
assertEquals(
1265+
"""
1266+
{
1267+
"range" : {
1268+
"d" : {
1269+
"from" : null,
1270+
"to" : "1987-02-03T04:34:56.000Z",
1271+
"include_lower" : true,
1272+
"include_upper" : true,
1273+
"format" : "date_time",
1274+
"boost" : 1.0
1275+
}
1276+
}
1277+
}\
1278+
""",
1279+
result.toString());
1280+
}
1281+
11811282
@Test
11821283
void gte_generatesRangeQueryWithFormatForDateTime() throws ExpressionNotAnalyzableException {
11831284
RexNode call =

0 commit comments

Comments
 (0)