Skip to content

Commit b2fd268

Browse files
authored
allow partial pushdown for semi-scripted predicates (#5565)
Signed-off-by: Simeon Widdis <sawiddis@amazon.com>
1 parent f6b6baa commit b2fd268

3 files changed

Lines changed: 113 additions & 1 deletion

File tree

Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package org.opensearch.sql.calcite.remote;
7+
8+
import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_LOGS;
9+
import static org.opensearch.sql.util.MatcherUtils.rows;
10+
import static org.opensearch.sql.util.MatcherUtils.schema;
11+
import static org.opensearch.sql.util.MatcherUtils.verifyDataRows;
12+
import static org.opensearch.sql.util.MatcherUtils.verifySchema;
13+
14+
import java.io.IOException;
15+
import org.json.JSONObject;
16+
import org.junit.jupiter.api.Test;
17+
import org.opensearch.sql.ppl.PPLIntegTestCase;
18+
19+
/**
20+
* Tests partial filter pushdown when some predicates can be pushed natively and others require
21+
* script evaluation.
22+
*
23+
* <p>Regression test for issue where LIKE on text fields (without .keyword subfield) caused entire
24+
* AND filter to fall back to script, preventing timestamp range pushdown.
25+
*/
26+
public class CalcitePartialFilterPushdownIT extends PPLIntegTestCase {
27+
28+
@Override
29+
public void init() throws Exception {
30+
super.init();
31+
enableCalcite();
32+
loadIndex(Index.LOGS);
33+
}
34+
35+
@Test
36+
public void testTimestampRangePushesWithUnpushableLike() throws IOException {
37+
// message is text field without .keyword — LIKE on it requires script evaluation
38+
// @timestamp is date field — range should push natively despite LIKE failing
39+
String query =
40+
String.format(
41+
"source=%s | where `@timestamp` >= '2023-01-01' and `@timestamp` < '2023-01-04' "
42+
+ "and LIKE(message, '%%failed%%') | stats count()",
43+
TEST_INDEX_LOGS);
44+
45+
JSONObject result = executeQuery(query);
46+
47+
// Just verify query executes and returns reasonable results
48+
// The key regression is that this doesn't do a full table scan
49+
verifySchema(result, schema("count()", "bigint"));
50+
// Should find "Database connection failed" in the date range
51+
verifyDataRows(result, rows(1L));
52+
}
53+
54+
@Test
55+
public void testMultipleUnpushablePredicatesInAnd() throws IOException {
56+
// Both LIKE conditions are on text field, but timestamp should still push
57+
String query =
58+
String.format(
59+
"source=%s | where `@timestamp` >= '2023-01-01' and LIKE(message, '%%space%%') "
60+
+ "and LIKE(message, '%%low%%') | stats count()",
61+
TEST_INDEX_LOGS);
62+
63+
JSONObject result = executeQuery(query);
64+
verifySchema(result, schema("count()", "bigint"));
65+
// Should find "Disk space low"
66+
verifyDataRows(result, rows(1L));
67+
}
68+
}

opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1394,7 +1394,7 @@ public QueryExpression like(LiteralExpression literal, boolean caseSensitive) {
13941394
.caseInsensitive(!caseSensitive);
13951395
return this;
13961396
}
1397-
throw new UnsupportedOperationException("Like query is not supported for text field");
1397+
throw new PredicateAnalyzerException("Like query is not supported for text field");
13981398
}
13991399

14001400
@Override

opensearch/src/test/java/org/opensearch/sql/opensearch/request/PredicateAnalyzerTest.java

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1649,4 +1649,48 @@ void notIsFalse_generatesOnlyMustNotTerm() throws ExpressionNotAnalyzableExcepti
16491649
""",
16501650
result.toString());
16511651
}
1652+
1653+
@Test
1654+
void andWithUnpushableLike_partiallyPushesOtherPredicates()
1655+
throws ExpressionNotAnalyzableException {
1656+
// field3 (c) is text without .keyword → LIKE throws PredicateAnalyzerException
1657+
// field4 (d) is date → timestamp range should push as RangeQueryBuilder
1658+
final RelDataType rowType =
1659+
builder
1660+
.getTypeFactory()
1661+
.builder()
1662+
.kind(StructKind.FULLY_QUALIFIED)
1663+
.add("a", typeFactory.createSqlType(SqlTypeName.INTEGER))
1664+
.add("b", typeFactory.createSqlType(SqlTypeName.VARCHAR))
1665+
.add("c", typeFactory.createSqlType(SqlTypeName.VARCHAR))
1666+
.add("d", typeFactory.createUDT(ExprUDT.EXPR_TIMESTAMP))
1667+
.add("e", typeFactory.createSqlType(SqlTypeName.BOOLEAN))
1668+
.build();
1669+
Hook.CURRENT_TIME.addThread((Consumer<Holder<Long>>) h -> h.set(0L));
1670+
1671+
RexInputRef field3 = builder.makeInputRef(typeFactory.createSqlType(SqlTypeName.VARCHAR), 2);
1672+
RexNode likeCall =
1673+
builder.makeCall(
1674+
SqlStdOperatorTable.LIKE, field3, stringLiteral, builder.makeLiteral("\\"));
1675+
RexNode rangeCall =
1676+
builder.makeCall(SqlStdOperatorTable.GREATER_THAN_OR_EQUAL, field4, dateTimeLiteral);
1677+
RexNode andCall = builder.makeCall(SqlStdOperatorTable.AND, rangeCall, likeCall);
1678+
1679+
QueryBuilder result = PredicateAnalyzer.analyze(andCall, schema, fieldTypes, rowType, cluster);
1680+
1681+
// Should be a BoolQueryBuilder with range in must[] and LIKE as script
1682+
assertInstanceOf(BoolQueryBuilder.class, result);
1683+
BoolQueryBuilder boolQuery = (BoolQueryBuilder) result;
1684+
assertEquals(2, boolQuery.must().size());
1685+
1686+
// First must clause should be the range query (pushable)
1687+
QueryBuilder firstMust = boolQuery.must().get(0);
1688+
assertInstanceOf(RangeQueryBuilder.class, firstMust);
1689+
RangeQueryBuilder rangeQuery = (RangeQueryBuilder) firstMust;
1690+
assertEquals("d", rangeQuery.fieldName());
1691+
1692+
// Second must clause should be script query (unpushable LIKE)
1693+
QueryBuilder secondMust = boolQuery.must().get(1);
1694+
assertInstanceOf(ScriptQueryBuilder.class, secondMust);
1695+
}
16521696
}

0 commit comments

Comments
 (0)