Skip to content

Commit a3249bf

Browse files
committed
Prevents filtering with isnull/isnotnull conditions on nested fields
Signed-off-by: Yuanchun Shen <yuanchu@amazon.com>
1 parent 6a96118 commit a3249bf

3 files changed

Lines changed: 77 additions & 5 deletions

File tree

integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLConditionBuiltinFunctionIT.java

Lines changed: 34 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import static org.opensearch.sql.util.MatcherUtils.rows;
1111

1212
import java.io.IOException;
13+
import java.util.Locale;
1314
import org.json.JSONObject;
1415
import org.junit.jupiter.api.Test;
1516
import org.opensearch.client.Request;
@@ -24,7 +25,7 @@ public void init() throws Exception {
2425
loadIndex(Index.STATE_COUNTRY);
2526
loadIndex(Index.STATE_COUNTRY_WITH_NULL);
2627
loadIndex(Index.CALCS);
27-
loadIndex(Index.NESTED_WITHOUT_ARRAYS);
28+
loadIndex(Index.NESTED_SIMPLE);
2829
loadIndex(Index.BIG5);
2930
Request request1 =
3031
new Request("PUT", "/" + TEST_INDEX_STATE_COUNTRY_WITH_NULL + "/_doc/7?refresh=true");
@@ -49,13 +50,27 @@ public void testIsNull() throws IOException {
4950
verifySchema(actual, schema("age", "int"));
5051

5152
verifyDataRows(actual, rows(10));
53+
}
5254

53-
// Test isNull on struct objects
54-
actual = executeQuery("source=big5 | where isnull(aws) | fields aws");
55+
@Test
56+
public void testIsNullWithStruct() throws IOException {
57+
JSONObject actual = executeQuery("source=big5 | where isnull(aws) | fields aws");
5558
verifySchema(actual, schema("aws", "struct"));
5659
verifyNumOfRows(actual, 0);
5760
}
5861

62+
@Test
63+
public void testIsNullWithNested() throws IOException {
64+
JSONObject actual =
65+
executeQuery(
66+
String.format(
67+
Locale.ROOT,
68+
"source=%s | where isnull(address) | fields address",
69+
TEST_INDEX_NESTED_SIMPLE));
70+
verifySchema(actual, schema("address", "array"));
71+
verifyNumOfRows(actual, 0);
72+
}
73+
5974
@Test
6075
public void testIsNotNull() throws IOException {
6176
JSONObject actual =
@@ -75,13 +90,27 @@ public void testIsNotNull() throws IOException {
7590
rows("Kevin"),
7691
rows(" "),
7792
rows(""));
93+
}
7894

79-
// Test isNotNull on struct objects
80-
actual = executeQuery("source=big5 | where isnotnull(aws) | fields aws");
95+
@Test
96+
public void testIsNotNullWithStruct() throws IOException {
97+
JSONObject actual = executeQuery("source=big5 | where isnotnull(aws) | fields aws");
8198
verifySchema(actual, schema("aws", "struct"));
8299
verifyNumOfRows(actual, 1);
83100
}
84101

102+
@Test
103+
public void testIsNotNullWithNested() throws IOException {
104+
JSONObject actual =
105+
executeQuery(
106+
String.format(
107+
Locale.ROOT,
108+
"source=%s | where isnotnull(address) | fields address",
109+
TEST_INDEX_NESTED_SIMPLE));
110+
verifySchema(actual, schema("address", "array"));
111+
verifyNumOfRows(actual, 5);
112+
}
113+
85114
@Test
86115
public void testNullIf() throws IOException {
87116
JSONObject actual =

opensearch/src/main/java/org/opensearch/sql/opensearch/planner/physical/OpenSearchFilterIndexScanRule.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ public interface Config extends RelRule.Config {
5454
.withOperandSupplier(
5555
b0 ->
5656
b0.operand(LogicalFilter.class)
57+
.predicate(Predicate.not(OpenSearchIndexScanRule::isConditionExcluded))
5758
.oneInput(
5859
b1 ->
5960
b1.operand(CalciteLogicalIndexScan.class)

opensearch/src/main/java/org/opensearch/sql/opensearch/planner/physical/OpenSearchIndexScanRule.java

Lines changed: 42 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,13 +6,20 @@
66
package org.opensearch.sql.opensearch.planner.physical;
77

88
import java.util.HashSet;
9+
import java.util.List;
910
import java.util.Set;
1011
import org.apache.calcite.plan.RelOptTable;
12+
import org.apache.calcite.rel.core.Filter;
1113
import org.apache.calcite.rel.core.Sort;
1214
import org.apache.calcite.rel.logical.LogicalProject;
1315
import org.apache.calcite.rel.logical.LogicalSort;
16+
import org.apache.calcite.rel.type.RelDataTypeField;
17+
import org.apache.calcite.rex.RexCall;
18+
import org.apache.calcite.rex.RexInputRef;
1419
import org.apache.calcite.rex.RexNode;
1520
import org.apache.calcite.rex.RexOver;
21+
import org.apache.calcite.sql.SqlKind;
22+
import org.apache.calcite.sql.type.ArraySqlType;
1623
import org.opensearch.sql.opensearch.storage.OpenSearchIndex;
1724
import org.opensearch.sql.opensearch.storage.scan.AbstractCalciteIndexScan;
1825

@@ -62,4 +69,39 @@ static boolean isLogicalSortLimit(LogicalSort sort) {
6269
static boolean sortByFieldsOnly(Sort sort) {
6370
return !sort.getCollation().getFieldCollations().isEmpty() && sort.fetch == null;
6471
}
72+
73+
/**
74+
* Filters with conditions of these kinds should not be pushed down since DSL does not handle such
75+
* cases correctly
76+
*/
77+
Set<SqlKind> FILTER_PUSHDOWN_EXCLUDED_FUNCTION = Set.of(SqlKind.IS_NULL, SqlKind.IS_NOT_NULL);
78+
79+
/**
80+
* Determines whether a filter condition should be excluded from pushdown to OpenSearch.
81+
* Currently, this method only excludes IS NULL and IS NOT NULL operations on nested fields, as
82+
* OpenSearch DSL does not handle these conditions correctly.
83+
*
84+
* @param filter The filter operation to check
85+
* @return true if the condition should be excluded from pushdown, false otherwise
86+
*/
87+
static boolean isConditionExcluded(Filter filter) {
88+
RexNode condition = filter.getCondition();
89+
if (condition instanceof RexCall conditionCall) {
90+
return FILTER_PUSHDOWN_EXCLUDED_FUNCTION.contains(conditionCall.getKind())
91+
&& conditionContainsNestedField(condition, filter.getRowType().getFieldList());
92+
}
93+
return false;
94+
}
95+
96+
private static boolean conditionContainsNestedField(
97+
RexNode condition, List<RelDataTypeField> fields) {
98+
if (condition instanceof RexCall conditionCall) {
99+
return conditionCall.getOperands().stream()
100+
.filter(operand -> operand instanceof RexInputRef)
101+
.map(operand -> (RexInputRef) operand)
102+
// Nested fields are of type ArraySqlType
103+
.anyMatch(inputRef -> fields.get(inputRef.getIndex()).getType() instanceof ArraySqlType);
104+
}
105+
return false;
106+
}
65107
}

0 commit comments

Comments
 (0)