Skip to content

Commit 8143578

Browse files
committed
Support basic nested queries when applying filter on a nested filed
Signed-off-by: Yuanchun Shen <yuanchu@amazon.com>
1 parent a7c5687 commit 8143578

7 files changed

Lines changed: 186 additions & 7 deletions

File tree

integ-test/build.gradle

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -480,8 +480,8 @@ integTest {
480480

481481
dependsOn ':opensearch-sql-plugin:bundlePlugin'
482482
if(!ignorePrometheus && getOSFamilyType() != "windows") {
483-
dependsOn startPrometheus
484-
finalizedBy stopPrometheus
483+
// dependsOn startPrometheus
484+
// finalizedBy stopPrometheus
485485
}
486486

487487
// enable calcite codegen in IT

integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_ACCOUNT;
99
import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BANK;
1010
import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BANK_WITH_NULL_VALUES;
11+
import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_DEEP_NESTED;
1112
import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_LOGS;
1213
import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_NESTED_SIMPLE;
1314
import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_OTEL_LOGS;
@@ -24,6 +25,7 @@
2425
import org.junit.Ignore;
2526
import org.junit.Test;
2627
import org.opensearch.sql.common.setting.Settings;
28+
import org.opensearch.sql.common.utils.StringUtils;
2729
import org.opensearch.sql.ppl.ExplainIT;
2830

2931
public class CalciteExplainIT extends ExplainIT {
@@ -42,6 +44,7 @@ public void init() throws Exception {
4244
loadIndex(Index.WORKER);
4345
loadIndex(Index.WORK_INFORMATION);
4446
loadIndex(Index.WEBLOG);
47+
loadIndex(Index.DEEP_NESTED);
4548
}
4649

4750
@Override
@@ -1778,4 +1781,15 @@ public void testInternalItemAccessOnStructs() throws IOException {
17781781
+ " info.dummy_sub_field",
17791782
TEST_INDEX_WEBLOGS)));
17801783
}
1784+
1785+
@Test
1786+
public void testFilterOnNestedFields() throws IOException {
1787+
assertYamlEqualsIgnoreId(
1788+
loadExpectedPlan("filter_on_nested.yaml"),
1789+
explainQueryYaml(
1790+
StringUtils.format(
1791+
"source=%s | eval proj_name_len=length(projects.name) | fields projects.name,"
1792+
+ " proj_name_len | where proj_name_len > 29",
1793+
TEST_INDEX_DEEP_NESTED)));
1794+
}
17811795
}
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
calcite:
2+
logical: |
3+
LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])
4+
LogicalFilter(condition=[>($1, 29)])
5+
LogicalProject(projects.name=[$3], proj_name_len=[CHAR_LENGTH($3)])
6+
CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_deep_nested]])
7+
physical: |
8+
EnumerableCalc(expr#0=[{inputs}], expr#1=[CHAR_LENGTH($t0)], proj#0..1=[{exprs}])
9+
CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_deep_nested]], PushDownContext=[[PROJECT->[projects.name], SCRIPT->>(CHAR_LENGTH($0), 29), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"nested":{"query":{"script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQCIXsKICAib3AiOiB7CiAgICAibmFtZSI6ICI+IiwKICAgICJraW5kIjogIkdSRUFURVJfVEhBTiIsCiAgICAic3ludGF4IjogIkJJTkFSWSIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgIm9wIjogewogICAgICAgICJuYW1lIjogIkNIQVJfTEVOR1RIIiwKICAgICAgICAia2luZCI6ICJDSEFSX0xFTkdUSCIsCiAgICAgICAgInN5bnRheCI6ICJGVU5DVElPTiIKICAgICAgfSwKICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgIHsKICAgICAgICAgICJkeW5hbWljUGFyYW0iOiAwLAogICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgICAgIH0KICAgICAgICB9CiAgICAgIF0KICAgIH0sCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAxLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgICAibnVsbGFibGUiOiBmYWxzZQogICAgICB9CiAgICB9CiAgXQp9\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,2],"DIGESTS":["projects.name",29]}},"boost":1.0}},"path":"projects","ignore_unmapped":false,"score_mode":"none","boost":1.0}},"_source":{"includes":["projects.name"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
calcite:
2+
logical: |
3+
LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])
4+
LogicalFilter(condition=[>($1, 29)])
5+
LogicalProject(projects.name=[$3], proj_name_len=[CHAR_LENGTH($3)])
6+
CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_deep_nested]])
7+
physical: |
8+
EnumerableLimit(fetch=[10000])
9+
EnumerableCalc(expr#0..15=[{inputs}], expr#16=[CHAR_LENGTH($t3)], expr#17=[29], expr#18=[>($t16, $t17)], projects.name=[$t3], proj_name_len=[$t16], $condition=[$t18])
10+
CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_deep_nested]])
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
setup:
2+
- do:
3+
indices.create:
4+
index: test_nested_eval_filter
5+
body:
6+
mappings:
7+
properties:
8+
"id":
9+
type: keyword
10+
"items":
11+
type: nested
12+
properties:
13+
"name":
14+
type: keyword
15+
- do:
16+
bulk:
17+
index: test_nested_eval_filter
18+
refresh: true
19+
body:
20+
- '{"index": {"_id": "1"}}'
21+
- '{"id": "order1", "items": [{"name": "apple"}]}'
22+
- '{"index": {"_id": "2"}}'
23+
- '{"id": "order2", "items": [{"name": "banana"}]}'
24+
- '{"index": {"_id": "3"}}'
25+
- '{"id": "order3", "items": [{"name": "orange"}]}'
26+
27+
---
28+
"eval on nested field without filter":
29+
- skip:
30+
features:
31+
- headers
32+
- do:
33+
headers:
34+
Content-Type: 'application/json'
35+
ppl:
36+
body:
37+
query: source=test_nested_eval_filter | eval NameLen=LENGTH(items.name) | fields id, items.name, NameLen
38+
39+
- match: { total: 3 }
40+
- match: {"schema": [{"name": "id", "type": "string"}, {"name": "items.name", "type": "string"}, {"name": "NameLen", "type": "int"}]}
41+
- match: {"datarows": [["order1", "apple", 5], ["order2", "banana", 6], ["order3", "orange", 6]]}
42+
43+
---
44+
"eval on nested field with filter on computed field":
45+
- skip:
46+
features:
47+
- headers
48+
- do:
49+
headers:
50+
Content-Type: 'application/json'
51+
ppl:
52+
body:
53+
query: source=test_nested_eval_filter | eval NameLen=LENGTH(items.name) | fields id, items.name, NameLen | where NameLen > 5
54+
55+
- match: { total: 2 }
56+
- match: {"schema": [{"name": "id", "type": "string"}, {"name": "items.name", "type": "string"}, {"name": "NameLen", "type": "int"}]}
57+
- match: {"datarows": [["order2", "banana", 6], ["order3", "orange", 6]]}
58+
59+
---
60+
"comparison with regular field - eval and filter works correctly":
61+
- skip:
62+
features:
63+
- headers
64+
- do:
65+
indices.create:
66+
index: test_regular_eval_filter
67+
body:
68+
mappings:
69+
properties:
70+
"id":
71+
type: keyword
72+
"name":
73+
type: keyword
74+
- do:
75+
bulk:
76+
index: test_regular_eval_filter
77+
refresh: true
78+
body:
79+
- '{"index": {"_id": "1"}}'
80+
- '{"id": "order1", "name": "apple"}'
81+
- '{"index": {"_id": "2"}}'
82+
- '{"id": "order2", "name": "banana"}'
83+
- '{"index": {"_id": "3"}}'
84+
- '{"id": "order3", "name": "orange"}'
85+
- do:
86+
headers:
87+
Content-Type: 'application/json'
88+
ppl:
89+
body:
90+
query: source=test_regular_eval_filter | eval NameLen=LENGTH(name) | fields id, name, NameLen | where NameLen > 5
91+
92+
- match: { total: 2 }
93+
- match: {"schema": [{"name": "id", "type": "string"}, {"name": "name", "type": "string"}, {"name": "NameLen", "type": "int"}]}
94+
- match: {"datarows": [["order2", "banana", 6], ["order3", "orange", 6]]}

opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java

Lines changed: 56 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,16 +77,19 @@
7777
import org.apache.calcite.util.NlsString;
7878
import org.apache.calcite.util.RangeSets;
7979
import org.apache.calcite.util.Sarg;
80+
import org.apache.lucene.search.join.ScoreMode;
8081
import org.opensearch.index.mapper.DateFieldMapper;
8182
import org.opensearch.index.query.BoolQueryBuilder;
8283
import org.opensearch.index.query.QueryBuilder;
84+
import org.opensearch.index.query.QueryBuilders;
8385
import org.opensearch.index.query.RangeQueryBuilder;
8486
import org.opensearch.index.query.ScriptQueryBuilder;
8587
import org.opensearch.script.Script;
8688
import org.opensearch.sql.calcite.plan.OpenSearchConstants;
8789
import org.opensearch.sql.calcite.type.ExprIPType;
8890
import org.opensearch.sql.calcite.type.ExprSqlType;
8991
import org.opensearch.sql.calcite.utils.OpenSearchTypeFactory.ExprUDT;
92+
import org.opensearch.sql.calcite.utils.PlanUtils;
9093
import org.opensearch.sql.calcite.utils.UserDefinedFunctionUtils;
9194
import org.opensearch.sql.data.model.ExprIpValue;
9295
import org.opensearch.sql.data.model.ExprTimestampValue;
@@ -1450,6 +1453,8 @@ public static class ScriptQueryExpression extends QueryExpression {
14501453
private final Supplier<String> codeGenerator;
14511454
private String generatedCode;
14521455
private final ScriptParameterHelper parameterHelper;
1456+
private final Map<String, ExprType> fieldTypes;
1457+
private final List<String> referredFields;
14531458

14541459
public ScriptQueryExpression(
14551460
RexNode rexNode,
@@ -1471,6 +1476,12 @@ public ScriptQueryExpression(
14711476
() ->
14721477
SerializationWrapper.wrapWithLangType(
14731478
ScriptEngineType.CALCITE, serializer.serialize(rexNode, parameterHelper));
1479+
this.referredFields =
1480+
PlanUtils.getInputRefs(rexNode).stream()
1481+
.map(RexInputRef::getIndex)
1482+
.map(rowType.getFieldNames()::get)
1483+
.toList();
1484+
this.fieldTypes = fieldTypes;
14741485
}
14751486

14761487
// For filter script, this method will be called after planning phase;
@@ -1485,7 +1496,12 @@ private String getOrCreateGeneratedCode() {
14851496

14861497
@Override
14871498
public QueryBuilder builder() {
1488-
return new ScriptQueryBuilder(getScript());
1499+
ScriptQueryBuilder scriptQuery = QueryBuilders.scriptQuery(getScript());
1500+
String nestedPath = findNestedPath(fieldTypes);
1501+
if (nestedPath != null) {
1502+
return QueryBuilders.nestedQuery(nestedPath, scriptQuery, ScoreMode.None);
1503+
}
1504+
return scriptQuery;
14891505
}
14901506

14911507
public Script getScript() {
@@ -1511,6 +1527,45 @@ public void updateAnalyzedNodes(RexNode rexNode) {
15111527
public List<RexNode> getUnAnalyzableNodes() {
15121528
return List.of();
15131529
}
1530+
1531+
/**
1532+
* Find the nested path for fields referenced in the expression. If multiple nested paths exist,
1533+
* returns the top one.
1534+
*
1535+
* @param fieldTypes Map of field names to their types
1536+
* @return The nested path, or null if no nested fields are found
1537+
*/
1538+
private String findNestedPath(Map<String, ExprType> fieldTypes) {
1539+
if (fieldTypes == null || fieldTypes.isEmpty()) {
1540+
return null;
1541+
}
1542+
1543+
for (String fieldName : referredFields) {
1544+
// Check if the field is part of a nested structure
1545+
// For a field like "items.name", we need to check if "items" is nested
1546+
if (fieldName.contains(".")) {
1547+
String[] parts = fieldName.split("\\.");
1548+
StringBuilder pathBuilder = new StringBuilder();
1549+
1550+
// Build up the path progressively and check if any parent is nested
1551+
for (int i = 0; i < parts.length - 1; i++) {
1552+
if (i > 0) {
1553+
pathBuilder.append(".");
1554+
}
1555+
pathBuilder.append(parts[i]);
1556+
String currentPath = pathBuilder.toString();
1557+
1558+
// Check if this path exists in fieldTypes and is nested
1559+
ExprType pathType = fieldTypes.get(currentPath);
1560+
// OpenSearchDataType.Nested is mapped to ExprCoreType.ARRAY
1561+
if (pathType == ExprCoreType.ARRAY) {
1562+
return currentPath;
1563+
}
1564+
}
1565+
}
1566+
}
1567+
return null;
1568+
}
15141569
}
15151570

15161571
/**

opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -153,10 +153,7 @@ public AbstractRelNode pushDownFilter(Filter filter) {
153153
try {
154154
RelDataType rowType = this.getRowType();
155155
List<String> schema = this.getRowType().getFieldNames();
156-
Map<String, ExprType> fieldTypes =
157-
this.osIndex.getAllFieldTypes().entrySet().stream()
158-
.filter(entry -> schema.contains(entry.getKey()))
159-
.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
156+
Map<String, ExprType> fieldTypes = this.osIndex.getAllFieldTypes();
160157
QueryExpression queryExpression =
161158
PredicateAnalyzer.analyzeExpression(
162159
filter.getCondition(), schema, fieldTypes, rowType, getCluster());

0 commit comments

Comments
 (0)