Skip to content

Commit 01c6b1e

Browse files
committed
lewSupport basic nested queries when applying filter on a nested filed
Signed-off-by: Yuanchun Shen <yuanchu@amazon.com> # Conflicts: # integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java # opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java
1 parent 5dbcd28 commit 01c6b1e

6 files changed

Lines changed: 184 additions & 4 deletions

File tree

integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99
import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_ALIAS;
1010
import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BANK;
1111
import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BANK_WITH_NULL_VALUES;
12+
import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_DEEP_NESTED;
1213
import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_LOGS;
1314
import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_NESTED_SIMPLE;
1415
import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_OTEL_LOGS;
@@ -27,6 +28,8 @@
2728
import org.junit.Test;
2829
import org.opensearch.sql.ast.statement.ExplainMode;
2930
import org.opensearch.sql.common.setting.Settings.Key;
31+
import org.opensearch.sql.common.setting.Settings;
32+
import org.opensearch.sql.common.utils.StringUtils;
3033
import org.opensearch.sql.ppl.ExplainIT;
3134
import org.opensearch.sql.protocol.response.format.Format;
3235

@@ -47,6 +50,7 @@ public void init() throws Exception {
4750
loadIndex(Index.WORK_INFORMATION);
4851
loadIndex(Index.WEBLOG);
4952
loadIndex(Index.DATA_TYPE_ALIAS);
53+
loadIndex(Index.DEEP_NESTED);
5054
}
5155

5256
@Override
@@ -2368,4 +2372,15 @@ public void testExplainBWC() throws IOException {
23682372
explainQueryToStringBWC(query, format));
23692373
}
23702374
}
2375+
2376+
@Test
2377+
public void testFilterOnNestedFields() throws IOException {
2378+
assertYamlEqualsIgnoreId(
2379+
loadExpectedPlan("filter_on_nested.yaml"),
2380+
explainQueryYaml(
2381+
StringUtils.format(
2382+
"source=%s | eval proj_name_len=length(projects.name) | fields projects.name,"
2383+
+ " proj_name_len | where proj_name_len > 29",
2384+
TEST_INDEX_DEEP_NESTED)));
2385+
}
23712386
}
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
calcite:
2+
logical: |
3+
LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])
4+
LogicalFilter(condition=[>($1, 29)])
5+
LogicalProject(projects.name=[$3], proj_name_len=[CHAR_LENGTH($3)])
6+
CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_deep_nested]])
7+
physical: |
8+
EnumerableCalc(expr#0=[{inputs}], expr#1=[CHAR_LENGTH($t0)], proj#0..1=[{exprs}])
9+
CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_deep_nested]], PushDownContext=[[PROJECT->[projects.name], SCRIPT->>(CHAR_LENGTH($0), 29), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","query":{"nested":{"query":{"script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQCIXsKICAib3AiOiB7CiAgICAibmFtZSI6ICI+IiwKICAgICJraW5kIjogIkdSRUFURVJfVEhBTiIsCiAgICAic3ludGF4IjogIkJJTkFSWSIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgIm9wIjogewogICAgICAgICJuYW1lIjogIkNIQVJfTEVOR1RIIiwKICAgICAgICAia2luZCI6ICJDSEFSX0xFTkdUSCIsCiAgICAgICAgInN5bnRheCI6ICJGVU5DVElPTiIKICAgICAgfSwKICAgICAgIm9wZXJhbmRzIjogWwogICAgICAgIHsKICAgICAgICAgICJkeW5hbWljUGFyYW0iOiAwLAogICAgICAgICAgInR5cGUiOiB7CiAgICAgICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgICAgIH0KICAgICAgICB9CiAgICAgIF0KICAgIH0sCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAxLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJJTlRFR0VSIiwKICAgICAgICAibnVsbGFibGUiOiBmYWxzZQogICAgICB9CiAgICB9CiAgXQp9\"}","lang":"opensearch_compounded_script","params":{"utcTimestamp": 0,"SOURCES":[0,2],"DIGESTS":["projects.name",29]}},"boost":1.0}},"path":"projects","ignore_unmapped":false,"score_mode":"none","boost":1.0}},"_source":{"includes":["projects.name"],"excludes":[]}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
calcite:
2+
logical: |
3+
LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])
4+
LogicalFilter(condition=[>($1, 29)])
5+
LogicalProject(projects.name=[$3], proj_name_len=[CHAR_LENGTH($3)])
6+
CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_deep_nested]])
7+
physical: |
8+
EnumerableLimit(fetch=[10000])
9+
EnumerableCalc(expr#0..15=[{inputs}], expr#16=[CHAR_LENGTH($t3)], expr#17=[29], expr#18=[>($t16, $t17)], projects.name=[$t3], proj_name_len=[$t16], $condition=[$t18])
10+
CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_deep_nested]])
Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
setup:
2+
- do:
3+
indices.create:
4+
index: test_nested_eval_filter
5+
body:
6+
mappings:
7+
properties:
8+
"id":
9+
type: keyword
10+
"items":
11+
type: nested
12+
properties:
13+
"name":
14+
type: keyword
15+
- do:
16+
bulk:
17+
index: test_nested_eval_filter
18+
refresh: true
19+
body:
20+
- '{"index": {"_id": "1"}}'
21+
- '{"id": "order1", "items": [{"name": "apple"}]}'
22+
- '{"index": {"_id": "2"}}'
23+
- '{"id": "order2", "items": [{"name": "banana"}]}'
24+
- '{"index": {"_id": "3"}}'
25+
- '{"id": "order3", "items": [{"name": "orange"}]}'
26+
27+
---
28+
"eval on nested field without filter":
29+
- skip:
30+
features:
31+
- headers
32+
- do:
33+
headers:
34+
Content-Type: 'application/json'
35+
ppl:
36+
body:
37+
query: source=test_nested_eval_filter | eval NameLen=LENGTH(items.name) | fields id, items.name, NameLen
38+
39+
- match: { total: 3 }
40+
- match: {"schema": [{"name": "id", "type": "string"}, {"name": "items.name", "type": "string"}, {"name": "NameLen", "type": "int"}]}
41+
- match: {"datarows": [["order1", "apple", 5], ["order2", "banana", 6], ["order3", "orange", 6]]}
42+
43+
---
44+
"eval on nested field with filter on computed field":
45+
- skip:
46+
features:
47+
- headers
48+
- do:
49+
headers:
50+
Content-Type: 'application/json'
51+
ppl:
52+
body:
53+
query: source=test_nested_eval_filter | eval NameLen=LENGTH(items.name) | fields id, items.name, NameLen | where NameLen > 5
54+
55+
- match: { total: 2 }
56+
- match: {"schema": [{"name": "id", "type": "string"}, {"name": "items.name", "type": "string"}, {"name": "NameLen", "type": "int"}]}
57+
- match: {"datarows": [["order2", "banana", 6], ["order3", "orange", 6]]}
58+
59+
---
60+
"comparison with regular field - eval and filter works correctly":
61+
- skip:
62+
features:
63+
- headers
64+
- do:
65+
indices.create:
66+
index: test_regular_eval_filter
67+
body:
68+
mappings:
69+
properties:
70+
"id":
71+
type: keyword
72+
"name":
73+
type: keyword
74+
- do:
75+
bulk:
76+
index: test_regular_eval_filter
77+
refresh: true
78+
body:
79+
- '{"index": {"_id": "1"}}'
80+
- '{"id": "order1", "name": "apple"}'
81+
- '{"index": {"_id": "2"}}'
82+
- '{"id": "order2", "name": "banana"}'
83+
- '{"index": {"_id": "3"}}'
84+
- '{"id": "order3", "name": "orange"}'
85+
- do:
86+
headers:
87+
Content-Type: 'application/json'
88+
ppl:
89+
body:
90+
query: source=test_regular_eval_filter | eval NameLen=LENGTH(name) | fields id, name, NameLen | where NameLen > 5
91+
92+
- match: { total: 2 }
93+
- match: {"schema": [{"name": "id", "type": "string"}, {"name": "name", "type": "string"}, {"name": "NameLen", "type": "int"}]}
94+
- match: {"datarows": [["order2", "banana", 6], ["order3", "orange", 6]]}

opensearch/src/main/java/org/opensearch/sql/opensearch/request/PredicateAnalyzer.java

Lines changed: 55 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -84,13 +84,15 @@
8484
import org.opensearch.index.mapper.DateFieldMapper;
8585
import org.opensearch.index.query.BoolQueryBuilder;
8686
import org.opensearch.index.query.QueryBuilder;
87+
import org.opensearch.index.query.QueryBuilders;
8788
import org.opensearch.index.query.RangeQueryBuilder;
8889
import org.opensearch.index.query.ScriptQueryBuilder;
8990
import org.opensearch.script.Script;
9091
import org.opensearch.sql.calcite.plan.OpenSearchConstants;
9192
import org.opensearch.sql.calcite.type.ExprIPType;
9293
import org.opensearch.sql.calcite.type.ExprSqlType;
9394
import org.opensearch.sql.calcite.utils.OpenSearchTypeFactory.ExprUDT;
95+
import org.opensearch.sql.calcite.utils.PlanUtils;
9496
import org.opensearch.sql.calcite.utils.UserDefinedFunctionUtils;
9597
import org.opensearch.sql.data.model.ExprIpValue;
9698
import org.opensearch.sql.data.model.ExprTimestampValue;
@@ -1464,6 +1466,8 @@ public static class ScriptQueryExpression extends QueryExpression {
14641466
private final Supplier<String> codeGenerator;
14651467
private String generatedCode;
14661468
private final ScriptParameterHelper parameterHelper;
1469+
private final Map<String, ExprType> fieldTypes;
1470+
private final List<String> referredFields;
14671471

14681472
public ScriptQueryExpression(
14691473
RexNode rexNode,
@@ -1487,6 +1491,12 @@ public ScriptQueryExpression(
14871491
() ->
14881492
SerializationWrapper.wrapWithLangType(
14891493
ScriptEngineType.CALCITE, serializer.serialize(rexNode, parameterHelper));
1494+
this.referredFields =
1495+
PlanUtils.getInputRefs(rexNode).stream()
1496+
.map(RexInputRef::getIndex)
1497+
.map(rowType.getFieldNames()::get)
1498+
.toList();
1499+
this.fieldTypes = fieldTypes;
14901500
}
14911501

14921502
// For filter script, this method will be called after planning phase;
@@ -1501,7 +1511,12 @@ private String getOrCreateGeneratedCode() {
15011511

15021512
@Override
15031513
public QueryBuilder builder() {
1504-
return new ScriptQueryBuilder(getScript());
1514+
ScriptQueryBuilder scriptQuery = QueryBuilders.scriptQuery(getScript());
1515+
String nestedPath = findNestedPath(fieldTypes);
1516+
if (nestedPath != null) {
1517+
return QueryBuilders.nestedQuery(nestedPath, scriptQuery, ScoreMode.None);
1518+
}
1519+
return scriptQuery;
15051520
}
15061521

15071522
public Script getScript() {
@@ -1527,6 +1542,45 @@ public void updateAnalyzedNodes(RexNode rexNode) {
15271542
public List<RexNode> getUnAnalyzableNodes() {
15281543
return List.of();
15291544
}
1545+
1546+
/**
1547+
* Find the nested path for fields referenced in the expression. If multiple nested paths exist,
1548+
* returns the top one.
1549+
*
1550+
* @param fieldTypes Map of field names to their types
1551+
* @return The nested path, or null if no nested fields are found
1552+
*/
1553+
private String findNestedPath(Map<String, ExprType> fieldTypes) {
1554+
if (fieldTypes == null || fieldTypes.isEmpty()) {
1555+
return null;
1556+
}
1557+
1558+
for (String fieldName : referredFields) {
1559+
// Check if the field is part of a nested structure
1560+
// For a field like "items.name", we need to check if "items" is nested
1561+
if (fieldName.contains(".")) {
1562+
String[] parts = fieldName.split("\\.");
1563+
StringBuilder pathBuilder = new StringBuilder();
1564+
1565+
// Build up the path progressively and check if any parent is nested
1566+
for (int i = 0; i < parts.length - 1; i++) {
1567+
if (i > 0) {
1568+
pathBuilder.append(".");
1569+
}
1570+
pathBuilder.append(parts[i]);
1571+
String currentPath = pathBuilder.toString();
1572+
1573+
// Check if this path exists in fieldTypes and is nested
1574+
ExprType pathType = fieldTypes.get(currentPath);
1575+
// OpenSearchDataType.Nested is mapped to ExprCoreType.ARRAY
1576+
if (pathType == ExprCoreType.ARRAY) {
1577+
return currentPath;
1578+
}
1579+
}
1580+
}
1581+
}
1582+
return null;
1583+
}
15301584
}
15311585

15321586
/**

opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -142,9 +142,7 @@ public AbstractRelNode pushDownFilter(Filter filter) {
142142
RelDataType rowType = this.getRowType();
143143
List<String> schema = buildSchema();
144144
Map<String, ExprType> fieldTypes =
145-
this.osIndex.getAllFieldTypes().entrySet().stream()
146-
.filter(entry -> schema.contains(entry.getKey()))
147-
.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
145+
this.osIndex.getAllFieldTypes();
148146
QueryExpression queryExpression =
149147
PredicateAnalyzer.analyzeExpression(
150148
filter.getCondition(), schema, fieldTypes, rowType, getCluster());

0 commit comments

Comments
 (0)