Skip to content

Commit 8b3e8d2

Browse files
committed
Support dedup with expression
Signed-off-by: Lantao Jin <ltjin@amazon.com>
1 parent b2f64f0 commit 8b3e8d2

35 files changed

Lines changed: 356 additions & 254 deletions

integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java

Lines changed: 49 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2101,8 +2101,8 @@ public void testDedupRename() throws IOException {
21012101
@Test
21022102
public void testRenameDedupThenSortExpr() throws IOException {
21032103
enabledOnlyWhenPushdownIsEnabled();
2104-
String expected = loadExpectedPlan("explain_dedup_expr_complex.yaml");
2105-
String alternative = loadExpectedPlan("explain_dedup_expr_complex_alternative.yaml");
2104+
String expected = loadExpectedPlan("explain_dedup_expr_complex1.yaml");
2105+
String alternative = loadExpectedPlan("explain_dedup_expr_complex1_alternative.yaml");
21062106
assertYamlEqualsIgnoreId(
21072107
expected,
21082108
alternative,
@@ -2111,6 +2111,53 @@ public void testRenameDedupThenSortExpr() throws IOException {
21112111
+ " eval tmp_gender = lower(gender) | eval tmp_state = lower(state) | rename"
21122112
+ " tmp_gender as new_gender | rename tmp_state as new_state | sort new_gender,"
21132113
+ " -new_state | dedup 2 new_gender, new_state"));
2114+
expected = loadExpectedPlan("explain_dedup_expr_complex2.yaml");
2115+
alternative = loadExpectedPlan("explain_dedup_expr_complex2_alternative.yaml");
2116+
assertYamlEqualsIgnoreId(
2117+
expected,
2118+
alternative,
2119+
explainQueryYaml(
2120+
"source=opensearch-sql_test_index_account | fields account_number, gender, age, state |"
2121+
+ " eval tmp_gender = lower(gender) | eval tmp_state = lower(state) | rename"
2122+
+ " tmp_gender as new_gender | rename tmp_state as new_state | dedup 2 new_gender,"
2123+
+ " new_state | sort new_gender, -new_state"));
2124+
}
2125+
2126+
@Test
2127+
public void testDedupWithExpr() throws IOException {
2128+
enabledOnlyWhenPushdownIsEnabled();
2129+
String expected = loadExpectedPlan("explain_dedup_with_expr1.yaml");
2130+
assertYamlEqualsIgnoreId(
2131+
expected,
2132+
explainQueryYaml(
2133+
"source=opensearch-sql_test_index_account | eval new_gender = lower(gender) | dedup 1"
2134+
+ " age"));
2135+
expected = loadExpectedPlan("explain_dedup_with_expr2.yaml");
2136+
String alternative = loadExpectedPlan("explain_dedup_with_expr2_alternative.yaml");
2137+
assertYamlEqualsIgnoreId(
2138+
expected,
2139+
alternative,
2140+
explainQueryYaml(
2141+
"source=opensearch-sql_test_index_account | fields account_number, gender, age, state |"
2142+
+ " eval new_gender = lower(gender), new_state = lower(state) | dedup 1 age,"
2143+
+ " new_state"));
2144+
expected = loadExpectedPlan("explain_dedup_with_expr3.yaml");
2145+
alternative = loadExpectedPlan("explain_dedup_with_expr3_alternative.yaml");
2146+
assertYamlEqualsIgnoreId(
2147+
expected,
2148+
alternative,
2149+
explainQueryYaml(
2150+
"source=opensearch-sql_test_index_account | eval new_gender = lower(gender) | eval"
2151+
+ " new_state = lower(state) | dedup 2 age, account_number"));
2152+
expected = loadExpectedPlan("explain_dedup_with_expr4.yaml");
2153+
alternative = loadExpectedPlan("explain_dedup_with_expr4_alternative.yaml");
2154+
assertYamlEqualsIgnoreId(
2155+
expected,
2156+
alternative,
2157+
explainQueryYaml(
2158+
"source=opensearch-sql_test_index_account | fields account_number, gender, age, state |"
2159+
+ " eval new_gender = lower(gender) | eval new_state = lower(state) | sort gender,"
2160+
+ " -state | dedup 2 gender, state"));
21142161
}
21152162

21162163
@Test

integ-test/src/test/resources/expectedOutput/calcite/explain_complex_sort_expr_pushdown_for_smj_w_max_option.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,4 +18,4 @@ calcite:
1818
EnumerableMergeJoin(condition=[=($0, $7)], joinType=[left])
1919
EnumerableCalc(expr#0=[{inputs}], expr#1=['(?<lastname>^[A-Z])'], expr#2=['lastname'], expr#3=[REX_EXTRACT($t0, $t1, $t2)], $f0=[$t3])
2020
CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[lastname], LIMIT->10000, SORT_EXPR->[REX_EXTRACT($0, '(?<lastname>^[A-Z])', 'lastname') ASCENDING NULLS_LAST]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":10000,"timeout":"1m","_source":{"includes":["lastname"],"excludes":[]},"sort":[{"_script":{"script":{"source":"{\"langType\":\"calcite\",\"script\":\"rO0ABXQC63sKICAib3AiOiB7CiAgICAibmFtZSI6ICJSRVhfRVhUUkFDVCIsCiAgICAia2luZCI6ICJPVEhFUl9GVU5DVElPTiIsCiAgICAic3ludGF4IjogIkZVTkNUSU9OIgogIH0sCiAgIm9wZXJhbmRzIjogWwogICAgewogICAgICAiZHluYW1pY1BhcmFtIjogMCwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgICAicHJlY2lzaW9uIjogLTEKICAgICAgfQogICAgfSwKICAgIHsKICAgICAgImR5bmFtaWNQYXJhbSI6IDEsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICAgInByZWNpc2lvbiI6IC0xCiAgICAgIH0KICAgIH0sCiAgICB7CiAgICAgICJkeW5hbWljUGFyYW0iOiAyLAogICAgICAidHlwZSI6IHsKICAgICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAgICJwcmVjaXNpb24iOiAtMQogICAgICB9CiAgICB9CiAgXSwKICAiY2xhc3MiOiAib3JnLm9wZW5zZWFyY2guc3FsLmV4cHJlc3Npb24uZnVuY3Rpb24uVXNlckRlZmluZWRGdW5jdGlvbkJ1aWxkZXIkMSIsCiAgInR5cGUiOiB7CiAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAicHJlY2lzaW9uIjogMjAwMAogIH0sCiAgImRldGVybWluaXN0aWMiOiB0cnVlLAogICJkeW5hbWljIjogZmFsc2UKfQ==\"}","lang":"opensearch_compounded_script","params":{"MISSING_MAX":true,"utcTimestamp": 0,"SOURCES":[0,2,2],"DIGESTS":["lastname","(?<lastname>^[A-Z])","lastname"]}},"type":"string","order":"asc"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)])
21-
CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[PROJECT->[account_number, firstname, address, birthdate, gender, city, lastname, balance, employer, state, age, email, male], AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0},agg#0=LITERAL_AGG(1)), LIMIT->50000, SORT->[6]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","_source":{"includes":["account_number","firstname","address","birthdate","gender","city","lastname","balance","employer","state","age","email","male"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"lastname":{"terms":{"field":"lastname","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f1":{"top_hits":{"from":0,"size":1,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["lastname","account_number","firstname","address","birthdate","gender","city","balance","employer","state","age","email","male"],"excludes":[]},"script_fields":{}}}}}}}, requestedTotalSize=50000, pageSize=null, startFrom=0)])
21+
CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_bank]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0},agg#0=LITERAL_AGG(1)), LIMIT->50000, SORT->[6]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"lastname":{"terms":{"field":"lastname","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f1":{"top_hits":{"from":0,"size":1,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["lastname","account_number","firstname","address","birthdate","gender","city","balance","employer","state","age","email","male","_id","_index","_score","_maxscore","_sort","_routing"],"excludes":[]},"script_fields":{}}}}}}}, requestedTotalSize=50000, pageSize=null, startFrom=0)])

integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_complex1.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,4 +7,4 @@ calcite:
77
LogicalFilter(condition=[IS NOT NULL($4)])
88
CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])
99
physical: |
10-
CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0},agg#0=LITERAL_AGG(1)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f1":{"top_hits":{"from":0,"size":1,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["gender","account_number","firstname","address","balance","city","employer","state","age","email","lastname"],"excludes":[]},"script_fields":{}}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])
10+
CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0},agg#0=LITERAL_AGG(1)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f1":{"top_hits":{"from":0,"size":1,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["gender","account_number","firstname","address","balance","city","employer","state","age","email","lastname","_id","_index","_score","_maxscore","_sort","_routing"],"excludes":[]},"script_fields":{}}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])

integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_complex2.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,4 +8,4 @@ calcite:
88
LogicalProject(account_number=[$0], gender=[$4], age=[$8], state=[$7])
99
CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])
1010
physical: |
11-
CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, gender, age, state], AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0, 1},agg#0=LITERAL_AGG(1)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","_source":{"includes":["account_number","gender","age","state"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":false,"order":"asc"}}},{"state":{"terms":{"field":"state.keyword","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f2":{"top_hits":{"from":0,"size":1,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["gender","state","account_number","age"],"excludes":[]},"script_fields":{}}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])
11+
CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0, 1},agg#0=LITERAL_AGG(1)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":false,"order":"asc"}}},{"state":{"terms":{"field":"state.keyword","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f2":{"top_hits":{"from":0,"size":1,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["gender","state","account_number","firstname","address","balance","city","employer","age","email","lastname","_id","_index","_score","_maxscore","_sort","_routing"],"excludes":[]},"script_fields":{}}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])

integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_complex3.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,4 +7,4 @@ calcite:
77
LogicalFilter(condition=[AND(IS NOT NULL($4), IS NOT NULL($7))])
88
CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])
99
physical: |
10-
CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0, 1},agg#0=LITERAL_AGG(2)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":false,"order":"asc"}}},{"state":{"terms":{"field":"state.keyword","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f2":{"top_hits":{"from":0,"size":2,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["gender","state","account_number","firstname","address","balance","city","employer","age","email","lastname"],"excludes":[]},"script_fields":{}}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])
10+
CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0, 1},agg#0=LITERAL_AGG(2)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":false,"order":"asc"}}},{"state":{"terms":{"field":"state.keyword","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f2":{"top_hits":{"from":0,"size":2,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["gender","state","account_number","firstname","address","balance","city","employer","age","email","lastname","_id","_index","_score","_maxscore","_sort","_routing"],"excludes":[]},"script_fields":{}}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])

integ-test/src/test/resources/expectedOutput/calcite/explain_dedup_complex4.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,4 +8,4 @@ calcite:
88
LogicalProject(account_number=[$0], gender=[$4], age=[$8], state=[$7])
99
CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])
1010
physical: |
11-
CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, gender, age, state], AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0, 1},agg#0=LITERAL_AGG(2)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","_source":{"includes":["account_number","gender","age","state"],"excludes":[]},"aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":false,"order":"asc"}}},{"state":{"terms":{"field":"state.keyword","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f2":{"top_hits":{"from":0,"size":2,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["gender","state","account_number","age"],"excludes":[]},"script_fields":{}}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])
11+
CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[AGGREGATION->rel#:LogicalAggregate.NONE.[](input=LogicalProject#,group={0, 1},agg#0=LITERAL_AGG(2)), LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={"from":0,"size":0,"timeout":"1m","aggregations":{"composite_buckets":{"composite":{"size":1000,"sources":[{"gender":{"terms":{"field":"gender.keyword","missing_bucket":false,"order":"asc"}}},{"state":{"terms":{"field":"state.keyword","missing_bucket":false,"order":"asc"}}}]},"aggregations":{"$f2":{"top_hits":{"from":0,"size":2,"version":false,"seq_no_primary_term":false,"explain":false,"_source":{"includes":["gender","state","account_number","firstname","address","balance","city","employer","age","email","lastname","_id","_index","_score","_maxscore","_sort","_routing"],"excludes":[]},"script_fields":{}}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])

0 commit comments

Comments
 (0)