Skip to content

Commit 1c046a2

Browse files
authored
Add a new row count estimation mechanism for CalciteIndexScan (opensearch-project#3605)
* Add a new row count mechanism for CalciteIndexScan Signed-off-by: Heng Qian <qianheng@amazon.com> * Fix after merging main Signed-off-by: Heng Qian <qianheng@amazon.com> * Refine code Signed-off-by: Heng Qian <qianheng@amazon.com> * Fix IT Signed-off-by: Heng Qian <qianheng@amazon.com> * Address comments Signed-off-by: Heng Qian <qianheng@amazon.com> * Fix IT Signed-off-by: Heng Qian <qianheng@amazon.com> * Address comments Signed-off-by: Heng Qian <qianheng@amazon.com> * Fix IT Signed-off-by: Heng Qian <qianheng@amazon.com> * Add doc in settings.rst Signed-off-by: Heng Qian <qianheng@amazon.com> * Address comments Signed-off-by: Heng Qian <qianheng@amazon.com> --------- Signed-off-by: Heng Qian <qianheng@amazon.com>
1 parent b784b87 commit 1c046a2

18 files changed

Lines changed: 154 additions & 50 deletions

File tree

common/src/main/java/org/opensearch/sql/common/setting/Settings.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@ public enum Key {
3131
CALCITE_ENGINE_ENABLED("plugins.calcite.enabled"),
3232
CALCITE_FALLBACK_ALLOWED("plugins.calcite.fallback.allowed"),
3333
CALCITE_PUSHDOWN_ENABLED("plugins.calcite.pushdown.enabled"),
34+
CALCITE_PUSHDOWN_ROWCOUNT_ESTIMATION_FACTOR(
35+
"plugins.calcite.pushdown.rowcount.estimation.factor"),
3436

3537
/** Query Settings. */
3638
FIELD_TYPE_TOLERANCE("plugins.query.field_type_tolerance"),

docs/user/admin/settings.rst

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -757,3 +757,39 @@ This setting is present from 3.0.0-beta. You can enable Calcite as new query opt
757757

758758
Check `introduce v3 engine <../../../dev/intro-v3-engine.md>`_ for more details.
759759
Check `join doc <../../ppl/cmd/join.rst>`_ for example.
760+
761+
plugins.calcite.fallback.allowed
762+
=======================
763+
764+
Description
765+
-----------
766+
767+
This setting is present from 3.0.0-beta. If Calcite is enabled, you can use this setting to decide whether to allow fallback to v2 engine for some queries which are not supported by v3 engine.
768+
769+
1. The default value is true in 3.0.0-beta.
770+
2. This setting is node scope.
771+
3. This setting can be updated dynamically.
772+
773+
plugins.calcite.pushdown.enabled
774+
=======================
775+
776+
Description
777+
-----------
778+
779+
This setting is present from 3.0.0-beta. If Calcite is enabled, you can use this setting to decide whether to enable the operator pushdown optimization for v3 engine.
780+
781+
1. The default value is true in 3.0.0-beta.
782+
2. This setting is node scope.
783+
3. This setting can be updated dynamically.
784+
785+
plugins.calcite.pushdown.rowcount.estimation.factor
786+
=======================
787+
788+
Description
789+
-----------
790+
791+
This setting is present from 3.1.0. If Calcite pushdown optimization is enabled, this setting is used to estimate the row count of the query plan. The value is a factor to multiply the row count of the table scan to get the estimated row count.
792+
793+
1. The default value is 0.9 in 3.1.0.
794+
2. This setting is node scope.
795+
3. This setting can be updated dynamically.

integ-test/src/test/java/org/opensearch/sql/calcite/standalone/CalcitePPLAggregationIT.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -621,12 +621,12 @@ public void testSumGroupByNullValue() throws IOException {
621621
verifySchema(response, schema("a", null, "long"), schema("age", null, "integer"));
622622
verifyDataRows(
623623
response,
624-
rows(null, null),
624+
rows(isPushdownEnabled() ? 0 : null, null),
625625
rows(32838, 28),
626626
rows(39225, 32),
627627
rows(4180, 33),
628628
rows(48086, 34),
629-
rows(null, 36));
629+
rows(isPushdownEnabled() ? 0 : null, 36));
630630
}
631631

632632
@Test

integ-test/src/test/java/org/opensearch/sql/calcite/standalone/CalcitePPLExplainIT.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -60,8 +60,8 @@ public void testExplainCommandCost() {
6060
String result = explainQuery("explain cost source=test | where age = 20 | fields name, age");
6161
assertTrue(
6262
result.contains(
63-
"CalciteEnumerableIndexScan(table=[[OpenSearch, test]]): rowcount = 100.0, cumulative"
64-
+ " cost = {100.0 rows, 101.0 cpu, 0.0 io}"));
63+
"CalciteEnumerableIndexScan(table=[[OpenSearch, test]]): rowcount = 10000.0, cumulative"
64+
+ " cost = {10000.0 rows, 10001.0 cpu, 0.0 io}"));
6565
}
6666

6767
@Test

integ-test/src/test/java/org/opensearch/sql/calcite/standalone/CalcitePPLExplainPushdownIT.java

Lines changed: 22 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -25,10 +25,9 @@ public void testExplainCommand() {
2525
+ " LogicalFilter(condition=[=($1, 20)])\\n"
2626
+ " CalciteLogicalIndexScan(table=[[OpenSearch, test]])\\n"
2727
+ "\",\n"
28-
+ " \"physical\": \"EnumerableCalc(expr#0..7=[{inputs}], proj#0..1=[{exprs}])\\n"
29-
+ " CalciteEnumerableIndexScan(table=[[OpenSearch, test]],"
30-
+ " PushDownContext=[[FILTER->=($1, 20)],"
31-
+ " OpenSearchRequestBuilder(sourceBuilder={\\\"from\\\":0,\\\"timeout\\\":\\\"1m\\\",\\\"query\\\":{\\\"term\\\":{\\\"age\\\":{\\\"value\\\":20,\\\"boost\\\":1.0}}},\\\"sort\\\":[{\\\"_doc\\\":{\\\"order\\\":\\\"asc\\\"}}]},"
28+
+ " \"physical\": \"CalciteEnumerableIndexScan(table=[[OpenSearch, test]],"
29+
+ " PushDownContext=[[PROJECT->[name, age], FILTER->=($1, 20)],"
30+
+ " OpenSearchRequestBuilder(sourceBuilder={\\\"from\\\":0,\\\"timeout\\\":\\\"1m\\\",\\\"query\\\":{\\\"term\\\":{\\\"age\\\":{\\\"value\\\":20,\\\"boost\\\":1.0}}},\\\"_source\\\":{\\\"includes\\\":[\\\"name\\\",\\\"age\\\"],\\\"excludes\\\":[]},\\\"sort\\\":[{\\\"_doc\\\":{\\\"order\\\":\\\"asc\\\"}}]},"
3231
+ " requestedTotalSize=200, pageSize=null, startFrom=0)])\\n"
3332
+ "\"\n"
3433
+ " }\n"
@@ -40,12 +39,24 @@ public void testExplainCommand() {
4039
@Test
4140
public void testExplainCommandCost() {
4241
String result = explainQuery("explain cost source=test | where age = 20 | fields name, age");
43-
assertTrue(
44-
result.contains(
45-
"CalciteEnumerableIndexScan(table=[[OpenSearch, test]], PushDownContext=[[FILTER->=($1,"
46-
+ " 20)],"
47-
+ " OpenSearchRequestBuilder(sourceBuilder={\\\"from\\\":0,\\\"timeout\\\":\\\"1m\\\",\\\"query\\\":{\\\"term\\\":{\\\"age\\\":{\\\"value\\\":20,\\\"boost\\\":1.0}}},\\\"sort\\\":[{\\\"_doc\\\":{\\\"order\\\":\\\"asc\\\"}}]},"
48-
+ " requestedTotalSize=200, pageSize=null, startFrom=0)]): rowcount = 100.0,"
49-
+ " cumulative cost = {100.0 rows, 101.0 cpu, 0.0 io}"));
42+
String expected =
43+
"{\n"
44+
+ " \"calcite\": {\n"
45+
+ " \"logical\": \"LogicalProject(name=[$0], age=[$1]): rowcount = 1500.0,"
46+
+ " cumulative cost = {13000.0 rows, 23001.0 cpu, 0.0 io}, id = *\\n"
47+
+ " LogicalFilter(condition=[=($1, 20)]): rowcount = 1500.0, cumulative cost ="
48+
+ " {11500.0 rows, 20001.0 cpu, 0.0 io}, id = *\\n"
49+
+ " CalciteLogicalIndexScan(table=[[OpenSearch, test]]): rowcount = 10000.0,"
50+
+ " cumulative cost = {10000.0 rows, 10001.0 cpu, 0.0 io}, id = *\\n"
51+
+ "\",\n"
52+
+ " \"physical\": \"CalciteEnumerableIndexScan(table=[[OpenSearch, test]],"
53+
+ " PushDownContext=[[PROJECT->[name, age], FILTER->=($1, 20)],"
54+
+ " OpenSearchRequestBuilder(sourceBuilder={\\\"from\\\":0,\\\"timeout\\\":\\\"1m\\\",\\\"query\\\":{\\\"term\\\":{\\\"age\\\":{\\\"value\\\":20,\\\"boost\\\":1.0}}},\\\"_source\\\":{\\\"includes\\\":[\\\"name\\\",\\\"age\\\"],\\\"excludes\\\":[]},\\\"sort\\\":[{\\\"_doc\\\":{\\\"order\\\":\\\"asc\\\"}}]},"
55+
+ " requestedTotalSize=200, pageSize=null, startFrom=0)]): rowcount = 1215.0,"
56+
+ " cumulative cost = {1215.0 rows, 1216.0 cpu, 0.0 io}, id = *\\n"
57+
+ "\"\n"
58+
+ " }\n"
59+
+ "}";
60+
assertEquals(expected, result.replaceAll("id = \\d+", "id = *"));
5061
}
5162
}

integ-test/src/test/java/org/opensearch/sql/calcite/standalone/CalcitePPLIntegTestCase.java

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -114,6 +114,7 @@ private Settings defaultSettings() {
114114
.put(Key.CALCITE_ENGINE_ENABLED, true)
115115
.put(Key.CALCITE_FALLBACK_ALLOWED, false)
116116
.put(Key.CALCITE_PUSHDOWN_ENABLED, false)
117+
.put(Key.CALCITE_PUSHDOWN_ROWCOUNT_ESTIMATION_FACTOR, 0.9)
117118
.put(Key.DEFAULT_PATTERN_METHOD, "SIMPLE_PATTERN")
118119
.build();
119120

@@ -139,6 +140,7 @@ protected Settings enablePushdown() {
139140
.put(Key.CALCITE_ENGINE_ENABLED, true)
140141
.put(Key.CALCITE_FALLBACK_ALLOWED, false)
141142
.put(Key.CALCITE_PUSHDOWN_ENABLED, true)
143+
.put(Key.CALCITE_PUSHDOWN_ROWCOUNT_ESTIMATION_FACTOR, 0.9)
142144
.put(Key.DEFAULT_PATTERN_METHOD, "SIMPLE_PATTERN")
143145
.build();
144146

integ-test/src/test/java/org/opensearch/sql/ppl/ExplainIT.java

Lines changed: 11 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
package org.opensearch.sql.ppl;
77

88
import static org.hamcrest.Matchers.containsString;
9-
import static org.opensearch.sql.util.MatcherUtils.assertJsonEquals;
9+
import static org.opensearch.sql.util.MatcherUtils.assertJsonEqualsIgnoreRelId;
1010

1111
import com.google.common.io.Resources;
1212
import java.io.IOException;
@@ -31,7 +31,7 @@ public void testExplain() throws Exception {
3131
isCalciteEnabled()
3232
? loadFromFile("expectedOutput/calcite/explain_output.json")
3333
: loadFromFile("expectedOutput/ppl/explain_output.json");
34-
assertJsonEquals(
34+
assertJsonEqualsIgnoreRelId(
3535
expected,
3636
explainQueryToString(
3737
"source=opensearch-sql_test_index_account"
@@ -51,7 +51,7 @@ public void testFilterPushDownExplain() throws Exception {
5151
? loadFromFile("expectedOutput/calcite/explain_filter_push.json")
5252
: loadFromFile("expectedOutput/ppl/explain_filter_push.json");
5353

54-
assertJsonEquals(
54+
assertJsonEqualsIgnoreRelId(
5555
expected,
5656
explainQueryToString(
5757
"source=opensearch-sql_test_index_account"
@@ -63,13 +63,12 @@ public void testFilterPushDownExplain() throws Exception {
6363

6464
@Test
6565
public void testFilterAndAggPushDownExplain() throws Exception {
66-
// TODO check why the agg pushdown doesn't work in calcite
6766
String expected =
6867
isCalciteEnabled()
6968
? loadFromFile("expectedOutput/calcite/explain_filter_agg_push.json")
7069
: loadFromFile("expectedOutput/ppl/explain_filter_agg_push.json");
7170

72-
assertJsonEquals(
71+
assertJsonEqualsIgnoreRelId(
7372
expected,
7473
explainQueryToString(
7574
"source=opensearch-sql_test_index_account"
@@ -85,7 +84,7 @@ public void testSortPushDownExplain() throws Exception {
8584
? loadFromFile("expectedOutput/calcite/explain_sort_push.json")
8685
: loadFromFile("expectedOutput/ppl/explain_sort_push.json");
8786

88-
assertJsonEquals(
87+
assertJsonEqualsIgnoreRelId(
8988
expected,
9089
explainQueryToString(
9190
"source=opensearch-sql_test_index_account"
@@ -102,7 +101,7 @@ public void testLimitPushDownExplain() throws Exception {
102101
? loadFromFile("expectedOutput/calcite/explain_limit_push.json")
103102
: loadFromFile("expectedOutput/ppl/explain_limit_push.json");
104103

105-
assertJsonEquals(
104+
assertJsonEqualsIgnoreRelId(
106105
expected,
107106
explainQueryToString(
108107
"source=opensearch-sql_test_index_account"
@@ -115,7 +114,7 @@ public void testLimitPushDownExplain() throws Exception {
115114
public void testFillNullPushDownExplain() throws Exception {
116115
String expected = loadFromFile("expectedOutput/ppl/explain_fillnull_push.json");
117116

118-
assertJsonEquals(
117+
assertJsonEqualsIgnoreRelId(
119118
expected,
120119
explainQueryToString(
121120
"source=opensearch-sql_test_index_account"
@@ -126,7 +125,7 @@ public void testFillNullPushDownExplain() throws Exception {
126125
public void testTrendlinePushDownExplain() throws Exception {
127126
String expected = loadFromFile("expectedOutput/ppl/explain_trendline_push.json");
128127

129-
assertJsonEquals(
128+
assertJsonEqualsIgnoreRelId(
130129
expected,
131130
explainQueryToString(
132131
"source=opensearch-sql_test_index_account"
@@ -139,7 +138,7 @@ public void testTrendlinePushDownExplain() throws Exception {
139138
public void testTrendlineWithSortPushDownExplain() throws Exception {
140139
String expected = loadFromFile("expectedOutput/ppl/explain_trendline_sort_push.json");
141140

142-
assertJsonEquals(
141+
assertJsonEqualsIgnoreRelId(
143142
expected,
144143
explainQueryToString(
145144
"source=opensearch-sql_test_index_account"
@@ -173,7 +172,7 @@ public void testPatternsWithoutAggExplain() throws Exception {
173172
? loadFromFile("expectedOutput/calcite/explain_patterns.json")
174173
: loadFromFile("expectedOutput/ppl/explain_patterns.json");
175174

176-
assertJsonEquals(
175+
assertJsonEqualsIgnoreRelId(
177176
expected,
178177
explainQueryToString("source=opensearch-sql_test_index_account | patterns email"));
179178
}
@@ -186,7 +185,7 @@ public void testPatternsWithAggPushDownExplain() throws Exception {
186185
? loadFromFile("expectedOutput/calcite/explain_patterns_agg_push.json")
187186
: loadFromFile("expectedOutput/ppl/explain_patterns_agg_push.json");
188187

189-
assertJsonEquals(
188+
assertJsonEqualsIgnoreRelId(
190189
expected,
191190
explainQueryToString(
192191
"source=opensearch-sql_test_index_account"

integ-test/src/test/java/org/opensearch/sql/util/MatcherUtils.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -394,4 +394,13 @@ public static Matcher<String> equalToIgnoreCaseAndWhiteSpace(String expectedStri
394394
public static void assertJsonEquals(String expected, String actual) {
395395
assertEquals(JsonParser.parseString(expected), JsonParser.parseString(actual));
396396
}
397+
398+
/** Compare two JSON string are equals with ignoring the RelNode id in the Calcite plan. */
399+
public static void assertJsonEqualsIgnoreRelId(String expected, String actual) {
400+
assertJsonEquals(eliminateRelId(expected), eliminateRelId(actual));
401+
}
402+
403+
private static String eliminateRelId(String s) {
404+
return s.replaceAll("rel#\\d+", "rel#").replaceAll("RelSubset#\\d+", "RelSubset#");
405+
}
397406
}
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"calcite": {
33
"logical": "LogicalProject(avg_age=[$2], state=[$1], city=[$0])\n LogicalAggregate(group=[{5, 7}], avg_age=[AVG($8)])\n LogicalFilter(condition=[>($8, 30)])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n",
4-
"physical": "EnumerableCalc(expr#0..3=[{inputs}], expr#4=[0], expr#5=[=($t3, $t4)], expr#6=[null:BIGINT], expr#7=[CASE($t5, $t6, $t2)], expr#8=[CAST($t7):DOUBLE], expr#9=[/($t8, $t3)], avg_age=[$t9], state=[$t1], city=[$t0])\n EnumerableAggregate(group=[{5, 7}], agg#0=[$SUM0($8)], agg#1=[COUNT($8)])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[FILTER->>($8, 30)], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"query\":{\"range\":{\"age\":{\"from\":30,\"to\":null,\"include_lower\":false,\"include_upper\":true,\"boost\":1.0}}},\"sort\":[{\"_doc\":{\"order\":\"asc\"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n"
4+
"physical": "EnumerableCalc(expr#0..2=[{inputs}], avg_age=[$t2], state=[$t1], city=[$t0])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[city, state, age], FILTER->>($2, 30), AGGREGATION->rel#12051:LogicalAggregate.NONE.[](input=RelSubset#12050,group={0, 1},avg_age=AVG($2))], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"query\":{\"range\":{\"age\":{\"from\":30,\"to\":null,\"include_lower\":false,\"include_upper\":true,\"boost\":1.0}}},\"_source\":{\"includes\":[\"city\",\"state\",\"age\"],\"excludes\":[]},\"sort\":[{\"_doc\":{\"order\":\"asc\"}}],\"aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"city\":{\"terms\":{\"field\":\"city.keyword\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}},{\"state\":{\"terms\":{\"field\":\"state.keyword\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}}]},\"aggregations\":{\"avg_age\":{\"avg\":{\"field\":\"age\"}}}}}}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n"
55
}
6-
}
6+
}
Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
2-
"calcite":{
3-
"logical":"LogicalProject(age=[$8])\n LogicalFilter(condition=[>($3, 10000)])\n LogicalFilter(condition=[<($8, 40)])\n LogicalFilter(condition=[>($8, 30)])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n",
4-
"physical":"EnumerableCalc(expr#0..16=[{inputs}], age=[$t8])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[FILTER->>($8, 30), FILTER->AND(<($8, 40), >($3, 10000))], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"query\":{\"bool\":{\"filter\":[{\"range\":{\"age\":{\"from\":30,\"to\":null,\"include_lower\":false,\"include_upper\":true,\"boost\":1.0}}},{\"bool\":{\"must\":[{\"range\":{\"age\":{\"from\":null,\"to\":40,\"include_lower\":true,\"include_upper\":false,\"boost\":1.0}}},{\"range\":{\"balance\":{\"from\":10000,\"to\":null,\"include_lower\":false,\"include_upper\":true,\"boost\":1.0}}}],\"adjust_pure_negative\":true,\"boost\":1.0}}],\"adjust_pure_negative\":true,\"boost\":1.0}},\"sort\":[{\"_doc\":{\"order\":\"asc\"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n"
2+
"calcite": {
3+
"logical": "LogicalProject(age=[$8])\n LogicalFilter(condition=[>($3, 10000)])\n LogicalFilter(condition=[<($8, 40)])\n LogicalFilter(condition=[>($8, 30)])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n",
4+
"physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[balance, age], FILTER->>($1, 30), FILTER-><($1, 40), FILTER->>($0, 10000), PROJECT->[age]], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"query\":{\"bool\":{\"filter\":[{\"range\":{\"age\":{\"from\":30,\"to\":null,\"include_lower\":false,\"include_upper\":true,\"boost\":1.0}}},{\"range\":{\"age\":{\"from\":null,\"to\":40,\"include_lower\":true,\"include_upper\":false,\"boost\":1.0}}},{\"range\":{\"balance\":{\"from\":10000,\"to\":null,\"include_lower\":false,\"include_upper\":true,\"boost\":1.0}}}],\"adjust_pure_negative\":true,\"boost\":1.0}},\"_source\":{\"includes\":[\"age\"],\"excludes\":[]},\"sort\":[{\"_doc\":{\"order\":\"asc\"}}]}, requestedTotalSize=10000, pageSize=null, startFrom=0)])\n"
55
}
66
}

0 commit comments

Comments
 (0)