Skip to content

Commit 89d487b

Browse files
committed
Convert to keyword fields when sorting opensearch text fields
Signed-off-by: Yuanchun Shen <yuanchu@amazon.com>
1 parent e080439 commit 89d487b

3 files changed

Lines changed: 8 additions & 3 deletions

File tree

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"calcite": {
33
"logical": "LogicalSort(sort0=[$3], sort1=[$4], sort2=[$2], dir0=[DESC], dir1=[DESC], dir2=[ASC])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4])\n LogicalSort(sort0=[$3], sort1=[$4], sort2=[$2], dir0=[DESC], dir1=[DESC], dir2=[ASC])\n LogicalSort(sort0=[$0], sort1=[$1], sort2=[$2], sort3=[$3], dir0=[ASC], dir1=[ASC], dir2=[ASC], dir3=[ASC])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n",
4-
"physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender], SORT->[{\n \"balance\" : {\n \"order\" : \"desc\",\n \"missing\" : \"_first\"\n }\n}, {\n \"gender\" : {\n \"order\" : \"desc\",\n \"missing\" : \"_first\"\n }\n}, {\n \"address\" : {\n \"order\" : \"asc\",\n \"missing\" : \"_last\"\n }\n}, {\n \"account_number\" : {\n \"order\" : \"asc\",\n \"missing\" : \"_last\"\n }\n}, {\n \"firstname\" : {\n \"order\" : \"asc\",\n \"missing\" : \"_last\"\n }\n}]], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"balance\",\"gender\"],\"excludes\":[]},\"sort\":[{\"balance\":{\"order\":\"desc\",\"missing\":\"_first\"}},{\"gender\":{\"order\":\"desc\",\"missing\":\"_first\"}},{\"address\":{\"order\":\"asc\",\"missing\":\"_last\"}},{\"account_number\":{\"order\":\"asc\",\"missing\":\"_last\"}},{\"firstname\":{\"order\":\"asc\",\"missing\":\"_last\"}}]}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n"
4+
"physical": "CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender], SORT->[{\n \"balance\" : {\n \"order\" : \"desc\",\n \"missing\" : \"_first\"\n }\n}, {\n \"gender\" : {\n \"order\" : \"desc\",\n \"missing\" : \"_first\"\n }\n}, {\n \"address\" : {\n \"order\" : \"asc\",\n \"missing\" : \"_last\"\n }\n}, {\n \"account_number\" : {\n \"order\" : \"asc\",\n \"missing\" : \"_last\"\n }\n}, {\n \"firstname.keyword\" : {\n \"order\" : \"asc\",\n \"missing\" : \"_last\"\n }\n}]], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"timeout\":\"1m\",\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"balance\",\"gender\"],\"excludes\":[]},\"sort\":[{\"balance\":{\"order\":\"desc\",\"missing\":\"_first\"}},{\"gender\":{\"order\":\"desc\",\"missing\":\"_first\"}},{\"address\":{\"order\":\"asc\",\"missing\":\"_last\"}},{\"account_number\":{\"order\":\"asc\",\"missing\":\"_last\"}},{\"firstname.keyword\":{\"order\":\"asc\",\"missing\":\"_last\"}}]}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n"
55
}
66
}
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
{
22
"calcite": {
33
"logical": "LogicalProject(age2=[$2])\n LogicalFilter(condition=[<=($3, 1)])\n LogicalProject(avg_age=[$0], state=[$1], age2=[$2], _row_number_=[ROW_NUMBER() OVER (PARTITION BY $2 ORDER BY $2)])\n LogicalFilter(condition=[IS NOT NULL($2)])\n LogicalProject(avg_age=[$0], state=[$1], age2=[+($0, 2)])\n LogicalSort(sort0=[$1], dir0=[ASC])\n LogicalProject(avg_age=[$2], state=[$0], city=[$1])\n LogicalAggregate(group=[{0, 1}], avg_age=[AVG($2)])\n LogicalProject(state=[$7], city=[$5], age=[$8])\n LogicalFilter(condition=[>($8, 30)])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n",
4-
"physical": "EnumerableCalc(expr#0..2=[{inputs}], expr#3=[1], expr#4=[<=($t2, $t3)], age2=[$t1], $condition=[$t4])\n EnumerableWindow(window#0=[window(partition {1} order by [1] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])])\n EnumerableCalc(expr#0..2=[{inputs}], expr#3=[2], expr#4=[+($t2, $t3)], expr#5=[IS NOT NULL($t2)], state=[$t1], age2=[$t4], $condition=[$t5])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[city, state, age], FILTER->>($2, 30), AGGREGATION->rel#246:LogicalAggregate.NONE.[](input=RelSubset#201,group={0, 1},avg_age=AVG($2)), SORT->[{\n \"state\" : {\n \"order\" : \"asc\",\n \"missing\" : \"_last\"\n }\n}]], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"query\":{\"range\":{\"age\":{\"from\":30,\"to\":null,\"include_lower\":false,\"include_upper\":true,\"boost\":1.0}}},\"_source\":{\"includes\":[\"city\",\"state\",\"age\"],\"excludes\":[]},\"sort\":[{\"state\":{\"order\":\"asc\",\"missing\":\"_last\"}}],\"aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"city\":{\"terms\":{\"field\":\"city.keyword\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}},{\"state\":{\"terms\":{\"field\":\"state.keyword\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}}]},\"aggregations\":{\"avg_age\":{\"avg\":{\"field\":\"age\"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n"
4+
"physical": "EnumerableCalc(expr#0..2=[{inputs}], expr#3=[1], expr#4=[<=($t2, $t3)], age2=[$t1], $condition=[$t4])\n EnumerableWindow(window#0=[window(partition {1} order by [1] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])])\n EnumerableCalc(expr#0..2=[{inputs}], expr#3=[2], expr#4=[+($t2, $t3)], expr#5=[IS NOT NULL($t2)], state=[$t1], age2=[$t4], $condition=[$t5])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[city, state, age], FILTER->>($2, 30), AGGREGATION->rel#1142:LogicalAggregate.NONE.[](input=RelSubset#1097,group={0, 1},avg_age=AVG($2)), SORT->[{\n \"state.keyword\" : {\n \"order\" : \"asc\",\n \"missing\" : \"_last\"\n }\n}]], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":0,\"timeout\":\"1m\",\"query\":{\"range\":{\"age\":{\"from\":30,\"to\":null,\"include_lower\":false,\"include_upper\":true,\"boost\":1.0}}},\"_source\":{\"includes\":[\"city\",\"state\",\"age\"],\"excludes\":[]},\"sort\":[{\"state.keyword\":{\"order\":\"asc\",\"missing\":\"_last\"}}],\"aggregations\":{\"composite_buckets\":{\"composite\":{\"size\":1000,\"sources\":[{\"city\":{\"terms\":{\"field\":\"city.keyword\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}},{\"state\":{\"terms\":{\"field\":\"state.keyword\",\"missing_bucket\":true,\"missing_order\":\"first\",\"order\":\"asc\"}}}]},\"aggregations\":{\"avg_age\":{\"avg\":{\"field\":\"age\"}}}}}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)])\n"
55
}
66
}

opensearch/src/main/java/org/opensearch/sql/opensearch/storage/scan/CalciteLogicalIndexScan.java

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
import org.opensearch.sql.common.setting.Settings;
4444
import org.opensearch.sql.data.type.ExprType;
4545
import org.opensearch.sql.opensearch.data.type.OpenSearchDataType;
46+
import org.opensearch.sql.opensearch.data.type.OpenSearchTextType;
4647
import org.opensearch.sql.opensearch.planner.physical.EnumerableIndexScanRule;
4748
import org.opensearch.sql.opensearch.planner.physical.OpenSearchIndexRules;
4849
import org.opensearch.sql.opensearch.request.AggregateAnalyzer;
@@ -296,7 +297,11 @@ public CalciteLogicalIndexScan pushDownSort(List<RelFieldCollation> collations)
296297
case LAST -> "_last";
297298
default -> null;
298299
};
299-
sortBuilder = SortBuilders.fieldSort(fieldName).missing(missing);
300+
// Keyword field is optimized for sorting in OpenSearch
301+
String fieldNameKeyword =
302+
OpenSearchTextType.convertTextToKeyword(
303+
fieldName, osIndex.getFieldTypes().get(fieldName));
304+
sortBuilder = SortBuilders.fieldSort(fieldNameKeyword).missing(missing);
300305
}
301306
builders.add(sortBuilder.order(order));
302307
}

0 commit comments

Comments
 (0)