Skip to content

Commit edaedee

Browse files
committed
sed - add pushdown for sed and explain IT and IT with fix
Signed-off-by: Jialiang Liang <jiallian@amazon.com>
1 parent f4cf561 commit edaedee

8 files changed

Lines changed: 85 additions & 2 deletions

File tree

core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -191,6 +191,17 @@ public RelNode visitRex(Rex node, CalcitePlanContext context) {
191191
fieldRex,
192192
context.rexBuilder.makeLiteral(patternStr));
193193

194+
// Extract regex pattern from sed command for pushdown optimization
195+
String filterPattern = extractRegexFromSedPattern(patternStr);
196+
if (filterPattern != null) {
197+
RexNode regexMatchCondition =
198+
context.rexBuilder.makeCall(
199+
SqlLibraryOperators.REGEXP_CONTAINS,
200+
fieldRex,
201+
context.rexBuilder.makeLiteral(filterPattern));
202+
context.relBuilder.filter(regexMatchCondition);
203+
}
204+
194205
String fieldName = node.getField().toString();
195206
projectPlusOverriding(List.of(sedCall), List.of(fieldName), context);
196207
return context.relBuilder.peek();
@@ -253,6 +264,30 @@ public RelNode visitRex(Rex node, CalcitePlanContext context) {
253264
return context.relBuilder.peek();
254265
}
255266

267+
/**
268+
* Extract regex pattern from sed command for filter pushdown. Supports basic sed patterns like
269+
* s/pattern/replacement/flags Returns the search pattern that can be used for filtering.
270+
*/
271+
private String extractRegexFromSedPattern(String sedPattern) {
272+
if (sedPattern == null || sedPattern.isEmpty()) {
273+
return null;
274+
}
275+
276+
// Handle sed substitute command: s/pattern/replacement/flags
277+
if (sedPattern.startsWith("s/")) {
278+
int firstSlash = sedPattern.indexOf('/', 2);
279+
if (firstSlash != -1) {
280+
String pattern = sedPattern.substring(2, firstSlash);
281+
// Return the pattern if it's not empty and looks like a valid regex
282+
if (!pattern.isEmpty() && !pattern.equals(".*")) {
283+
return pattern;
284+
}
285+
}
286+
}
287+
288+
return null;
289+
}
290+
256291
private boolean containsSubqueryExpression(Node expr) {
257292
if (expr == null) {
258293
return false;

integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@
8383
CalcitePrometheusDataSourceCommandsIT.class,
8484
CalciteQueryAnalysisIT.class,
8585
CalciteRareCommandIT.class,
86+
CalciteRexCommandIT.class,
8687
CalciteRenameCommandIT.class,
8788
CalciteRenameCommandIT.class,
8889
CalciteResourceMonitorIT.class,

integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -313,6 +313,26 @@ public void testExplainOnAggregationWithSumEnhancement() throws IOException {
313313
TEST_INDEX_BANK)));
314314
}
315315

316+
@Test
317+
public void testRexExplain() throws IOException {
318+
String query =
319+
"source=opensearch-sql_test_index_account | rex field=lastname \\\"(?<initial>^[A-Z])\\\" |"
320+
+ " head 5";
321+
var result = explainQueryToString(query);
322+
String expected = loadExpectedPlan("explain_rex.json");
323+
assertJsonEqualsIgnoreId(expected, result);
324+
}
325+
326+
@Test
327+
public void testRexSedModeExplain() throws IOException {
328+
String query =
329+
"source=opensearch-sql_test_index_account | rex field=lastname mode=sed \\\"s/^[A-Z]/X/\\\""
330+
+ " | head 5";
331+
var result = explainQueryToString(query);
332+
String expected = loadExpectedPlan("explain_rex_sed.json");
333+
assertJsonEqualsIgnoreId(expected, result);
334+
}
335+
316336
/**
317337
* Executes the PPL query and returns the result as a string with windows-style line breaks
318338
* replaced with Unix-style ones.
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
{
2+
"calcite": {
3+
"logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], initial=[$17])\n LogicalSort(fetch=[5])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], initial=[REX_EXTRACT($10, '(?<initial>^[A-Z])', 1)])\n LogicalFilter(condition=[REGEXP_CONTAINS($10, '(^[A-Z])')])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n",
4+
"physical": "EnumerableCalc(expr#0..10=[{inputs}], expr#11=['(?<initial>^[A-Z])'], expr#12=[1], expr#13=[REX_EXTRACT($t10, $t11, $t12)], proj#0..10=[{exprs}], initial=[$t13])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[SCRIPT->REGEXP_CONTAINS($10, '(^[A-Z])'), LIMIT->5, PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":5,\"timeout\":\"1m\",\"query\":{\"script\":{\"script\":{\"source\":\"{\\\"langType\\\":\\\"calcite\\\",\\\"script\\\":\\\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQGy3sKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJhY2NvdW50X251bWJlciIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogImZpcnN0bmFtZSIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogImFkZHJlc3MiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJiYWxhbmNlIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiZ2VuZGVyIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiY2l0eSIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogImVtcGxveWVyIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAic3RhdGUiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJhZ2UiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgInByZWNpc2lvbiI6IC0xLAogICAgICAibmFtZSI6ICJlbWFpbCIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogImxhc3RuYW1lIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiX2lkIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiX2luZGV4IgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiUkVBTCIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJuYW1lIjogIl9zY29yZSIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlJFQUwiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJfbWF4c2NvcmUiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJfc29ydCIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogIl9yb3V0aW5nIgogICAgfQogIF0sCiAgIm51bGxhYmxlIjogdHJ1ZQp9dAAEZXhwcnQBO3sKICAib3AiOiB7CiAgICAibmFtZSI6ICJSRUdFWFBfQ09OVEFJTlMiLAogICAgImtpbmQiOiAiT1RIRVJfRlVOQ1RJT04iLAogICAgInN5bnRheCI6ICJGVU5DVElPTiIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgImlucHV0IjogMTAsCiAgICAgICJuYW1lIjogIiQxMCIKICAgIH0sCiAgICB7CiAgICAgICJsaXRlcmFsIjogIiheW0EtWl0pIiwKICAgICAgInR5cGUiOiB7CiAgICAgICAgInR5cGUiOiAiQ0hBUiIsCiAgICAgICAgIm51bGxhYmxlIjogZmFsc2UsCiAgICAgICAgInByZWNpc2lvbiI6IDgKICAgICAgfQogICAgfQogIF0KfXQACmZpZWxkVHlwZXNzcgARamF2YS51dGlsLkhhc2hNYXAFB9rBwxZg0QMAAkYACmxvYWRGYWN0b3JJAAl0aHJlc2hvbGR4cD9AAAAAAAAMdwgAAAAQAAAAC3QADmFjY291bnRfbnVtYmVyfnIAKW9yZy5vcGVuc2VhcmNoLnNxbC5kYXRhLnR5cGUuRXhwckNvcmVUeXBlAAAAAAAAAAASAAB4cgAOamF2YS5sYW5nLkVudW0AAAAAAAAAABIAAHhwdAAETE9OR3QACWZpcnN0bmFtZXNyADpvcmcub3BlbnNlYXJjaC5zcWwub3BlbnNlYXJjaC5kYXRhLnR5cGUuT3BlblNlYXJjaFRleHRUeXBlrYOjkwTjMUQCAAFMAAZmaWVsZHN0AA9MamF2YS91dGlsL01hcDt4cgA6b3JnLm9wZW5zZWFyY2guc3FsLm9wZW5zZWFyY2guZGF0YS50eXBlLk9wZW5TZWFyY2hEYXRhVHlwZcJjvMoC+gU1AgADTAAMZXhwckNvcmVUeXBldAArTG9yZy9vcGVuc2VhcmNoL3NxbC9kYXRhL3R5cGUvRXhwckNvcmVUeXBlO0wAC21hcHBpbmdUeXBldABITG9yZy9vcGVuc2VhcmNoL3NxbC9vcGVuc2VhcmNoL2RhdGEvdHlwZS9PcGVuU2VhcmNoRGF0YVR5cGUkTWFwcGluZ1R5cGU7TAAKcHJvcGVydGllc3EAfgAQeHB+cQB+AAp0AAdVTktOT1dOfnIARm9yZy5vcGVuc2VhcmNoLnNxbC5vcGVuc2VhcmNoLmRhdGEudHlwZS5PcGVuU2VhcmNoRGF0YVR5cGUkTWFwcGluZ1R5cGUAAAAAAAAAABIAAHhxAH4AC3QABFRleHRzcgA8c2hhZGVkLmNvbS5nb29nbGUuY29tbW9uLmNvbGxlY3QuSW1tdXRhYmxlTWFwJFNlcmlhbGl6ZWRGb3JtAAAAAAAAAAACAAJMAARrZXlzdAASTGphdmEvbGFuZy9PYmplY3Q7TAAGdmFsdWVzcQB+ABt4cHVyABNbTGphdmEubGFuZy5PYmplY3Q7kM5YnxBzKWwCAAB4cAAAAAB1cQB+AB0AAAAAc3EAfgAAAAAAA3cEAAAAAnQAB2tleXdvcmRzcQB+ABF+cQB+AAp0AAZTVFJJTkd+cQB+ABd0AAdLZXl3b3JkcQB+ABx4dAAHYWRkcmVzc3NxAH4AD3EAfgAVcQB+ABhxAH4AHHNxAH4AAAAAAAN3BAAAAAB4dAAHYmFsYW5jZXEAfgAMdAAGZ2VuZGVyc3EAfgAPcQB+ABVxAH4AGHEAfgAcc3EAfgAAAAAAA3cEAAAAAnEAfgAhcQB+ACJ4dAAEY2l0eXNxAH4AD3EAfgAVcQB+ABhxAH4AHHNxAH4AAAAAAAN3BAAAAAJxAH4AIXEAfgAieHQACGVtcGxveWVyc3EAfgAPcQB+ABVxAH4AGHEAfgAcc3EAfgAAAAAAA3cEAAAAAnEAfgAhcQB+ACJ4dAAFc3RhdGVzcQB+AA9xAH4AFXEAfgAYcQB+ABxzcQB+AAAAAAADdwQAAAACcQB+ACFxAH4AInh0AANhZ2VxAH4ADHQABWVtYWlsc3EAfgAPcQB+ABVxAH4AGHEAfgAcc3EAfgAAAAAAA3cEAAAAAnEAfgAhcQB+ACJ4dAAIbGFzdG5hbWVzcQB+AA9xAH4AFXEAfgAYcQB+ABxzcQB+AAAAAAADdwQAAAACcQB+ACFxAH4AInh4eA==\\\"}\",\"lang\":\"opensearch_compounded_script\",\"params\":{\"utcTimestamp\":*}},\"boost\":1.0}},\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"balance\",\"gender\",\"city\",\"employer\",\"state\",\"age\",\"email\",\"lastname\"],\"excludes\":[]},\"sort\":[{\"_doc\":{\"order\":\"asc\"}}]}, requestedTotalSize=5, pageSize=null, startFrom=0)])\n"
5+
}
6+
}
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
{
2+
"calcite": {
3+
"logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$16])\n LogicalSort(fetch=[5])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], lastname=[REX_SED($10, 's/^[A-Z]/X/')])\n LogicalFilter(condition=[REGEXP_CONTAINS($10, '^[A-Z]')])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n",
4+
"physical": "EnumerableCalc(expr#0..10=[{inputs}], expr#11=['s/^[A-Z]/X/'], expr#12=[REX_SED($t10, $t11)], proj#0..9=[{exprs}], lastname=[$t12])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[SCRIPT->REGEXP_CONTAINS($10, '^[A-Z]'), LIMIT->5, PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname], LIMIT->10000], OpenSearchRequestBuilder(sourceBuilder={\"from\":0,\"size\":5,\"timeout\":\"1m\",\"query\":{\"script\":{\"script\":{\"source\":\"{\\\"langType\\\":\\\"calcite\\\",\\\"script\\\":\\\"rO0ABXNyABFqYXZhLnV0aWwuQ29sbFNlcleOq7Y6G6gRAwABSQADdGFneHAAAAADdwQAAAAGdAAHcm93VHlwZXQGy3sKICAiZmllbGRzIjogWwogICAgewogICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJhY2NvdW50X251bWJlciIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogImZpcnN0bmFtZSIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogImFkZHJlc3MiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJiYWxhbmNlIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiZ2VuZGVyIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiY2l0eSIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogImVtcGxveWVyIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAic3RhdGUiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJhZ2UiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJWQVJDSEFSIiwKICAgICAgIm51bGxhYmxlIjogdHJ1ZSwKICAgICAgInByZWNpc2lvbiI6IC0xLAogICAgICAibmFtZSI6ICJlbWFpbCIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogImxhc3RuYW1lIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiX2lkIgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiVkFSQ0hBUiIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJwcmVjaXNpb24iOiAtMSwKICAgICAgIm5hbWUiOiAiX2luZGV4IgogICAgfSwKICAgIHsKICAgICAgInR5cGUiOiAiUkVBTCIsCiAgICAgICJudWxsYWJsZSI6IHRydWUsCiAgICAgICJuYW1lIjogIl9zY29yZSIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlJFQUwiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJfbWF4c2NvcmUiCiAgICB9LAogICAgewogICAgICAidHlwZSI6ICJCSUdJTlQiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAibmFtZSI6ICJfc29ydCIKICAgIH0sCiAgICB7CiAgICAgICJ0eXBlIjogIlZBUkNIQVIiLAogICAgICAibnVsbGFibGUiOiB0cnVlLAogICAgICAicHJlY2lzaW9uIjogLTEsCiAgICAgICJuYW1lIjogIl9yb3V0aW5nIgogICAgfQogIF0sCiAgIm51bGxhYmxlIjogdHJ1ZQp9dAAEZXhwcnQBOXsKICAib3AiOiB7CiAgICAibmFtZSI6ICJSRUdFWFBfQ09OVEFJTlMiLAogICAgImtpbmQiOiAiT1RIRVJfRlVOQ1RJT04iLAogICAgInN5bnRheCI6ICJGVU5DVElPTiIKICB9LAogICJvcGVyYW5kcyI6IFsKICAgIHsKICAgICAgImlucHV0IjogMTAsCiAgICAgICJuYW1lIjogIiQxMCIKICAgIH0sCiAgICB7CiAgICAgICJsaXRlcmFsIjogIl5bQS1aXSIsCiAgICAgICJ0eXBlIjogewogICAgICAgICJ0eXBlIjogIkNIQVIiLAogICAgICAgICJudWxsYWJsZSI6IGZhbHNlLAogICAgICAgICJwcmVjaXNpb24iOiA2CiAgICAgIH0KICAgIH0KICBdCn10AApmaWVsZFR5cGVzc3IAEWphdmEudXRpbC5IYXNoTWFwBQfawcMWYNEDAAJGAApsb2FkRmFjdG9ySQAJdGhyZXNob2xkeHA/QAAAAAAADHcIAAAAEAAAAAt0AA5hY2NvdW50X251bWJlcn5yAClvcmcub3BlbnNlYXJjaC5zcWwuZGF0YS50eXBlLkV4cHJDb3JlVHlwZQAAAAAAAAAAEgAAeHIADmphdmEubGFuZy5FbnVtAAAAAAAAAAASAAB4cHQABExPTkd0AAlmaXJzdG5hbWVzcgA6b3JnLm9wZW5zZWFyY2guc3FsLm9wZW5zZWFyY2guZGF0YS50eXBlLk9wZW5TZWFyY2hUZXh0VHlwZa2Do5ME4zFEAgABTAAGZmllbGRzdAAPTGphdmEvdXRpbC9NYXA7eHIAOm9yZy5vcGVuc2VhcmNoLnNxbC5vcGVuc2VhcmNoLmRhdGEudHlwZS5PcGVuU2VhcmNoRGF0YVR5cGXCY7zKAvoFNQIAA0wADGV4cHJDb3JlVHlwZXQAK0xvcmcvb3BlbnNlYXJjaC9zcWwvZGF0YS90eXBlL0V4cHJDb3JlVHlwZTtMAAttYXBwaW5nVHlwZXQASExvcmcvb3BlbnNlYXJjaC9zcWwvb3BlbnNlYXJjaC9kYXRhL3R5cGUvT3BlblNlYXJjaERhdGFUeXBlJE1hcHBpbmdUeXBlO0wACnByb3BlcnRpZXNxAH4AEHhwfnEAfgAKdAAHVU5LTk9XTn5yAEZvcmcub3BlbnNlYXJjaC5zcWwub3BlbnNlYXJjaC5kYXRhLnR5cGUuT3BlblNlYXJjaERhdGFUeXBlJE1hcHBpbmdUeXBlAAAAAAAAAAASAAB4cQB+AAt0AARUZXh0c3IAPHNoYWRlZC5jb20uZ29vZ2xlLmNvbW1vbi5jb2xsZWN0LkltbXV0YWJsZU1hcCRTZXJpYWxpemVkRm9ybQAAAAAAAAAAAgACTAAEa2V5c3QAEkxqYXZhL2xhbmcvT2JqZWN0O0wABnZhbHVlc3EAfgAbeHB1cgATW0xqYXZhLmxhbmcuT2JqZWN0O5DOWJ8QcylsAgAAeHAAAAAAdXEAfgAdAAAAAHNxAH4AAAAAAAN3BAAAAAJ0AAdrZXl3b3Jkc3EAfgARfnEAfgAKdAAGU1RSSU5HfnEAfgAXdAAHS2V5d29yZHEAfgAceHQAB2FkZHJlc3NzcQB+AA9xAH4AFXEAfgAYcQB+ABxzcQB+AAAAAAADdwQAAAAAeHQAB2JhbGFuY2VxAH4ADHQABmdlbmRlcnNxAH4AD3EAfgAVcQB+ABhxAH4AHHNxAH4AAAAAAAN3BAAAAAJxAH4AIXEAfgAieHQABGNpdHlzcQB+AA9xAH4AFXEAfgAYcQB+ABxzcQB+AAAAAAADdwQAAAACcQB+ACFxAH4AInh0AAhlbXBsb3llcnNxAH4AD3EAfgAVcQB+ABhxAH4AHHNxAH4AAAAAAAN3BAAAAAJxAH4AIXEAfgAieHQABXN0YXRlc3EAfgAPcQB+ABVxAH4AGHEAfgAcc3EAfgAAAAAAA3cEAAAAAnEAfgAhcQB+ACJ4dAADYWdlcQB+AAx0AAVlbWFpbHNxAH4AD3EAfgAVcQB+ABhxAH4AHHNxAH4AAAAAAAN3BAAAAAJxAH4AIXEAfgAieHQACGxhc3RuYW1lc3EAfgAPcQB+ABVxAH4AGHEAfgAcc3EAfgAAAAAAA3cEAAAAAnEAfgAhcQB+ACJ4eHg=\\\"}\",\"lang\":\"opensearch_compounded_script\",\"params\":{\"utcTimestamp\":*}},\"boost\":1.0}},\"_source\":{\"includes\":[\"account_number\",\"firstname\",\"address\",\"balance\",\"gender\",\"city\",\"employer\",\"state\",\"age\",\"email\",\"lastname\"],\"excludes\":[]},\"sort\":[{\"_doc\":{\"order\":\"asc\"}}]}, requestedTotalSize=5, pageSize=null, startFrom=0)])\n"
5+
}
6+
}
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
{
2+
"calcite": {
3+
"logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], initial=[$17])\n LogicalSort(fetch=[5])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], initial=[REX_EXTRACT($10, '(?<initial>^[A-Z])', 1)])\n LogicalFilter(condition=[REGEXP_CONTAINS($10, '(^[A-Z])')])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n",
4+
"physical": "EnumerableLimit(fetch=[10000])\n EnumerableCalc(expr#0..16=[{inputs}], expr#17=['(?<initial>^[A-Z])'], expr#18=[1], expr#19=[REX_EXTRACT($t10, $t17, $t18)], proj#0..10=[{exprs}], initial=[$t19])\n EnumerableLimit(fetch=[5])\n EnumerableCalc(expr#0..16=[{inputs}], expr#17=['(^[A-Z])'], expr#18=[REGEXP_CONTAINS($t10, $t17)], proj#0..16=[{exprs}], $condition=[$t18])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n"
5+
}
6+
}
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
{
2+
"calcite": {
3+
"logical": "LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$16])\n LogicalSort(fetch=[5])\n LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], lastname=[REX_SED($10, 's/^[A-Z]/X/')])\n LogicalFilter(condition=[REGEXP_CONTAINS($10, '^[A-Z]')])\n CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n",
4+
"physical": "EnumerableLimit(fetch=[10000])\n EnumerableCalc(expr#0..16=[{inputs}], expr#17=['s/^[A-Z]/X/'], expr#18=[REX_SED($t10, $t17)], proj#0..9=[{exprs}], lastname=[$t18])\n EnumerableLimit(fetch=[5])\n EnumerableCalc(expr#0..16=[{inputs}], expr#17=['^[A-Z]'], expr#18=[REGEXP_CONTAINS($t10, $t17)], proj#0..16=[{exprs}], $condition=[$t18])\n CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]])\n"
5+
}
6+
}

ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLRexTest.java

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -75,11 +75,14 @@ public void testRexSedMode() {
7575
RelNode root = getRelNode(ppl);
7676
String expectedLogical =
7777
"LogicalProject(ENAME=[REX_SED($1, 's/A/X/')])\n"
78-
+ " LogicalTableScan(table=[[scott, EMP]])\n";
78+
+ " LogicalFilter(condition=[REGEXP_CONTAINS($1, 'A')])\n"
79+
+ " LogicalTableScan(table=[[scott, EMP]])\n";
7980
verifyLogical(root, expectedLogical);
8081

8182
String expectedSparkSql =
82-
"SELECT `REX_SED`(`ENAME`, 's/A/X/') `ENAME`\n" + "FROM `scott`.`EMP`";
83+
"SELECT `REX_SED`(`ENAME`, 's/A/X/') `ENAME`\n"
84+
+ "FROM `scott`.`EMP`\n"
85+
+ "WHERE REGEXP_CONTAINS(`ENAME`, 'A')";
8386
verifyPPLToSparkSQL(root, expectedSparkSql);
8487
}
8588

0 commit comments

Comments
 (0)