Skip to content

Commit 5975fd0

Browse files
committed
Set max=1 in join as default when plugins.ppl.syntax.legacy.preferred=false (opensearch-project#5057)
* Set max=1 in join as default when plugins.ppl.syntax.legacy.preferred=false Signed-off-by: Lantao Jin <ltjin@amazon.com> * update doc Signed-off-by: Lantao Jin <ltjin@amazon.com> --------- Signed-off-by: Lantao Jin <ltjin@amazon.com> (cherry picked from commit d31769d)
1 parent b73526b commit 5975fd0

6 files changed

Lines changed: 110 additions & 2 deletions

File tree

docs/user/ppl/admin/settings.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -241,7 +241,8 @@ This configuration is introduced since 3.3.0 which is used to switch some behavi
241241
The behaviours it controlled includes:
242242
- The default value of argument `bucket_nullable` in `stats` command. Check [stats command](../cmd/stats.md) for details.
243243
- The return value of `divide` and `/` operator. Check [expressions](../functions/expressions.md) for details.
244-
- The default value of argument `usenull` in `top` and `rare` commands. Check [top command](../cmd/top.md) and [rare command](../cmd/rare.md) for details.
244+
- The default value of argument `usenull` in `top` and `rare` commands. Check [top command](../cmd/top.md) and [rare command](../cmd/rare.md) for details.
245+
- The default value of argument `max` in `join` command. Check [join command](../cmd/join.md) for details.
245246

246247
### Example 1
247248

docs/user/ppl/cmd/join.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ The `join` command combines two datasets together. The left side could be an ind
2020
join [type=<joinType>] [overwrite=<bool>] [max=n] (\<join-field-list\> \| [leftAlias] [rightAlias] (on \| where) \<joinCriteria\>) \<right-dataset\>
2121
* type: optional. Join type using extended syntax. Options: `left`, `outer` (alias of `left`), `semi`, `anti`, and performance-sensitive types `right`, `full`, `cross`. **Default:** `inner`.
2222
* overwrite: optional boolean. Only works with `join-field-list`. Specifies whether duplicate-named fields from right-dataset should replace corresponding fields in the main search results. **Default:** `true`.
23-
* max: optional integer. Controls how many subsearch results could be joined against each row in main search. **Default:** 0 (unlimited).
23+
* max: optional integer. Controls how many subsearch results could be joined against each row in main search. **Default:** 0 (unlimited) when plugins.ppl.syntax.legacy.preferred is `true`. When the setting is `false` the default value is `1`.
2424
* join-field-list: optional. The fields used to build the join criteria. The join field list must exist on both sides. If not specified, all fields common to both sides will be used as join keys.
2525
* leftAlias: optional. Same as basic syntax when used with extended syntax.
2626
* rightAlias: optional. Same as basic syntax when used with extended syntax.

integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
import org.junit.Ignore;
2929
import org.junit.Test;
3030
import org.opensearch.sql.ast.statement.ExplainMode;
31+
import org.opensearch.sql.common.setting.Settings;
3132
import org.opensearch.sql.common.setting.Settings.Key;
3233
import org.opensearch.sql.common.utils.StringUtils;
3334
import org.opensearch.sql.ppl.ExplainIT;
@@ -113,6 +114,26 @@ public void testJoinWithFieldListAndMaxOption() throws IOException {
113114
assertYamlEqualsIgnoreId(expected, result);
114115
}
115116

117+
@Test
118+
public void testJoinWhenLegacyNotPreferred() throws IOException {
119+
withSettings(
120+
Settings.Key.PPL_SYNTAX_LEGACY_PREFERRED,
121+
"false",
122+
() -> {
123+
String query =
124+
"source=opensearch-sql_test_index_bank | join type=inner account_number"
125+
+ " opensearch-sql_test_index_bank";
126+
String result = null;
127+
try {
128+
result = explainQueryYaml(query);
129+
String expected = loadExpectedPlan("explain_join_with_fields_max_option.yaml");
130+
assertYamlEqualsIgnoreId(expected, result);
131+
} catch (IOException e) {
132+
fail();
133+
}
134+
});
135+
}
136+
116137
// Only for Calcite
117138
@Test
118139
public void testJoinWithFieldList() throws IOException {

integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLJoinIT.java

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import org.json.JSONObject;
2121
import org.junit.Test;
2222
import org.opensearch.client.Request;
23+
import org.opensearch.sql.common.setting.Settings;
2324
import org.opensearch.sql.legacy.TestsConstants;
2425
import org.opensearch.sql.ppl.PPLIntegTestCase;
2526

@@ -892,6 +893,55 @@ public void testJoinWithFieldListMaxEqualsOne() throws IOException {
892893
rows("David", "USA"));
893894
}
894895

896+
@Test
897+
public void testJoinWhenLegacyNotPreferred() throws IOException {
898+
withSettings(
899+
Settings.Key.PPL_SYNTAX_LEGACY_PREFERRED,
900+
"false",
901+
() -> {
902+
JSONObject actual = null;
903+
try {
904+
actual =
905+
executeQuery(
906+
String.format(
907+
"source=%s | join type=inner name,year,month %s",
908+
TestsConstants.TEST_INDEX_STATE_COUNTRY,
909+
TestsConstants.TEST_INDEX_OCCUPATION));
910+
} catch (IOException e) {
911+
fail();
912+
}
913+
verifySchema(
914+
actual,
915+
schema("name", "string"),
916+
schema("age", "int"),
917+
schema("state", "string"),
918+
schema("country", "string"),
919+
schema("year", "int"),
920+
schema("month", "int"),
921+
schema("occupation", "string"),
922+
schema("salary", "int"));
923+
JSONObject actual2 = null;
924+
try {
925+
actual2 =
926+
executeQuery(
927+
String.format(
928+
"source=%s | join type=inner max=1 name,year,month %s | fields name,"
929+
+ " country",
930+
TestsConstants.TEST_INDEX_STATE_COUNTRY,
931+
TestsConstants.TEST_INDEX_OCCUPATION));
932+
} catch (IOException e) {
933+
fail();
934+
}
935+
verifyDataRows(
936+
actual2,
937+
rows("Jake", "England"),
938+
rows("Jane", "Canada"),
939+
rows("John", "Canada"),
940+
rows("Hello", "USA"),
941+
rows("David", "USA"));
942+
});
943+
}
944+
895945
@Test
896946
public void testJoinComparing() throws IOException {
897947
JSONObject actual =

ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,7 @@
130130
import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.StatsByClauseContext;
131131
import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParserBaseVisitor;
132132
import org.opensearch.sql.ppl.utils.ArgumentFactory;
133+
import org.opensearch.sql.ppl.utils.UnresolvedPlanHelper;
133134

134135
/** Class of building the AST. Refines the visit path and build the AST nodes */
135136
public class AstBuilder extends OpenSearchPPLParserBaseVisitor<UnresolvedPlan> {
@@ -264,6 +265,11 @@ public UnresolvedPlan visitJoinCommand(OpenSearchPPLParser.JoinCommandContext ct
264265
ctx.joinOption().stream()
265266
.map(o -> (Argument) expressionBuilder.visit(o))
266267
.collect(Collectors.toList());
268+
if (arguments.stream().noneMatch(arg -> arg.getArgName().equals("max"))
269+
&& !UnresolvedPlanHelper.legacyPreferred(settings)) {
270+
arguments = new ArrayList<>(arguments);
271+
arguments.add(new Argument("max", Literal.ONE));
272+
}
267273
Argument.ArgumentMap argumentMap = Argument.ArgumentMap.of(arguments);
268274
if (argumentMap.get("type") != null) {
269275
Join.JoinType joinTypeFromArgument = ArgumentFactory.getJoinType(argumentMap);

ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLJoinTest.java

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1062,6 +1062,36 @@ public void testJoinWithMaxEqualsZero() {
10621062
verifyPPLToSparkSQL(root, expectedSparkSql);
10631063
}
10641064

1065+
@Test
1066+
public void testJoinWhenLegacyNotPreferred() {
1067+
doReturn(false).when(settings).getSettingValue(Settings.Key.PPL_SYNTAX_LEGACY_PREFERRED);
1068+
String ppl = "source=EMP | join type=outer DEPTNO DEPT";
1069+
RelNode root = getRelNode(ppl);
1070+
String expectedLogical =
1071+
"LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5],"
1072+
+ " COMM=[$6], DEPTNO=[$8], DNAME=[$9], LOC=[$10])\n"
1073+
+ " LogicalJoin(condition=[=($7, $8)], joinType=[left])\n"
1074+
+ " LogicalTableScan(table=[[scott, EMP]])\n"
1075+
+ " LogicalProject(DEPTNO=[$0], DNAME=[$1], LOC=[$2])\n"
1076+
+ " LogicalFilter(condition=[<=($3, 1)])\n"
1077+
+ " LogicalProject(DEPTNO=[$0], DNAME=[$1], LOC=[$2],"
1078+
+ " _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $0)])\n"
1079+
+ " LogicalTableScan(table=[[scott, DEPT]])\n";
1080+
verifyLogical(root, expectedLogical);
1081+
verifyResultCount(root, 14);
1082+
1083+
String expectedSparkSql =
1084+
"SELECT `EMP`.`EMPNO`, `EMP`.`ENAME`, `EMP`.`JOB`, `EMP`.`MGR`, `EMP`.`HIREDATE`,"
1085+
+ " `EMP`.`SAL`, `EMP`.`COMM`, `t1`.`DEPTNO`, `t1`.`DNAME`, `t1`.`LOC`\n"
1086+
+ "FROM `scott`.`EMP`\n"
1087+
+ "LEFT JOIN (SELECT `DEPTNO`, `DNAME`, `LOC`\n"
1088+
+ "FROM (SELECT `DEPTNO`, `DNAME`, `LOC`, ROW_NUMBER() OVER (PARTITION BY `DEPTNO`)"
1089+
+ " `_row_number_dedup_`\n"
1090+
+ "FROM `scott`.`DEPT`) `t`\n"
1091+
+ "WHERE `_row_number_dedup_` <= 1) `t1` ON `EMP`.`DEPTNO` = `t1`.`DEPTNO`";
1092+
verifyPPLToSparkSQL(root, expectedSparkSql);
1093+
}
1094+
10651095
@Test
10661096
public void testJoinSubsearchMaxOut() {
10671097
String ppl1 = "source=EMP | join type=inner max=0 DEPTNO DEPT";

0 commit comments

Comments
 (0)