Skip to content

Commit d31769d

Browse files
authored
Set max=1 in join as default when plugins.ppl.syntax.legacy.preferred=false (#5057)
* Set max=1 in join as default when plugins.ppl.syntax.legacy.preferred=false Signed-off-by: Lantao Jin <ltjin@amazon.com> * update doc Signed-off-by: Lantao Jin <ltjin@amazon.com> --------- Signed-off-by: Lantao Jin <ltjin@amazon.com>
1 parent cee7f6b commit d31769d

6 files changed

Lines changed: 110 additions & 2 deletions

File tree

docs/user/ppl/admin/settings.md

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -266,7 +266,8 @@ This configuration is introduced since 3.3.0 which is used to switch some behavi
266266
The behaviours it controlled includes:
267267
- The default value of argument `bucket_nullable` in `stats` command. Check [stats command](../cmd/stats.md) for details.
268268
- The return value of `divide` and `/` operator. Check [expressions](../functions/expressions.md) for details.
269-
- The default value of argument `usenull` in `top` and `rare` commands. Check [top command](../cmd/top.md) and [rare command](../cmd/rare.md) for details.
269+
- The default value of argument `usenull` in `top` and `rare` commands. Check [top command](../cmd/top.md) and [rare command](../cmd/rare.md) for details.
270+
- The default value of argument `max` in `join` command. Check [join command](../cmd/join.md) for details.
270271

271272
### Example 1
272273

docs/user/ppl/cmd/join.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ The extended `join` syntax supports the following parameters.
7676
| `type` | Optional | The join type when using extended syntax. Valid values are `left`, `outer` (same as `left`), `semi`, `anti`, and performance-sensitive types (`right`, `full`, and `cross`). Default is `inner`. |
7777
| `<join-field-list>` | Optional | A list of fields used to build the join criteria. These fields must exist in both datasets. If not specified, all fields common to both datasets are used as join keys. |
7878
| `overwrite` | Optional | Applicable only when `join-field-list` is specified. Specifies whether fields from the right dataset with duplicate names should replace corresponding fields in the main search results. Default is `true`. |
79-
| `max` | Optional | The maximum number of subsearch results to join with each row in the main search. Default is `0` (unlimited). |
79+
| `max` | Optional | The maximum number of subsearch results to join with each row in the main search. Default is `0` (unlimited) when plugins.ppl.syntax.legacy.preferred is `true`. When the setting is `false` the default value is `1`. |
8080
| `left` | Optional | An alias for the left dataset (typically a subsearch) used to avoid ambiguous field names. Specify as `left = <leftAlias>`. |
8181
| `right` | Optional | An alias for the right dataset (typically, a subsearch) used to avoid ambiguous field names. Specify as `right = <rightAlias>`. |
8282

integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@
2828
import org.junit.Ignore;
2929
import org.junit.Test;
3030
import org.opensearch.sql.ast.statement.ExplainMode;
31+
import org.opensearch.sql.common.setting.Settings;
3132
import org.opensearch.sql.common.setting.Settings.Key;
3233
import org.opensearch.sql.common.utils.StringUtils;
3334
import org.opensearch.sql.ppl.ExplainIT;
@@ -113,6 +114,26 @@ public void testJoinWithFieldListAndMaxOption() throws IOException {
113114
assertYamlEqualsIgnoreId(expected, result);
114115
}
115116

117+
@Test
118+
public void testJoinWhenLegacyNotPreferred() throws IOException {
119+
withSettings(
120+
Settings.Key.PPL_SYNTAX_LEGACY_PREFERRED,
121+
"false",
122+
() -> {
123+
String query =
124+
"source=opensearch-sql_test_index_bank | join type=inner account_number"
125+
+ " opensearch-sql_test_index_bank";
126+
String result = null;
127+
try {
128+
result = explainQueryYaml(query);
129+
String expected = loadExpectedPlan("explain_join_with_fields_max_option.yaml");
130+
assertYamlEqualsIgnoreId(expected, result);
131+
} catch (IOException e) {
132+
fail();
133+
}
134+
});
135+
}
136+
116137
// Only for Calcite
117138
@Test
118139
public void testJoinWithFieldList() throws IOException {

integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLJoinIT.java

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@
2020
import org.json.JSONObject;
2121
import org.junit.Test;
2222
import org.opensearch.client.Request;
23+
import org.opensearch.sql.common.setting.Settings;
2324
import org.opensearch.sql.legacy.TestsConstants;
2425
import org.opensearch.sql.ppl.PPLIntegTestCase;
2526

@@ -878,6 +879,55 @@ public void testJoinWithFieldListMaxEqualsOne() throws IOException {
878879
rows("David", "USA"));
879880
}
880881

882+
@Test
883+
public void testJoinWhenLegacyNotPreferred() throws IOException {
884+
withSettings(
885+
Settings.Key.PPL_SYNTAX_LEGACY_PREFERRED,
886+
"false",
887+
() -> {
888+
JSONObject actual = null;
889+
try {
890+
actual =
891+
executeQuery(
892+
String.format(
893+
"source=%s | join type=inner name,year,month %s",
894+
TestsConstants.TEST_INDEX_STATE_COUNTRY,
895+
TestsConstants.TEST_INDEX_OCCUPATION));
896+
} catch (IOException e) {
897+
fail();
898+
}
899+
verifySchema(
900+
actual,
901+
schema("name", "string"),
902+
schema("age", "int"),
903+
schema("state", "string"),
904+
schema("country", "string"),
905+
schema("year", "int"),
906+
schema("month", "int"),
907+
schema("occupation", "string"),
908+
schema("salary", "int"));
909+
JSONObject actual2 = null;
910+
try {
911+
actual2 =
912+
executeQuery(
913+
String.format(
914+
"source=%s | join type=inner max=1 name,year,month %s | fields name,"
915+
+ " country",
916+
TestsConstants.TEST_INDEX_STATE_COUNTRY,
917+
TestsConstants.TEST_INDEX_OCCUPATION));
918+
} catch (IOException e) {
919+
fail();
920+
}
921+
verifyDataRows(
922+
actual2,
923+
rows("Jake", "England"),
924+
rows("Jane", "Canada"),
925+
rows("John", "Canada"),
926+
rows("Hello", "USA"),
927+
rows("David", "USA"));
928+
});
929+
}
930+
881931
@Test
882932
public void testJoinComparing() throws IOException {
883933
JSONObject actual =

ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,7 @@
130130
import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.StatsByClauseContext;
131131
import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParserBaseVisitor;
132132
import org.opensearch.sql.ppl.utils.ArgumentFactory;
133+
import org.opensearch.sql.ppl.utils.UnresolvedPlanHelper;
133134

134135
/** Class of building the AST. Refines the visit path and build the AST nodes */
135136
public class AstBuilder extends OpenSearchPPLParserBaseVisitor<UnresolvedPlan> {
@@ -263,6 +264,11 @@ public UnresolvedPlan visitJoinCommand(OpenSearchPPLParser.JoinCommandContext ct
263264
}
264265
List<Argument> arguments =
265266
ctx.joinOption().stream().map(o -> (Argument) expressionBuilder.visit(o)).toList();
267+
if (arguments.stream().noneMatch(arg -> arg.getArgName().equals("max"))
268+
&& !UnresolvedPlanHelper.legacyPreferred(settings)) {
269+
arguments = new ArrayList<>(arguments);
270+
arguments.add(new Argument("max", Literal.ONE));
271+
}
266272
Argument.ArgumentMap argumentMap = Argument.ArgumentMap.of(arguments);
267273
if (argumentMap.get("type") != null) {
268274
Join.JoinType joinTypeFromArgument = ArgumentFactory.getJoinType(argumentMap);

ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLJoinTest.java

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1070,6 +1070,36 @@ public void testJoinWithMaxEqualsZero() {
10701070
verifyPPLToSparkSQL(root, expectedSparkSql);
10711071
}
10721072

1073+
@Test
1074+
public void testJoinWhenLegacyNotPreferred() {
1075+
doReturn(false).when(settings).getSettingValue(Settings.Key.PPL_SYNTAX_LEGACY_PREFERRED);
1076+
String ppl = "source=EMP | join type=outer DEPTNO DEPT";
1077+
RelNode root = getRelNode(ppl);
1078+
String expectedLogical =
1079+
"LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5],"
1080+
+ " COMM=[$6], DEPTNO=[$8], DNAME=[$9], LOC=[$10])\n"
1081+
+ " LogicalJoin(condition=[=($7, $8)], joinType=[left])\n"
1082+
+ " LogicalTableScan(table=[[scott, EMP]])\n"
1083+
+ " LogicalProject(DEPTNO=[$0], DNAME=[$1], LOC=[$2])\n"
1084+
+ " LogicalFilter(condition=[<=($3, 1)])\n"
1085+
+ " LogicalProject(DEPTNO=[$0], DNAME=[$1], LOC=[$2],"
1086+
+ " _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $0)])\n"
1087+
+ " LogicalTableScan(table=[[scott, DEPT]])\n";
1088+
verifyLogical(root, expectedLogical);
1089+
verifyResultCount(root, 14);
1090+
1091+
String expectedSparkSql =
1092+
"SELECT `EMP`.`EMPNO`, `EMP`.`ENAME`, `EMP`.`JOB`, `EMP`.`MGR`, `EMP`.`HIREDATE`,"
1093+
+ " `EMP`.`SAL`, `EMP`.`COMM`, `t1`.`DEPTNO`, `t1`.`DNAME`, `t1`.`LOC`\n"
1094+
+ "FROM `scott`.`EMP`\n"
1095+
+ "LEFT JOIN (SELECT `DEPTNO`, `DNAME`, `LOC`\n"
1096+
+ "FROM (SELECT `DEPTNO`, `DNAME`, `LOC`, ROW_NUMBER() OVER (PARTITION BY `DEPTNO`)"
1097+
+ " `_row_number_dedup_`\n"
1098+
+ "FROM `scott`.`DEPT`) `t`\n"
1099+
+ "WHERE `_row_number_dedup_` <= 1) `t1` ON `EMP`.`DEPTNO` = `t1`.`DEPTNO`";
1100+
verifyPPLToSparkSQL(root, expectedSparkSql);
1101+
}
1102+
10731103
@Test
10741104
public void testJoinSubsearchMaxOut() {
10751105
String ppl1 = "source=EMP | join type=inner max=0 DEPTNO DEPT";

0 commit comments

Comments
 (0)