Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion docs/user/ppl/admin/settings.md
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,8 @@ This configuration is introduced since 3.3.0 which is used to switch some behavi
The behaviours it controls include:
- The default value of argument `bucket_nullable` in `stats` command. Check [stats command](../cmd/stats.md) for details.
- The return value of `divide` and `/` operator. Check [expressions](../functions/expressions.md) for details.
- The default value of argument `usenull` in `top` and `rare` commands. Check [top command](../cmd/top.md) and [rare command](../cmd/rare.md) for details.
- The default value of argument `usenull` in `top` and `rare` commands. Check [top command](../cmd/top.md) and [rare command](../cmd/rare.md) for details.
- The default value of argument `max` in `join` command. Check [join command](../cmd/join.md) for details.

### Example 1

Expand Down
2 changes: 1 addition & 1 deletion docs/user/ppl/cmd/join.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ The `join` command combines two datasets together. The left side could be an ind
join [type=<joinType>] [overwrite=<bool>] [max=n] (\<join-field-list\> \| [leftAlias] [rightAlias] (on \| where) \<joinCriteria\>) \<right-dataset\>
* type: optional. Join type using extended syntax. Options: `left`, `outer` (alias of `left`), `semi`, `anti`, and performance-sensitive types `right`, `full`, `cross`. **Default:** `inner`.
* overwrite: optional boolean. Only works with `join-field-list`. Specifies whether duplicate-named fields from right-dataset should replace corresponding fields in the main search results. **Default:** `true`.
* max: optional integer. Controls how many subsearch results could be joined against each row in main search. **Default:** 0 (unlimited).
* max: optional integer. Controls the maximum number of subsearch results that can be joined against each row of the main search. **Default:** `0` (unlimited) when `plugins.ppl.syntax.legacy.preferred` is `true`; `1` when the setting is `false`.
* join-field-list: optional. The fields used to build the join criteria. The join field list must exist on both sides. If not specified, all fields common to both sides will be used as join keys.
* leftAlias: optional. Same as basic syntax when used with extended syntax.
* rightAlias: optional. Same as basic syntax when used with extended syntax.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import org.junit.Ignore;
import org.junit.Test;
import org.opensearch.sql.ast.statement.ExplainMode;
import org.opensearch.sql.common.setting.Settings;
import org.opensearch.sql.common.setting.Settings.Key;
import org.opensearch.sql.common.utils.StringUtils;
import org.opensearch.sql.ppl.ExplainIT;
Expand Down Expand Up @@ -113,6 +114,26 @@ public void testJoinWithFieldListAndMaxOption() throws IOException {
assertYamlEqualsIgnoreId(expected, result);
}

/**
 * Explains a field-list join that does not specify {@code max} while {@link
 * Settings.Key#PPL_SYNTAX_LEGACY_PREFERRED} is overridden to {@code "false"}, and compares the
 * resulting plan with the stored plan {@code explain_join_with_fields_max_option.yaml}.
 *
 * @throws IOException declared for consistency with the sibling tests; I/O failures raised inside
 *     the settings callback are rethrown as {@link AssertionError} with the original cause
 */
@Test
public void testJoinWhenLegacyNotPreferred() throws IOException {
  withSettings(
      Settings.Key.PPL_SYNTAX_LEGACY_PREFERRED,
      "false",
      () -> {
        String query =
            "source=opensearch-sql_test_index_bank | join type=inner account_number"
                + " opensearch-sql_test_index_bank";
        try {
          String result = explainQueryYaml(query);
          String expected = loadExpectedPlan("explain_join_with_fields_max_option.yaml");
          assertYamlEqualsIgnoreId(expected, result);
        } catch (IOException e) {
          // Fail the test but keep the I/O exception as the cause so the report shows why.
          throw new AssertionError("Failed to explain join query: " + query, e);
        }
      });
}

// Only for Calcite
@Test
public void testJoinWithFieldList() throws IOException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import org.json.JSONObject;
import org.junit.Test;
import org.opensearch.client.Request;
import org.opensearch.sql.common.setting.Settings;
import org.opensearch.sql.legacy.TestsConstants;
import org.opensearch.sql.ppl.PPLIntegTestCase;

Expand Down Expand Up @@ -892,6 +893,55 @@ public void testJoinWithFieldListMaxEqualsOne() throws IOException {
rows("David", "USA"));
}

/**
 * Runs field-list joins while {@link Settings.Key#PPL_SYNTAX_LEGACY_PREFERRED} is overridden to
 * {@code "false"}: first checks the output schema of a join without a {@code max} option, then
 * checks the rows returned by a join with {@code max=1}.
 *
 * @throws IOException declared for consistency with the sibling tests; I/O failures raised inside
 *     the settings callback are rethrown as {@link AssertionError} with the original cause
 */
@Test
public void testJoinWhenLegacyNotPreferred() throws IOException {
  withSettings(
      Settings.Key.PPL_SYNTAX_LEGACY_PREFERRED,
      "false",
      () -> {
        try {
          // Join without an explicit max option: only the schema is verified here.
          JSONObject actual =
              executeQuery(
                  String.format(
                      "source=%s | join type=inner name,year,month %s",
                      TestsConstants.TEST_INDEX_STATE_COUNTRY,
                      TestsConstants.TEST_INDEX_OCCUPATION));
          verifySchema(
              actual,
              schema("name", "string"),
              schema("age", "int"),
              schema("state", "string"),
              schema("country", "string"),
              schema("year", "int"),
              schema("month", "int"),
              schema("occupation", "string"),
              schema("salary", "int"));

          // NOTE(review): this query passes max=1 explicitly, so the row check below does not
          // depend on the default value of max - confirm whether that is intended.
          JSONObject actual2 =
              executeQuery(
                  String.format(
                      "source=%s | join type=inner max=1 name,year,month %s | fields name,"
                          + " country",
                      TestsConstants.TEST_INDEX_STATE_COUNTRY,
                      TestsConstants.TEST_INDEX_OCCUPATION));
          verifyDataRows(
              actual2,
              rows("Jake", "England"),
              rows("Jane", "Canada"),
              rows("John", "Canada"),
              rows("Hello", "USA"),
              rows("David", "USA"));
        } catch (IOException e) {
          // Fail the test but keep the I/O exception as the cause so the report shows why.
          throw new AssertionError("Failed to execute join query", e);
        }
      });
}

@Test
public void testJoinComparing() throws IOException {
JSONObject actual =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,7 @@
import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.StatsByClauseContext;
import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParserBaseVisitor;
import org.opensearch.sql.ppl.utils.ArgumentFactory;
import org.opensearch.sql.ppl.utils.UnresolvedPlanHelper;

/** Class of building the AST. Refines the visit path and build the AST nodes */
public class AstBuilder extends OpenSearchPPLParserBaseVisitor<UnresolvedPlan> {
Expand Down Expand Up @@ -264,6 +265,11 @@ public UnresolvedPlan visitJoinCommand(OpenSearchPPLParser.JoinCommandContext ct
ctx.joinOption().stream()
.map(o -> (Argument) expressionBuilder.visit(o))
.collect(Collectors.toList());
if (arguments.stream().noneMatch(arg -> arg.getArgName().equals("max"))
&& !UnresolvedPlanHelper.legacyPreferred(settings)) {
arguments = new ArrayList<>(arguments);
arguments.add(new Argument("max", Literal.ONE));
}
Argument.ArgumentMap argumentMap = Argument.ArgumentMap.of(arguments);
if (argumentMap.get("type") != null) {
Join.JoinType joinTypeFromArgument = ArgumentFactory.getJoinType(argumentMap);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1062,6 +1062,36 @@ public void testJoinWithMaxEqualsZero() {
verifyPPLToSparkSQL(root, expectedSparkSql);
}

/**
 * Plans an outer field-list join on {@code DEPTNO} with no {@code max} option while the
 * legacy-preferred setting is stubbed to {@code false}. The expected logical plan and Spark SQL
 * both keep right-side rows whose {@code ROW_NUMBER()} per {@code DEPTNO} is at most 1, and the
 * query is expected to return 14 rows.
 */
@Test
public void testJoinWhenLegacyNotPreferred() {
  // Stub the setting lookup so the planner sees legacy syntax as not preferred.
  doReturn(false).when(settings).getSettingValue(Settings.Key.PPL_SYNTAX_LEGACY_PREFERRED);
  RelNode plan = getRelNode("source=EMP | join type=outer DEPTNO DEPT");

  String logicalPlan =
      "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5],"
          + " COMM=[$6], DEPTNO=[$8], DNAME=[$9], LOC=[$10])\n"
          + " LogicalJoin(condition=[=($7, $8)], joinType=[left])\n"
          + " LogicalTableScan(table=[[scott, EMP]])\n"
          + " LogicalProject(DEPTNO=[$0], DNAME=[$1], LOC=[$2])\n"
          + " LogicalFilter(condition=[<=($3, 1)])\n"
          + " LogicalProject(DEPTNO=[$0], DNAME=[$1], LOC=[$2],"
          + " _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $0)])\n"
          + " LogicalTableScan(table=[[scott, DEPT]])\n";
  verifyLogical(plan, logicalPlan);
  verifyResultCount(plan, 14);

  String sparkSql =
      "SELECT `EMP`.`EMPNO`, `EMP`.`ENAME`, `EMP`.`JOB`, `EMP`.`MGR`, `EMP`.`HIREDATE`,"
          + " `EMP`.`SAL`, `EMP`.`COMM`, `t1`.`DEPTNO`, `t1`.`DNAME`, `t1`.`LOC`\n"
          + "FROM `scott`.`EMP`\n"
          + "LEFT JOIN (SELECT `DEPTNO`, `DNAME`, `LOC`\n"
          + "FROM (SELECT `DEPTNO`, `DNAME`, `LOC`, ROW_NUMBER() OVER (PARTITION BY `DEPTNO`)"
          + " `_row_number_dedup_`\n"
          + "FROM `scott`.`DEPT`) `t`\n"
          + "WHERE `_row_number_dedup_` <= 1) `t1` ON `EMP`.`DEPTNO` = `t1`.`DEPTNO`";
  verifyPPLToSparkSQL(plan, sparkSql);
}

@Test
public void testJoinSubsearchMaxOut() {
String ppl1 = "source=EMP | join type=inner max=0 DEPTNO DEPT";
Expand Down
Loading