Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion docs/user/ppl/admin/settings.md
Original file line number Diff line number Diff line change
Expand Up @@ -266,7 +266,8 @@ This configuration is introduced since 3.3.0 which is used to switch some behavi
The behaviours it controls include:
- The default value of argument `bucket_nullable` in `stats` command. Check [stats command](../cmd/stats.md) for details.
- The return value of `divide` and `/` operator. Check [expressions](../functions/expressions.md) for details.
- The default value of argument `usenull` in `top` and `rare` commands. Check [top command](../cmd/top.md) and [rare command](../cmd/rare.md) for details.
- The default value of argument `usenull` in `top` and `rare` commands. Check [top command](../cmd/top.md) and [rare command](../cmd/rare.md) for details.
- The default value of argument `max` in `join` command. Check [join command](../cmd/join.md) for details.

### Example 1

Expand Down
2 changes: 1 addition & 1 deletion docs/user/ppl/cmd/join.md
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ The extended `join` syntax supports the following parameters.
| `type` | Optional | The join type when using extended syntax. Valid values are `left`, `outer` (same as `left`), `semi`, `anti`, and performance-sensitive types (`right`, `full`, and `cross`). Default is `inner`. |
| `<join-field-list>` | Optional | A list of fields used to build the join criteria. These fields must exist in both datasets. If not specified, all fields common to both datasets are used as join keys. |
| `overwrite` | Optional | Applicable only when `join-field-list` is specified. Specifies whether fields from the right dataset with duplicate names should replace corresponding fields in the main search results. Default is `true`. |
| `max` | Optional | The maximum number of subsearch results to join with each row in the main search. Default is `0` (unlimited). |
| `max` | Optional | The maximum number of subsearch results to join with each row in the main search. Default is `0` (unlimited) when `plugins.ppl.syntax.legacy.preferred` is `true`, and `1` when the setting is `false`. |
| `left` | Optional | An alias for the left dataset (typically a subsearch) used to avoid ambiguous field names. Specify as `left = <leftAlias>`. |
| `right` | Optional | An alias for the right dataset (typically, a subsearch) used to avoid ambiguous field names. Specify as `right = <rightAlias>`. |

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import org.junit.Ignore;
import org.junit.Test;
import org.opensearch.sql.ast.statement.ExplainMode;
import org.opensearch.sql.common.setting.Settings;
import org.opensearch.sql.common.setting.Settings.Key;
import org.opensearch.sql.common.utils.StringUtils;
import org.opensearch.sql.ppl.ExplainIT;
Expand Down Expand Up @@ -113,6 +114,26 @@ public void testJoinWithFieldListAndMaxOption() throws IOException {
assertYamlEqualsIgnoreId(expected, result);
}

/**
 * Explains a field-list join that omits the {@code max} option while {@code
 * PPL_SYNTAX_LEGACY_PREFERRED} is set to {@code "false"}, and compares the YAML plan with the
 * plan stored in {@code explain_join_with_fields_max_option.yaml}.
 *
 * <p>NOTE(review): the expected-plan file name suggests it is shared with the explicit {@code
 * max} test; confirm that this is intentional.
 *
 * @throws IOException declared for signature compatibility; I/O failures raised inside the
 *     settings callback are reported as assertion failures that carry the original cause
 */
@Test
public void testJoinWhenLegacyNotPreferred() throws IOException {
  withSettings(
      Settings.Key.PPL_SYNTAX_LEGACY_PREFERRED,
      "false",
      () -> {
        String query =
            "source=opensearch-sql_test_index_bank | join type=inner account_number"
                + " opensearch-sql_test_index_bank";
        try {
          String result = explainQueryYaml(query);
          String expected = loadExpectedPlan("explain_join_with_fields_max_option.yaml");
          assertYamlEqualsIgnoreId(expected, result);
        } catch (IOException e) {
          // Keep the cause so a failing run shows why the explain request or plan load broke,
          // instead of a bare fail() with no message.
          throw new AssertionError("Failed to explain join query: " + query, e);
        }
      });
}

// Only for Calcite
@Test
public void testJoinWithFieldList() throws IOException {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import org.json.JSONObject;
import org.junit.Test;
import org.opensearch.client.Request;
import org.opensearch.sql.common.setting.Settings;
import org.opensearch.sql.legacy.TestsConstants;
import org.opensearch.sql.ppl.PPLIntegTestCase;

Expand Down Expand Up @@ -878,6 +879,55 @@ public void testJoinWithFieldListMaxEqualsOne() throws IOException {
rows("David", "USA"));
}

@Test
public void testJoinWhenLegacyNotPreferred() throws IOException {
withSettings(
Settings.Key.PPL_SYNTAX_LEGACY_PREFERRED,
"false",
() -> {
JSONObject actual = null;
try {
actual =
executeQuery(
String.format(
"source=%s | join type=inner name,year,month %s",
TestsConstants.TEST_INDEX_STATE_COUNTRY,
TestsConstants.TEST_INDEX_OCCUPATION));
} catch (IOException e) {
fail();
}
verifySchema(
actual,
schema("name", "string"),
schema("age", "int"),
schema("state", "string"),
schema("country", "string"),
schema("year", "int"),
schema("month", "int"),
schema("occupation", "string"),
schema("salary", "int"));
JSONObject actual2 = null;
try {
actual2 =
executeQuery(
String.format(
"source=%s | join type=inner max=1 name,year,month %s | fields name,"
+ " country",
TestsConstants.TEST_INDEX_STATE_COUNTRY,
TestsConstants.TEST_INDEX_OCCUPATION));
} catch (IOException e) {
fail();
}
verifyDataRows(
actual2,
rows("Jake", "England"),
rows("Jane", "Canada"),
rows("John", "Canada"),
rows("Hello", "USA"),
rows("David", "USA"));
});
Comment thread
LantaoJin marked this conversation as resolved.
}

@Test
public void testJoinComparing() throws IOException {
JSONObject actual =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,7 @@
import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.StatsByClauseContext;
import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParserBaseVisitor;
import org.opensearch.sql.ppl.utils.ArgumentFactory;
import org.opensearch.sql.ppl.utils.UnresolvedPlanHelper;

/** Class of building the AST. Refines the visit path and build the AST nodes */
public class AstBuilder extends OpenSearchPPLParserBaseVisitor<UnresolvedPlan> {
Expand Down Expand Up @@ -263,6 +264,11 @@ public UnresolvedPlan visitJoinCommand(OpenSearchPPLParser.JoinCommandContext ct
}
List<Argument> arguments =
ctx.joinOption().stream().map(o -> (Argument) expressionBuilder.visit(o)).toList();
if (arguments.stream().noneMatch(arg -> arg.getArgName().equals("max"))
&& !UnresolvedPlanHelper.legacyPreferred(settings)) {
arguments = new ArrayList<>(arguments);
arguments.add(new Argument("max", Literal.ONE));
}
Argument.ArgumentMap argumentMap = Argument.ArgumentMap.of(arguments);
if (argumentMap.get("type") != null) {
Join.JoinType joinTypeFromArgument = ArgumentFactory.getJoinType(argumentMap);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1070,6 +1070,36 @@ public void testJoinWithMaxEqualsZero() {
verifyPPLToSparkSQL(root, expectedSparkSql);
}

/**
 * With {@code PPL_SYNTAX_LEGACY_PREFERRED} stubbed to {@code false}, plans an outer join on
 * {@code DEPTNO} that passes no {@code max} option and checks the logical plan, the result
 * count, and the generated Spark SQL.
 */
@Test
public void testJoinWhenLegacyNotPreferred() {
  // Expectations first; they do not depend on the settings stub.
  final String logicalPlan =
      "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5],"
          + " COMM=[$6], DEPTNO=[$8], DNAME=[$9], LOC=[$10])\n"
          + " LogicalJoin(condition=[=($7, $8)], joinType=[left])\n"
          + " LogicalTableScan(table=[[scott, EMP]])\n"
          + " LogicalProject(DEPTNO=[$0], DNAME=[$1], LOC=[$2])\n"
          + " LogicalFilter(condition=[<=($3, 1)])\n"
          + " LogicalProject(DEPTNO=[$0], DNAME=[$1], LOC=[$2],"
          + " _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $0)])\n"
          + " LogicalTableScan(table=[[scott, DEPT]])\n";
  final String sparkSql =
      "SELECT `EMP`.`EMPNO`, `EMP`.`ENAME`, `EMP`.`JOB`, `EMP`.`MGR`, `EMP`.`HIREDATE`,"
          + " `EMP`.`SAL`, `EMP`.`COMM`, `t1`.`DEPTNO`, `t1`.`DNAME`, `t1`.`LOC`\n"
          + "FROM `scott`.`EMP`\n"
          + "LEFT JOIN (SELECT `DEPTNO`, `DNAME`, `LOC`\n"
          + "FROM (SELECT `DEPTNO`, `DNAME`, `LOC`, ROW_NUMBER() OVER (PARTITION BY `DEPTNO`)"
          + " `_row_number_dedup_`\n"
          + "FROM `scott`.`DEPT`) `t`\n"
          + "WHERE `_row_number_dedup_` <= 1) `t1` ON `EMP`.`DEPTNO` = `t1`.`DEPTNO`";

  // The stub must be in place before the query is planned.
  doReturn(false).when(settings).getSettingValue(Settings.Key.PPL_SYNTAX_LEGACY_PREFERRED);
  final RelNode plan = getRelNode("source=EMP | join type=outer DEPTNO DEPT");

  verifyLogical(plan, logicalPlan);
  verifyResultCount(plan, 14);
  verifyPPLToSparkSQL(plan, sparkSql);
}

@Test
public void testJoinSubsearchMaxOut() {
String ppl1 = "source=EMP | join type=inner max=0 DEPTNO DEPT";
Expand Down
Loading