From c81ba0c390b8897f5cd1fe0538995285c5608c61 Mon Sep 17 00:00:00 2001 From: Lantao Jin Date: Tue, 20 Jan 2026 18:00:09 +0800 Subject: [PATCH 1/2] Set max=1 in join as default when plugins.ppl.syntax.legacy.preferred=false Signed-off-by: Lantao Jin --- .../sql/calcite/remote/CalciteExplainIT.java | 21 ++++++++ .../sql/calcite/remote/CalcitePPLJoinIT.java | 50 +++++++++++++++++++ .../opensearch/sql/ppl/parser/AstBuilder.java | 6 +++ .../sql/ppl/calcite/CalcitePPLJoinTest.java | 30 +++++++++++ 4 files changed, 107 insertions(+) diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java index c57b33ab6d8..0136820b197 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java @@ -28,6 +28,7 @@ import org.junit.Ignore; import org.junit.Test; import org.opensearch.sql.ast.statement.ExplainMode; +import org.opensearch.sql.common.setting.Settings; import org.opensearch.sql.common.setting.Settings.Key; import org.opensearch.sql.common.utils.StringUtils; import org.opensearch.sql.ppl.ExplainIT; @@ -113,6 +114,26 @@ public void testJoinWithFieldListAndMaxOption() throws IOException { assertYamlEqualsIgnoreId(expected, result); } + @Test + public void testJoinWhenLegacyNotPreferred() throws IOException { + withSettings( + Settings.Key.PPL_SYNTAX_LEGACY_PREFERRED, + "false", + () -> { + String query = + "source=opensearch-sql_test_index_bank | join type=inner account_number" + + " opensearch-sql_test_index_bank"; + String result = null; + try { + result = explainQueryYaml(query); + String expected = loadExpectedPlan("explain_join_with_fields_max_option.yaml"); + assertYamlEqualsIgnoreId(expected, result); + } catch (IOException e) { + fail(); + } + }); + } + // Only for Calcite @Test public void testJoinWithFieldList() throws IOException { 
diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLJoinIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLJoinIT.java index f6b8ee73a3c..d6d1c72f992 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLJoinIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLJoinIT.java @@ -20,6 +20,7 @@ import org.json.JSONObject; import org.junit.Test; import org.opensearch.client.Request; +import org.opensearch.sql.common.setting.Settings; import org.opensearch.sql.legacy.TestsConstants; import org.opensearch.sql.ppl.PPLIntegTestCase; @@ -878,6 +879,55 @@ public void testJoinWithFieldListMaxEqualsOne() throws IOException { rows("David", "USA")); } + @Test + public void testJoinWhenLegacyNotPreferred() throws IOException { + withSettings( + Settings.Key.PPL_SYNTAX_LEGACY_PREFERRED, + "false", + () -> { + JSONObject actual = null; + try { + actual = + executeQuery( + String.format( + "source=%s | join type=inner name,year,month %s", + TestsConstants.TEST_INDEX_STATE_COUNTRY, + TestsConstants.TEST_INDEX_OCCUPATION)); + } catch (IOException e) { + fail(); + } + verifySchema( + actual, + schema("name", "string"), + schema("age", "int"), + schema("state", "string"), + schema("country", "string"), + schema("year", "int"), + schema("month", "int"), + schema("occupation", "string"), + schema("salary", "int")); + JSONObject actual2 = null; + try { + actual2 = + executeQuery( + String.format( + "source=%s | join type=inner max=1 name,year,month %s | fields name," + + " country", + TestsConstants.TEST_INDEX_STATE_COUNTRY, + TestsConstants.TEST_INDEX_OCCUPATION)); + } catch (IOException e) { + fail(); + } + verifyDataRows( + actual2, + rows("Jake", "England"), + rows("Jane", "Canada"), + rows("John", "Canada"), + rows("Hello", "USA"), + rows("David", "USA")); + }); + } + @Test public void testJoinComparing() throws IOException { JSONObject actual = diff --git 
a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java index 7ac29faf4c9..07f7e7935a0 100644 --- a/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java +++ b/ppl/src/main/java/org/opensearch/sql/ppl/parser/AstBuilder.java @@ -130,6 +130,7 @@ import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParser.StatsByClauseContext; import org.opensearch.sql.ppl.antlr.parser.OpenSearchPPLParserBaseVisitor; import org.opensearch.sql.ppl.utils.ArgumentFactory; +import org.opensearch.sql.ppl.utils.UnresolvedPlanHelper; /** Class of building the AST. Refines the visit path and build the AST nodes */ public class AstBuilder extends OpenSearchPPLParserBaseVisitor { @@ -263,6 +264,11 @@ public UnresolvedPlan visitJoinCommand(OpenSearchPPLParser.JoinCommandContext ct } List arguments = ctx.joinOption().stream().map(o -> (Argument) expressionBuilder.visit(o)).toList(); + if (arguments.stream().noneMatch(arg -> arg.getArgName().equals("max")) + && !UnresolvedPlanHelper.legacyPreferred(settings)) { + arguments = new ArrayList<>(arguments); + arguments.add(new Argument("max", Literal.ONE)); + } Argument.ArgumentMap argumentMap = Argument.ArgumentMap.of(arguments); if (argumentMap.get("type") != null) { Join.JoinType joinTypeFromArgument = ArgumentFactory.getJoinType(argumentMap); diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLJoinTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLJoinTest.java index e7ad1f6448c..b9c6dd4b093 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLJoinTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLJoinTest.java @@ -1070,6 +1070,36 @@ public void testJoinWithMaxEqualsZero() { verifyPPLToSparkSQL(root, expectedSparkSql); } + @Test + public void testJoinWhenLegacyNotPreferred() { + doReturn(false).when(settings).getSettingValue(Settings.Key.PPL_SYNTAX_LEGACY_PREFERRED); + String ppl 
= "source=EMP | join type=outer DEPTNO DEPT"; + RelNode root = getRelNode(ppl); + String expectedLogical = + "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5]," + + " COMM=[$6], DEPTNO=[$8], DNAME=[$9], LOC=[$10])\n" + + " LogicalJoin(condition=[=($7, $8)], joinType=[left])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n" + + " LogicalProject(DEPTNO=[$0], DNAME=[$1], LOC=[$2])\n" + + " LogicalFilter(condition=[<=($3, 1)])\n" + + " LogicalProject(DEPTNO=[$0], DNAME=[$1], LOC=[$2]," + + " _row_number_dedup_=[ROW_NUMBER() OVER (PARTITION BY $0)])\n" + + " LogicalTableScan(table=[[scott, DEPT]])\n"; + verifyLogical(root, expectedLogical); + verifyResultCount(root, 14); + + String expectedSparkSql = + "SELECT `EMP`.`EMPNO`, `EMP`.`ENAME`, `EMP`.`JOB`, `EMP`.`MGR`, `EMP`.`HIREDATE`," + + " `EMP`.`SAL`, `EMP`.`COMM`, `t1`.`DEPTNO`, `t1`.`DNAME`, `t1`.`LOC`\n" + + "FROM `scott`.`EMP`\n" + + "LEFT JOIN (SELECT `DEPTNO`, `DNAME`, `LOC`\n" + + "FROM (SELECT `DEPTNO`, `DNAME`, `LOC`, ROW_NUMBER() OVER (PARTITION BY `DEPTNO`)" + + " `_row_number_dedup_`\n" + + "FROM `scott`.`DEPT`) `t`\n" + + "WHERE `_row_number_dedup_` <= 1) `t1` ON `EMP`.`DEPTNO` = `t1`.`DEPTNO`"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + @Test public void testJoinSubsearchMaxOut() { String ppl1 = "source=EMP | join type=inner max=0 DEPTNO DEPT"; From 2f6c9495391d82cd27b440799857ef8736967e83 Mon Sep 17 00:00:00 2001 From: Lantao Jin Date: Wed, 21 Jan 2026 13:22:47 +0800 Subject: [PATCH 2/2] update doc Signed-off-by: Lantao Jin --- docs/user/ppl/admin/settings.md | 3 ++- docs/user/ppl/cmd/join.md | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/user/ppl/admin/settings.md b/docs/user/ppl/admin/settings.md index 24000f8fd8a..b1d5eef17fa 100644 --- a/docs/user/ppl/admin/settings.md +++ b/docs/user/ppl/admin/settings.md @@ -266,7 +266,8 @@ This configuration is introduced since 3.3.0 which is used to switch some behavi The behaviours it 
controlled includes: - The default value of argument `bucket_nullable` in `stats` command. Check [stats command](../cmd/stats.md) for details. - The return value of `divide` and `/` operator. Check [expressions](../functions/expressions.md) for details. -- The default value of argument `usenull` in `top` and `rare` commands. Check [top command](../cmd/top.md) and [rare command](../cmd/rare.md) for details. +- The default value of argument `usenull` in `top` and `rare` commands. Check [top command](../cmd/top.md) and [rare command](../cmd/rare.md) for details. +- The default value of argument `max` in `join` command. Check [join command](../cmd/join.md) for details. ### Example 1 diff --git a/docs/user/ppl/cmd/join.md b/docs/user/ppl/cmd/join.md index 983c3045600..f7c4e45e9a6 100644 --- a/docs/user/ppl/cmd/join.md +++ b/docs/user/ppl/cmd/join.md @@ -76,7 +76,7 @@ The extended `join` syntax supports the following parameters. | `type` | Optional | The join type when using extended syntax. Valid values are `left`, `outer` (same as `left`), `semi`, `anti`, and performance-sensitive types (`right`, `full`, and `cross`). Default is `inner`. | | `` | Optional | A list of fields used to build the join criteria. These fields must exist in both datasets. If not specified, all fields common to both datasets are used as join keys. | | `overwrite` | Optional | Applicable only when `join-field-list` is specified. Specifies whether fields from the right dataset with duplicate names should replace corresponding fields in the main search results. Default is `true`. | -| `max` | Optional | The maximum number of subsearch results to join with each row in the main search. Default is `0` (unlimited). | +| `max` | Optional | The maximum number of subsearch results to join with each row in the main search. Default is `0` (unlimited) when `plugins.ppl.syntax.legacy.preferred` is `true`. When the setting is `false`, the default value is `1`. 
| | `left` | Optional | An alias for the left dataset (typically a subsearch) used to avoid ambiguous field names. Specify as `left = `. | | `right` | Optional | An alias for the right dataset (typically, a subsearch) used to avoid ambiguous field names. Specify as `right = `. |