Skip to content

Commit 92004cf

Browse files
committed
feat(sql): support JOIN in unified query path
Extend the V2 ANTLR grammar with JOIN clause rules (INNER/LEFT/RIGHT/ CROSS) and add ExtendedAstBuilder in SqlV2QueryParser to produce Join AST nodes for the Calcite-based unified query path. The base AstBuilder throws SyntaxCheckException to preserve legacy engine fallback. A semantic predicate prevents LEFT/RIGHT from being consumed as implicit table aliases when followed by JOIN. Signed-off-by: Chen Dai <daichen@amazon.com>
1 parent c5ae56f commit 92004cf

10 files changed

Lines changed: 294 additions & 18 deletions

File tree

api/src/main/java/org/opensearch/sql/api/UnifiedQueryContext.java

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,11 @@ public static class Builder {
122122
* Setting values with defaults from SysLimit.DEFAULT. Only includes planning-required settings
123123
* to avoid coupling with OpenSearchSettings.
124124
*
125+
* <p>{@link Settings.Key#PPL_JOIN_SUBSEARCH_MAXOUT} defaults to {@code 0} to avoid injecting
126+
* {@code LogicalSystemLimit} into the logical plan, which is an OpenSearch-specific operational
127+
* concern irrelevant to external consumers of the unified query API. {@link
128+
* Settings.Key#PPL_SUBSEARCH_MAXOUT} is set to {@code 0} for the same reason.
129+
*
125130
* <p>{@link Settings.Key#CALCITE_ENGINE_ENABLED} defaults to {@code true} here because the
126131
* unified query path is by definition Calcite-based — every query reaching this context flows
127132
* through Calcite's planner, never the v2 engine. The PPL {@link
@@ -142,8 +147,8 @@ public static class Builder {
142147
new HashMap<Settings.Key, Object>(
143148
Map.of(
144149
QUERY_SIZE_LIMIT, SysLimit.DEFAULT.querySizeLimit(),
145-
PPL_SUBSEARCH_MAXOUT, SysLimit.DEFAULT.subsearchLimit(),
146-
PPL_JOIN_SUBSEARCH_MAXOUT, SysLimit.DEFAULT.joinSubsearchLimit(),
150+
PPL_SUBSEARCH_MAXOUT, SysLimit.UNLIMITED_SUBSEARCH.subsearchLimit(),
151+
PPL_JOIN_SUBSEARCH_MAXOUT, SysLimit.UNLIMITED_SUBSEARCH.joinSubsearchLimit(),
147152
CALCITE_ENGINE_ENABLED, true,
148153
PPL_REX_MAX_MATCH_LIMIT, 10));
149154

api/src/main/java/org/opensearch/sql/api/parser/SqlV2QueryParser.java

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,18 @@
55

66
package org.opensearch.sql.api.parser;
77

8+
import static org.opensearch.sql.ast.dsl.AstDSL.join;
9+
10+
import java.util.Optional;
811
import org.antlr.v4.runtime.tree.ParseTree;
12+
import org.opensearch.sql.ast.expression.UnresolvedExpression;
913
import org.opensearch.sql.ast.statement.Query;
1014
import org.opensearch.sql.ast.statement.Statement;
15+
import org.opensearch.sql.ast.tree.Join.JoinType;
1116
import org.opensearch.sql.ast.tree.UnresolvedPlan;
1217
import org.opensearch.sql.sql.antlr.SQLSyntaxParser;
18+
import org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser;
19+
import org.opensearch.sql.sql.antlr.parser.OpenSearchSQLParser.JoinClauseContext;
1320
import org.opensearch.sql.sql.parser.AstBuilder;
1421
import org.opensearch.sql.sql.parser.AstStatementBuilder;
1522

@@ -24,7 +31,8 @@ public UnresolvedPlan parse(String query) {
2431
ParseTree cst = syntaxParser.parse(query);
2532
AstStatementBuilder astStmtBuilder =
2633
new AstStatementBuilder(
27-
new AstBuilder(query), AstStatementBuilder.StatementBuilderContext.builder().build());
34+
new ExtendedAstBuilder(query),
35+
AstStatementBuilder.StatementBuilderContext.builder().build());
2836
Statement statement = cst.accept(astStmtBuilder);
2937

3038
if (statement instanceof Query) {
@@ -33,4 +41,33 @@ public UnresolvedPlan parse(String query) {
3341
throw new UnsupportedOperationException(
3442
"Only query statements are supported but got " + statement.getClass().getSimpleName());
3543
}
44+
45+
/**
46+
* Extends the V2 AstBuilder with JOIN support that the base AstBuilder rejects with
47+
* SyntaxCheckException to trigger legacy engine fallback.
48+
*/
49+
private static class ExtendedAstBuilder extends AstBuilder {
50+
51+
ExtendedAstBuilder(String query) {
52+
super(query);
53+
}
54+
55+
@Override
56+
public UnresolvedPlan visitJoinClause(JoinClauseContext ctx) {
57+
JoinType joinType = toJoinType(ctx);
58+
UnresolvedPlan right = visit(ctx.relation());
59+
Optional<UnresolvedExpression> condition =
60+
Optional.ofNullable(ctx.expression()).map(this::visitAstExpression);
61+
return join(right, joinType, condition);
62+
}
63+
64+
private JoinType toJoinType(JoinClauseContext ctx) {
65+
return switch (ctx.getStart().getType()) {
66+
case OpenSearchSQLParser.LEFT -> JoinType.LEFT;
67+
case OpenSearchSQLParser.RIGHT -> JoinType.RIGHT;
68+
case OpenSearchSQLParser.CROSS -> JoinType.CROSS;
69+
default -> JoinType.INNER;
70+
};
71+
}
72+
}
3673
}

api/src/test/java/org/opensearch/sql/api/UnifiedQueryContextTest.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,8 +30,8 @@ public void testContextCreationWithDefaults() {
3030
assertNotNull("PlanContext should be created", context.getPlanContext());
3131
assertNotNull("Settings should be created", context.getSettings());
3232
assertEquals(
33-
"Settings should have default system limits",
34-
SysLimit.DEFAULT,
33+
"Settings should have unlimited subsearch limits for clean logical plans",
34+
new SysLimit(SysLimit.DEFAULT.querySizeLimit(), 0, 0),
3535
SysLimit.fromSettings(context.getSettings()));
3636
assertEquals(
3737
"PPL_REX_MAX_MATCH_LIMIT default should be 10",

api/src/test/java/org/opensearch/sql/api/UnifiedQueryPlannerSqlV2Test.java

Lines changed: 84 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,19 @@
55

66
package org.opensearch.sql.api;
77

8+
import static org.apache.calcite.sql.type.SqlTypeName.INTEGER;
9+
import static org.apache.calcite.sql.type.SqlTypeName.VARCHAR;
10+
11+
import java.util.Map;
12+
import org.apache.calcite.schema.Table;
13+
import org.apache.calcite.schema.impl.AbstractSchema;
14+
import org.junit.Before;
815
import org.junit.Test;
916
import org.opensearch.sql.executor.QueryType;
1017

1118
/**
12-
* Tests for basic SQL query planning through the V2 ANTLR parser path. Covers SELECT, WHERE, ORDER
13-
* BY operations that produce valid RelNode plans.
19+
* Tests for SQL query planning through the V2 ANTLR parser path. Covers SELECT, WHERE, ORDER BY,
20+
* and JOIN operations that produce valid RelNode plans.
1421
*/
1522
public class UnifiedQueryPlannerSqlV2Test extends UnifiedQueryTestBase {
1623

@@ -19,17 +26,35 @@ protected QueryType queryType() {
1926
return QueryType.SQL;
2027
}
2128

22-
@Test
23-
public void selectStar() {
24-
givenQuery("SELECT * FROM catalog.employees")
25-
.assertPlan(
26-
"""
27-
LogicalTableScan(table=[[catalog, employees]])
28-
""");
29+
@Before
30+
@Override
31+
public void setUp() {
32+
testSchema =
33+
new AbstractSchema() {
34+
@Override
35+
protected Map<String, Table> getTableMap() {
36+
return Map.of(
37+
"employees", createEmployeesTable(),
38+
"departments", createDepartmentsTable());
39+
}
40+
};
41+
42+
context = contextBuilder().build();
43+
planner = new UnifiedQueryPlanner(context);
44+
}
45+
46+
private Table createDepartmentsTable() {
47+
return SimpleTable.builder()
48+
.col("dept_id", INTEGER)
49+
.col("dept_name", VARCHAR)
50+
.row(new Object[] {1, "Engineering"})
51+
.row(new Object[] {2, "Sales"})
52+
.row(new Object[] {3, "Marketing"})
53+
.build();
2954
}
3055

3156
@Test
32-
public void selectStarFields() {
57+
public void selectStar() {
3358
givenQuery("SELECT * FROM catalog.employees")
3459
.assertPlan(
3560
"""
@@ -68,4 +93,53 @@ public void testFilterAndOrderBy() {
6893
LogicalTableScan(table=[[catalog, employees]])
6994
""");
7095
}
96+
97+
@Test
98+
public void testJoinTypes() {
99+
Map.of("JOIN", "inner", "LEFT JOIN", "left", "RIGHT JOIN", "right")
100+
.forEach(
101+
(syntax, type) ->
102+
givenQuery(
103+
"""
104+
SELECT * FROM catalog.employees %s catalog.departments
105+
ON employees.department = departments.dept_name
106+
"""
107+
.formatted(syntax))
108+
.assertPlan(
109+
"""
110+
LogicalJoin(condition=[=($3, $5)], joinType=[%s])
111+
LogicalTableScan(table=[[catalog, employees]])
112+
LogicalTableScan(table=[[catalog, departments]])
113+
"""
114+
.formatted(type)));
115+
}
116+
117+
@Test
118+
public void testCrossJoin() {
119+
givenQuery("SELECT * FROM catalog.employees CROSS JOIN catalog.departments")
120+
.assertPlan(
121+
"""
122+
LogicalJoin(condition=[true], joinType=[inner])
123+
LogicalTableScan(table=[[catalog, employees]])
124+
LogicalTableScan(table=[[catalog, departments]])
125+
""");
126+
}
127+
128+
@Test
129+
public void testJoinWithFilterAndOrderBy() {
130+
givenQuery(
131+
"""
132+
SELECT * FROM catalog.employees JOIN catalog.departments
133+
ON employees.department = departments.dept_name
134+
WHERE employees.age > 30 ORDER BY employees.name
135+
""")
136+
.assertPlan(
137+
"""
138+
LogicalSort(sort0=[$1], dir0=[ASC-nulls-first])
139+
LogicalFilter(condition=[>($2, 30)])
140+
LogicalJoin(condition=[=($3, $5)], joinType=[inner])
141+
LogicalTableScan(table=[[catalog, employees]])
142+
LogicalTableScan(table=[[catalog, departments]])
143+
""");
144+
}
71145
}

core/src/main/java/org/opensearch/sql/ast/dsl/AstDSL.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@
6060
import org.opensearch.sql.ast.tree.FillNull;
6161
import org.opensearch.sql.ast.tree.Filter;
6262
import org.opensearch.sql.ast.tree.Head;
63+
import org.opensearch.sql.ast.tree.Join;
6364
import org.opensearch.sql.ast.tree.Limit;
6465
import org.opensearch.sql.ast.tree.MinSpanBin;
6566
import org.opensearch.sql.ast.tree.MvCombine;
@@ -757,4 +758,17 @@ public static Bin bin(UnresolvedExpression field, Argument... arguments) {
757758
public static Field implicitTimestampField() {
758759
return AstDSL.field(OpenSearchConstants.IMPLICIT_FIELD_TIMESTAMP);
759760
}
761+
762+
public static UnresolvedPlan join(
763+
UnresolvedPlan right, Join.JoinType joinType, Optional<UnresolvedExpression> condition) {
764+
return new Join(
765+
right,
766+
Optional.empty(),
767+
Optional.empty(),
768+
joinType,
769+
condition,
770+
new Join.JoinHint(),
771+
Optional.empty(),
772+
Argument.ArgumentMap.empty());
773+
}
760774
}
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
/*
2+
* Copyright OpenSearch Contributors
3+
* SPDX-License-Identifier: Apache-2.0
4+
*/
5+
6+
package org.opensearch.sql.legacy;
7+
8+
import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_DOG;
9+
import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_PEOPLE;
10+
import static org.opensearch.sql.util.MatcherUtils.rows;
11+
import static org.opensearch.sql.util.MatcherUtils.verifyDataRows;
12+
13+
import java.io.IOException;
14+
import org.json.JSONObject;
15+
import org.junit.Test;
16+
17+
/**
18+
* Sanity tests for the legacy SQL engine. Many legacy integration tests (JoinIT, SubqueryIT,
19+
* MultiQueryIT, etc.) are @Ignored because they assert on the deprecated JSON response format
20+
* removed in 3.0. These tests provide minimal coverage that the legacy engine still executes
21+
* correctly for queries that fall back from the V2 engine.
22+
*/
23+
public class SqlLegacyEngineSanityIT extends SQLIntegTestCase {
24+
25+
@Override
26+
protected void init() throws Exception {
27+
loadIndex(Index.DOG);
28+
loadIndex(Index.PEOPLE);
29+
}
30+
31+
@Test
32+
public void testInnerJoinFallback() throws IOException {
33+
JSONObject result =
34+
executeQuery(
35+
"SELECT a.firstname, d.dog_name FROM %s a JOIN %s d ON d.holdersName = a.firstname WHERE a.age > 35"
36+
.formatted(TEST_INDEX_PEOPLE, TEST_INDEX_DOG));
37+
verifyDataRows(result, rows("Hattie", "snoopy"));
38+
}
39+
40+
@Test
41+
public void testLeftJoinFallback() throws IOException {
42+
JSONObject result =
43+
executeQuery(
44+
"SELECT a.firstname, d.dog_name FROM %s a LEFT JOIN %s d ON d.holdersName = a.firstname WHERE a.firstname = 'Daenerys'"
45+
.formatted(TEST_INDEX_PEOPLE, TEST_INDEX_DOG));
46+
verifyDataRows(result, rows("Daenerys", "rex"));
47+
}
48+
}

sql/src/main/antlr/OpenSearchSQLParser.g4

Lines changed: 22 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,20 @@ parser grammar OpenSearchSQLParser;
3232

3333

3434
options { tokenVocab = OpenSearchSQLLexer; }
35+
36+
@members {
37+
/**
38+
* Returns true if the next token is a join keyword that should not be consumed as a table alias.
39+
* LEFT/RIGHT are valid identifiers (text function names), so without this predicate ANTLR4
40+
* greedily consumes them as implicit table aliases (e.g., FROM t1 LEFT becomes alias='LEFT'
41+
* instead of starting a LEFT JOIN clause).
42+
*/
43+
private boolean isJoinKeyword() {
44+
int t = _input.LT(1).getType();
45+
return t == LEFT || t == RIGHT || t == INNER || t == CROSS || t == JOIN;
46+
}
47+
}
48+
3549
// Top Level Description
3650

3751
// Root rule
@@ -104,12 +118,18 @@ selectElement
104118
;
105119

106120
fromClause
107-
: FROM relation (whereClause)? (groupByClause)? (havingClause)? (orderByClause)? // Place it under FROM for now but actually not necessary ex. A UNION B ORDER BY
121+
: FROM relation joinClause* (whereClause)? (groupByClause)? (havingClause)? (orderByClause)? // Place it under FROM for now but actually not necessary ex. A UNION B ORDER BY
108122

109123
;
110124

125+
joinClause
126+
: (INNER | CROSS)? JOIN relation (ON expression)?
127+
| (LEFT | RIGHT) OUTER? JOIN relation (ON expression)?
128+
;
129+
111130
relation
112-
: tableName (AS? alias)? # tableAsRelation
131+
// The predicate guarantees only match implicit alias if next token is NOT a join keyword
132+
: tableName (AS alias | {!isJoinKeyword()}? alias)? # tableAsRelation
113133
| LR_BRACKET subquery = querySpecification RR_BRACKET AS? alias # subqueryAsRelation
114134
| qualifiedName LR_BRACKET tableFunctionArgs RR_BRACKET (AS? alias)? # tableFunctionRelation
115135
;

sql/src/main/java/org/opensearch/sql/sql/parser/AstBuilder.java

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -139,6 +139,10 @@ public UnresolvedPlan visitLimitClause(OpenSearchSQLParser.LimitClauseContext ct
139139
public UnresolvedPlan visitFromClause(FromClauseContext ctx) {
140140
UnresolvedPlan result = visit(ctx.relation());
141141

142+
for (var joinCtx : ctx.joinClause()) {
143+
result = visit(joinCtx).attach(result);
144+
}
145+
142146
if (ctx.whereClause() != null) {
143147
result = visit(ctx.whereClause()).attach(result);
144148
}
@@ -250,6 +254,12 @@ public UnresolvedPlan visitWhereClause(WhereClauseContext ctx) {
250254
return new Filter(visitAstExpression(ctx.expression()));
251255
}
252256

257+
@Override
258+
public UnresolvedPlan visitJoinClause(OpenSearchSQLParser.JoinClauseContext ctx) {
259+
throw new SyntaxCheckException(
260+
"JOIN is not supported in the V2 SQL engine. Falling back to legacy engine.");
261+
}
262+
253263
@Override
254264
public UnresolvedPlan visitHavingClause(HavingClauseContext ctx) {
255265
AstHavingFilterBuilder builder = new AstHavingFilterBuilder(context.peek());
@@ -261,7 +271,11 @@ protected UnresolvedPlan aggregateResult(UnresolvedPlan aggregate, UnresolvedPla
261271
return nextResult != null ? nextResult : aggregate;
262272
}
263273

264-
private UnresolvedExpression visitAstExpression(ParseTree tree) {
274+
/**
275+
* Visit expression tree node and convert to UnresolvedExpression. Protected to allow subclass
276+
* access (e.g., ExtendedAstBuilder for join conditions).
277+
*/
278+
protected UnresolvedExpression visitAstExpression(ParseTree tree) {
265279
return expressionBuilder.visit(tree);
266280
}
267281

0 commit comments

Comments
 (0)