Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -1889,10 +1889,23 @@ public RelNode visitJoin(Join node, CalcitePlanContext context) {
}
} else {
// The join-with-criteria grammar doesn't allow empty join condition
RexNode joinCondition =
node.getJoinCondition()
.map(c -> rexVisitor.analyzeJoinCondition(c, context))
.orElse(context.relBuilder.literal(true));
RexNode joinCondition;
Comment thread
RyanL1997 marked this conversation as resolved.
Optional<List<String>> bareFields =
JoinAndLookupUtils.collectBareFields(node.getJoinCondition().get());
if (bareFields.isPresent()) {
// Bare-field shorthand `on a [AND b ...]`. Resolving by stack position rather than by
// qualifier keeps a self-join `join on f t` a real cross-scan equi-join, not f = f.
joinCondition =
bareFields.get().stream()
.map(f -> buildJoinConditionByFieldName(context, f))
.reduce(context.rexBuilder::and)
.orElse(context.relBuilder.literal(true));
} else {
joinCondition =
node.getJoinCondition()
.map(c -> rexVisitor.analyzeJoinCondition(c, context))
.orElse(context.relBuilder.literal(true));
}
if (node.getJoinType() == SEMI || node.getJoinType() == ANTI) {
// semi and anti join only return left table outputs
context.relBuilder.join(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,15 @@
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.stream.Collectors;
import org.apache.calcite.rel.core.JoinRelType;
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.util.Pair;
import org.opensearch.sql.ast.expression.And;
import org.opensearch.sql.ast.expression.Field;
import org.opensearch.sql.ast.expression.QualifiedName;
import org.opensearch.sql.ast.expression.UnresolvedExpression;
import org.opensearch.sql.ast.tree.Join;
import org.opensearch.sql.ast.tree.Lookup;
import org.opensearch.sql.calcite.CalcitePlanContext;
Expand All @@ -37,6 +42,32 @@ static JoinRelType translateJoinType(Join.JoinType joinType) {
}
}

/**
* Collects the names of bare single-part-field join criteria (e.g. {@code on a AND b} -> {@code
* Optional.of(["a","b"])}). Returns empty for anything else (qualified field, comparison, OR).
* The list is only constructed when every node in the AND-tree is a bare field.
*/
static Optional<List<String>> collectBareFields(UnresolvedExpression expr) {
if (expr instanceof And and) {
return collectBareFields(and.getLeft())
.flatMap(
left ->
collectBareFields(and.getRight())
.map(
right -> {
List<String> merged = new ArrayList<>(left);
merged.addAll(right);
return merged;
}));
}
if (expr instanceof Field field
&& field.getField() instanceof QualifiedName qn
&& qn.getPrefix().isEmpty()) {
return Optional.of(new ArrayList<>(List.of(qn.getSuffix())));
}
return Optional.empty();
}

/* ------For Lookup------ */

/**
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
/*
* Copyright OpenSearch Contributors
* SPDX-License-Identifier: Apache-2.0
*/

package org.opensearch.sql.calcite.utils;

import static org.junit.jupiter.api.Assertions.assertEquals;
import static org.junit.jupiter.api.Assertions.assertTrue;

import java.util.List;
import java.util.Optional;
import org.junit.jupiter.api.Test;
import org.opensearch.sql.ast.expression.And;
import org.opensearch.sql.ast.expression.Compare;
import org.opensearch.sql.ast.expression.Field;
import org.opensearch.sql.ast.expression.QualifiedName;
import org.opensearch.sql.ast.expression.UnresolvedExpression;

public class JoinAndLookupUtilsTest {

@Test
public void collectBareFields_singleField() {
UnresolvedExpression expr = bareField("a");
Optional<List<String>> result = JoinAndLookupUtils.collectBareFields(expr);
assertTrue(result.isPresent());
assertEquals(List.of("a"), result.get());
}

@Test
public void collectBareFields_multipleFieldsAndChain() {
UnresolvedExpression expr = new And(bareField("a"), new And(bareField("b"), bareField("c")));
Optional<List<String>> result = JoinAndLookupUtils.collectBareFields(expr);
assertTrue(result.isPresent());
assertEquals(List.of("a", "b", "c"), result.get());
}

@Test
public void collectBareFields_mixedConditionReturnsEmpty() {
// `a AND l.x = r.x` — the Compare node makes this not all-bare-fields
UnresolvedExpression compare =
new Compare("=", qualifiedField("l", "x"), qualifiedField("r", "x"));
UnresolvedExpression expr = new And(bareField("a"), compare);
Optional<List<String>> result = JoinAndLookupUtils.collectBareFields(expr);
assertTrue(result.isEmpty());
}

@Test
public void collectBareFields_qualifiedFieldReturnsEmpty() {
// A two-part name like `t.x` is not a bare field
UnresolvedExpression expr = qualifiedField("t", "x");
Optional<List<String>> result = JoinAndLookupUtils.collectBareFields(expr);
assertTrue(result.isEmpty());
}

@Test
public void collectBareFields_mixedLeftSideReturnsEmptyNoPartialState() {
// Compare on left, bare field on right — must return empty (not ["b"])
UnresolvedExpression compare = new Compare("=", bareField("x"), bareField("y"));
UnresolvedExpression expr = new And(compare, bareField("b"));
Optional<List<String>> result = JoinAndLookupUtils.collectBareFields(expr);
assertTrue(result.isEmpty());
}

private static Field bareField(String name) {
return new Field(QualifiedName.of(name));
}

private static Field qualifiedField(String qualifier, String name) {
return new Field(QualifiedName.of(qualifier, name));
}
}
7 changes: 5 additions & 2 deletions docs/user/ppl/cmd/join.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,9 @@ source = table1 | left anti join left = l right = r on l.a = r.a table2
source = table1 | join left = l right = r [ source = table2 | where d > 10 | head 5 ]
source = table1 | inner join on table1.a = table2.a table2 | fields table1.a, table2.a, table1.b, table1.c
source = table1 | inner join on a = c table2 | fields a, b, c, d
source = table1 | inner join on a table2 | fields a, table2.a, b, c
source = table1 | inner join on a AND b table2 | fields a, table2.a, b, table2.b
source = table1 | join on a table2 | fields a, table2.a, b, c
source = table1 as t1 | join left = l right = r on l.a = r.a table2 as t2 | fields l.a, r.a
source = table1 as t1 | join left = l right = r on l.a = r.a table2 as t2 | fields t1.a, t2.a
source = table1 | join left = l right = r on l.a = r.a [ source = table2 ] as s | fields l.a, s.a
Expand All @@ -40,7 +43,7 @@ The basic `join` syntax supports the following parameters.

| Parameter | Required/Optional | Description |
| --- | --- | --- |
| `<joinCriteria>` | Required | A comparison expression specifying how to join the datasets. Must be placed after the `on` or `where` keyword in the query. |
| `<joinCriteria>` | Required | A comparison expression specifying how to join the datasets. Must be placed after the `on` or `where` keyword in the query. A bare field name `f` (or `f AND g ...`) is shorthand for an equi-join on the common field(s) and keeps both key columns. |
| `<right-dataset>` | Required | The right dataset, which can be an index or a subsearch, with or without an alias. |
| `joinType` | Optional | The type of join to perform. Valid values are `left`, `semi`, `anti`, and performance-sensitive types (`right`, `full`, and `cross`). Default is `inner`. |
| `left` | Optional | An alias for the left dataset (typically a subsearch) used to avoid ambiguous field names. Specify as `left = <leftAlias>`. |
Expand Down Expand Up @@ -71,7 +74,7 @@ The extended `join` syntax supports the following parameters.

| Parameter | Required/Optional | Description |
| --- | --- | --- |
| `<joinCriteria>` | Required | A comparison expression specifying how to join the datasets. Must be placed after the `on` or `where` keyword in the query. |
| `<joinCriteria>` | Required | A comparison expression specifying how to join the datasets. Must be placed after the `on` or `where` keyword in the query. A bare field name `f` (or `f AND g ...`) is shorthand for an equi-join on the common field(s); it keeps both key columns and differs from `<join-field-list>`, which merges duplicates. |
| `<right-dataset>` | Required | The right dataset, which can be an index or a subsearch, with or without an alias. |
| `type` | Optional | The join type when using extended syntax. Valid values are `left`, `outer` (same as `left`), `semi`, `anti`, and performance-sensitive types (`right`, `full`, and `cross`). Default is `inner`. |
| `<join-field-list>` | Optional | A list of fields used to build the join criteria. These fields must exist in both datasets. If not specified, all fields common to both datasets are used as join keys. |
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -928,6 +928,109 @@ public void testJoinWhenLegacyNotPreferred() throws IOException {
});
}

@Test
public void testJoinWithImplicitField() throws IOException {
// Keep-both, so co-named columns (other than the key) resolve to the left side.
JSONObject actual =
executeQuery(
String.format(
"source=%s | inner join on name %s | fields name, age, state, country, occupation,"
+ " salary | sort name, occupation",
TEST_INDEX_STATE_COUNTRY, TEST_INDEX_OCCUPATION));
verifySchema(
actual,
schema("name", "string"),
schema("age", "int"),
schema("state", "string"),
schema("country", "string"),
schema("occupation", "string"),
schema("salary", "int"));
// Rows are listed in the `sort name, occupation` order so verifyDataRowsInOrder also
// validates the sort (David/Doctor < David/Unemployed, then Hello, Jake, Jane, John).
verifyDataRowsInOrder(
actual,
rows("David", 40, "Washington", "USA", "Doctor", 120000),
rows("David", 40, "Washington", "USA", "Unemployed", 0),
rows("Hello", 30, "New York", "USA", "Artist", 70000),
rows("Jake", 70, "California", "USA", "Engineer", 100000),
rows("Jane", 20, "Quebec", "Canada", "Scientist", 90000),
rows("John", 25, "Ontario", "Canada", "Doctor", 120000));

// The shorthand and the explicit qualified-equality form are output-identical.
JSONObject explicitForm =
executeQuery(
String.format(
"source=%s | inner join on %s.name = %s.name %s | fields name, age, state, country,"
+ " occupation, salary | sort name, occupation",
TEST_INDEX_STATE_COUNTRY,
TEST_INDEX_STATE_COUNTRY,
TEST_INDEX_OCCUPATION,
TEST_INDEX_OCCUPATION));
assertJsonEquals(explicitForm.toString(), actual.toString());
}

@Test
public void testJoinNoPrefixImplicitField() throws IOException {
// No-prefix `join on name` matches the explicit qualified form, not the field-list `join name`.
JSONObject actual =
executeQuery(
String.format(
"source=%s | join on name %s | fields name, age, state, occupation, salary | sort"
+ " name, occupation",
TEST_INDEX_STATE_COUNTRY, TEST_INDEX_OCCUPATION));
JSONObject explicitForm =
executeQuery(
String.format(
"source=%s | join on %s.name = %s.name %s | fields name, age, state, occupation,"
+ " salary | sort name, occupation",
TEST_INDEX_STATE_COUNTRY,
TEST_INDEX_STATE_COUNTRY,
TEST_INDEX_OCCUPATION,
TEST_INDEX_OCCUPATION));
assertJsonEquals(explicitForm.toString(), actual.toString());
}

@Test
public void testLeftJoinWithImplicitField() throws IOException {
// Keep-both does not coalesce, so unmatched-left rows keep the non-null left key, not a NULL.
JSONObject actual =
executeQuery(
String.format(
"source=%s | left join on name %s | fields name, age, state, occupation, salary",
TEST_INDEX_STATE_COUNTRY, TEST_INDEX_OCCUPATION));
verifySchema(
actual,
schema("name", "string"),
schema("age", "int"),
schema("state", "string"),
schema("occupation", "string"),
schema("salary", "int"));
verifyDataRows(
actual,
rows("Jake", 70, "California", "Engineer", 100000),
rows("Hello", 30, "New York", "Artist", 70000),
rows("John", 25, "Ontario", "Doctor", 120000),
rows("Jane", 20, "Quebec", "Scientist", 90000),
rows("David", 40, "Washington", "Doctor", 120000),
rows("David", 40, "Washington", "Unemployed", 0),
rows("Jim", 27, "B.C", null, null),
rows("Peter", 57, "B.C", null, null),
rows("Rick", 70, "B.C", null, null));
}

@Test
public void testAliasedSelfJoinWithImplicitField() throws IOException {
// Self-join on the unique `name`: a real equi-join yields 8 rows, not the 64-row cross product
// a tautology would give.
JSONObject actual =
executeQuery(
String.format(
"source=%s | inner join left=l right=r on name %s | fields name, r.name",
TEST_INDEX_STATE_COUNTRY, TEST_INDEX_STATE_COUNTRY));
verifyNumOfRows(actual, 8);
verifySchema(actual, schema("name", "string"), schema("r.name", "string"));
}

@Test
public void testJoinComparing() throws IOException {
JSONObject actual =
Expand Down
5 changes: 3 additions & 2 deletions ppl/src/main/antlr/OpenSearchPPLParser.g4
Original file line number Diff line number Diff line change
Expand Up @@ -729,8 +729,9 @@ sourceFilterArg

// join
joinCommand
: JOIN (joinOption)* (fieldList)? right = tableOrSubqueryClause
| sqlLikeJoinType? JOIN (joinOption)* sideAlias joinHintList? joinCriteria right = tableOrSubqueryClause
// Criteria alt listed first - so `join on a` reads `on` as the criteria keyword, not a field.
: sqlLikeJoinType? JOIN (joinOption)* sideAlias joinHintList? joinCriteria right = tableOrSubqueryClause
Comment thread
RyanL1997 marked this conversation as resolved.
| JOIN (joinOption)* (fieldList)? right = tableOrSubqueryClause
;

sqlLikeJoinType
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -326,6 +326,8 @@ public UnresolvedPlan visitJoinCommand(OpenSearchPPLParser.JoinCommandContext ct
if (ctx.fieldList() != null) {
joinFields = Optional.of(getFieldList(ctx.fieldList()));
}
// Keep a bare `on <field>` criteria verbatim; the planner expands it to an equi-join. Folding
// it into joinFields here would instead merge the key into one column.
return new Join(
projectExceptMeta(right),
leftAlias,
Expand Down
Loading
Loading