Skip to content

Commit 401c72e

Browse files
authored
Fix field not found issue in join output when column names are ambiguous (opensearch-project#3760)
* Fix field not found issue in join output when column names are ambiguous Signed-off-by: Lantao Jin <ltjin@amazon.com> * update join doc Signed-off-by: Lantao Jin <ltjin@amazon.com> --------- Signed-off-by: Lantao Jin <ltjin@amazon.com>
1 parent 2751320 commit 401c72e

12 files changed

Lines changed: 548 additions & 370 deletions

File tree

core/src/main/java/org/opensearch/sql/ast/tree/Join.java

Lines changed: 33 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,20 +19,49 @@
1919

2020
@ToString
2121
@Getter
22-
@RequiredArgsConstructor
2322
@EqualsAndHashCode(callSuper = false)
2423
public class Join extends UnresolvedPlan {
2524
private UnresolvedPlan left;
2625
private final UnresolvedPlan right;
27-
private final Optional<String> leftAlias;
28-
private final Optional<String> rightAlias;
26+
private Optional<String> leftAlias;
27+
private Optional<String> rightAlias;
2928
private final JoinType joinType;
3029
private final Optional<UnresolvedExpression> joinCondition;
3130
private final JoinHint joinHint;
3231

32+
public Join(
33+
UnresolvedPlan right,
34+
Optional<String> leftAlias,
35+
Optional<String> rightAlias,
36+
JoinType joinType,
37+
Optional<UnresolvedExpression> joinCondition,
38+
JoinHint joinHint) {
39+
this.right = right;
40+
this.leftAlias = leftAlias;
41+
this.rightAlias = rightAlias;
42+
this.joinType = joinType;
43+
this.joinCondition = joinCondition;
44+
this.joinHint = joinHint;
45+
}
46+
3347
@Override
3448
public UnresolvedPlan attach(UnresolvedPlan child) {
35-
this.left = leftAlias.isEmpty() ? child : new SubqueryAlias(leftAlias.get(), child);
49+
// attach child to left, meanwhile fill back side aliases if possible.
50+
if (leftAlias.isPresent()) {
51+
if (child instanceof SubqueryAlias alias) {
52+
this.left = new SubqueryAlias(leftAlias.get(), alias.getChild().getFirst());
53+
} else {
54+
this.left = new SubqueryAlias(leftAlias.get(), child);
55+
}
56+
} else {
57+
if (child instanceof SubqueryAlias alias) {
58+
leftAlias = Optional.of(alias.getAlias());
59+
}
60+
this.left = child;
61+
}
62+
if (rightAlias.isEmpty() && this.right instanceof SubqueryAlias alias) {
63+
rightAlias = Optional.of(alias.getAlias());
64+
}
3665
return this;
3766
}
3867

core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java

Lines changed: 38 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,8 @@
66
package org.opensearch.sql.calcite;
77

88
import static org.apache.calcite.sql.SqlKind.AS;
9+
import static org.opensearch.sql.ast.tree.Join.JoinType.ANTI;
10+
import static org.opensearch.sql.ast.tree.Join.JoinType.SEMI;
911
import static org.opensearch.sql.ast.tree.Sort.NullOrder.NULL_FIRST;
1012
import static org.opensearch.sql.ast.tree.Sort.NullOrder.NULL_LAST;
1113
import static org.opensearch.sql.ast.tree.Sort.SortOption.DEFAULT_DESC;
@@ -646,8 +648,42 @@ public RelNode visitJoin(Join node, CalcitePlanContext context) {
646648
node.getJoinCondition()
647649
.map(c -> rexVisitor.analyzeJoinCondition(c, context))
648650
.orElse(context.relBuilder.literal(true));
649-
context.relBuilder.join(
650-
JoinAndLookupUtils.translateJoinType(node.getJoinType()), joinCondition);
651+
if (node.getJoinType() == SEMI || node.getJoinType() == ANTI) {
652+
// semi and anti join only return left table outputs
653+
context.relBuilder.join(
654+
JoinAndLookupUtils.translateJoinType(node.getJoinType()), joinCondition);
655+
} else {
656+
// Join condition could contain duplicated column name, Calcite will rename the duplicated
657+
// column name with numeric suffix, e.g. ON t1.id = t2.id, the output contains `id` and `id0`
658+
// when a new project add to stack. To avoid `id0`, we will rename the `id0` to `alias.id`
659+
// or `tableIdentifier.id`:
660+
List<String> leftColumns = context.relBuilder.peek(1).getRowType().getFieldNames();
661+
List<String> rightColumns = context.relBuilder.peek().getRowType().getFieldNames();
662+
List<String> rightTableName =
663+
PlanUtils.findTable(context.relBuilder.peek()).getQualifiedName();
664+
// Using `table.column` instead of `catalog.database.table.column` as column prefix because
665+
// the schema for OpenSearch index is always `OpenSearch`. But if we reuse this logic in other
666+
// query engines, the column can only be searched in current schema namespace. For example,
667+
// If the plan convert to Spark plan, and there are two table1: database1.table1 and
668+
// database2.table1. The query with column `table1.id` can only be resolved in the namespace
669+
// of "database1". User should run `using database1` before the query which access `table1.id`
670+
String rightTableQualifiedName = rightTableName.getLast();
671+
// new columns with alias or table;
672+
List<String> rightColumnsWithAliasIfConflict =
673+
rightColumns.stream()
674+
.map(
675+
col ->
676+
leftColumns.contains(col)
677+
? node.getRightAlias()
678+
.map(a -> a + "." + col)
679+
.orElse(rightTableQualifiedName + "." + col)
680+
: col)
681+
.toList();
682+
context.relBuilder.join(
683+
JoinAndLookupUtils.translateJoinType(node.getJoinType()), joinCondition);
684+
JoinAndLookupUtils.renameToExpectedFields(
685+
rightColumnsWithAliasIfConflict, leftColumns.size(), context);
686+
}
651687
return context.relBuilder.peek();
652688
}
653689

core/src/main/java/org/opensearch/sql/calcite/CalciteRexNodeVisitor.java

Lines changed: 43 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@
77

88
import static java.util.Objects.requireNonNull;
99
import static org.apache.calcite.sql.SqlKind.AS;
10+
import static org.apache.commons.lang3.StringUtils.substringAfterLast;
1011
import static org.opensearch.sql.ast.expression.SpanUnit.NONE;
1112
import static org.opensearch.sql.ast.expression.SpanUnit.UNKNOWN;
1213
import static org.opensearch.sql.calcite.utils.OpenSearchTypeFactory.TYPE_FACTORY;
@@ -279,7 +280,24 @@ public RexNode visitQualifiedName(QualifiedName node, CalcitePlanContext context
279280
}
280281
} else if (parts.size() == 2) {
281282
// 1.2 Handle the case of `t1.id = t2.id` or `alias1.id = alias2.id`
282-
return context.relBuilder.field(2, parts.get(0), parts.get(1));
283+
try {
284+
return context.relBuilder.field(2, parts.get(0), parts.get(1));
285+
} catch (IllegalArgumentException e) {
286+
// Similar to the step 2.3.
287+
List<String> candidates =
288+
context.relBuilder.peek(1).getRowType().getFieldNames().stream()
289+
.filter(col -> substringAfterLast(col, ".").equals(parts.getLast()))
290+
.toList();
291+
for (String candidate : candidates) {
292+
try {
293+
// field("nation2", "n2.n_name"); // pass
294+
return context.relBuilder.field(2, parts.get(0), candidate);
295+
} catch (IllegalArgumentException e1) {
296+
// field("nation2", "n_name"); // do nothing when fail (n_name is field of nation1)
297+
}
298+
}
299+
throw new UnsupportedOperationException("Unsupported qualified name: " + node);
300+
}
283301
} else if (parts.size() == 3) {
284302
throw new UnsupportedOperationException("Unsupported qualified name: " + node);
285303
}
@@ -294,21 +312,43 @@ public RexNode visitQualifiedName(QualifiedName node, CalcitePlanContext context
294312
List<String> currentFields = context.relBuilder.peek().getRowType().getFieldNames();
295313
if (currentFields.contains(qualifiedName)) {
296314
// 2.1 resolve QualifiedName from stack top
315+
// Note: QualifiedName with multiple parts also could be applied in step 2.1,
316+
// for example `n2.n_name` or `nation2.n_name` in the output of join can be resolved here.
297317
return context.relBuilder.field(qualifiedName);
298318
} else if (node.getParts().size() == 2) {
299319
// 2.2 resolve QualifiedName with an alias or table name
300320
List<String> parts = node.getParts();
301321
try {
302322
return context.relBuilder.field(1, parts.get(0), parts.get(1));
303323
} catch (IllegalArgumentException e) {
304-
// 2.3 resolve QualifiedName with outer alias
324+
// 2.3 For field which renamed with <alias.field>, to resolve the field with table
325+
// identifier
326+
// `nation2.n_name`,
327+
// we convert it to resolve <table.alias.field>, e.g. `nation2.n2.n_name`
328+
// `n2.n_name` was the renamed field name from the duplicated field `(nation2.)n_name0` of
329+
// join output.
330+
// Build the candidates which contains `n_name`: e.g. `(nation1.)n_name`, `n2.n_name`
331+
List<String> candidates =
332+
context.relBuilder.peek().getRowType().getFieldNames().stream()
333+
.filter(col -> substringAfterLast(col, ".").equals(parts.getLast()))
334+
.toList();
335+
for (String candidate : candidates) {
336+
try {
337+
// field("nation2", "n2.n_name"); // pass
338+
return context.relBuilder.field(parts.get(0), candidate);
339+
} catch (IllegalArgumentException e1) {
340+
// field("nation2", "n_name"); // do nothing when fail (n_name is field of nation1)
341+
}
342+
}
343+
// 2.4 resolve QualifiedName with outer alias
344+
// check existing of parts.get(0)
305345
return context
306346
.peekCorrelVar()
307347
.map(correlVar -> context.relBuilder.field(correlVar, parts.get(1)))
308348
.orElseThrow(() -> e); // Re-throw the exception if no correlated variable exists
309349
}
310350
} else if (currentFields.stream().noneMatch(f -> f.startsWith(qualifiedName))) {
311-
// 2.4 try resolving combination of 2.1 and 2.3 to resolve rest cases
351+
// 2.5 try resolving combination of 2.1 and 2.4 to resolve rest cases
312352
return context
313353
.peekCorrelVar()
314354
.map(correlVar -> context.relBuilder.field(correlVar, qualifiedName))

core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,19 @@
1515
import java.util.ArrayList;
1616
import java.util.List;
1717
import javax.annotation.Nullable;
18+
import org.apache.calcite.plan.RelOptTable;
19+
import org.apache.calcite.rel.RelHomogeneousShuttle;
20+
import org.apache.calcite.rel.RelNode;
21+
import org.apache.calcite.rel.RelShuttle;
22+
import org.apache.calcite.rel.core.TableScan;
1823
import org.apache.calcite.rex.RexInputRef;
1924
import org.apache.calcite.rex.RexNode;
2025
import org.apache.calcite.rex.RexVisitorImpl;
2126
import org.apache.calcite.rex.RexWindowBound;
2227
import org.apache.calcite.sql.fun.SqlStdOperatorTable;
2328
import org.apache.calcite.sql.type.SqlTypeName;
2429
import org.apache.calcite.tools.RelBuilder;
30+
import org.apache.calcite.util.Util;
2531
import org.opensearch.sql.ast.AbstractNodeVisitor;
2632
import org.opensearch.sql.ast.Node;
2733
import org.opensearch.sql.ast.expression.IntervalUnit;
@@ -283,6 +289,25 @@ public Relation visitRelation(Relation node, Object context) {
283289
return node.getChild().getFirst().accept(relationVisitor, null);
284290
}
285291

292+
/** Similar to {@link org.apache.calcite.plan.RelOptUtil#findTable(RelNode, String) } */
293+
static RelOptTable findTable(RelNode root) {
294+
try {
295+
RelShuttle visitor =
296+
new RelHomogeneousShuttle() {
297+
@Override
298+
public RelNode visit(TableScan scan) {
299+
final RelOptTable scanTable = scan.getTable();
300+
throw new Util.FoundOne(scanTable);
301+
}
302+
};
303+
root.accept(visitor);
304+
return null;
305+
} catch (Util.FoundOne e) {
306+
Util.swallow(e, null);
307+
return (RelOptTable) e.getNode();
308+
}
309+
}
310+
286311
/**
287312
* Transform plan to attach specified child to the first leaf node.
288313
*

0 commit comments

Comments
 (0)