Skip to content

Commit 328ee1b

Browse files
committed
Decorrelate subqueries before handing the plan to the analytics engine
PPL queries that contain EXISTS / IN / SOME / ANY subqueries (and the `subsearch` shapes that lower to them) currently crash on the analytics-engine route with: IllegalStateException: Unrecognized filter operator [EXISTS / EXISTS] The analytics-engine's OpenSearchFilterRule resolves every leaf predicate through ScalarFunction.fromSqlOperatorWithFallback, which doesn't (and shouldn't) cover RexSubQuery operators. The legacy engine path doesn't hit this because Calcite's RelRunner.prepareStatement internally runs SubQueryRemoveRule + decorrelation before physical conversion — the analytics path skips that pipeline entirely. Fix: run the three SubQueryToCorrelate rules followed by RelDecorrelator.decorrelateQuery on the RelNode just before handing it to QueryPlanExecutor.execute (and the same in explain()). Placed on AnalyticsExecutionEngine specifically so the legacy plan flow stays byte-for-byte unchanged. After: 19 -> 13 failures on CalcitePPLExistsSubqueryIT. The "Unrecognized filter operator" is gone from every test (0/19 of that signature remain); the remaining 13 failures are AssertionError result mismatches that point at downstream analytics-engine semantics for LEFT_ANTI / bare-group-by Aggregate inside the decorrelated join, which are unrelated to subquery removal and need separate work. Signed-off-by: Jialiang Liang <jialianl@amazon.com> Signed-off-by: Jialiang Liang <jiallian@amazon.com>
1 parent c4cac2a commit 328ee1b

1 file changed

Lines changed: 36 additions & 2 deletions

File tree

core/src/main/java/org/opensearch/sql/executor/analytics/AnalyticsExecutionEngine.java

Lines changed: 36 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,9 +10,14 @@
1010
import java.util.List;
1111
import java.util.Map;
1212
import org.apache.calcite.plan.RelOptUtil;
13+
import org.apache.calcite.plan.hep.HepPlanner;
14+
import org.apache.calcite.plan.hep.HepProgram;
15+
import org.apache.calcite.plan.hep.HepProgramBuilder;
1316
import org.apache.calcite.rel.RelNode;
17+
import org.apache.calcite.rel.rules.CoreRules;
1418
import org.apache.calcite.rel.type.RelDataType;
1519
import org.apache.calcite.rel.type.RelDataTypeField;
20+
import org.apache.calcite.sql2rel.RelDecorrelator;
1621
import org.opensearch.analytics.exec.QueryPlanExecutor;
1722
import org.opensearch.core.action.ActionListener;
1823
import org.opensearch.sql.ast.statement.ExplainMode;
@@ -77,15 +82,16 @@ public void execute(
7782
// taken by SimpleJsonResponseFormatter sees the correct value.
7883
ProfileMetric execMetric = QueryProfiling.current().getOrCreateMetric(MetricName.EXECUTE);
7984
long execStart = System.nanoTime();
85+
final RelNode executablePlan = removeSubQueries(plan, context);
8086

8187
planExecutor.execute(
82-
plan,
88+
executablePlan,
8389
null,
8490
new ActionListener<>() {
8591
@Override
8692
public void onResponse(Iterable<Object[]> rows) {
8793
try {
88-
List<RelDataTypeField> fields = plan.getRowType().getFieldList();
94+
List<RelDataTypeField> fields = executablePlan.getRowType().getFieldList();
8995
List<ExprValue> results = convertRows(rows, fields);
9096
Schema schema = buildSchema(fields);
9197
execMetric.set(System.nanoTime() - execStart);
@@ -109,6 +115,7 @@ public void explain(
109115
CalcitePlanContext context,
110116
ResponseListener<ExplainResponse> listener) {
111117
try {
118+
plan = removeSubQueries(plan, context);
112119
String logical = RelOptUtil.toString(plan, mode.toExplainLevel());
113120
ExplainResponse response =
114121
new ExplainResponse(new ExplainResponseNodeV2(logical, null, null));
@@ -148,4 +155,31 @@ private ExprType convertType(RelDataType type) {
148155
return org.opensearch.sql.data.type.ExprCoreType.UNKNOWN;
149156
}
150157
}
158+
159+
/**
160+
* Converts every {@link org.apache.calcite.rex.RexSubQuery} in the plan to a {@code
161+
* LogicalCorrelate}, then decorrelates back to standard join shapes (LEFT_SEMI / LEFT_ANTI /
162+
* INNER). The analytics-engine planner has no RexSubQuery handler — it routes filter / project
163+
* expressions through a {@code ScalarFunction} table that doesn't include EXISTS / IN / SOME /
164+
* ANY operators, so an un-removed subquery surfaces as {@code "Unrecognized filter operator
165+
* [EXISTS / EXISTS]"} at the filter rule. The legacy engine path picks this up implicitly inside
166+
* Calcite's {@code RelRunner.prepareStatement}, which is skipped on the analytics route.
167+
*
168+
* <p>Scoped to the analytics path only — placing this on {@link AnalyticsExecutionEngine} (rather
169+
* than on the central {@code UnifiedQueryPlanner}) keeps the legacy path's RelNode pipeline
170+
* untouched.
171+
*/
172+
private static final HepProgram SUBQUERY_REMOVE_PROGRAM =
173+
new HepProgramBuilder()
174+
.addRuleInstance(CoreRules.FILTER_SUB_QUERY_TO_CORRELATE)
175+
.addRuleInstance(CoreRules.PROJECT_SUB_QUERY_TO_CORRELATE)
176+
.addRuleInstance(CoreRules.JOIN_SUB_QUERY_TO_CORRELATE)
177+
.build();
178+
179+
private static RelNode removeSubQueries(RelNode plan, CalcitePlanContext context) {
180+
HepPlanner planner = new HepPlanner(SUBQUERY_REMOVE_PROGRAM);
181+
planner.setRoot(plan);
182+
RelNode withCorrelates = planner.findBestExp();
183+
return RelDecorrelator.decorrelateQuery(withCorrelates, context.relBuilder);
184+
}
151185
}

0 commit comments

Comments
 (0)