From 328ee1ba70a8dd5573b34d17bf1b2185885e5ee3 Mon Sep 17 00:00:00 2001 From: Jialiang Liang Date: Thu, 21 May 2026 14:33:04 -0700 Subject: [PATCH 1/2] Decorrelate subqueries before handing the plan to the analytics engine MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PPL queries that contain EXISTS / IN / SOME / ANY subqueries (and the `subsearch` shapes that lower to them) currently crash on the analytics-engine route with: IllegalStateException: Unrecognized filter operator [EXISTS / EXISTS] The analytics-engine's OpenSearchFilterRule resolves every leaf predicate through ScalarFunction.fromSqlOperatorWithFallback, which doesn't (and shouldn't) cover RexSubQuery operators. The legacy engine path doesn't hit this because Calcite's RelRunner.prepareStatement internally runs SubQueryRemoveRule + decorrelation before physical conversion — the analytics path skips that pipeline entirely. Fix: run the three SubQueryToCorrelate rules followed by RelDecorrelator.decorrelateQuery on the RelNode just before handing it to QueryPlanExecutor.execute (and the same in explain()). Placed on AnalyticsExecutionEngine specifically so the legacy plan flow stays byte-for-byte unchanged. After: 19 -> 13 failures on CalcitePPLExistsSubqueryIT. The "Unrecognized filter operator" is gone from every test (0/19 of that signature remain); the remaining 13 failures are AssertionError result mismatches that point at downstream analytics-engine semantics for LEFT_ANTI / bare-group-by Aggregate inside the decorrelated join, which are unrelated to subquery removal and need separate work. Signed-off-by: Jialiang Liang Signed-off-by: Jialiang Liang --- .../analytics/AnalyticsExecutionEngine.java | 38 ++++++++++++++++++- 1 file changed, 36 insertions(+), 2 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/executor/analytics/AnalyticsExecutionEngine.java b/core/src/main/java/org/opensearch/sql/executor/analytics/AnalyticsExecutionEngine.java index ddfe5fd3556..c5ab238a5e8 100644 --- a/core/src/main/java/org/opensearch/sql/executor/analytics/AnalyticsExecutionEngine.java +++ b/core/src/main/java/org/opensearch/sql/executor/analytics/AnalyticsExecutionEngine.java @@ -10,9 +10,14 @@ import java.util.List; import java.util.Map; import org.apache.calcite.plan.RelOptUtil; +import org.apache.calcite.plan.hep.HepPlanner; +import org.apache.calcite.plan.hep.HepProgram; +import org.apache.calcite.plan.hep.HepProgramBuilder; import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.rules.CoreRules; import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rel.type.RelDataTypeField; +import org.apache.calcite.sql2rel.RelDecorrelator; import org.opensearch.analytics.exec.QueryPlanExecutor; import org.opensearch.core.action.ActionListener; import org.opensearch.sql.ast.statement.ExplainMode; @@ -77,15 +82,16 @@ public void execute( // taken by SimpleJsonResponseFormatter sees the correct value. ProfileMetric execMetric = QueryProfiling.current().getOrCreateMetric(MetricName.EXECUTE); long execStart = System.nanoTime(); + final RelNode executablePlan = removeSubQueries(plan, context); planExecutor.execute( - plan, + executablePlan, null, new ActionListener<>() { @Override public void onResponse(Iterable rows) { try { - List fields = plan.getRowType().getFieldList(); + List fields = executablePlan.getRowType().getFieldList(); List results = convertRows(rows, fields); Schema schema = buildSchema(fields); execMetric.set(System.nanoTime() - execStart); @@ -109,6 +115,7 @@ public void explain( CalcitePlanContext context, ResponseListener listener) { try { + plan = removeSubQueries(plan, context); String logical = RelOptUtil.toString(plan, mode.toExplainLevel()); ExplainResponse response = new ExplainResponse(new ExplainResponseNodeV2(logical, null, null)); @@ -148,4 +155,31 @@ private ExprType convertType(RelDataType type) { return org.opensearch.sql.data.type.ExprCoreType.UNKNOWN; } } + + /** + * Converts every {@link org.apache.calcite.rex.RexSubQuery} in the plan to a {@code + * LogicalCorrelate}, then decorrelates back to standard join shapes (LEFT_SEMI / LEFT_ANTI / + * INNER). The analytics-engine planner has no RexSubQuery handler — it routes filter / project + * expressions through a {@code ScalarFunction} table that doesn't include EXISTS / IN / SOME / + * ANY operators, so an un-removed subquery surfaces as {@code "Unrecognized filter operator + * [EXISTS / EXISTS]"} at the filter rule. The legacy engine path picks this up implicitly inside + * Calcite's {@code RelRunner.prepareStatement}, which is skipped on the analytics route. + * + *

Scoped to the analytics path only — placing this on {@link AnalyticsExecutionEngine} (rather + * than on the central {@code UnifiedQueryPlanner}) keeps the legacy path's RelNode pipeline + * untouched. + */ + private static final HepProgram SUBQUERY_REMOVE_PROGRAM = + new HepProgramBuilder() + .addRuleInstance(CoreRules.FILTER_SUB_QUERY_TO_CORRELATE) + .addRuleInstance(CoreRules.PROJECT_SUB_QUERY_TO_CORRELATE) + .addRuleInstance(CoreRules.JOIN_SUB_QUERY_TO_CORRELATE) + .build(); + + private static RelNode removeSubQueries(RelNode plan, CalcitePlanContext context) { + HepPlanner planner = new HepPlanner(SUBQUERY_REMOVE_PROGRAM); + planner.setRoot(plan); + RelNode withCorrelates = planner.findBestExp(); + return RelDecorrelator.decorrelateQuery(withCorrelates, context.relBuilder); + } } From 7de621566618efa402eb74e5f221ea7079e5e44b Mon Sep 17 00:00:00 2001 From: Jialiang Liang Date: Thu, 21 May 2026 14:48:24 -0700 Subject: [PATCH 2/2] Add containsSubQuery fast-path + unit tests on the discriminator MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Without the fast-path, the rewrite path's HepPlanner.setRoot(plan) calls .getCluster() on the input — which is null on the mocked RelNodes the existing AnalyticsExecutionEngineTest passes through execute(). Six of the suite's nine tests started failing with 'NullPointerException: cluster' after this PR's first commit. Add a containsSubQuery(RelNode) pre-check that walks the plan with a RelHomogeneousShuttle + RexShuttle, returns true the moment it sees a RexSubQuery, and false otherwise. The rewrite chain only runs when the discriminator says there's something to remove — restores compatibility with the existing mock-based tests and skips HEP entirely for the typical query that has no subqueries. Adds two focused unit tests on the discriminator (package-private so the test can call it directly): * containsSubQuery_falseForPlanWithoutSubQuery — vanilla mocked plan reports false, preventing the HEP/decorrelator path from ever firing on cluster-less mocks. * containsSubQuery_trueWhenRelNodeExposesRexSubQueryDuringTraversal — stub the RelNode.accept(RexShuttle) to surface a RexSubQuery and verify it's detected. The HEP+decorrelator chain itself is Calcite-tested; the end-to-end behavior is covered by CalcitePPLExistsSubqueryIT (0/19 -> 6/19 pass, zero remaining 'Unrecognized filter operator' failures). Signed-off-by: Jialiang Liang Signed-off-by: Jialiang Liang --- .../analytics/AnalyticsExecutionEngine.java | 32 +++++++++++ .../AnalyticsExecutionEngineTest.java | 53 +++++++++++++++++++ 2 files changed, 85 insertions(+) diff --git a/core/src/main/java/org/opensearch/sql/executor/analytics/AnalyticsExecutionEngine.java b/core/src/main/java/org/opensearch/sql/executor/analytics/AnalyticsExecutionEngine.java index c5ab238a5e8..d802c011a48 100644 --- a/core/src/main/java/org/opensearch/sql/executor/analytics/AnalyticsExecutionEngine.java +++ b/core/src/main/java/org/opensearch/sql/executor/analytics/AnalyticsExecutionEngine.java @@ -13,10 +13,13 @@ import org.apache.calcite.plan.hep.HepPlanner; import org.apache.calcite.plan.hep.HepProgram; import org.apache.calcite.plan.hep.HepProgramBuilder; +import org.apache.calcite.rel.RelHomogeneousShuttle; import org.apache.calcite.rel.RelNode; import org.apache.calcite.rel.rules.CoreRules; import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rel.type.RelDataTypeField; +import org.apache.calcite.rex.RexShuttle; +import org.apache.calcite.rex.RexSubQuery; import org.apache.calcite.sql2rel.RelDecorrelator; import org.opensearch.analytics.exec.QueryPlanExecutor; import org.opensearch.core.action.ActionListener; @@ -177,9 +180,38 @@ private ExprType convertType(RelDataType type) { .build(); private static RelNode removeSubQueries(RelNode plan, CalcitePlanContext context) { + // Fast-path: skip HEP + decorrelation entirely if the plan has no RexSubQuery. Avoids HEP + // overhead for the typical query and keeps the rewrite a no-op for callers (incl. unit tests) + // whose RelNodes don't have a real RelOptCluster wired up. + if (!containsSubQuery(plan)) { + return plan; + } HepPlanner planner = new HepPlanner(SUBQUERY_REMOVE_PROGRAM); planner.setRoot(plan); RelNode withCorrelates = planner.findBestExp(); return RelDecorrelator.decorrelateQuery(withCorrelates, context.relBuilder); } + + /** Walks {@code plan} and returns {@code true} as soon as any {@link RexSubQuery} is found. */ + static boolean containsSubQuery(RelNode plan) { + boolean[] found = {false}; + RexShuttle rexFinder = + new RexShuttle() { + @Override + public org.apache.calcite.rex.RexNode visitSubQuery(RexSubQuery sub) { + found[0] = true; + return sub; + } + }; + plan.accept( + new RelHomogeneousShuttle() { + @Override + public RelNode visit(RelNode node) { + if (found[0]) return node; + node.accept(rexFinder); + return found[0] ? node : super.visit(node); + } + }); + return found[0]; + } } diff --git a/core/src/test/java/org/opensearch/sql/executor/analytics/AnalyticsExecutionEngineTest.java b/core/src/test/java/org/opensearch/sql/executor/analytics/AnalyticsExecutionEngineTest.java index 4de596fb375..9fcd7fa1d14 100644 --- a/core/src/test/java/org/opensearch/sql/executor/analytics/AnalyticsExecutionEngineTest.java +++ b/core/src/test/java/org/opensearch/sql/executor/analytics/AnalyticsExecutionEngineTest.java @@ -6,6 +6,7 @@ package org.opensearch.sql.executor.analytics; import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; import static org.junit.jupiter.api.Assertions.assertNotNull; import static org.junit.jupiter.api.Assertions.assertTrue; import static org.mockito.ArgumentMatchers.any; @@ -20,9 +21,12 @@ import java.util.concurrent.atomic.AtomicReference; import java.util.stream.Collectors; import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.RelShuttle; import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rel.type.RelDataTypeFactory; import org.apache.calcite.rel.type.RelDataTypeSystem; +import org.apache.calcite.rex.RexShuttle; +import org.apache.calcite.rex.RexSubQuery; import org.apache.calcite.sql.type.SqlTypeFactoryImpl; import org.apache.calcite.sql.type.SqlTypeName; import org.junit.jupiter.api.BeforeEach; @@ -276,6 +280,55 @@ void physicalPlanExplain_callsOnFailure() { + errorRef.get().getMessage()); } + // --- Subquery-removal discriminator (see {@link AnalyticsExecutionEngine#containsSubQuery}) --- + // + // {@code containsSubQuery} gates the SubQueryRemoveRule + RelDecorrelator chain we run inside + // {@code execute()} / {@code explain()}. The chain is Calcite-tested, but the *discriminator* + // is plugin code; if it falsely returns true for a vanilla mock RelNode the existing + // execute-path tests in this class break with "NullPointerException: cluster" inside HepPlanner. + // If it falsely returns false on a plan that does contain a RexSubQuery, the EXISTS-subquery + // regression we shipped this fix for would silently reappear. + + @Test + void containsSubQuery_falseForPlanWithoutSubQuery() { + // A plain mocked RelNode has no RexNodes — Mockito default returns null from accept(...), + // which keeps the shuttle from ever reaching visitSubQuery. + RelNode plan = mock(RelNode.class); + + assertFalse( + AnalyticsExecutionEngine.containsSubQuery(plan), + "containsSubQuery should report false on a plan with no RexSubQuery — otherwise the" + + " HepPlanner / RelDecorrelator path would fire on plans that have no subqueries to" + + " remove (and on tests that pass mocked RelNodes without a real cluster)."); + } + + @Test + void containsSubQuery_trueWhenRelNodeExposesRexSubQueryDuringTraversal() { + // Stub the RelNode.accept(RexShuttle) call to feed a RexSubQuery into the shuttle, mirroring + // what a real LogicalFilter(condition=[EXISTS(subq)]) would do during traversal. + RelNode plan = mock(RelNode.class); + RexSubQuery subQuery = mock(RexSubQuery.class); + when(plan.accept(any(RelShuttle.class))) + .thenAnswer( + inv -> { + RelShuttle shuttle = inv.getArgument(0); + return shuttle.visit(plan); + }); + when(plan.accept(any(RexShuttle.class))) + .thenAnswer( + inv -> { + RexShuttle rex = inv.getArgument(0); + rex.visitSubQuery(subQuery); + return null; + }); + + assertTrue( + AnalyticsExecutionEngine.containsSubQuery(plan), + "containsSubQuery should detect a RexSubQuery surfaced through the node's" + + " accept(RexShuttle) — the discriminator is what gates the EXISTS/IN/SOME/ANY" + + " removal pass."); + } + // --- helpers --- private QueryResponse executeAndCapture(RelNode relNode) {