From 8bfb649e79945872a714b1147c578e879fa05284 Mon Sep 17 00:00:00 2001 From: ryan-gh-bot Date: Fri, 29 May 2026 01:47:15 +0000 Subject: [PATCH 1/7] [BugFix] Restore dedup pushdown when combined with WHERE clause (#5482) Run PPLSimplifyDedupRule before FilterMergeRule in the HEP optimizer so the bucket-non-null filter PPL emits for dedup is matched as-is. With the previous order, an upstream user where filter sat adjacent to the bucket-non-null filter; FilterMergeRule fired first and merged them into a conjunction that no longer satisfied PPLSimplifyDedupRule's operand predicate, defeating dedup pushdown to the shard. Use sequential addRuleInstance phases for explicit ordering rather than addRuleCollection, which is documented as non-deterministic in firing order. Adds two regression tests in CalcitePPLDedupTest: one that asserts LogicalDedup is produced under the fixed order, and one that pins the buggy behavior under the swapped order. Signed-off-by: ryan-gh-bot --- .../sql/calcite/utils/CalciteToolsHelper.java | 25 +++-- .../ppl/calcite/CalcitePPLAbstractTest.java | 14 +++ .../sql/ppl/calcite/CalcitePPLDedupTest.java | 91 +++++++++++++++++++ 3 files changed, 123 insertions(+), 7 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/CalciteToolsHelper.java b/core/src/main/java/org/opensearch/sql/calcite/utils/CalciteToolsHelper.java index 54b9d4ffbaf..b8049c2d159 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/CalciteToolsHelper.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/CalciteToolsHelper.java @@ -36,7 +36,6 @@ import java.sql.PreparedStatement; import java.sql.SQLException; import java.time.Instant; -import java.util.List; import java.util.Properties; import java.util.function.Consumer; import org.apache.calcite.adapter.enumerable.EnumerableConvention; @@ -59,7 +58,6 @@ import org.apache.calcite.plan.Convention; import org.apache.calcite.plan.RelOptCluster; import org.apache.calcite.plan.RelOptPlanner; -import org.apache.calcite.plan.RelOptRule; import org.apache.calcite.plan.RelOptSchema; import org.apache.calcite.plan.RelOptTable; import org.apache.calcite.plan.RelOptTable.ViewExpander; @@ -551,12 +549,25 @@ public RelNode visit(TableScan scan) { } } - /** Try to optimize the plan by using HepPlanner */ - private static final List hepRuleList = - List.of(FilterMergeRule.Config.DEFAULT.toRule(), PPLSimplifyDedupRule.DEDUP_SIMPLIFY_RULE); - + /** + * Try to optimize the plan by using HepPlanner. + * + *

Rules are added as separate {@code addRuleInstance} phases so each rule is run to a fixed + * point before the next phase begins. {@code PPLSimplifyDedupRule} must run before {@code + * FilterMergeRule}: PPL emits the dedup pattern with an {@code IS NOT NULL} bucket-non-null + * filter directly above the scan, and the simplify rule's operand requires that filter to remain + * a pure {@code IS NOT NULL} (or {@code AND} of {@code IS NOT NULL}s). When a user {@code where} + * clause precedes the dedup, the user's filter sits adjacent to the bucket-non-null filter; if + * {@code FilterMergeRule} fires first it folds the two into a single conjunction, the simplify + * rule's operand match fails, no {@link org.opensearch.sql.calcite.plan.rel.LogicalDedup} is + * produced, and dedup pushdown is silently disabled. See + * https://github.com/opensearch-project/sql/issues/5482. + */ private static final HepProgram HEP_PROGRAM = - new HepProgramBuilder().addRuleCollection(hepRuleList).build(); + new HepProgramBuilder() + .addRuleInstance(PPLSimplifyDedupRule.DEDUP_SIMPLIFY_RULE) + .addRuleInstance(FilterMergeRule.Config.DEFAULT.toRule()) + .build(); public static RelNode optimize(RelNode plan, CalcitePlanContext context) { Util.discard(context); diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLAbstractTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLAbstractTest.java index ab07cd9b5c1..ec3b57d02cc 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLAbstractTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLAbstractTest.java @@ -110,6 +110,20 @@ public RelNode getRelNode(String ppl) { return root; } + /** + * Get the root RelNode of the given PPL query without merging adjacent filters. Useful for + * regression tests that need to exercise rule ordering against the un-merged shape that PPL + * actually emits to the production HEP optimizer. + */ + public RelNode getRelNodeRaw(String ppl) { + CalcitePlanContext context = createBuilderContext(); + Query query = (Query) plan(pplParser, ppl); + planTransformer.analyze(query.getPlan(), context); + RelNode root = context.relBuilder.build(); + System.out.println(root.explain()); + return root; + } + private RelNode mergeAdjacentFilters(RelNode relNode) { HepProgram program = new HepProgramBuilder().addRuleInstance(FilterMergeRule.Config.DEFAULT.toRule()).build(); diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLDedupTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLDedupTest.java index ca1a789b0f4..0be07bffcff 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLDedupTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLDedupTest.java @@ -5,9 +5,15 @@ package org.opensearch.sql.ppl.calcite; +import org.apache.calcite.plan.hep.HepPlanner; +import org.apache.calcite.plan.hep.HepProgram; +import org.apache.calcite.plan.hep.HepProgramBuilder; import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.rules.FilterMergeRule; import org.apache.calcite.test.CalciteAssert; +import org.junit.Assert; import org.junit.Test; +import org.opensearch.sql.calcite.plan.rule.PPLSimplifyDedupRule; public class CalcitePPLDedupTest extends CalcitePPLAbstractTest { @@ -353,4 +359,89 @@ public void testSortFieldProjectedAwayBeforeDedup() { + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); } + + /** + * Regression test for https://github.com/opensearch-project/sql/issues/5482 + * + *

When a user {@code where} precedes {@code dedup}, the user filter sits adjacent to the + * bucket-non-null filter that PPL emits for the dedup pattern. The HEP optimizer must run {@link + * PPLSimplifyDedupRule} before {@link FilterMergeRule}; otherwise the merged condition breaks the + * simplify-rule's operand match and dedup falls through to the in-memory {@code ROW_NUMBER} + * window form, defeating dedup pushdown to the shard. + * + *

This test mirrors the production HEP rule sequence in {@code CalciteToolsHelper#HEP_PROGRAM} + * and asserts that a {@code LogicalDedup} is produced in the presence of a user {@code where}. + */ + @Test + public void testDedupAfterWhereProducesLogicalDedup() { + String ppl = "source=EMP | where SAL > 1000 | dedup DEPTNO"; + RelNode raw = getRelNodeRaw(ppl); + + // Sanity: the un-merged plan has both the bucket-non-null filter and the user where filter + // adjacent to each other above the scan — exactly the shape that triggers the bug. + String rawExplain = raw.explain(); + Assert.assertTrue( + "Raw plan should contain the bucket-non-null filter:\n" + rawExplain, + rawExplain.contains("IS NOT NULL")); + Assert.assertTrue( + "Raw plan should contain the user where filter:\n" + rawExplain, + rawExplain.contains("1000")); + Assert.assertTrue( + "Raw plan should contain ROW_NUMBER prior to simplification:\n" + rawExplain, + rawExplain.contains("ROW_NUMBER")); + + // Apply the production HEP rule sequence: PPLSimplifyDedupRule first, then FilterMergeRule. + HepProgram program = + new HepProgramBuilder() + .addRuleInstance(PPLSimplifyDedupRule.DEDUP_SIMPLIFY_RULE) + .addRuleInstance(FilterMergeRule.Config.DEFAULT.toRule()) + .build(); + HepPlanner planner = new HepPlanner(program); + planner.setRoot(raw); + RelNode optimized = planner.findBestExp(); + + String optimizedExplain = optimized.explain(); + Assert.assertTrue( + "Optimized plan should contain LogicalDedup so DedupPushdownRule can match it:\n" + + optimizedExplain, + optimizedExplain.contains("LogicalDedup")); + Assert.assertFalse( + "Optimized plan should not retain ROW_NUMBER after simplification:\n" + optimizedExplain, + optimizedExplain.contains("ROW_NUMBER")); + } + + /** + * Companion to {@link #testDedupAfterWhereProducesLogicalDedup} that pins the buggy behavior: + * when {@link FilterMergeRule} runs before {@link PPLSimplifyDedupRule}, the merged conjunction + * defeats the simplify-rule's operand match and no {@code LogicalDedup} is produced. This guards + * against accidentally swapping the rule order in {@code CalciteToolsHelper#HEP_PROGRAM} back to + * the buggy ordering. + */ + @Test + public void testDedupAfterWhereWithFilterMergeFirstFailsToSimplify() { + String ppl = "source=EMP | where SAL > 1000 | dedup DEPTNO"; + RelNode raw = getRelNodeRaw(ppl); + + // Buggy order: FilterMergeRule first, then PPLSimplifyDedupRule. + HepProgram program = + new HepProgramBuilder() + .addRuleInstance(FilterMergeRule.Config.DEFAULT.toRule()) + .addRuleInstance(PPLSimplifyDedupRule.DEDUP_SIMPLIFY_RULE) + .build(); + HepPlanner planner = new HepPlanner(program); + planner.setRoot(raw); + RelNode optimized = planner.findBestExp(); + + String optimizedExplain = optimized.explain(); + Assert.assertFalse( + "Buggy order: PPLSimplifyDedupRule should not match after FilterMergeRule consolidates" + + " the bucket-non-null filter with the user where filter, so no LogicalDedup is" + + " produced:\n" + + optimizedExplain, + optimizedExplain.contains("LogicalDedup")); + Assert.assertTrue( + "Buggy order: ROW_NUMBER window form should remain since simplification was skipped:\n" + + optimizedExplain, + optimizedExplain.contains("ROW_NUMBER")); + } } From 19f506ddbf6edb13624d1c0fb0666c8ee1854ea8 Mon Sep 17 00:00:00 2001 From: ryan-gh-bot Date: Fri, 29 May 2026 02:02:34 +0000 Subject: [PATCH 2/7] [BugFix] Drop issue-link reference from regression-test JavaDoc (#5488) Per maintainer review feedback, the regression-test JavaDoc for testDedupAfterWhereProducesLogicalDedup mentioned the originating issue URL. The remaining JavaDoc paragraphs already describe the bug shape and the rule-ordering invariant, so the explicit issue link is unnecessary noise. Signed-off-by: ryan-gh-bot --- .../org/opensearch/sql/ppl/calcite/CalcitePPLDedupTest.java | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLDedupTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLDedupTest.java index 0be07bffcff..b130708b185 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLDedupTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLDedupTest.java @@ -361,9 +361,7 @@ public void testSortFieldProjectedAwayBeforeDedup() { } /** - * Regression test for https://github.com/opensearch-project/sql/issues/5482 - * - *

When a user {@code where} precedes {@code dedup}, the user filter sits adjacent to the + * When a user {@code where} precedes {@code dedup}, the user filter sits adjacent to the * bucket-non-null filter that PPL emits for the dedup pattern. The HEP optimizer must run {@link * PPLSimplifyDedupRule} before {@link FilterMergeRule}; otherwise the merged condition breaks the * simplify-rule's operand match and dedup falls through to the in-memory {@code ROW_NUMBER} From 0bf8f7c73d18eb7a0976999e44be7f389b4539e0 Mon Sep 17 00:00:00 2001 From: ryan-gh-bot Date: Fri, 29 May 2026 21:59:58 +0000 Subject: [PATCH 3/7] [BugFix] Make dedup simplify operand order-independent (#5488) Address review feedback on #5488: extend mayBeFilterFromBucketNonNull to accept the merged conjunction shape FilterMergeRule produces, so PPLSimplifyDedupRule fires regardless of whether FilterMergeRule has already merged the user where clause into the bucket-non-null filter. PPLSimplifyDedupRule.apply now splits the bottom filter into IS_NOT_NULL conjuncts on partition keys (absorbed into LogicalDedup semantics) and any remaining conjuncts (preserved as a separate filter below the new LogicalDedup), so a user predicate that was folded in is no longer dropped. With the operand predicate order-independent, the HEP rule order is no longer a load-bearing invariant. Revert the addRuleCollection -> addRuleInstance change in CalciteToolsHelper.HEP_PROGRAM that the previous patch introduced. Replace the regression test that pinned the buggy rule order with one that asserts the user-visible contract: with where preceding dedup, a LogicalDedup is produced and the user predicate is preserved regardless of which order FilterMergeRule and PPLSimplifyDedupRule fire. Signed-off-by: ryan-gh-bot --- .../plan/rule/PPLSimplifyDedupRule.java | 45 ++++++++++++ .../sql/calcite/utils/CalciteToolsHelper.java | 25 ++----- .../sql/calcite/utils/PlanUtils.java | 38 ++++++++-- .../sql/ppl/calcite/CalcitePPLDedupTest.java | 70 +++++++++++-------- 4 files changed, 126 insertions(+), 52 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/calcite/plan/rule/PPLSimplifyDedupRule.java b/core/src/main/java/org/opensearch/sql/calcite/plan/rule/PPLSimplifyDedupRule.java index 11eabfd483c..ff00eee485b 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/plan/rule/PPLSimplifyDedupRule.java +++ b/core/src/main/java/org/opensearch/sql/calcite/plan/rule/PPLSimplifyDedupRule.java @@ -6,11 +6,14 @@ package org.opensearch.sql.calcite.plan.rule; import java.util.ArrayList; +import java.util.HashSet; import java.util.List; +import java.util.Set; import java.util.function.Predicate; import java.util.stream.Collectors; import javax.annotation.Nullable; import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.plan.RelOptUtil; import org.apache.calcite.plan.RelRule; import org.apache.calcite.rel.RelCollation; import org.apache.calcite.rel.RelCollations; @@ -22,6 +25,7 @@ import org.apache.calcite.rex.RexInputRef; import org.apache.calcite.rex.RexLiteral; import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexUtil; import org.apache.calcite.rex.RexWindow; import org.apache.calcite.sql.SqlKind; import org.apache.calcite.sql.type.SqlTypeName; @@ -115,8 +119,41 @@ protected void apply( RelCollation inputCollation = extractCollationFromWindow(windows.get(0)); + // Split the bucket-non-null filter into two parts: + // 1) IS_NOT_NULL conjuncts on a partition key — these are the bucket-non-null guards PPL + // emits as part of the dedup pattern; LogicalDedup absorbs their semantics. + // 2) Everything else — for example, a user `where` predicate that FilterMergeRule may + // have folded into the same conjunction, or a user IS_NOT_NULL filter on a non-partition + // column. These must be preserved as a separate filter below the new LogicalDedup so + // user-visible behavior is unchanged regardless of whether FilterMergeRule fired. + Set partitionKeyIndices = new HashSet<>(); + for (RexNode key : dedupColumns) { + if (key instanceof RexInputRef ref) { + partitionKeyIndices.add(ref.getIndex()); + } + } + List bucketNonNullConjuncts = new ArrayList<>(); + List remainingConjuncts = new ArrayList<>(); + for (RexNode conjunct : RelOptUtil.conjunctions(bucketNonNullFilter.getCondition())) { + if (isNotNullOnPartitionKey(conjunct, partitionKeyIndices)) { + bucketNonNullConjuncts.add(conjunct); + } else { + remainingConjuncts.add(conjunct); + } + } + // Defensive: if no IS_NOT_NULL conjunct on a partition key is present, this filter is not + // actually a bucket-non-null filter — bail out without transforming. The loose operand + // predicate may have matched on an unrelated AND that happens to contain an IS_NOT_NULL on + // some other ref. + if (bucketNonNullConjuncts.isEmpty()) { + return; + } + RelBuilder relBuilder = call.builder(); relBuilder.push(bucketNonNullFilter.getInput()); + if (!remainingConjuncts.isEmpty()) { + relBuilder.filter(RexUtil.composeConjunction(relBuilder.getRexBuilder(), remainingConjuncts)); + } List> targetProjections = projectWithWindow.getNamedProjects().stream() .filter(p -> !p.getKey().isA(SqlKind.ROW_NUMBER)) @@ -134,6 +171,14 @@ protected void apply( call.transformTo(relBuilder.build()); } + private static boolean isNotNullOnPartitionKey(RexNode rex, Set partitionKeyIndices) { + return rex instanceof RexCall rexCall + && rexCall.isA(SqlKind.IS_NOT_NULL) + && !rexCall.getOperands().isEmpty() + && rexCall.getOperands().get(0) instanceof RexInputRef ref + && partitionKeyIndices.contains(ref.getIndex()); + } + private static @Nullable RelCollation extractCollationFromWindow(RexWindow window) { if (window.orderKeys.isEmpty()) { return null; diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/CalciteToolsHelper.java b/core/src/main/java/org/opensearch/sql/calcite/utils/CalciteToolsHelper.java index b8049c2d159..54b9d4ffbaf 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/CalciteToolsHelper.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/CalciteToolsHelper.java @@ -36,6 +36,7 @@ import java.sql.PreparedStatement; import java.sql.SQLException; import java.time.Instant; +import java.util.List; import java.util.Properties; import java.util.function.Consumer; import org.apache.calcite.adapter.enumerable.EnumerableConvention; @@ -58,6 +59,7 @@ import org.apache.calcite.plan.Convention; import org.apache.calcite.plan.RelOptCluster; import org.apache.calcite.plan.RelOptPlanner; +import org.apache.calcite.plan.RelOptRule; import org.apache.calcite.plan.RelOptSchema; import org.apache.calcite.plan.RelOptTable; import org.apache.calcite.plan.RelOptTable.ViewExpander; @@ -549,25 +551,12 @@ public RelNode visit(TableScan scan) { } } - /** - * Try to optimize the plan by using HepPlanner. - * - *

Rules are added as separate {@code addRuleInstance} phases so each rule is run to a fixed - * point before the next phase begins. {@code PPLSimplifyDedupRule} must run before {@code - * FilterMergeRule}: PPL emits the dedup pattern with an {@code IS NOT NULL} bucket-non-null - * filter directly above the scan, and the simplify rule's operand requires that filter to remain - * a pure {@code IS NOT NULL} (or {@code AND} of {@code IS NOT NULL}s). When a user {@code where} - * clause precedes the dedup, the user's filter sits adjacent to the bucket-non-null filter; if - * {@code FilterMergeRule} fires first it folds the two into a single conjunction, the simplify - * rule's operand match fails, no {@link org.opensearch.sql.calcite.plan.rel.LogicalDedup} is - * produced, and dedup pushdown is silently disabled. See - * https://github.com/opensearch-project/sql/issues/5482. - */ + /** Try to optimize the plan by using HepPlanner */ + private static final List hepRuleList = + List.of(FilterMergeRule.Config.DEFAULT.toRule(), PPLSimplifyDedupRule.DEDUP_SIMPLIFY_RULE); + private static final HepProgram HEP_PROGRAM = - new HepProgramBuilder() - .addRuleInstance(PPLSimplifyDedupRule.DEDUP_SIMPLIFY_RULE) - .addRuleInstance(FilterMergeRule.Config.DEFAULT.toRule()) - .build(); + new HepProgramBuilder().addRuleCollection(hepRuleList).build(); public static RelNode optimize(RelNode plan, CalcitePlanContext context) { Util.discard(context); diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java b/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java index 42ece7dc509..ef86fb7a1eb 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java @@ -785,17 +785,45 @@ static Mapping mapping(List rexNodes, RelDataType schema) { return Mappings.target(getSelectColumns(rexNodes), schema.getFieldCount()); } + /** + * Loose structural check for the bucket-non-null filter PPL emits as part of the {@code dedup} + * pattern. Returns true for either: + * + *

    + *
  • a pure {@code IS NOT NULL($ref)}, or + *
  • any {@code AND} whose conjuncts contain at least one {@code IS NOT NULL($ref)}. + *
+ * + *

This intentionally accepts the un-merged shape ({@code IS NOT NULL} alone) and the merged + * shape that {@link org.apache.calcite.rel.rules.FilterMergeRule} produces when a user {@code + * where} clause precedes the dedup ({@code AND(IS NOT NULL($pk), )}). The + * concrete partition-key match — and the split-out of any user predicate that was folded in — + * happens inside {@code PPLSimplifyDedupRule#apply}, which has access to the partition keys + * declared by the {@code ROW_NUMBER} window above this filter. Keeping the operand predicate + * order-independent means the simplify rule fires regardless of whether {@code FilterMergeRule} + * has already run, so dedup pushdown is no longer load-bearing on HEP rule ordering. + * + *

Misclassification is bounded by the surrounding four-level operand chain: a project that + * does not contain the {@code _row_number_dedup_} column, above a filter whose condition is + * {@code _row_number_ <= N} and which does contain {@code _row_number_dedup_}, above a project + * that does contain {@code _row_number_dedup_} (with a {@code ROW_NUMBER} window function), above + * this filter. That signature is unique to the PPL-emitted dedup pattern, so a user-written + * filter such as {@code IS NOT NULL(x) AND ...} cannot accidentally match. + */ static boolean mayBeFilterFromBucketNonNull(LogicalFilter filter) { RexNode condition = filter.getCondition(); - return isNotNullOnRef(condition) - || (condition instanceof RexCall rexCall - && rexCall.getOperator().equals(SqlStdOperatorTable.AND) - && rexCall.getOperands().stream().allMatch(PlanUtils::isNotNullOnRef)); + if (isNotNullOnRef(condition)) { + return true; + } + return condition instanceof RexCall rexCall + && rexCall.getOperator().equals(SqlStdOperatorTable.AND) + && rexCall.getOperands().stream().anyMatch(PlanUtils::isNotNullOnRef); } - private static boolean isNotNullOnRef(RexNode rex) { + static boolean isNotNullOnRef(RexNode rex) { return rex instanceof RexCall rexCall && rexCall.isA(SqlKind.IS_NOT_NULL) + && !rexCall.getOperands().isEmpty() && rexCall.getOperands().get(0) instanceof RexInputRef; } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLDedupTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLDedupTest.java index b130708b185..67200c023fc 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLDedupTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLDedupTest.java @@ -362,13 +362,11 @@ public void testSortFieldProjectedAwayBeforeDedup() { /** * When a user {@code where} precedes {@code dedup}, the user filter sits adjacent to the - * bucket-non-null filter that PPL emits for the dedup pattern. The HEP optimizer must run {@link - * PPLSimplifyDedupRule} before {@link FilterMergeRule}; otherwise the merged condition breaks the - * simplify-rule's operand match and dedup falls through to the in-memory {@code ROW_NUMBER} - * window form, defeating dedup pushdown to the shard. - * - *

This test mirrors the production HEP rule sequence in {@code CalciteToolsHelper#HEP_PROGRAM} - * and asserts that a {@code LogicalDedup} is produced in the presence of a user {@code where}. + * bucket-non-null filter that PPL emits for the dedup pattern. {@link PPLSimplifyDedupRule} must + * fold the dedup pattern into a {@link org.opensearch.sql.calcite.plan.rel.LogicalDedup} so that + * {@code DedupPushdownRule} can match it; otherwise dedup falls through to the in-memory {@code + * ROW_NUMBER} window form, defeating dedup pushdown to the shard. The user predicate must be + * preserved as a separate filter below the new {@code LogicalDedup}. */ @Test public void testDedupAfterWhereProducesLogicalDedup() { @@ -376,27 +374,25 @@ public void testDedupAfterWhereProducesLogicalDedup() { RelNode raw = getRelNodeRaw(ppl); // Sanity: the un-merged plan has both the bucket-non-null filter and the user where filter - // adjacent to each other above the scan — exactly the shape that triggers the bug. + // adjacent to each other above the scan — exactly the shape that triggered the original bug. String rawExplain = raw.explain(); Assert.assertTrue( "Raw plan should contain the bucket-non-null filter:\n" + rawExplain, rawExplain.contains("IS NOT NULL")); Assert.assertTrue( "Raw plan should contain the user where filter:\n" + rawExplain, - rawExplain.contains("1000")); + rawExplain.contains(">($5, 1000)")); Assert.assertTrue( "Raw plan should contain ROW_NUMBER prior to simplification:\n" + rawExplain, rawExplain.contains("ROW_NUMBER")); - // Apply the production HEP rule sequence: PPLSimplifyDedupRule first, then FilterMergeRule. + // Apply rules in the order PPLSimplifyDedupRule -> FilterMergeRule. HepProgram program = new HepProgramBuilder() .addRuleInstance(PPLSimplifyDedupRule.DEDUP_SIMPLIFY_RULE) .addRuleInstance(FilterMergeRule.Config.DEFAULT.toRule()) .build(); - HepPlanner planner = new HepPlanner(program); - planner.setRoot(raw); - RelNode optimized = planner.findBestExp(); + RelNode optimized = runHepPlanner(raw, program); String optimizedExplain = optimized.explain(); Assert.assertTrue( @@ -406,40 +402,56 @@ public void testDedupAfterWhereProducesLogicalDedup() { Assert.assertFalse( "Optimized plan should not retain ROW_NUMBER after simplification:\n" + optimizedExplain, optimizedExplain.contains("ROW_NUMBER")); + Assert.assertTrue( + "User where predicate must be preserved as a filter below LogicalDedup:\n" + + optimizedExplain, + optimizedExplain.contains(">($5, 1000)")); } /** - * Companion to {@link #testDedupAfterWhereProducesLogicalDedup} that pins the buggy behavior: - * when {@link FilterMergeRule} runs before {@link PPLSimplifyDedupRule}, the merged conjunction - * defeats the simplify-rule's operand match and no {@code LogicalDedup} is produced. This guards - * against accidentally swapping the rule order in {@code CalciteToolsHelper#HEP_PROGRAM} back to - * the buggy ordering. + * Companion to {@link #testDedupAfterWhereProducesLogicalDedup} that pins the user-visible + * contract: with a {@code where} preceding {@code dedup}, a {@code LogicalDedup} must be produced + * regardless of the order in which {@link FilterMergeRule} and {@link PPLSimplifyDedupRule} fire. + * + *

The simplify rule's bucket-non-null operand predicate is order-independent — it accepts both + * a pure {@code IS NOT NULL} and an {@code AND} that contains an {@code IS NOT NULL} on a + * partition key — so {@code FilterMergeRule} firing first no longer disables dedup pushdown. This + * guards against re-introducing the original regression by reordering, removing, or adding rules + * to {@code CalciteToolsHelper#HEP_PROGRAM}. */ @Test - public void testDedupAfterWhereWithFilterMergeFirstFailsToSimplify() { + public void testDedupAfterWhereProducesLogicalDedupRegardlessOfRuleOrder() { String ppl = "source=EMP | where SAL > 1000 | dedup DEPTNO"; RelNode raw = getRelNodeRaw(ppl); - // Buggy order: FilterMergeRule first, then PPLSimplifyDedupRule. + // Swapped order: FilterMergeRule first, then PPLSimplifyDedupRule. This is the order that + // produced the original bug (#5482) before the fix. HepProgram program = new HepProgramBuilder() .addRuleInstance(FilterMergeRule.Config.DEFAULT.toRule()) .addRuleInstance(PPLSimplifyDedupRule.DEDUP_SIMPLIFY_RULE) .build(); - HepPlanner planner = new HepPlanner(program); - planner.setRoot(raw); - RelNode optimized = planner.findBestExp(); + RelNode optimized = runHepPlanner(raw, program); String optimizedExplain = optimized.explain(); - Assert.assertFalse( - "Buggy order: PPLSimplifyDedupRule should not match after FilterMergeRule consolidates" - + " the bucket-non-null filter with the user where filter, so no LogicalDedup is" - + " produced:\n" + Assert.assertTrue( + "Even with FilterMergeRule firing first, PPLSimplifyDedupRule must still produce" + + " LogicalDedup so DedupPushdownRule can match it:\n" + optimizedExplain, optimizedExplain.contains("LogicalDedup")); + Assert.assertFalse( + "ROW_NUMBER window form should be removed after simplification:\n" + optimizedExplain, + optimizedExplain.contains("ROW_NUMBER")); Assert.assertTrue( - "Buggy order: ROW_NUMBER window form should remain since simplification was skipped:\n" + "User where predicate must be preserved as a filter below LogicalDedup, even when" + + " FilterMergeRule fired first and folded it into the bucket-non-null filter:\n" + optimizedExplain, - optimizedExplain.contains("ROW_NUMBER")); + optimizedExplain.contains(">($5, 1000)")); + } + + private static RelNode runHepPlanner(RelNode root, HepProgram program) { + HepPlanner planner = new HepPlanner(program); + planner.setRoot(root); + return planner.findBestExp(); } } From ae86ecea3f8b1ef84e0dd6fa3b4d68dd09a4c505 Mon Sep 17 00:00:00 2001 From: Jialiang Liang Date: Fri, 5 Jun 2026 09:37:11 -0700 Subject: [PATCH 4/7] Address review comments on #5488 Per @penghuo review: PlanUtils.java - Revert mayBeFilterFromBucketNonNull to the original ternary form; drop the early-return refactor (no behavior change, just cleaner). - Drop the !rexCall.getOperands().isEmpty() guard before .get(0): IS NOT NULL is always unary in Calcite, so the check is dead. - Trim the JavaDoc to the essentials (un-merged vs merged-AND shape; concrete partition-key match happens in PPLSimplifyDedupRule#apply). - Promote isNotNullOnRef from package-private to public so the dedup rule can reuse it from a different package. PPLSimplifyDedupRule.java - isNotNullOnPartitionKey now delegates the IS NOT NULL($ref) structural check to PlanUtils.isNotNullOnRef and adds the partition-key index check on top. CalciteExplainIT.java - Add testDedupAfterWherePushDown: an end-to-end regression that runs the shape `... | where | dedup ` and asserts (a) LogicalDedup appears in the explain output (PPLSimplifyDedupRule fired even after FilterMergeRule had a chance to merge the two filters), and (b) EnumerableWindow does NOT appear (the in-memory ROW_NUMBER fallback the bug caused is gone). Signed-off-by: Jialiang Liang --- .../plan/rule/PPLSimplifyDedupRule.java | 10 ++--- .../sql/calcite/utils/PlanUtils.java | 40 +++++-------------- .../sql/calcite/remote/CalciteExplainIT.java | 23 +++++++++++ 3 files changed, 37 insertions(+), 36 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/calcite/plan/rule/PPLSimplifyDedupRule.java b/core/src/main/java/org/opensearch/sql/calcite/plan/rule/PPLSimplifyDedupRule.java index ff00eee485b..676b1e9a776 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/plan/rule/PPLSimplifyDedupRule.java +++ b/core/src/main/java/org/opensearch/sql/calcite/plan/rule/PPLSimplifyDedupRule.java @@ -172,11 +172,11 @@ protected void apply( } private static boolean isNotNullOnPartitionKey(RexNode rex, Set partitionKeyIndices) { - return rex instanceof RexCall rexCall - && rexCall.isA(SqlKind.IS_NOT_NULL) - && !rexCall.getOperands().isEmpty() - && rexCall.getOperands().get(0) instanceof RexInputRef ref - && partitionKeyIndices.contains(ref.getIndex()); + if (!PlanUtils.isNotNullOnRef(rex)) { + return false; + } + RexInputRef ref = (RexInputRef) ((RexCall) rex).getOperands().get(0); + return partitionKeyIndices.contains(ref.getIndex()); } private static @Nullable RelCollation extractCollationFromWindow(RexWindow window) { diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java b/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java index ef86fb7a1eb..b63c4a0237f 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java @@ -786,44 +786,22 @@ static Mapping mapping(List rexNodes, RelDataType schema) { } /** - * Loose structural check for the bucket-non-null filter PPL emits as part of the {@code dedup} - * pattern. Returns true for either: - * - *

    - *
  • a pure {@code IS NOT NULL($ref)}, or - *
  • any {@code AND} whose conjuncts contain at least one {@code IS NOT NULL($ref)}. - *
- * - *

This intentionally accepts the un-merged shape ({@code IS NOT NULL} alone) and the merged - * shape that {@link org.apache.calcite.rel.rules.FilterMergeRule} produces when a user {@code - * where} clause precedes the dedup ({@code AND(IS NOT NULL($pk), )}). The - * concrete partition-key match — and the split-out of any user predicate that was folded in — - * happens inside {@code PPLSimplifyDedupRule#apply}, which has access to the partition keys - * declared by the {@code ROW_NUMBER} window above this filter. Keeping the operand predicate - * order-independent means the simplify rule fires regardless of whether {@code FilterMergeRule} - * has already run, so dedup pushdown is no longer load-bearing on HEP rule ordering. - * - *

Misclassification is bounded by the surrounding four-level operand chain: a project that - * does not contain the {@code _row_number_dedup_} column, above a filter whose condition is - * {@code _row_number_ <= N} and which does contain {@code _row_number_dedup_}, above a project - * that does contain {@code _row_number_dedup_} (with a {@code ROW_NUMBER} window function), above - * this filter. That signature is unique to the PPL-emitted dedup pattern, so a user-written - * filter such as {@code IS NOT NULL(x) AND ...} cannot accidentally match. + * Accepts the un-merged {@code IS NOT NULL($ref)} shape and the merged-{@code AND} shape that + * {@link org.apache.calcite.rel.rules.FilterMergeRule} produces when a user {@code where} + * precedes the dedup. The concrete partition-key match — and the split-out of any user predicate + * folded into the AND — happens in {@link PPLSimplifyDedupRule#apply}. */ static boolean mayBeFilterFromBucketNonNull(LogicalFilter filter) { RexNode condition = filter.getCondition(); - if (isNotNullOnRef(condition)) { - return true; - } - return condition instanceof RexCall rexCall - && rexCall.getOperator().equals(SqlStdOperatorTable.AND) - && rexCall.getOperands().stream().anyMatch(PlanUtils::isNotNullOnRef); + return isNotNullOnRef(condition) + || (condition instanceof RexCall rexCall + && rexCall.getOperator().equals(SqlStdOperatorTable.AND) + && rexCall.getOperands().stream().anyMatch(PlanUtils::isNotNullOnRef)); } - static boolean isNotNullOnRef(RexNode rex) { + public static boolean isNotNullOnRef(RexNode rex) { return rex instanceof RexCall rexCall && rexCall.isA(SqlKind.IS_NOT_NULL) - && !rexCall.getOperands().isEmpty() && rexCall.getOperands().get(0) instanceof RexInputRef; } diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java index 8ad0be5cc88..c432c966d42 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java @@ -2153,6 +2153,29 @@ public void testTransposeExplain() throws IOException { + "| transpose 4 column_name='column_names'")); } + /** + * Regression test for #5482: a user {@code where} clause adjacent to dedup's bucket-non-null + * filter must still let {@code PPLSimplifyDedupRule} fire even after {@code FilterMergeRule} has + * merged the two filters into a single AND. Without the fix, the plan keeps the in-memory {@code + * ROW_NUMBER} window over a row-fetching scan and dedup pushdown is silently disabled. + */ + @Test + public void testDedupAfterWherePushDown() throws IOException { + enabledOnlyWhenPushdownIsEnabled(); + String result = + explainQueryToString( + "source=opensearch-sql_test_index_account | where age > 25 | dedup gender"); + assertTrue( + "Expected LogicalDedup in the plan (simplify rule should fire after FilterMergeRule):\n" + + result, + result.contains("LogicalDedup")); + assertFalse( + "Unexpected EnumerableWindow in the plan — dedup fell back to the in-memory ROW_NUMBER" + + " form, which means PPLSimplifyDedupRule did not fire:\n" + + result, + result.contains("EnumerableWindow")); + } + public void testComplexDedup() throws IOException { enabledOnlyWhenPushdownIsEnabled(); String expected = loadExpectedPlan("explain_dedup_complex1.yaml"); From a66ad5beb440d5cdc7a87d57f3cf2205a43ec80b Mon Sep 17 00:00:00 2001 From: Jialiang Liang Date: Fri, 5 Jun 2026 13:15:30 -0700 Subject: [PATCH 5/7] Push user where filter into scan when blocking dedup pushdown PPLSimplifyDedupRule correctly produces Dedup -> Filter(user where) -> Scan when a `where` precedes `dedup`. The Filter between Dedup and Scan blocks DedupPushdownRule's strict Dedup -> Project -> Scan operand chain, so Volcano falls back to PPLDedupConvertRule and the plan ends up with an in-memory ROW_NUMBER window instead of the pushed-down composite + top_hits aggregation. Add a WITH_FILTER operand variant to DedupPushdownRule that matches Dedup -> Filter -> Scan, pushes the filter into the scan, then runs the standard apply() on the resulting Dedup -> Project -> Scan shape. Signed-off-by: Jialiang Liang --- .../sql/calcite/remote/CalciteExplainIT.java | 18 ++++---- .../planner/rules/DedupPushdownRule.java | 41 +++++++++++++++++++ .../planner/rules/OpenSearchIndexRules.java | 3 ++ .../sql/ppl/calcite/CalcitePPLDedupTest.java | 32 +++++++++++++++ 4 files changed, 85 insertions(+), 9 deletions(-) diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java index c432c966d42..d25631c5ace 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java @@ -2154,10 +2154,9 @@ public void testTransposeExplain() throws IOException { } /** - * Regression test for #5482: a user {@code where} clause adjacent to dedup's bucket-non-null - * filter must still let {@code PPLSimplifyDedupRule} fire even after {@code FilterMergeRule} has - * merged the two filters into a single AND. Without the fix, the plan keeps the in-memory {@code - * ROW_NUMBER} window over a row-fetching scan and dedup pushdown is silently disabled. + * With a user {@code where} clause preceding {@code dedup}, the physical plan must push both the + * filter and the dedup-as-aggregation into the OpenSearch scan, not fall back to an in-memory + * {@code ROW_NUMBER} window above a row-fetching scan. */ @Test public void testDedupAfterWherePushDown() throws IOException { @@ -2166,12 +2165,13 @@ public void testDedupAfterWherePushDown() throws IOException { explainQueryToString( "source=opensearch-sql_test_index_account | where age > 25 | dedup gender"); assertTrue( - "Expected LogicalDedup in the plan (simplify rule should fire after FilterMergeRule):\n" - + result, - result.contains("LogicalDedup")); + "Expected user where filter pushed down to the scan:\n" + result, + result.contains("FILTER->>($8, 25)")); + assertTrue( + "Expected dedup pushed down as AGGREGATION (composite + top_hits):\n" + result, + result.contains("AGGREGATION->")); assertFalse( - "Unexpected EnumerableWindow in the plan — dedup fell back to the in-memory ROW_NUMBER" - + " form, which means PPLSimplifyDedupRule did not fire:\n" + "Unexpected EnumerableWindow — dedup fell back to the in-memory ROW_NUMBER form:\n" + result, result.contains("EnumerableWindow")); } diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java index d37957d4a9f..7224a1ebda3 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java @@ -13,10 +13,12 @@ import java.util.stream.IntStream; import javax.annotation.Nullable; import org.apache.calcite.plan.RelOptRuleCall; +import org.apache.calcite.rel.AbstractRelNode; import org.apache.calcite.rel.RelCollation; import org.apache.calcite.rel.RelCollations; import org.apache.calcite.rel.RelFieldCollation; import org.apache.calcite.rel.logical.LogicalAggregate; +import org.apache.calcite.rel.logical.LogicalFilter; import org.apache.calcite.rel.logical.LogicalProject; import org.apache.calcite.rel.rules.SubstitutionRule; import org.apache.calcite.rex.RexInputRef; @@ -47,6 +49,24 @@ protected DedupPushdownRule(Config config) { @Override protected void onMatchImpl(RelOptRuleCall call) { final LogicalDedup logicalDedup = call.rel(0); + if (call.rels[1] instanceof LogicalFilter filter) { + // Push the filter into the scan, then synthesize an identity project so the standard + // apply() can run on the resulting Dedup → Project → Scan shape. + final CalciteLogicalIndexScan scan = call.rel(2); + AbstractRelNode scanWithFilter = scan.pushDownFilter(filter); + if (scanWithFilter == null) { + return; + } + CalciteLogicalIndexScan newScan = (CalciteLogicalIndexScan) scanWithFilter; + RelBuilder relBuilder = call.builder(); + relBuilder.push(newScan); + // force=true so the identity project is materialized (apply() requires a LogicalProject) + relBuilder.project( + relBuilder.fields(), newScan.getRowType().getFieldNames(), /* force= */ true); + LogicalProject identityProject = (LogicalProject) relBuilder.build(); + apply(call, logicalDedup, identityProject, newScan); + return; + } final LogicalProject projectWithExpr = call.rel(1); final CalciteLogicalIndexScan scan = call.rel(2); apply(call, logicalDedup, projectWithExpr, scan); @@ -226,6 +246,27 @@ public interface Config extends OpenSearchRuleConfig { .predicate(Config::tableScanChecker) .noInputs()))); + // +- LogicalDedup + // +- LogicalFilter (e.g. a `where` predicate preserved below dedup by + // +- CalciteLogicalIndexScan PPLSimplifyDedupRule when an upstream `where` is folded + // into the bucket-non-null filter) + Config WITH_FILTER = + ImmutableDedupPushdownRule.Config.builder() + .build() + .withDescription("Dedup-to-Aggregate-WithFilter") + .withOperandSupplier( + b0 -> + b0.operand(LogicalDedup.class) + .predicate(dedup -> !dedup.getKeepEmpty()) + .oneInput( + b1 -> + b1.operand(LogicalFilter.class) + .oneInput( + b2 -> + b2.operand(CalciteLogicalIndexScan.class) + .predicate(Config::tableScanChecker) + .noInputs()))); + /** * Project must be not pushed since the name of expression would lose after project pushed. E.g. * in query "eval new_a = a + 1 | dedup b", the "new_a" will lose. diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/OpenSearchIndexRules.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/OpenSearchIndexRules.java index 0068f445ce7..3c8508cc455 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/OpenSearchIndexRules.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/OpenSearchIndexRules.java @@ -50,6 +50,8 @@ public class OpenSearchIndexRules { SortIndexScanRule.Config.DEFAULT.toRule(); private static final DedupPushdownRule DEDUP_PUSH_DOWN = DedupPushdownRule.Config.DEFAULT.toRule(); + private static final DedupPushdownRule DEDUP_PUSH_DOWN_WITH_FILTER = + DedupPushdownRule.Config.WITH_FILTER.toRule(); private static final SortProjectExprTransposeRule SORT_PROJECT_EXPR_TRANSPOSE = SortProjectExprTransposeRule.Config.DEFAULT.toRule(); private static final ExpandCollationOnProjectExprRule EXPAND_COLLATION_ON_PROJECT_EXPR = @@ -75,6 +77,7 @@ public class OpenSearchIndexRules { LIMIT_INDEX_SCAN, SORT_INDEX_SCAN, DEDUP_PUSH_DOWN, + DEDUP_PUSH_DOWN_WITH_FILTER, SORT_PROJECT_EXPR_TRANSPOSE, SORT_AGGREGATION_METRICS_RULE, RARE_TOP_PUSH_DOWN, diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLDedupTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLDedupTest.java index 67200c023fc..92f09dc5f29 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLDedupTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLDedupTest.java @@ -449,6 +449,38 @@ public void testDedupAfterWhereProducesLogicalDedupRegardlessOfRuleOrder() { optimizedExplain.contains(">($5, 1000)")); } + /** + * Mirrors the exact shape of {@code CalciteToolsHelper.HEP_PROGRAM} used in production + * ({@code addRuleCollection(List.of(FilterMergeRule, PPLSimplifyDedupRule))}). The two + * sequenced-{@code addRuleInstance} tests above prove the rule fires under either ordering, + * but production runs both rules in a single collection instruction. This test pins that exact + * shape to catch any addRuleCollection-vs-addRuleInstance traversal differences. + */ + @Test + public void testDedupAfterWhereProducesLogicalDedupWithProductionHepProgram() { + String ppl = "source=EMP | where SAL > 1000 | dedup DEPTNO"; + RelNode raw = getRelNodeRaw(ppl); + + HepProgram program = + new HepProgramBuilder() + .addRuleCollection( + java.util.List.of( + FilterMergeRule.Config.DEFAULT.toRule(), + PPLSimplifyDedupRule.DEDUP_SIMPLIFY_RULE)) + .build(); + RelNode optimized = runHepPlanner(raw, program); + + String optimizedExplain = optimized.explain(); + Assert.assertTrue( + "Production HEP_PROGRAM (addRuleCollection) must still produce LogicalDedup:\n" + + optimizedExplain, + optimizedExplain.contains("LogicalDedup")); + Assert.assertFalse( + "ROW_NUMBER window form should be removed under production HEP_PROGRAM:\n" + + optimizedExplain, + optimizedExplain.contains("ROW_NUMBER")); + } + private static RelNode runHepPlanner(RelNode root, HepProgram program) { HepPlanner planner = new HepPlanner(program); planner.setRoot(root); From 95e2aeb976bcbd71fd11ce3ecf7fa7d968679ece Mon Sep 17 00:00:00 2001 From: Jialiang Liang Date: Fri, 5 Jun 2026 13:44:17 -0700 Subject: [PATCH 6/7] Bail when filter is only partially pushable pushDownFilter returns a Filter (not a CalciteLogicalIndexScan) when the predicate analyzer can only partially push the condition. The previous cast would have thrown ClassCastException in that case. Use an instanceof-pattern check so the rule bails out cleanly and leaves the plan untouched, letting other rules handle the residual. Also drop a stale issue-link reference from a test comment. Signed-off-by: Jialiang Liang --- .../sql/opensearch/planner/rules/DedupPushdownRule.java | 9 ++++----- .../opensearch/sql/ppl/calcite/CalcitePPLDedupTest.java | 2 -- 2 files changed, 4 insertions(+), 7 deletions(-) diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java index 7224a1ebda3..648888ce9b7 100644 --- a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java +++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java @@ -13,7 +13,6 @@ import java.util.stream.IntStream; import javax.annotation.Nullable; import org.apache.calcite.plan.RelOptRuleCall; -import org.apache.calcite.rel.AbstractRelNode; import org.apache.calcite.rel.RelCollation; import org.apache.calcite.rel.RelCollations; import org.apache.calcite.rel.RelFieldCollation; @@ -51,13 +50,13 @@ protected void onMatchImpl(RelOptRuleCall call) { final LogicalDedup logicalDedup = call.rel(0); if (call.rels[1] instanceof LogicalFilter filter) { // Push the filter into the scan, then synthesize an identity project so the standard - // apply() can run on the resulting Dedup → Project → Scan shape. + // apply() can run on the resulting Dedup → Project → Scan shape. If the filter is only + // partially pushable, pushDownFilter returns a Filter wrapping the residual condition over + // the new scan; we can't strip a residual filter without breaking semantics, so bail. final CalciteLogicalIndexScan scan = call.rel(2); - AbstractRelNode scanWithFilter = scan.pushDownFilter(filter); - if (scanWithFilter == null) { + if (!(scan.pushDownFilter(filter) instanceof CalciteLogicalIndexScan newScan)) { return; } - CalciteLogicalIndexScan newScan = (CalciteLogicalIndexScan) scanWithFilter; RelBuilder relBuilder = call.builder(); relBuilder.push(newScan); // force=true so the identity project is materialized (apply() requires a LogicalProject) diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLDedupTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLDedupTest.java index 92f09dc5f29..35acdb1f7f6 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLDedupTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLDedupTest.java @@ -424,8 +424,6 @@ public void testDedupAfterWhereProducesLogicalDedupRegardlessOfRuleOrder() { String ppl = "source=EMP | where SAL > 1000 | dedup DEPTNO"; RelNode raw = getRelNodeRaw(ppl); - // Swapped order: FilterMergeRule first, then PPLSimplifyDedupRule. This is the order that - // produced the original bug (#5482) before the fix. HepProgram program = new HepProgramBuilder() .addRuleInstance(FilterMergeRule.Config.DEFAULT.toRule()) From 38fc6c3d45419cfd5ce5c865520a54659ca7147c Mon Sep 17 00:00:00 2001 From: Jialiang Liang Date: Fri, 5 Jun 2026 15:33:30 -0700 Subject: [PATCH 7/7] Apply spotless formatting to dedup unit test Reflowed the JavaDoc on testDedupAfterWhereProducesLogicalDedupWithProductionHepProgram to match Google Java Format's preferred line break, fixing the spotlessJavaCheck violation that failed the unit-test matrix on CI. Signed-off-by: Jialiang Liang --- .../sql/ppl/calcite/CalcitePPLDedupTest.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLDedupTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLDedupTest.java index 35acdb1f7f6..5f32c1b85bb 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLDedupTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLDedupTest.java @@ -448,11 +448,11 @@ public void testDedupAfterWhereProducesLogicalDedupRegardlessOfRuleOrder() { } /** - * Mirrors the exact shape of {@code CalciteToolsHelper.HEP_PROGRAM} used in production - * ({@code addRuleCollection(List.of(FilterMergeRule, PPLSimplifyDedupRule))}). The two - * sequenced-{@code addRuleInstance} tests above prove the rule fires under either ordering, - * but production runs both rules in a single collection instruction. This test pins that exact - * shape to catch any addRuleCollection-vs-addRuleInstance traversal differences. + * Mirrors the exact shape of {@code CalciteToolsHelper.HEP_PROGRAM} used in production ({@code + * addRuleCollection(List.of(FilterMergeRule, PPLSimplifyDedupRule))}). The two sequenced-{@code + * addRuleInstance} tests above prove the rule fires under either ordering, but production runs + * both rules in a single collection instruction. This test pins that exact shape to catch any + * addRuleCollection-vs-addRuleInstance traversal differences. */ @Test public void testDedupAfterWhereProducesLogicalDedupWithProductionHepProgram() {