From 8bfb649e79945872a714b1147c578e879fa05284 Mon Sep 17 00:00:00 2001
From: ryan-gh-bot <ryan-gh-bot@users.noreply.github.com>
Date: Fri, 29 May 2026 01:47:15 +0000
Subject: [PATCH 1/7] [BugFix] Restore dedup pushdown when combined with WHERE
 clause (#5482)

Run PPLSimplifyDedupRule before FilterMergeRule in the HEP optimizer so the bucket-non-null filter PPL emits for dedup is matched as-is. With the previous order, an upstream user where filter sat adjacent to the bucket-non-null filter; FilterMergeRule fired first and merged them into a conjunction that no longer satisfied PPLSimplifyDedupRule's operand predicate, defeating dedup pushdown to the shard.

Use sequential addRuleInstance phases for explicit ordering rather than addRuleCollection, which is documented as non-deterministic in firing order. Adds two regression tests in CalcitePPLDedupTest: one that asserts LogicalDedup is produced under the fixed order, and one that pins the buggy behavior under the swapped order.

Signed-off-by: ryan-gh-bot <ryan-gh-bot@users.noreply.github.com>
---
 .../sql/calcite/utils/CalciteToolsHelper.java | 25 +++--
 .../ppl/calcite/CalcitePPLAbstractTest.java   | 14 +++
 .../sql/ppl/calcite/CalcitePPLDedupTest.java  | 91 +++++++++++++++++++
 3 files changed, 123 insertions(+), 7 deletions(-)
diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/CalciteToolsHelper.java b/core/src/main/java/org/opensearch/sql/calcite/utils/CalciteToolsHelper.java
index 54b9d4ffbaf..b8049c2d159 100644
--- a/core/src/main/java/org/opensearch/sql/calcite/utils/CalciteToolsHelper.java
+++ b/core/src/main/java/org/opensearch/sql/calcite/utils/CalciteToolsHelper.java
@@ -36,7 +36,6 @@
 import java.sql.PreparedStatement;
 import java.sql.SQLException;
 import java.time.Instant;
-import java.util.List;
 import java.util.Properties;
 import java.util.function.Consumer;
 import org.apache.calcite.adapter.enumerable.EnumerableConvention;
@@ -59,7 +58,6 @@
 import org.apache.calcite.plan.Convention;
 import org.apache.calcite.plan.RelOptCluster;
 import org.apache.calcite.plan.RelOptPlanner;
-import org.apache.calcite.plan.RelOptRule;
 import org.apache.calcite.plan.RelOptSchema;
 import org.apache.calcite.plan.RelOptTable;
 import org.apache.calcite.plan.RelOptTable.ViewExpander;
@@ -551,12 +549,25 @@ public RelNode visit(TableScan scan) {
     }
   }
 
-  /** Try to optimize the plan by using HepPlanner */
-  private static final List<RelOptRule> hepRuleList =
-      List.of(FilterMergeRule.Config.DEFAULT.toRule(), PPLSimplifyDedupRule.DEDUP_SIMPLIFY_RULE);
-
+  /**
+   * Try to optimize the plan by using HepPlanner.
+   *
+   * <p>Rules are added as separate {@code addRuleInstance} phases so each rule is run to a fixed
+   * point before the next phase begins. {@code PPLSimplifyDedupRule} must run before {@code
+   * FilterMergeRule}: PPL emits the dedup pattern with an {@code IS NOT NULL} bucket-non-null
+   * filter directly above the scan, and the simplify rule's operand requires that filter to remain
+   * a pure {@code IS NOT NULL} (or {@code AND} of {@code IS NOT NULL}s). When a user {@code where}
+   * clause precedes the dedup, the user's filter sits adjacent to the bucket-non-null filter; if
+   * {@code FilterMergeRule} fires first it folds the two into a single conjunction, the simplify
+   * rule's operand match fails, no {@link org.opensearch.sql.calcite.plan.rel.LogicalDedup} is
+   * produced, and dedup pushdown is silently disabled. See
+   * https://github.com/opensearch-project/sql/issues/5482.
+   */
   private static final HepProgram HEP_PROGRAM =
-      new HepProgramBuilder().addRuleCollection(hepRuleList).build();
+      new HepProgramBuilder()
+          .addRuleInstance(PPLSimplifyDedupRule.DEDUP_SIMPLIFY_RULE)
+          .addRuleInstance(FilterMergeRule.Config.DEFAULT.toRule())
+          .build();
 
   public static RelNode optimize(RelNode plan, CalcitePlanContext context) {
     Util.discard(context);
diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLAbstractTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLAbstractTest.java
index ab07cd9b5c1..ec3b57d02cc 100644
--- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLAbstractTest.java
+++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLAbstractTest.java
@@ -110,6 +110,20 @@ public RelNode getRelNode(String ppl) {
     return root;
   }
 
+  /**
+   * Get the root RelNode of the given PPL query without merging adjacent filters. Useful for
+   * regression tests that need to exercise rule ordering against the un-merged shape that PPL
+   * actually emits to the production HEP optimizer.
+   */
+  public RelNode getRelNodeRaw(String ppl) {
+    CalcitePlanContext context = createBuilderContext();
+    Query query = (Query) plan(pplParser, ppl);
+    planTransformer.analyze(query.getPlan(), context);
+    RelNode root = context.relBuilder.build();
+    System.out.println(root.explain());
+    return root;
+  }
+
   private RelNode mergeAdjacentFilters(RelNode relNode) {
     HepProgram program =
         new HepProgramBuilder().addRuleInstance(FilterMergeRule.Config.DEFAULT.toRule()).build();
diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLDedupTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLDedupTest.java
index ca1a789b0f4..0be07bffcff 100644
--- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLDedupTest.java
+++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLDedupTest.java
@@ -5,9 +5,15 @@
 
 package org.opensearch.sql.ppl.calcite;
 
+import org.apache.calcite.plan.hep.HepPlanner;
+import org.apache.calcite.plan.hep.HepProgram;
+import org.apache.calcite.plan.hep.HepProgramBuilder;
 import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.rules.FilterMergeRule;
 import org.apache.calcite.test.CalciteAssert;
+import org.junit.Assert;
 import org.junit.Test;
+import org.opensearch.sql.calcite.plan.rule.PPLSimplifyDedupRule;
 
 public class CalcitePPLDedupTest extends CalcitePPLAbstractTest {
 
@@ -353,4 +359,89 @@ public void testSortFieldProjectedAwayBeforeDedup() {
             + "            LogicalTableScan(table=[[scott, EMP]])\n";
     verifyLogical(root, expectedLogical);
   }
+
+  /**
+   * Regression test for https://github.com/opensearch-project/sql/issues/5482
+   *
+   * <p>When a user {@code where} precedes {@code dedup}, the user filter sits adjacent to the
+   * bucket-non-null filter that PPL emits for the dedup pattern. The HEP optimizer must run {@link
+   * PPLSimplifyDedupRule} before {@link FilterMergeRule}; otherwise the merged condition breaks the
+   * simplify-rule's operand match and dedup falls through to the in-memory {@code ROW_NUMBER}
+   * window form, defeating dedup pushdown to the shard.
+   *
+   * <p>This test mirrors the production HEP rule sequence in {@code CalciteToolsHelper#HEP_PROGRAM}
+   * and asserts that a {@code LogicalDedup} is produced in the presence of a user {@code where}.
+   */
+  @Test
+  public void testDedupAfterWhereProducesLogicalDedup() {
+    String ppl = "source=EMP | where SAL > 1000 | dedup DEPTNO";
+    RelNode raw = getRelNodeRaw(ppl);
+
+    // Sanity: the un-merged plan has both the bucket-non-null filter and the user where filter
+    // adjacent to each other above the scan — exactly the shape that triggers the bug.
+    String rawExplain = raw.explain();
+    Assert.assertTrue(
+        "Raw plan should contain the bucket-non-null filter:\n" + rawExplain,
+        rawExplain.contains("IS NOT NULL"));
+    Assert.assertTrue(
+        "Raw plan should contain the user where filter:\n" + rawExplain,
+        rawExplain.contains("1000"));
+    Assert.assertTrue(
+        "Raw plan should contain ROW_NUMBER prior to simplification:\n" + rawExplain,
+        rawExplain.contains("ROW_NUMBER"));
+
+    // Apply the production HEP rule sequence: PPLSimplifyDedupRule first, then FilterMergeRule.
+    HepProgram program =
+        new HepProgramBuilder()
+            .addRuleInstance(PPLSimplifyDedupRule.DEDUP_SIMPLIFY_RULE)
+            .addRuleInstance(FilterMergeRule.Config.DEFAULT.toRule())
+            .build();
+    HepPlanner planner = new HepPlanner(program);
+    planner.setRoot(raw);
+    RelNode optimized = planner.findBestExp();
+
+    String optimizedExplain = optimized.explain();
+    Assert.assertTrue(
+        "Optimized plan should contain LogicalDedup so DedupPushdownRule can match it:\n"
+            + optimizedExplain,
+        optimizedExplain.contains("LogicalDedup"));
+    Assert.assertFalse(
+        "Optimized plan should not retain ROW_NUMBER after simplification:\n" + optimizedExplain,
+        optimizedExplain.contains("ROW_NUMBER"));
+  }
+
+  /**
+   * Companion to {@link #testDedupAfterWhereProducesLogicalDedup} that pins the buggy behavior:
+   * when {@link FilterMergeRule} runs before {@link PPLSimplifyDedupRule}, the merged conjunction
+   * defeats the simplify-rule's operand match and no {@code LogicalDedup} is produced. This guards
+   * against accidentally swapping the rule order in {@code CalciteToolsHelper#HEP_PROGRAM} back to
+   * the buggy ordering.
+   */
+  @Test
+  public void testDedupAfterWhereWithFilterMergeFirstFailsToSimplify() {
+    String ppl = "source=EMP | where SAL > 1000 | dedup DEPTNO";
+    RelNode raw = getRelNodeRaw(ppl);
+
+    // Buggy order: FilterMergeRule first, then PPLSimplifyDedupRule.
+    HepProgram program =
+        new HepProgramBuilder()
+            .addRuleInstance(FilterMergeRule.Config.DEFAULT.toRule())
+            .addRuleInstance(PPLSimplifyDedupRule.DEDUP_SIMPLIFY_RULE)
+            .build();
+    HepPlanner planner = new HepPlanner(program);
+    planner.setRoot(raw);
+    RelNode optimized = planner.findBestExp();
+
+    String optimizedExplain = optimized.explain();
+    Assert.assertFalse(
+        "Buggy order: PPLSimplifyDedupRule should not match after FilterMergeRule consolidates"
+            + " the bucket-non-null filter with the user where filter, so no LogicalDedup is"
+            + " produced:\n"
+            + optimizedExplain,
+        optimizedExplain.contains("LogicalDedup"));
+    Assert.assertTrue(
+        "Buggy order: ROW_NUMBER window form should remain since simplification was skipped:\n"
+            + optimizedExplain,
+        optimizedExplain.contains("ROW_NUMBER"));
+  }
 }

From 19f506ddbf6edb13624d1c0fb0666c8ee1854ea8 Mon Sep 17 00:00:00 2001
From: ryan-gh-bot <ryan-gh-bot@users.noreply.github.com>
Date: Fri, 29 May 2026 02:02:34 +0000
Subject: [PATCH 2/7] [BugFix] Drop issue-link reference from regression-test
 JavaDoc (#5488)

Per maintainer review feedback, the regression-test JavaDoc for testDedupAfterWhereProducesLogicalDedup mentioned the originating issue URL. The remaining JavaDoc paragraphs already describe the bug shape and the rule-ordering invariant, so the explicit issue link is unnecessary noise.

Signed-off-by: ryan-gh-bot <ryan-gh-bot@users.noreply.github.com>
---
 .../org/opensearch/sql/ppl/calcite/CalcitePPLDedupTest.java   | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLDedupTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLDedupTest.java
index 0be07bffcff..b130708b185 100644
--- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLDedupTest.java
+++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLDedupTest.java
@@ -361,9 +361,7 @@ public void testSortFieldProjectedAwayBeforeDedup() {
   }
 
   /**
-   * Regression test for https://github.com/opensearch-project/sql/issues/5482
-   *
-   * <p>When a user {@code where} precedes {@code dedup}, the user filter sits adjacent to the
+   * When a user {@code where} precedes {@code dedup}, the user filter sits adjacent to the
    * bucket-non-null filter that PPL emits for the dedup pattern. The HEP optimizer must run {@link
    * PPLSimplifyDedupRule} before {@link FilterMergeRule}; otherwise the merged condition breaks the
    * simplify-rule's operand match and dedup falls through to the in-memory {@code ROW_NUMBER}

From 0bf8f7c73d18eb7a0976999e44be7f389b4539e0 Mon Sep 17 00:00:00 2001
From: ryan-gh-bot <ryan-gh-bot@users.noreply.github.com>
Date: Fri, 29 May 2026 21:59:58 +0000
Subject: [PATCH 3/7] [BugFix] Make dedup simplify operand order-independent
 (#5488)

Address review feedback on #5488: extend mayBeFilterFromBucketNonNull to accept the merged conjunction shape FilterMergeRule produces, so PPLSimplifyDedupRule fires regardless of whether FilterMergeRule has already merged the user where clause into the bucket-non-null filter. PPLSimplifyDedupRule.apply now splits the bottom filter into IS_NOT_NULL conjuncts on partition keys (absorbed into LogicalDedup semantics) and any remaining conjuncts (preserved as a separate filter below the new LogicalDedup), so a user predicate that was folded in is no longer dropped.

With the operand predicate order-independent, the HEP rule order is no longer a load-bearing invariant. Revert the addRuleCollection -> addRuleInstance change in CalciteToolsHelper.HEP_PROGRAM that the previous patch introduced. Replace the regression test that pinned the buggy rule order with one that asserts the user-visible contract: with where preceding dedup, a LogicalDedup is produced and the user predicate is preserved regardless of which order FilterMergeRule and PPLSimplifyDedupRule fire.

Signed-off-by: ryan-gh-bot <ryan-gh-bot@users.noreply.github.com>
---
 .../plan/rule/PPLSimplifyDedupRule.java       | 45 ++++++++++++
 .../sql/calcite/utils/CalciteToolsHelper.java | 25 ++-----
 .../sql/calcite/utils/PlanUtils.java          | 38 ++++++++--
 .../sql/ppl/calcite/CalcitePPLDedupTest.java  | 70 +++++++++++--------
 4 files changed, 126 insertions(+), 52 deletions(-)

diff --git a/core/src/main/java/org/opensearch/sql/calcite/plan/rule/PPLSimplifyDedupRule.java b/core/src/main/java/org/opensearch/sql/calcite/plan/rule/PPLSimplifyDedupRule.java
index 11eabfd483c..ff00eee485b 100644
--- a/core/src/main/java/org/opensearch/sql/calcite/plan/rule/PPLSimplifyDedupRule.java
+++ b/core/src/main/java/org/opensearch/sql/calcite/plan/rule/PPLSimplifyDedupRule.java
@@ -6,11 +6,14 @@
 package org.opensearch.sql.calcite.plan.rule;
 
 import java.util.ArrayList;
+import java.util.HashSet;
 import java.util.List;
+import java.util.Set;
 import java.util.function.Predicate;
 import java.util.stream.Collectors;
 import javax.annotation.Nullable;
 import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.plan.RelOptUtil;
 import org.apache.calcite.plan.RelRule;
 import org.apache.calcite.rel.RelCollation;
 import org.apache.calcite.rel.RelCollations;
@@ -22,6 +25,7 @@
 import org.apache.calcite.rex.RexInputRef;
 import org.apache.calcite.rex.RexLiteral;
 import org.apache.calcite.rex.RexNode;
+import org.apache.calcite.rex.RexUtil;
 import org.apache.calcite.rex.RexWindow;
 import org.apache.calcite.sql.SqlKind;
 import org.apache.calcite.sql.type.SqlTypeName;
@@ -115,8 +119,41 @@ protected void apply(
 
     RelCollation inputCollation = extractCollationFromWindow(windows.get(0));
 
+    // Split the bucket-non-null filter into two parts:
+    //   1) IS_NOT_NULL conjuncts on a partition key — these are the bucket-non-null guards PPL
+    //      emits as part of the dedup pattern; LogicalDedup absorbs their semantics.
+    //   2) Everything else — for example, a user `where` predicate that FilterMergeRule may
+    //      have folded into the same conjunction, or a user IS_NOT_NULL filter on a non-partition
+    //      column. These must be preserved as a separate filter below the new LogicalDedup so
+    //      user-visible behavior is unchanged regardless of whether FilterMergeRule fired.
+    Set<Integer> partitionKeyIndices = new HashSet<>();
+    for (RexNode key : dedupColumns) {
+      if (key instanceof RexInputRef ref) {
+        partitionKeyIndices.add(ref.getIndex());
+      }
+    }
+    List<RexNode> bucketNonNullConjuncts = new ArrayList<>();
+    List<RexNode> remainingConjuncts = new ArrayList<>();
+    for (RexNode conjunct : RelOptUtil.conjunctions(bucketNonNullFilter.getCondition())) {
+      if (isNotNullOnPartitionKey(conjunct, partitionKeyIndices)) {
+        bucketNonNullConjuncts.add(conjunct);
+      } else {
+        remainingConjuncts.add(conjunct);
+      }
+    }
+    // Defensive: if no IS_NOT_NULL conjunct on a partition key is present, this filter is not
+    // actually a bucket-non-null filter — bail out without transforming. The loose operand
+    // predicate may have matched on an unrelated AND that happens to contain an IS_NOT_NULL on
+    // some other ref.
+    if (bucketNonNullConjuncts.isEmpty()) {
+      return;
+    }
+
     RelBuilder relBuilder = call.builder();
     relBuilder.push(bucketNonNullFilter.getInput());
+    if (!remainingConjuncts.isEmpty()) {
+      relBuilder.filter(RexUtil.composeConjunction(relBuilder.getRexBuilder(), remainingConjuncts));
+    }
     List<Pair<RexNode, String>> targetProjections =
         projectWithWindow.getNamedProjects().stream()
             .filter(p -> !p.getKey().isA(SqlKind.ROW_NUMBER))
@@ -134,6 +171,14 @@ protected void apply(
     call.transformTo(relBuilder.build());
   }
 
+  private static boolean isNotNullOnPartitionKey(RexNode rex, Set<Integer> partitionKeyIndices) {
+    return rex instanceof RexCall rexCall
+        && rexCall.isA(SqlKind.IS_NOT_NULL)
+        && !rexCall.getOperands().isEmpty()
+        && rexCall.getOperands().get(0) instanceof RexInputRef ref
+        && partitionKeyIndices.contains(ref.getIndex());
+  }
+
   private static @Nullable RelCollation extractCollationFromWindow(RexWindow window) {
     if (window.orderKeys.isEmpty()) {
       return null;
diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/CalciteToolsHelper.java b/core/src/main/java/org/opensearch/sql/calcite/utils/CalciteToolsHelper.java
index b8049c2d159..54b9d4ffbaf 100644
--- a/core/src/main/java/org/opensearch/sql/calcite/utils/CalciteToolsHelper.java
+++ b/core/src/main/java/org/opensearch/sql/calcite/utils/CalciteToolsHelper.java
@@ -36,6 +36,7 @@
 import java.sql.PreparedStatement;
 import java.sql.SQLException;
 import java.time.Instant;
+import java.util.List;
 import java.util.Properties;
 import java.util.function.Consumer;
 import org.apache.calcite.adapter.enumerable.EnumerableConvention;
@@ -58,6 +59,7 @@
 import org.apache.calcite.plan.Convention;
 import org.apache.calcite.plan.RelOptCluster;
 import org.apache.calcite.plan.RelOptPlanner;
+import org.apache.calcite.plan.RelOptRule;
 import org.apache.calcite.plan.RelOptSchema;
 import org.apache.calcite.plan.RelOptTable;
 import org.apache.calcite.plan.RelOptTable.ViewExpander;
@@ -549,25 +551,12 @@ public RelNode visit(TableScan scan) {
     }
   }
 
-  /**
-   * Try to optimize the plan by using HepPlanner.
-   *
-   * <p>Rules are added as separate {@code addRuleInstance} phases so each rule is run to a fixed
-   * point before the next phase begins. {@code PPLSimplifyDedupRule} must run before {@code
-   * FilterMergeRule}: PPL emits the dedup pattern with an {@code IS NOT NULL} bucket-non-null
-   * filter directly above the scan, and the simplify rule's operand requires that filter to remain
-   * a pure {@code IS NOT NULL} (or {@code AND} of {@code IS NOT NULL}s). When a user {@code where}
-   * clause precedes the dedup, the user's filter sits adjacent to the bucket-non-null filter; if
-   * {@code FilterMergeRule} fires first it folds the two into a single conjunction, the simplify
-   * rule's operand match fails, no {@link org.opensearch.sql.calcite.plan.rel.LogicalDedup} is
-   * produced, and dedup pushdown is silently disabled. See
-   * https://github.com/opensearch-project/sql/issues/5482.
-   */
+  /** Try to optimize the plan by using HepPlanner */
+  private static final List<RelOptRule> hepRuleList =
+      List.of(FilterMergeRule.Config.DEFAULT.toRule(), PPLSimplifyDedupRule.DEDUP_SIMPLIFY_RULE);
+
   private static final HepProgram HEP_PROGRAM =
-      new HepProgramBuilder()
-          .addRuleInstance(PPLSimplifyDedupRule.DEDUP_SIMPLIFY_RULE)
-          .addRuleInstance(FilterMergeRule.Config.DEFAULT.toRule())
-          .build();
+      new HepProgramBuilder().addRuleCollection(hepRuleList).build();
 
   public static RelNode optimize(RelNode plan, CalcitePlanContext context) {
     Util.discard(context);
diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java b/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java
index 42ece7dc509..ef86fb7a1eb 100644
--- a/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java
+++ b/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java
@@ -785,17 +785,45 @@ static Mapping mapping(List<RexNode> rexNodes, RelDataType schema) {
     return Mappings.target(getSelectColumns(rexNodes), schema.getFieldCount());
   }
 
+  /**
+   * Loose structural check for the bucket-non-null filter PPL emits as part of the {@code dedup}
+   * pattern. Returns true for either:
+   *
+   * <ul>
+   *   <li>a pure {@code IS NOT NULL($ref)}, or
+   *   <li>any {@code AND} whose conjuncts contain at least one {@code IS NOT NULL($ref)}.
+   * </ul>
+   *
+   * <p>This intentionally accepts the un-merged shape ({@code IS NOT NULL} alone) and the merged
+   * shape that {@link org.apache.calcite.rel.rules.FilterMergeRule} produces when a user {@code
+   * where} clause precedes the dedup ({@code AND(IS NOT NULL($pk), <user predicate>)}). The
+   * concrete partition-key match — and the split-out of any user predicate that was folded in —
+   * happens inside {@code PPLSimplifyDedupRule#apply}, which has access to the partition keys
+   * declared by the {@code ROW_NUMBER} window above this filter. Keeping the operand predicate
+   * order-independent means the simplify rule fires regardless of whether {@code FilterMergeRule}
+   * has already run, so dedup pushdown is no longer load-bearing on HEP rule ordering.
+   *
+   * <p>Misclassification is bounded by the surrounding four-level operand chain: a project that
+   * does not contain the {@code _row_number_dedup_} column, above a filter whose condition is
+   * {@code _row_number_ <= N} and which does contain {@code _row_number_dedup_}, above a project
+   * that does contain {@code _row_number_dedup_} (with a {@code ROW_NUMBER} window function), above
+   * this filter. That signature is unique to the PPL-emitted dedup pattern, so a user-written
+   * filter such as {@code IS NOT NULL(x) AND ...} cannot accidentally match.
+   */
   static boolean mayBeFilterFromBucketNonNull(LogicalFilter filter) {
     RexNode condition = filter.getCondition();
-    return isNotNullOnRef(condition)
-        || (condition instanceof RexCall rexCall
-            && rexCall.getOperator().equals(SqlStdOperatorTable.AND)
-            && rexCall.getOperands().stream().allMatch(PlanUtils::isNotNullOnRef));
+    if (isNotNullOnRef(condition)) {
+      return true;
+    }
+    return condition instanceof RexCall rexCall
+        && rexCall.getOperator().equals(SqlStdOperatorTable.AND)
+        && rexCall.getOperands().stream().anyMatch(PlanUtils::isNotNullOnRef);
   }
 
-  private static boolean isNotNullOnRef(RexNode rex) {
+  static boolean isNotNullOnRef(RexNode rex) {
     return rex instanceof RexCall rexCall
         && rexCall.isA(SqlKind.IS_NOT_NULL)
+        && !rexCall.getOperands().isEmpty()
         && rexCall.getOperands().get(0) instanceof RexInputRef;
   }
 
diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLDedupTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLDedupTest.java
index b130708b185..67200c023fc 100644
--- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLDedupTest.java
+++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLDedupTest.java
@@ -362,13 +362,11 @@ public void testSortFieldProjectedAwayBeforeDedup() {
 
   /**
    * When a user {@code where} precedes {@code dedup}, the user filter sits adjacent to the
-   * bucket-non-null filter that PPL emits for the dedup pattern. The HEP optimizer must run {@link
-   * PPLSimplifyDedupRule} before {@link FilterMergeRule}; otherwise the merged condition breaks the
-   * simplify-rule's operand match and dedup falls through to the in-memory {@code ROW_NUMBER}
-   * window form, defeating dedup pushdown to the shard.
-   *
-   * <p>This test mirrors the production HEP rule sequence in {@code CalciteToolsHelper#HEP_PROGRAM}
-   * and asserts that a {@code LogicalDedup} is produced in the presence of a user {@code where}.
+   * bucket-non-null filter that PPL emits for the dedup pattern. {@link PPLSimplifyDedupRule} must
+   * fold the dedup pattern into a {@link org.opensearch.sql.calcite.plan.rel.LogicalDedup} so that
+   * {@code DedupPushdownRule} can match it; otherwise dedup falls through to the in-memory {@code
+   * ROW_NUMBER} window form, defeating dedup pushdown to the shard. The user predicate must be
+   * preserved as a separate filter below the new {@code LogicalDedup}.
    */
   @Test
   public void testDedupAfterWhereProducesLogicalDedup() {
@@ -376,27 +374,25 @@ public void testDedupAfterWhereProducesLogicalDedup() {
     RelNode raw = getRelNodeRaw(ppl);
 
     // Sanity: the un-merged plan has both the bucket-non-null filter and the user where filter
-    // adjacent to each other above the scan — exactly the shape that triggers the bug.
+    // adjacent to each other above the scan — exactly the shape that triggered the original bug.
     String rawExplain = raw.explain();
     Assert.assertTrue(
         "Raw plan should contain the bucket-non-null filter:\n" + rawExplain,
         rawExplain.contains("IS NOT NULL"));
     Assert.assertTrue(
         "Raw plan should contain the user where filter:\n" + rawExplain,
-        rawExplain.contains("1000"));
+        rawExplain.contains(">($5, 1000)"));
     Assert.assertTrue(
         "Raw plan should contain ROW_NUMBER prior to simplification:\n" + rawExplain,
         rawExplain.contains("ROW_NUMBER"));
 
-    // Apply the production HEP rule sequence: PPLSimplifyDedupRule first, then FilterMergeRule.
+    // Apply rules in the order PPLSimplifyDedupRule -> FilterMergeRule.
     HepProgram program =
         new HepProgramBuilder()
             .addRuleInstance(PPLSimplifyDedupRule.DEDUP_SIMPLIFY_RULE)
             .addRuleInstance(FilterMergeRule.Config.DEFAULT.toRule())
             .build();
-    HepPlanner planner = new HepPlanner(program);
-    planner.setRoot(raw);
-    RelNode optimized = planner.findBestExp();
+    RelNode optimized = runHepPlanner(raw, program);
 
     String optimizedExplain = optimized.explain();
     Assert.assertTrue(
@@ -406,40 +402,56 @@ public void testDedupAfterWhereProducesLogicalDedup() {
     Assert.assertFalse(
         "Optimized plan should not retain ROW_NUMBER after simplification:\n" + optimizedExplain,
         optimizedExplain.contains("ROW_NUMBER"));
+    Assert.assertTrue(
+        "User where predicate must be preserved as a filter below LogicalDedup:\n"
+            + optimizedExplain,
+        optimizedExplain.contains(">($5, 1000)"));
   }
 
   /**
-   * Companion to {@link #testDedupAfterWhereProducesLogicalDedup} that pins the buggy behavior:
-   * when {@link FilterMergeRule} runs before {@link PPLSimplifyDedupRule}, the merged conjunction
-   * defeats the simplify-rule's operand match and no {@code LogicalDedup} is produced. This guards
-   * against accidentally swapping the rule order in {@code CalciteToolsHelper#HEP_PROGRAM} back to
-   * the buggy ordering.
+   * Companion to {@link #testDedupAfterWhereProducesLogicalDedup} that pins the user-visible
+   * contract: with a {@code where} preceding {@code dedup}, a {@code LogicalDedup} must be produced
+   * regardless of the order in which {@link FilterMergeRule} and {@link PPLSimplifyDedupRule} fire.
+   *
+   * <p>The simplify rule's bucket-non-null operand predicate is order-independent — it accepts both
+   * a pure {@code IS NOT NULL} and an {@code AND} that contains an {@code IS NOT NULL} on a
+   * partition key — so {@code FilterMergeRule} firing first no longer disables dedup pushdown. This
+   * guards against re-introducing the original regression by reordering, removing, or adding rules
+   * to {@code CalciteToolsHelper#HEP_PROGRAM}.
    */
   @Test
-  public void testDedupAfterWhereWithFilterMergeFirstFailsToSimplify() {
+  public void testDedupAfterWhereProducesLogicalDedupRegardlessOfRuleOrder() {
     String ppl = "source=EMP | where SAL > 1000 | dedup DEPTNO";
     RelNode raw = getRelNodeRaw(ppl);
 
-    // Buggy order: FilterMergeRule first, then PPLSimplifyDedupRule.
+    // Swapped order: FilterMergeRule first, then PPLSimplifyDedupRule. This is the order that
+    // produced the original bug (#5482) before the fix.
     HepProgram program =
         new HepProgramBuilder()
             .addRuleInstance(FilterMergeRule.Config.DEFAULT.toRule())
             .addRuleInstance(PPLSimplifyDedupRule.DEDUP_SIMPLIFY_RULE)
             .build();
-    HepPlanner planner = new HepPlanner(program);
-    planner.setRoot(raw);
-    RelNode optimized = planner.findBestExp();
+    RelNode optimized = runHepPlanner(raw, program);
 
     String optimizedExplain = optimized.explain();
-    Assert.assertFalse(
-        "Buggy order: PPLSimplifyDedupRule should not match after FilterMergeRule consolidates"
-            + " the bucket-non-null filter with the user where filter, so no LogicalDedup is"
-            + " produced:\n"
+    Assert.assertTrue(
+        "Even with FilterMergeRule firing first, PPLSimplifyDedupRule must still produce"
+            + " LogicalDedup so DedupPushdownRule can match it:\n"
             + optimizedExplain,
         optimizedExplain.contains("LogicalDedup"));
+    Assert.assertFalse(
+        "ROW_NUMBER window form should be removed after simplification:\n" + optimizedExplain,
+        optimizedExplain.contains("ROW_NUMBER"));
     Assert.assertTrue(
-        "Buggy order: ROW_NUMBER window form should remain since simplification was skipped:\n"
+        "User where predicate must be preserved as a filter below LogicalDedup, even when"
+            + " FilterMergeRule fired first and folded it into the bucket-non-null filter:\n"
             + optimizedExplain,
-        optimizedExplain.contains("ROW_NUMBER"));
+        optimizedExplain.contains(">($5, 1000)"));
+  }
+
+  private static RelNode runHepPlanner(RelNode root, HepProgram program) {
+    HepPlanner planner = new HepPlanner(program);
+    planner.setRoot(root);
+    return planner.findBestExp();
   }
 }

From ae86ecea3f8b1ef84e0dd6fa3b4d68dd09a4c505 Mon Sep 17 00:00:00 2001
From: Jialiang Liang <jiallian@amazon.com>
Date: Fri, 5 Jun 2026 09:37:11 -0700
Subject: [PATCH 4/7] Address review comments on #5488

Per @penghuo review:

PlanUtils.java
- Revert mayBeFilterFromBucketNonNull to the original ternary form; drop the
  early-return refactor (no behavior change, just cleaner).
- Drop the !rexCall.getOperands().isEmpty() guard before .get(0): IS NOT NULL
  is always unary in Calcite, so the check is dead.
- Trim the JavaDoc to the essentials (un-merged vs merged-AND shape; concrete
  partition-key match happens in PPLSimplifyDedupRule#apply).
- Promote isNotNullOnRef from package-private to public so the dedup rule can
  reuse it from a different package.

PPLSimplifyDedupRule.java
- isNotNullOnPartitionKey now delegates the IS NOT NULL($ref) structural check
  to PlanUtils.isNotNullOnRef and adds the partition-key index check on top.

CalciteExplainIT.java
- Add testDedupAfterWherePushDown: an end-to-end regression that runs the
  shape `... | where <pred> | dedup <field>` and asserts (a) LogicalDedup
  appears in the explain output (PPLSimplifyDedupRule fired even after
  FilterMergeRule had a chance to merge the two filters), and (b)
  EnumerableWindow does NOT appear (the in-memory ROW_NUMBER fallback the
  bug caused is gone).

Signed-off-by: Jialiang Liang <jiallian@amazon.com>
---
 .../plan/rule/PPLSimplifyDedupRule.java       | 10 ++---
 .../sql/calcite/utils/PlanUtils.java          | 40 +++++--------------
 .../sql/calcite/remote/CalciteExplainIT.java  | 23 +++++++++++
 3 files changed, 37 insertions(+), 36 deletions(-)

diff --git a/core/src/main/java/org/opensearch/sql/calcite/plan/rule/PPLSimplifyDedupRule.java b/core/src/main/java/org/opensearch/sql/calcite/plan/rule/PPLSimplifyDedupRule.java
index ff00eee485b..676b1e9a776 100644
--- a/core/src/main/java/org/opensearch/sql/calcite/plan/rule/PPLSimplifyDedupRule.java
+++ b/core/src/main/java/org/opensearch/sql/calcite/plan/rule/PPLSimplifyDedupRule.java
@@ -172,11 +172,11 @@ protected void apply(
   }
 
   private static boolean isNotNullOnPartitionKey(RexNode rex, Set<Integer> partitionKeyIndices) {
-    return rex instanceof RexCall rexCall
-        && rexCall.isA(SqlKind.IS_NOT_NULL)
-        && !rexCall.getOperands().isEmpty()
-        && rexCall.getOperands().get(0) instanceof RexInputRef ref
-        && partitionKeyIndices.contains(ref.getIndex());
+    if (!PlanUtils.isNotNullOnRef(rex)) {
+      return false;
+    }
+    RexInputRef ref = (RexInputRef) ((RexCall) rex).getOperands().get(0);
+    return partitionKeyIndices.contains(ref.getIndex());
   }
 
   private static @Nullable RelCollation extractCollationFromWindow(RexWindow window) {
diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java b/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java
index ef86fb7a1eb..b63c4a0237f 100644
--- a/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java
+++ b/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java
@@ -786,44 +786,22 @@ static Mapping mapping(List<RexNode> rexNodes, RelDataType schema) {
   }
 
   /**
-   * Loose structural check for the bucket-non-null filter PPL emits as part of the {@code dedup}
-   * pattern. Returns true for either:
-   *
-   * <ul>
-   *   <li>a pure {@code IS NOT NULL($ref)}, or
-   *   <li>any {@code AND} whose conjuncts contain at least one {@code IS NOT NULL($ref)}.
-   * </ul>
-   *
-   * <p>This intentionally accepts the un-merged shape ({@code IS NOT NULL} alone) and the merged
-   * shape that {@link org.apache.calcite.rel.rules.FilterMergeRule} produces when a user {@code
-   * where} clause precedes the dedup ({@code AND(IS NOT NULL($pk), <user predicate>)}). The
-   * concrete partition-key match — and the split-out of any user predicate that was folded in —
-   * happens inside {@code PPLSimplifyDedupRule#apply}, which has access to the partition keys
-   * declared by the {@code ROW_NUMBER} window above this filter. Keeping the operand predicate
-   * order-independent means the simplify rule fires regardless of whether {@code FilterMergeRule}
-   * has already run, so dedup pushdown is no longer load-bearing on HEP rule ordering.
-   *
-   * <p>Misclassification is bounded by the surrounding four-level operand chain: a project that
-   * does not contain the {@code _row_number_dedup_} column, above a filter whose condition is
-   * {@code _row_number_ <= N} and which does contain {@code _row_number_dedup_}, above a project
-   * that does contain {@code _row_number_dedup_} (with a {@code ROW_NUMBER} window function), above
-   * this filter. That signature is unique to the PPL-emitted dedup pattern, so a user-written
-   * filter such as {@code IS NOT NULL(x) AND ...} cannot accidentally match.
+   * Accepts the un-merged {@code IS NOT NULL($ref)} shape and the merged-{@code AND} shape that
+   * {@link org.apache.calcite.rel.rules.FilterMergeRule} produces when a user {@code where}
+   * precedes the dedup. The concrete partition-key match — and the split-out of any user predicate
+   * folded into the AND — happens in {@link PPLSimplifyDedupRule#apply}.
    */
   static boolean mayBeFilterFromBucketNonNull(LogicalFilter filter) {
     RexNode condition = filter.getCondition();
-    if (isNotNullOnRef(condition)) {
-      return true;
-    }
-    return condition instanceof RexCall rexCall
-        && rexCall.getOperator().equals(SqlStdOperatorTable.AND)
-        && rexCall.getOperands().stream().anyMatch(PlanUtils::isNotNullOnRef);
+    return isNotNullOnRef(condition)
+        || (condition instanceof RexCall rexCall
+            && rexCall.getOperator().equals(SqlStdOperatorTable.AND)
+            && rexCall.getOperands().stream().anyMatch(PlanUtils::isNotNullOnRef));
   }
 
-  static boolean isNotNullOnRef(RexNode rex) {
+  public static boolean isNotNullOnRef(RexNode rex) {
     return rex instanceof RexCall rexCall
         && rexCall.isA(SqlKind.IS_NOT_NULL)
-        && !rexCall.getOperands().isEmpty()
         && rexCall.getOperands().get(0) instanceof RexInputRef;
   }
 
diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java
index 8ad0be5cc88..c432c966d42 100644
--- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java
+++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java
@@ -2153,6 +2153,29 @@ public void testTransposeExplain() throws IOException {
                 + "|  transpose 4 column_name='column_names'"));
   }
 
+  /**
+   * Regression test for #5482: a user {@code where} clause adjacent to dedup's bucket-non-null
+   * filter must still let {@code PPLSimplifyDedupRule} fire even after {@code FilterMergeRule} has
+   * merged the two filters into a single AND. Without the fix, the plan keeps the in-memory {@code
+   * ROW_NUMBER} window over a row-fetching scan and dedup pushdown is silently disabled.
+   */
+  @Test
+  public void testDedupAfterWherePushDown() throws IOException {
+    enabledOnlyWhenPushdownIsEnabled();
+    String result =
+        explainQueryToString(
+            "source=opensearch-sql_test_index_account | where age > 25 | dedup gender");
+    assertTrue(
+        "Expected LogicalDedup in the plan (simplify rule should fire after FilterMergeRule):\n"
+            + result,
+        result.contains("LogicalDedup"));
+    assertFalse(
+        "Unexpected EnumerableWindow in the plan — dedup fell back to the in-memory ROW_NUMBER"
+            + " form, which means PPLSimplifyDedupRule did not fire:\n"
+            + result,
+        result.contains("EnumerableWindow"));
+  }
+
   public void testComplexDedup() throws IOException {
     enabledOnlyWhenPushdownIsEnabled();
     String expected = loadExpectedPlan("explain_dedup_complex1.yaml");

From a66ad5beb440d5cdc7a87d57f3cf2205a43ec80b Mon Sep 17 00:00:00 2001
From: Jialiang Liang <jiallian@amazon.com>
Date: Fri, 5 Jun 2026 13:15:30 -0700
Subject: [PATCH 5/7] Push user where filter into scan when blocking dedup
 pushdown

PPLSimplifyDedupRule correctly produces Dedup -> Filter(user where) -> Scan
when a `where` precedes `dedup`. The Filter between Dedup and Scan blocks
DedupPushdownRule's strict Dedup -> Project -> Scan operand chain, so
Volcano falls back to PPLDedupConvertRule and the plan ends up with an
in-memory ROW_NUMBER window instead of the pushed-down composite +
top_hits aggregation.

Add a WITH_FILTER operand variant to DedupPushdownRule that matches
Dedup -> Filter -> Scan, pushes the filter into the scan, then runs the
standard apply() on the resulting Dedup -> Project -> Scan shape.

Signed-off-by: Jialiang Liang <jiallian@amazon.com>
---
 .../sql/calcite/remote/CalciteExplainIT.java  | 18 ++++----
 .../planner/rules/DedupPushdownRule.java      | 41 +++++++++++++++++++
 .../planner/rules/OpenSearchIndexRules.java   |  3 ++
 .../sql/ppl/calcite/CalcitePPLDedupTest.java  | 32 +++++++++++++++
 4 files changed, 85 insertions(+), 9 deletions(-)

diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java
index c432c966d42..d25631c5ace 100644
--- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java
+++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteExplainIT.java
@@ -2154,10 +2154,9 @@ public void testTransposeExplain() throws IOException {
   }
 
   /**
-   * Regression test for #5482: a user {@code where} clause adjacent to dedup's bucket-non-null
-   * filter must still let {@code PPLSimplifyDedupRule} fire even after {@code FilterMergeRule} has
-   * merged the two filters into a single AND. Without the fix, the plan keeps the in-memory {@code
-   * ROW_NUMBER} window over a row-fetching scan and dedup pushdown is silently disabled.
+   * With a user {@code where} clause preceding {@code dedup}, the physical plan must push both the
+   * filter and the dedup-as-aggregation into the OpenSearch scan, not fall back to an in-memory
+   * {@code ROW_NUMBER} window above a row-fetching scan.
    */
   @Test
   public void testDedupAfterWherePushDown() throws IOException {
@@ -2166,12 +2165,13 @@ public void testDedupAfterWherePushDown() throws IOException {
         explainQueryToString(
             "source=opensearch-sql_test_index_account | where age > 25 | dedup gender");
     assertTrue(
-        "Expected LogicalDedup in the plan (simplify rule should fire after FilterMergeRule):\n"
-            + result,
-        result.contains("LogicalDedup"));
+        "Expected user where filter pushed down to the scan:\n" + result,
+        result.contains("FILTER->>($8, 25)"));
+    assertTrue(
+        "Expected dedup pushed down as AGGREGATION (composite + top_hits):\n" + result,
+        result.contains("AGGREGATION->"));
     assertFalse(
-        "Unexpected EnumerableWindow in the plan — dedup fell back to the in-memory ROW_NUMBER"
-            + " form, which means PPLSimplifyDedupRule did not fire:\n"
+        "Unexpected EnumerableWindow — dedup fell back to the in-memory ROW_NUMBER form:\n"
             + result,
         result.contains("EnumerableWindow"));
   }
diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java
index d37957d4a9f..7224a1ebda3 100644
--- a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java
+++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java
@@ -13,10 +13,12 @@
 import java.util.stream.IntStream;
 import javax.annotation.Nullable;
 import org.apache.calcite.plan.RelOptRuleCall;
+import org.apache.calcite.rel.AbstractRelNode;
 import org.apache.calcite.rel.RelCollation;
 import org.apache.calcite.rel.RelCollations;
 import org.apache.calcite.rel.RelFieldCollation;
 import org.apache.calcite.rel.logical.LogicalAggregate;
+import org.apache.calcite.rel.logical.LogicalFilter;
 import org.apache.calcite.rel.logical.LogicalProject;
 import org.apache.calcite.rel.rules.SubstitutionRule;
 import org.apache.calcite.rex.RexInputRef;
@@ -47,6 +49,24 @@ protected DedupPushdownRule(Config config) {
   @Override
   protected void onMatchImpl(RelOptRuleCall call) {
     final LogicalDedup logicalDedup = call.rel(0);
+    if (call.rels[1] instanceof LogicalFilter filter) {
+      // Push the filter into the scan, then synthesize an identity project so the standard
+      // apply() can run on the resulting Dedup → Project → Scan shape.
+      final CalciteLogicalIndexScan scan = call.rel(2);
+      AbstractRelNode scanWithFilter = scan.pushDownFilter(filter);
+      if (scanWithFilter == null) {
+        return;
+      }
+      CalciteLogicalIndexScan newScan = (CalciteLogicalIndexScan) scanWithFilter;
+      RelBuilder relBuilder = call.builder();
+      relBuilder.push(newScan);
+      // force=true so the identity project is materialized (apply() requires a LogicalProject)
+      relBuilder.project(
+          relBuilder.fields(), newScan.getRowType().getFieldNames(), /* force= */ true);
+      LogicalProject identityProject = (LogicalProject) relBuilder.build();
+      apply(call, logicalDedup, identityProject, newScan);
+      return;
+    }
     final LogicalProject projectWithExpr = call.rel(1);
     final CalciteLogicalIndexScan scan = call.rel(2);
     apply(call, logicalDedup, projectWithExpr, scan);
@@ -226,6 +246,27 @@ public interface Config extends OpenSearchRuleConfig {
                                                 .predicate(Config::tableScanChecker)
                                                 .noInputs())));
 
+    // +- LogicalDedup
+    //    +- LogicalFilter      (e.g. a `where` predicate preserved below dedup by
+    //    +- CalciteLogicalIndexScan   PPLSimplifyDedupRule when an upstream `where` is folded
+    //                                 into the bucket-non-null filter)
+    Config WITH_FILTER =
+        ImmutableDedupPushdownRule.Config.builder()
+            .build()
+            .withDescription("Dedup-to-Aggregate-WithFilter")
+            .withOperandSupplier(
+                b0 ->
+                    b0.operand(LogicalDedup.class)
+                        .predicate(dedup -> !dedup.getKeepEmpty())
+                        .oneInput(
+                            b1 ->
+                                b1.operand(LogicalFilter.class)
+                                    .oneInput(
+                                        b2 ->
+                                            b2.operand(CalciteLogicalIndexScan.class)
+                                                .predicate(Config::tableScanChecker)
+                                                .noInputs())));
+
     /**
      * Project must be not pushed since the name of expression would lose after project pushed. E.g.
      * in query "eval new_a = a + 1 | dedup b", the "new_a" will lose.
diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/OpenSearchIndexRules.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/OpenSearchIndexRules.java
index 0068f445ce7..3c8508cc455 100644
--- a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/OpenSearchIndexRules.java
+++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/OpenSearchIndexRules.java
@@ -50,6 +50,8 @@ public class OpenSearchIndexRules {
       SortIndexScanRule.Config.DEFAULT.toRule();
   private static final DedupPushdownRule DEDUP_PUSH_DOWN =
       DedupPushdownRule.Config.DEFAULT.toRule();
+  private static final DedupPushdownRule DEDUP_PUSH_DOWN_WITH_FILTER =
+      DedupPushdownRule.Config.WITH_FILTER.toRule();
   private static final SortProjectExprTransposeRule SORT_PROJECT_EXPR_TRANSPOSE =
       SortProjectExprTransposeRule.Config.DEFAULT.toRule();
   private static final ExpandCollationOnProjectExprRule EXPAND_COLLATION_ON_PROJECT_EXPR =
@@ -75,6 +77,7 @@ public class OpenSearchIndexRules {
           LIMIT_INDEX_SCAN,
           SORT_INDEX_SCAN,
           DEDUP_PUSH_DOWN,
+          DEDUP_PUSH_DOWN_WITH_FILTER,
           SORT_PROJECT_EXPR_TRANSPOSE,
           SORT_AGGREGATION_METRICS_RULE,
           RARE_TOP_PUSH_DOWN,
diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLDedupTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLDedupTest.java
index 67200c023fc..92f09dc5f29 100644
--- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLDedupTest.java
+++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLDedupTest.java
@@ -449,6 +449,38 @@ public void testDedupAfterWhereProducesLogicalDedupRegardlessOfRuleOrder() {
         optimizedExplain.contains(">($5, 1000)"));
   }
 
+  /**
+   * Mirrors the exact shape of {@code CalciteToolsHelper.HEP_PROGRAM} used in production
+   * ({@code addRuleCollection(List.of(FilterMergeRule, PPLSimplifyDedupRule))}). The two
+   * sequenced-{@code addRuleInstance} tests above prove the rule fires under either ordering,
+   * but production runs both rules in a single collection instruction. This test pins that exact
+   * shape to catch any addRuleCollection-vs-addRuleInstance traversal differences.
+   */
+  @Test
+  public void testDedupAfterWhereProducesLogicalDedupWithProductionHepProgram() {
+    String ppl = "source=EMP | where SAL > 1000 | dedup DEPTNO";
+    RelNode raw = getRelNodeRaw(ppl);
+
+    HepProgram program =
+        new HepProgramBuilder()
+            .addRuleCollection(
+                java.util.List.of(
+                    FilterMergeRule.Config.DEFAULT.toRule(),
+                    PPLSimplifyDedupRule.DEDUP_SIMPLIFY_RULE))
+            .build();
+    RelNode optimized = runHepPlanner(raw, program);
+
+    String optimizedExplain = optimized.explain();
+    Assert.assertTrue(
+        "Production HEP_PROGRAM (addRuleCollection) must still produce LogicalDedup:\n"
+            + optimizedExplain,
+        optimizedExplain.contains("LogicalDedup"));
+    Assert.assertFalse(
+        "ROW_NUMBER window form should be removed under production HEP_PROGRAM:\n"
+            + optimizedExplain,
+        optimizedExplain.contains("ROW_NUMBER"));
+  }
+
   private static RelNode runHepPlanner(RelNode root, HepProgram program) {
     HepPlanner planner = new HepPlanner(program);
     planner.setRoot(root);

From 95e2aeb976bcbd71fd11ce3ecf7fa7d968679ece Mon Sep 17 00:00:00 2001
From: Jialiang Liang <jiallian@amazon.com>
Date: Fri, 5 Jun 2026 13:44:17 -0700
Subject: [PATCH 6/7] Bail when filter is only partially pushable

pushDownFilter returns a Filter (not a CalciteLogicalIndexScan) when the
predicate analyzer can only partially push the condition. The previous
cast would have thrown ClassCastException in that case. Use an
instanceof-pattern check so the rule bails out cleanly and leaves the
plan untouched, letting other rules handle the residual.

Also drop a stale issue-link reference from a test comment.

Signed-off-by: Jialiang Liang <jiallian@amazon.com>
---
 .../sql/opensearch/planner/rules/DedupPushdownRule.java  | 9 ++++-----
 .../opensearch/sql/ppl/calcite/CalcitePPLDedupTest.java  | 2 --
 2 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java
index 7224a1ebda3..648888ce9b7 100644
--- a/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java
+++ b/opensearch/src/main/java/org/opensearch/sql/opensearch/planner/rules/DedupPushdownRule.java
@@ -13,7 +13,6 @@
 import java.util.stream.IntStream;
 import javax.annotation.Nullable;
 import org.apache.calcite.plan.RelOptRuleCall;
-import org.apache.calcite.rel.AbstractRelNode;
 import org.apache.calcite.rel.RelCollation;
 import org.apache.calcite.rel.RelCollations;
 import org.apache.calcite.rel.RelFieldCollation;
@@ -51,13 +50,13 @@ protected void onMatchImpl(RelOptRuleCall call) {
     final LogicalDedup logicalDedup = call.rel(0);
     if (call.rels[1] instanceof LogicalFilter filter) {
       // Push the filter into the scan, then synthesize an identity project so the standard
-      // apply() can run on the resulting Dedup → Project → Scan shape.
+      // apply() can run on the resulting Dedup → Project → Scan shape. If the filter is only
+      // partially pushable, pushDownFilter returns a Filter wrapping the residual condition over
+      // the new scan; we can't strip a residual filter without breaking semantics, so bail.
       final CalciteLogicalIndexScan scan = call.rel(2);
-      AbstractRelNode scanWithFilter = scan.pushDownFilter(filter);
-      if (scanWithFilter == null) {
+      if (!(scan.pushDownFilter(filter) instanceof CalciteLogicalIndexScan newScan)) {
         return;
       }
-      CalciteLogicalIndexScan newScan = (CalciteLogicalIndexScan) scanWithFilter;
       RelBuilder relBuilder = call.builder();
       relBuilder.push(newScan);
       // force=true so the identity project is materialized (apply() requires a LogicalProject)
diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLDedupTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLDedupTest.java
index 92f09dc5f29..35acdb1f7f6 100644
--- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLDedupTest.java
+++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLDedupTest.java
@@ -424,8 +424,6 @@ public void testDedupAfterWhereProducesLogicalDedupRegardlessOfRuleOrder() {
     String ppl = "source=EMP | where SAL > 1000 | dedup DEPTNO";
     RelNode raw = getRelNodeRaw(ppl);
 
-    // Swapped order: FilterMergeRule first, then PPLSimplifyDedupRule. This is the order that
-    // produced the original bug (#5482) before the fix.
     HepProgram program =
         new HepProgramBuilder()
             .addRuleInstance(FilterMergeRule.Config.DEFAULT.toRule())

From 38fc6c3d45419cfd5ce5c865520a54659ca7147c Mon Sep 17 00:00:00 2001
From: Jialiang Liang <jiallian@amazon.com>
Date: Fri, 5 Jun 2026 15:33:30 -0700
Subject: [PATCH 7/7] Apply spotless formatting to dedup unit test

Reflowed the JavaDoc on testDedupAfterWhereProducesLogicalDedupWithProductionHepProgram
to match Google Java Format's preferred line break, fixing the
spotlessJavaCheck violation that failed the unit-test matrix on CI.

Signed-off-by: Jialiang Liang <jiallian@amazon.com>
---
 .../sql/ppl/calcite/CalcitePPLDedupTest.java           | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLDedupTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLDedupTest.java
index 35acdb1f7f6..5f32c1b85bb 100644
--- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLDedupTest.java
+++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLDedupTest.java
@@ -448,11 +448,11 @@ public void testDedupAfterWhereProducesLogicalDedupRegardlessOfRuleOrder() {
   }
 
   /**
-   * Mirrors the exact shape of {@code CalciteToolsHelper.HEP_PROGRAM} used in production
-   * ({@code addRuleCollection(List.of(FilterMergeRule, PPLSimplifyDedupRule))}). The two
-   * sequenced-{@code addRuleInstance} tests above prove the rule fires under either ordering,
-   * but production runs both rules in a single collection instruction. This test pins that exact
-   * shape to catch any addRuleCollection-vs-addRuleInstance traversal differences.
+   * Mirrors the exact shape of {@code CalciteToolsHelper.HEP_PROGRAM} used in production ({@code
+   * addRuleCollection(List.of(FilterMergeRule, PPLSimplifyDedupRule))}). The two sequenced-{@code
+   * addRuleInstance} tests above prove the rule fires under either ordering, but production runs
+   * both rules in a single collection instruction. This test pins that exact shape to catch any
+   * addRuleCollection-vs-addRuleInstance traversal differences.
    */
   @Test
   public void testDedupAfterWhereProducesLogicalDedupWithProductionHepProgram() {