Skip to content

Commit 8bfb649

Browse files
committed
[BugFix] Restore dedup pushdown when combined with WHERE clause (#5482)
Run PPLSimplifyDedupRule before FilterMergeRule in the HEP optimizer so the bucket-non-null filter PPL emits for dedup is matched as-is. With the previous order, an upstream user where filter sat adjacent to the bucket-non-null filter; FilterMergeRule fired first and merged them into a conjunction that no longer satisfied PPLSimplifyDedupRule's operand predicate, defeating dedup pushdown to the shard. Use sequential addRuleInstance phases for explicit ordering rather than addRuleCollection, which is documented as non-deterministic in firing order. Adds two regression tests in CalcitePPLDedupTest: one that asserts LogicalDedup is produced under the fixed order, and one that pins the buggy behavior under the swapped order. Signed-off-by: ryan-gh-bot <ryan-gh-bot@users.noreply.github.com>
1 parent 9300574 commit 8bfb649

3 files changed

Lines changed: 123 additions & 7 deletions

File tree

core/src/main/java/org/opensearch/sql/calcite/utils/CalciteToolsHelper.java

Lines changed: 18 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,6 @@
3636
import java.sql.PreparedStatement;
3737
import java.sql.SQLException;
3838
import java.time.Instant;
39-
import java.util.List;
4039
import java.util.Properties;
4140
import java.util.function.Consumer;
4241
import org.apache.calcite.adapter.enumerable.EnumerableConvention;
@@ -59,7 +58,6 @@
5958
import org.apache.calcite.plan.Convention;
6059
import org.apache.calcite.plan.RelOptCluster;
6160
import org.apache.calcite.plan.RelOptPlanner;
62-
import org.apache.calcite.plan.RelOptRule;
6361
import org.apache.calcite.plan.RelOptSchema;
6462
import org.apache.calcite.plan.RelOptTable;
6563
import org.apache.calcite.plan.RelOptTable.ViewExpander;
@@ -551,12 +549,25 @@ public RelNode visit(TableScan scan) {
551549
}
552550
}
553551

554-
/** Try to optimize the plan by using HepPlanner */
555-
private static final List<RelOptRule> hepRuleList =
556-
List.of(FilterMergeRule.Config.DEFAULT.toRule(), PPLSimplifyDedupRule.DEDUP_SIMPLIFY_RULE);
557-
552+
/**
553+
* Try to optimize the plan by using HepPlanner.
554+
*
555+
* <p>Rules are added as separate {@code addRuleInstance} phases so each rule is run to a fixed
556+
* point before the next phase begins. {@code PPLSimplifyDedupRule} must run before {@code
557+
* FilterMergeRule}: PPL emits the dedup pattern with an {@code IS NOT NULL} bucket-non-null
558+
* filter directly above the scan, and the simplify rule's operand requires that filter to remain
559+
* a pure {@code IS NOT NULL} (or {@code AND} of {@code IS NOT NULL}s). When a user {@code where}
560+
* clause precedes the dedup, the user's filter sits adjacent to the bucket-non-null filter; if
561+
* {@code FilterMergeRule} fires first it folds the two into a single conjunction, the simplify
562+
* rule's operand match fails, no {@link org.opensearch.sql.calcite.plan.rel.LogicalDedup} is
563+
* produced, and dedup pushdown is silently disabled. See
564+
* https://github.com/opensearch-project/sql/issues/5482.
565+
*/
558566
private static final HepProgram HEP_PROGRAM =
559-
new HepProgramBuilder().addRuleCollection(hepRuleList).build();
567+
new HepProgramBuilder()
568+
.addRuleInstance(PPLSimplifyDedupRule.DEDUP_SIMPLIFY_RULE)
569+
.addRuleInstance(FilterMergeRule.Config.DEFAULT.toRule())
570+
.build();
560571

561572
public static RelNode optimize(RelNode plan, CalcitePlanContext context) {
562573
Util.discard(context);

ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLAbstractTest.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,20 @@ public RelNode getRelNode(String ppl) {
110110
return root;
111111
}
112112

113+
/**
114+
* Get the root RelNode of the given PPL query without merging adjacent filters. Useful for
115+
* regression tests that need to exercise rule ordering against the un-merged shape that PPL
116+
* actually emits to the production HEP optimizer.
117+
*/
118+
public RelNode getRelNodeRaw(String ppl) {
119+
CalcitePlanContext context = createBuilderContext();
120+
Query query = (Query) plan(pplParser, ppl);
121+
planTransformer.analyze(query.getPlan(), context);
122+
RelNode root = context.relBuilder.build();
123+
System.out.println(root.explain());
124+
return root;
125+
}
126+
113127
private RelNode mergeAdjacentFilters(RelNode relNode) {
114128
HepProgram program =
115129
new HepProgramBuilder().addRuleInstance(FilterMergeRule.Config.DEFAULT.toRule()).build();

ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLDedupTest.java

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,9 +5,15 @@
55

66
package org.opensearch.sql.ppl.calcite;
77

8+
import org.apache.calcite.plan.hep.HepPlanner;
9+
import org.apache.calcite.plan.hep.HepProgram;
10+
import org.apache.calcite.plan.hep.HepProgramBuilder;
811
import org.apache.calcite.rel.RelNode;
12+
import org.apache.calcite.rel.rules.FilterMergeRule;
913
import org.apache.calcite.test.CalciteAssert;
14+
import org.junit.Assert;
1015
import org.junit.Test;
16+
import org.opensearch.sql.calcite.plan.rule.PPLSimplifyDedupRule;
1117

1218
public class CalcitePPLDedupTest extends CalcitePPLAbstractTest {
1319

@@ -353,4 +359,89 @@ public void testSortFieldProjectedAwayBeforeDedup() {
353359
+ " LogicalTableScan(table=[[scott, EMP]])\n";
354360
verifyLogical(root, expectedLogical);
355361
}
362+
363+
/**
364+
* Regression test for https://github.com/opensearch-project/sql/issues/5482
365+
*
366+
* <p>When a user {@code where} precedes {@code dedup}, the user filter sits adjacent to the
367+
* bucket-non-null filter that PPL emits for the dedup pattern. The HEP optimizer must run {@link
368+
* PPLSimplifyDedupRule} before {@link FilterMergeRule}; otherwise the merged condition breaks the
369+
* simplify-rule's operand match and dedup falls through to the in-memory {@code ROW_NUMBER}
370+
* window form, defeating dedup pushdown to the shard.
371+
*
372+
* <p>This test mirrors the production HEP rule sequence in {@code CalciteToolsHelper#HEP_PROGRAM}
373+
* and asserts that a {@code LogicalDedup} is produced in the presence of a user {@code where}.
374+
*/
375+
@Test
376+
public void testDedupAfterWhereProducesLogicalDedup() {
377+
String ppl = "source=EMP | where SAL > 1000 | dedup DEPTNO";
378+
RelNode raw = getRelNodeRaw(ppl);
379+
380+
// Sanity: the un-merged plan has both the bucket-non-null filter and the user where filter
381+
// adjacent to each other above the scan — exactly the shape that triggers the bug.
382+
String rawExplain = raw.explain();
383+
Assert.assertTrue(
384+
"Raw plan should contain the bucket-non-null filter:\n" + rawExplain,
385+
rawExplain.contains("IS NOT NULL"));
386+
Assert.assertTrue(
387+
"Raw plan should contain the user where filter:\n" + rawExplain,
388+
rawExplain.contains("1000"));
389+
Assert.assertTrue(
390+
"Raw plan should contain ROW_NUMBER prior to simplification:\n" + rawExplain,
391+
rawExplain.contains("ROW_NUMBER"));
392+
393+
// Apply the production HEP rule sequence: PPLSimplifyDedupRule first, then FilterMergeRule.
394+
HepProgram program =
395+
new HepProgramBuilder()
396+
.addRuleInstance(PPLSimplifyDedupRule.DEDUP_SIMPLIFY_RULE)
397+
.addRuleInstance(FilterMergeRule.Config.DEFAULT.toRule())
398+
.build();
399+
HepPlanner planner = new HepPlanner(program);
400+
planner.setRoot(raw);
401+
RelNode optimized = planner.findBestExp();
402+
403+
String optimizedExplain = optimized.explain();
404+
Assert.assertTrue(
405+
"Optimized plan should contain LogicalDedup so DedupPushdownRule can match it:\n"
406+
+ optimizedExplain,
407+
optimizedExplain.contains("LogicalDedup"));
408+
Assert.assertFalse(
409+
"Optimized plan should not retain ROW_NUMBER after simplification:\n" + optimizedExplain,
410+
optimizedExplain.contains("ROW_NUMBER"));
411+
}
412+
413+
/**
414+
* Companion to {@link #testDedupAfterWhereProducesLogicalDedup} that pins the buggy behavior:
415+
* when {@link FilterMergeRule} runs before {@link PPLSimplifyDedupRule}, the merged conjunction
416+
* defeats the simplify-rule's operand match and no {@code LogicalDedup} is produced. This guards
417+
* against accidentally swapping the rule order in {@code CalciteToolsHelper#HEP_PROGRAM} back to
418+
* the buggy ordering.
419+
*/
420+
@Test
421+
public void testDedupAfterWhereWithFilterMergeFirstFailsToSimplify() {
422+
String ppl = "source=EMP | where SAL > 1000 | dedup DEPTNO";
423+
RelNode raw = getRelNodeRaw(ppl);
424+
425+
// Buggy order: FilterMergeRule first, then PPLSimplifyDedupRule.
426+
HepProgram program =
427+
new HepProgramBuilder()
428+
.addRuleInstance(FilterMergeRule.Config.DEFAULT.toRule())
429+
.addRuleInstance(PPLSimplifyDedupRule.DEDUP_SIMPLIFY_RULE)
430+
.build();
431+
HepPlanner planner = new HepPlanner(program);
432+
planner.setRoot(raw);
433+
RelNode optimized = planner.findBestExp();
434+
435+
String optimizedExplain = optimized.explain();
436+
Assert.assertFalse(
437+
"Buggy order: PPLSimplifyDedupRule should not match after FilterMergeRule consolidates"
438+
+ " the bucket-non-null filter with the user where filter, so no LogicalDedup is"
439+
+ " produced:\n"
440+
+ optimizedExplain,
441+
optimizedExplain.contains("LogicalDedup"));
442+
Assert.assertTrue(
443+
"Buggy order: ROW_NUMBER window form should remain since simplification was skipped:\n"
444+
+ optimizedExplain,
445+
optimizedExplain.contains("ROW_NUMBER"));
446+
}
356447
}

0 commit comments

Comments
 (0)