opensearch-project
diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java
Lines changed: 7 additions & 60 deletions
diff --git a/core/src/main/java/org/opensearch/sql/calcite/plan/rel/Dedup.java b/core/src/main/java/org/opensearch/sql/calcite/plan/rel/Dedup.java
Lines changed: 34 additions & 6 deletions
diff --git a/core/src/main/java/org/opensearch/sql/calcite/plan/rel/LogicalDedup.java b/core/src/main/java/org/opensearch/sql/calcite/plan/rel/LogicalDedup.java
Lines changed: 51 additions & 5 deletions
@@ -22,6 +22,7 @@
 import static org.opensearch.sql.calcite.utils.PlanUtils.ROW_NUMBER_COLUMN_FOR_SUBSEARCH;
 import static org.opensearch.sql.calcite.utils.PlanUtils.getRelation;
 import static org.opensearch.sql.calcite.utils.PlanUtils.getRexCall;
+import static org.opensearch.sql.calcite.utils.PlanUtils.stripInputSort;
 import static org.opensearch.sql.calcite.utils.PlanUtils.transformPlanToAttachChild;
 import static org.opensearch.sql.utils.SystemIndexUtils.DATASOURCES_TABLE_NAME;
 
@@ -48,16 +49,12 @@
 import org.apache.calcite.adapter.enumerable.RexToLixTranslator;
 import org.apache.calcite.plan.RelOptTable;
 import org.apache.calcite.plan.ViewExpanders;
-import org.apache.calcite.rel.BiRel;
 import org.apache.calcite.rel.RelCollation;
 import org.apache.calcite.rel.RelHomogeneousShuttle;
 import org.apache.calcite.rel.RelNode;
 import org.apache.calcite.rel.core.Aggregate;
 import org.apache.calcite.rel.core.JoinRelType;
-import org.apache.calcite.rel.core.SetOp;
 import org.apache.calcite.rel.core.Sort;
-import org.apache.calcite.rel.core.Uncollect;
-import org.apache.calcite.rel.logical.LogicalProject;
 import org.apache.calcite.rel.logical.LogicalSort;
 import org.apache.calcite.rel.logical.LogicalValues;
 import org.apache.calcite.rel.type.RelDataType;
@@ -766,57 +763,6 @@ public RelNode visitHead(Head node, CalcitePlanContext context) {
     return context.relBuilder.peek();
   }
 
-  /**
-   * Backtrack through the RelNode tree to find the first Sort node with non-empty collation. Stops
-   * at blocking operators that break ordering:
-   *
-   * <ul>
-   *   <li>Aggregate - aggregation destroys input ordering
-   *   <li>BiRel - covers Join, Correlate, and other binary relations
-   *   <li>SetOp - covers Union, Intersect, Except
-   *   <li>Uncollect - unnesting operation that may change ordering
-   *   <li>Project with window functions (RexOver) - ordering determined by window's ORDER BY
-   * </ul>
-   *
-   * @param node the starting RelNode to backtrack from
-   * @return the collation found, or null if no sort or blocking operator encountered
-   */
-  private RelCollation backtrackForCollation(RelNode node) {
-    while (node != null) {
-      // Check for blocking operators that destroy collation
-      // BiRel covers Join, Correlate, and other binary relations
-      // SetOp covers Union, Intersect, Except
-      // Uncollect unnests arrays/multisets which may change ordering
-      if (node instanceof Aggregate
-          || node instanceof BiRel
-          || node instanceof SetOp
-          || node instanceof Uncollect) {
-        return null;
-      }
-
-      // Project with window functions has ordering determined by the window's ORDER BY clause
-      // We should not destroy its output order by inserting a reversed sort
-      if (node instanceof LogicalProject && ((LogicalProject) node).containsOver()) {
-        return null;
-      }
-
-      // Check for Sort node with collation
-      if (node instanceof Sort) {
-        Sort sort = (Sort) node;
-        if (sort.getCollation() != null && !sort.getCollation().getFieldCollations().isEmpty()) {
-          return sort.getCollation();
-        }
-      }
-
-      // Continue to child node
-      if (node.getInputs().isEmpty()) {
-        break;
-      }
-      node = node.getInput(0);
-    }
-    return null;
-  }
-
   /**
    * Insert a reversed sort node after finding the original sort in the tree. This rebuilds the tree
    * with the reversed sort inserted right after the original sort.
@@ -899,7 +845,7 @@ public RelNode visitReverse(
     } else {
       // Collation not found on current node - try backtracking
       RelNode currentNode = context.relBuilder.peek();
-      RelCollation backtrackCollation = backtrackForCollation(currentNode);
+      RelCollation backtrackCollation = PlanUtils.findInputCollation(currentNode);
 
       if (backtrackCollation != null && !backtrackCollation.getFieldCollations().isEmpty()) {
         // Found collation through backtracking - rebuild tree with reversed sort
@@ -1765,7 +1711,7 @@ public RelNode visitJoin(Join node, CalcitePlanContext context) {
                 : duplicatedFieldNames.stream()
                     .map(a -> (RexNode) context.relBuilder.field(a))
                     .toList();
-        buildDedupNotNull(context.relBuilder, dedupeFields, allowedDuplication);
+        buildDedupNotNull(context.relBuilder, dedupeFields, allowedDuplication, null);
       }
       // add LogicalSystemLimit after dedup
       addSysLimitForJoinSubsearch(context);
@@ -1823,7 +1769,7 @@ public RelNode visitJoin(Join node, CalcitePlanContext context) {
         List<RexNode> dedupeFields =
             getRightColumnsInJoinCriteria(context.relBuilder, joinCondition);
 
-        buildDedupNotNull(context.relBuilder, dedupeFields, allowedDuplication);
+        buildDedupNotNull(context.relBuilder, dedupeFields, allowedDuplication, null);
       }
       // add LogicalSystemLimit after dedup
       addSysLimitForJoinSubsearch(context);
@@ -1999,10 +1945,11 @@ public RelNode visitDedupe(Dedupe node, CalcitePlanContext context) {
     // Columns to deduplicate
     List<RexNode> dedupeFields =
         node.getFields().stream().map(f -> rexVisitor.analyze(f, context)).toList();
+    RelCollation inputCollation = stripInputSort(context.relBuilder);
     if (keepEmpty) {
-      buildDedupOrNull(context.relBuilder, dedupeFields, allowedDuplication);
+      buildDedupOrNull(context.relBuilder, dedupeFields, allowedDuplication, inputCollation);
     } else {
-      buildDedupNotNull(context.relBuilder, dedupeFields, allowedDuplication);
+      buildDedupNotNull(context.relBuilder, dedupeFields, allowedDuplication, inputCollation);
     }
     return context.relBuilder.peek();
   }
 
@@ -6,10 +6,12 @@
 package org.opensearch.sql.calcite.plan.rel;
 
 import java.util.List;
+import javax.annotation.Nullable;
 import lombok.Getter;
 import org.apache.calcite.plan.RelOptCluster;
 import org.apache.calcite.plan.RelOptPlanner;
 import org.apache.calcite.plan.RelTraitSet;
+import org.apache.calcite.rel.RelCollation;
 import org.apache.calcite.rel.RelNode;
 import org.apache.calcite.rel.RelWriter;
 import org.apache.calcite.rel.SingleRel;
@@ -23,16 +25,33 @@ public abstract class Dedup extends SingleRel {
   final Integer allowedDuplication;
   final Boolean keepEmpty;
   final Boolean consecutive;
+  final @Nullable RelCollation inputCollation;
+
+  /**
+   * Field names of the row type that {@link #inputCollation} was captured against. Used as a
+   * name-based anchor so callers can resolve the collation's stale indices after a planner rule has
+   * narrowed or replaced the dedup's input (typically a scan absorbing a narrowing project).
+   *
+   * <p>Renames are handled by Calcite's own {@code Project.getMapping} propagation when a {@code
+   * Project} sits between dedup's old and new input — see {@code Dedup.copy}. This name list is
+   * only the fallback for cases where the replacement is not a {@code Project} (e.g. a scan that
+   * swaps in a narrower row type without a {@code Project} RelNode). Scans don't rename, so name
+   * equality is a stable identifier for that specific fallback.
+   *
+   * <p>{@code null} iff {@link #inputCollation} is {@code null}.
+   */
+  final @Nullable List<String> inputCollationFieldNames;
 
-  /** */
   protected Dedup(
       RelOptCluster cluster,
       RelTraitSet traitSet,
       RelNode input,
       List<RexNode> dedupeFields,
       Integer allowedDuplication,
       Boolean keepEmpty,
-      Boolean consecutive) {
+      Boolean consecutive,
+      @Nullable RelCollation inputCollation,
+      @Nullable List<String> inputCollationFieldNames) {
     super(cluster, traitSet, input);
     if (allowedDuplication <= 0) {
       throw new IllegalArgumentException("Number of duplicate events must be greater than 0");
@@ -44,6 +63,8 @@ protected Dedup(
     this.allowedDuplication = allowedDuplication;
     this.keepEmpty = keepEmpty;
     this.consecutive = consecutive;
+    this.inputCollation = inputCollation;
+    this.inputCollationFieldNames = inputCollationFieldNames;
   }
 
   @Override
@@ -54,7 +75,9 @@ public final RelNode copy(RelTraitSet traitSet, List<RelNode> inputs) {
         this.dedupeFields,
         this.allowedDuplication,
         this.keepEmpty,
-        this.consecutive);
+        this.consecutive,
+        this.inputCollation,
+        this.inputCollationFieldNames);
   }
 
   public abstract Dedup copy(
@@ -63,7 +86,9 @@ public abstract Dedup copy(
       List<RexNode> dedupeFields,
       Integer allowedDuplication,
       Boolean keepEmpty,
-      Boolean consecutive);
+      Boolean consecutive,
+      @Nullable RelCollation inputCollation,
+      @Nullable List<String> inputCollationFieldNames);
 
   public Dedup copy(RelNode input, List<RexNode> dedupeFields) {
     return this.copy(
@@ -72,7 +97,9 @@ public Dedup copy(RelNode input, List<RexNode> dedupeFields) {
         dedupeFields,
         this.allowedDuplication,
         this.keepEmpty,
-        this.consecutive);
+        this.consecutive,
+        this.inputCollation,
+        this.inputCollationFieldNames);
   }
 
   @Override
@@ -81,7 +108,8 @@ public RelWriter explainTerms(RelWriter pw) {
         .item("dedup_fields", dedupeFields)
         .item("allowed_dedup", allowedDuplication)
         .item("keepEmpty", keepEmpty)
-        .item("consecutive", consecutive);
+        .item("consecutive", consecutive)
+        .itemIf("inputCollation", inputCollation, inputCollation != null);
   }
 
   @Override
 
@@ -8,10 +8,12 @@
 import static org.opensearch.sql.calcite.plan.rule.PPLDedupConvertRule.DEDUP_CONVERT_RULE;
 
 import java.util.List;
+import javax.annotation.Nullable;
 import org.apache.calcite.plan.Convention;
 import org.apache.calcite.plan.RelOptCluster;
 import org.apache.calcite.plan.RelOptPlanner;
 import org.apache.calcite.plan.RelTraitSet;
+import org.apache.calcite.rel.RelCollation;
 import org.apache.calcite.rel.RelNode;
 import org.apache.calcite.rex.RexNode;
 
@@ -24,8 +26,19 @@ protected LogicalDedup(
       List<RexNode> dedupeFields,
       Integer allowedDuplication,
       Boolean keepEmpty,
-      Boolean consecutive) {
-    super(cluster, traitSet, input, dedupeFields, allowedDuplication, keepEmpty, consecutive);
+      Boolean consecutive,
+      @Nullable RelCollation inputCollation,
+      @Nullable List<String> inputCollationFieldNames) {
+    super(
+        cluster,
+        traitSet,
+        input,
+        dedupeFields,
+        allowedDuplication,
+        keepEmpty,
+        consecutive,
+        inputCollation,
+        inputCollationFieldNames);
   }
 
   @Override
@@ -35,10 +48,20 @@ public Dedup copy(
       List<RexNode> dedupeFields,
       Integer allowedDuplication,
       Boolean keepEmpty,
-      Boolean consecutive) {
+      Boolean consecutive,
+      @Nullable RelCollation inputCollation,
+      @Nullable List<String> inputCollationFieldNames) {
     assert traitSet.containsIfApplicable(Convention.NONE);
     return new LogicalDedup(
-        getCluster(), traitSet, input, dedupeFields, allowedDuplication, keepEmpty, consecutive);
+        getCluster(),
+        traitSet,
+        input,
+        dedupeFields,
+        allowedDuplication,
+        keepEmpty,
+        consecutive,
+        inputCollation,
+        inputCollationFieldNames);
   }
 
   public static LogicalDedup create(
@@ -47,10 +70,33 @@ public static LogicalDedup create(
       Integer allowedDuplication,
       Boolean keepEmpty,
       Boolean consecutive) {
+    return create(input, dedupeFields, allowedDuplication, keepEmpty, consecutive, null);
+  }
+
+  public static LogicalDedup create(
+      RelNode input,
+      List<RexNode> dedupeFields,
+      Integer allowedDuplication,
+      Boolean keepEmpty,
+      Boolean consecutive,
+      @Nullable RelCollation inputCollation) {
+    // Record the field names from the current input's row type so callers that encounter a stale
+    // collation (after a planner rule has swapped in a different, non-Project-derived input) can
+    // still resolve the sort keys to positions in the new input by name. Left null when no
+    // collation is supplied. See Dedup.inputCollationFieldNames.
+    List<String> fieldNames = inputCollation == null ? null : input.getRowType().getFieldNames();
     final RelOptCluster cluster = input.getCluster();
     RelTraitSet traitSet = cluster.traitSetOf(Convention.NONE);
     return new LogicalDedup(
-        cluster, traitSet, input, dedupeFields, allowedDuplication, keepEmpty, consecutive);
+        cluster,
+        traitSet,
+        input,
+        dedupeFields,
+        allowedDuplication,
+        keepEmpty,
+        consecutive,
+        inputCollation,
+        fieldNames);
   }
 
   @Override