Fix chained appendpipe planner mismatch (#5173)

songkant-aws · songkant-aws · commit e07ed449c6ee · 2026-04-08T16:54:19.000+08:00
Chained appendpipe queries can produce literal-only projections
during prepare-time field trimming. Calcite may simplify those
projections into Values, which can trigger planner mismatch
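assertions during execution. Preserve the Project shape in
OpenSearchRelFieldTrimmer for this narrow case (every projected
expression is a literal and the input is a Values or a global,
non-grouping Aggregate) and add YAML REST regression coverage for
double and triple appendpipe.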

Signed-off-by: Songkan Tang <songkant@amazon.com>
diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java
@@ -299,15 +299,22 @@ public RelNode visitFilter(Filter node, CalcitePlanContext context) {
   @Override
   public RelNode visitAppendPipe(AppendPipe node, CalcitePlanContext context) {
     visitChildren(node, context);
-    // Use the main plan from the relBuilder stack directly instead of re-visiting
-    // the parent AST. Re-visiting causes "belongs to a different planner" assertion
-    // when multiple appendpipe commands are chained.
-    RelNode mainNode = context.relBuilder.peek();
-    context.relBuilder.push(mainNode);
-    node.getSubQuery().accept(this, context);
+    UnresolvedPlan subqueryPlan = node.getSubQuery();
+    UnresolvedPlan childNode = subqueryPlan;
+    while (childNode.getChild() != null
+        && !childNode.getChild().isEmpty()
+        && !(childNode.getChild().getFirst() instanceof Values)) {
+      if (childNode.getChild().size() > 1) {
+        throw new RuntimeException("AppendPipe doesn't support multiply children subquery.");
+      }
+      childNode = (UnresolvedPlan) childNode.getChild().getFirst();
+    }
+    childNode.attach(node.getChild().getFirst());
+
+    subqueryPlan.accept(this, context);
 
     RelNode subPipelineNode = context.relBuilder.build();
-    mainNode = context.relBuilder.build();
+    RelNode mainNode = context.relBuilder.build();
     return mergeTableAndResolveColumnConflict(mainNode, subPipelineNode, context);
   }
 
diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchRelFieldTrimmer.java b/core/src/main/java/org/opensearch/sql/calcite/utils/OpenSearchRelFieldTrimmer.java
@@ -5,21 +5,31 @@
 
 package org.opensearch.sql.calcite.utils;
 
+import java.util.ArrayList;
 import java.util.LinkedHashSet;
 import java.util.List;
 import java.util.Set;
+import org.apache.calcite.linq4j.Ord;
 import org.apache.calcite.plan.RelOptUtil;
 import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.core.Aggregate;
+import org.apache.calcite.rel.core.CorrelationId;
+import org.apache.calcite.rel.core.Project;
+import org.apache.calcite.rel.core.Values;
 import org.apache.calcite.rel.type.RelDataType;
 import org.apache.calcite.rel.type.RelDataTypeField;
+import org.apache.calcite.rex.RexLiteral;
 import org.apache.calcite.rex.RexNode;
 import org.apache.calcite.rex.RexPermuteInputsShuttle;
+import org.apache.calcite.rex.RexSubQuery;
+import org.apache.calcite.rex.RexUtil;
 import org.apache.calcite.rex.RexVisitor;
 import org.apache.calcite.sql.validate.SqlValidator;
 import org.apache.calcite.sql2rel.RelFieldTrimmer;
 import org.apache.calcite.tools.RelBuilder;
 import org.apache.calcite.util.ImmutableBitSet;
 import org.apache.calcite.util.mapping.Mapping;
+import org.apache.calcite.util.mapping.MappingType;
 import org.apache.calcite.util.mapping.Mappings;
 import org.checkerframework.checker.nullness.qual.Nullable;
 import org.opensearch.sql.calcite.plan.rel.Dedup;
@@ -30,9 +40,94 @@
  * <p>This class extends Calcite's RelFieldTrimmer to support trimming customized operators.
  */
 public class OpenSearchRelFieldTrimmer extends RelFieldTrimmer {
+  private final RelBuilder openSearchRelBuilder;
 
   public OpenSearchRelFieldTrimmer(@Nullable SqlValidator validator, RelBuilder relBuilder) {
     super(validator, relBuilder);
+    this.openSearchRelBuilder = relBuilder;
+  }
+
+  @Override
+  public TrimResult trimFields(
+      Project project, ImmutableBitSet fieldsUsed, Set<RelDataTypeField> extraFields) {
+    final RelDataType rowType = project.getRowType();
+    final int fieldCount = rowType.getFieldCount();
+    final RelNode input = project.getInput();
+
+    final Set<RelDataTypeField> inputExtraFields = new LinkedHashSet<>(extraFields);
+    RelOptUtil.InputFinder inputFinder = new RelOptUtil.InputFinder(inputExtraFields);
+    for (Ord<RexNode> ord : Ord.zip(project.getProjects())) {
+      if (fieldsUsed.get(ord.i)) {
+        ord.e.accept(inputFinder);
+      }
+    }
+
+    List<RexSubQuery> subQueries = RexUtil.SubQueryCollector.collect(project);
+    Set<CorrelationId> correlationIds = RelOptUtil.getVariablesUsed(subQueries);
+    ImmutableBitSet requiredColumns = ImmutableBitSet.of();
+    if (!correlationIds.isEmpty()) {
+      assert correlationIds.size() == 1;
+      requiredColumns = RelOptUtil.correlationColumns(correlationIds.iterator().next(), project);
+    }
+
+    ImmutableBitSet finderFields = inputFinder.build();
+    ImmutableBitSet inputFieldsUsed =
+        ImmutableBitSet.builder().addAll(requiredColumns).addAll(finderFields).build();
+
+    TrimResult trimResult = trimChild(project, input, inputFieldsUsed, inputExtraFields);
+    RelNode newInput = trimResult.left;
+    final Mapping inputMapping = trimResult.right;
+
+    if (newInput == input && fieldsUsed.cardinality() == fieldCount) {
+      return result(project, Mappings.createIdentity(fieldCount));
+    }
+
+    if (fieldsUsed.cardinality() == 0) {
+      return dummyProject(fieldCount, newInput, project);
+    }
+
+    final List<RexNode> newProjects = new ArrayList<>();
+    final RexVisitor<RexNode> shuttle;
+    if (!correlationIds.isEmpty()) {
+      assert correlationIds.size() == 1;
+      shuttle =
+          new RexPermuteInputsShuttle(inputMapping, newInput) {
+            @Override
+            public RexNode visitSubQuery(RexSubQuery subQuery) {
+              subQuery = (RexSubQuery) super.visitSubQuery(subQuery);
+              return RelOptUtil.remapCorrelatesInSuqQuery(
+                  openSearchRelBuilder.getRexBuilder(),
+                  subQuery,
+                  correlationIds.iterator().next(),
+                  newInput.getRowType(),
+                  inputMapping);
+            }
+          };
+    } else {
+      shuttle = new RexPermuteInputsShuttle(inputMapping, newInput);
+    }
+
+    final Mapping mapping =
+        Mappings.create(MappingType.INVERSE_SURJECTION, fieldCount, fieldsUsed.cardinality());
+    for (Ord<RexNode> ord : Ord.zip(project.getProjects())) {
+      if (fieldsUsed.get(ord.i)) {
+        mapping.set(ord.i, newProjects.size());
+        RexNode newProjectExpr = ord.e.accept(shuttle);
+        newProjects.add(newProjectExpr);
+      }
+    }
+
+    final RelDataType newRowType =
+        RelOptUtil.permute(project.getCluster().getTypeFactory(), rowType, mapping);
+
+    if (shouldAvoidSimplifyValues(newProjects, newInput)) {
+      return result(
+          project.copy(project.getTraitSet(), newInput, newProjects, newRowType), mapping, project);
+    }
+
+    openSearchRelBuilder.push(newInput);
+    openSearchRelBuilder.project(newProjects, newRowType.getFieldNames(), false, correlationIds);
+    return result(openSearchRelBuilder.build(), mapping, project);
   }
 
   public TrimResult trimFields(
@@ -67,4 +162,19 @@ public TrimResult trimFields(
     // needs them for its condition.
     return result(dedup.copy(newInput, newDedupFields), inputMapping);
   }
+
+  private boolean shouldAvoidSimplifyValues(List<RexNode> projects, RelNode input) {
+    return projects.stream().allMatch(RexLiteral.class::isInstance) && isFixedRowCount(input);
+  }
+
+  private boolean isFixedRowCount(RelNode input) {
+    if (input instanceof Values) {
+      return true;
+    }
+    if (input instanceof Aggregate aggregate) {
+      return aggregate.getGroupSet().isEmpty()
+          && aggregate.getGroupType() == Aggregate.Group.SIMPLE;
+    }
+    return false;
+  }
 }
diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5173.yml b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5173.yml
@@ -0,0 +1,99 @@
+setup:
+  - do:
+      query.settings:
+        body:
+          transient:
+            plugins.calcite.enabled: true
+
+  - do:
+      indices.create:
+        index: issue5173
+        body:
+          settings:
+            number_of_shards: 1
+            number_of_replicas: 0
+          mappings:
+            properties:
+              gender:
+                type: keyword
+              age:
+                type: integer
+
+  - do:
+      bulk:
+        refresh: true
+        body:
+          - '{"index": {"_index": "issue5173", "_id": "1"}}'
+          - '{"gender": "F", "age": 10}'
+          - '{"index": {"_index": "issue5173", "_id": "2"}}'
+          - '{"gender": "F", "age": 20}'
+          - '{"index": {"_index": "issue5173", "_id": "3"}}'
+          - '{"gender": "M", "age": 30}'
+          - '{"index": {"_index": "issue5173", "_id": "4"}}'
+          - '{"gender": "M", "age": 40}'
+
+---
+teardown:
+  - do:
+      indices.delete:
+        index: issue5173
+        ignore_unavailable: true
+  - do:
+      query.settings:
+        body:
+          transient:
+            plugins.calcite.enabled: false
+
+---
+"Issue 5173: double appendpipe with different aggregations should succeed":
+  - skip:
+      features:
+        - headers
+  - do:
+      headers:
+        Content-Type: 'application/json'
+      ppl:
+        body:
+          query: "source=issue5173 | stats sum(age) as sum_age by gender | appendpipe [ stats avg(sum_age) as avg_sum_age ] | appendpipe [ stats max(sum_age) as max_sum_age ]"
+
+  - match: { total: 4 }
+  - match:
+      schema:
+        - { name: sum_age, type: bigint }
+        - { name: gender, type: string }
+        - { name: avg_sum_age, type: double }
+        - { name: max_sum_age, type: bigint }
+  - match:
+      datarows:
+        - [ 30, "F", null, null ]
+        - [ 70, "M", null, null ]
+        - [ null, null, 50.0, null ]
+        - [ null, null, null, 70 ]
+
+---
+"Issue 5173: triple appendpipe with different aggregations should succeed":
+  - skip:
+      features:
+        - headers
+  - do:
+      headers:
+        Content-Type: 'application/json'
+      ppl:
+        body:
+          query: "source=issue5173 | stats sum(age) as sum_age by gender | appendpipe [ stats avg(sum_age) as avg_sum_age ] | appendpipe [ stats max(sum_age) as max_sum_age ] | appendpipe [ stats min(sum_age) as min_sum_age ]"
+
+  - match: { total: 5 }
+  - match:
+      schema:
+        - { name: sum_age, type: bigint }
+        - { name: gender, type: string }
+        - { name: avg_sum_age, type: double }
+        - { name: max_sum_age, type: bigint }
+        - { name: min_sum_age, type: bigint }
+  - match:
+      datarows:
+        - [ 30, "F", null, null, null ]
+        - [ 70, "M", null, null, null ]
+        - [ null, null, 50.0, null, null ]
+        - [ null, null, null, 70, null ]
+        - [ null, null, null, null, 30 ]