From ac9092ced34cc51ea02c817118b83ce11373eb83 Mon Sep 17 00:00:00 2001
From: Heng Qian <qianheng@amazon.com>
Date: Fri, 10 Apr 2026 15:09:31 +0800
Subject: [PATCH 1/7] [BugFix] Fix bin/chart NPE with null values (#5174)

Bin bucket functions declared non-nullable VARCHAR return type, causing
Calcite to optimize away IS NOT NULL filters and allowing null group
keys to reach TreeMap which crashes in compareTo.

Fix: declare return type as nullable using FORCE_NULLABLE and add
nullsLast to chart command sorts as a defensive measure.

Signed-off-by: Heng Qian <qianheng@amazon.com>
---
 .../sql/calcite/CalciteRelNodeVisitor.java    |   5 +-
 .../udf/binning/MinspanBucketFunction.java    |   3 +-
 .../udf/binning/RangeBucketFunction.java      |   3 +-
 .../udf/binning/SpanBucketFunction.java       |   3 +-
 .../calcite/remote/CalciteBinChartNullIT.java |  63 +++++++
 .../rest-api-spec/test/issues/5174.yml        |  81 +++++++++
 .../ppl/calcite/CalcitePPLChartNullTest.java  | 170 ++++++++++++++++++
 7 files changed, 323 insertions(+), 5 deletions(-)
 create mode 100644 integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteBinChartNullIT.java
 create mode 100644 integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5174.yml
 create mode 100644 ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLChartNullTest.java

diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java
index e4e036da3a6..b165f5ccda0 100644
--- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java
+++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java
@@ -3205,7 +3205,7 @@ public RelNode visitChart(Chart node, CalcitePlanContext context) {
         || node.getColumnSplit() == null
         || Objects.equals(config.limit, 0)) {
       // The output of chart is expected to be ordered by row split names
-      relBuilder.sort(relBuilder.field(0));
+      relBuilder.sort(relBuilder.nullsLast(relBuilder.field(0)));
       return relBuilder.peek();
     }
 
@@ -3275,7 +3275,8 @@ public RelNode visitChart(Chart node, CalcitePlanContext context) {
                 relBuilder.field(2))
             .as(aggFieldName));
     // The output of chart is expected to be ordered by row and column split names
-    relBuilder.sort(relBuilder.field(0), relBuilder.field(1));
+    relBuilder.sort(
+        relBuilder.nullsLast(relBuilder.field(0)), relBuilder.nullsLast(relBuilder.field(1)));
     return relBuilder.peek();
   }
 
diff --git a/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/MinspanBucketFunction.java b/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/MinspanBucketFunction.java
index 11e1a33afbd..fcc7d1a4640 100644
--- a/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/MinspanBucketFunction.java
+++ b/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/MinspanBucketFunction.java
@@ -14,6 +14,7 @@
 import org.apache.calcite.rex.RexCall;
 import org.apache.calcite.sql.type.ReturnTypes;
 import org.apache.calcite.sql.type.SqlReturnTypeInference;
+import org.apache.calcite.sql.type.SqlTypeTransforms;
 import org.opensearch.sql.calcite.utils.PPLOperandTypes;
 import org.opensearch.sql.expression.function.ImplementorUDF;
 import org.opensearch.sql.expression.function.UDFOperandMetadata;
@@ -43,7 +44,7 @@ public MinspanBucketFunction() {
 
   @Override
   public SqlReturnTypeInference getReturnTypeInference() {
-    return ReturnTypes.VARCHAR_2000;
+    return ReturnTypes.VARCHAR_2000.andThen(SqlTypeTransforms.FORCE_NULLABLE);
   }
 
   @Override
diff --git a/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/RangeBucketFunction.java b/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/RangeBucketFunction.java
index a8f2625b20f..e0b10803ea4 100644
--- a/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/RangeBucketFunction.java
+++ b/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/RangeBucketFunction.java
@@ -14,6 +14,7 @@
 import org.apache.calcite.rex.RexCall;
 import org.apache.calcite.sql.type.ReturnTypes;
 import org.apache.calcite.sql.type.SqlReturnTypeInference;
+import org.apache.calcite.sql.type.SqlTypeTransforms;
 import org.opensearch.sql.calcite.utils.PPLOperandTypes;
 import org.opensearch.sql.expression.function.ImplementorUDF;
 import org.opensearch.sql.expression.function.UDFOperandMetadata;
@@ -47,7 +48,7 @@ public RangeBucketFunction() {
 
   @Override
   public SqlReturnTypeInference getReturnTypeInference() {
-    return ReturnTypes.VARCHAR_2000;
+    return ReturnTypes.VARCHAR_2000.andThen(SqlTypeTransforms.FORCE_NULLABLE);
   }
 
   @Override
diff --git a/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/SpanBucketFunction.java b/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/SpanBucketFunction.java
index 6970e485525..8610eb8ee9c 100644
--- a/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/SpanBucketFunction.java
+++ b/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/SpanBucketFunction.java
@@ -14,6 +14,7 @@
 import org.apache.calcite.rex.RexCall;
 import org.apache.calcite.sql.type.ReturnTypes;
 import org.apache.calcite.sql.type.SqlReturnTypeInference;
+import org.apache.calcite.sql.type.SqlTypeTransforms;
 import org.opensearch.sql.calcite.utils.PPLOperandTypes;
 import org.opensearch.sql.expression.function.ImplementorUDF;
 import org.opensearch.sql.expression.function.UDFOperandMetadata;
@@ -41,7 +42,7 @@ public SpanBucketFunction() {
 
   @Override
   public SqlReturnTypeInference getReturnTypeInference() {
-    return ReturnTypes.VARCHAR_2000;
+    return ReturnTypes.VARCHAR_2000.andThen(SqlTypeTransforms.FORCE_NULLABLE);
   }
 
   @Override
diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteBinChartNullIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteBinChartNullIT.java
new file mode 100644
index 00000000000..83b3807ff14
--- /dev/null
+++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteBinChartNullIT.java
@@ -0,0 +1,63 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.calcite.remote;
+
+import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BANK_WITH_NULL_VALUES;
+import static org.opensearch.sql.util.MatcherUtils.rows;
+import static org.opensearch.sql.util.MatcherUtils.schema;
+import static org.opensearch.sql.util.MatcherUtils.verifyDataRows;
+import static org.opensearch.sql.util.MatcherUtils.verifySchema;
+
+import java.io.IOException;
+import org.json.JSONObject;
+import org.junit.jupiter.api.Test;
+import org.opensearch.sql.ppl.PPLIntegTestCase;
+
+/** Integration test for GitHub issue #5174: bin/chart NPE with null values. */
+public class CalciteBinChartNullIT extends PPLIntegTestCase {
+  @Override
+  public void init() throws Exception {
+    super.init();
+    enableCalcite();
+    loadIndex(Index.BANK_WITH_NULL_VALUES);
+  }
+
+  @Test
+  public void testBinThenChartWithNullValuesShouldNotCauseNPE() throws IOException {
+    // bin balance span=10000 produces null for documents without a balance field.
+    // chart count() over bal_bin by gender should handle these null bin values safely.
+    JSONObject result =
+        executeQuery(
+            String.format(
+                "source=%s | bin balance span=10000 as bal_bin"
+                    + " | chart count() over bal_bin by gender",
+                TEST_INDEX_BANK_WITH_NULL_VALUES));
+    verifySchema(
+        result,
+        schema("bal_bin", "string"),
+        schema("gender", "string"),
+        schema("count()", "bigint"));
+    // Should only contain rows for non-null balance values (4 records with balance)
+    verifyDataRows(
+        result,
+        rows("0-10000", "M", 1),
+        rows("30000-40000", "F", 1),
+        rows("30000-40000", "M", 1),
+        rows("40000-50000", "F", 1));
+  }
+
+  @Test
+  public void testBinThenChartSingleGroupWithNullValues() throws IOException {
+    // chart with only row split (no column split): the simpler sort path
+    JSONObject result =
+        executeQuery(
+            String.format(
+                "source=%s | bin balance span=10000 as bal_bin | chart count() over bal_bin",
+                TEST_INDEX_BANK_WITH_NULL_VALUES));
+    verifySchema(result, schema("bal_bin", "string"), schema("count()", "bigint"));
+    verifyDataRows(result, rows("0-10000", 1), rows("30000-40000", 2), rows("40000-50000", 1));
+  }
+}
diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5174.yml b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5174.yml
new file mode 100644
index 00000000000..90ff3846384
--- /dev/null
+++ b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5174.yml
@@ -0,0 +1,81 @@
+setup:
+  - do:
+      query.settings:
+        body:
+          transient:
+            plugins.calcite.enabled: true
+
+  - do:
+      indices.create:
+        index: issue5174
+        body:
+          settings:
+            number_of_shards: 1
+            number_of_replicas: 0
+          mappings:
+            properties:
+              count:
+                type: integer
+              category:
+                type: keyword
+              subcategory:
+                type: keyword
+              value:
+                type: double
+              ts:
+                type: date
+
+  - do:
+      bulk:
+        refresh: true
+        body:
+          - '{"index": {"_index": "issue5174", "_id": "1"}}'
+          - '{"count": 1, "category": "A", "subcategory": "X", "value": 10.5, "ts": "2024-01-01"}'
+          - '{"index": {"_index": "issue5174", "_id": "2"}}'
+          - '{"count": 2, "category": "A", "subcategory": "Y", "value": 20.3, "ts": "2024-01-02"}'
+          - '{"index": {"_index": "issue5174", "_id": "3"}}'
+          - '{"count": 10, "category": "B", "subcategory": "X", "value": 100.0, "ts": "2024-01-03"}'
+          - '{"index": {"_index": "issue5174", "_id": "4"}}'
+          - '{"count": null, "category": "B", "subcategory": "Y", "value": null, "ts": "2024-01-04"}'
+
+---
+teardown:
+  - do:
+      indices.delete:
+        index: issue5174
+        ignore_unavailable: true
+  - do:
+      query.settings:
+        body:
+          transient:
+            plugins.calcite.enabled: false
+
+---
+"Issue 5174: bin then chart with null values should not cause NPE":
+  - skip:
+      features:
+        - headers
+  - do:
+      headers:
+        Content-Type: 'application/json'
+      ppl:
+        body:
+          query: source=issue5174 | bin value span=50 as val_bin | chart count() over val_bin by category
+
+  - match: { total: 3 }
+  - match: { schema: [ { name: val_bin, type: string }, { name: category, type: string }, { name: "count()", type: bigint } ] }
+
+---
+"Issue 5174: bin then chart with single group and null values":
+  - skip:
+      features:
+        - headers
+  - do:
+      headers:
+        Content-Type: 'application/json'
+      ppl:
+        body:
+          query: source=issue5174 | bin value span=50 as val_bin | chart count() over val_bin
+
+  - match: { total: 2 }
+  - match: { schema: [ { name: val_bin, type: string }, { name: "count()", type: bigint } ] }
diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLChartNullTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLChartNullTest.java
new file mode 100644
index 00000000000..5b16b9d656e
--- /dev/null
+++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLChartNullTest.java
@@ -0,0 +1,170 @@
+/*
+ * Copyright OpenSearch Contributors
+ * SPDX-License-Identifier: Apache-2.0
+ */
+
+package org.opensearch.sql.ppl.calcite;
+
+import com.google.common.collect.ImmutableList;
+import java.util.List;
+import lombok.RequiredArgsConstructor;
+import org.apache.calcite.DataContext;
+import org.apache.calcite.config.CalciteConnectionConfig;
+import org.apache.calcite.linq4j.Enumerable;
+import org.apache.calcite.linq4j.Linq4j;
+import org.apache.calcite.plan.RelTraitDef;
+import org.apache.calcite.rel.RelCollations;
+import org.apache.calcite.rel.RelNode;
+import org.apache.calcite.rel.type.RelDataType;
+import org.apache.calcite.rel.type.RelDataTypeFactory;
+import org.apache.calcite.rel.type.RelProtoDataType;
+import org.apache.calcite.schema.ScannableTable;
+import org.apache.calcite.schema.Schema;
+import org.apache.calcite.schema.SchemaPlus;
+import org.apache.calcite.schema.Statistic;
+import org.apache.calcite.schema.Statistics;
+import org.apache.calcite.sql.SqlCall;
+import org.apache.calcite.sql.SqlNode;
+import org.apache.calcite.sql.parser.SqlParser;
+import org.apache.calcite.sql.type.SqlTypeName;
+import org.apache.calcite.test.CalciteAssert;
+import org.apache.calcite.tools.Frameworks;
+import org.apache.calcite.tools.Programs;
+import org.checkerframework.checker.nullness.qual.Nullable;
+import org.junit.Test;
+
+/**
+ * Unit test for GitHub issue #5174: bin/chart NPE with null values.
+ *
+ * <p>Verifies that the chart command generates correct logical plans when the input contains null
+ * values from binning, and that the sort operations properly handle nulls.
+ */
+public class CalcitePPLChartNullTest extends CalcitePPLAbstractTest {
+
+  public CalcitePPLChartNullTest() {
+    super(CalciteAssert.SchemaSpec.SCOTT_WITH_TEMPORAL);
+  }
+
+  @Override
+  protected Frameworks.ConfigBuilder config(CalciteAssert.SchemaSpec... schemaSpecs) {
+    final SchemaPlus rootSchema = Frameworks.createRootSchema(true);
+    final SchemaPlus schema = CalciteAssert.addSchema(rootSchema, schemaSpecs);
+    // Table with null values matching the issue's bounty-numbers schema
+    ImmutableList<Object[]> rows =
+        ImmutableList.of(
+            new Object[] {1, "A", "X", 10.5},
+            new Object[] {2, "A", "Y", 20.3},
+            new Object[] {10, "B", "X", 100.0},
+            new Object[] {null, "B", "Y", null});
+    schema.add("bounty_numbers", new BountyNumbersTable(rows));
+    return Frameworks.newConfigBuilder()
+        .parserConfig(SqlParser.Config.DEFAULT)
+        .defaultSchema(schema)
+        .traitDefs((List<RelTraitDef>) null)
+        .programs(Programs.heuristicJoinOrder(Programs.RULE_SET, true, 2));
+  }
+
+  @Test
+  public void testBinThenChartWithNullValuesLogicalPlan() {
+    String ppl =
+        "source=bounty_numbers | bin value span=50 as val_bin"
+            + " | chart count() over val_bin by category";
+    RelNode root = getRelNode(ppl);
+    // Verify the SQL plan contains WHERE val_bin IS NOT NULL to filter null bin values,
+    // and NULLS LAST in ORDER BY for proper null handling in sort
+    String expectedSparkSql =
+        "SELECT `t2`.`val_bin`, CASE WHEN `t2`.`category` IS NULL THEN 'NULL' WHEN"
+            + " `t10`.`_row_number_chart_` <= 10 THEN `t2`.`category` ELSE 'OTHER' END"
+            + " `category`, SUM(`t2`.`count()`) `count()`\n"
+            + "FROM (SELECT `val_bin`, `category`, COUNT(*) `count()`\n"
+            + "FROM (SELECT `count`, `category`, `subcategory`, `value`,"
+            + " SPAN_BUCKET(`value`, 50) `val_bin`\n"
+            + "FROM `scott`.`bounty_numbers`) `t`\n"
+            + "WHERE `val_bin` IS NOT NULL\n"
+            + "GROUP BY `val_bin`, `category`) `t2`\n"
+            + "LEFT JOIN (SELECT `category`, SUM(`count()`) `__grand_total__`, ROW_NUMBER() OVER"
+            + " (ORDER BY SUM(`count()`) DESC) `_row_number_chart_`\n"
+            + "FROM (SELECT `category`, COUNT(*) `count()`\n"
+            + "FROM (SELECT `count`, `category`, `subcategory`, `value`,"
+            + " SPAN_BUCKET(`value`, 50) `val_bin`\n"
+            + "FROM `scott`.`bounty_numbers`) `t3`\n"
+            + "WHERE `val_bin` IS NOT NULL\n"
+            + "GROUP BY `val_bin`, `category`) `t7`\n"
+            + "WHERE `category` IS NOT NULL\n"
+            + "GROUP BY `category`) `t10` ON `t2`.`category` = `t10`.`category`\n"
+            + "GROUP BY `t2`.`val_bin`, CASE WHEN `t2`.`category` IS NULL THEN 'NULL' WHEN"
+            + " `t10`.`_row_number_chart_` <= 10 THEN `t2`.`category` ELSE 'OTHER' END\n"
+            + "ORDER BY `t2`.`val_bin` NULLS LAST, 2 NULLS LAST";
+    verifyPPLToSparkSQL(root, expectedSparkSql);
+  }
+
+  @Test
+  public void testBinThenChartSingleGroupWithNullValuesLogicalPlan() {
+    String ppl =
+        "source=bounty_numbers | bin value span=50 as val_bin | chart count() over val_bin";
+    RelNode root = getRelNode(ppl);
+    // Verify null bin values are filtered and sort uses NULLS LAST
+    String expectedSparkSql =
+        "SELECT `val_bin`, COUNT(*) `count()`\n"
+            + "FROM (SELECT `count`, `category`, `subcategory`, `value`,"
+            + " SPAN_BUCKET(`value`, 50) `val_bin`\n"
+            + "FROM `scott`.`bounty_numbers`) `t`\n"
+            + "WHERE `val_bin` IS NOT NULL\n"
+            + "GROUP BY `val_bin`\n"
+            + "ORDER BY `val_bin` NULLS LAST";
+    verifyPPLToSparkSQL(root, expectedSparkSql);
+  }
+
+  @RequiredArgsConstructor
+  public static class BountyNumbersTable implements ScannableTable {
+    private final ImmutableList<Object[]> rows;
+
+    protected final RelProtoDataType protoRowType =
+        factory ->
+            factory
+                .builder()
+                .add("count", SqlTypeName.INTEGER)
+                .nullable(true)
+                .add("category", SqlTypeName.VARCHAR)
+                .nullable(true)
+                .add("subcategory", SqlTypeName.VARCHAR)
+                .nullable(true)
+                .add("value", SqlTypeName.DOUBLE)
+                .nullable(true)
+                .build();
+
+    @Override
+    public Enumerable<@Nullable Object[]> scan(DataContext root) {
+      return Linq4j.asEnumerable(rows);
+    }
+
+    @Override
+    public RelDataType getRowType(RelDataTypeFactory typeFactory) {
+      return protoRowType.apply(typeFactory);
+    }
+
+    @Override
+    public Statistic getStatistic() {
+      return Statistics.of(0d, ImmutableList.of(), RelCollations.createSingleton(0));
+    }
+
+    @Override
+    public Schema.TableType getJdbcTableType() {
+      return Schema.TableType.TABLE;
+    }
+
+    @Override
+    public boolean isRolledUp(String column) {
+      return false;
+    }
+
+    @Override
+    public boolean rolledUpColumnValidInsideAgg(
+        String column,
+        SqlCall call,
+        @Nullable SqlNode parent,
+        @Nullable CalciteConnectionConfig config) {
+      return false;
+    }
+  }
+}

From 7edbc2064f71df476f716c3d985b7529c9e23da4 Mon Sep 17 00:00:00 2001
From: Heng Qian <qianheng@amazon.com>
Date: Fri, 10 Apr 2026 15:17:39 +0800
Subject: [PATCH 2/7] Remove stale /dedupe section from CLAUDE_GUIDE.md

The dedupe command and scripts/comment-on-duplicates.sh were removed
from main, but CLAUDE_GUIDE.md still referenced them, causing
linkchecker CI failure.

Signed-off-by: Heng Qian <qianheng@amazon.com>
---
 CLAUDE_GUIDE.md | 22 ----------------------
 1 file changed, 22 deletions(-)

diff --git a/CLAUDE_GUIDE.md b/CLAUDE_GUIDE.md
index f5eff489d65..034b403f5c6 100644
--- a/CLAUDE_GUIDE.md
+++ b/CLAUDE_GUIDE.md
@@ -30,25 +30,3 @@ Fix a PPL bug end-to-end or follow up on an existing PR.
 
 **Related files:** [`.claude/harness/ppl-bugfix-harness.md`](.claude/harness/ppl-bugfix-harness.md)
 
----
-
-## `/dedupe`
-
-Find duplicate GitHub issues for a given issue.
-
-**Usage:**
-
-```
-/dedupe 1234
-```
-
-**What it does:**
-
-1. Reads the target issue
-2. Runs 3+ parallel search strategies to find potential duplicates (only older issues)
-3. Verifies candidates by reading each one
-4. Posts a structured comment on the issue listing 1-3 confirmed duplicates (if any)
-
-**Skips:** closed issues, broad feedback issues, issues already checked
-
-**Related files:** [`scripts/comment-on-duplicates.sh`](scripts/comment-on-duplicates.sh)

From 1f7053da86bcfec97b6141a60a03d50b6a830d51 Mon Sep 17 00:00:00 2001
From: Heng Qian <qianheng@amazon.com>
Date: Fri, 10 Apr 2026 15:32:25 +0800
Subject: [PATCH 3/7] Fix incorrect YAML test assertion and add datarows
 validation

- Fix total count from 3 to 2 in YAML REST test (bin+chart with
  category produces 2 groups, not 3)
- Add datarows assertions to both YAML test cases for row-level
  validation
- Correct table statistics row count in unit test

Signed-off-by: Heng Qian <qianheng@amazon.com>
---
 .../yamlRestTest/resources/rest-api-spec/test/issues/5174.yml | 4 +++-
 .../opensearch/sql/ppl/calcite/CalcitePPLChartNullTest.java   | 2 +-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5174.yml b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5174.yml
index 90ff3846384..c2f861b8194 100644
--- a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5174.yml
+++ b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5174.yml
@@ -62,8 +62,9 @@ teardown:
         body:
           query: source=issue5174 | bin value span=50 as val_bin | chart count() over val_bin by category
 
-  - match: { total: 3 }
+  - match: { total: 2 }
   - match: { schema: [ { name: val_bin, type: string }, { name: category, type: string }, { name: "count()", type: bigint } ] }
+  - match: { datarows: [ [ "0-50", "A", 2 ], [ "100-150", "B", 1 ] ] }
 
 ---
 "Issue 5174: bin then chart with single group and null values":
@@ -79,3 +80,4 @@ teardown:
 
   - match: { total: 2 }
   - match: { schema: [ { name: val_bin, type: string }, { name: "count()", type: bigint } ] }
+  - match: { datarows: [ [ "0-50", 2 ], [ "100-150", 1 ] ] }
diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLChartNullTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLChartNullTest.java
index 5b16b9d656e..6d62dbb2cde 100644
--- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLChartNullTest.java
+++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLChartNullTest.java
@@ -145,7 +145,7 @@ public RelDataType getRowType(RelDataTypeFactory typeFactory) {
 
     @Override
     public Statistic getStatistic() {
-      return Statistics.of(0d, ImmutableList.of(), RelCollations.createSingleton(0));
+      return Statistics.of(4d, ImmutableList.of(), RelCollations.createSingleton(0));
     }
 
     @Override

From d41e598a726a44246025416df6c1d508bf878efc Mon Sep 17 00:00:00 2001
From: Heng Qian <qianheng@amazon.com>
Date: Fri, 10 Apr 2026 15:41:15 +0800
Subject: [PATCH 4/7] Update ppl-bugfix-reference: prefer datarows match over
 length check

YAML REST tests should assert actual row content, not just count.
Learned from #5174 where total-only assertion missed incorrect values.

Signed-off-by: Heng Qian <qianheng@amazon.com>
---
 .claude/harness/ppl-bugfix-reference.md | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/.claude/harness/ppl-bugfix-reference.md b/.claude/harness/ppl-bugfix-reference.md
index 615f91fb75f..9437ab88e1a 100644
--- a/.claude/harness/ppl-bugfix-reference.md
+++ b/.claude/harness/ppl-bugfix-reference.md
@@ -109,9 +109,13 @@ teardown:
       headers: { Content-Type: 'application/json' }
       ppl: { body: { query: "source=test_issue_<ISSUE> | <your PPL>" } }
   - match: { total: <expected> }
-  - length: { datarows: <expected> }
+  - match: { datarows: [ [ <row1_val1>, <row1_val2> ], [ <row2_val1>, <row2_val2> ] ] }
 ```
 
+> **Always include `datarows` assertions** — verifying only `total` and `schema` will miss
+> wrong values. Count the expected output groups carefully (e.g., for `chart ... by <col>`,
+> count distinct (row_split, col_split) groups after null filtering, not the number of input rows).
+
 ---
 
 ## Symptom → Fix Path

From ccb3283b0a39bfae56b9ce7357d5d41493d1bced Mon Sep 17 00:00:00 2001
From: Heng Qian <qianheng@amazon.com>
Date: Fri, 10 Apr 2026 16:01:41 +0800
Subject: [PATCH 5/7] Improve harness: worktree tracking, completion gate, pwd
 reporting
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Add pwd/branch reporting at harness start so caller knows the
  working directory
- Add Completion Gate with git status check to prevent uncommitted
  changes from being left behind
- Skill Step 2B reuses existing worktree if PR branch is already
  checked out, instead of always creating a new one
- Step 3 requires main agent to record worktree→PR mapping for
  directing future operations to the correct directory

Signed-off-by: Heng Qian <qianheng@amazon.com>
---
 .claude/commands/ppl-bugfix.md         | 37 ++++++++++++++++++++++++++
 .claude/harness/ppl-bugfix-followup.md | 24 ++++++++++++++++-
 .claude/harness/ppl-bugfix-harness.md  | 18 +++++++++++++
 3 files changed, 78 insertions(+), 1 deletion(-)

diff --git a/.claude/commands/ppl-bugfix.md b/.claude/commands/ppl-bugfix.md
index e12cefce9c8..f2ac6d6807d 100644
--- a/.claude/commands/ppl-bugfix.md
+++ b/.claude/commands/ppl-bugfix.md
@@ -83,6 +83,33 @@ Agent(
 
 ### 2B: Follow-up
 
+Before dispatching, check if an existing worktree already has the PR branch checked out:
+
+```bash
+# List worktrees and find one on the PR branch
+for wt in .claude/worktrees/agent-*/; do
+  branch=$(git -C "$wt" branch --show-current 2>/dev/null)
+  if [ "$branch" = "<pr_branch>" ]; then
+    echo "REUSE: $wt (branch: $branch)"
+  fi
+done
+```
+
+**If existing worktree found**: Do NOT use `isolation: "worktree"`. Pass the worktree path in the prompt so the subagent works there directly.
+
+```
+Agent(
+  mode: "<resolved_mode>",
+  name: "bugfix-<issue_number>",
+  description: "PPL bugfix #<issue_number> followup",
+  prompt: "cd <worktree_path> first, then read .claude/harness/ppl-bugfix-followup.md and follow it.
+           PR: <pr_number> (<pr_url>), Issue: #<issue_number>
+           Working directory: <worktree_path>"
+)
+```
+
+**If no existing worktree**: Create a new one.
+
 ```
 Agent(
   mode: "<resolved_mode>",
@@ -100,6 +127,16 @@ After all subagents complete, report a summary for each:
 - Classification, fix summary, PR URL, worktree path and branch, items needing human attention (2A)
 - What was addressed, current PR state, whether another round is needed (2B)
 
+**Always include the worktree→PR mapping** from the subagent's output, e.g.:
+
+```
+Worktree: /path/to/.claude/worktrees/agent-xxxx
+Branch: bugfix-1234
+PR: #5678
+```
+
+**Important**: After reporting, the main agent must remember this mapping. When the user later asks to make changes to the PR (e.g., "commit this to PR #5678"), operate in the worktree directory — not the main session directory.
+
 ## Subagent Lifecycle
 
 Subagents are task-scoped. They complete and release context — they cannot poll for events.
diff --git a/.claude/harness/ppl-bugfix-followup.md b/.claude/harness/ppl-bugfix-followup.md
index b9eb18d6296..4b31ab6029e 100644
--- a/.claude/harness/ppl-bugfix-followup.md
+++ b/.claude/harness/ppl-bugfix-followup.md
@@ -6,9 +6,18 @@
 
 ---
 
+## Report Working Directory
+
+```bash
+echo "Worktree: $(pwd)"
+echo "Branch: $(git branch --show-current)"
+```
+
+Include this in your output so the caller knows where changes are happening.
+
 ## Reconstruct Context
 
-The follow-up agent runs in a fresh worktree. First checkout the PR branch, then load state:
+First checkout the PR branch, then load state:
 
 ```bash
 # Checkout the PR branch in this worktree
@@ -101,3 +110,16 @@ For each comment addressed (bot or human):
 - **Did the follow-up workflow itself miss this signal?** → Update this file
 
 If any improvement is needed, make the edit and include it in the same commit.
+
+## Completion Gate
+
+Before reporting "done":
+
+1. Run `git status --porcelain` — if any uncommitted changes remain, commit and push them. This includes harness edits from Retrospective.
+2. Report in your final output:
+
+```
+Worktree: <absolute path>
+Branch: <branch name>
+PR: <pr_number>
+```
diff --git a/.claude/harness/ppl-bugfix-harness.md b/.claude/harness/ppl-bugfix-harness.md
index 5845385b550..0819d71fc5b 100644
--- a/.claude/harness/ppl-bugfix-harness.md
+++ b/.claude/harness/ppl-bugfix-harness.md
@@ -2,6 +2,15 @@
 
 ## Phase 0: Triage
 
+### 0.0 Report Working Directory
+
+```bash
+echo "Worktree: $(pwd)"
+echo "Branch: $(git branch --show-current)"
+```
+
+Include this in your output so the caller knows where changes are happening.
+
 ### 0.1 Load & Reproduce
 
 ```bash
@@ -126,6 +135,8 @@ EOF
 
 ## Completion Gate
 
+Run `git status --porcelain` — if any uncommitted changes remain, commit and push them before proceeding.
+
 Do NOT report "done" until every item below is checked. List each in your final report:
 
 - [ ] **Unit tests**: New test class or methods
@@ -149,3 +160,10 @@ If any item is blocked, report which and why.
 - [ ] New pattern? Add to Case Index.
 
 Include harness improvements in the same PR.
+
+Report in your final output:
+```
+Worktree: <absolute path>
+Branch: <branch name>
+PR: <pr_number>
+```

From b423fcff7c66a341919bafb96e8736c65a59e7e2 Mon Sep 17 00:00:00 2001
From: Heng Qian <qianheng@amazon.com>
Date: Mon, 13 Apr 2026 10:42:49 +0800
Subject: [PATCH 6/7] Add CalciteBinChartNullIT to CalciteNoPushdownIT suite

bin+chart with null values should also be tested with pushdown disabled.

Signed-off-by: Heng Qian <qianheng@amazon.com>
---
 .../java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java     | 1 +
 1 file changed, 1 insertion(+)

diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java
index a56715845ab..f9db71f9251 100644
--- a/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java
+++ b/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java
@@ -25,6 +25,7 @@
   CalciteAddColTotalsCommandIT.class,
   CalciteConvertCommandIT.class,
   CalciteArrayFunctionIT.class,
+  CalciteBinChartNullIT.class,
   CalciteBinCommandIT.class,
   CalciteConvertTZFunctionIT.class,
   CalciteCsvFormatIT.class,

From 2c288d3339482512cdc80c0771a6b883873d92d6 Mon Sep 17 00:00:00 2001
From: Heng Qian <qianheng@amazon.com>
Date: Mon, 13 Apr 2026 10:51:18 +0800
Subject: [PATCH 7/7] Prefer adding tests to existing IT suites over creating
 new ones

Note in harness Phase 2 that new IT classes should be added to
CalciteNoPushdownIT suite.

Signed-off-by: Heng Qian <qianheng@amazon.com>
---
 .claude/harness/ppl-bugfix-harness.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.claude/harness/ppl-bugfix-harness.md b/.claude/harness/ppl-bugfix-harness.md
index 0819d71fc5b..54c2103fc9b 100644
--- a/.claude/harness/ppl-bugfix-harness.md
+++ b/.claude/harness/ppl-bugfix-harness.md
@@ -62,7 +62,7 @@ Consult `.claude/harness/ppl-bugfix-reference.md` for test templates.
 Required deliverables:
 - Failing test reproducing the bug (written BEFORE the fix)
 - Unit tests covering happy path and edge cases
-- Integration test (`*IT.java` extending `CalcitePPLIT`)
+- Integration test — add to an existing `*IT.java` when possible; if creating a new one, add it to `CalciteNoPushdownIT`
 - YAML REST test at `integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/<ISSUE>.yml`
 
 ---