From ac9092ced34cc51ea02c817118b83ce11373eb83 Mon Sep 17 00:00:00 2001 From: Heng Qian Date: Fri, 10 Apr 2026 15:09:31 +0800 Subject: [PATCH 1/7] [BugFix] Fix bin/chart NPE with null values (#5174) Bin bucket functions declared non-nullable VARCHAR return type, causing Calcite to optimize away IS NOT NULL filters and allowing null group keys to reach TreeMap which crashes in compareTo. Fix: declare return type as nullable using FORCE_NULLABLE and add nullsLast to chart command sorts as a defensive measure. Signed-off-by: Heng Qian --- .../sql/calcite/CalciteRelNodeVisitor.java | 5 +- .../udf/binning/MinspanBucketFunction.java | 3 +- .../udf/binning/RangeBucketFunction.java | 3 +- .../udf/binning/SpanBucketFunction.java | 3 +- .../calcite/remote/CalciteBinChartNullIT.java | 63 +++++++ .../rest-api-spec/test/issues/5174.yml | 81 +++++++++ .../ppl/calcite/CalcitePPLChartNullTest.java | 170 ++++++++++++++++++ 7 files changed, 323 insertions(+), 5 deletions(-) create mode 100644 integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteBinChartNullIT.java create mode 100644 integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5174.yml create mode 100644 ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLChartNullTest.java diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index e4e036da3a6..b165f5ccda0 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -3205,7 +3205,7 @@ public RelNode visitChart(Chart node, CalcitePlanContext context) { || node.getColumnSplit() == null || Objects.equals(config.limit, 0)) { // The output of chart is expected to be ordered by row split names - relBuilder.sort(relBuilder.field(0)); + relBuilder.sort(relBuilder.nullsLast(relBuilder.field(0))); return relBuilder.peek(); } @@ -3275,7 +3275,8 @@ public RelNode visitChart(Chart node, CalcitePlanContext context) { relBuilder.field(2)) .as(aggFieldName)); // The output of chart is expected to be ordered by row and column split names - relBuilder.sort(relBuilder.field(0), relBuilder.field(1)); + relBuilder.sort( + relBuilder.nullsLast(relBuilder.field(0)), relBuilder.nullsLast(relBuilder.field(1))); return relBuilder.peek(); } diff --git a/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/MinspanBucketFunction.java b/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/MinspanBucketFunction.java index 11e1a33afbd..fcc7d1a4640 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/MinspanBucketFunction.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/MinspanBucketFunction.java @@ -14,6 +14,7 @@ import org.apache.calcite.rex.RexCall; import org.apache.calcite.sql.type.ReturnTypes; import org.apache.calcite.sql.type.SqlReturnTypeInference; +import org.apache.calcite.sql.type.SqlTypeTransforms; import org.opensearch.sql.calcite.utils.PPLOperandTypes; import org.opensearch.sql.expression.function.ImplementorUDF; import org.opensearch.sql.expression.function.UDFOperandMetadata; @@ -43,7 +44,7 @@ public MinspanBucketFunction() { @Override public SqlReturnTypeInference getReturnTypeInference() { - return ReturnTypes.VARCHAR_2000; + return ReturnTypes.VARCHAR_2000.andThen(SqlTypeTransforms.FORCE_NULLABLE); } @Override diff --git a/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/RangeBucketFunction.java b/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/RangeBucketFunction.java index a8f2625b20f..e0b10803ea4 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/RangeBucketFunction.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/RangeBucketFunction.java @@ -14,6 +14,7 @@ import org.apache.calcite.rex.RexCall; import org.apache.calcite.sql.type.ReturnTypes; import org.apache.calcite.sql.type.SqlReturnTypeInference; +import org.apache.calcite.sql.type.SqlTypeTransforms; import org.opensearch.sql.calcite.utils.PPLOperandTypes; import org.opensearch.sql.expression.function.ImplementorUDF; import org.opensearch.sql.expression.function.UDFOperandMetadata; @@ -47,7 +48,7 @@ public RangeBucketFunction() { @Override public SqlReturnTypeInference getReturnTypeInference() { - return ReturnTypes.VARCHAR_2000; + return ReturnTypes.VARCHAR_2000.andThen(SqlTypeTransforms.FORCE_NULLABLE); } @Override diff --git a/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/SpanBucketFunction.java b/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/SpanBucketFunction.java index 6970e485525..8610eb8ee9c 100644 --- a/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/SpanBucketFunction.java +++ b/core/src/main/java/org/opensearch/sql/expression/function/udf/binning/SpanBucketFunction.java @@ -14,6 +14,7 @@ import org.apache.calcite.rex.RexCall; import org.apache.calcite.sql.type.ReturnTypes; import org.apache.calcite.sql.type.SqlReturnTypeInference; +import org.apache.calcite.sql.type.SqlTypeTransforms; import org.opensearch.sql.calcite.utils.PPLOperandTypes; import org.opensearch.sql.expression.function.ImplementorUDF; import org.opensearch.sql.expression.function.UDFOperandMetadata; @@ -41,7 +42,7 @@ public SpanBucketFunction() { @Override public SqlReturnTypeInference getReturnTypeInference() { - return ReturnTypes.VARCHAR_2000; + return ReturnTypes.VARCHAR_2000.andThen(SqlTypeTransforms.FORCE_NULLABLE); } @Override diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteBinChartNullIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteBinChartNullIT.java new file mode 100644 index 00000000000..83b3807ff14 --- /dev/null +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteBinChartNullIT.java @@ -0,0 +1,63 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.calcite.remote; + +import static org.opensearch.sql.legacy.TestsConstants.TEST_INDEX_BANK_WITH_NULL_VALUES; +import static org.opensearch.sql.util.MatcherUtils.rows; +import static org.opensearch.sql.util.MatcherUtils.schema; +import static org.opensearch.sql.util.MatcherUtils.verifyDataRows; +import static org.opensearch.sql.util.MatcherUtils.verifySchema; + +import java.io.IOException; +import org.json.JSONObject; +import org.junit.jupiter.api.Test; +import org.opensearch.sql.ppl.PPLIntegTestCase; + +/** Integration test for GitHub issue #5174: bin/chart NPE with null values. */ +public class CalciteBinChartNullIT extends PPLIntegTestCase { + @Override + public void init() throws Exception { + super.init(); + enableCalcite(); + loadIndex(Index.BANK_WITH_NULL_VALUES); + } + + @Test + public void testBinThenChartWithNullValuesShouldNotCauseNPE() throws IOException { + // bin balance span=10000 produces null for documents without a balance field. + // chart count() over bal_bin by gender should handle these null bin values safely. + JSONObject result = + executeQuery( + String.format( + "source=%s | bin balance span=10000 as bal_bin" + + " | chart count() over bal_bin by gender", + TEST_INDEX_BANK_WITH_NULL_VALUES)); + verifySchema( + result, + schema("bal_bin", "string"), + schema("gender", "string"), + schema("count()", "bigint")); + // Should only contain rows for non-null balance values (4 records with balance) + verifyDataRows( + result, + rows("0-10000", "M", 1), + rows("30000-40000", "F", 1), + rows("30000-40000", "M", 1), + rows("40000-50000", "F", 1)); + } + + @Test + public void testBinThenChartSingleGroupWithNullValues() throws IOException { + // chart with only row split (no column split): the simpler sort path + JSONObject result = + executeQuery( + String.format( + "source=%s | bin balance span=10000 as bal_bin | chart count() over bal_bin", + TEST_INDEX_BANK_WITH_NULL_VALUES)); + verifySchema(result, schema("bal_bin", "string"), schema("count()", "bigint")); + verifyDataRows(result, rows("0-10000", 1), rows("30000-40000", 2), rows("40000-50000", 1)); + } +} diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5174.yml b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5174.yml new file mode 100644 index 00000000000..90ff3846384 --- /dev/null +++ b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5174.yml @@ -0,0 +1,81 @@ +setup: + - do: + query.settings: + body: + transient: + plugins.calcite.enabled: true + + - do: + indices.create: + index: issue5174 + body: + settings: + number_of_shards: 1 + number_of_replicas: 0 + mappings: + properties: + count: + type: integer + category: + type: keyword + subcategory: + type: keyword + value: + type: double + ts: + type: date + + - do: + bulk: + refresh: true + body: + - '{"index": {"_index": "issue5174", "_id": "1"}}' + - '{"count": 1, "category": "A", "subcategory": "X", "value": 10.5, "ts": "2024-01-01"}' + - '{"index": {"_index": "issue5174", "_id": "2"}}' + - '{"count": 2, "category": "A", "subcategory": "Y", "value": 20.3, "ts": "2024-01-02"}' + - '{"index": {"_index": "issue5174", "_id": "3"}}' + - '{"count": 10, "category": "B", "subcategory": "X", "value": 100.0, "ts": "2024-01-03"}' + - '{"index": {"_index": "issue5174", "_id": "4"}}' + - '{"count": null, "category": "B", "subcategory": "Y", "value": null, "ts": "2024-01-04"}' + +--- +teardown: + - do: + indices.delete: + index: issue5174 + ignore_unavailable: true + - do: + query.settings: + body: + transient: + plugins.calcite.enabled: false + +--- +"Issue 5174: bin then chart with null values should not cause NPE": + - skip: + features: + - headers + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=issue5174 | bin value span=50 as val_bin | chart count() over val_bin by category + + - match: { total: 3 } + - match: { schema: [ { name: val_bin, type: string }, { name: category, type: string }, { name: "count()", type: bigint } ] } + +--- +"Issue 5174: bin then chart with single group and null values": + - skip: + features: + - headers + - do: + headers: + Content-Type: 'application/json' + ppl: + body: + query: source=issue5174 | bin value span=50 as val_bin | chart count() over val_bin + + - match: { total: 2 } + - match: { schema: [ { name: val_bin, type: string }, { name: "count()", type: bigint } ] } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLChartNullTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLChartNullTest.java new file mode 100644 index 00000000000..5b16b9d656e --- /dev/null +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLChartNullTest.java @@ -0,0 +1,170 @@ +/* + * Copyright OpenSearch Contributors + * SPDX-License-Identifier: Apache-2.0 + */ + +package org.opensearch.sql.ppl.calcite; + +import com.google.common.collect.ImmutableList; +import java.util.List; +import lombok.RequiredArgsConstructor; +import org.apache.calcite.DataContext; +import org.apache.calcite.config.CalciteConnectionConfig; +import org.apache.calcite.linq4j.Enumerable; +import org.apache.calcite.linq4j.Linq4j; +import org.apache.calcite.plan.RelTraitDef; +import org.apache.calcite.rel.RelCollations; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rel.type.RelDataTypeFactory; +import org.apache.calcite.rel.type.RelProtoDataType; +import org.apache.calcite.schema.ScannableTable; +import org.apache.calcite.schema.Schema; +import org.apache.calcite.schema.SchemaPlus; +import org.apache.calcite.schema.Statistic; +import org.apache.calcite.schema.Statistics; +import org.apache.calcite.sql.SqlCall; +import org.apache.calcite.sql.SqlNode; +import org.apache.calcite.sql.parser.SqlParser; +import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.calcite.test.CalciteAssert; +import org.apache.calcite.tools.Frameworks; +import org.apache.calcite.tools.Programs; +import org.checkerframework.checker.nullness.qual.Nullable; +import org.junit.Test; + +/** + * Unit test for GitHub issue #5174: bin/chart NPE with null values. + * + *

Verifies that the chart command generates correct logical plans when the input contains null + * values from binning, and that the sort operations properly handle nulls. + */ +public class CalcitePPLChartNullTest extends CalcitePPLAbstractTest { + + public CalcitePPLChartNullTest() { + super(CalciteAssert.SchemaSpec.SCOTT_WITH_TEMPORAL); + } + + @Override + protected Frameworks.ConfigBuilder config(CalciteAssert.SchemaSpec... schemaSpecs) { + final SchemaPlus rootSchema = Frameworks.createRootSchema(true); + final SchemaPlus schema = CalciteAssert.addSchema(rootSchema, schemaSpecs); + // Table with null values matching the issue's bounty-numbers schema + ImmutableList rows = + ImmutableList.of( + new Object[] {1, "A", "X", 10.5}, + new Object[] {2, "A", "Y", 20.3}, + new Object[] {10, "B", "X", 100.0}, + new Object[] {null, "B", "Y", null}); + schema.add("bounty_numbers", new BountyNumbersTable(rows)); + return Frameworks.newConfigBuilder() + .parserConfig(SqlParser.Config.DEFAULT) + .defaultSchema(schema) + .traitDefs((List) null) + .programs(Programs.heuristicJoinOrder(Programs.RULE_SET, true, 2)); + } + + @Test + public void testBinThenChartWithNullValuesLogicalPlan() { + String ppl = + "source=bounty_numbers | bin value span=50 as val_bin" + + " | chart count() over val_bin by category"; + RelNode root = getRelNode(ppl); + // Verify the SQL plan contains WHERE val_bin IS NOT NULL to filter null bin values, + // and NULLS LAST in ORDER BY for proper null handling in sort + String expectedSparkSql = + "SELECT `t2`.`val_bin`, CASE WHEN `t2`.`category` IS NULL THEN 'NULL' WHEN" + + " `t10`.`_row_number_chart_` <= 10 THEN `t2`.`category` ELSE 'OTHER' END" + + " `category`, SUM(`t2`.`count()`) `count()`\n" + + "FROM (SELECT `val_bin`, `category`, COUNT(*) `count()`\n" + + "FROM (SELECT `count`, `category`, `subcategory`, `value`," + + " SPAN_BUCKET(`value`, 50) `val_bin`\n" + + "FROM `scott`.`bounty_numbers`) `t`\n" + + "WHERE `val_bin` IS NOT NULL\n" + + "GROUP BY `val_bin`, `category`) `t2`\n" + + "LEFT JOIN (SELECT `category`, SUM(`count()`) `__grand_total__`, ROW_NUMBER() OVER" + + " (ORDER BY SUM(`count()`) DESC) `_row_number_chart_`\n" + + "FROM (SELECT `category`, COUNT(*) `count()`\n" + + "FROM (SELECT `count`, `category`, `subcategory`, `value`," + + " SPAN_BUCKET(`value`, 50) `val_bin`\n" + + "FROM `scott`.`bounty_numbers`) `t3`\n" + + "WHERE `val_bin` IS NOT NULL\n" + + "GROUP BY `val_bin`, `category`) `t7`\n" + + "WHERE `category` IS NOT NULL\n" + + "GROUP BY `category`) `t10` ON `t2`.`category` = `t10`.`category`\n" + + "GROUP BY `t2`.`val_bin`, CASE WHEN `t2`.`category` IS NULL THEN 'NULL' WHEN" + + " `t10`.`_row_number_chart_` <= 10 THEN `t2`.`category` ELSE 'OTHER' END\n" + + "ORDER BY `t2`.`val_bin` NULLS LAST, 2 NULLS LAST"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @Test + public void testBinThenChartSingleGroupWithNullValuesLogicalPlan() { + String ppl = + "source=bounty_numbers | bin value span=50 as val_bin | chart count() over val_bin"; + RelNode root = getRelNode(ppl); + // Verify null bin values are filtered and sort uses NULLS LAST + String expectedSparkSql = + "SELECT `val_bin`, COUNT(*) `count()`\n" + + "FROM (SELECT `count`, `category`, `subcategory`, `value`," + + " SPAN_BUCKET(`value`, 50) `val_bin`\n" + + "FROM `scott`.`bounty_numbers`) `t`\n" + + "WHERE `val_bin` IS NOT NULL\n" + + "GROUP BY `val_bin`\n" + + "ORDER BY `val_bin` NULLS LAST"; + verifyPPLToSparkSQL(root, expectedSparkSql); + } + + @RequiredArgsConstructor + public static class BountyNumbersTable implements ScannableTable { + private final ImmutableList rows; + + protected final RelProtoDataType protoRowType = + factory -> + factory + .builder() + .add("count", SqlTypeName.INTEGER) + .nullable(true) + .add("category", SqlTypeName.VARCHAR) + .nullable(true) + .add("subcategory", SqlTypeName.VARCHAR) + .nullable(true) + .add("value", SqlTypeName.DOUBLE) + .nullable(true) + .build(); + + @Override + public Enumerable<@Nullable Object[]> scan(DataContext root) { + return Linq4j.asEnumerable(rows); + } + + @Override + public RelDataType getRowType(RelDataTypeFactory typeFactory) { + return protoRowType.apply(typeFactory); + } + + @Override + public Statistic getStatistic() { + return Statistics.of(0d, ImmutableList.of(), RelCollations.createSingleton(0)); + } + + @Override + public Schema.TableType getJdbcTableType() { + return Schema.TableType.TABLE; + } + + @Override + public boolean isRolledUp(String column) { + return false; + } + + @Override + public boolean rolledUpColumnValidInsideAgg( + String column, + SqlCall call, + @Nullable SqlNode parent, + @Nullable CalciteConnectionConfig config) { + return false; + } + } +} From 7edbc2064f71df476f716c3d985b7529c9e23da4 Mon Sep 17 00:00:00 2001 From: Heng Qian Date: Fri, 10 Apr 2026 15:17:39 +0800 Subject: [PATCH 2/7] Remove stale /dedupe section from CLAUDE_GUIDE.md The dedupe command and scripts/comment-on-duplicates.sh were removed from main, but CLAUDE_GUIDE.md still referenced them, causing linkchecker CI failure. Signed-off-by: Heng Qian --- CLAUDE_GUIDE.md | 22 ---------------------- 1 file changed, 22 deletions(-) diff --git a/CLAUDE_GUIDE.md b/CLAUDE_GUIDE.md index f5eff489d65..034b403f5c6 100644 --- a/CLAUDE_GUIDE.md +++ b/CLAUDE_GUIDE.md @@ -30,25 +30,3 @@ Fix a PPL bug end-to-end or follow up on an existing PR. **Related files:** [`.claude/harness/ppl-bugfix-harness.md`](.claude/harness/ppl-bugfix-harness.md) ---- - -## `/dedupe` - -Find duplicate GitHub issues for a given issue. - -**Usage:** - -``` -/dedupe 1234 -``` - -**What it does:** - -1. Reads the target issue -2. Runs 3+ parallel search strategies to find potential duplicates (only older issues) -3. Verifies candidates by reading each one -4. Posts a structured comment on the issue listing 1-3 confirmed duplicates (if any) - -**Skips:** closed issues, broad feedback issues, issues already checked - -**Related files:** [`scripts/comment-on-duplicates.sh`](scripts/comment-on-duplicates.sh) From 1f7053da86bcfec97b6141a60a03d50b6a830d51 Mon Sep 17 00:00:00 2001 From: Heng Qian Date: Fri, 10 Apr 2026 15:32:25 +0800 Subject: [PATCH 3/7] Fix incorrect YAML test assertion and add datarows validation - Fix total count from 3 to 2 in YAML REST test (bin+chart with category produces 2 groups, not 3) - Add datarows assertions to both YAML test cases for row-level validation - Correct table statistics row count in unit test Signed-off-by: Heng Qian --- .../yamlRestTest/resources/rest-api-spec/test/issues/5174.yml | 4 +++- .../opensearch/sql/ppl/calcite/CalcitePPLChartNullTest.java | 2 +- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5174.yml b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5174.yml index 90ff3846384..c2f861b8194 100644 --- a/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5174.yml +++ b/integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/5174.yml @@ -62,8 +62,9 @@ teardown: body: query: source=issue5174 | bin value span=50 as val_bin | chart count() over val_bin by category - - match: { total: 3 } + - match: { total: 2 } - match: { schema: [ { name: val_bin, type: string }, { name: category, type: string }, { name: "count()", type: bigint } ] } + - match: { datarows: [ [ "0-50", "A", 2 ], [ "100-150", "B", 1 ] ] } --- "Issue 5174: bin then chart with single group and null values": @@ -79,3 +80,4 @@ teardown: - match: { total: 2 } - match: { schema: [ { name: val_bin, type: string }, { name: "count()", type: bigint } ] } + - match: { datarows: [ [ "0-50", 2 ], [ "100-150", 1 ] ] } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLChartNullTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLChartNullTest.java index 5b16b9d656e..6d62dbb2cde 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLChartNullTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLChartNullTest.java @@ -145,7 +145,7 @@ public RelDataType getRowType(RelDataTypeFactory typeFactory) { @Override public Statistic getStatistic() { - return Statistics.of(0d, ImmutableList.of(), RelCollations.createSingleton(0)); + return Statistics.of(4d, ImmutableList.of(), RelCollations.createSingleton(0)); } @Override From d41e598a726a44246025416df6c1d508bf878efc Mon Sep 17 00:00:00 2001 From: Heng Qian Date: Fri, 10 Apr 2026 15:41:15 +0800 Subject: [PATCH 4/7] Update ppl-bugfix-reference: prefer datarows match over length check YAML REST tests should assert actual row content, not just count. Learned from #5174 where total-only assertion missed incorrect values. Signed-off-by: Heng Qian --- .claude/harness/ppl-bugfix-reference.md | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/.claude/harness/ppl-bugfix-reference.md b/.claude/harness/ppl-bugfix-reference.md index 615f91fb75f..9437ab88e1a 100644 --- a/.claude/harness/ppl-bugfix-reference.md +++ b/.claude/harness/ppl-bugfix-reference.md @@ -109,9 +109,13 @@ teardown: headers: { Content-Type: 'application/json' } ppl: { body: { query: "source=test_issue_ | " } } - match: { total: } - - length: { datarows: } + - match: { datarows: [ [ , ], [ , ] ] } ``` +> **Always include `datarows` assertions** — verifying only `total` and `schema` will miss +> wrong values. Count the expected output groups carefully (e.g., for `chart ... by `, +> count distinct (row_split, col_split) groups after null filtering, not the number of input rows). + --- ## Symptom → Fix Path From ccb3283b0a39bfae56b9ce7357d5d41493d1bced Mon Sep 17 00:00:00 2001 From: Heng Qian Date: Fri, 10 Apr 2026 16:01:41 +0800 Subject: [PATCH 5/7] Improve harness: worktree tracking, completion gate, pwd reporting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add pwd/branch reporting at harness start so caller knows the working directory - Add Completion Gate with git status check to prevent uncommitted changes from being left behind - Skill Step 2B reuses existing worktree if PR branch is already checked out, instead of always creating a new one - Step 3 requires main agent to record worktree→PR mapping for directing future operations to the correct directory Signed-off-by: Heng Qian --- .claude/commands/ppl-bugfix.md | 37 ++++++++++++++++++++++++++ .claude/harness/ppl-bugfix-followup.md | 24 ++++++++++++++++- .claude/harness/ppl-bugfix-harness.md | 18 +++++++++++++ 3 files changed, 78 insertions(+), 1 deletion(-) diff --git a/.claude/commands/ppl-bugfix.md b/.claude/commands/ppl-bugfix.md index e12cefce9c8..f2ac6d6807d 100644 --- a/.claude/commands/ppl-bugfix.md +++ b/.claude/commands/ppl-bugfix.md @@ -83,6 +83,33 @@ Agent( ### 2B: Follow-up +Before dispatching, check if an existing worktree already has the PR branch checked out: + +```bash +# List worktrees and find one on the PR branch +for wt in .claude/worktrees/agent-*/; do + branch=$(git -C "$wt" branch --show-current 2>/dev/null) + if [ "$branch" = "" ]; then + echo "REUSE: $wt (branch: $branch)" + fi +done +``` + +**If existing worktree found**: Do NOT use `isolation: "worktree"`. Pass the worktree path in the prompt so the subagent works there directly. + +``` +Agent( + mode: "", + name: "bugfix-", + description: "PPL bugfix # followup", + prompt: "cd first, then read .claude/harness/ppl-bugfix-followup.md and follow it. + PR: (), Issue: # + Working directory: " +) +``` + +**If no existing worktree**: Create a new one. + ``` Agent( mode: "", @@ -100,6 +127,16 @@ After all subagents complete, report a summary for each: - Classification, fix summary, PR URL, worktree path and branch, items needing human attention (2A) - What was addressed, current PR state, whether another round is needed (2B) +**Always include the worktree→PR mapping** from the subagent's output, e.g.: + +``` +Worktree: /path/to/.claude/worktrees/agent-xxxx +Branch: bugfix-1234 +PR: #5678 +``` + +**Important**: After reporting, the main agent must remember this mapping. When the user later asks to make changes to the PR (e.g., "commit this to PR #5678"), operate in the worktree directory — not the main session directory. + ## Subagent Lifecycle Subagents are task-scoped. They complete and release context — they cannot poll for events. diff --git a/.claude/harness/ppl-bugfix-followup.md b/.claude/harness/ppl-bugfix-followup.md index b9eb18d6296..4b31ab6029e 100644 --- a/.claude/harness/ppl-bugfix-followup.md +++ b/.claude/harness/ppl-bugfix-followup.md @@ -6,9 +6,18 @@ --- +## Report Working Directory + +```bash +echo "Worktree: $(pwd)" +echo "Branch: $(git branch --show-current)" +``` + +Include this in your output so the caller knows where changes are happening. + ## Reconstruct Context -The follow-up agent runs in a fresh worktree. First checkout the PR branch, then load state: +First checkout the PR branch, then load state: ```bash # Checkout the PR branch in this worktree @@ -101,3 +110,16 @@ For each comment addressed (bot or human): - **Did the follow-up workflow itself miss this signal?** → Update this file If any improvement is needed, make the edit and include it in the same commit. + +## Completion Gate + +Before reporting "done": + +1. Run `git status --porcelain` — if any uncommitted changes remain, commit and push them. This includes harness edits from Retrospective. +2. Report in your final output: + +``` +Worktree: +Branch: +PR: +``` diff --git a/.claude/harness/ppl-bugfix-harness.md b/.claude/harness/ppl-bugfix-harness.md index 5845385b550..0819d71fc5b 100644 --- a/.claude/harness/ppl-bugfix-harness.md +++ b/.claude/harness/ppl-bugfix-harness.md @@ -2,6 +2,15 @@ ## Phase 0: Triage +### 0.0 Report Working Directory + +```bash +echo "Worktree: $(pwd)" +echo "Branch: $(git branch --show-current)" +``` + +Include this in your output so the caller knows where changes are happening. + ### 0.1 Load & Reproduce ```bash @@ -126,6 +135,8 @@ EOF ## Completion Gate +Run `git status --porcelain` — if any uncommitted changes remain, commit and push them before proceeding. + Do NOT report "done" until every item below is checked. List each in your final report: - [ ] **Unit tests**: New test class or methods @@ -149,3 +160,10 @@ If any item is blocked, report which and why. - [ ] New pattern? Add to Case Index. Include harness improvements in the same PR. + +Report in your final output: +``` +Worktree: +Branch: +PR: +``` From b423fcff7c66a341919bafb96e8736c65a59e7e2 Mon Sep 17 00:00:00 2001 From: Heng Qian Date: Mon, 13 Apr 2026 10:42:49 +0800 Subject: [PATCH 6/7] Add CalciteBinChartNullIT to CalciteNoPushdownIT suite bin+chart with null values should also be tested with pushdown disabled. Signed-off-by: Heng Qian --- .../java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java | 1 + 1 file changed, 1 insertion(+) diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java index a56715845ab..f9db71f9251 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/CalciteNoPushdownIT.java @@ -25,6 +25,7 @@ CalciteAddColTotalsCommandIT.class, CalciteConvertCommandIT.class, CalciteArrayFunctionIT.class, + CalciteBinChartNullIT.class, CalciteBinCommandIT.class, CalciteConvertTZFunctionIT.class, CalciteCsvFormatIT.class, From 2c288d3339482512cdc80c0771a6b883873d92d6 Mon Sep 17 00:00:00 2001 From: Heng Qian Date: Mon, 13 Apr 2026 10:51:18 +0800 Subject: [PATCH 7/7] Prefer adding tests to existing IT suites over creating new ones Note in harness Phase 2 that new IT classes should be added to CalciteNoPushdownIT suite. Signed-off-by: Heng Qian --- .claude/harness/ppl-bugfix-harness.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.claude/harness/ppl-bugfix-harness.md b/.claude/harness/ppl-bugfix-harness.md index 0819d71fc5b..54c2103fc9b 100644 --- a/.claude/harness/ppl-bugfix-harness.md +++ b/.claude/harness/ppl-bugfix-harness.md @@ -62,7 +62,7 @@ Consult `.claude/harness/ppl-bugfix-reference.md` for test templates. Required deliverables: - Failing test reproducing the bug (written BEFORE the fix) - Unit tests covering happy path and edge cases -- Integration test (`*IT.java` extending `CalcitePPLIT`) +- Integration test — add to an existing `*IT.java` when possible; if creating a new one, add it to `CalciteNoPushdownIT` - YAML REST test at `integ-test/src/yamlRestTest/resources/rest-api-spec/test/issues/.yml` ---