From 27322fdc5bdbcf9e7a1967aa464fc4dfb16925d8 Mon Sep 17 00:00:00 2001 From: Kai Huang Date: Mon, 8 Jun 2026 11:07:00 -0700 Subject: [PATCH] Branch percentile and sum-null IT expectations for the analytics-engine route CalcitePPLAggregationIT.testPercentile, testSumNull, and testSumGroupByNullValue hard-coded expectations from the Calcite DSL-pushdown path, so they failed when run through the analytics-engine (DataFusion) backend via -Dtests.analytics.parquet_indices=true: - percentile() is approximate. DataFusion's t-digest interpolation returns 46576 for percentile(balance, 90) where the OpenSearch/Calcite percentile_approx returns 48086 (p50 agrees). Both are valid approximations. - SUM over an all-null bucket is null per the SQL spec. The DSL-pushdown path returns 0 (a known quirk, #3408); DataFusion follows the spec like Calcite-no-pushdown and returns null. Branch the expected values on the existing isAnalyticsParquetIndicesEnabled() helper, matching the pattern already used in StatsCommandIT.testSumWithNull. No production code change; both engine paths now pass. Signed-off-by: Kai Huang --- .../remote/CalcitePPLAggregationIT.java | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLAggregationIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLAggregationIT.java index ec80e27ba5a..bd4c68b85de 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLAggregationIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalcitePPLAggregationIT.java @@ -980,7 +980,11 @@ public void testPercentile() throws IOException { "source=%s | stats percentile(balance, 50) as p50, percentile(balance, 90) as p90", TEST_INDEX_BANK)); verifySchema(actual, schema("p50", "bigint"), schema("p90", "bigint")); - verifyDataRows(actual, rows(32838, 48086)); + // percentile() is approximate. The analytics-engine backend (DataFusion) uses a different + // t-digest interpolation than the Calcite/OpenSearch percentile_approx implementation, so p90 + // lands on a different value (p50 agrees). Both are valid approximations. + int expectedP90 = isAnalyticsParquetIndicesEnabled() ? 46576 : 48086; + verifyDataRows(actual, rows(32838, expectedP90)); } @Test @@ -990,14 +994,18 @@ public void testSumGroupByNullValue() throws IOException { String.format( "source=%s | stats sum(balance) as a by age", TEST_INDEX_BANK_WITH_NULL_VALUES)); verifySchema(response, schema("a", null, "bigint"), schema("age", null, "int")); + // SUM of an all-null bucket is null per the SQL spec. The DSL-pushdown path returns 0 instead + // (a known pushdown quirk); the analytics-engine backend (DataFusion) follows the spec like + // Calcite-no-pushdown and returns null. See testSumNull and #3408. + Object emptySum = (isPushdownDisabled() || isAnalyticsParquetIndicesEnabled()) ? null : 0; verifyDataRows( response, - rows(isPushdownDisabled() ? null : 0, null), + rows(emptySum, null), rows(32838, 28), rows(39225, 32), rows(4180, 33), rows(48086, 34), - rows(isPushdownDisabled() ? null : 0, 36)); + rows(emptySum, 36)); } @Test @@ -1061,7 +1069,9 @@ public void testSumNull() throws IOException { + " ],\n" + " \"datarows\": [\n" + " [\n" - + (isPushdownDisabled() ? " null\n" : " 0\n") + + ((isPushdownDisabled() || isAnalyticsParquetIndicesEnabled()) + ? " null\n" + : " 0\n") + " ]\n" + " ],\n" + " \"total\": 1,\n"