From 536cc97dcc6bdd13ac2bc1a1931013b365174afd Mon Sep 17 00:00:00 2001 From: Songkan Tang Date: Thu, 25 Jun 2026 11:26:15 +0800 Subject: [PATCH 1/9] Preserve order for stream windows Make streamstats and trendline encode pipeline ordering explicitly in their Calcite RelNodes so DataFusion consumes order-sensitive window frames deterministically. Keep the change scoped to the command construction layer and add RelNode coverage for sorted streamstats and trendline windows. Signed-off-by: Songkan Tang --- .../sql/calcite/CalciteRelNodeVisitor.java | 85 ++++++++++++++++--- .../sql/calcite/utils/PlanUtils.java | 30 ++++--- .../calcite/CalcitePPLStreamstatsTest.java | 69 ++++++++++----- .../ppl/calcite/CalcitePPLTrendlineTest.java | 13 +++ 4 files changed, 153 insertions(+), 44 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index b07f308f91f..ea476d6d76c 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -65,10 +65,14 @@ import org.apache.calcite.rex.RexBuilder; import org.apache.calcite.rex.RexCall; import org.apache.calcite.rex.RexCorrelVariable; +import org.apache.calcite.rex.RexFieldCollation; import org.apache.calcite.rex.RexInputRef; import org.apache.calcite.rex.RexLiteral; import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexOver; +import org.apache.calcite.rex.RexShuttle; import org.apache.calcite.rex.RexVisitorImpl; +import org.apache.calcite.rex.RexWindow; import org.apache.calcite.rex.RexWindowBounds; import org.apache.calcite.sql.SqlKind; import org.apache.calcite.sql.fun.SqlLibraryOperators; @@ -2274,6 +2278,7 @@ public RelNode visitStreamWindow(StreamWindow node, CalcitePlanContext context) .relBuilder .aggregateCall(SqlStdOperatorTable.ROW_NUMBER) .over() + .orderBy(derivePipelineSortOrderKeys(context)) .rowsTo(RexWindowBounds.CURRENT_ROW) .as(ROW_NUMBER_COLUMN_FOR_STREAMSTATS); context.relBuilder.projectPlus(streamSeq); @@ -2290,22 +2295,28 @@ public RelNode visitStreamWindow(StreamWindow node, CalcitePlanContext context) new String[] {ROW_NUMBER_COLUMN_FOR_STREAMSTATS}); } - // Default: first get rawExpr - List overExpressions = - node.getWindowFunctionList().stream().map(w -> rexVisitor.analyze(w, context)).toList(); - - if (hasGroup) { - // only build sequence when there is by condition + List inputOrderKeys = derivePipelineSortOrderKeys(context); + if (hasGroup || !inputOrderKeys.isEmpty()) { + // streamstats is order-sensitive. Materialize input order before any grouped window can + // repartition rows, then make each window frame walk that sequence explicitly. RexNode streamSeq = context .relBuilder .aggregateCall(SqlStdOperatorTable.ROW_NUMBER) .over() + .orderBy(inputOrderKeys) .rowsTo(RexWindowBounds.CURRENT_ROW) .as(ROW_NUMBER_COLUMN_FOR_STREAMSTATS); context.relBuilder.projectPlus(streamSeq); + int seqColIndex = context.relBuilder.peek().getRowType().getFieldCount() - 1; + + List overExpressions = + node.getWindowFunctionList().stream() + .map(w -> rexVisitor.analyze(w, context)) + .map(rex -> addStreamSeqOrder(rex, seqColIndex, context)) + .toList(); - if (!node.isBucketNullable()) { + if (hasGroup && !node.isBucketNullable()) { // construct groupNotNull predicate List groupByList = groupList.stream().map(expr -> rexVisitor.analyze(expr, context)).toList(); @@ -2324,16 +2335,46 @@ public RelNode visitStreamWindow(StreamWindow node, CalcitePlanContext context) context.relBuilder.projectPlus(overExpressions); } - // resort when there is by condition context.relBuilder.sort(context.relBuilder.field(ROW_NUMBER_COLUMN_FOR_STREAMSTATS)); context.relBuilder.projectExcept(context.relBuilder.field(ROW_NUMBER_COLUMN_FOR_STREAMSTATS)); } else { + List overExpressions = + node.getWindowFunctionList().stream().map(w -> rexVisitor.analyze(w, context)).toList(); context.relBuilder.projectPlus(overExpressions); } return context.relBuilder.peek(); } + private RexNode addStreamSeqOrder(RexNode rex, int seqColIndex, CalcitePlanContext context) { + RexInputRef seqRef = context.relBuilder.field(seqColIndex); + return rex.accept( + new RexShuttle() { + @Override + public RexNode visitOver(RexOver over) { + RexOver recursed = (RexOver) super.visitOver(over); + RexWindow window = recursed.getWindow(); + if (!window.orderKeys.isEmpty()) { + return recursed; + } + RexFieldCollation seqOrder = new RexFieldCollation(seqRef, Set.of()); + return context.rexBuilder.makeOver( + recursed.getType(), + recursed.getAggOperator(), + recursed.getOperands(), + window.partitionKeys, + ImmutableList.of(seqOrder), + window.getLowerBound(), + window.getUpperBound(), + window.isRows(), + true, + false, + recursed.isDistinct(), + recursed.ignoreNulls()); + } + }); + } + private List wrapWindowFunctionsWithGroupNotNull( List overExpressions, RexNode groupNotNull, CalcitePlanContext context) { List wrappedOverExprs = new ArrayList<>(overExpressions.size()); @@ -2647,6 +2688,7 @@ private RelNode buildResetHelperColumns(CalcitePlanContext context, StreamWindow .relBuilder .aggregateCall(SqlStdOperatorTable.ROW_NUMBER) .over() + .orderBy(derivePipelineSortOrderKeys(context)) .rowsTo(RexWindowBounds.CURRENT_ROW) .as(ROW_NUMBER_COLUMN_FOR_STREAMSTATS); context.relBuilder.projectPlus(rowNum); @@ -2682,6 +2724,7 @@ private RelNode buildResetHelperColumns(CalcitePlanContext context, StreamWindow .aggregateCall( SqlStdOperatorTable.SUM, context.relBuilder.field("__reset_before_flag__")) .over() + .orderBy(context.relBuilder.field(ROW_NUMBER_COLUMN_FOR_STREAMSTATS)) .rowsTo(RexWindowBounds.CURRENT_ROW) .toRex(); RexNode sumAfterPrev = @@ -2690,6 +2733,7 @@ private RelNode buildResetHelperColumns(CalcitePlanContext context, StreamWindow .aggregateCall( SqlStdOperatorTable.SUM, context.relBuilder.field("__reset_after_flag__")) .over() + .orderBy(context.relBuilder.field(ROW_NUMBER_COLUMN_FOR_STREAMSTATS)) .rowsBetween( RexWindowBounds.UNBOUNDED_PRECEDING, RexWindowBounds.preceding(context.relBuilder.literal(1))) @@ -2884,6 +2928,21 @@ private static List deriveCollationOrderKeys(CalcitePlanContext context return orderKeys; } + /** Window {@code ORDER BY} keys only when the current pipeline contains an explicit sort. */ + private static List derivePipelineSortOrderKeys(CalcitePlanContext context) { + return hasSortInInput(context.relBuilder.peek()) + ? deriveCollationOrderKeys(context) + : List.of(); + } + + private static boolean hasSortInInput(RelNode rel) { + if (rel instanceof Sort) { + return true; + } + List inputs = rel.getInputs(); + return inputs.size() == 1 && hasSortInInput(inputs.getFirst()); + } + @Override public RelNode visitAppendCol(AppendCol node, CalcitePlanContext context) { // 1. resolve main plan @@ -3927,6 +3986,8 @@ public RelNode visitTrendline(Trendline node, CalcitePlanContext context) { } }); + List trendlineOrderKeys = derivePipelineSortOrderKeys(context); + List trendlineNodes = new ArrayList<>(); List aliases = new ArrayList<>(); node.getComputations() @@ -3948,7 +4009,7 @@ public RelNode visitTrendline(Trendline node, CalcitePlanContext context) { null, List.of(), List.of(), - List.of(), + trendlineOrderKeys, windowFrame); // CASE WHEN count() over (ROWS (windowSize-1) PRECEDING) > windowSize - 1 RexNode whenConditionExpr = @@ -3969,7 +4030,7 @@ public RelNode visitTrendline(Trendline node, CalcitePlanContext context) { field, List.of(), List.of(), - List.of(), + trendlineOrderKeys, windowFrame); break; case TrendlineType.WMA: @@ -3979,6 +4040,7 @@ public RelNode visitTrendline(Trendline node, CalcitePlanContext context) { field, trendlineComputation.getNumberOfDataPoints(), windowFrame, + trendlineOrderKeys, context); break; default: @@ -4006,6 +4068,7 @@ private RexNode buildWmaRexNode( RexNode field, Integer numberOfDataPoints, WindowFrame windowFrame, + List orderKeys, CalcitePlanContext context) { // Divisor: 1 + 2 + 3 + ... + windowSize, aka (windowSize * (windowSize + 1) / 2) @@ -4022,7 +4085,7 @@ private RexNode buildWmaRexNode( field, List.of(context.relBuilder.literal(i)), List.of(), - List.of(), + orderKeys, windowFrame); divider = context.relBuilder.call( diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java b/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java index f899f747421..a7f53569c66 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java @@ -201,9 +201,9 @@ static RexNode makeOver( // sum(x) / count(x) return context.relBuilder.call( SqlStdOperatorTable.DIVIDE, - sumOver(context, field, partitions, rows, lowerBound, upperBound), + sumOver(context, field, partitions, orderKeys, rows, lowerBound, upperBound), context.relBuilder.cast( - countOver(context, field, partitions, rows, lowerBound, upperBound), + countOver(context, field, partitions, orderKeys, rows, lowerBound, upperBound), SqlTypeName.DOUBLE)); // stddev_pop(x) ==> // power((sum(x * x) - sum(x) * sum(x) / count(x)) / count(x), 0.5) @@ -217,13 +217,17 @@ static RexNode makeOver( // var_samp(x) ==> // (sum(x * x) - sum(x) * sum(x) / count(x)) / (count(x) - 1) case STDDEV_POP: - return variance(context, field, partitions, rows, lowerBound, upperBound, true, true); + return variance( + context, field, partitions, orderKeys, rows, lowerBound, upperBound, true, true); case STDDEV_SAMP: - return variance(context, field, partitions, rows, lowerBound, upperBound, false, true); + return variance( + context, field, partitions, orderKeys, rows, lowerBound, upperBound, false, true); case VARPOP: - return variance(context, field, partitions, rows, lowerBound, upperBound, true, false); + return variance( + context, field, partitions, orderKeys, rows, lowerBound, upperBound, true, false); case VARSAMP: - return variance(context, field, partitions, rows, lowerBound, upperBound, false, false); + return variance( + context, field, partitions, orderKeys, rows, lowerBound, upperBound, false, false); case ROW_NUMBER: return withOver( context.relBuilder.aggregateCall(SqlStdOperatorTable.ROW_NUMBER), @@ -255,24 +259,26 @@ private static RexNode sumOver( CalcitePlanContext ctx, RexNode operation, List partitions, + List orderKeys, boolean rows, RexWindowBound lowerBound, RexWindowBound upperBound) { return withOver( - ctx.relBuilder.sum(operation), partitions, List.of(), rows, lowerBound, upperBound); + ctx.relBuilder.sum(operation), partitions, orderKeys, rows, lowerBound, upperBound); } private static RexNode countOver( CalcitePlanContext ctx, RexNode operation, List partitions, + List orderKeys, boolean rows, RexWindowBound lowerBound, RexWindowBound upperBound) { return withOver( ctx.relBuilder.count(ImmutableList.of(operation)), partitions, - List.of(), + orderKeys, rows, lowerBound, upperBound); @@ -301,16 +307,18 @@ private static RexNode variance( CalcitePlanContext ctx, RexNode operator, List partitions, + List orderKeys, boolean rows, RexWindowBound lowerBound, RexWindowBound upperBound, boolean biased, boolean sqrt) { RexNode argSquared = ctx.relBuilder.call(SqlStdOperatorTable.MULTIPLY, operator, operator); - RexNode sumArgSquared = sumOver(ctx, argSquared, partitions, rows, lowerBound, upperBound); - RexNode sum = sumOver(ctx, operator, partitions, rows, lowerBound, upperBound); + RexNode sumArgSquared = + sumOver(ctx, argSquared, partitions, orderKeys, rows, lowerBound, upperBound); + RexNode sum = sumOver(ctx, operator, partitions, orderKeys, rows, lowerBound, upperBound); RexNode sumSquared = ctx.relBuilder.call(SqlStdOperatorTable.MULTIPLY, sum, sum); - RexNode count = countOver(ctx, operator, partitions, rows, lowerBound, upperBound); + RexNode count = countOver(ctx, operator, partitions, orderKeys, rows, lowerBound, upperBound); RexNode countCast = ctx.relBuilder.cast(count, SqlTypeName.DOUBLE); RexNode avgSumSquared = ctx.relBuilder.call(SqlStdOperatorTable.DIVIDE, sumSquared, countCast); RexNode diff = ctx.relBuilder.call(SqlStdOperatorTable.MINUS, sumArgSquared, avgSumSquared); diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLStreamstatsTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLStreamstatsTest.java index 2e4b6a605dd..8b80f18bf6d 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLStreamstatsTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLStreamstatsTest.java @@ -29,7 +29,7 @@ public void testStreamstatsBy() { + " LogicalSort(sort0=[$8], dir0=[ASC])\n" + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + " SAL=[$5], COMM=[$6], DEPTNO=[$7], __stream_seq__=[$8], max(SAL)=[MAX($5) OVER" - + " (PARTITION BY $7 ROWS UNBOUNDED PRECEDING)])\n" + + " (PARTITION BY $7 ORDER BY $8 ROWS UNBOUNDED PRECEDING)])\n" + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + " SAL=[$5], COMM=[$6], DEPTNO=[$7], __stream_seq__=[ROW_NUMBER() OVER ()])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; @@ -37,8 +37,8 @@ public void testStreamstatsBy() { String expectedSparkSql = "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`, MAX(`SAL`)" - + " OVER (PARTITION BY `DEPTNO` ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)" - + " `max(SAL)`\n" + + " OVER (PARTITION BY `DEPTNO` ORDER BY `__stream_seq__` NULLS LAST ROWS BETWEEN" + + " UNBOUNDED PRECEDING AND CURRENT ROW) `max(SAL)`\n" + "FROM (SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`," + " ROW_NUMBER() OVER () `__stream_seq__`\n" + "FROM `scott`.`EMP`) `t`\n" @@ -56,8 +56,8 @@ public void testStreamstatsByNullBucket() { + " LogicalSort(sort0=[$8], dir0=[ASC])\n" + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + " SAL=[$5], COMM=[$6], DEPTNO=[$7], __stream_seq__=[$8], max(SAL)=[CASE(IS NOT" - + " NULL($7), MAX($5) OVER (PARTITION BY $7 ROWS UNBOUNDED PRECEDING), null:DECIMAL(7," - + " 2))])\n" + + " NULL($7), MAX($5) OVER (PARTITION BY $7 ORDER BY $8 ROWS UNBOUNDED PRECEDING)," + + " null:DECIMAL(7, 2))])\n" + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + " SAL=[$5], COMM=[$6], DEPTNO=[$7], __stream_seq__=[ROW_NUMBER() OVER ()])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; @@ -65,8 +65,9 @@ public void testStreamstatsByNullBucket() { String expectedSparkSql = "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`, CASE WHEN" - + " `DEPTNO` IS NOT NULL THEN MAX(`SAL`) OVER (PARTITION BY `DEPTNO` ROWS BETWEEN" - + " UNBOUNDED PRECEDING AND CURRENT ROW) ELSE NULL END `max(SAL)`\n" + + " `DEPTNO` IS NOT NULL THEN MAX(`SAL`) OVER (PARTITION BY `DEPTNO` ORDER BY" + + " `__stream_seq__` NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) ELSE" + + " NULL END `max(SAL)`\n" + "FROM (SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`," + " ROW_NUMBER() OVER () `__stream_seq__`\n" + "FROM `scott`.`EMP`) `t`\n" @@ -74,6 +75,26 @@ public void testStreamstatsByNullBucket() { verifyPPLToSparkSQL(root, expectedSparkSql); } + @Test + public void testStreamstatsByAfterSortOrdersWindowBySequence() { + String ppl = "source=EMP | sort - SAL | streamstats max(SAL) by DEPTNO"; + RelNode root = getRelNode(ppl); + + String plan = root.explain(); + assertTrue(plan.contains("__stream_seq__=[ROW_NUMBER() OVER (ORDER BY $5 DESC NULLS LAST)]")); + assertTrue(plan.contains("MAX($5) OVER (PARTITION BY $7 ORDER BY $8")); + } + + @Test + public void testStreamstatsAfterSortOrdersWindowBySequence() { + String ppl = "source=EMP | sort - SAL | streamstats max(SAL)"; + RelNode root = getRelNode(ppl); + + String plan = root.explain(); + assertTrue(plan.contains("__stream_seq__=[ROW_NUMBER() OVER (ORDER BY $5 DESC NULLS LAST)]")); + assertTrue(plan.contains("max(SAL)=[MAX($5) OVER (ORDER BY $8 ROWS UNBOUNDED PRECEDING)]")); + } + @Test public void testStreamstatsCurrent() { String ppl = "source=EMP | streamstats current = false max(SAL)"; @@ -123,7 +144,7 @@ public void testStreamstatsGlobal() { + " LogicalSort(sort0=[$8], dir0=[ASC])\n" + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + " SAL=[$5], COMM=[$6], DEPTNO=[$7], __stream_seq__=[$8], max(SAL)=[MAX($5) OVER" - + " (PARTITION BY $7 ROWS 4 PRECEDING)])\n" + + " (PARTITION BY $7 ORDER BY $8 ROWS 4 PRECEDING)])\n" + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + " SAL=[$5], COMM=[$6], DEPTNO=[$7], __stream_seq__=[ROW_NUMBER() OVER ()])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; @@ -131,7 +152,8 @@ public void testStreamstatsGlobal() { String expectedSparkSql = "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`, MAX(`SAL`)" - + " OVER (PARTITION BY `DEPTNO` ROWS BETWEEN 4 PRECEDING AND CURRENT ROW) `max(SAL)`\n" + + " OVER (PARTITION BY `DEPTNO` ORDER BY `__stream_seq__` NULLS LAST ROWS BETWEEN 4" + + " PRECEDING AND CURRENT ROW) `max(SAL)`\n" + "FROM (SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`," + " ROW_NUMBER() OVER () `__stream_seq__`\n" + "FROM `scott`.`EMP`) `t`\n" @@ -152,8 +174,9 @@ public void testStreamstatsReset() { + " 11}])\n" + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + " SAL=[$5], COMM=[$6], DEPTNO=[$7], __stream_seq__=[$8], __reset_before_flag__=[$9]," - + " __reset_after_flag__=[$10], __seg_id__=[+(SUM($9) OVER (ROWS UNBOUNDED PRECEDING)," - + " COALESCE(SUM($10) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING), 0))])\n" + + " __reset_after_flag__=[$10], __seg_id__=[+(SUM($9) OVER (ORDER BY $8 ROWS" + + " UNBOUNDED PRECEDING), COALESCE(SUM($10) OVER (ORDER BY $8 ROWS BETWEEN UNBOUNDED" + + " PRECEDING AND 1 PRECEDING), 0))])\n" + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + " SAL=[$5], COMM=[$6], DEPTNO=[$7], __stream_seq__=[ROW_NUMBER() OVER ()]," + " __reset_before_flag__=[CASE(>($5, 100), 1, 0)], __reset_after_flag__=[CASE(<($5," @@ -167,8 +190,8 @@ public void testStreamstatsReset() { + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3]," + " HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], __stream_seq__=[$8]," + " __reset_before_flag__=[$9], __reset_after_flag__=[$10], __seg_id__=[+(SUM($9) OVER" - + " (ROWS UNBOUNDED PRECEDING), COALESCE(SUM($10) OVER (ROWS BETWEEN UNBOUNDED" - + " PRECEDING AND 1 PRECEDING), 0))])\n" + + " (ORDER BY $8 ROWS UNBOUNDED PRECEDING), COALESCE(SUM($10) OVER (ORDER BY $8 ROWS" + + " BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING), 0))])\n" + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3]," + " HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], __stream_seq__=[ROW_NUMBER() OVER" + " ()], __reset_before_flag__=[CASE(>($5, 100), 1, 0)]," @@ -181,9 +204,10 @@ public void testStreamstatsReset() { + " `$cor0`.`SAL`, `$cor0`.`COMM`, `$cor0`.`DEPTNO`, `t5`.`avg(SAL)`\n" + "FROM (SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`," + " `__stream_seq__`, `__reset_before_flag__`, `__reset_after_flag__`," - + " (SUM(`__reset_before_flag__`) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT" - + " ROW)) + COALESCE(SUM(`__reset_after_flag__`) OVER (ROWS BETWEEN UNBOUNDED PRECEDING" - + " AND 1 PRECEDING), 0) `__seg_id__`\n" + + " (SUM(`__reset_before_flag__`) OVER (ORDER BY `__stream_seq__` NULLS LAST ROWS" + + " BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)) +" + + " COALESCE(SUM(`__reset_after_flag__`) OVER (ORDER BY `__stream_seq__` NULLS LAST" + + " ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING), 0) `__seg_id__`\n" + "FROM (SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`," + " ROW_NUMBER() OVER () `__stream_seq__`, CASE WHEN `SAL` > 100 THEN 1 ELSE 0 END" + " `__reset_before_flag__`, CASE WHEN `SAL` < 50 THEN 1 ELSE 0 END" @@ -192,9 +216,10 @@ public void testStreamstatsReset() { + "LATERAL (SELECT AVG(`SAL`) `avg(SAL)`\n" + "FROM (SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`," + " `__stream_seq__`, `__reset_before_flag__`, `__reset_after_flag__`," - + " (SUM(`__reset_before_flag__`) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT" - + " ROW)) + COALESCE(SUM(`__reset_after_flag__`) OVER (ROWS BETWEEN UNBOUNDED PRECEDING" - + " AND 1 PRECEDING), 0) `__seg_id__`\n" + + " (SUM(`__reset_before_flag__`) OVER (ORDER BY `__stream_seq__` NULLS LAST ROWS" + + " BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW)) +" + + " COALESCE(SUM(`__reset_after_flag__`) OVER (ORDER BY `__stream_seq__` NULLS LAST" + + " ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING), 0) `__seg_id__`\n" + "FROM (SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`," + " ROW_NUMBER() OVER () `__stream_seq__`, CASE WHEN `SAL` > 100 THEN 1 ELSE 0 END" + " `__reset_before_flag__`, CASE WHEN `SAL` < 50 THEN 1 ELSE 0 END" @@ -234,7 +259,7 @@ public void testStreamstatsWithReverse() { + " LogicalSort(sort0=[$8], dir0=[DESC])\n" + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + " SAL=[$5], COMM=[$6], DEPTNO=[$7], __stream_seq__=[$8], max(SAL)=[MAX($5) OVER" - + " (PARTITION BY $7 ROWS UNBOUNDED PRECEDING)])\n" + + " (PARTITION BY $7 ORDER BY $8 ROWS UNBOUNDED PRECEDING)])\n" + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + " SAL=[$5], COMM=[$6], DEPTNO=[$7], __stream_seq__=[ROW_NUMBER() OVER ()])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; @@ -242,8 +267,8 @@ public void testStreamstatsWithReverse() { String expectedSparkSql = "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`," - + " MAX(`SAL`) OVER (PARTITION BY `DEPTNO` ROWS BETWEEN UNBOUNDED" - + " PRECEDING AND CURRENT ROW) `max(SAL)`\n" + + " MAX(`SAL`) OVER (PARTITION BY `DEPTNO` ORDER BY `__stream_seq__` NULLS LAST ROWS" + + " BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) `max(SAL)`\n" + "FROM (SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`," + " ROW_NUMBER() OVER () `__stream_seq__`\n" + "FROM `scott`.`EMP`) `t`\n" diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLTrendlineTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLTrendlineTest.java index 3c23af4b7a6..c9ffbdfccc9 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLTrendlineTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLTrendlineTest.java @@ -5,6 +5,8 @@ package org.opensearch.sql.ppl.calcite; +import static org.junit.Assert.assertTrue; + import org.apache.calcite.rel.RelNode; import org.apache.calcite.test.CalciteAssert.SchemaSpec; import org.junit.Test; @@ -62,6 +64,17 @@ public void testTrendlineWma() { verifyPPLToSparkSQL(root, expectedSparkSql); } + @Test + public void testTrendlineWithSortOrdersWindowFrame() { + String ppl = "source=EMP | trendline sort - SAL sma(2, SAL) | fields SAL, SAL_trendline"; + RelNode root = getRelNode(ppl); + + String plan = root.explain(); + assertTrue(plan.contains("LogicalSort(sort0=[$5], dir0=[DESC])")); + assertTrue(plan.contains("COUNT() OVER (ORDER BY $5 DESC")); + assertTrue(plan.contains("SUM($5) OVER (ORDER BY $5 DESC")); + } + @Test public void testTrendlineMultipleFields() { String ppl = From f28f7bdd384d11ce96cf211d6bd1caf67472dc9b Mon Sep 17 00:00:00 2001 From: Songkan Tang Date: Thu, 25 Jun 2026 14:24:04 +0800 Subject: [PATCH 2/9] Re-enable sorted streamstats on analytics The RelNode ordering fix makes sort followed by streamstats deterministic on the analytics-engine route, so remove the temporary STREAMSTATS_SORT_NOT_HONORED capability gate and Gradle exclude. Signed-off-by: Songkan Tang --- integ-test/build.gradle | 13 +++++-------- .../remote/CalciteStreamstatsCommandIT.java | 2 -- .../java/org/opensearch/sql/util/Capability.java | 15 +-------------- 3 files changed, 6 insertions(+), 24 deletions(-) diff --git a/integ-test/build.gradle b/integ-test/build.gradle index 17b88a901de..fda656873f8 100644 --- a/integ-test/build.gradle +++ b/integ-test/build.gradle @@ -1329,19 +1329,16 @@ task integTestRemote(type: RestIntegTestTask) { // === Excludes: asserts a Lucene pushdown fragment absent on the AE route === excludeTestsMatching '*CalciteSortCommandIT.testPushdownSortCastToDoubleExpression' - // === Excludes: CalciteStreamstatsCommandIT route divergences === + // === Excludes: chained CalciteStreamstatsCommandIT route divergences === // Each test also carries an in-test @RequiresCapability(...) recording the reason. - // - CHAINED_STREAMSTATS_BY: chaining two streamstats where an upstream stage has `by` - // emits two ROW_NUMBER() sequence columns the Substrait converter names identically, - // so the stacked schema has a duplicate/ambiguous field name (500) or, for chained - // window streamstats, non-deterministic values. Fails single- and multi-shard. + // Chaining two streamstats where an upstream stage has `by` emits two ROW_NUMBER() + // sequence columns the Substrait converter names identically, so the stacked schema has + // a duplicate/ambiguous field name (500) or, for chained window streamstats, + // non-deterministic values. Fails single- and multi-shard. excludeTestsMatching '*CalciteStreamstatsCommandIT.testMultipleStreamstats' excludeTestsMatching '*CalciteStreamstatsCommandIT.testMultipleStreamstatsWithWindow' excludeTestsMatching '*CalciteStreamstatsCommandIT.testMultipleStreamstatsWithNull1' excludeTestsMatching '*CalciteStreamstatsCommandIT.testMultipleStreamstatsWithEval' - // - STREAMSTATS_SORT_NOT_HONORED: streamstats computes its window over the backend scan - // order, ignoring a preceding `| sort` (the OVER clause has no explicit ORDER BY). - excludeTestsMatching '*CalciteStreamstatsCommandIT.testStreamstatsAndSort' } } diff --git a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteStreamstatsCommandIT.java b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteStreamstatsCommandIT.java index e70812e3c3b..5ab2ded6560 100644 --- a/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteStreamstatsCommandIT.java +++ b/integ-test/src/test/java/org/opensearch/sql/calcite/remote/CalciteStreamstatsCommandIT.java @@ -8,7 +8,6 @@ import static org.opensearch.sql.legacy.TestsConstants.*; import static org.opensearch.sql.util.Capability.CHAINED_STREAMSTATS_BY; import static org.opensearch.sql.util.Capability.DOC_MUTATION; -import static org.opensearch.sql.util.Capability.STREAMSTATS_SORT_NOT_HONORED; import static org.opensearch.sql.util.MatcherUtils.*; import java.io.IOException; @@ -1017,7 +1016,6 @@ public void testStreamstatsAndEventstats() throws IOException { } @Test - @RequiresCapability(STREAMSTATS_SORT_NOT_HONORED) public void testStreamstatsAndSort() throws IOException { JSONObject actual = executeQuery( diff --git a/integ-test/src/test/java/org/opensearch/sql/util/Capability.java b/integ-test/src/test/java/org/opensearch/sql/util/Capability.java index 3bac06e7fca..86f8b8fc369 100644 --- a/integ-test/src/test/java/org/opensearch/sql/util/Capability.java +++ b/integ-test/src/test/java/org/opensearch/sql/util/Capability.java @@ -486,20 +486,7 @@ public enum Capability { "Chaining two streamstats where an upstream stage partitions by a group fails on the" + " analytics-engine route: both stages emit a ROW_NUMBER() sequence column the Substrait" + " converter names identically, producing a duplicate/ambiguous field name (500) or" - + " non-deterministic window values."), - - /** - * {@code streamstats} computes its running/window aggregate over the backend scan order on the - * analytics-engine route, ignoring a preceding {@code | sort}. The {@code OVER} clause carries no - * explicit {@code ORDER BY} (streamstats orders by encounter order by design), so DataFusion - * evaluates the window in scan order rather than the sorted order the v2/Calcite path honors. - * Verified: {@code sort age | streamstats window=2 avg(age)} yields window values computed in - * insertion order, not age order, so the per-row aggregates diverge. - */ - STREAMSTATS_SORT_NOT_HONORED( - "streamstats computes its window over the backend scan order on the analytics-engine route," - + " ignoring a preceding | sort (the OVER clause has no explicit ORDER BY), so the window" - + " values diverge from the v2/Calcite path which honors the sort."); + + " non-deterministic window values."); private final String reason; From addecab7d6e0b20cf1141f8a4ffb537665e7cc18 Mon Sep 17 00:00:00 2001 From: Songkan Tang Date: Thu, 25 Jun 2026 17:48:51 +0800 Subject: [PATCH 3/9] Use input collation for stream windows Avoid materializing __stream_seq__ when the input subtree already advertises a collation. In that case streamstats windows order directly by the input collation and reserve __stream_seq__ for grouped windows without an order contract. Signed-off-by: Songkan Tang --- .../sql/calcite/CalciteRelNodeVisitor.java | 105 +++++++++++++----- .../calcite/CalcitePPLStreamstatsTest.java | 12 +- 2 files changed, 86 insertions(+), 31 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index dfdb6ba54c5..153680d0ac0 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -35,6 +35,7 @@ import java.util.Arrays; import java.util.BitSet; import java.util.Comparator; +import java.util.EnumSet; import java.util.HashMap; import java.util.HashSet; import java.util.LinkedHashSet; @@ -2279,7 +2280,7 @@ public RelNode visitStreamWindow(StreamWindow node, CalcitePlanContext context) .relBuilder .aggregateCall(SqlStdOperatorTable.ROW_NUMBER) .over() - .orderBy(derivePipelineSortOrderKeys(context)) + .orderBy(deriveCollationOrderKeys(context)) .rowsTo(RexWindowBounds.CURRENT_ROW) .as(ROW_NUMBER_COLUMN_FOR_STREAMSTATS); context.relBuilder.projectPlus(streamSeq); @@ -2296,8 +2297,31 @@ public RelNode visitStreamWindow(StreamWindow node, CalcitePlanContext context) new String[] {ROW_NUMBER_COLUMN_FOR_STREAMSTATS}); } - List inputOrderKeys = derivePipelineSortOrderKeys(context); - if (hasGroup || !inputOrderKeys.isEmpty()) { + List inputOrderKeys = deriveCollationOrderKeys(context); + if (!inputOrderKeys.isEmpty()) { + List overExpressions = + node.getWindowFunctionList().stream() + .map(w -> rexVisitor.analyze(w, context)) + .map(rex -> addWindowOrder(rex, inputOrderKeys, context)) + .toList(); + + if (hasGroup && !node.isBucketNullable()) { + List groupByList = + groupList.stream().map(expr -> rexVisitor.analyze(expr, context)).toList(); + List notNullList = + PlanUtils.getSelectColumns(groupByList).stream() + .map(context.relBuilder::field) + .map(context.relBuilder::isNotNull) + .toList(); + RexNode groupNotNull = context.relBuilder.and(notNullList); + context.relBuilder.projectPlus( + wrapWindowFunctionsWithGroupNotNull(overExpressions, groupNotNull, context)); + } else { + context.relBuilder.projectPlus(overExpressions); + } + + context.relBuilder.sort(inputOrderKeys); + } else if (hasGroup) { // streamstats is order-sensitive. Materialize input order before any grouped window can // repartition rows, then make each window frame walk that sequence explicitly. RexNode streamSeq = @@ -2314,7 +2338,7 @@ public RelNode visitStreamWindow(StreamWindow node, CalcitePlanContext context) List overExpressions = node.getWindowFunctionList().stream() .map(w -> rexVisitor.analyze(w, context)) - .map(rex -> addStreamSeqOrder(rex, seqColIndex, context)) + .map(rex -> addWindowOrder(rex, List.of(context.relBuilder.field(seqColIndex)), context)) .toList(); if (hasGroup && !node.isBucketNullable()) { @@ -2347,8 +2371,14 @@ public RelNode visitStreamWindow(StreamWindow node, CalcitePlanContext context) return context.relBuilder.peek(); } - private RexNode addStreamSeqOrder(RexNode rex, int seqColIndex, CalcitePlanContext context) { - RexInputRef seqRef = context.relBuilder.field(seqColIndex); + private RexNode addWindowOrder( + RexNode rex, List orderKeys, CalcitePlanContext context) { + if (orderKeys.isEmpty()) { + return rex; + } + ImmutableList.Builder orderCollationBuilder = ImmutableList.builder(); + orderKeys.forEach(key -> orderCollationBuilder.add(toRexFieldCollation(key))); + ImmutableList orderCollations = orderCollationBuilder.build(); return rex.accept( new RexShuttle() { @Override @@ -2358,13 +2388,12 @@ public RexNode visitOver(RexOver over) { if (!window.orderKeys.isEmpty()) { return recursed; } - RexFieldCollation seqOrder = new RexFieldCollation(seqRef, Set.of()); return context.rexBuilder.makeOver( recursed.getType(), recursed.getAggOperator(), recursed.getOperands(), window.partitionKeys, - ImmutableList.of(seqOrder), + orderCollations, window.getLowerBound(), window.getUpperBound(), window.isRows(), @@ -2376,6 +2405,47 @@ public RexNode visitOver(RexOver over) { }); } + private static RexFieldCollation toRexFieldCollation(RexNode node) { + return toRexFieldCollation( + node, + RelFieldCollation.Direction.ASCENDING, + RelFieldCollation.NullDirection.UNSPECIFIED); + } + + private static RexFieldCollation toRexFieldCollation( + RexNode node, + RelFieldCollation.Direction direction, + RelFieldCollation.NullDirection nullDirection) { + switch (node.getKind()) { + case DESCENDING: + return toRexFieldCollation( + ((RexCall) node).getOperands().getFirst(), + RelFieldCollation.Direction.DESCENDING, + nullDirection); + case NULLS_FIRST: + return toRexFieldCollation( + ((RexCall) node).getOperands().getFirst(), + direction, + RelFieldCollation.NullDirection.FIRST); + case NULLS_LAST: + return toRexFieldCollation( + ((RexCall) node).getOperands().getFirst(), + direction, + RelFieldCollation.NullDirection.LAST); + default: + Set flags = EnumSet.noneOf(SqlKind.class); + if (direction == RelFieldCollation.Direction.DESCENDING) { + flags.add(SqlKind.DESCENDING); + } + if (nullDirection == RelFieldCollation.NullDirection.FIRST) { + flags.add(SqlKind.NULLS_FIRST); + } else if (nullDirection == RelFieldCollation.NullDirection.LAST) { + flags.add(SqlKind.NULLS_LAST); + } + return new RexFieldCollation(node, flags); + } + } + private List wrapWindowFunctionsWithGroupNotNull( List overExpressions, RexNode groupNotNull, CalcitePlanContext context) { List wrappedOverExprs = new ArrayList<>(overExpressions.size()); @@ -2689,7 +2759,7 @@ private RelNode buildResetHelperColumns(CalcitePlanContext context, StreamWindow .relBuilder .aggregateCall(SqlStdOperatorTable.ROW_NUMBER) .over() - .orderBy(derivePipelineSortOrderKeys(context)) + .orderBy(deriveCollationOrderKeys(context)) .rowsTo(RexWindowBounds.CURRENT_ROW) .as(ROW_NUMBER_COLUMN_FOR_STREAMSTATS); context.relBuilder.projectPlus(rowNum); @@ -2929,21 +2999,6 @@ private static List deriveCollationOrderKeys(CalcitePlanContext context return orderKeys; } - /** Window {@code ORDER BY} keys only when the current pipeline contains an explicit sort. */ - private static List derivePipelineSortOrderKeys(CalcitePlanContext context) { - return hasSortInInput(context.relBuilder.peek()) - ? deriveCollationOrderKeys(context) - : List.of(); - } - - private static boolean hasSortInInput(RelNode rel) { - if (rel instanceof Sort) { - return true; - } - List inputs = rel.getInputs(); - return inputs.size() == 1 && hasSortInInput(inputs.getFirst()); - } - @Override public RelNode visitAppendCol(AppendCol node, CalcitePlanContext context) { // 1. resolve main plan @@ -3987,7 +4042,7 @@ public RelNode visitTrendline(Trendline node, CalcitePlanContext context) { } }); - List trendlineOrderKeys = derivePipelineSortOrderKeys(context); + List trendlineOrderKeys = deriveCollationOrderKeys(context); List trendlineNodes = new ArrayList<>(); List aliases = new ArrayList<>(); diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLStreamstatsTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLStreamstatsTest.java index 8b80f18bf6d..e6459200f7e 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLStreamstatsTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLStreamstatsTest.java @@ -76,23 +76,23 @@ public void testStreamstatsByNullBucket() { } @Test - public void testStreamstatsByAfterSortOrdersWindowBySequence() { + public void testStreamstatsByAfterSortOrdersWindowByCollation() { String ppl = "source=EMP | sort - SAL | streamstats max(SAL) by DEPTNO"; RelNode root = getRelNode(ppl); String plan = root.explain(); - assertTrue(plan.contains("__stream_seq__=[ROW_NUMBER() OVER (ORDER BY $5 DESC NULLS LAST)]")); - assertTrue(plan.contains("MAX($5) OVER (PARTITION BY $7 ORDER BY $8")); + assertFalse(plan.contains("__stream_seq__")); + assertTrue(plan.contains("MAX($5) OVER (PARTITION BY $7 ORDER BY $5 DESC NULLS LAST")); } @Test - public void testStreamstatsAfterSortOrdersWindowBySequence() { + public void testStreamstatsAfterSortOrdersWindowByCollation() { String ppl = "source=EMP | sort - SAL | streamstats max(SAL)"; RelNode root = getRelNode(ppl); String plan = root.explain(); - assertTrue(plan.contains("__stream_seq__=[ROW_NUMBER() OVER (ORDER BY $5 DESC NULLS LAST)]")); - assertTrue(plan.contains("max(SAL)=[MAX($5) OVER (ORDER BY $8 ROWS UNBOUNDED PRECEDING)]")); + assertFalse(plan.contains("__stream_seq__")); + assertTrue(plan.contains("max(SAL)=[MAX($5) OVER (ORDER BY $5 DESC NULLS LAST")); } @Test From 164b48839297127d992cd6215c7b08c11a9f59ea Mon Sep 17 00:00:00 2001 From: Songkan Tang Date: Fri, 26 Jun 2026 11:26:56 +0800 Subject: [PATCH 4/9] Simplify stream window ordering Use advertised input collation directly for ordered stream windows, keep __stream_seq__ only for grouped fallback without an order contract, and share bucket-nullability projection logic across both branches. Signed-off-by: Songkan Tang --- .../sql/calcite/CalciteRelNodeVisitor.java | 68 ++++----- .../calcite/CalcitePPLStreamstatsTest.java | 130 ++++++++---------- .../ppl/calcite/CalcitePPLTrendlineTest.java | 62 +++++---- 3 files changed, 120 insertions(+), 140 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index 153680d0ac0..ba06bb68e25 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -2305,20 +2305,7 @@ public RelNode visitStreamWindow(StreamWindow node, CalcitePlanContext context) .map(rex -> addWindowOrder(rex, inputOrderKeys, context)) .toList(); - if (hasGroup && !node.isBucketNullable()) { - List groupByList = - groupList.stream().map(expr -> rexVisitor.analyze(expr, context)).toList(); - List notNullList = - PlanUtils.getSelectColumns(groupByList).stream() - .map(context.relBuilder::field) - .map(context.relBuilder::isNotNull) - .toList(); - RexNode groupNotNull = context.relBuilder.and(notNullList); - context.relBuilder.projectPlus( - wrapWindowFunctionsWithGroupNotNull(overExpressions, groupNotNull, context)); - } else { - context.relBuilder.projectPlus(overExpressions); - } + projectStreamWindowExpressions(overExpressions, hasGroup, groupList, node, context); context.relBuilder.sort(inputOrderKeys); } else if (hasGroup) { @@ -2329,7 +2316,6 @@ public RelNode visitStreamWindow(StreamWindow node, CalcitePlanContext context) .relBuilder .aggregateCall(SqlStdOperatorTable.ROW_NUMBER) .over() - .orderBy(inputOrderKeys) .rowsTo(RexWindowBounds.CURRENT_ROW) .as(ROW_NUMBER_COLUMN_FOR_STREAMSTATS); context.relBuilder.projectPlus(streamSeq); @@ -2338,27 +2324,12 @@ public RelNode visitStreamWindow(StreamWindow node, CalcitePlanContext context) List overExpressions = node.getWindowFunctionList().stream() .map(w -> rexVisitor.analyze(w, context)) - .map(rex -> addWindowOrder(rex, List.of(context.relBuilder.field(seqColIndex)), context)) + .map( + rex -> + addWindowOrder(rex, List.of(context.relBuilder.field(seqColIndex)), context)) .toList(); - if (hasGroup && !node.isBucketNullable()) { - // construct groupNotNull predicate - List groupByList = - groupList.stream().map(expr -> rexVisitor.analyze(expr, context)).toList(); - List notNullList = - PlanUtils.getSelectColumns(groupByList).stream() - .map(context.relBuilder::field) - .map(context.relBuilder::isNotNull) - .toList(); - RexNode groupNotNull = context.relBuilder.and(notNullList); - - // wrap each expr: CASE WHEN groupNotNull THEN rawExpr ELSE CAST(NULL AS rawType) END - List wrappedOverExprs = - wrapWindowFunctionsWithGroupNotNull(overExpressions, groupNotNull, context); - context.relBuilder.projectPlus(wrappedOverExprs); - } else { - context.relBuilder.projectPlus(overExpressions); - } + projectStreamWindowExpressions(overExpressions, hasGroup, groupList, node, context); context.relBuilder.sort(context.relBuilder.field(ROW_NUMBER_COLUMN_FOR_STREAMSTATS)); context.relBuilder.projectExcept(context.relBuilder.field(ROW_NUMBER_COLUMN_FOR_STREAMSTATS)); @@ -2371,8 +2342,29 @@ public RelNode visitStreamWindow(StreamWindow node, CalcitePlanContext context) return context.relBuilder.peek(); } - private RexNode addWindowOrder( - RexNode rex, List orderKeys, CalcitePlanContext context) { + private void projectStreamWindowExpressions( + List overExpressions, + boolean hasGroup, + List groupList, + StreamWindow node, + CalcitePlanContext context) { + if (hasGroup && !node.isBucketNullable()) { + List groupByList = + groupList.stream().map(expr -> rexVisitor.analyze(expr, context)).toList(); + List notNullList = + PlanUtils.getSelectColumns(groupByList).stream() + .map(context.relBuilder::field) + .map(context.relBuilder::isNotNull) + .toList(); + RexNode groupNotNull = context.relBuilder.and(notNullList); + context.relBuilder.projectPlus( + wrapWindowFunctionsWithGroupNotNull(overExpressions, groupNotNull, context)); + } else { + context.relBuilder.projectPlus(overExpressions); + } + } + + private RexNode addWindowOrder(RexNode rex, List orderKeys, CalcitePlanContext context) { if (orderKeys.isEmpty()) { return rex; } @@ -2407,9 +2399,7 @@ public RexNode visitOver(RexOver over) { private static RexFieldCollation toRexFieldCollation(RexNode node) { return toRexFieldCollation( - node, - RelFieldCollation.Direction.ASCENDING, - RelFieldCollation.NullDirection.UNSPECIFIED); + node, RelFieldCollation.Direction.ASCENDING, RelFieldCollation.NullDirection.UNSPECIFIED); } private static RexFieldCollation toRexFieldCollation( diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLStreamstatsTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLStreamstatsTest.java index e6459200f7e..875eb9ec0dc 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLStreamstatsTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLStreamstatsTest.java @@ -24,25 +24,19 @@ public void testStreamstatsBy() { String ppl = "source=EMP | streamstats max(SAL) by DEPTNO"; RelNode root = getRelNode(ppl); String expectedLogical = - "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5]," - + " COMM=[$6], DEPTNO=[$7], max(SAL)=[$9])\n" - + " LogicalSort(sort0=[$8], dir0=[ASC])\n" - + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," - + " SAL=[$5], COMM=[$6], DEPTNO=[$7], __stream_seq__=[$8], max(SAL)=[MAX($5) OVER" - + " (PARTITION BY $7 ORDER BY $8 ROWS UNBOUNDED PRECEDING)])\n" - + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," - + " SAL=[$5], COMM=[$6], DEPTNO=[$7], __stream_seq__=[ROW_NUMBER() OVER ()])\n" - + " LogicalTableScan(table=[[scott, EMP]])\n"; + "LogicalSort(sort0=[$0], dir0=[ASC])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], max(SAL)=[MAX($5) OVER (PARTITION BY $7 ORDER BY" + + " $0 NULLS LAST ROWS UNBOUNDED PRECEDING)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); String expectedSparkSql = "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`, MAX(`SAL`)" - + " OVER (PARTITION BY `DEPTNO` ORDER BY `__stream_seq__` NULLS LAST ROWS BETWEEN" + + " OVER (PARTITION BY `DEPTNO` ORDER BY `EMPNO` NULLS LAST ROWS BETWEEN" + " UNBOUNDED PRECEDING AND CURRENT ROW) `max(SAL)`\n" - + "FROM (SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`," - + " ROW_NUMBER() OVER () `__stream_seq__`\n" - + "FROM `scott`.`EMP`) `t`\n" - + "ORDER BY `__stream_seq__` NULLS LAST"; + + "FROM `scott`.`EMP`\n" + + "ORDER BY `EMPNO` NULLS LAST"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -51,27 +45,21 @@ public void testStreamstatsByNullBucket() { String ppl = "source=EMP | streamstats bucket_nullable=false max(SAL) by DEPTNO"; RelNode root = getRelNode(ppl); String expectedLogical = - "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5]," - + " COMM=[$6], DEPTNO=[$7], max(SAL)=[$9])\n" - + " LogicalSort(sort0=[$8], dir0=[ASC])\n" - + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," - + " SAL=[$5], COMM=[$6], DEPTNO=[$7], __stream_seq__=[$8], max(SAL)=[CASE(IS NOT" - + " NULL($7), MAX($5) OVER (PARTITION BY $7 ORDER BY $8 ROWS UNBOUNDED PRECEDING)," - + " null:DECIMAL(7, 2))])\n" - + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," - + " SAL=[$5], COMM=[$6], DEPTNO=[$7], __stream_seq__=[ROW_NUMBER() OVER ()])\n" - + " LogicalTableScan(table=[[scott, EMP]])\n"; + "LogicalSort(sort0=[$0], dir0=[ASC])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], max(SAL)=[CASE(IS NOT NULL($7), MAX($5) OVER" + + " (PARTITION BY $7 ORDER BY $0 NULLS LAST ROWS UNBOUNDED PRECEDING), null:DECIMAL(7," + + " 2))])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); String expectedSparkSql = "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`, CASE WHEN" - + " `DEPTNO` IS NOT NULL THEN MAX(`SAL`) OVER (PARTITION BY `DEPTNO` ORDER BY" - + " `__stream_seq__` NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) ELSE" + + " `DEPTNO` IS NOT NULL THEN MAX(`SAL`) OVER (PARTITION BY `DEPTNO` ORDER BY `EMPNO`" + + " NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) ELSE" + " NULL END `max(SAL)`\n" - + "FROM (SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`," - + " ROW_NUMBER() OVER () `__stream_seq__`\n" - + "FROM `scott`.`EMP`) `t`\n" - + "ORDER BY `__stream_seq__` NULLS LAST"; + + "FROM `scott`.`EMP`\n" + + "ORDER BY `EMPNO` NULLS LAST"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -100,16 +88,19 @@ public void testStreamstatsCurrent() { String ppl = "source=EMP | streamstats current = false max(SAL)"; RelNode root = getRelNode(ppl); String expectedLogical = - "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5]," - + " COMM=[$6], DEPTNO=[$7], max(SAL)=[MAX($5) OVER (ROWS BETWEEN UNBOUNDED PRECEDING" - + " AND 1 PRECEDING)])\n" - + " LogicalTableScan(table=[[scott, EMP]])\n"; + "LogicalSort(sort0=[$0], dir0=[ASC])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], max(SAL)=[MAX($5) OVER (ORDER BY $0 NULLS LAST" + + " ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); String expectedSparkSql = "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`, MAX(`SAL`)" - + " OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING) `max(SAL)`\n" - + "FROM `scott`.`EMP`"; + + " OVER (ORDER BY `EMPNO` NULLS LAST ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING)" + + " `max(SAL)`\n" + + "FROM `scott`.`EMP`\n" + + "ORDER BY `EMPNO` NULLS LAST"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -126,10 +117,11 @@ public void testStreamstatsWindow() { + " LogicalJoin(condition=[AND(>=($9, -($8, 4)), <=($9, $8), IS NOT DISTINCT" + " FROM($7, $10))], joinType=[left])\n" + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," - + " SAL=[$5], COMM=[$6], DEPTNO=[$7], __stream_seq__=[ROW_NUMBER() OVER ()])\n" + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], __stream_seq__=[ROW_NUMBER() OVER (ORDER BY $0" + + " NULLS LAST)])\n" + " LogicalTableScan(table=[[scott, EMP]])\n" - + " LogicalProject(__r_seq__=[ROW_NUMBER() OVER ()], __r_DEPTNO__=[$7]," - + " __r_SAL__=[$5])\n" + + " LogicalProject(__r_seq__=[ROW_NUMBER() OVER (ORDER BY $0 NULLS LAST)]," + + " __r_DEPTNO__=[$7], __r_SAL__=[$5])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); } @@ -139,25 +131,19 @@ public void testStreamstatsGlobal() { String ppl = "source=EMP | streamstats window = 5 global= false max(SAL) by DEPTNO"; RelNode root = getRelNode(ppl); String expectedLogical = - "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5]," - + " COMM=[$6], DEPTNO=[$7], max(SAL)=[$9])\n" - + " LogicalSort(sort0=[$8], dir0=[ASC])\n" - + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," - + " SAL=[$5], COMM=[$6], DEPTNO=[$7], __stream_seq__=[$8], max(SAL)=[MAX($5) OVER" - + " (PARTITION BY $7 ORDER BY $8 ROWS 4 PRECEDING)])\n" - + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," - + " SAL=[$5], COMM=[$6], DEPTNO=[$7], __stream_seq__=[ROW_NUMBER() OVER ()])\n" - + " LogicalTableScan(table=[[scott, EMP]])\n"; + "LogicalSort(sort0=[$0], dir0=[ASC])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], max(SAL)=[MAX($5) OVER (PARTITION BY $7 ORDER BY" + + " $0 NULLS LAST ROWS 4 PRECEDING)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); String expectedSparkSql = "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`, MAX(`SAL`)" - + " OVER (PARTITION BY `DEPTNO` ORDER BY `__stream_seq__` NULLS LAST ROWS BETWEEN 4" + + " OVER (PARTITION BY `DEPTNO` ORDER BY `EMPNO` NULLS LAST ROWS BETWEEN 4" + " PRECEDING AND CURRENT ROW) `max(SAL)`\n" - + "FROM (SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`," - + " ROW_NUMBER() OVER () `__stream_seq__`\n" - + "FROM `scott`.`EMP`) `t`\n" - + "ORDER BY `__stream_seq__` NULLS LAST"; + + "FROM `scott`.`EMP`\n" + + "ORDER BY `EMPNO` NULLS LAST"; verifyPPLToSparkSQL(root, expectedSparkSql); } @@ -178,9 +164,9 @@ public void testStreamstatsReset() { + " UNBOUNDED PRECEDING), COALESCE(SUM($10) OVER (ORDER BY $8 ROWS BETWEEN UNBOUNDED" + " PRECEDING AND 1 PRECEDING), 0))])\n" + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," - + " SAL=[$5], COMM=[$6], DEPTNO=[$7], __stream_seq__=[ROW_NUMBER() OVER ()]," - + " __reset_before_flag__=[CASE(>($5, 100), 1, 0)], __reset_after_flag__=[CASE(<($5," - + " 50), 1, 0)])\n" + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], __stream_seq__=[ROW_NUMBER() OVER (ORDER BY $0" + + " NULLS LAST)], __reset_before_flag__=[CASE(>($5, 100), 1, 0)]," + + " __reset_after_flag__=[CASE(<($5, 50), 1, 0)])\n" + " LogicalTableScan(table=[[scott, EMP]])\n" + " LogicalAggregate(group=[{}], avg(SAL)=[AVG($0)])\n" + " LogicalProject(SAL=[$5])\n" @@ -194,7 +180,7 @@ public void testStreamstatsReset() { + " BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING), 0))])\n" + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3]," + " HIREDATE=[$4], SAL=[$5], COMM=[$6], DEPTNO=[$7], __stream_seq__=[ROW_NUMBER() OVER" - + " ()], __reset_before_flag__=[CASE(>($5, 100), 1, 0)]," + + " (ORDER BY $0 NULLS LAST)], __reset_before_flag__=[CASE(>($5, 100), 1, 0)]," + " __reset_after_flag__=[CASE(<($5, 50), 1, 0)])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); @@ -209,7 +195,8 @@ public void testStreamstatsReset() { + " COALESCE(SUM(`__reset_after_flag__`) OVER (ORDER BY `__stream_seq__` NULLS LAST" + " ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING), 0) `__seg_id__`\n" + "FROM (SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`," - + " ROW_NUMBER() OVER () `__stream_seq__`, CASE WHEN `SAL` > 100 THEN 1 ELSE 0 END" + + " ROW_NUMBER() OVER (ORDER BY `EMPNO` NULLS LAST) `__stream_seq__`, CASE WHEN `SAL`" + + " > 100 THEN 1 ELSE 0 END" + " `__reset_before_flag__`, CASE WHEN `SAL` < 50 THEN 1 ELSE 0 END" + " `__reset_after_flag__`\n" + "FROM `scott`.`EMP`) `t`) `$cor0`,\n" @@ -221,7 +208,8 @@ public void testStreamstatsReset() { + " COALESCE(SUM(`__reset_after_flag__`) OVER (ORDER BY `__stream_seq__` NULLS LAST" + " ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING), 0) `__seg_id__`\n" + "FROM (SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`," - + " ROW_NUMBER() OVER () `__stream_seq__`, CASE WHEN `SAL` > 100 THEN 1 ELSE 0 END" + + " ROW_NUMBER() OVER (ORDER BY `EMPNO` NULLS LAST) `__stream_seq__`, CASE WHEN `SAL`" + + " > 100 THEN 1 ELSE 0 END" + " `__reset_before_flag__`, CASE WHEN `SAL` < 50 THEN 1 ELSE 0 END" + " `__reset_after_flag__`\n" + "FROM `scott`.`EMP`) `t1`) `t2`\n" @@ -252,27 +240,21 @@ public void testMultipleStreamstatsWithWindow() { public void testStreamstatsWithReverse() { String ppl = "source=EMP | streamstats max(SAL) by DEPTNO | reverse"; RelNode root = getRelNode(ppl); - // Reverse replaces the __stream_seq__ sort in-place via backtracking + // Reverse replaces the input collation sort in-place via backtracking String expectedLogical = - "LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4], SAL=[$5]," - + " COMM=[$6], DEPTNO=[$7], max(SAL)=[$9])\n" - + " LogicalSort(sort0=[$8], dir0=[DESC])\n" - + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," - + " SAL=[$5], COMM=[$6], DEPTNO=[$7], __stream_seq__=[$8], max(SAL)=[MAX($5) OVER" - + " (PARTITION BY $7 ORDER BY $8 ROWS UNBOUNDED PRECEDING)])\n" - + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," - + " SAL=[$5], COMM=[$6], DEPTNO=[$7], __stream_seq__=[ROW_NUMBER() OVER ()])\n" - + " LogicalTableScan(table=[[scott, EMP]])\n"; + "LogicalSort(sort0=[$0], dir0=[DESC])\n" + + " LogicalProject(EMPNO=[$0], ENAME=[$1], JOB=[$2], MGR=[$3], HIREDATE=[$4]," + + " SAL=[$5], COMM=[$6], DEPTNO=[$7], max(SAL)=[MAX($5) OVER (PARTITION BY $7 ORDER BY" + + " $0 NULLS LAST ROWS UNBOUNDED PRECEDING)])\n" + + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); String expectedSparkSql = "SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`," - + " MAX(`SAL`) OVER (PARTITION BY `DEPTNO` ORDER BY `__stream_seq__` NULLS LAST ROWS" + + " MAX(`SAL`) OVER (PARTITION BY `DEPTNO` ORDER BY `EMPNO` NULLS LAST ROWS" + " BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW) `max(SAL)`\n" - + "FROM (SELECT `EMPNO`, `ENAME`, `JOB`, `MGR`, `HIREDATE`, `SAL`, `COMM`, `DEPTNO`," - + " ROW_NUMBER() OVER () `__stream_seq__`\n" - + "FROM `scott`.`EMP`) `t`\n" - + "ORDER BY `__stream_seq__` DESC NULLS FIRST"; + + "FROM `scott`.`EMP`\n" + + "ORDER BY `EMPNO` DESC NULLS FIRST"; verifyPPLToSparkSQL(root, expectedSparkSql); } } diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLTrendlineTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLTrendlineTest.java index c9ffbdfccc9..bf7ba61eccd 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLTrendlineTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLTrendlineTest.java @@ -22,18 +22,20 @@ public void testTrendlineSma() { RelNode root = getRelNode(ppl); String expectedLogical = - "LogicalProject(SAL=[$5], sal_trend=[CASE(>(COUNT() OVER (ROWS 1 PRECEDING), 1), /(SUM($5)" - + " OVER (ROWS 1 PRECEDING), CAST(COUNT($5) OVER (ROWS 1 PRECEDING)):DOUBLE NOT NULL)," + "LogicalProject(SAL=[$5], sal_trend=[CASE(>(COUNT() OVER (ORDER BY $0 NULLS LAST ROWS 1" + + " PRECEDING), 1), /(SUM($5) OVER (ORDER BY $0 NULLS LAST ROWS 1 PRECEDING)," + + " CAST(COUNT($5) OVER (ORDER BY $0 NULLS LAST ROWS 1 PRECEDING)):DOUBLE NOT NULL)," + " null:NULL)])\n" + " LogicalFilter(condition=[IS NOT NULL($5)])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `SAL`, CASE WHEN (COUNT(*) OVER (ROWS BETWEEN 1 PRECEDING AND CURRENT ROW)) > 1" - + " THEN (SUM(`SAL`) OVER (ROWS BETWEEN 1 PRECEDING AND CURRENT ROW)) /" - + " CAST(COUNT(`SAL`) OVER (ROWS BETWEEN 1 PRECEDING AND CURRENT ROW) AS DOUBLE) ELSE" - + " NULL END `sal_trend`\n" + "SELECT `SAL`, CASE WHEN (COUNT(*) OVER (ORDER BY `EMPNO` NULLS LAST ROWS BETWEEN 1" + + " PRECEDING AND CURRENT ROW)) > 1 THEN (SUM(`SAL`) OVER (ORDER BY `EMPNO` NULLS LAST" + + " ROWS BETWEEN 1 PRECEDING AND CURRENT ROW)) / CAST(COUNT(`SAL`) OVER (ORDER BY" + + " `EMPNO` NULLS LAST ROWS BETWEEN 1 PRECEDING AND CURRENT ROW) AS DOUBLE) ELSE NULL" + + " END `sal_trend`\n" + "FROM `scott`.`EMP`\n" + "WHERE `SAL` IS NOT NULL"; verifyPPLToSparkSQL(root, expectedSparkSql); @@ -45,20 +47,22 @@ public void testTrendlineWma() { RelNode root = getRelNode(ppl); String expectedLogical = - "LogicalProject(SAL=[$5], SAL_trendline=[CASE(>(COUNT() OVER (ROWS 2 PRECEDING), 2)," - + " /(+(+(CAST(NTH_VALUE($5, 1) OVER (ROWS 2 PRECEDING)):DECIMAL(18, 2)," - + " *(NTH_VALUE($5, 2) OVER (ROWS 2 PRECEDING), 2)), *(NTH_VALUE($5, 3) OVER (ROWS 2" - + " PRECEDING), 3)), 6.0E0:DOUBLE), null:NULL)])\n" + "LogicalProject(SAL=[$5], SAL_trendline=[CASE(>(COUNT() OVER (ORDER BY $0 NULLS LAST ROWS 2" + + " PRECEDING), 2), /(+(+(CAST(NTH_VALUE($5, 1) OVER (ORDER BY $0 NULLS LAST ROWS 2" + + " PRECEDING)):DECIMAL(18, 2), *(NTH_VALUE($5, 2) OVER (ORDER BY $0 NULLS LAST ROWS 2" + + " PRECEDING), 2)), *(NTH_VALUE($5, 3) OVER (ORDER BY $0 NULLS LAST ROWS 2 PRECEDING)," + + " 3)), 6.0E0:DOUBLE), null:NULL)])\n" + " LogicalFilter(condition=[IS NOT NULL($5)])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT `SAL`, CASE WHEN (COUNT(*) OVER (ROWS BETWEEN 2 PRECEDING AND CURRENT ROW)) > 2" - + " THEN (CAST(NTH_VALUE(`SAL`, 1) OVER (ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) AS" - + " DECIMAL(18, 2)) + (NTH_VALUE(`SAL`, 2) OVER (ROWS BETWEEN 2 PRECEDING AND CURRENT" - + " ROW)) * 2 + (NTH_VALUE(`SAL`, 3) OVER (ROWS BETWEEN 2 PRECEDING AND CURRENT ROW)) *" - + " 3) / 6.0E0 ELSE NULL END `SAL_trendline`\n" + "SELECT `SAL`, CASE WHEN (COUNT(*) OVER (ORDER BY `EMPNO` NULLS LAST ROWS BETWEEN 2" + + " PRECEDING AND CURRENT ROW)) > 2 THEN (CAST(NTH_VALUE(`SAL`, 1) OVER (ORDER BY" + + " `EMPNO` NULLS LAST ROWS BETWEEN 2 PRECEDING AND CURRENT ROW) AS DECIMAL(18, 2)) +" + + " (NTH_VALUE(`SAL`, 2) OVER (ORDER BY `EMPNO` NULLS LAST ROWS BETWEEN 2 PRECEDING AND" + + " CURRENT ROW)) * 2 + (NTH_VALUE(`SAL`, 3) OVER (ORDER BY `EMPNO` NULLS LAST ROWS" + + " BETWEEN 2 PRECEDING AND CURRENT ROW)) * 3) / 6.0E0 ELSE NULL END `SAL_trendline`\n" + "FROM `scott`.`EMP`\n" + "WHERE `SAL` IS NOT NULL"; verifyPPLToSparkSQL(root, expectedSparkSql); @@ -83,24 +87,28 @@ public void testTrendlineMultipleFields() { RelNode root = getRelNode(ppl); String expectedLogical = - "LogicalProject(SAL_trendline=[CASE(>(COUNT() OVER (ROWS 1 PRECEDING), 1)," - + " /(+(CAST(NTH_VALUE($5, 1) OVER (ROWS 1 PRECEDING)):DECIMAL(18, 2), *(NTH_VALUE($5," - + " 2) OVER (ROWS 1 PRECEDING), 2)), 3.0E0:DOUBLE), null:NULL)]," - + " DEPTNO_trendline=[CASE(>(COUNT() OVER (ROWS 1 PRECEDING), 1), /(SUM($7) OVER (ROWS" - + " 1 PRECEDING), CAST(COUNT($7) OVER (ROWS 1 PRECEDING)):DOUBLE NOT NULL)," + "LogicalProject(SAL_trendline=[CASE(>(COUNT() OVER (ORDER BY $0 NULLS LAST ROWS 1" + + " PRECEDING), 1), /(+(CAST(NTH_VALUE($5, 1) OVER (ORDER BY $0 NULLS LAST ROWS 1" + + " PRECEDING)):DECIMAL(18, 2), *(NTH_VALUE($5, 2) OVER (ORDER BY $0 NULLS LAST ROWS 1" + + " PRECEDING), 2)), 3.0E0:DOUBLE), null:NULL)], DEPTNO_trendline=[CASE(>(COUNT() OVER" + + " (ORDER BY $0 NULLS LAST ROWS 1 PRECEDING), 1), /(SUM($7) OVER (ORDER BY $0 NULLS" + + " LAST ROWS 1 PRECEDING), CAST(COUNT($7) OVER (ORDER BY $0 NULLS LAST ROWS 1" + + " PRECEDING)):DOUBLE NOT NULL)," + " null:NULL)])\n" + " LogicalFilter(condition=[AND(IS NOT NULL($5), IS NOT NULL($7))])\n" + " LogicalTableScan(table=[[scott, EMP]])\n"; verifyLogical(root, expectedLogical); String expectedSparkSql = - "SELECT CASE WHEN (COUNT(*) OVER (ROWS BETWEEN 1 PRECEDING AND CURRENT ROW)) > 1 THEN" - + " (CAST(NTH_VALUE(`SAL`, 1) OVER (ROWS BETWEEN 1 PRECEDING AND CURRENT ROW) AS" - + " DECIMAL(18, 2)) + (NTH_VALUE(`SAL`, 2) OVER (ROWS BETWEEN 1 PRECEDING AND CURRENT" - + " ROW)) * 2) / 3.0E0 ELSE NULL END `SAL_trendline`, CASE WHEN (COUNT(*) OVER (ROWS" - + " BETWEEN 1 PRECEDING AND CURRENT ROW)) > 1 THEN (SUM(`DEPTNO`) OVER (ROWS BETWEEN 1" - + " PRECEDING AND CURRENT ROW)) / CAST(COUNT(`DEPTNO`) OVER (ROWS BETWEEN 1 PRECEDING" - + " AND CURRENT ROW) AS DOUBLE) ELSE NULL END `DEPTNO_trendline`\n" + "SELECT CASE WHEN (COUNT(*) OVER (ORDER BY `EMPNO` NULLS LAST ROWS BETWEEN 1 PRECEDING AND" + + " CURRENT ROW)) > 1 THEN (CAST(NTH_VALUE(`SAL`, 1) OVER (ORDER BY `EMPNO` NULLS LAST" + + " ROWS BETWEEN 1 PRECEDING AND CURRENT ROW) AS DECIMAL(18, 2)) + (NTH_VALUE(`SAL`, 2)" + + " OVER (ORDER BY `EMPNO` NULLS LAST ROWS BETWEEN 1 PRECEDING AND CURRENT ROW)) * 2) /" + + " 3.0E0 ELSE NULL END `SAL_trendline`, CASE WHEN (COUNT(*) OVER (ORDER BY `EMPNO`" + + " NULLS LAST ROWS BETWEEN 1 PRECEDING AND CURRENT ROW)) > 1 THEN (SUM(`DEPTNO`) OVER" + + " (ORDER BY `EMPNO` NULLS LAST ROWS BETWEEN 1 PRECEDING AND CURRENT ROW)) /" + + " CAST(COUNT(`DEPTNO`) OVER (ORDER BY `EMPNO` NULLS LAST ROWS BETWEEN 1 PRECEDING AND" + + " CURRENT ROW) AS DOUBLE) ELSE NULL END `DEPTNO_trendline`\n" + "FROM `scott`.`EMP`\n" + "WHERE `SAL` IS NOT NULL AND `DEPTNO` IS NOT NULL"; verifyPPLToSparkSQL(root, expectedSparkSql); From b9afa0d62a7d5d544e3d2491c8b626b8f82914ed Mon Sep 17 00:00:00 2001 From: Songkan Tang Date: Fri, 26 Jun 2026 11:42:28 +0800 Subject: [PATCH 5/9] Refactor stream window order planning Signed-off-by: Songkan Tang --- .../sql/calcite/CalciteRelNodeVisitor.java | 42 +++++++------------ 1 file changed, 16 insertions(+), 26 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index ba06bb68e25..51b39ba62d8 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -2297,18 +2297,9 @@ public RelNode visitStreamWindow(StreamWindow node, CalcitePlanContext context) new String[] {ROW_NUMBER_COLUMN_FOR_STREAMSTATS}); } - List inputOrderKeys = deriveCollationOrderKeys(context); - if (!inputOrderKeys.isEmpty()) { - List overExpressions = - node.getWindowFunctionList().stream() - .map(w -> rexVisitor.analyze(w, context)) - .map(rex -> addWindowOrder(rex, inputOrderKeys, context)) - .toList(); - - projectStreamWindowExpressions(overExpressions, hasGroup, groupList, node, context); - - context.relBuilder.sort(inputOrderKeys); - } else if (hasGroup) { + List windowOrderKeys = deriveCollationOrderKeys(context); + boolean useStreamSeq = windowOrderKeys.isEmpty() && hasGroup; + if (useStreamSeq) { // streamstats is order-sensitive. Materialize input order before any grouped window can // repartition rows, then make each window frame walk that sequence explicitly. RexNode streamSeq = @@ -2320,23 +2311,22 @@ public RelNode visitStreamWindow(StreamWindow node, CalcitePlanContext context) .as(ROW_NUMBER_COLUMN_FOR_STREAMSTATS); context.relBuilder.projectPlus(streamSeq); int seqColIndex = context.relBuilder.peek().getRowType().getFieldCount() - 1; + windowOrderKeys = List.of(context.relBuilder.field(seqColIndex)); + } - List overExpressions = - node.getWindowFunctionList().stream() - .map(w -> rexVisitor.analyze(w, context)) - .map( - rex -> - addWindowOrder(rex, List.of(context.relBuilder.field(seqColIndex)), context)) - .toList(); - - projectStreamWindowExpressions(overExpressions, hasGroup, groupList, node, context); + List finalWindowOrderKeys = windowOrderKeys; + List overExpressions = + node.getWindowFunctionList().stream() + .map(w -> rexVisitor.analyze(w, context)) + .map(rex -> addWindowOrder(rex, finalWindowOrderKeys, context)) + .toList(); + projectStreamWindowExpressions(overExpressions, hasGroup, groupList, node, context); - context.relBuilder.sort(context.relBuilder.field(ROW_NUMBER_COLUMN_FOR_STREAMSTATS)); + if (!finalWindowOrderKeys.isEmpty()) { + context.relBuilder.sort(finalWindowOrderKeys); + } + if (useStreamSeq) { context.relBuilder.projectExcept(context.relBuilder.field(ROW_NUMBER_COLUMN_FOR_STREAMSTATS)); - } else { - List overExpressions = - node.getWindowFunctionList().stream().map(w -> rexVisitor.analyze(w, context)).toList(); - context.relBuilder.projectPlus(overExpressions); } return context.relBuilder.peek(); From 71410db86903a570dc8445e20b11935405d95ec8 Mon Sep 17 00:00:00 2001 From: Songkan Tang Date: Fri, 26 Jun 2026 11:48:48 +0800 Subject: [PATCH 6/9] Extract Rex field collation utility Signed-off-by: Songkan Tang --- .../sql/calcite/CalciteRelNodeVisitor.java | 44 +---------------- .../sql/calcite/utils/PlanUtils.java | 47 +++++++++++++++++++ 2 files changed, 48 insertions(+), 43 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index 51b39ba62d8..3398767a819 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -35,7 +35,6 @@ import java.util.Arrays; import java.util.BitSet; import java.util.Comparator; -import java.util.EnumSet; import java.util.HashMap; import java.util.HashSet; import java.util.LinkedHashSet; @@ -2358,9 +2357,7 @@ private RexNode addWindowOrder(RexNode rex, List orderKeys, CalcitePlan if (orderKeys.isEmpty()) { return rex; } - ImmutableList.Builder orderCollationBuilder = ImmutableList.builder(); - orderKeys.forEach(key -> orderCollationBuilder.add(toRexFieldCollation(key))); - ImmutableList orderCollations = orderCollationBuilder.build(); + ImmutableList orderCollations = PlanUtils.toRexFieldCollations(orderKeys); return rex.accept( new RexShuttle() { @Override @@ -2387,45 +2384,6 @@ public RexNode visitOver(RexOver over) { }); } - private static RexFieldCollation toRexFieldCollation(RexNode node) { - return toRexFieldCollation( - node, RelFieldCollation.Direction.ASCENDING, RelFieldCollation.NullDirection.UNSPECIFIED); - } - - private static RexFieldCollation toRexFieldCollation( - RexNode node, - RelFieldCollation.Direction direction, - RelFieldCollation.NullDirection nullDirection) { - switch (node.getKind()) { - case DESCENDING: - return toRexFieldCollation( - ((RexCall) node).getOperands().getFirst(), - RelFieldCollation.Direction.DESCENDING, - nullDirection); - case NULLS_FIRST: - return toRexFieldCollation( - ((RexCall) node).getOperands().getFirst(), - direction, - RelFieldCollation.NullDirection.FIRST); - case NULLS_LAST: - return toRexFieldCollation( - ((RexCall) node).getOperands().getFirst(), - direction, - RelFieldCollation.NullDirection.LAST); - default: - Set flags = EnumSet.noneOf(SqlKind.class); - if (direction == RelFieldCollation.Direction.DESCENDING) { - flags.add(SqlKind.DESCENDING); - } - if (nullDirection == RelFieldCollation.NullDirection.FIRST) { - flags.add(SqlKind.NULLS_FIRST); - } else if (nullDirection == RelFieldCollation.NullDirection.LAST) { - flags.add(SqlKind.NULLS_LAST); - } - return new RexFieldCollation(node, flags); - } - } - private List wrapWindowFunctionsWithGroupNotNull( List overExpressions, RexNode groupNotNull, CalcitePlanContext context) { List wrappedOverExprs = new ArrayList<>(overExpressions.size()); diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java b/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java index a7f53569c66..d6e86d877e3 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java @@ -16,6 +16,7 @@ import java.lang.reflect.Method; import java.util.ArrayList; import java.util.Arrays; +import java.util.EnumSet; import java.util.HashSet; import java.util.List; import java.util.Objects; @@ -49,6 +50,7 @@ import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.rex.RexCall; import org.apache.calcite.rex.RexCorrelVariable; +import org.apache.calcite.rex.RexFieldCollation; import org.apache.calcite.rex.RexInputRef; import org.apache.calcite.rex.RexLiteral; import org.apache.calcite.rex.RexNode; @@ -303,6 +305,51 @@ private static RexNode withOver( .toRex(); } + static ImmutableList toRexFieldCollations(List orderKeys) { + ImmutableList.Builder orderCollationBuilder = ImmutableList.builder(); + orderKeys.forEach(key -> orderCollationBuilder.add(toRexFieldCollation(key))); + return orderCollationBuilder.build(); + } + + static RexFieldCollation toRexFieldCollation(RexNode node) { + return toRexFieldCollation( + node, RelFieldCollation.Direction.ASCENDING, RelFieldCollation.NullDirection.UNSPECIFIED); + } + + private static RexFieldCollation toRexFieldCollation( + RexNode node, + RelFieldCollation.Direction direction, + RelFieldCollation.NullDirection nullDirection) { + switch (node.getKind()) { + case DESCENDING: + return toRexFieldCollation( + ((RexCall) node).getOperands().getFirst(), + RelFieldCollation.Direction.DESCENDING, + nullDirection); + case NULLS_FIRST: + return toRexFieldCollation( + ((RexCall) node).getOperands().getFirst(), + direction, + RelFieldCollation.NullDirection.FIRST); + case NULLS_LAST: + return toRexFieldCollation( + ((RexCall) node).getOperands().getFirst(), + direction, + RelFieldCollation.NullDirection.LAST); + default: + Set flags = EnumSet.noneOf(SqlKind.class); + if (direction == RelFieldCollation.Direction.DESCENDING) { + flags.add(SqlKind.DESCENDING); + } + if (nullDirection == RelFieldCollation.NullDirection.FIRST) { + flags.add(SqlKind.NULLS_FIRST); + } else if (nullDirection == RelFieldCollation.NullDirection.LAST) { + flags.add(SqlKind.NULLS_LAST); + } + return new RexFieldCollation(node, flags); + } + } + private static RexNode variance( CalcitePlanContext ctx, RexNode operator, From 22c51545ca6bfa39fa61fbf49a91d5939c590c8c Mon Sep 17 00:00:00 2001 From: Songkan Tang Date: Fri, 26 Jun 2026 13:33:55 +0800 Subject: [PATCH 7/9] Document stream window order tradeoff Signed-off-by: Songkan Tang --- .../sql/calcite/CalciteRelNodeVisitor.java | 12 ++++++++++++ .../calcite/explain_streamstats_distinct_count.yaml | 4 ++-- .../calcite/explain_streamstats_earliest_latest.yaml | 4 ++-- ...lain_streamstats_earliest_latest_custom_time.yaml | 4 ++-- .../calcite/explain_streamstats_null_bucket.yaml | 4 ++-- .../calcite/explain_streamstats_reset.yaml | 10 +++++----- .../explain_streamstats_reset_null_bucket.yaml | 10 +++++----- .../calcite/explain_trendline_sort_push.yaml | 4 ++-- .../explain_streamstats_distinct_count.yaml | 4 ++-- .../explain_streamstats_earliest_latest.yaml | 4 ++-- ...lain_streamstats_earliest_latest_custom_time.yaml | 4 ++-- .../explain_streamstats_null_bucket.yaml | 6 +++--- .../explain_streamstats_reset.yaml | 10 +++++----- .../explain_streamstats_reset_null_bucket.yaml | 10 +++++----- .../explain_trendline_sort_push.yaml | 4 ++-- 15 files changed, 53 insertions(+), 41 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index 3398767a819..bb246dd05ff 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -2296,6 +2296,12 @@ public RelNode visitStreamWindow(StreamWindow node, CalcitePlanContext context) new String[] {ROW_NUMBER_COLUMN_FOR_STREAMSTATS}); } + // Short-term correctness workaround for DataFusion: streamstats/trendline are evaluated in + // arrival order, and some engines can preserve that order through window partitions without an + // explicit ORDER BY. DataFusion's physical window plan does not currently provide that + // guarantee, so we declare the inherited input order on the window frame. This may add a small + // per-partition sort cost on engines that did not need it; the long-term fix is a real + // streaming window operator. List windowOrderKeys = deriveCollationOrderKeys(context); boolean useStreamSeq = windowOrderKeys.isEmpty() && hasGroup; if (useStreamSeq) { @@ -3980,6 +3986,12 @@ public RelNode visitTrendline(Trendline node, CalcitePlanContext context) { } }); + // Short-term correctness workaround for DataFusion: streamstats/trendline are evaluated in + // arrival order, and some engines can preserve that order through window partitions without an + // explicit ORDER BY. DataFusion's physical window plan does not currently provide that + // guarantee, so we declare the inherited input order on the window frame. This may add a small + // per-partition sort cost on engines that did not need it; the long-term fix is a real + // streaming window operator. List trendlineOrderKeys = deriveCollationOrderKeys(context); List trendlineNodes = new ArrayList<>(); diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_distinct_count.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_distinct_count.yaml index 0a280b77dfb..b57b021b03f 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_distinct_count.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_distinct_count.yaml @@ -3,12 +3,12 @@ calcite: LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], distinct_states=[$18]) LogicalSort(sort0=[$17], dir0=[ASC]) - LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[$17], distinct_states=[DISTINCT_COUNT_APPROX($7) OVER (PARTITION BY $4 ROWS UNBOUNDED PRECEDING)]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[$17], distinct_states=[DISTINCT_COUNT_APPROX($7) OVER (PARTITION BY $4 ORDER BY $17 ROWS UNBOUNDED PRECEDING)]) LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[ROW_NUMBER() OVER ()]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | EnumerableCalc(expr#0..12=[{inputs}], proj#0..10=[{exprs}], distinct_states=[$t12]) CalciteEnumerableTopK(sort0=[$11], dir0=[ASC], fetch=[10000]) - EnumerableWindow(window#0=[window(partition {4} rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [DISTINCT_COUNT_APPROX($7)])]) + EnumerableWindow(window#0=[window(partition {4} order by [11] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [DISTINCT_COUNT_APPROX($7)])]) EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_earliest_latest.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_earliest_latest.yaml index 2d6062c1148..29c78d31264 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_earliest_latest.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_earliest_latest.yaml @@ -3,12 +3,12 @@ calcite: LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(created_at=[$0], server=[$1], @timestamp=[$2], message=[$3], level=[$4], earliest_message=[$12], latest_message=[$13]) LogicalSort(sort0=[$11], dir0=[ASC]) - LogicalProject(created_at=[$0], server=[$1], @timestamp=[$2], message=[$3], level=[$4], _id=[$5], _index=[$6], _score=[$7], _maxscore=[$8], _sort=[$9], _routing=[$10], __stream_seq__=[$11], earliest_message=[ARG_MIN($3, $2) OVER (PARTITION BY $1 ROWS UNBOUNDED PRECEDING)], latest_message=[ARG_MAX($3, $2) OVER (PARTITION BY $1 ROWS UNBOUNDED PRECEDING)]) + LogicalProject(created_at=[$0], server=[$1], @timestamp=[$2], message=[$3], level=[$4], _id=[$5], _index=[$6], _score=[$7], _maxscore=[$8], _sort=[$9], _routing=[$10], __stream_seq__=[$11], earliest_message=[ARG_MIN($3, $2) OVER (PARTITION BY $1 ORDER BY $11 ROWS UNBOUNDED PRECEDING)], latest_message=[ARG_MAX($3, $2) OVER (PARTITION BY $1 ORDER BY $11 ROWS UNBOUNDED PRECEDING)]) LogicalProject(created_at=[$0], server=[$1], @timestamp=[$2], message=[$3], level=[$4], _id=[$5], _index=[$6], _score=[$7], _maxscore=[$8], _sort=[$9], _routing=[$10], __stream_seq__=[ROW_NUMBER() OVER ()]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_logs]]) physical: | EnumerableCalc(expr#0..7=[{inputs}], proj#0..4=[{exprs}], earliest_message=[$t6], latest_message=[$t7]) CalciteEnumerableTopK(sort0=[$5], dir0=[ASC], fetch=[10000]) - EnumerableWindow(window#0=[window(partition {1} rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ARG_MIN($3, $2), ARG_MAX($3, $2)])]) + EnumerableWindow(window#0=[window(partition {1} order by [5] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ARG_MIN($3, $2), ARG_MAX($3, $2)])]) EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_logs]], PushDownContext=[[PROJECT->[created_at, server, @timestamp, message, level]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["created_at","server","@timestamp","message","level"]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_earliest_latest_custom_time.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_earliest_latest_custom_time.yaml index 8a7612054c8..0fa2c04ae1a 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_earliest_latest_custom_time.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_earliest_latest_custom_time.yaml @@ -3,12 +3,12 @@ calcite: LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(created_at=[$0], server=[$1], @timestamp=[$2], message=[$3], level=[$4], earliest_message=[$12], latest_message=[$13]) LogicalSort(sort0=[$11], dir0=[ASC]) - LogicalProject(created_at=[$0], server=[$1], @timestamp=[$2], message=[$3], level=[$4], _id=[$5], _index=[$6], _score=[$7], _maxscore=[$8], _sort=[$9], _routing=[$10], __stream_seq__=[$11], earliest_message=[ARG_MIN($3, $0) OVER (PARTITION BY $4 ROWS UNBOUNDED PRECEDING)], latest_message=[ARG_MAX($3, $0) OVER (PARTITION BY $4 ROWS UNBOUNDED PRECEDING)]) + LogicalProject(created_at=[$0], server=[$1], @timestamp=[$2], message=[$3], level=[$4], _id=[$5], _index=[$6], _score=[$7], _maxscore=[$8], _sort=[$9], _routing=[$10], __stream_seq__=[$11], earliest_message=[ARG_MIN($3, $0) OVER (PARTITION BY $4 ORDER BY $11 ROWS UNBOUNDED PRECEDING)], latest_message=[ARG_MAX($3, $0) OVER (PARTITION BY $4 ORDER BY $11 ROWS UNBOUNDED PRECEDING)]) LogicalProject(created_at=[$0], server=[$1], @timestamp=[$2], message=[$3], level=[$4], _id=[$5], _index=[$6], _score=[$7], _maxscore=[$8], _sort=[$9], _routing=[$10], __stream_seq__=[ROW_NUMBER() OVER ()]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_logs]]) physical: | EnumerableCalc(expr#0..7=[{inputs}], proj#0..4=[{exprs}], earliest_message=[$t6], latest_message=[$t7]) CalciteEnumerableTopK(sort0=[$5], dir0=[ASC], fetch=[10000]) - EnumerableWindow(window#0=[window(partition {4} rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ARG_MIN($3, $0), ARG_MAX($3, $0)])]) + EnumerableWindow(window#0=[window(partition {4} order by [5] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ARG_MIN($3, $0), ARG_MAX($3, $0)])]) EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_logs]], PushDownContext=[[PROJECT->[created_at, server, @timestamp, message, level]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["created_at","server","@timestamp","message","level"]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_null_bucket.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_null_bucket.yaml index d52457d6671..d19f5053370 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_null_bucket.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_null_bucket.yaml @@ -3,13 +3,13 @@ calcite: LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], avg_age=[$18]) LogicalSort(sort0=[$17], dir0=[ASC]) - LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[$17], avg_age=[CASE(IS NOT NULL($4), /(SUM($8) OVER (PARTITION BY $4 ROWS UNBOUNDED PRECEDING), CAST(COUNT($8) OVER (PARTITION BY $4 ROWS UNBOUNDED PRECEDING)):DOUBLE NOT NULL), null:DOUBLE)]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[$17], avg_age=[CASE(IS NOT NULL($4), /(SUM($8) OVER (PARTITION BY $4 ORDER BY $17 ROWS UNBOUNDED PRECEDING), CAST(COUNT($8) OVER (PARTITION BY $4 ORDER BY $17 ROWS UNBOUNDED PRECEDING)):DOUBLE NOT NULL), null:DOUBLE)]) LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[ROW_NUMBER() OVER ()]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | EnumerableCalc(expr#0..14=[{inputs}], expr#15=[CAST($t14):DOUBLE NOT NULL], expr#16=[/($t13, $t15)], expr#17=[null:DOUBLE], expr#18=[CASE($t12, $t16, $t17)], proj#0..10=[{exprs}], avg_age=[$t18]) CalciteEnumerableTopK(sort0=[$11], dir0=[ASC], fetch=[10000]) - EnumerableWindow(window#0=[window(partition {4} rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [$SUM0($8), COUNT($8)])]) + EnumerableWindow(window#0=[window(partition {4} order by [11] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [$SUM0($8), COUNT($8)])]) EnumerableCalc(expr#0..11=[{inputs}], expr#12=[IS NOT NULL($t4)], proj#0..12=[{exprs}]) EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_reset.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_reset.yaml index 72f8f4d6ca7..f57613734cd 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_reset.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_reset.yaml @@ -4,13 +4,13 @@ calcite: LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], avg_age=[$21]) LogicalSort(sort0=[$17], dir0=[ASC]) LogicalCorrelate(correlation=[$cor0], joinType=[left], requiredColumns=[{4, 17, 20}]) - LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[$17], __reset_before_flag__=[$18], __reset_after_flag__=[$19], __seg_id__=[+(SUM($18) OVER (ROWS UNBOUNDED PRECEDING), COALESCE(SUM($19) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING), 0))]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[$17], __reset_before_flag__=[$18], __reset_after_flag__=[$19], __seg_id__=[+(SUM($18) OVER (ORDER BY $17 ROWS UNBOUNDED PRECEDING), COALESCE(SUM($19) OVER (ORDER BY $17 ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING), 0))]) LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[ROW_NUMBER() OVER ()], __reset_before_flag__=[CASE(>($8, 34), 1, 0)], __reset_after_flag__=[CASE(<($8, 25), 1, 0)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) LogicalAggregate(group=[{}], avg_age=[AVG($0)]) LogicalProject(age=[$8]) LogicalFilter(condition=[AND(<($17, $cor0.__stream_seq__), =($20, $cor0.__seg_id__), OR(=($4, $cor0.gender), AND(IS NULL($4), IS NULL($cor0.gender))))]) - LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[$17], __reset_before_flag__=[$18], __reset_after_flag__=[$19], __seg_id__=[+(SUM($18) OVER (ROWS UNBOUNDED PRECEDING), COALESCE(SUM($19) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING), 0))]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[$17], __reset_before_flag__=[$18], __reset_after_flag__=[$19], __seg_id__=[+(SUM($18) OVER (ORDER BY $17 ROWS UNBOUNDED PRECEDING), COALESCE(SUM($19) OVER (ORDER BY $17 ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING), 0))]) LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[ROW_NUMBER() OVER ()], __reset_before_flag__=[CASE(>($8, 34), 1, 0)], __reset_after_flag__=[CASE(<($8, 25), 1, 0)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | @@ -19,7 +19,7 @@ calcite: EnumerableMergeJoin(condition=[AND(=($11, $15), =($12, $16), =($13, $17), IS NOT DISTINCT FROM($4, $14))], joinType=[left]) EnumerableSort(sort0=[$11], sort1=[$12], sort2=[$13], dir0=[ASC], dir1=[ASC], dir2=[ASC]) EnumerableCalc(expr#0..16=[{inputs}], expr#17=[0], expr#18=[COALESCE($t16, $t17)], expr#19=[+($t15, $t18)], proj#0..11=[{exprs}], __seg_id__=[$t19], $f16=[$t14]) - EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [$SUM0($12)])], window#1=[window(rows between UNBOUNDED PRECEDING and $15 PRECEDING aggs [$SUM0($13)])], constants=[[1]]) + EnumerableWindow(window#0=[window(order by [11] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [$SUM0($12)])], window#1=[window(order by [11] rows between UNBOUNDED PRECEDING and $15 PRECEDING aggs [$SUM0($13)])], constants=[[1]]) EnumerableCalc(expr#0..11=[{inputs}], expr#12=[34], expr#13=[>($t8, $t12)], expr#14=[1], expr#15=[0], expr#16=[CASE($t13, $t14, $t15)], expr#17=[25], expr#18=[<($t8, $t17)], expr#19=[CASE($t18, $t14, $t15)], expr#20=[IS NULL($t4)], proj#0..11=[{exprs}], __reset_before_flag__=[$t16], __reset_after_flag__=[$t19], $14=[$t20]) EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) @@ -29,12 +29,12 @@ calcite: EnumerableHashJoin(condition=[AND(=($2, $7), <($6, $1), OR(=($4, $0), AND(IS NULL($4), $3)))], joinType=[inner]) EnumerableAggregate(group=[{0, 1, 2, 3}]) EnumerableCalc(expr#0..6=[{inputs}], expr#7=[0], expr#8=[COALESCE($t6, $t7)], expr#9=[+($t5, $t8)], proj#0..1=[{exprs}], __seg_id__=[$t9], $f16=[$t4]) - EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [$SUM0($2)])], window#1=[window(rows between UNBOUNDED PRECEDING and $5 PRECEDING aggs [$SUM0($3)])], constants=[[1]]) + EnumerableWindow(window#0=[window(order by [1] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [$SUM0($2)])], window#1=[window(order by [1] rows between UNBOUNDED PRECEDING and $5 PRECEDING aggs [$SUM0($3)])], constants=[[1]]) EnumerableCalc(expr#0..2=[{inputs}], expr#3=[34], expr#4=[>($t1, $t3)], expr#5=[1], expr#6=[0], expr#7=[CASE($t4, $t5, $t6)], expr#8=[25], expr#9=[<($t1, $t8)], expr#10=[CASE($t9, $t5, $t6)], expr#11=[IS NULL($t0)], gender=[$t0], __stream_seq__=[$t2], __reset_before_flag__=[$t7], __reset_after_flag__=[$t10], $4=[$t11]) EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[gender, age]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["gender","age"]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) EnumerableCalc(expr#0..6=[{inputs}], expr#7=[0], expr#8=[COALESCE($t6, $t7)], expr#9=[+($t5, $t8)], proj#0..2=[{exprs}], __seg_id__=[$t9]) - EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [$SUM0($3)])], window#1=[window(rows between UNBOUNDED PRECEDING and $5 PRECEDING aggs [$SUM0($4)])], constants=[[1]]) + EnumerableWindow(window#0=[window(order by [2] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [$SUM0($3)])], window#1=[window(order by [2] rows between UNBOUNDED PRECEDING and $5 PRECEDING aggs [$SUM0($4)])], constants=[[1]]) EnumerableCalc(expr#0..2=[{inputs}], expr#3=[34], expr#4=[>($t1, $t3)], expr#5=[1], expr#6=[0], expr#7=[CASE($t4, $t5, $t6)], expr#8=[25], expr#9=[<($t1, $t8)], expr#10=[CASE($t9, $t5, $t6)], proj#0..2=[{exprs}], __reset_before_flag__=[$t7], __reset_after_flag__=[$t10]) EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[gender, age]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["gender","age"]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_reset_null_bucket.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_reset_null_bucket.yaml index 42b50e7eb5f..929d00d033d 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_reset_null_bucket.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_streamstats_reset_null_bucket.yaml @@ -4,13 +4,13 @@ calcite: LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], avg_age=[$21]) LogicalSort(sort0=[$17], dir0=[ASC]) LogicalCorrelate(correlation=[$cor0], joinType=[left], requiredColumns=[{4, 17, 20}]) - LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[$17], __reset_before_flag__=[$18], __reset_after_flag__=[$19], __seg_id__=[+(SUM($18) OVER (ROWS UNBOUNDED PRECEDING), COALESCE(SUM($19) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING), 0))]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[$17], __reset_before_flag__=[$18], __reset_after_flag__=[$19], __seg_id__=[+(SUM($18) OVER (ORDER BY $17 ROWS UNBOUNDED PRECEDING), COALESCE(SUM($19) OVER (ORDER BY $17 ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING), 0))]) LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[ROW_NUMBER() OVER ()], __reset_before_flag__=[CASE(>($8, 34), 1, 0)], __reset_after_flag__=[CASE(<($8, 25), 1, 0)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) LogicalAggregate(group=[{}], avg_age=[AVG($0)]) LogicalProject(age=[$8]) LogicalFilter(condition=[AND(<($17, $cor0.__stream_seq__), =($20, $cor0.__seg_id__), =($4, $cor0.gender))]) - LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[$17], __reset_before_flag__=[$18], __reset_after_flag__=[$19], __seg_id__=[+(SUM($18) OVER (ROWS UNBOUNDED PRECEDING), COALESCE(SUM($19) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING), 0))]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[$17], __reset_before_flag__=[$18], __reset_after_flag__=[$19], __seg_id__=[+(SUM($18) OVER (ORDER BY $17 ROWS UNBOUNDED PRECEDING), COALESCE(SUM($19) OVER (ORDER BY $17 ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING), 0))]) LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[ROW_NUMBER() OVER ()], __reset_before_flag__=[CASE(>($8, 34), 1, 0)], __reset_after_flag__=[CASE(<($8, 25), 1, 0)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | @@ -19,7 +19,7 @@ calcite: EnumerableMergeJoin(condition=[AND(=($4, $13), =($11, $14), =($12, $15))], joinType=[left]) EnumerableSort(sort0=[$4], sort1=[$11], sort2=[$12], dir0=[ASC], dir1=[ASC], dir2=[ASC]) EnumerableCalc(expr#0..15=[{inputs}], expr#16=[0], expr#17=[COALESCE($t15, $t16)], expr#18=[+($t14, $t17)], proj#0..11=[{exprs}], __seg_id__=[$t18]) - EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [$SUM0($12)])], window#1=[window(rows between UNBOUNDED PRECEDING and $14 PRECEDING aggs [$SUM0($13)])], constants=[[1]]) + EnumerableWindow(window#0=[window(order by [11] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [$SUM0($12)])], window#1=[window(order by [11] rows between UNBOUNDED PRECEDING and $14 PRECEDING aggs [$SUM0($13)])], constants=[[1]]) EnumerableCalc(expr#0..11=[{inputs}], expr#12=[34], expr#13=[>($t8, $t12)], expr#14=[1], expr#15=[0], expr#16=[CASE($t13, $t14, $t15)], expr#17=[25], expr#18=[<($t8, $t17)], expr#19=[CASE($t18, $t14, $t15)], proj#0..11=[{exprs}], __reset_before_flag__=[$t16], __reset_after_flag__=[$t19]) EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[account_number, firstname, address, balance, gender, city, employer, state, age, email, lastname]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["account_number","firstname","address","balance","gender","city","employer","state","age","email","lastname"]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) @@ -29,12 +29,12 @@ calcite: EnumerableHashJoin(condition=[AND(=($2, $6), =($0, $3), <($5, $1))], joinType=[inner]) EnumerableAggregate(group=[{0, 1, 2}]) EnumerableCalc(expr#0..5=[{inputs}], expr#6=[0], expr#7=[COALESCE($t5, $t6)], expr#8=[+($t4, $t7)], proj#0..1=[{exprs}], __seg_id__=[$t8]) - EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [$SUM0($2)])], window#1=[window(rows between UNBOUNDED PRECEDING and $4 PRECEDING aggs [$SUM0($3)])], constants=[[1]]) + EnumerableWindow(window#0=[window(order by [1] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [$SUM0($2)])], window#1=[window(order by [1] rows between UNBOUNDED PRECEDING and $4 PRECEDING aggs [$SUM0($3)])], constants=[[1]]) EnumerableCalc(expr#0..2=[{inputs}], expr#3=[34], expr#4=[>($t1, $t3)], expr#5=[1], expr#6=[0], expr#7=[CASE($t4, $t5, $t6)], expr#8=[25], expr#9=[<($t1, $t8)], expr#10=[CASE($t9, $t5, $t6)], gender=[$t0], __stream_seq__=[$t2], __reset_before_flag__=[$t7], __reset_after_flag__=[$t10]) EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[gender, age]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["gender","age"]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) EnumerableCalc(expr#0..6=[{inputs}], expr#7=[0], expr#8=[COALESCE($t6, $t7)], expr#9=[+($t5, $t8)], proj#0..2=[{exprs}], __seg_id__=[$t9]) - EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [$SUM0($3)])], window#1=[window(rows between UNBOUNDED PRECEDING and $5 PRECEDING aggs [$SUM0($4)])], constants=[[1]]) + EnumerableWindow(window#0=[window(order by [2] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [$SUM0($3)])], window#1=[window(order by [2] rows between UNBOUNDED PRECEDING and $5 PRECEDING aggs [$SUM0($4)])], constants=[[1]]) EnumerableCalc(expr#0..2=[{inputs}], expr#3=[34], expr#4=[>($t1, $t3)], expr#5=[1], expr#6=[0], expr#7=[CASE($t4, $t5, $t6)], expr#8=[25], expr#9=[<($t1, $t8)], expr#10=[CASE($t9, $t5, $t6)], proj#0..2=[{exprs}], __reset_before_flag__=[$t7], __reset_after_flag__=[$t10]) EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[gender, age]], OpenSearchRequestBuilder(sourceBuilder={"from":0,"timeout":"1m","_source":{"includes":["gender","age"]}}, requestedTotalSize=2147483647, pageSize=null, startFrom=0)]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite/explain_trendline_sort_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite/explain_trendline_sort_push.yaml index 354c7f74fbe..5f6be04c244 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite/explain_trendline_sort_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite/explain_trendline_sort_push.yaml @@ -1,7 +1,7 @@ calcite: logical: | LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) - LogicalProject(ageTrend=[CASE(>(COUNT() OVER (ROWS 1 PRECEDING), 1), /(SUM($8) OVER (ROWS 1 PRECEDING), CAST(COUNT($8) OVER (ROWS 1 PRECEDING)):DOUBLE NOT NULL), null:NULL)]) + LogicalProject(ageTrend=[CASE(>(COUNT() OVER (ORDER BY $8 NULLS LAST ROWS 1 PRECEDING), 1), /(SUM($8) OVER (ORDER BY $8 NULLS LAST ROWS 1 PRECEDING), CAST(COUNT($8) OVER (ORDER BY $8 NULLS LAST ROWS 1 PRECEDING)):DOUBLE NOT NULL), null:NULL)]) LogicalFilter(condition=[IS NOT NULL($8)]) LogicalSort(sort0=[$8], dir0=[ASC]) LogicalSort(fetch=[5]) @@ -9,7 +9,7 @@ calcite: physical: | EnumerableLimit(fetch=[10000]) EnumerableCalc(expr#0..3=[{inputs}], expr#4=[1], expr#5=[>($t1, $t4)], expr#6=[CAST($t3):DOUBLE NOT NULL], expr#7=[/($t2, $t6)], expr#8=[null:NULL], expr#9=[CASE($t5, $t7, $t8)], ageTrend=[$t9]) - EnumerableWindow(window#0=[window(rows between $1 PRECEDING and CURRENT ROW aggs [COUNT(), $SUM0($0), COUNT($0)])], constants=[[1]]) + EnumerableWindow(window#0=[window(order by [0] rows between $1 PRECEDING and CURRENT ROW aggs [COUNT(), $SUM0($0), COUNT($0)])], constants=[[1]]) EnumerableCalc(expr#0=[{inputs}], expr#1=[IS NOT NULL($t0)], age=[$t0], $condition=[$t1]) CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]], PushDownContext=[[PROJECT->[age], LIMIT->5, SORT->[{ "age" : { diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_streamstats_distinct_count.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_streamstats_distinct_count.yaml index 550cf0ea9cb..f696e69e88b 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_streamstats_distinct_count.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_streamstats_distinct_count.yaml @@ -3,13 +3,13 @@ calcite: LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], distinct_states=[$18]) LogicalSort(sort0=[$17], dir0=[ASC]) - LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[$17], distinct_states=[DISTINCT_COUNT_APPROX($7) OVER (PARTITION BY $4 ROWS UNBOUNDED PRECEDING)]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[$17], distinct_states=[DISTINCT_COUNT_APPROX($7) OVER (PARTITION BY $4 ORDER BY $17 ROWS UNBOUNDED PRECEDING)]) LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[ROW_NUMBER() OVER ()]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | EnumerableCalc(expr#0..18=[{inputs}], proj#0..10=[{exprs}], distinct_states=[$t18]) EnumerableLimit(fetch=[10000]) EnumerableSort(sort0=[$17], dir0=[ASC]) - EnumerableWindow(window#0=[window(partition {4} rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [DISTINCT_COUNT_APPROX($7)])]) + EnumerableWindow(window#0=[window(partition {4} order by [17] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [DISTINCT_COUNT_APPROX($7)])]) EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_streamstats_earliest_latest.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_streamstats_earliest_latest.yaml index c37fae48771..2c91e942520 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_streamstats_earliest_latest.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_streamstats_earliest_latest.yaml @@ -3,13 +3,13 @@ calcite: LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(created_at=[$0], server=[$1], @timestamp=[$2], message=[$3], level=[$4], earliest_message=[$12], latest_message=[$13]) LogicalSort(sort0=[$11], dir0=[ASC]) - LogicalProject(created_at=[$0], server=[$1], @timestamp=[$2], message=[$3], level=[$4], _id=[$5], _index=[$6], _score=[$7], _maxscore=[$8], _sort=[$9], _routing=[$10], __stream_seq__=[$11], earliest_message=[ARG_MIN($3, $2) OVER (PARTITION BY $1 ROWS UNBOUNDED PRECEDING)], latest_message=[ARG_MAX($3, $2) OVER (PARTITION BY $1 ROWS UNBOUNDED PRECEDING)]) + LogicalProject(created_at=[$0], server=[$1], @timestamp=[$2], message=[$3], level=[$4], _id=[$5], _index=[$6], _score=[$7], _maxscore=[$8], _sort=[$9], _routing=[$10], __stream_seq__=[$11], earliest_message=[ARG_MIN($3, $2) OVER (PARTITION BY $1 ORDER BY $11 ROWS UNBOUNDED PRECEDING)], latest_message=[ARG_MAX($3, $2) OVER (PARTITION BY $1 ORDER BY $11 ROWS UNBOUNDED PRECEDING)]) LogicalProject(created_at=[$0], server=[$1], @timestamp=[$2], message=[$3], level=[$4], _id=[$5], _index=[$6], _score=[$7], _maxscore=[$8], _sort=[$9], _routing=[$10], __stream_seq__=[ROW_NUMBER() OVER ()]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_logs]]) physical: | EnumerableCalc(expr#0..13=[{inputs}], proj#0..4=[{exprs}], earliest_message=[$t12], latest_message=[$t13]) EnumerableLimit(fetch=[10000]) EnumerableSort(sort0=[$11], dir0=[ASC]) - EnumerableWindow(window#0=[window(partition {1} rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ARG_MIN($3, $2), ARG_MAX($3, $2)])]) + EnumerableWindow(window#0=[window(partition {1} order by [11] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ARG_MIN($3, $2), ARG_MAX($3, $2)])]) EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_logs]]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_streamstats_earliest_latest_custom_time.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_streamstats_earliest_latest_custom_time.yaml index b85e4b6b7bb..fc5bc29a2dc 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_streamstats_earliest_latest_custom_time.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_streamstats_earliest_latest_custom_time.yaml @@ -3,13 +3,13 @@ calcite: LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(created_at=[$0], server=[$1], @timestamp=[$2], message=[$3], level=[$4], earliest_message=[$12], latest_message=[$13]) LogicalSort(sort0=[$11], dir0=[ASC]) - LogicalProject(created_at=[$0], server=[$1], @timestamp=[$2], message=[$3], level=[$4], _id=[$5], _index=[$6], _score=[$7], _maxscore=[$8], _sort=[$9], _routing=[$10], __stream_seq__=[$11], earliest_message=[ARG_MIN($3, $0) OVER (PARTITION BY $4 ROWS UNBOUNDED PRECEDING)], latest_message=[ARG_MAX($3, $0) OVER (PARTITION BY $4 ROWS UNBOUNDED PRECEDING)]) + LogicalProject(created_at=[$0], server=[$1], @timestamp=[$2], message=[$3], level=[$4], _id=[$5], _index=[$6], _score=[$7], _maxscore=[$8], _sort=[$9], _routing=[$10], __stream_seq__=[$11], earliest_message=[ARG_MIN($3, $0) OVER (PARTITION BY $4 ORDER BY $11 ROWS UNBOUNDED PRECEDING)], latest_message=[ARG_MAX($3, $0) OVER (PARTITION BY $4 ORDER BY $11 ROWS UNBOUNDED PRECEDING)]) LogicalProject(created_at=[$0], server=[$1], @timestamp=[$2], message=[$3], level=[$4], _id=[$5], _index=[$6], _score=[$7], _maxscore=[$8], _sort=[$9], _routing=[$10], __stream_seq__=[ROW_NUMBER() OVER ()]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_logs]]) physical: | EnumerableCalc(expr#0..13=[{inputs}], proj#0..4=[{exprs}], earliest_message=[$t12], latest_message=[$t13]) EnumerableLimit(fetch=[10000]) EnumerableSort(sort0=[$11], dir0=[ASC]) - EnumerableWindow(window#0=[window(partition {4} rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ARG_MIN($3, $0), ARG_MAX($3, $0)])]) + EnumerableWindow(window#0=[window(partition {4} order by [11] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ARG_MIN($3, $0), ARG_MAX($3, $0)])]) EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_logs]]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_streamstats_null_bucket.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_streamstats_null_bucket.yaml index 08876045225..1126ffd7bd0 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_streamstats_null_bucket.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_streamstats_null_bucket.yaml @@ -3,14 +3,14 @@ calcite: LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], avg_age=[$18]) LogicalSort(sort0=[$17], dir0=[ASC]) - LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[$17], avg_age=[CASE(IS NOT NULL($4), /(SUM($8) OVER (PARTITION BY $4 ROWS UNBOUNDED PRECEDING), CAST(COUNT($8) OVER (PARTITION BY $4 ROWS UNBOUNDED PRECEDING)):DOUBLE NOT NULL), null:DOUBLE)]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[$17], avg_age=[CASE(IS NOT NULL($4), /(SUM($8) OVER (PARTITION BY $4 ORDER BY $17 ROWS UNBOUNDED PRECEDING), CAST(COUNT($8) OVER (PARTITION BY $4 ORDER BY $17 ROWS UNBOUNDED PRECEDING)):DOUBLE NOT NULL), null:DOUBLE)]) LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[ROW_NUMBER() OVER ()]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | EnumerableCalc(expr#0..14=[{inputs}], expr#15=[CAST($t14):DOUBLE NOT NULL], expr#16=[/($t13, $t15)], expr#17=[null:DOUBLE], expr#18=[CASE($t12, $t16, $t17)], proj#0..10=[{exprs}], avg_age=[$t18]) EnumerableLimit(fetch=[10000]) EnumerableSort(sort0=[$11], dir0=[ASC]) - EnumerableWindow(window#0=[window(partition {4} rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [$SUM0($8), COUNT($8)])]) + EnumerableWindow(window#0=[window(partition {4} order by [11] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [$SUM0($8), COUNT($8)])]) EnumerableCalc(expr#0..17=[{inputs}], expr#18=[IS NOT NULL($t4)], proj#0..10=[{exprs}], __stream_seq__=[$t17], $12=[$t18]) EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) - CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) \ No newline at end of file + CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_streamstats_reset.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_streamstats_reset.yaml index 5664cc6aa87..b1eb418bbbd 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_streamstats_reset.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_streamstats_reset.yaml @@ -4,13 +4,13 @@ calcite: LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], avg_age=[$21]) LogicalSort(sort0=[$17], dir0=[ASC]) LogicalCorrelate(correlation=[$cor0], joinType=[left], requiredColumns=[{4, 17, 20}]) - LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[$17], __reset_before_flag__=[$18], __reset_after_flag__=[$19], __seg_id__=[+(SUM($18) OVER (ROWS UNBOUNDED PRECEDING), COALESCE(SUM($19) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING), 0))]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[$17], __reset_before_flag__=[$18], __reset_after_flag__=[$19], __seg_id__=[+(SUM($18) OVER (ORDER BY $17 ROWS UNBOUNDED PRECEDING), COALESCE(SUM($19) OVER (ORDER BY $17 ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING), 0))]) LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[ROW_NUMBER() OVER ()], __reset_before_flag__=[CASE(>($8, 34), 1, 0)], __reset_after_flag__=[CASE(<($8, 25), 1, 0)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) LogicalAggregate(group=[{}], avg_age=[AVG($0)]) LogicalProject(age=[$8]) LogicalFilter(condition=[AND(<($17, $cor0.__stream_seq__), =($20, $cor0.__seg_id__), OR(=($4, $cor0.gender), AND(IS NULL($4), IS NULL($cor0.gender))))]) - LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[$17], __reset_before_flag__=[$18], __reset_after_flag__=[$19], __seg_id__=[+(SUM($18) OVER (ROWS UNBOUNDED PRECEDING), COALESCE(SUM($19) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING), 0))]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[$17], __reset_before_flag__=[$18], __reset_after_flag__=[$19], __seg_id__=[+(SUM($18) OVER (ORDER BY $17 ROWS UNBOUNDED PRECEDING), COALESCE(SUM($19) OVER (ORDER BY $17 ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING), 0))]) LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[ROW_NUMBER() OVER ()], __reset_before_flag__=[CASE(>($8, 34), 1, 0)], __reset_after_flag__=[CASE(<($8, 25), 1, 0)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | @@ -19,7 +19,7 @@ calcite: EnumerableMergeJoin(condition=[AND(=($11, $15), =($12, $16), =($13, $17), IS NOT DISTINCT FROM($4, $14))], joinType=[left]) EnumerableSort(sort0=[$11], sort1=[$12], sort2=[$13], dir0=[ASC], dir1=[ASC], dir2=[ASC]) EnumerableCalc(expr#0..16=[{inputs}], expr#17=[0], expr#18=[COALESCE($t16, $t17)], expr#19=[+($t15, $t18)], proj#0..11=[{exprs}], __seg_id__=[$t19], $f16=[$t14]) - EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [$SUM0($12)])], window#1=[window(rows between UNBOUNDED PRECEDING and $15 PRECEDING aggs [$SUM0($13)])], constants=[[1]]) + EnumerableWindow(window#0=[window(order by [11] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [$SUM0($12)])], window#1=[window(order by [11] rows between UNBOUNDED PRECEDING and $15 PRECEDING aggs [$SUM0($13)])], constants=[[1]]) EnumerableCalc(expr#0..17=[{inputs}], expr#18=[34], expr#19=[>($t8, $t18)], expr#20=[1], expr#21=[0], expr#22=[CASE($t19, $t20, $t21)], expr#23=[25], expr#24=[<($t8, $t23)], expr#25=[CASE($t24, $t20, $t21)], expr#26=[IS NULL($t4)], proj#0..10=[{exprs}], __stream_seq__=[$t17], __reset_before_flag__=[$t22], __reset_after_flag__=[$t25], $14=[$t26]) EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) @@ -29,12 +29,12 @@ calcite: EnumerableHashJoin(condition=[AND(=($2, $7), <($6, $1), OR(=($4, $0), AND(IS NULL($4), $3)))], joinType=[inner]) EnumerableAggregate(group=[{0, 1, 2, 3}]) EnumerableCalc(expr#0..6=[{inputs}], expr#7=[0], expr#8=[COALESCE($t6, $t7)], expr#9=[+($t5, $t8)], proj#0..1=[{exprs}], __seg_id__=[$t9], $f16=[$t4]) - EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [$SUM0($2)])], window#1=[window(rows between UNBOUNDED PRECEDING and $5 PRECEDING aggs [$SUM0($3)])], constants=[[1]]) + EnumerableWindow(window#0=[window(order by [1] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [$SUM0($2)])], window#1=[window(order by [1] rows between UNBOUNDED PRECEDING and $5 PRECEDING aggs [$SUM0($3)])], constants=[[1]]) EnumerableCalc(expr#0..17=[{inputs}], expr#18=[34], expr#19=[>($t8, $t18)], expr#20=[1], expr#21=[0], expr#22=[CASE($t19, $t20, $t21)], expr#23=[25], expr#24=[<($t8, $t23)], expr#25=[CASE($t24, $t20, $t21)], expr#26=[IS NULL($t4)], gender=[$t4], __stream_seq__=[$t17], __reset_before_flag__=[$t22], __reset_after_flag__=[$t25], $4=[$t26]) EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) EnumerableCalc(expr#0..6=[{inputs}], expr#7=[0], expr#8=[COALESCE($t6, $t7)], expr#9=[+($t5, $t8)], proj#0..2=[{exprs}], __seg_id__=[$t9]) - EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [$SUM0($3)])], window#1=[window(rows between UNBOUNDED PRECEDING and $5 PRECEDING aggs [$SUM0($4)])], constants=[[1]]) + EnumerableWindow(window#0=[window(order by [2] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [$SUM0($3)])], window#1=[window(order by [2] rows between UNBOUNDED PRECEDING and $5 PRECEDING aggs [$SUM0($4)])], constants=[[1]]) EnumerableCalc(expr#0..17=[{inputs}], expr#18=[34], expr#19=[>($t8, $t18)], expr#20=[1], expr#21=[0], expr#22=[CASE($t19, $t20, $t21)], expr#23=[25], expr#24=[<($t8, $t23)], expr#25=[CASE($t24, $t20, $t21)], gender=[$t4], age=[$t8], __stream_seq__=[$t17], __reset_before_flag__=[$t22], __reset_after_flag__=[$t25]) EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_streamstats_reset_null_bucket.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_streamstats_reset_null_bucket.yaml index 40fb4087001..959d3d5acd6 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_streamstats_reset_null_bucket.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_streamstats_reset_null_bucket.yaml @@ -4,13 +4,13 @@ calcite: LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], avg_age=[$21]) LogicalSort(sort0=[$17], dir0=[ASC]) LogicalCorrelate(correlation=[$cor0], joinType=[left], requiredColumns=[{4, 17, 20}]) - LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[$17], __reset_before_flag__=[$18], __reset_after_flag__=[$19], __seg_id__=[+(SUM($18) OVER (ROWS UNBOUNDED PRECEDING), COALESCE(SUM($19) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING), 0))]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[$17], __reset_before_flag__=[$18], __reset_after_flag__=[$19], __seg_id__=[+(SUM($18) OVER (ORDER BY $17 ROWS UNBOUNDED PRECEDING), COALESCE(SUM($19) OVER (ORDER BY $17 ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING), 0))]) LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[ROW_NUMBER() OVER ()], __reset_before_flag__=[CASE(>($8, 34), 1, 0)], __reset_after_flag__=[CASE(<($8, 25), 1, 0)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) LogicalAggregate(group=[{}], avg_age=[AVG($0)]) LogicalProject(age=[$8]) LogicalFilter(condition=[AND(<($17, $cor0.__stream_seq__), =($20, $cor0.__seg_id__), =($4, $cor0.gender))]) - LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[$17], __reset_before_flag__=[$18], __reset_after_flag__=[$19], __seg_id__=[+(SUM($18) OVER (ROWS UNBOUNDED PRECEDING), COALESCE(SUM($19) OVER (ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING), 0))]) + LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[$17], __reset_before_flag__=[$18], __reset_after_flag__=[$19], __seg_id__=[+(SUM($18) OVER (ORDER BY $17 ROWS UNBOUNDED PRECEDING), COALESCE(SUM($19) OVER (ORDER BY $17 ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING), 0))]) LogicalProject(account_number=[$0], firstname=[$1], address=[$2], balance=[$3], gender=[$4], city=[$5], employer=[$6], state=[$7], age=[$8], email=[$9], lastname=[$10], _id=[$11], _index=[$12], _score=[$13], _maxscore=[$14], _sort=[$15], _routing=[$16], __stream_seq__=[ROW_NUMBER() OVER ()], __reset_before_flag__=[CASE(>($8, 34), 1, 0)], __reset_after_flag__=[CASE(<($8, 25), 1, 0)]) CalciteLogicalIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) physical: | @@ -19,7 +19,7 @@ calcite: EnumerableHashJoin(condition=[AND(=($4, $13), =($11, $14), =($12, $15))], joinType=[left]) EnumerableSort(sort0=[$11], dir0=[ASC]) EnumerableCalc(expr#0..15=[{inputs}], expr#16=[0], expr#17=[COALESCE($t15, $t16)], expr#18=[+($t14, $t17)], proj#0..11=[{exprs}], __seg_id__=[$t18]) - EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [$SUM0($12)])], window#1=[window(rows between UNBOUNDED PRECEDING and $14 PRECEDING aggs [$SUM0($13)])], constants=[[1]]) + EnumerableWindow(window#0=[window(order by [11] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [$SUM0($12)])], window#1=[window(order by [11] rows between UNBOUNDED PRECEDING and $14 PRECEDING aggs [$SUM0($13)])], constants=[[1]]) EnumerableCalc(expr#0..17=[{inputs}], expr#18=[34], expr#19=[>($t8, $t18)], expr#20=[1], expr#21=[0], expr#22=[CASE($t19, $t20, $t21)], expr#23=[25], expr#24=[<($t8, $t23)], expr#25=[CASE($t24, $t20, $t21)], proj#0..10=[{exprs}], __stream_seq__=[$t17], __reset_before_flag__=[$t22], __reset_after_flag__=[$t25]) EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) @@ -28,12 +28,12 @@ calcite: EnumerableHashJoin(condition=[AND(=($2, $6), =($0, $3), <($5, $1))], joinType=[inner]) EnumerableAggregate(group=[{0, 1, 2}]) EnumerableCalc(expr#0..5=[{inputs}], expr#6=[0], expr#7=[COALESCE($t5, $t6)], expr#8=[+($t4, $t7)], proj#0..1=[{exprs}], __seg_id__=[$t8]) - EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [$SUM0($2)])], window#1=[window(rows between UNBOUNDED PRECEDING and $4 PRECEDING aggs [$SUM0($3)])], constants=[[1]]) + EnumerableWindow(window#0=[window(order by [1] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [$SUM0($2)])], window#1=[window(order by [1] rows between UNBOUNDED PRECEDING and $4 PRECEDING aggs [$SUM0($3)])], constants=[[1]]) EnumerableCalc(expr#0..17=[{inputs}], expr#18=[34], expr#19=[>($t8, $t18)], expr#20=[1], expr#21=[0], expr#22=[CASE($t19, $t20, $t21)], expr#23=[25], expr#24=[<($t8, $t23)], expr#25=[CASE($t24, $t20, $t21)], gender=[$t4], __stream_seq__=[$t17], __reset_before_flag__=[$t22], __reset_after_flag__=[$t25]) EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) EnumerableCalc(expr#0..6=[{inputs}], expr#7=[0], expr#8=[COALESCE($t6, $t7)], expr#9=[+($t5, $t8)], proj#0..2=[{exprs}], __seg_id__=[$t9]) - EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [$SUM0($3)])], window#1=[window(rows between UNBOUNDED PRECEDING and $5 PRECEDING aggs [$SUM0($4)])], constants=[[1]]) + EnumerableWindow(window#0=[window(order by [2] rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [$SUM0($3)])], window#1=[window(order by [2] rows between UNBOUNDED PRECEDING and $5 PRECEDING aggs [$SUM0($4)])], constants=[[1]]) EnumerableCalc(expr#0..17=[{inputs}], expr#18=[34], expr#19=[>($t8, $t18)], expr#20=[1], expr#21=[0], expr#22=[CASE($t19, $t20, $t21)], expr#23=[25], expr#24=[<($t8, $t23)], expr#25=[CASE($t24, $t20, $t21)], gender=[$t4], age=[$t8], __stream_seq__=[$t17], __reset_before_flag__=[$t22], __reset_after_flag__=[$t25]) EnumerableWindow(window#0=[window(rows between UNBOUNDED PRECEDING and CURRENT ROW aggs [ROW_NUMBER()])]) CalciteEnumerableIndexScan(table=[[OpenSearch, opensearch-sql_test_index_account]]) \ No newline at end of file diff --git a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_trendline_sort_push.yaml b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_trendline_sort_push.yaml index 2427a30e1a7..3c87f8683fc 100644 --- a/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_trendline_sort_push.yaml +++ b/integ-test/src/test/resources/expectedOutput/calcite_no_pushdown/explain_trendline_sort_push.yaml @@ -1,7 +1,7 @@ calcite: logical: | LogicalSystemLimit(fetch=[10000], type=[QUERY_SIZE_LIMIT]) - LogicalProject(ageTrend=[CASE(>(COUNT() OVER (ROWS 1 PRECEDING), 1), /(SUM($8) OVER (ROWS 1 PRECEDING), CAST(COUNT($8) OVER (ROWS 1 PRECEDING)):DOUBLE NOT NULL), null:NULL)]) + LogicalProject(ageTrend=[CASE(>(COUNT() OVER (ORDER BY $8 NULLS LAST ROWS 1 PRECEDING), 1), /(SUM($8) OVER (ORDER BY $8 NULLS LAST ROWS 1 PRECEDING), CAST(COUNT($8) OVER (ORDER BY $8 NULLS LAST ROWS 1 PRECEDING)):DOUBLE NOT NULL), null:NULL)]) LogicalFilter(condition=[IS NOT NULL($8)]) LogicalSort(sort0=[$8], dir0=[ASC]) LogicalSort(fetch=[5]) @@ -9,7 +9,7 @@ calcite: physical: | EnumerableLimit(fetch=[10000]) EnumerableCalc(expr#0..3=[{inputs}], expr#4=[1], expr#5=[>($t1, $t4)], expr#6=[CAST($t3):DOUBLE NOT NULL], expr#7=[/($t2, $t6)], expr#8=[null:NULL], expr#9=[CASE($t5, $t7, $t8)], ageTrend=[$t9]) - EnumerableWindow(window#0=[window(rows between $1 PRECEDING and CURRENT ROW aggs [COUNT(), $SUM0($0), COUNT($0)])], constants=[[1]]) + EnumerableWindow(window#0=[window(order by [0] rows between $1 PRECEDING and CURRENT ROW aggs [COUNT(), $SUM0($0), COUNT($0)])], constants=[[1]]) EnumerableCalc(expr#0..16=[{inputs}], expr#17=[IS NOT NULL($t8)], age=[$t8], $condition=[$t17]) EnumerableSort(sort0=[$8], dir0=[ASC]) EnumerableLimit(fetch=[5]) From 2b3f739e668c6216c001bf674f3195f0414edbfd Mon Sep 17 00:00:00 2001 From: Songkan Tang Date: Fri, 26 Jun 2026 14:16:52 +0800 Subject: [PATCH 8/9] Preserve streamstats output order Strip explicit input sorts before grouped streamstats windows and restore them afterward so Calcite does not remove the post-window sort as redundant. This keeps sorted streamstats output in pipeline order while still declaring window order for deterministic window evaluation. Share the collation-to-order and restore-order helpers with dedup, and add RelNode coverage for the stripped input sort shape. Signed-off-by: Songkan Tang --- .../sql/calcite/CalciteRelNodeVisitor.java | 32 +++++++++------- .../plan/rule/PPLDedupConvertRule.java | 38 +------------------ .../sql/calcite/utils/PlanUtils.java | 36 ++++++++++++++++++ .../calcite/CalcitePPLStreamstatsTest.java | 7 ++++ 4 files changed, 64 insertions(+), 49 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index bb246dd05ff..61b67e9c695 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -2296,13 +2296,17 @@ public RelNode visitStreamWindow(StreamWindow node, CalcitePlanContext context) new String[] {ROW_NUMBER_COLUMN_FOR_STREAMSTATS}); } - // Short-term correctness workaround for DataFusion: streamstats/trendline are evaluated in - // arrival order, and some engines can preserve that order through window partitions without an - // explicit ORDER BY. DataFusion's physical window plan does not currently provide that - // guarantee, so we declare the inherited input order on the window frame. This may add a small - // per-partition sort cost on engines that did not need it; the long-term fix is a real - // streaming window operator. - List windowOrderKeys = deriveCollationOrderKeys(context); + // Short-term correctness workaround: streamstats/trendline are evaluated in arrival order, and + // some engines can preserve that order through window partitions without an explicit ORDER BY. + // DataFusion and Calcite EnumerableWindow do not currently provide that guarantee for + // partitioned windows, so we declare the inherited input order on the window frame and restore + // explicit upstream sort order after the window. This may add a small per-partition sort cost + // on engines that did not need it; the long-term fix is a real streaming window operator. + RelCollation strippedInputCollation = stripInputSort(context.relBuilder); + List windowOrderKeys = + strippedInputCollation == null + ? deriveCollationOrderKeys(context) + : PlanUtils.collationToOrderKeys(context.relBuilder, strippedInputCollation); boolean useStreamSeq = windowOrderKeys.isEmpty() && hasGroup; if (useStreamSeq) { // streamstats is order-sensitive. Materialize input order before any grouped window can @@ -2327,7 +2331,9 @@ public RelNode visitStreamWindow(StreamWindow node, CalcitePlanContext context) .toList(); projectStreamWindowExpressions(overExpressions, hasGroup, groupList, node, context); - if (!finalWindowOrderKeys.isEmpty()) { + if (strippedInputCollation != null) { + PlanUtils.restoreInputOrder(context.relBuilder, strippedInputCollation); + } else if (!finalWindowOrderKeys.isEmpty()) { context.relBuilder.sort(finalWindowOrderKeys); } if (useStreamSeq) { @@ -3986,11 +3992,11 @@ public RelNode visitTrendline(Trendline node, CalcitePlanContext context) { } }); - // Short-term correctness workaround for DataFusion: streamstats/trendline are evaluated in - // arrival order, and some engines can preserve that order through window partitions without an - // explicit ORDER BY. DataFusion's physical window plan does not currently provide that - // guarantee, so we declare the inherited input order on the window frame. This may add a small - // per-partition sort cost on engines that did not need it; the long-term fix is a real + // Short-term correctness workaround: streamstats/trendline are evaluated in arrival order, and + // some engines can preserve that order through window partitions without an explicit ORDER BY. + // DataFusion and Calcite EnumerableWindow do not currently provide that guarantee for every + // window frame, so we declare the inherited input order on the window frame. This may add a + // small per-partition sort cost on engines that did not need it; the long-term fix is a real // streaming window operator. List trendlineOrderKeys = deriveCollationOrderKeys(context); diff --git a/core/src/main/java/org/opensearch/sql/calcite/plan/rule/PPLDedupConvertRule.java b/core/src/main/java/org/opensearch/sql/calcite/plan/rule/PPLDedupConvertRule.java index 39bd243ea5d..30dc901b0b6 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/plan/rule/PPLDedupConvertRule.java +++ b/core/src/main/java/org/opensearch/sql/calcite/plan/rule/PPLDedupConvertRule.java @@ -6,6 +6,8 @@ package org.opensearch.sql.calcite.plan.rule; import static org.opensearch.sql.calcite.utils.PlanUtils.ROW_NUMBER_COLUMN_FOR_DEDUP; +import static org.opensearch.sql.calcite.utils.PlanUtils.collationToOrderKeys; +import static org.opensearch.sql.calcite.utils.PlanUtils.restoreInputOrder; import java.util.ArrayList; import java.util.List; @@ -187,42 +189,6 @@ public static void buildDedupNotNull( restoreInputOrder(relBuilder, inputCollation); } - /** - * Convert a RelCollation to a list of RexNode order keys using the RelBuilder's field references. - */ - private static List collationToOrderKeys(RelBuilder relBuilder, RelCollation collation) { - if (collation == null || collation.getFieldCollations().isEmpty()) { - return List.of(); - } - List orderKeys = new ArrayList<>(); - for (RelFieldCollation fieldCollation : collation.getFieldCollations()) { - RexNode fieldRef = relBuilder.field(fieldCollation.getFieldIndex()); - if (fieldCollation.direction.isDescending()) { - fieldRef = relBuilder.desc(fieldRef); - } - if (fieldCollation.nullDirection == RelFieldCollation.NullDirection.LAST) { - fieldRef = relBuilder.nullsLast(fieldRef); - } else if (fieldCollation.nullDirection == RelFieldCollation.NullDirection.FIRST) { - fieldRef = relBuilder.nullsFirst(fieldRef); - } - orderKeys.add(fieldRef); - } - return orderKeys; - } - - /** - * Re-apply a sort after dedup to restore the input order that may have been disrupted by the - * window operator. EnumerableWindow can re-partition data by the PARTITION BY key, destroying any - * upstream sort order. This explicit re-sort ensures the final output preserves the original - * order. - */ - private static void restoreInputOrder(RelBuilder relBuilder, RelCollation inputCollation) { - if (inputCollation != null && !inputCollation.getFieldCollations().isEmpty()) { - List sortKeys = collationToOrderKeys(relBuilder, inputCollation); - relBuilder.sort(sortKeys); - } - } - /** Rule configuration. */ @Value.Immutable public interface Config extends OpenSearchRuleConfig { diff --git a/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java b/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java index d6e86d877e3..4104d05909e 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java +++ b/core/src/main/java/org/opensearch/sql/calcite/utils/PlanUtils.java @@ -756,6 +756,42 @@ public Void visitCorrelVariable(RexCorrelVariable correlVar) { return outputCollation; } + /** + * Convert a {@link RelCollation} to {@link RexNode} order keys using the current RelBuilder field + * references. + */ + public static List collationToOrderKeys( + RelBuilder relBuilder, @Nullable RelCollation collation) { + if (collation == null || collation.getFieldCollations().isEmpty()) { + return List.of(); + } + List orderKeys = new ArrayList<>(); + for (RelFieldCollation fieldCollation : collation.getFieldCollations()) { + RexNode fieldRef = relBuilder.field(fieldCollation.getFieldIndex()); + if (fieldCollation.direction.isDescending()) { + fieldRef = relBuilder.desc(fieldRef); + } + if (fieldCollation.nullDirection == RelFieldCollation.NullDirection.LAST) { + fieldRef = relBuilder.nullsLast(fieldRef); + } else if (fieldCollation.nullDirection == RelFieldCollation.NullDirection.FIRST) { + fieldRef = relBuilder.nullsFirst(fieldRef); + } + orderKeys.add(fieldRef); + } + return orderKeys; + } + + /** + * Re-apply a sort to restore input order that may have been disrupted by a window operator. + * EnumerableWindow can re-partition data by the PARTITION BY key, destroying upstream sort order. + */ + public static void restoreInputOrder( + RelBuilder relBuilder, @Nullable RelCollation inputCollation) { + if (inputCollation != null && !inputCollation.getFieldCollations().isEmpty()) { + relBuilder.sort(collationToOrderKeys(relBuilder, inputCollation)); + } + } + /** * Remove the first Sort node found in the tree, replacing it with its input. Only traverses * through single-input operators (Filter, Project) that preserve order. diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLStreamstatsTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLStreamstatsTest.java index 875eb9ec0dc..9bb5858c782 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLStreamstatsTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLStreamstatsTest.java @@ -5,6 +5,7 @@ package org.opensearch.sql.ppl.calcite; +import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; @@ -71,6 +72,7 @@ public void testStreamstatsByAfterSortOrdersWindowByCollation() { String plan = root.explain(); assertFalse(plan.contains("__stream_seq__")); assertTrue(plan.contains("MAX($5) OVER (PARTITION BY $7 ORDER BY $5 DESC NULLS LAST")); + assertEquals(1, countOccurrences(plan, "LogicalSort(")); } @Test @@ -81,6 +83,11 @@ public void testStreamstatsAfterSortOrdersWindowByCollation() { String plan = root.explain(); assertFalse(plan.contains("__stream_seq__")); assertTrue(plan.contains("max(SAL)=[MAX($5) OVER (ORDER BY $5 DESC NULLS LAST")); + assertEquals(1, countOccurrences(plan, "LogicalSort(")); + } + + private static int countOccurrences(String text, String target) { + return text.split(java.util.regex.Pattern.quote(target), -1).length - 1; } @Test From 6c123568f56b044229d8b9245dcb621124adab20 Mon Sep 17 00:00:00 2001 From: Songkan Tang Date: Fri, 26 Jun 2026 16:27:39 +0800 Subject: [PATCH 9/9] Preserve sorted streamstats arrival order Signed-off-by: Songkan Tang --- .../sql/calcite/CalciteRelNodeVisitor.java | 22 +++++++++---------- .../calcite/CalcitePPLStreamstatsTest.java | 9 ++++---- 2 files changed, 15 insertions(+), 16 deletions(-) diff --git a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java index 61b67e9c695..f3c910c2b4c 100644 --- a/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java +++ b/core/src/main/java/org/opensearch/sql/calcite/CalciteRelNodeVisitor.java @@ -2299,15 +2299,15 @@ public RelNode visitStreamWindow(StreamWindow node, CalcitePlanContext context) // Short-term correctness workaround: streamstats/trendline are evaluated in arrival order, and // some engines can preserve that order through window partitions without an explicit ORDER BY. // DataFusion and Calcite EnumerableWindow do not currently provide that guarantee for - // partitioned windows, so we declare the inherited input order on the window frame and restore - // explicit upstream sort order after the window. This may add a small per-partition sort cost - // on engines that did not need it; the long-term fix is a real streaming window operator. - RelCollation strippedInputCollation = stripInputSort(context.relBuilder); - List windowOrderKeys = - strippedInputCollation == null - ? deriveCollationOrderKeys(context) - : PlanUtils.collationToOrderKeys(context.relBuilder, strippedInputCollation); - boolean useStreamSeq = windowOrderKeys.isEmpty() && hasGroup; + // partitioned windows, so we make grouped streamstats frames walk an explicit input sequence. + // When the input already has an explicit Sort, materialize the sequence after that Sort instead + // of stripping it; this preserves "sort, then streamstats" semantics including tie arrival + // order. This may add a small per-partition sort cost on engines that did not need it; the + // long-term fix is a real streaming window operator. + RelCollation explicitInputCollation = PlanUtils.findInputCollation(context.relBuilder.peek()); + List windowOrderKeys = deriveCollationOrderKeys(context); + boolean useStreamSeq = + hasGroup && (windowOrderKeys.isEmpty() || explicitInputCollation != null); if (useStreamSeq) { // streamstats is order-sensitive. Materialize input order before any grouped window can // repartition rows, then make each window frame walk that sequence explicitly. @@ -2331,9 +2331,7 @@ public RelNode visitStreamWindow(StreamWindow node, CalcitePlanContext context) .toList(); projectStreamWindowExpressions(overExpressions, hasGroup, groupList, node, context); - if (strippedInputCollation != null) { - PlanUtils.restoreInputOrder(context.relBuilder, strippedInputCollation); - } else if (!finalWindowOrderKeys.isEmpty()) { + if (!finalWindowOrderKeys.isEmpty()) { context.relBuilder.sort(finalWindowOrderKeys); } if (useStreamSeq) { diff --git a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLStreamstatsTest.java b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLStreamstatsTest.java index 9bb5858c782..f86b6ee5625 100644 --- a/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLStreamstatsTest.java +++ b/ppl/src/test/java/org/opensearch/sql/ppl/calcite/CalcitePPLStreamstatsTest.java @@ -70,9 +70,11 @@ public void testStreamstatsByAfterSortOrdersWindowByCollation() { RelNode root = getRelNode(ppl); String plan = root.explain(); - assertFalse(plan.contains("__stream_seq__")); - assertTrue(plan.contains("MAX($5) OVER (PARTITION BY $7 ORDER BY $5 DESC NULLS LAST")); - assertEquals(1, countOccurrences(plan, "LogicalSort(")); + assertTrue(plan.contains("__stream_seq__=[ROW_NUMBER() OVER ()]")); + assertTrue(plan.contains("MAX($5) OVER (PARTITION BY $7 ORDER BY $8")); + assertTrue(plan.contains("LogicalSort(sort0=[$5], dir0=[DESC-nulls-last])")); + assertTrue(plan.contains("LogicalSort(sort0=[$8], dir0=[ASC])")); + assertEquals(2, countOccurrences(plan, "LogicalSort(")); } @Test @@ -83,7 +85,6 @@ public void testStreamstatsAfterSortOrdersWindowByCollation() { String plan = root.explain(); assertFalse(plan.contains("__stream_seq__")); assertTrue(plan.contains("max(SAL)=[MAX($5) OVER (ORDER BY $5 DESC NULLS LAST")); - assertEquals(1, countOccurrences(plan, "LogicalSort(")); } private static int countOccurrences(String text, String target) {