Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,7 @@ object CHExpressionUtil {
REGR_SLOPE -> DefaultValidator(),
REGR_INTERCEPT -> DefaultValidator(),
REGR_SXY -> DefaultValidator(),
BITMAP_CONSTRUCT_AGG -> DefaultValidator(),
TO_UTC_TIMESTAMP -> UtcTimestampValidator(),
FROM_UTC_TIMESTAMP -> UtcTimestampValidator(),
STACK -> DefaultValidator(),
Expand Down
3 changes: 2 additions & 1 deletion cpp/velox/substrait/SubstraitToVeloxPlanValidator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1323,7 +1323,8 @@ bool SubstraitToVeloxPlanValidator::validate(const ::substrait::AggregateRel& ag
"regr_slope",
"regr_intercept",
"regr_sxy",
"regr_replacement"};
"regr_replacement",
"bitmap_construct_agg"};

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Follow-up opportunity: register bitmap_or_agg (and bitmap_and_agg for Spark 4.1)

Problem: bitmap_or_agg was introduced in the same Spark 3.5 commit as bitmap_construct_agg, and bitmap_and_agg in Spark 4.1. If native Velox implementations exist for these, registering them together would avoid unnecessary columnar-to-row transitions when users combine bitmap functions in the same query stage.

Investigation Needed: Confirm whether bitmap_or_agg and bitmap_and_agg have native Velox implementations (look for registration in cpp/velox/udf/ or the Velox aggregate function registry). If yes, consider adding them in a follow-up PR:

      "bitmap_construct_agg",
      "bitmap_or_agg",
      "bitmap_and_agg"};

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've Velox PR for bitmap_or_agg in review phase. Will add follow up PR for Gluten once Velox PR merges. Thanks!


auto udafFuncs = UdfLoader::getInstance()->getRegisteredUdafNames();

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,8 @@ class ClickHouseTestSettings extends BackendTestSettings {
// Exception.
.exclude("column pruning - non-readable file")
enableSuite[GlutenBitmapExpressionsQuerySuite]
// bitmap_construct_agg is not supported natively in CH backend.
.excludeCH("bitmap_construct_agg routes to native")

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

remove?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This one checks the native plan path, specifically added to test the native plan. Since for CH backend it runs on spark, this test fail in CH backend.

enableSuite[GlutenBitwiseExpressionsSuite]
enableSuite[GlutenBloomFilterAggregateQuerySuite]
.excludeCH("Test bloom_filter_agg and might_contain")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,6 +85,10 @@ class VeloxTestSettings extends BackendTestSettings {
"INCONSISTENT_BEHAVIOR_CROSS_VERSION: compatibility with Spark 2.4/3.2 in reading/writing dates")
// Doesn't support unhex with failOnError=true.
.exclude("CONVERSION_INVALID_INPUT: to_binary conversion function hex")
// bitmap_construct_agg offloaded to Velox throws GlutenException instead of
// SparkArrayIndexOutOfBoundsException.
.exclude("INVALID_BITMAP_POSITION: position out of bounds")
.exclude("INVALID_BITMAP_POSITION: negative position")
enableSuite[GlutenQueryParsingErrorsSuite]
enableSuite[GlutenArithmeticExpressionSuite]
.exclude("SPARK-45786: Decimal multiply, divide, remainder, quot")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,25 @@
*/
package org.apache.spark.sql

import org.apache.gluten.execution.HashAggregateExecBaseTransformer

import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper

class GlutenBitmapExpressionsQuerySuite
extends BitmapExpressionsQuerySuite
with GlutenSQLTestsTrait {}
with GlutenSQLTestsTrait
with AdaptiveSparkPlanHelper {

test("bitmap_construct_agg routes to native") {
val df = spark.sql(
"SELECT bitmap_construct_agg(bitmap_bit_position(col)) " +
"FROM values (1L), (2L), (3L) AS t(col)")
df.collect()
assert(
collectWithSubqueries(df.queryExecution.executedPlan) {
case h: HashAggregateExecBaseTransformer => h
}.nonEmpty,
"Expected native HashAggregateExecBaseTransformer in plan"
)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,8 @@ class ClickHouseTestSettings extends BackendTestSettings {
// Exception.
.exclude("column pruning - non-readable file")
enableSuite[GlutenBitmapExpressionsQuerySuite]
// bitmap_construct_agg is not supported natively in CH backend.
.excludeCH("bitmap_construct_agg routes to native")
enableSuite[GlutenBitwiseExpressionsSuite]
enableSuite[GlutenBloomFilterAggregateQuerySuite]
.excludeCH("Test bloom_filter_agg and might_contain")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,10 @@ class VeloxTestSettings extends BackendTestSettings {
"INCONSISTENT_BEHAVIOR_CROSS_VERSION: compatibility with Spark 2.4/3.2 in reading/writing dates")
// Doesn't support unhex with failOnError=true.
.exclude("CONVERSION_INVALID_INPUT: to_binary conversion function hex")
// bitmap_construct_agg offloaded to Velox throws GlutenException instead of
// SparkArrayIndexOutOfBoundsException.
.exclude("INVALID_BITMAP_POSITION: position out of bounds")
.exclude("INVALID_BITMAP_POSITION: negative position")
enableSuite[GlutenQueryParsingErrorsSuite]
enableSuite[GlutenQueryContextSuite]
enableSuite[GlutenQueryExecutionAnsiErrorsSuite]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,25 @@
*/
package org.apache.spark.sql

import org.apache.gluten.execution.HashAggregateExecBaseTransformer

import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper

class GlutenBitmapExpressionsQuerySuite
extends BitmapExpressionsQuerySuite
with GlutenSQLTestsTrait {}
with GlutenSQLTestsTrait
with AdaptiveSparkPlanHelper {

test("bitmap_construct_agg routes to native") {
val df = spark.sql(
"SELECT bitmap_construct_agg(bitmap_bit_position(col)) " +
"FROM values (1L), (2L), (3L) AS t(col)")
df.collect()
assert(
collectWithSubqueries(df.queryExecution.executedPlan) {
case h: HashAggregateExecBaseTransformer => h
}.nonEmpty,
"Expected native HashAggregateExecBaseTransformer in plan"
)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,8 @@ class ClickHouseTestSettings extends BackendTestSettings {
// Exception.
.exclude("column pruning - non-readable file")
enableSuite[GlutenBitmapExpressionsQuerySuite]
// bitmap_construct_agg is not supported natively in CH backend.
.excludeCH("bitmap_construct_agg routes to native")
enableSuite[GlutenBitwiseExpressionsSuite]
enableSuite[GlutenBloomFilterAggregateQuerySuite]
.excludeCH("Test bloom_filter_agg and might_contain")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -293,6 +293,10 @@ class VeloxTestSettings extends BackendTestSettings {
"INCONSISTENT_BEHAVIOR_CROSS_VERSION: compatibility with Spark 2.4/3.2 in reading/writing dates")
// Doesn't support unhex with failOnError=true.
.exclude("CONVERSION_INVALID_INPUT: to_binary conversion function hex")
// bitmap_construct_agg offloaded to Velox throws GlutenException instead of
// SparkArrayIndexOutOfBoundsException.
.exclude("INVALID_BITMAP_POSITION: position out of bounds")
Comment thread
minni31 marked this conversation as resolved.
.exclude("INVALID_BITMAP_POSITION: negative position")
enableSuite[GlutenQueryParsingErrorsSuite]
enableSuite[GlutenQueryContextSuite]
enableSuite[GlutenQueryExecutionAnsiErrorsSuite]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,25 @@
*/
package org.apache.spark.sql

import org.apache.gluten.execution.HashAggregateExecBaseTransformer

import org.apache.spark.sql.execution.adaptive.AdaptiveSparkPlanHelper

class GlutenBitmapExpressionsQuerySuite
extends BitmapExpressionsQuerySuite
with GlutenSQLTestsTrait {}
with GlutenSQLTestsTrait
with AdaptiveSparkPlanHelper {

test("bitmap_construct_agg routes to native") {
val df = spark.sql(
"SELECT bitmap_construct_agg(bitmap_bit_position(col)) " +
"FROM values (1L), (2L), (3L) AS t(col)")
df.collect()
assert(
collectWithSubqueries(df.queryExecution.executedPlan) {
case h: HashAggregateExecBaseTransformer => h
}.nonEmpty,
"Expected native HashAggregateExecBaseTransformer in plan"
)
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ object ExpressionNames {
final val COLLECT_LIST = "collect_list"
final val COLLECT_SET = "collect_set"
final val BLOOM_FILTER_AGG = "bloom_filter_agg"
final val BITMAP_CONSTRUCT_AGG = "bitmap_construct_agg"
final val VAR_SAMP = "var_samp"
final val VAR_POP = "var_pop"
final val BIT_AND_AGG = "bit_and"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,8 @@ class Spark35Shims extends SparkShims {
Sig[RegrSlope](ExpressionNames.REGR_SLOPE),
Sig[RegrIntercept](ExpressionNames.REGR_INTERCEPT),
Sig[RegrSXY](ExpressionNames.REGR_SXY),
Sig[RegrReplacement](ExpressionNames.REGR_REPLACEMENT)
Sig[RegrReplacement](ExpressionNames.REGR_REPLACEMENT),
Sig[BitmapConstructAgg](ExpressionNames.BITMAP_CONSTRUCT_AGG)
)
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,8 @@ class Spark40Shims extends SparkShims {
Sig[RegrSlope](ExpressionNames.REGR_SLOPE),
Sig[RegrIntercept](ExpressionNames.REGR_INTERCEPT),
Sig[RegrSXY](ExpressionNames.REGR_SXY),
Sig[RegrReplacement](ExpressionNames.REGR_REPLACEMENT)
Sig[RegrReplacement](ExpressionNames.REGR_REPLACEMENT),
Sig[BitmapConstructAgg](ExpressionNames.BITMAP_CONSTRUCT_AGG)
)
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,8 @@ class Spark41Shims extends SparkShims {
Sig[RegrSlope](ExpressionNames.REGR_SLOPE),
Sig[RegrIntercept](ExpressionNames.REGR_INTERCEPT),
Sig[RegrSXY](ExpressionNames.REGR_SXY),
Sig[RegrReplacement](ExpressionNames.REGR_REPLACEMENT)
Sig[RegrReplacement](ExpressionNames.REGR_REPLACEMENT),
Sig[BitmapConstructAgg](ExpressionNames.BITMAP_CONSTRUCT_AGG)
)
}

Expand Down
Loading