Skip to content

Commit 5bfcf95

Browse files
kazantsev-maksim and Kazantsev Maksim
authored
feat: Implement Spark bitmap_bit_position function (#20275)
## Which issue does this PR close?

N/A

## Rationale for this change

Add a new function: https://spark.apache.org/docs/latest/api/sql/index.html#bitmap_bit_position

## What changes are included in this PR?

- Implementation
- Unit tests
- SLT tests

## Are these changes tested?

Yes, tests are added as part of this PR.

## Are there any user-facing changes?

No, this is a new function.

---------

Co-authored-by: Kazantsev Maksim <mn.kazantsev@gmail.com>
1 parent 726d730 commit 5bfcf95

File tree

3 files changed

+261
-1
lines changed

3 files changed

+261
-1
lines changed
Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
use arrow::array::{ArrayRef, AsArray, Int64Array};
19+
use arrow::datatypes::Field;
20+
use arrow::datatypes::{DataType, FieldRef, Int8Type, Int16Type, Int32Type, Int64Type};
21+
use datafusion::logical_expr::{ColumnarValue, Signature, TypeSignature, Volatility};
22+
use datafusion_common::utils::take_function_args;
23+
use datafusion_common::{Result, internal_err};
24+
use datafusion_expr::{ScalarFunctionArgs, ScalarUDFImpl};
25+
use datafusion_functions::utils::make_scalar_function;
26+
use std::any::Any;
27+
use std::sync::Arc;
28+
29+
/// Spark-compatible `bitmap_bit_position` expression
30+
/// <https://spark.apache.org/docs/latest/api/sql/index.html#bitmap_bit_position>
31+
#[derive(Debug, PartialEq, Eq, Hash)]
32+
pub struct BitmapBitPosition {
33+
signature: Signature,
34+
}
35+
36+
impl Default for BitmapBitPosition {
37+
fn default() -> Self {
38+
Self::new()
39+
}
40+
}
41+
42+
impl BitmapBitPosition {
43+
pub fn new() -> Self {
44+
Self {
45+
signature: Signature::one_of(
46+
vec![
47+
TypeSignature::Exact(vec![DataType::Int8]),
48+
TypeSignature::Exact(vec![DataType::Int16]),
49+
TypeSignature::Exact(vec![DataType::Int32]),
50+
TypeSignature::Exact(vec![DataType::Int64]),
51+
],
52+
Volatility::Immutable,
53+
),
54+
}
55+
}
56+
}
57+
58+
impl ScalarUDFImpl for BitmapBitPosition {
59+
fn as_any(&self) -> &dyn Any {
60+
self
61+
}
62+
63+
fn name(&self) -> &str {
64+
"bitmap_bit_position"
65+
}
66+
67+
fn signature(&self) -> &Signature {
68+
&self.signature
69+
}
70+
71+
fn return_type(&self, _arg_types: &[DataType]) -> Result<DataType> {
72+
internal_err!("return_field_from_args should be used instead")
73+
}
74+
75+
fn return_field_from_args(
76+
&self,
77+
args: datafusion_expr::ReturnFieldArgs,
78+
) -> Result<FieldRef> {
79+
Ok(Arc::new(Field::new(
80+
self.name(),
81+
DataType::Int64,
82+
args.arg_fields[0].is_nullable(),
83+
)))
84+
}
85+
86+
fn invoke_with_args(&self, args: ScalarFunctionArgs) -> Result<ColumnarValue> {
87+
make_scalar_function(bitmap_bit_position_inner, vec![])(&args.args)
88+
}
89+
}
90+
91+
pub fn bitmap_bit_position_inner(arg: &[ArrayRef]) -> Result<ArrayRef> {
92+
let [array] = take_function_args("bitmap_bit_position", arg)?;
93+
match &array.data_type() {
94+
DataType::Int8 => {
95+
let result: Int64Array = array
96+
.as_primitive::<Int8Type>()
97+
.iter()
98+
.map(|opt| opt.map(|value| bitmap_bit_position(value.into())))
99+
.collect();
100+
Ok(Arc::new(result))
101+
}
102+
DataType::Int16 => {
103+
let result: Int64Array = array
104+
.as_primitive::<Int16Type>()
105+
.iter()
106+
.map(|opt| opt.map(|value| bitmap_bit_position(value.into())))
107+
.collect();
108+
Ok(Arc::new(result))
109+
}
110+
DataType::Int32 => {
111+
let result: Int64Array = array
112+
.as_primitive::<Int32Type>()
113+
.iter()
114+
.map(|opt| opt.map(|value| bitmap_bit_position(value.into())))
115+
.collect();
116+
Ok(Arc::new(result))
117+
}
118+
DataType::Int64 => {
119+
let result: Int64Array = array
120+
.as_primitive::<Int64Type>()
121+
.iter()
122+
.map(|opt| opt.map(bitmap_bit_position))
123+
.collect();
124+
Ok(Arc::new(result))
125+
}
126+
data_type => {
127+
internal_err!("bitmap_bit_position does not support {data_type}")
128+
}
129+
}
130+
}
131+
132+
// NOTE(review): presumably mirrors the bitmap size used by Spark's bitmap
// expressions (4 KiB per bitmap) — confirm against the Spark implementation.
const NUM_BYTES: i64 = 4 * 1024;
/// Number of distinct bit positions, i.e. eight bits for each of `NUM_BYTES`.
const NUM_BITS: i64 = NUM_BYTES * 8;

/// Maps `value` to a bit position by reducing it modulo `NUM_BITS`.
///
/// Positive values are shifted down by one first, so `1` maps to position `0`.
/// Zero and negative values are negated instead, so `0` maps to `0` and `-1`
/// maps to `1`.
fn bitmap_bit_position(value: i64) -> i64 {
    let offset = if value > 0 {
        value - 1
    } else {
        // `wrapping_neg` keeps `i64::MIN` from overflowing; it stays
        // `i64::MIN`, whose remainder below is 0 with the current constants.
        value.wrapping_neg()
    };
    offset % NUM_BITS
}

datafusion/spark/src/function/bitmap/mod.rs

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,13 +15,15 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18+
pub mod bitmap_bit_position;
1819
pub mod bitmap_count;
1920

2021
use datafusion_expr::ScalarUDF;
2122
use datafusion_functions::make_udf_function;
2223
use std::sync::Arc;
2324

2425
make_udf_function!(bitmap_count::BitmapCount, bitmap_count);
26+
make_udf_function!(bitmap_bit_position::BitmapBitPosition, bitmap_bit_position);
2527

2628
pub mod expr_fn {
2729
use datafusion_functions::export_functions;
@@ -31,8 +33,13 @@ pub mod expr_fn {
3133
"Returns the number of set bits in the input bitmap.",
3234
arg
3335
));
36+
export_functions!((
37+
bitmap_bit_position,
38+
"Returns the bit position for the given input child expression.",
39+
arg
40+
));
3441
}
3542

3643
pub fn functions() -> Vec<Arc<ScalarUDF>> {
37-
vec![bitmap_count()]
44+
vec![bitmap_count(), bitmap_bit_position()]
3845
}
Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
1+
# Licensed to the Apache Software Foundation (ASF) under one
2+
# or more contributor license agreements. See the NOTICE file
3+
# distributed with this work for additional information
4+
# regarding copyright ownership. The ASF licenses this file
5+
# to you under the Apache License, Version 2.0 (the
6+
# "License"); you may not use this file except in compliance
7+
# with the License. You may obtain a copy of the License at
8+
9+
# http://www.apache.org/licenses/LICENSE-2.0
10+
11+
# Unless required by applicable law or agreed to in writing,
12+
# software distributed under the License is distributed on an
13+
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
# KIND, either express or implied. See the License for the
15+
# specific language governing permissions and limitations
16+
# under the License.
17+
18+
19+
# Int8 inputs
query I
SELECT bitmap_bit_position(arrow_cast(1, 'Int8'));
----
0

query I
SELECT bitmap_bit_position(arrow_cast(3, 'Int8'));
----
2

query I
SELECT bitmap_bit_position(arrow_cast(7, 'Int8'));
----
6

query I
SELECT bitmap_bit_position(arrow_cast(15, 'Int8'));
----
14

query I
SELECT bitmap_bit_position(arrow_cast(-1, 'Int8'));
----
1

# Int16 inputs
query I
SELECT bitmap_bit_position(arrow_cast(256, 'Int16'));
----
255

query I
SELECT bitmap_bit_position(arrow_cast(1024, 'Int16'));
----
1023

query I
SELECT bitmap_bit_position(arrow_cast(-32768, 'Int16'));
----
0

query I
SELECT bitmap_bit_position(arrow_cast(16384, 'Int16'));
----
16383

query I
SELECT bitmap_bit_position(arrow_cast(-1, 'Int16'));
----
1

# Int32 inputs
query I
SELECT bitmap_bit_position(arrow_cast(65536, 'Int32'));
----
32767

query I
SELECT bitmap_bit_position(arrow_cast(1048576, 'Int32'));
----
32767

query I
SELECT bitmap_bit_position(arrow_cast(-2147483648, 'Int32'));
----
0

query I
SELECT bitmap_bit_position(arrow_cast(1073741824, 'Int32'));
----
32767

query I
SELECT bitmap_bit_position(arrow_cast(-1, 'Int32'));
----
1

# Int64 inputs
query I
SELECT bitmap_bit_position(arrow_cast(4294967296, 'Int64'));
----
32767

query I
SELECT bitmap_bit_position(arrow_cast(-1, 'Int64'));
----
1

query I
SELECT bitmap_bit_position(arrow_cast(-9223372036854775808, 'Int64'));
----
0

query I
SELECT bitmap_bit_position(arrow_cast(9223372036854775807, 'Int64'));
----
32766

# Zero takes the non-positive branch and maps to position 0
query I
SELECT bitmap_bit_position(arrow_cast(0, 'Int8'));
----
0

# NULL input yields NULL output
query I
SELECT bitmap_bit_position(arrow_cast(NULL, 'Int64'));
----
NULL

0 commit comments

Comments
 (0)