Skip to content

Commit 783cef6

Browse files
committed
fix: widen power(decimal, float) to Float64 and simplify
1 parent b382ecd commit 783cef6

5 files changed

Lines changed: 245 additions & 154 deletions

File tree

datafusion/core/tests/expr_api/simplification.rs

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -648,13 +648,19 @@ fn test_simplify_power() {
648648
let expected = col("c3_non_null");
649649
test_simplify(expr, expected)
650650
}
651-
// Power(c3, Log(c3, c4)) ===> c4
651+
// Power(c3, Log(c3, c4)) ===> cast(c4 AS Int64)
652+
// The simplifier rewrites `power(b, log(b, x))` to `x`, but the
653+
// rewritten expression must keep the same type as the original
654+
// `power` call. `power`'s declared return type follows its base
655+
// argument (c3 = Int64), so the UInt32 c4 has to be cast to Int64
656+
// to preserve the output schema the optimizer already committed to.
652657
{
653658
let expr = power(
654659
col("c3_non_null"),
655660
log(col("c3_non_null"), col("c4_non_null")),
656661
);
657-
let expected = col("c4_non_null");
662+
let expected =
663+
Expr::Cast(Cast::new(Box::new(col("c4_non_null")), DataType::Int64));
658664
test_simplify(expr, expected)
659665
}
660666
// Power(c3, c4) ===> Power(c3, c4)

datafusion/functions/Cargo.toml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -212,6 +212,11 @@ harness = false
212212
name = "atan2"
213213
required-features = ["math_expressions"]
214214

215+
[[bench]]
216+
harness = false
217+
name = "power"
218+
required-features = ["math_expressions"]
219+
215220
[[bench]]
216221
harness = false
217222
name = "substr_index"
Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
// Licensed to the Apache Software Foundation (ASF) under one
2+
// or more contributor license agreements. See the NOTICE file
3+
// distributed with this work for additional information
4+
// regarding copyright ownership. The ASF licenses this file
5+
// to you under the Apache License, Version 2.0 (the
6+
// "License"); you may not use this file except in compliance
7+
// with the License. You may obtain a copy of the License at
8+
//
9+
// http://www.apache.org/licenses/LICENSE-2.0
10+
//
11+
// Unless required by applicable law or agreed to in writing,
12+
// software distributed under the License is distributed on an
13+
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14+
// KIND, either express or implied. See the License for the
15+
// specific language governing permissions and limitations
16+
// under the License.
17+
18+
//! Microbenchmark for `power(decimal_array, int_*)`.
19+
//!
20+
//! Covers both array- and scalar-shaped integer exponents on a Decimal
21+
//! base. Both shapes are dispatched to the native per-row decimal kernel;
22+
//! the bench guards against any future change that routes either shape
23+
//! through a Float64 round-trip, which is measurably slower than the
24+
//! decimal kernel for the cases the kernel can handle.
25+
26+
extern crate criterion;
27+
28+
use arrow::array::{Decimal128Array, Int64Array};
29+
use arrow::datatypes::{DataType, Field, FieldRef};
30+
use criterion::{Criterion, criterion_group, criterion_main};
31+
use datafusion_common::ScalarValue;
32+
use datafusion_common::config::ConfigOptions;
33+
use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, ScalarUDF};
34+
use datafusion_functions::math::power;
35+
use std::hint::black_box;
36+
use std::sync::Arc;
37+
38+
fn make_decimal_array(size: usize, precision: u8, scale: i8) -> Decimal128Array {
39+
// Use a fixed unscaled value (250) so the bench is independent of `scale`.
40+
// The four-arm dispatch in `power` only cares about the Decimal variant
41+
// and the exponent's shape, not the numeric value.
42+
let arr = Decimal128Array::from(vec![250i128; size]);
43+
arr.with_precision_and_scale(precision, scale).unwrap()
44+
}
45+
46+
fn make_int_array(size: usize, value: i64) -> Int64Array {
47+
Int64Array::from(vec![value; size])
48+
}
49+
50+
fn run_power(
51+
power_fn: &ScalarUDF,
52+
args: &[ColumnarValue],
53+
arg_fields: &[FieldRef],
54+
return_field: &FieldRef,
55+
config_options: &Arc<ConfigOptions>,
56+
num_rows: usize,
57+
) {
58+
black_box(
59+
power_fn
60+
.invoke_with_args(ScalarFunctionArgs {
61+
args: args.to_vec(),
62+
arg_fields: arg_fields.to_vec(),
63+
number_rows: num_rows,
64+
return_field: Arc::clone(return_field),
65+
config_options: Arc::clone(config_options),
66+
})
67+
.unwrap(),
68+
);
69+
}
70+
71+
fn criterion_benchmark(c: &mut Criterion) {
72+
let power_fn = power();
73+
let config_options = Arc::new(ConfigOptions::default());
74+
let precision: u8 = 20;
75+
let scale: i8 = 2;
76+
let decimal_ty = DataType::Decimal128(precision, scale);
77+
78+
// Exponents are bounded by what the native decimal kernel can handle
79+
// without overflowing the i128 intermediate; see
80+
// <https://github.com/apache/datafusion/issues/22480>
81+
let exponents = [2i64, 4, 8];
82+
83+
for size in [1024usize, 8192] {
84+
let base_arr = Arc::new(make_decimal_array(size, precision, scale));
85+
let base_field: FieldRef = Field::new("base", decimal_ty.clone(), true).into();
86+
let exp_field: FieldRef = Field::new("exp", DataType::Int64, true).into();
87+
let return_field: FieldRef = Field::new("r", decimal_ty.clone(), true).into();
88+
let arg_fields = vec![base_field, exp_field];
89+
90+
for &exp in &exponents {
91+
let exp_arr = Arc::new(make_int_array(size, exp));
92+
let array_args = vec![
93+
ColumnarValue::Array(base_arr.clone()),
94+
ColumnarValue::Array(exp_arr),
95+
];
96+
c.bench_function(
97+
&format!(
98+
"power decimal({precision},{scale}) array x int array, exp={exp}, n={size}"
99+
),
100+
|b| {
101+
b.iter(|| {
102+
run_power(
103+
&power_fn,
104+
&array_args,
105+
&arg_fields,
106+
&return_field,
107+
&config_options,
108+
size,
109+
)
110+
})
111+
},
112+
);
113+
114+
let scalar_args = vec![
115+
ColumnarValue::Array(base_arr.clone()),
116+
ColumnarValue::Scalar(ScalarValue::Int64(Some(exp))),
117+
];
118+
c.bench_function(
119+
&format!(
120+
"power decimal({precision},{scale}) array x int scalar, exp={exp}, n={size}"
121+
),
122+
|b| {
123+
b.iter(|| {
124+
run_power(
125+
&power_fn,
126+
&scalar_args,
127+
&arg_fields,
128+
&return_field,
129+
&config_options,
130+
size,
131+
)
132+
})
133+
},
134+
);
135+
}
136+
}
137+
}
138+
139+
criterion_group!(benches, criterion_benchmark);
140+
criterion_main!(benches);

0 commit comments

Comments
 (0)