Skip to content

Commit ba240b2

Browse files
authored
perf: Optimize overlay with new string builder (#22182)
## Which issue does this PR close?

- Closes #22181.

## Rationale for this change

This PR optimizes `overlay` by switching to the new bulk-NULL string builders, and also taking advantage of `append_with` to avoid copying into an intermediate `String`. Along the way, we also optimize character counting for Unicode inputs, reducing the number of times we need to walk the input string.

Benchmarks (Arm64):

StringArray
- all_utf8: 3.6 ms → 1.85 ms (-48.6%)
- ascii: 319.8 µs → 134.9 µs (-57.8%)
- high_nulls: 400.7 µs → 75.1 µs (-81.3%)
- low_nulls: 2.0 ms → 1.03 ms (-48.3%)
- no_for: 2.1 ms → 1.12 ms (-46.8%)
- no_nulls: 2.0 ms → 1.05 ms (-48.4%)

StringViewArray
- all_utf8: 3.6 ms → 1.86 ms (-48.4%)
- ascii: 313.8 µs → 133.8 µs (-57.4%)
- low_nulls: 2.0 ms → 1.05 ms (-47.4%)
- no_for: 2.1 ms → 1.12 ms (-46.7%)

## What changes are included in this PR?

* Switch to `BulkNullStringArrayBuilder` to build the result set, and `NullBuffer::union_many` to compute NULLs in bulk
* Use `append_with` to avoid an intermediate string copy
* For Unicode inputs, replace three `char_indices` walks with a single string traversal
* More comprehensive benchmark coverage
* Fix a misleading/inaccurate error message

## Are these changes tested?

Yes, covered by existing tests.

## Are there any user-facing changes?

No.
1 parent 4a41173 commit ba240b2

3 files changed

Lines changed: 318 additions & 130 deletions

File tree

datafusion/functions/benches/overlay.rs

Lines changed: 145 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -21,40 +21,56 @@ use arrow::datatypes::{DataType, Field};
2121
use criterion::{Criterion, criterion_group, criterion_main};
2222
use datafusion_common::ScalarValue;
2323
use datafusion_common::config::ConfigOptions;
24-
use datafusion_expr::{ColumnarValue, ScalarFunctionArgs};
24+
use datafusion_expr::{ColumnarValue, ScalarFunctionArgs, ScalarUDF};
2525
use helper::gen_string_array;
2626
use std::hint::black_box;
2727
use std::sync::Arc;
2828

29-
fn criterion_benchmark(c: &mut Criterion) {
30-
const N_ROWS: usize = 8192;
29+
#[expect(clippy::too_many_arguments)]
30+
fn bench_overlay(
31+
c: &mut Criterion,
32+
name: &str,
33+
overlay: &ScalarUDF,
34+
n_rows: usize,
35+
null_density: f32,
36+
utf8_density: f32,
37+
is_string_view: bool,
38+
with_for: bool,
39+
) {
3140
const STR_LEN: usize = 128;
3241

33-
let overlay = datafusion_functions::core::overlay();
34-
let config_options = Arc::new(ConfigOptions::default());
35-
36-
let mut args = gen_string_array(N_ROWS, STR_LEN, 0.1, 0.5, false);
37-
args.push(ColumnarValue::Scalar(ScalarValue::Utf8(Some(
38-
"DataFusion".to_string(),
39-
))));
42+
let mut args =
43+
gen_string_array(n_rows, STR_LEN, null_density, utf8_density, is_string_view);
44+
// The substring scalar's type must match the string column's type (the
45+
// function dispatches per-type without coercion).
46+
let substr = "DataFusion".to_string();
47+
let substr_scalar = if is_string_view {
48+
ScalarValue::Utf8View(Some(substr))
49+
} else {
50+
ScalarValue::Utf8(Some(substr))
51+
};
52+
args.push(ColumnarValue::Scalar(substr_scalar));
4053
args.push(ColumnarValue::Scalar(ScalarValue::Int64(Some(32))));
41-
args.push(ColumnarValue::Scalar(ScalarValue::Int64(Some(8))));
54+
if with_for {
55+
args.push(ColumnarValue::Scalar(ScalarValue::Int64(Some(8))));
56+
}
4257

4358
let arg_fields = args
4459
.iter()
4560
.enumerate()
4661
.map(|(idx, arg)| Field::new(format!("arg_{idx}"), arg.data_type(), true).into())
4762
.collect::<Vec<_>>();
4863
let return_field = Arc::new(Field::new("f", DataType::Utf8, true));
64+
let config_options = Arc::new(ConfigOptions::default());
4965

50-
c.bench_function("overlay_StringArray_utf8_scalar_args", |b| {
66+
c.bench_function(name, |b| {
5167
b.iter(|| {
5268
black_box(
5369
overlay
5470
.invoke_with_args(ScalarFunctionArgs {
5571
args: args.clone(),
5672
arg_fields: arg_fields.clone(),
57-
number_rows: N_ROWS,
73+
number_rows: n_rows,
5874
return_field: Arc::clone(&return_field),
5975
config_options: Arc::clone(&config_options),
6076
})
@@ -64,5 +80,121 @@ fn criterion_benchmark(c: &mut Criterion) {
6480
});
6581
}
6682

83+
fn criterion_benchmark(c: &mut Criterion) {
84+
const N_ROWS: usize = 8192;
85+
const MIXED_UTF8: f32 = 0.5;
86+
let overlay = datafusion_functions::core::overlay();
87+
88+
// Null-density variants on StringArray (mixed ASCII/UTF-8, 4-arg form).
89+
bench_overlay(
90+
c,
91+
"overlay_StringArray_low_nulls",
92+
&overlay,
93+
N_ROWS,
94+
0.1,
95+
MIXED_UTF8,
96+
false,
97+
true,
98+
);
99+
bench_overlay(
100+
c,
101+
"overlay_StringArray_high_nulls",
102+
&overlay,
103+
N_ROWS,
104+
0.9,
105+
MIXED_UTF8,
106+
false,
107+
true,
108+
);
109+
bench_overlay(
110+
c,
111+
"overlay_StringArray_no_nulls",
112+
&overlay,
113+
N_ROWS,
114+
0.0,
115+
MIXED_UTF8,
116+
false,
117+
true,
118+
);
119+
120+
// Content variants on StringArray (no nulls, 4-arg form). Pair against
121+
// `overlay_StringArray_no_nulls` to isolate the impact of UTF-8 density.
122+
bench_overlay(
123+
c,
124+
"overlay_StringArray_ascii",
125+
&overlay,
126+
N_ROWS,
127+
0.0,
128+
0.0,
129+
false,
130+
true,
131+
);
132+
bench_overlay(
133+
c,
134+
"overlay_StringArray_all_utf8",
135+
&overlay,
136+
N_ROWS,
137+
0.0,
138+
1.0,
139+
false,
140+
true,
141+
);
142+
143+
// 3-arg form (no FOR clause), where the replace length is derived from
144+
// the substring per row.
145+
bench_overlay(
146+
c,
147+
"overlay_StringArray_no_for",
148+
&overlay,
149+
N_ROWS,
150+
0.0,
151+
MIXED_UTF8,
152+
false,
153+
false,
154+
);
155+
156+
// StringViewArray counterparts.
157+
bench_overlay(
158+
c,
159+
"overlay_StringViewArray_low_nulls",
160+
&overlay,
161+
N_ROWS,
162+
0.1,
163+
MIXED_UTF8,
164+
true,
165+
true,
166+
);
167+
bench_overlay(
168+
c,
169+
"overlay_StringViewArray_ascii",
170+
&overlay,
171+
N_ROWS,
172+
0.0,
173+
0.0,
174+
true,
175+
true,
176+
);
177+
bench_overlay(
178+
c,
179+
"overlay_StringViewArray_all_utf8",
180+
&overlay,
181+
N_ROWS,
182+
0.0,
183+
1.0,
184+
true,
185+
true,
186+
);
187+
bench_overlay(
188+
c,
189+
"overlay_StringViewArray_no_for",
190+
&overlay,
191+
N_ROWS,
192+
0.0,
193+
MIXED_UTF8,
194+
true,
195+
false,
196+
);
197+
}
198+
67199
criterion_group!(benches, criterion_benchmark);
68200
criterion_main!(benches);

0 commit comments

Comments
 (0)