Skip to content

Commit cf57f3e

Browse files
authored
perf: Add append_with to string builders, use in replace (#22029)
## Which issue does this PR close?

- Closes #21997 (potentially).

## Rationale for this change

This PR adds two new APIs to `GenericStringArrayBuilder` and `StringViewArrayBuilder`:

1. `append_with` appends a row whose bytes are produced by invoking a closure that is passed a `StringWriter`.
2. `append_byte_map` appends a row whose bytes are produced by mapping each byte of the input with a byte-to-byte map closure.

For `StringViewArrayBuilder`, `StringWriter` is an append-only string writer that automatically switches between writing to a new inline view (for short strings) and writing to the in-progress data block. For `GenericStringArrayBuilder`, `StringWriter` just appends to the value buffer directly.

(We need two new APIs because `append_byte_map` vectorizes a lot better than `append_with`, so callers that fit the byte-to-byte map pattern should prefer it.)

Both of these new APIs allow string UDFs to avoid creating an intermediate data copy in many cases. To illustrate this, this PR adopts the new APIs in `replace`.
Benchmarks (Arm64):

Group 1: ASCII single-byte fast path (StringArray)

- size=1024 str_len=32 nulls=0.0 : 16.27 µs -> 12.83 µs (−21.1%)
- size=1024 str_len=32 nulls=0.2 : 14.23 µs -> 12.10 µs (−15.0%)
- size=1024 str_len=128 nulls=0.0 : 11.28 µs -> 8.21 µs (−27.3%)
- size=1024 str_len=128 nulls=0.2 : 10.37 µs -> 7.79 µs (−24.9%)
- size=4096 str_len=32 nulls=0.0 : 62.48 µs -> 49.50 µs (−20.8%)
- size=4096 str_len=32 nulls=0.2 : 55.74 µs -> 46.66 µs (−16.3%)
- size=4096 str_len=128 nulls=0.0 : 42.26 µs -> 29.06 µs (−31.2%)
- size=4096 str_len=128 nulls=0.2 : 39.17 µs -> 28.52 µs (−27.2%)

Group 2: Multi-byte StringArray — general writer path

- size=1024 str_len=32 nulls=0.0 : 23.58 µs -> 21.75 µs (−7.8%)
- size=1024 str_len=32 nulls=0.2 : 18.92 µs -> 17.41 µs (−8.0%)
- size=1024 str_len=128 nulls=0.0 : 37.56 µs -> 35.33 µs (−5.9%)
- size=1024 str_len=128 nulls=0.2 : 29.62 µs -> 28.71 µs (−3.1%)
- size=4096 str_len=32 nulls=0.0 : 97.15 µs -> 88.92 µs (−8.5%)
- size=4096 str_len=32 nulls=0.2 : 77.03 µs -> 71.43 µs (−7.3%)
- size=4096 str_len=128 nulls=0.0 : 173.66 µs -> 163.68 µs (−5.7%)
- size=4096 str_len=128 nulls=0.2 : 134.98 µs -> 128.56 µs (−4.8%)

Group 3: Multi-byte StringViewArray — general writer path

- size=1024 str_len=32 nulls=0.0 : 24.46 µs -> 22.18 µs (−9.3%)
- size=1024 str_len=32 nulls=0.2 : 20.04 µs -> 17.71 µs (−11.7%)
- size=1024 str_len=128 nulls=0.0 : 36.43 µs -> 35.79 µs (−1.8%)
- size=1024 str_len=128 nulls=0.2 : 29.73 µs -> 28.70 µs (−3.5%)
- size=4096 str_len=32 nulls=0.0 : 99.07 µs -> 89.68 µs (−9.5%)
- size=4096 str_len=32 nulls=0.2 : 84.38 µs -> 72.46 µs (−14.1%)
- size=4096 str_len=128 nulls=0.0 : 169.27 µs -> 164.80 µs (−2.6%, n.s.)
- size=4096 str_len=128 nulls=0.2 : 133.79 µs -> 130.20 µs (−2.7%, n.s.)
Group 4: Empty-from StringArray

- size=1024 str_len=32 : 87.75 µs -> 50.64 µs (−42.3%)
- size=1024 str_len=128 : 313.00 µs -> 187.77 µs (−40.0%)

Group 5: Empty-from StringViewArray

- size=1024 str_len=32 : 87.01 µs -> 50.10 µs (−42.4%)
- size=1024 str_len=128 : 313.99 µs -> 190.17 µs (−39.4%)

## What changes are included in this PR?

* Add `append_byte_map` and `append_with` to both of the bulk-NULL string builders
* Add unit tests
* Adopt the new APIs in `replace`

## Are these changes tested?

Yes; new tests added.

## Are there any user-facing changes?

No.
1 parent 1af9bd7 commit cf57f3e

3 files changed

Lines changed: 791 additions & 242 deletions

File tree

datafusion/functions/benches/replace.rs

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,6 +162,36 @@ fn criterion_benchmark(c: &mut Criterion) {
162162
}
163163
}
164164

165+
// Empty-`from` path: insert `to` between every char of the input and at
166+
// both ends.
167+
if size == 1024 {
168+
for &str_len in &[32_usize, 128] {
169+
let args = create_args::<i32>(size, str_len, false, 0, 3, 0.0);
170+
group.bench_function(
171+
format!("replace_string_empty_from [size={size}, str_len={str_len}]"),
172+
|b| {
173+
b.iter(|| {
174+
let args_cloned = args.clone();
175+
black_box(invoke_replace_with_args(args_cloned, size))
176+
})
177+
},
178+
);
179+
180+
let args = create_args::<i32>(size, str_len, true, 0, 3, 0.0);
181+
group.bench_function(
182+
format!(
183+
"replace_string_view_empty_from [size={size}, str_len={str_len}]"
184+
),
185+
|b| {
186+
b.iter(|| {
187+
let args_cloned = args.clone();
188+
black_box(invoke_replace_with_args(args_cloned, size))
189+
})
190+
},
191+
);
192+
}
193+
}
194+
165195
group.finish();
166196
}
167197
}

datafusion/functions/src/string/replace.rs

Lines changed: 42 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,9 @@ use arrow::array::{Array, ArrayRef, OffsetSizeTrait};
2121
use arrow::buffer::NullBuffer;
2222
use arrow::datatypes::DataType;
2323

24-
use crate::strings::GenericStringArrayBuilder;
24+
use crate::strings::{
25+
BulkNullStringArrayBuilder, GenericStringArrayBuilder, StringWriter,
26+
};
2527
use crate::utils::{make_scalar_function, utf8_to_str_type};
2628
use datafusion_common::cast::{as_generic_string_array, as_string_view_array};
2729
use datafusion_common::types::logical_string;
@@ -164,7 +166,6 @@ fn replace_view(args: &[ArrayRef]) -> Result<ArrayRef> {
164166

165167
let len = string_array.len();
166168
let mut builder = GenericStringArrayBuilder::<i32>::with_capacity(len, 0);
167-
let mut buffer = String::new();
168169
let nulls = NullBuffer::union_many([
169170
string_array.nulls(),
170171
from_array.nulls(),
@@ -184,19 +185,15 @@ fn replace_view(args: &[ArrayRef]) -> Result<ArrayRef> {
184185
let string = unsafe { string_array.value_unchecked(i) };
185186
let from = unsafe { from_array.value_unchecked(i) };
186187
let to = unsafe { to_array.value_unchecked(i) };
187-
buffer.clear();
188-
replace_into_string(&mut buffer, string, from, to);
189-
builder.append_value(&buffer);
188+
apply_replace(&mut builder, string, from, to);
190189
}
191190
} else {
192191
for i in 0..len {
193192
// SAFETY: i < len, and no input has a null buffer.
194193
let string = unsafe { string_array.value_unchecked(i) };
195194
let from = unsafe { from_array.value_unchecked(i) };
196195
let to = unsafe { to_array.value_unchecked(i) };
197-
buffer.clear();
198-
replace_into_string(&mut buffer, string, from, to);
199-
builder.append_value(&buffer);
196+
apply_replace(&mut builder, string, from, to);
200197
}
201198
}
202199

@@ -212,7 +209,6 @@ fn replace<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
212209

213210
let len = string_array.len();
214211
let mut builder = GenericStringArrayBuilder::<T>::with_capacity(len, 0);
215-
let mut buffer = String::new();
216212
let nulls = NullBuffer::union_many([
217213
string_array.nulls(),
218214
from_array.nulls(),
@@ -232,71 +228,75 @@ fn replace<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
232228
let string = unsafe { string_array.value_unchecked(i) };
233229
let from = unsafe { from_array.value_unchecked(i) };
234230
let to = unsafe { to_array.value_unchecked(i) };
235-
buffer.clear();
236-
replace_into_string(&mut buffer, string, from, to);
237-
builder.append_value(&buffer);
231+
apply_replace(&mut builder, string, from, to);
238232
}
239233
} else {
240234
for i in 0..len {
241235
// SAFETY: i < len, and no input has a null buffer.
242236
let string = unsafe { string_array.value_unchecked(i) };
243237
let from = unsafe { from_array.value_unchecked(i) };
244238
let to = unsafe { to_array.value_unchecked(i) };
245-
buffer.clear();
246-
replace_into_string(&mut buffer, string, from, to);
247-
builder.append_value(&buffer);
239+
apply_replace(&mut builder, string, from, to);
248240
}
249241
}
250242

251243
Ok(Arc::new(builder.finish(nulls)?) as ArrayRef)
252244
}
253245

254-
/// Helper function to perform string replacement into a reusable String buffer
255246
#[inline]
256-
fn replace_into_string(buffer: &mut String, string: &str, from: &str, to: &str) {
257-
if from.is_empty() {
258-
// When from is empty, insert 'to' at the beginning, between each character, and at the end
259-
// This matches the behavior of str::replace()
260-
buffer.push_str(to);
261-
for ch in string.chars() {
262-
buffer.push(ch);
263-
buffer.push_str(to);
264-
}
265-
return;
266-
}
267-
268-
// Fast path for replacing a single ASCII character with another single ASCII character.
269-
// Extends the buffer's underlying Vec<u8> directly, for performance.
270-
if let ([from_byte], [to_byte]) = (from.as_bytes(), to.as_bytes())
247+
fn apply_replace<B: BulkNullStringArrayBuilder>(
248+
builder: &mut B,
249+
string: &str,
250+
from: &str,
251+
to: &str,
252+
) {
253+
// Hot path: single ASCII byte → single ASCII byte. An ASCII byte (< 0x80)
254+
// cannot appear inside a multi-byte UTF-8 sequence, so any multi-byte
255+
// sequences in `string` pass through unchanged and output stays valid
256+
// UTF-8.
257+
if let (&[from_byte], &[to_byte]) = (from.as_bytes(), to.as_bytes())
271258
&& from_byte.is_ascii()
272259
&& to_byte.is_ascii()
273260
{
274-
// SAFETY: Replacing an ASCII byte with another ASCII byte preserves UTF-8 validity.
261+
// SAFETY: see the contract above.
275262
unsafe {
276-
buffer.as_mut_vec().extend(
277-
string
278-
.as_bytes()
279-
.iter()
280-
.map(|&b| if b == *from_byte { *to_byte } else { b }),
281-
);
263+
builder.append_byte_map(string.as_bytes(), |b| {
264+
if b == from_byte { to_byte } else { b }
265+
});
282266
}
283267
return;
284268
}
285269

270+
if from.is_empty() {
271+
// Empty `from`: insert `to` before each character and at both ends.
272+
builder.append_with(|w| {
273+
w.write_str(to);
274+
for ch in string.chars() {
275+
w.write_char(ch);
276+
w.write_str(to);
277+
}
278+
});
279+
return;
280+
}
281+
282+
builder.append_with(|w| replace_into_writer(w, string, from, to));
283+
}
284+
285+
#[inline]
286+
fn replace_into_writer<W: StringWriter>(w: &mut W, string: &str, from: &str, to: &str) {
286287
let mut last_end = 0;
287288
for (start, _part) in string.match_indices(from) {
288-
buffer.push_str(&string[last_end..start]);
289-
buffer.push_str(to);
289+
w.write_str(&string[last_end..start]);
290+
w.write_str(to);
290291
last_end = start + from.len();
291292
}
292-
buffer.push_str(&string[last_end..]);
293+
w.write_str(&string[last_end..]);
293294
}
294295

295296
#[cfg(test)]
296297
mod tests {
297298
use super::*;
298299
use crate::utils::test::test_function;
299-
use arrow::array::Array;
300300
use arrow::array::LargeStringArray;
301301
use arrow::array::StringArray;
302302
use arrow::datatypes::DataType::{LargeUtf8, Utf8};

0 commit comments

Comments
 (0)