Skip to content

Commit d286ff7

Browse files
committed
perf: Use bulk-NULL builder in replace
1 parent 89e14f1 commit d286ff7

1 file changed

Lines changed: 39 additions & 29 deletions

File tree

datafusion/functions/src/string/replace.rs

Lines changed: 39 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -17,9 +17,11 @@
1717

1818
use std::sync::Arc;
1919

20-
use arrow::array::{ArrayRef, GenericStringBuilder, OffsetSizeTrait};
20+
use arrow::array::{Array, ArrayRef, OffsetSizeTrait};
21+
use arrow::buffer::NullBuffer;
2122
use arrow::datatypes::DataType;
2223

24+
use crate::strings::GenericStringArrayBuilder;
2325
use crate::utils::{make_scalar_function, utf8_to_str_type};
2426
use datafusion_common::cast::{as_generic_string_array, as_string_view_array};
2527
use datafusion_common::types::logical_string;
@@ -160,25 +162,29 @@ fn replace_view(args: &[ArrayRef]) -> Result<ArrayRef> {
160162
let from_array = as_string_view_array(&args[1])?;
161163
let to_array = as_string_view_array(&args[2])?;
162164

163-
let mut builder = GenericStringBuilder::<i32>::new();
165+
let len = string_array.len();
166+
let mut builder = GenericStringArrayBuilder::<i32>::with_capacity(len, 0);
164167
let mut buffer = String::new();
168+
let nulls = NullBuffer::union(
169+
NullBuffer::union(string_array.nulls(), from_array.nulls()).as_ref(),
170+
to_array.nulls(),
171+
);
165172

166-
for ((string, from), to) in string_array
167-
.iter()
168-
.zip(from_array.iter())
169-
.zip(to_array.iter())
170-
{
171-
match (string, from, to) {
172-
(Some(string), Some(from), Some(to)) => {
173-
buffer.clear();
174-
replace_into_string(&mut buffer, string, from, to);
175-
builder.append_value(&buffer);
176-
}
177-
_ => builder.append_null(),
173+
for i in 0..len {
174+
if nulls.as_ref().is_some_and(|n| n.is_null(i)) {
175+
builder.append_placeholder();
176+
continue;
178177
}
178+
// SAFETY: `i < len` keeps the index in bounds, assuming all three inputs have `len` rows (TODO confirm); the null check above means no input is null at `i`.
179+
let string = unsafe { string_array.value_unchecked(i) };
180+
let from = unsafe { from_array.value_unchecked(i) };
181+
let to = unsafe { to_array.value_unchecked(i) };
182+
buffer.clear();
183+
replace_into_string(&mut buffer, string, from, to);
184+
builder.append_value(&buffer);
179185
}
180186

181-
Ok(Arc::new(builder.finish()) as ArrayRef)
187+
Ok(Arc::new(builder.finish(nulls)?) as ArrayRef)
182188
}
183189

184190
/// Replaces all occurrences in `string` of substring `from` with substring `to`.
@@ -188,25 +194,29 @@ fn replace<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
188194
let from_array = as_generic_string_array::<T>(&args[1])?;
189195
let to_array = as_generic_string_array::<T>(&args[2])?;
190196

191-
let mut builder = GenericStringBuilder::<T>::new();
197+
let len = string_array.len();
198+
let mut builder = GenericStringArrayBuilder::<T>::with_capacity(len, 0);
192199
let mut buffer = String::new();
200+
let nulls = NullBuffer::union(
201+
NullBuffer::union(string_array.nulls(), from_array.nulls()).as_ref(),
202+
to_array.nulls(),
203+
);
193204

194-
for ((string, from), to) in string_array
195-
.iter()
196-
.zip(from_array.iter())
197-
.zip(to_array.iter())
198-
{
199-
match (string, from, to) {
200-
(Some(string), Some(from), Some(to)) => {
201-
buffer.clear();
202-
replace_into_string(&mut buffer, string, from, to);
203-
builder.append_value(&buffer);
204-
}
205-
_ => builder.append_null(),
205+
for i in 0..len {
206+
if nulls.as_ref().is_some_and(|n| n.is_null(i)) {
207+
builder.append_placeholder();
208+
continue;
206209
}
210+
// SAFETY: `i < len` keeps the index in bounds, assuming all three inputs have `len` rows (TODO confirm); the null check above means no input is null at `i`.
211+
let string = unsafe { string_array.value_unchecked(i) };
212+
let from = unsafe { from_array.value_unchecked(i) };
213+
let to = unsafe { to_array.value_unchecked(i) };
214+
buffer.clear();
215+
replace_into_string(&mut buffer, string, from, to);
216+
builder.append_value(&buffer);
207217
}
208218

209-
Ok(Arc::new(builder.finish()) as ArrayRef)
219+
Ok(Arc::new(builder.finish(nulls)?) as ArrayRef)
210220
}
211221

212222
/// Helper function to perform string replacement into a reusable String buffer

0 commit comments

Comments
 (0)