Skip to content

Commit e4c68e7

Browse files
committed
Duplicate loop body, hoist is-null check
1 parent b34cd51 commit e4c68e7

1 file changed

Lines changed: 52 additions & 22 deletions

File tree

datafusion/functions/src/string/replace.rs

Lines changed: 52 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -170,18 +170,33 @@ fn replace_view(args: &[ArrayRef]) -> Result<ArrayRef> {
170170
to_array.nulls(),
171171
);
172172

173-
for i in 0..len {
174-
if nulls.as_ref().is_some_and(|n| n.is_null(i)) {
175-
builder.append_placeholder();
176-
continue;
173+
// Hoist the nulls.is_some() check out of the loop. LLVM does not always
174+
// unswitch this loop on its own (the Utf8View body is large enough to
175+
// exceed its cost-benefit threshold).
176+
if let Some(nulls_ref) = nulls.as_ref() {
177+
for i in 0..len {
178+
if nulls_ref.is_null(i) {
179+
builder.append_placeholder();
180+
continue;
181+
}
182+
// SAFETY: union of input nulls is non-null at i, so each input is too.
183+
let string = unsafe { string_array.value_unchecked(i) };
184+
let from = unsafe { from_array.value_unchecked(i) };
185+
let to = unsafe { to_array.value_unchecked(i) };
186+
buffer.clear();
187+
replace_into_string(&mut buffer, string, from, to);
188+
builder.append_value(&buffer);
189+
}
190+
} else {
191+
for i in 0..len {
192+
// SAFETY: i < len, and no input has a null buffer.
193+
let string = unsafe { string_array.value_unchecked(i) };
194+
let from = unsafe { from_array.value_unchecked(i) };
195+
let to = unsafe { to_array.value_unchecked(i) };
196+
buffer.clear();
197+
replace_into_string(&mut buffer, string, from, to);
198+
builder.append_value(&buffer);
177199
}
178-
// SAFETY: union of input nulls is non-null at i, so each input is too.
179-
let string = unsafe { string_array.value_unchecked(i) };
180-
let from = unsafe { from_array.value_unchecked(i) };
181-
let to = unsafe { to_array.value_unchecked(i) };
182-
buffer.clear();
183-
replace_into_string(&mut buffer, string, from, to);
184-
builder.append_value(&buffer);
185200
}
186201

187202
Ok(Arc::new(builder.finish(nulls)?) as ArrayRef)
@@ -202,18 +217,33 @@ fn replace<T: OffsetSizeTrait>(args: &[ArrayRef]) -> Result<ArrayRef> {
202217
to_array.nulls(),
203218
);
204219

205-
for i in 0..len {
206-
if nulls.as_ref().is_some_and(|n| n.is_null(i)) {
207-
builder.append_placeholder();
208-
continue;
220+
// Hoist the nulls.is_some() check out of the loop. LLVM unswitches this
221+
// automatically today, but kept explicit so the no-nulls fast path is not
222+
// contingent on the optimizer's cost heuristic.
223+
if let Some(nulls_ref) = nulls.as_ref() {
224+
for i in 0..len {
225+
if nulls_ref.is_null(i) {
226+
builder.append_placeholder();
227+
continue;
228+
}
229+
// SAFETY: union of input nulls is non-null at i, so each input is too.
230+
let string = unsafe { string_array.value_unchecked(i) };
231+
let from = unsafe { from_array.value_unchecked(i) };
232+
let to = unsafe { to_array.value_unchecked(i) };
233+
buffer.clear();
234+
replace_into_string(&mut buffer, string, from, to);
235+
builder.append_value(&buffer);
236+
}
237+
} else {
238+
for i in 0..len {
239+
// SAFETY: i < len, and no input has a null buffer.
240+
let string = unsafe { string_array.value_unchecked(i) };
241+
let from = unsafe { from_array.value_unchecked(i) };
242+
let to = unsafe { to_array.value_unchecked(i) };
243+
buffer.clear();
244+
replace_into_string(&mut buffer, string, from, to);
245+
builder.append_value(&buffer);
209246
}
210-
// SAFETY: union of input nulls is non-null at i, so each input is too.
211-
let string = unsafe { string_array.value_unchecked(i) };
212-
let from = unsafe { from_array.value_unchecked(i) };
213-
let to = unsafe { to_array.value_unchecked(i) };
214-
buffer.clear();
215-
replace_into_string(&mut buffer, string, from, to);
216-
builder.append_value(&buffer);
217247
}
218248

219249
Ok(Arc::new(builder.finish(nulls)?) as ArrayRef)

0 commit comments

Comments
 (0)