Skip to content

Commit b34cd51

Browse files
committed
Revise replace benchmark
1 parent 4acc63b commit b34cd51

1 file changed

Lines changed: 85 additions & 102 deletions

File tree

datafusion/functions/benches/replace.rs

Lines changed: 85 additions & 102 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
use arrow::array::OffsetSizeTrait;
18+
use arrow::array::{GenericStringArray, OffsetSizeTrait, StringViewArray};
1919
use arrow::datatypes::{DataType, Field};
2020
use arrow::util::bench_util::{
2121
create_string_array_with_len, create_string_view_array_with_len,
@@ -29,31 +29,62 @@ use std::hint::black_box;
2929
use std::sync::Arc;
3030
use std::time::Duration;
3131

32+
/// Build a string array, dropping the null buffer when `null_density == 0.0`
33+
/// so the resulting array matches what real DataFusion produces for a column
34+
/// with no nulls (`nulls() == None`, not `Some(all-valid)`).
35+
fn make_string_array<O: OffsetSizeTrait>(
36+
size: usize,
37+
null_density: f32,
38+
str_len: usize,
39+
) -> GenericStringArray<O> {
40+
let arr = create_string_array_with_len::<O>(size, null_density, str_len);
41+
if null_density == 0.0 {
42+
let (offsets, values, _) = arr.into_parts();
43+
GenericStringArray::<O>::new(offsets, values, None)
44+
} else {
45+
arr
46+
}
47+
}
48+
49+
fn make_string_view_array(
50+
size: usize,
51+
null_density: f32,
52+
str_len: usize,
53+
) -> StringViewArray {
54+
let arr = create_string_view_array_with_len(size, null_density, str_len, false);
55+
if null_density == 0.0 {
56+
let (views, buffers, _) = arr.into_parts();
57+
StringViewArray::new(views, buffers, None)
58+
} else {
59+
arr
60+
}
61+
}
62+
3263
fn create_args<O: OffsetSizeTrait>(
3364
size: usize,
3465
str_len: usize,
3566
force_view_types: bool,
3667
from_len: usize,
3768
to_len: usize,
69+
null_density: f32,
3870
) -> Vec<ColumnarValue> {
3971
if force_view_types {
4072
let string_array =
41-
Arc::new(create_string_view_array_with_len(size, 0.1, str_len, false));
42-
let from_array = Arc::new(create_string_view_array_with_len(
43-
size, 0.1, from_len, false,
44-
));
45-
let to_array =
46-
Arc::new(create_string_view_array_with_len(size, 0.1, to_len, false));
73+
Arc::new(make_string_view_array(size, null_density, str_len));
74+
let from_array =
75+
Arc::new(make_string_view_array(size, null_density, from_len));
76+
let to_array = Arc::new(make_string_view_array(size, null_density, to_len));
4777
vec![
4878
ColumnarValue::Array(string_array),
4979
ColumnarValue::Array(from_array),
5080
ColumnarValue::Array(to_array),
5181
]
5282
} else {
5383
let string_array =
54-
Arc::new(create_string_array_with_len::<O>(size, 0.1, str_len));
55-
let from_array = Arc::new(create_string_array_with_len::<O>(size, 0.1, from_len));
56-
let to_array = Arc::new(create_string_array_with_len::<O>(size, 0.1, to_len));
84+
Arc::new(make_string_array::<O>(size, null_density, str_len));
85+
let from_array =
86+
Arc::new(make_string_array::<O>(size, null_density, from_len));
87+
let to_array = Arc::new(make_string_array::<O>(size, null_density, to_len));
5788

5889
vec![
5990
ColumnarValue::Array(string_array),
@@ -90,98 +121,50 @@ fn criterion_benchmark(c: &mut Criterion) {
90121
group.sample_size(10);
91122
group.measurement_time(Duration::from_secs(10));
92123

93-
// ASCII single character replacement (fast path)
94-
let str_len = 32;
95-
let args = create_args::<i32>(size, str_len, false, 1, 1);
96-
group.bench_function(
97-
format!("replace_string_ascii_single [size={size}, str_len={str_len}]"),
98-
|b| {
99-
b.iter(|| {
100-
let args_cloned = args.clone();
101-
black_box(invoke_replace_with_args(args_cloned, size))
102-
})
103-
},
104-
);
105-
106-
// Multi-character strings (general path)
107-
let args = create_args::<i32>(size, str_len, true, 3, 5);
108-
group.bench_function(
109-
format!("replace_string_view [size={size}, str_len={str_len}]"),
110-
|b| {
111-
b.iter(|| {
112-
let args_cloned = args.clone();
113-
black_box(invoke_replace_with_args(args_cloned, size))
114-
})
115-
},
116-
);
117-
118-
let args = create_args::<i32>(size, str_len, false, 3, 5);
119-
group.bench_function(
120-
format!("replace_string [size={size}, str_len={str_len}]"),
121-
|b| {
122-
b.iter(|| {
123-
let args_cloned = args.clone();
124-
black_box(invoke_replace_with_args(args_cloned, size))
125-
})
126-
},
127-
);
128-
129-
let args = create_args::<i64>(size, str_len, false, 3, 5);
130-
group.bench_function(
131-
format!("replace_large_string [size={size}, str_len={str_len}]"),
132-
|b| {
133-
b.iter(|| {
134-
let args_cloned = args.clone();
135-
black_box(invoke_replace_with_args(args_cloned, size))
136-
})
137-
},
138-
);
139-
140-
// Larger strings
141-
let str_len = 128;
142-
let args = create_args::<i32>(size, str_len, false, 1, 1);
143-
group.bench_function(
144-
format!("replace_string_ascii_single [size={size}, str_len={str_len}]"),
145-
|b| {
146-
b.iter(|| {
147-
let args_cloned = args.clone();
148-
black_box(invoke_replace_with_args(args_cloned, size))
149-
})
150-
},
151-
);
152-
153-
let args = create_args::<i32>(size, str_len, true, 3, 5);
154-
group.bench_function(
155-
format!("replace_string_view [size={size}, str_len={str_len}]"),
156-
|b| {
157-
b.iter(|| {
158-
let args_cloned = args.clone();
159-
black_box(invoke_replace_with_args(args_cloned, size))
160-
})
161-
},
162-
);
163-
164-
let args = create_args::<i32>(size, str_len, false, 3, 5);
165-
group.bench_function(
166-
format!("replace_string [size={size}, str_len={str_len}]"),
167-
|b| {
168-
b.iter(|| {
169-
let args_cloned = args.clone();
170-
black_box(invoke_replace_with_args(args_cloned, size))
171-
})
172-
},
173-
);
174-
175-
let args = create_args::<i64>(size, str_len, false, 3, 5);
176-
group.bench_function(
177-
format!("replace_large_string [size={size}, str_len={str_len}]"),
178-
|b| {
179-
b.iter(|| {
180-
let args_cloned = args.clone();
181-
black_box(invoke_replace_with_args(args_cloned, size))
182-
})
183-
},
184-
);
124+
for &nulls in &[0.0_f32, 0.2] {
125+
for &str_len in &[32_usize, 128] {
126+
// ASCII single character replacement (fast path)
127+
let args = create_args::<i32>(size, str_len, false, 1, 1, nulls);
128+
group.bench_function(
129+
format!(
130+
"replace_string_ascii_single [size={size}, str_len={str_len}, nulls={nulls}]"
131+
),
132+
|b| {
133+
b.iter(|| {
134+
let args_cloned = args.clone();
135+
black_box(invoke_replace_with_args(args_cloned, size))
136+
})
137+
},
138+
);
139+
140+
// Multi-character strings (general path)
141+
let args = create_args::<i32>(size, str_len, true, 3, 5, nulls);
142+
group.bench_function(
143+
format!(
144+
"replace_string_view [size={size}, str_len={str_len}, nulls={nulls}]"
145+
),
146+
|b| {
147+
b.iter(|| {
148+
let args_cloned = args.clone();
149+
black_box(invoke_replace_with_args(args_cloned, size))
150+
})
151+
},
152+
);
153+
154+
let args = create_args::<i32>(size, str_len, false, 3, 5, nulls);
155+
group.bench_function(
156+
format!(
157+
"replace_string [size={size}, str_len={str_len}, nulls={nulls}]"
158+
),
159+
|b| {
160+
b.iter(|| {
161+
let args_cloned = args.clone();
162+
black_box(invoke_replace_with_args(args_cloned, size))
163+
})
164+
},
165+
);
166+
}
167+
}
185168

186169
group.finish();
187170
}

0 commit comments

Comments
 (0)