1515// specific language governing permissions and limitations
1616// under the License.
1717
18- use arrow:: array:: OffsetSizeTrait ;
18+ use arrow:: array:: { GenericStringArray , OffsetSizeTrait , StringViewArray } ;
1919use arrow:: datatypes:: { DataType , Field } ;
2020use arrow:: util:: bench_util:: {
2121 create_string_array_with_len, create_string_view_array_with_len,
@@ -29,31 +29,62 @@ use std::hint::black_box;
2929use std:: sync:: Arc ;
3030use std:: time:: Duration ;
3131
32+ /// Build a string array, dropping the null buffer when `null_density == 0.0`
33+ /// so the resulting array matches what real DataFusion produces for a column
34+ /// with no nulls (`nulls() == None`, not `Some(all-valid)`).
35+ fn make_string_array < O : OffsetSizeTrait > (
36+ size : usize ,
37+ null_density : f32 ,
38+ str_len : usize ,
39+ ) -> GenericStringArray < O > {
40+ let arr = create_string_array_with_len :: < O > ( size, null_density, str_len) ;
41+ if null_density == 0.0 {
42+ let ( offsets, values, _) = arr. into_parts ( ) ;
43+ GenericStringArray :: < O > :: new ( offsets, values, None )
44+ } else {
45+ arr
46+ }
47+ }
48+
49+ fn make_string_view_array (
50+ size : usize ,
51+ null_density : f32 ,
52+ str_len : usize ,
53+ ) -> StringViewArray {
54+ let arr = create_string_view_array_with_len ( size, null_density, str_len, false ) ;
55+ if null_density == 0.0 {
56+ let ( views, buffers, _) = arr. into_parts ( ) ;
57+ StringViewArray :: new ( views, buffers, None )
58+ } else {
59+ arr
60+ }
61+ }
62+
3263fn create_args < O : OffsetSizeTrait > (
3364 size : usize ,
3465 str_len : usize ,
3566 force_view_types : bool ,
3667 from_len : usize ,
3768 to_len : usize ,
69+ null_density : f32 ,
3870) -> Vec < ColumnarValue > {
3971 if force_view_types {
4072 let string_array =
41- Arc :: new ( create_string_view_array_with_len ( size, 0.1 , str_len, false ) ) ;
42- let from_array = Arc :: new ( create_string_view_array_with_len (
43- size, 0.1 , from_len, false ,
44- ) ) ;
45- let to_array =
46- Arc :: new ( create_string_view_array_with_len ( size, 0.1 , to_len, false ) ) ;
73+ Arc :: new ( make_string_view_array ( size, null_density, str_len) ) ;
74+ let from_array =
75+ Arc :: new ( make_string_view_array ( size, null_density, from_len) ) ;
76+ let to_array = Arc :: new ( make_string_view_array ( size, null_density, to_len) ) ;
4777 vec ! [
4878 ColumnarValue :: Array ( string_array) ,
4979 ColumnarValue :: Array ( from_array) ,
5080 ColumnarValue :: Array ( to_array) ,
5181 ]
5282 } else {
5383 let string_array =
54- Arc :: new ( create_string_array_with_len :: < O > ( size, 0.1 , str_len) ) ;
55- let from_array = Arc :: new ( create_string_array_with_len :: < O > ( size, 0.1 , from_len) ) ;
56- let to_array = Arc :: new ( create_string_array_with_len :: < O > ( size, 0.1 , to_len) ) ;
84+ Arc :: new ( make_string_array :: < O > ( size, null_density, str_len) ) ;
85+ let from_array =
86+ Arc :: new ( make_string_array :: < O > ( size, null_density, from_len) ) ;
87+ let to_array = Arc :: new ( make_string_array :: < O > ( size, null_density, to_len) ) ;
5788
5889 vec ! [
5990 ColumnarValue :: Array ( string_array) ,
@@ -90,98 +121,50 @@ fn criterion_benchmark(c: &mut Criterion) {
90121 group. sample_size ( 10 ) ;
91122 group. measurement_time ( Duration :: from_secs ( 10 ) ) ;
92123
93- // ASCII single character replacement (fast path)
94- let str_len = 32 ;
95- let args = create_args :: < i32 > ( size, str_len, false , 1 , 1 ) ;
96- group. bench_function (
97- format ! ( "replace_string_ascii_single [size={size}, str_len={str_len}]" ) ,
98- |b| {
99- b. iter ( || {
100- let args_cloned = args. clone ( ) ;
101- black_box ( invoke_replace_with_args ( args_cloned, size) )
102- } )
103- } ,
104- ) ;
105-
106- // Multi-character strings (general path)
107- let args = create_args :: < i32 > ( size, str_len, true , 3 , 5 ) ;
108- group. bench_function (
109- format ! ( "replace_string_view [size={size}, str_len={str_len}]" ) ,
110- |b| {
111- b. iter ( || {
112- let args_cloned = args. clone ( ) ;
113- black_box ( invoke_replace_with_args ( args_cloned, size) )
114- } )
115- } ,
116- ) ;
117-
118- let args = create_args :: < i32 > ( size, str_len, false , 3 , 5 ) ;
119- group. bench_function (
120- format ! ( "replace_string [size={size}, str_len={str_len}]" ) ,
121- |b| {
122- b. iter ( || {
123- let args_cloned = args. clone ( ) ;
124- black_box ( invoke_replace_with_args ( args_cloned, size) )
125- } )
126- } ,
127- ) ;
128-
129- let args = create_args :: < i64 > ( size, str_len, false , 3 , 5 ) ;
130- group. bench_function (
131- format ! ( "replace_large_string [size={size}, str_len={str_len}]" ) ,
132- |b| {
133- b. iter ( || {
134- let args_cloned = args. clone ( ) ;
135- black_box ( invoke_replace_with_args ( args_cloned, size) )
136- } )
137- } ,
138- ) ;
139-
140- // Larger strings
141- let str_len = 128 ;
142- let args = create_args :: < i32 > ( size, str_len, false , 1 , 1 ) ;
143- group. bench_function (
144- format ! ( "replace_string_ascii_single [size={size}, str_len={str_len}]" ) ,
145- |b| {
146- b. iter ( || {
147- let args_cloned = args. clone ( ) ;
148- black_box ( invoke_replace_with_args ( args_cloned, size) )
149- } )
150- } ,
151- ) ;
152-
153- let args = create_args :: < i32 > ( size, str_len, true , 3 , 5 ) ;
154- group. bench_function (
155- format ! ( "replace_string_view [size={size}, str_len={str_len}]" ) ,
156- |b| {
157- b. iter ( || {
158- let args_cloned = args. clone ( ) ;
159- black_box ( invoke_replace_with_args ( args_cloned, size) )
160- } )
161- } ,
162- ) ;
163-
164- let args = create_args :: < i32 > ( size, str_len, false , 3 , 5 ) ;
165- group. bench_function (
166- format ! ( "replace_string [size={size}, str_len={str_len}]" ) ,
167- |b| {
168- b. iter ( || {
169- let args_cloned = args. clone ( ) ;
170- black_box ( invoke_replace_with_args ( args_cloned, size) )
171- } )
172- } ,
173- ) ;
174-
175- let args = create_args :: < i64 > ( size, str_len, false , 3 , 5 ) ;
176- group. bench_function (
177- format ! ( "replace_large_string [size={size}, str_len={str_len}]" ) ,
178- |b| {
179- b. iter ( || {
180- let args_cloned = args. clone ( ) ;
181- black_box ( invoke_replace_with_args ( args_cloned, size) )
182- } )
183- } ,
184- ) ;
124+ for & nulls in & [ 0.0_f32 , 0.2 ] {
125+ for & str_len in & [ 32_usize , 128 ] {
126+ // ASCII single character replacement (fast path)
127+ let args = create_args :: < i32 > ( size, str_len, false , 1 , 1 , nulls) ;
128+ group. bench_function (
129+ format ! (
130+ "replace_string_ascii_single [size={size}, str_len={str_len}, nulls={nulls}]"
131+ ) ,
132+ |b| {
133+ b. iter ( || {
134+ let args_cloned = args. clone ( ) ;
135+ black_box ( invoke_replace_with_args ( args_cloned, size) )
136+ } )
137+ } ,
138+ ) ;
139+
140+ // Multi-character strings (general path)
141+ let args = create_args :: < i32 > ( size, str_len, true , 3 , 5 , nulls) ;
142+ group. bench_function (
143+ format ! (
144+ "replace_string_view [size={size}, str_len={str_len}, nulls={nulls}]"
145+ ) ,
146+ |b| {
147+ b. iter ( || {
148+ let args_cloned = args. clone ( ) ;
149+ black_box ( invoke_replace_with_args ( args_cloned, size) )
150+ } )
151+ } ,
152+ ) ;
153+
154+ let args = create_args :: < i32 > ( size, str_len, false , 3 , 5 , nulls) ;
155+ group. bench_function (
156+ format ! (
157+ "replace_string [size={size}, str_len={str_len}, nulls={nulls}]"
158+ ) ,
159+ |b| {
160+ b. iter ( || {
161+ let args_cloned = args. clone ( ) ;
162+ black_box ( invoke_replace_with_args ( args_cloned, size) )
163+ } )
164+ } ,
165+ ) ;
166+ }
167+ }
185168
186169 group. finish ( ) ;
187170 }
0 commit comments