Skip to content

Commit 4a41173

Browse files
authored
perf: Optimize translate to use new bulk-NULL string builders (#22171)
## Which issue does this PR close?

- Closes #22170.

## Rationale for this change

This PR refactors and optimizes the `translate` UDF. In particular, we switch to using the new bulk-NULL string builders, avoiding per-row NULL computation, and avoid an intermediate string copy by using `append_with` / `append_byte_map`.

Benchmarks (Arm64):

Scalar (scalar_from_to)
- size=1024, str_len=8: 13.6 µs → 8.5 µs (-37.5%)
- size=1024, str_len=32: 26.2 µs → 20.6 µs (-21.4%)
- size=1024, str_len=128: 87.9 µs → 68.9 µs (-21.6%)
- size=1024, str_len=1024: 572.9 µs → 531.1 µs (-7.3%)
- size=4096, str_len=8: 51.6 µs → 31.4 µs (-39.1%)
- size=4096, str_len=32: 103.1 µs → 79.8 µs (-22.6%)
- size=4096, str_len=128: 341.1 µs → 273.4 µs (-19.8%)
- size=4096, str_len=1024: 2.3 ms → 2.1 ms (-8.7%)

Array — ASCII (array_from_to)
- size=1024, str_len=8: 50.6 µs → 21.2 µs (-58.1%)
- size=1024, str_len=32: 106.5 µs → 26.7 µs (-74.9%)
- size=1024, str_len=128: 265.4 µs → 59.9 µs (-77.4%)
- size=1024, str_len=1024: 1760.8 µs → 797.1 µs (-54.7%)
- size=4096, str_len=8: 211.4 µs → 84.3 µs (-60.1%)
- size=4096, str_len=32: 435.2 µs → 120.6 µs (-72.3%)
- size=4096, str_len=128: 1079.0 µs → 487.6 µs (-54.8%)
- size=4096, str_len=1024: 7.2 ms → 3.2 ms (-55.6%)

Array — non-ASCII (array_from_to_non_ascii)
- size=1024, str_len=8: 71.2 µs → 68.6 µs (-3.7%)
- size=1024, str_len=32: 228.8 µs → 236.9 µs (+3.5%)
- size=1024, str_len=128: 880.5 µs → 881.4 µs (+0.1%)
- size=1024, str_len=1024: 6.7 ms → 6.7 ms (+0.6%)
- size=4096, str_len=8: 375.5 µs → 376.6 µs (+0.3%)
- size=4096, str_len=32: 1041.2 µs → 1079.6 µs (+3.7%)
- size=4096, str_len=128: 3.5 ms → 3.6 ms (+2.9%)
- size=4096, str_len=1024: 27.0 ms → 26.8 ms (-0.7%)

## What changes are included in this PR?

* Switch from using the Rust StringBuilders to our new bulk-NULL string builders
* Switch from per-row NULL checks to computing the NULL bitmaps with `NullBuffer::union_many`
* Use `append_with` and `append_byte_map` rather than `append_value`, which avoids an intermediate scratch buffer
* Refactor lookup table code to use a single `TranslationTable` enum
* Add a benchmark for the "varying `from`/`to`, Unicode strings" case
* Add a unit test

## Are these changes tested?

Yes; new test added.

## Are there any user-facing changes?

No.
1 parent cbebc6f commit 4a41173

2 files changed

Lines changed: 316 additions & 149 deletions

File tree

datafusion/functions/benches/translate.rs

Lines changed: 58 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,18 +15,45 @@
1515
// specific language governing permissions and limitations
1616
// under the License.
1717

18-
use arrow::array::OffsetSizeTrait;
18+
use arrow::array::{GenericStringArray, OffsetSizeTrait};
1919
use arrow::datatypes::{DataType, Field};
2020
use arrow::util::bench_util::create_string_array_with_len;
2121
use criterion::{Criterion, SamplingMode, criterion_group, criterion_main};
2222
use datafusion_common::config::ConfigOptions;
2323
use datafusion_common::{DataFusionError, ScalarValue};
2424
use datafusion_expr::{ColumnarValue, ScalarFunctionArgs};
2525
use datafusion_functions::unicode;
26+
use rand::SeedableRng;
27+
use rand::prelude::IndexedRandom;
28+
use rand::rngs::StdRng;
2629
use std::hint::black_box;
2730
use std::sync::Arc;
2831
use std::time::Duration;
2932

33+
/// Alphabet from which the non-ASCII benchmark strings are drawn.
///
/// Combines 2-byte (Greek) and 3-byte (CJK/Hangul) UTF-8 characters so the
/// variable-width char paths in translate are exercised.
const NON_ASCII_ALPHABET: &[char] = &[
    // Greek lowercase letters (2 bytes each in UTF-8).
    'α', 'β', 'γ', 'δ', 'ε', 'ζ', 'η', 'θ',
    'ι', 'κ', 'λ', 'μ', 'ν', 'ξ', 'ο', 'π',
    'ρ', 'σ', 'τ', 'υ', 'φ', 'χ', 'ψ', 'ω',
    // CJK ideographs and Hangul syllables (3 bytes each in UTF-8).
    '日', '本', '語', '中', '文', '한', '국', '어',
];
39+
40+
fn create_non_ascii_string_array<O: OffsetSizeTrait>(
41+
size: usize,
42+
char_count: usize,
43+
seed: u64,
44+
) -> GenericStringArray<O> {
45+
let mut rng = StdRng::seed_from_u64(seed);
46+
(0..size)
47+
.map(|_| {
48+
Some(
49+
(0..char_count)
50+
.map(|_| *NON_ASCII_ALPHABET.choose(&mut rng).unwrap())
51+
.collect::<String>(),
52+
)
53+
})
54+
.collect()
55+
}
56+
3057
fn create_args_array_from_to<O: OffsetSizeTrait>(
3158
size: usize,
3259
str_len: usize,
@@ -42,6 +69,25 @@ fn create_args_array_from_to<O: OffsetSizeTrait>(
4269
]
4370
}
4471

72+
fn create_args_array_from_to_non_ascii<O: OffsetSizeTrait>(
73+
size: usize,
74+
str_len: usize,
75+
) -> Vec<ColumnarValue> {
76+
let string_array = Arc::new(create_non_ascii_string_array::<O>(
77+
size,
78+
str_len,
79+
0xA110_AAAA,
80+
));
81+
let from_array = Arc::new(create_non_ascii_string_array::<O>(size, 3, 0xA110_BBBB));
82+
let to_array = Arc::new(create_non_ascii_string_array::<O>(size, 2, 0xA110_CCCC));
83+
84+
vec![
85+
ColumnarValue::Array(string_array),
86+
ColumnarValue::Array(from_array),
87+
ColumnarValue::Array(to_array),
88+
]
89+
}
90+
4591
fn create_args_scalar_from_to<O: OffsetSizeTrait>(
4692
size: usize,
4793
str_len: usize,
@@ -91,6 +137,17 @@ fn criterion_benchmark(c: &mut Criterion) {
91137
})
92138
});
93139

140+
let args = create_args_array_from_to_non_ascii::<i32>(size, str_len);
141+
group.bench_function(
142+
format!("array_from_to_non_ascii [str_len={str_len}]"),
143+
|b| {
144+
b.iter(|| {
145+
let args_cloned = args.clone();
146+
black_box(invoke_translate_with_args(args_cloned, size))
147+
})
148+
},
149+
);
150+
94151
let args = create_args_scalar_from_to::<i32>(size, str_len);
95152
group.bench_function(format!("scalar_from_to [str_len={str_len}]"), |b| {
96153
b.iter(|| {

0 commit comments

Comments
 (0)