Skip to content

Commit 01d34a8

Browse files
authored
Add append_value_n to GenericByteBuilder (apache#9426)
# Which issue does this PR close? - Closes apache#9425. # Rationale for this change I noticed that this method is available on PrimitiveTypeBuilder but missing on the GenericByteBuilder, which makes sense since the gain is smaller; however, benchmarking shows a solid ~10% speedup, mostly because of the more efficient allocation of the null mask. ``` ┌───────────────────┬────────────────┬───────────────────┬─────────┐ │ Benchmark │ append_value_n │ append_value loop │ Speedup │ ├───────────────────┼────────────────┼───────────────────┼─────────┤ │ n=100/len=5 │ 371 ns │ 408 ns │ 10% │ ├───────────────────┼────────────────┼───────────────────┼─────────┤ │ n=100/len=30 │ 456 ns │ 507 ns │ 10% │ ├───────────────────┼────────────────┼───────────────────┼─────────┤ │ n=100/len=1024 │ 1.81 µs │ 1.95 µs │ 8% │ ├───────────────────┼────────────────┼───────────────────┼─────────┤ │ n=1000/len=5 │ 2.39 µs │ 2.87 µs │ 17% │ ├───────────────────┼────────────────┼───────────────────┼─────────┤ │ n=1000/len=30 │ 3.41 µs │ 3.89 µs │ 12% │ ├───────────────────┼────────────────┼───────────────────┼─────────┤ │ n=1000/len=1024 │ 12.3 µs │ 14.4 µs │ 15% │ ├───────────────────┼────────────────┼───────────────────┼─────────┤ │ n=10000/len=5 │ 23.8 µs │ 29.3 µs │ 19% │ ├───────────────────┼────────────────┼───────────────────┼─────────┤ │ n=10000/len=30 │ 33.7 µs │ 39.0 µs │ 14% │ ├───────────────────┼────────────────┼───────────────────┼─────────┤ │ n=10000/len=1024 │ 115.9 µs │ 135.0 µs │ 14% │ ├───────────────────┼────────────────┼───────────────────┼─────────┤ │ n=100000/len=5 │ 227.5 µs │ 278.6 µs │ 18% │ ├───────────────────┼────────────────┼───────────────────┼─────────┤ │ n=100000/len=30 │ 328.1 µs │ 377.9 µs │ 13% │ ├───────────────────┼────────────────┼───────────────────┼─────────┤ │ n=100000/len=1024 │ 1.16 ms │ 1.34 ms │ 14% │ └───────────────────┴────────────────┴───────────────────┴─────────┘ ``` I think this is still worth adding. Let me know what the community thinks! 
# What changes are included in this PR? A new public API. # Are these changes tested? Yes! # Are there any user-facing changes? A new public API.
1 parent 73a516e commit 01d34a8

1 file changed

Lines changed: 32 additions & 0 deletions

File tree

arrow-array/src/builder/generic_bytes_builder.rs

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,21 @@ impl<T: ByteArrayType> GenericByteBuilder<T> {
110110
self.offsets_builder.push(self.next_offset());
111111
}
112112

113+
/// Appends a value of type `T` into the builder `n` times.
114+
///
115+
/// See [`Self::append_value`] for more panic information.
116+
#[inline]
117+
pub fn append_value_n(&mut self, value: impl AsRef<T::Native>, n: usize) {
118+
let bytes: &[u8] = value.as_ref().as_ref();
119+
self.value_builder.reserve(bytes.len() * n);
120+
self.offsets_builder.reserve(n);
121+
for _ in 0..n {
122+
self.value_builder.extend_from_slice(bytes);
123+
self.offsets_builder.push(self.next_offset());
124+
}
125+
self.null_buffer_builder.append_n_non_nulls(n);
126+
}
127+
113128
/// Append an `Option` value into the builder.
114129
///
115130
/// - A `None` value will append a null value.
@@ -939,4 +954,21 @@ mod tests {
939954

940955
assert!(matches!(result, Err(ArrowError::OffsetOverflowError(_))));
941956
}
957+
958+
#[test]
fn test_append_value_n() {
    // One single append, one bulk append of three copies, then a trailing null.
    let mut builder = GenericStringBuilder::<i32>::new();
    builder.append_value("hello");
    builder.append_value_n("world", 3);
    builder.append_null();
    let array = builder.finish();

    assert_eq!(5, array.len());
    assert_eq!(1, array.null_count());

    // Slots 0..=3 hold values; slot 4 is the null.
    let expected = ["hello", "world", "world", "world"];
    for (idx, want) in expected.iter().enumerate() {
        assert_eq!(*want, array.value(idx));
    }
    assert!(array.is_null(4));
}
942974
}

0 commit comments

Comments
 (0)