@@ -52,30 +52,36 @@ macro_rules! impl_append_to_builder {
5252 debug_assert!( !null_words. is_null( ) , "null_bitset_ptr is null" ) ;
5353 debug_assert!( !ptr. is_null( ) , "element_offset pointer is null" ) ;
5454 for idx in 0 ..num_elements {
55- let word_idx = idx >> 6 ;
56- let bit_idx = idx & 0x3f ;
57- // SAFETY: word_idx < ceil(num_elements/64) since idx < num_elements
58- let is_null = unsafe { ( * null_words. add( word_idx) & ( 1i64 << bit_idx) ) != 0 } ;
55+ // SAFETY: null_words has ceil(num_elements/64) words, idx < num_elements
56+ let is_null = unsafe { Self :: is_null_in_bitset( null_words, idx) } ;
5957
6058 if is_null {
6159 builder. append_null( ) ;
6260 } else {
6361 // SAFETY: ptr is within element data bounds
64- builder. append_value( unsafe { * ptr } ) ;
62+ builder. append_value( unsafe { ptr. read_unaligned ( ) } ) ;
6563 }
6664 // SAFETY: ptr stays within bounds, iterating num_elements times
6765 ptr = unsafe { ptr. add( 1 ) } ;
6866 }
6967 } else {
7068 // SAFETY: element_offset points to contiguous data of length num_elements
7169 debug_assert!( self . element_offset != 0 , "element_offset is null" ) ;
72- let slice = unsafe {
73- std:: slice:: from_raw_parts(
74- self . element_offset as * const $element_type,
75- num_elements,
76- )
77- } ;
78- builder. append_slice( slice) ;
70+ let ptr = self . element_offset as * const $element_type;
71+ // Use bulk copy when data is properly aligned, fall back to
72+ // per-element unaligned reads otherwise
73+ if ( ptr as usize ) . is_multiple_of( std:: mem:: align_of:: <$element_type>( ) ) {
74+ let slice = unsafe {
75+ std:: slice:: from_raw_parts( ptr, num_elements)
76+ } ;
77+ builder. append_slice( slice) ;
78+ } else {
79+ let mut ptr = ptr;
80+ for _ in 0 ..num_elements {
81+ builder. append_value( unsafe { ptr. read_unaligned( ) } ) ;
82+ ptr = unsafe { ptr. add( 1 ) } ;
83+ }
84+ }
7985 }
8086 }
8187 } ;
@@ -158,6 +164,17 @@ impl SparkUnsafeArray {
158164 ( self . row_addr + 8 ) as * const i64
159165 }
160166
167+ /// Checks whether the null bit at `idx` is set in the given null bitset pointer.
168+ ///
169+ /// # Safety
170+ /// `null_words` must point to at least `ceil((idx+1)/64)` i64 words.
171+ #[ inline]
172+ unsafe fn is_null_in_bitset ( null_words : * const i64 , idx : usize ) -> bool {
173+ let word_idx = idx >> 6 ;
174+ let bit_idx = idx & 0x3f ;
175+ ( null_words. add ( word_idx) . read_unaligned ( ) & ( 1i64 << bit_idx) ) != 0
176+ }
177+
161178 impl_append_to_builder ! ( append_ints_to_builder, Int32Builder , i32 ) ;
162179 impl_append_to_builder ! ( append_longs_to_builder, Int64Builder , i64 ) ;
163180 impl_append_to_builder ! ( append_shorts_to_builder, Int16Builder , i16 ) ;
@@ -189,10 +206,8 @@ impl SparkUnsafeArray {
189206 "append_booleans: null_bitset_ptr is null"
190207 ) ;
191208 for idx in 0 ..num_elements {
192- let word_idx = idx >> 6 ;
193- let bit_idx = idx & 0x3f ;
194- // SAFETY: word_idx < ceil(num_elements/64) since idx < num_elements
195- let is_null = unsafe { ( * null_words. add ( word_idx) & ( 1i64 << bit_idx) ) != 0 } ;
209+ // SAFETY: null_words has ceil(num_elements/64) words, idx < num_elements
210+ let is_null = unsafe { Self :: is_null_in_bitset ( null_words, idx) } ;
196211
197212 if is_null {
198213 builder. append_null ( ) ;
@@ -234,16 +249,14 @@ impl SparkUnsafeArray {
234249 "append_timestamps: element_offset pointer is null"
235250 ) ;
236251 for idx in 0 ..num_elements {
237- let word_idx = idx >> 6 ;
238- let bit_idx = idx & 0x3f ;
239- // SAFETY: word_idx < ceil(num_elements/64) since idx < num_elements
240- let is_null = unsafe { ( * null_words. add ( word_idx) & ( 1i64 << bit_idx) ) != 0 } ;
252+ // SAFETY: null_words has ceil(num_elements/64) words, idx < num_elements
253+ let is_null = unsafe { Self :: is_null_in_bitset ( null_words, idx) } ;
241254
242255 if is_null {
243256 builder. append_null ( ) ;
244257 } else {
245258 // SAFETY: ptr is within element data bounds
246- builder. append_value ( unsafe { * ptr } ) ;
259+ builder. append_value ( unsafe { ptr. read_unaligned ( ) } ) ;
247260 }
248261 // SAFETY: ptr stays within bounds, iterating num_elements times
249262 ptr = unsafe { ptr. add ( 1 ) } ;
@@ -254,10 +267,17 @@ impl SparkUnsafeArray {
254267 self . element_offset != 0 ,
255268 "append_timestamps: element_offset is null"
256269 ) ;
257- let slice = unsafe {
258- std:: slice:: from_raw_parts ( self . element_offset as * const i64 , num_elements)
259- } ;
260- builder. append_slice ( slice) ;
270+ let ptr = self . element_offset as * const i64 ;
271+ if ( ptr as usize ) . is_multiple_of ( std:: mem:: align_of :: < i64 > ( ) ) {
272+ let slice = unsafe { std:: slice:: from_raw_parts ( ptr, num_elements) } ;
273+ builder. append_slice ( slice) ;
274+ } else {
275+ let mut ptr = ptr;
276+ for _ in 0 ..num_elements {
277+ builder. append_value ( unsafe { ptr. read_unaligned ( ) } ) ;
278+ ptr = unsafe { ptr. add ( 1 ) } ;
279+ }
280+ }
261281 }
262282 }
263283
@@ -283,16 +303,14 @@ impl SparkUnsafeArray {
283303 "append_dates: element_offset pointer is null"
284304 ) ;
285305 for idx in 0 ..num_elements {
286- let word_idx = idx >> 6 ;
287- let bit_idx = idx & 0x3f ;
288- // SAFETY: word_idx < ceil(num_elements/64) since idx < num_elements
289- let is_null = unsafe { ( * null_words. add ( word_idx) & ( 1i64 << bit_idx) ) != 0 } ;
306+ // SAFETY: null_words has ceil(num_elements/64) words, idx < num_elements
307+ let is_null = unsafe { Self :: is_null_in_bitset ( null_words, idx) } ;
290308
291309 if is_null {
292310 builder. append_null ( ) ;
293311 } else {
294312 // SAFETY: ptr is within element data bounds
295- builder. append_value ( unsafe { * ptr } ) ;
313+ builder. append_value ( unsafe { ptr. read_unaligned ( ) } ) ;
296314 }
297315 // SAFETY: ptr stays within bounds, iterating num_elements times
298316 ptr = unsafe { ptr. add ( 1 ) } ;
@@ -303,10 +321,17 @@ impl SparkUnsafeArray {
303321 self . element_offset != 0 ,
304322 "append_dates: element_offset is null"
305323 ) ;
306- let slice = unsafe {
307- std:: slice:: from_raw_parts ( self . element_offset as * const i32 , num_elements)
308- } ;
309- builder. append_slice ( slice) ;
324+ let ptr = self . element_offset as * const i32 ;
325+ if ( ptr as usize ) . is_multiple_of ( std:: mem:: align_of :: < i32 > ( ) ) {
326+ let slice = unsafe { std:: slice:: from_raw_parts ( ptr, num_elements) } ;
327+ builder. append_slice ( slice) ;
328+ } else {
329+ let mut ptr = ptr;
330+ for _ in 0 ..num_elements {
331+ builder. append_value ( unsafe { ptr. read_unaligned ( ) } ) ;
332+ ptr = unsafe { ptr. add ( 1 ) } ;
333+ }
334+ }
310335 }
311336 }
312337}
0 commit comments