@@ -266,23 +266,25 @@ macro_rules! deinterleaving_load {
266266 transmute( ( v0, v1) )
267267 } } ;
268268
269+ // N = 3
269270 ( $elem: ty, $lanes: literal, 3 , $ptr: expr) => { {
270271 use $crate:: core_arch:: macros:: deinterleave_mask;
271272 use $crate:: core_arch:: simd:: Simd ;
272- use $crate:: mem:: { MaybeUninit , transmute} ;
273+ use $crate:: mem:: transmute;
273274
275+ // NOTE: repr(simd) adds padding to make the total size a power of two.
276+ // Hence reading W from ptr might read out of bounds.
274277 type V = Simd <$elem, $lanes>;
275- type W = Simd < $elem, { $lanes * 3 } > ;
278+ type Arr = [ $elem; { $lanes * 3 } ] ;
276279
277- // NOTE: repr(simd) adds padding to make the total size a power of two.
278- // Hence reading W from ptr might read out of bounds.
279- let mut mem = MaybeUninit :: <W >:: uninit( ) ;
280- $crate:: ptr:: copy_nonoverlapping(
281- $ptr. cast:: <$elem>( ) ,
282- mem. as_mut_ptr( ) . cast:: <$elem>( ) ,
283- $lanes * 3 ,
284- ) ;
285- let w = mem. assume_init( ) ;
280+ // NOTE: copy_nonoverlapping requires both pointers to be aligned to at least align_of::<$elem>();
281+ // passing a pointer that is not sufficiently aligned is UB, so read_unaligned is used instead.
282+ let arr: Arr = $crate:: ptr:: read_unaligned( $ptr as * const Arr ) ;
283+
284+ type W = Simd <$elem, { $lanes * 4 } >;
285+ let mut tmp = [ 0 as $elem; { $lanes * 4 } ] ;
286+ tmp[ ..$lanes * 3 ] . copy_from_slice( & arr) ;
287+ let w: W = W :: from_array( tmp) ;
286288
287289 let v0: V = simd_shuffle!( w, w, deinterleave_mask:: <$lanes, 3 , 0 >( ) ) ;
288290 let v1: V = simd_shuffle!( w, w, deinterleave_mask:: <$lanes, 3 , 1 >( ) ) ;
@@ -291,6 +293,7 @@ macro_rules! deinterleaving_load {
291293 transmute( ( v0, v1, v2) )
292294 } } ;
293295
296+ // N = 4
294297 ( $elem: ty, $lanes: literal, 4 , $ptr: expr) => { {
295298 use $crate:: core_arch:: macros:: deinterleave_mask;
296299 use $crate:: core_arch:: simd:: Simd ;
@@ -345,16 +348,22 @@ macro_rules! interleaving_store {
345348 let v2v2: Simd <$elem, { $lanes * 2 } > =
346349 simd_shuffle!( $v. 2 , $v. 2 , identity:: <{ $lanes * 2 } >( ) ) ;
347350
348- type W = Simd <$elem, { $lanes * 3 } >;
349-
350351 // NOTE: repr(simd) adds padding to make the total size a power of two.
351352 // Hence writing W to ptr might write out of bounds.
353+
354+ type W = Simd <$elem, { $lanes * 3 } >;
355+
352356 let w: W = simd_shuffle!( v0v1, v2v2, interleave_mask:: <{ $lanes * 3 } , $lanes, 3 >( ) ) ;
353- $crate:: ptr:: copy_nonoverlapping(
354- ( & w as * const W ) . cast:: <$elem>( ) ,
355- $ptr. cast:: <$elem>( ) ,
356- $lanes * 3 ,
357- ) ;
357+
358+ let arr: [ $elem; { $lanes * 3 } ] = $crate:: mem:: transmute_copy( & w) ;
359+
360+ // NOTE: copy_nonoverlapping requires both pointers to be aligned to at least align_of::<$elem>();
361+ // passing a pointer that is not sufficiently aligned is UB, so write_unaligned is used per element instead.
362+ let mut i = 0 ;
363+ while i < $lanes * 3 {
364+ $crate:: ptr:: write_unaligned( ( $ptr as * mut $elem) . add( i) , arr[ i] ) ;
365+ i += 1 ;
366+ }
358367 } } ;
359368
360369 // N = 4
0 commit comments