Skip to content

Commit 281d640

Browse files
committed
Removes alignment requirement in st3
1 parent 6277e35 commit 281d640

1 file changed

Lines changed: 27 additions & 18 deletions

File tree

crates/core_arch/src/macros.rs

Lines changed: 27 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -266,23 +266,25 @@ macro_rules! deinterleaving_load {
266266
transmute((v0, v1))
267267
}};
268268

269+
// N = 3
269270
($elem:ty, $lanes:literal, 3, $ptr:expr) => {{
270271
use $crate::core_arch::macros::deinterleave_mask;
271272
use $crate::core_arch::simd::Simd;
272-
use $crate::mem::{MaybeUninit, transmute};
273+
use $crate::mem::transmute;
273274

275+
// NOTE: repr(simd) adds padding to make the total size a power of two.
276+
// Hence reading W from ptr might read out of bounds.
274277
type V = Simd<$elem, $lanes>;
275-
type W = Simd<$elem, { $lanes * 3 }>;
278+
type Arr = [$elem; { $lanes * 3 }];
276279

277-
// NOTE: repr(simd) adds padding to make the total size a power of two.
278-
// Hence reading W from ptr might read out of bounds.
279-
let mut mem = MaybeUninit::<W>::uninit();
280-
$crate::ptr::copy_nonoverlapping(
281-
$ptr.cast::<$elem>(),
282-
mem.as_mut_ptr().cast::<$elem>(),
283-
$lanes * 3,
284-
);
285-
let w = mem.assume_init();
280+
// NOTE: copy_nonoverlapping requires both pointers to be aligned to at least align_of::<$elem>(),
281+
// passing a pointer that is not sufficiently aligned is undefined behavior (UB).
282+
let arr: Arr = $crate::ptr::read_unaligned($ptr as *const Arr);
283+
284+
type W = Simd<$elem, { $lanes * 4 }>;
285+
let mut tmp = [0 as $elem; { $lanes * 4 }];
286+
tmp[..$lanes * 3].copy_from_slice(&arr);
287+
let w: W = W::from_array(tmp);
286288

287289
let v0: V = simd_shuffle!(w, w, deinterleave_mask::<$lanes, 3, 0>());
288290
let v1: V = simd_shuffle!(w, w, deinterleave_mask::<$lanes, 3, 1>());
@@ -291,6 +293,7 @@ macro_rules! deinterleaving_load {
291293
transmute((v0, v1, v2))
292294
}};
293295

296+
// N = 4
294297
($elem:ty, $lanes:literal, 4, $ptr:expr) => {{
295298
use $crate::core_arch::macros::deinterleave_mask;
296299
use $crate::core_arch::simd::Simd;
@@ -345,16 +348,22 @@ macro_rules! interleaving_store {
345348
let v2v2: Simd<$elem, { $lanes * 2 }> =
346349
simd_shuffle!($v.2, $v.2, identity::<{ $lanes * 2 }>());
347350

348-
type W = Simd<$elem, { $lanes * 3 }>;
349-
350351
// NOTE: repr(simd) adds padding to make the total size a power of two.
351352
// Hence writing W to ptr might write out of bounds.
353+
354+
type W = Simd<$elem, { $lanes * 3 }>;
355+
352356
let w: W = simd_shuffle!(v0v1, v2v2, interleave_mask::<{ $lanes * 3 }, $lanes, 3>());
353-
$crate::ptr::copy_nonoverlapping(
354-
(&w as *const W).cast::<$elem>(),
355-
$ptr.cast::<$elem>(),
356-
$lanes * 3,
357-
);
357+
358+
let arr: [$elem; { $lanes * 3 }] = $crate::mem::transmute_copy(&w);
359+
360+
// NOTE: copy_nonoverlapping requires both pointers to be aligned to at least align_of::<$elem>(),
361+
// passing a pointer that is not sufficiently aligned is undefined behavior (UB).
362+
let mut i = 0;
363+
while i < $lanes * 3 {
364+
$crate::ptr::write_unaligned(($ptr as *mut $elem).add(i), arr[i]);
365+
i += 1;
366+
}
358367
}};
359368

360369
// N = 4

0 commit comments

Comments
 (0)