Skip to content

Commit 2513831

Browse files
authored
Merge pull request #2143 from awxkee/fix_aligned_reads
Removes alignment requirement in st3,ld3
2 parents b769e5b + fc7a454 commit 2513831

1 file changed

Lines changed: 18 additions & 17 deletions

File tree

crates/core_arch/src/macros.rs

Lines changed: 18 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -266,23 +266,22 @@ macro_rules! deinterleaving_load {
266266
transmute((v0, v1))
267267
}};
268268

269+
// N = 3
269270
($elem:ty, $lanes:literal, 3, $ptr:expr) => {{
270271
use $crate::core_arch::macros::deinterleave_mask;
271272
use $crate::core_arch::simd::Simd;
272-
use $crate::mem::{MaybeUninit, transmute};
273+
use $crate::mem::transmute;
273274

274275
type V = Simd<$elem, $lanes>;
275-
type W = Simd<$elem, { $lanes * 3 }>;
276+
type Arr = [$elem; { $lanes * 3 }];
276277

278+
// NOTE: copy_nonoverlapping requires both pointers to be aligned to at least align_of::<$elem>(),
279+
// passing a pointer that is not sufficiently aligned is undefined behavior, so read_unaligned is used instead.
280+
let arr: Arr = $crate::ptr::read_unaligned($ptr as *const [$elem; { $lanes * 3 }]);
277281
// NOTE: repr(simd) adds padding to make the total size a power of two.
278-
// Hence reading W from ptr might read out of bounds.
279-
let mut mem = MaybeUninit::<W>::uninit();
280-
$crate::ptr::copy_nonoverlapping(
281-
$ptr.cast::<$elem>(),
282-
mem.as_mut_ptr().cast::<$elem>(),
283-
$lanes * 3,
284-
);
285-
let w = mem.assume_init();
282+
// Hence reading a W from ptr might read out of bounds.
283+
type W = Simd<$elem, { $lanes * 3 }>;
284+
let w: W = W::from_array(arr);
286285

287286
let v0: V = simd_shuffle!(w, w, deinterleave_mask::<$lanes, 3, 0>());
288287
let v1: V = simd_shuffle!(w, w, deinterleave_mask::<$lanes, 3, 1>());
@@ -291,6 +290,7 @@ macro_rules! deinterleaving_load {
291290
transmute((v0, v1, v2))
292291
}};
293292

293+
// N = 4
294294
($elem:ty, $lanes:literal, 4, $ptr:expr) => {{
295295
use $crate::core_arch::macros::deinterleave_mask;
296296
use $crate::core_arch::simd::Simd;
@@ -345,16 +345,17 @@ macro_rules! interleaving_store {
345345
let v2v2: Simd<$elem, { $lanes * 2 }> =
346346
simd_shuffle!($v.2, $v.2, identity::<{ $lanes * 2 }>());
347347

348-
type W = Simd<$elem, { $lanes * 3 }>;
349-
350348
// NOTE: repr(simd) adds padding to make the total size a power of two.
351349
// Hence writing W to ptr might write out of bounds.
350+
type W = Simd<$elem, { $lanes * 3 }>;
351+
352352
let w: W = simd_shuffle!(v0v1, v2v2, interleave_mask::<{ $lanes * 3 }, $lanes, 3>());
353-
$crate::ptr::copy_nonoverlapping(
354-
(&w as *const W).cast::<$elem>(),
355-
$ptr.cast::<$elem>(),
356-
$lanes * 3,
357-
);
353+
354+
let arr: [$elem; { $lanes * 3 }] = $crate::mem::transmute_copy(&w);
355+
356+
// NOTE: copy_nonoverlapping requires both pointers to be aligned to at least align_of::<$elem>(),
357+
// passing a pointer that is not sufficiently aligned is undefined behavior, so write_unaligned is used instead.
358+
$ptr.cast::<[$elem; { $lanes * 3 }]>().write_unaligned(arr);
358359
}};
359360

360361
// N = 4

0 commit comments

Comments
 (0)