Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,7 @@ extern "C" int LLVMFuzzerTestOneInput(const uint8_t* data, size_t size)
}

// Check 2: SIMD x4 path agrees with scalar path lane-by-lane.
- std::array<uint32_t, 4> simd_out{};
+ alignas(16) std::array<uint32_t, 4> simd_out{};
production_simd(scalars, bit_offset, window_bits, simd_out);
for (size_t lane = 0; lane < 4; ++lane) {
const uint32_t want = production_scalar(scalars[lane].data(), bit_offset, window_bits);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -211,11 +211,9 @@ struct ConstantineSliceParamsU32 {
}

// Store a `SimdU32x4` to a 4-lane uint32 destination as a single 128-bit op.
- // On WASM the explicit `wasm_v128_store` is used because earlier codegen for
- // the equivalent struct-wrapper assignment was observed to round-trip the
- // vector through 4 scalar memory slots; the intrinsic guarantees the
- // `i32x4.store` opcode. On native the `vector_size` store lowers directly to
- // SSE2 `movdqu` / NEON `st1`.
+ // Precondition: `dst` is 16-byte aligned.
+ // On WASM the explicit intrinsic guarantees a `v128.store`; on native the typed
+ // vector store lets the compiler use aligned SIMD stores (e.g. x86 movaps/movdqa).
[[gnu::always_inline]] inline void simd_u32x4_store(uint32_t* dst, SimdU32x4 v) noexcept
{
#ifdef __wasm_simd128__
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -207,7 +207,7 @@ TEST(PippengerConstantine, SimdX4MatchesScalarPathLanewise)
std::array<std::array<uint64_t, NUM_LIMBS_U64>, 4> scalars{
random_scalar_limbs(), random_scalar_limbs(), random_scalar_limbs(), random_scalar_limbs()
};
- std::array<uint32_t, 4> got_simd{};
+ alignas(16) std::array<uint32_t, 4> got_simd{};
production_simd_path(scalars.data(), bit_offset, window_bits, got_simd.data());
for (size_t lane = 0; lane < 4; ++lane) {
const uint32_t want = production_scalar_path(scalars[lane].data(), bit_offset, window_bits);
Expand Down
Loading