Skip to content

Commit 560d922

Browse files
committed
add vpmaddwd tests back in
1 parent 1a103b1 commit 560d922

1 file changed

Lines changed: 20 additions & 0 deletions

File tree

crates/core_arch/src/x86/avx2.rs

Lines changed: 20 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -4677,6 +4677,26 @@ mod tests {
46774677
assert_eq_m256i(r, e);
46784678
}
46794679

4680+
#[target_feature(enable = "avx2")]
4681+
#[cfg_attr(test, assert_instr(vpmaddwd))]
4682+
unsafe fn test_mm256_madd_epi16_mul_one(v: __m256i) -> __m256i {
4683+
// Codegen check: multiplying `v` by an all-ones vector must still select `vpmaddwd`.
// This is a trick used in the adler32 algorithm to get a widening addition: with every
4684+
// 16-bit multiplier equal to 1, each 32-bit result lane is the sum of two adjacent 16-bit
// inputs. The multiplication by 1 is trivial, but must not be optimized out because then the
4685+
// vpmaddwd instruction is no longer selected. The assert_instr verifies that this is the case.
4686+
let one_v = _mm256_set1_epi16(1);
4687+
_mm256_madd_epi16(v, one_v)
4688+
}
4689+
4690+
#[target_feature(enable = "avx2")]
4691+
#[cfg_attr(test, assert_instr(vpmaddwd))]
4692+
unsafe fn test_mm256_madd_epi16_shl(v: __m256i) -> __m256i {
4693+
// This is a trick used in the base64 algorithm to get a widening addition. Instead of a
4694+
// multiplication, a vector shl is used. In LLVM 22 that breaks the pattern recognition
4695+
// for the automatic optimization to vpmaddwd. The assert_instr verifies that vpmaddwd is
// still selected for this input.
// NOTE(review): `_mm256_set1_epi32(12)` broadcasts the 32-bit value 12, so the 16-bit
// multiplier lanes alternate between 12 and 0, and despite its name `shift_value` is passed
// as the multiplier operand -- confirm this constant is what the shl pattern above intends.
4696+
let shift_value = _mm256_set1_epi32(12i32);
4697+
_mm256_madd_epi16(v, shift_value)
4698+
}
4699+
46804700
#[simd_test(enable = "avx2")]
46814701
const fn test_mm256_inserti128_si256() {
46824702
let a = _mm256_setr_epi64x(1, 2, 3, 4);

0 commit comments

Comments
 (0)