File tree Expand file tree Collapse file tree
Expand file tree Collapse file tree Original file line number Diff line number Diff line change @@ -4677,6 +4677,26 @@ mod tests {
46774677 assert_eq_m256i ( r, e) ;
46784678 }
46794679
4680+ #[ target_feature( enable = "avx2" ) ]
4681+ #[ cfg_attr( test, assert_instr( vpmaddwd) ) ]
4682+ unsafe fn test_mm256_madd_epi16_mul_one ( v : __m256i ) -> __m256i {
4683+ // This is a trick used in the adler32 algorithm to get a widening addition. The
4684+ // multiplication by 1 is trivial, but must not be optimized out because then the vpmaddwd
4685+ // instruction is no longer selected. The assert_instr verifies that this is the case.
4686+ let one_v = _mm256_set1_epi16 ( 1 ) ;
4687+ _mm256_madd_epi16 ( v, one_v)
4688+ }
4689+
4690+ #[ target_feature( enable = "avx2" ) ]
4691+ #[ cfg_attr( test, assert_instr( vpmaddwd) ) ]
4692+ unsafe fn test_mm256_madd_epi16_shl ( v : __m256i ) -> __m256i {
4693+ // This is a trick used in the base64 algorithm to get a widening addition. Instead of a
4694+ // multiplication, a vector shl is used. In LLVM 22 that breaks the pattern recognition
4695+ // for the automatic optimization to vpmaddwd.
4696+ let shift_value = _mm256_set1_epi32 ( 12i32 ) ;
4697+ _mm256_madd_epi16 ( v, shift_value)
4698+ }
4699+
46804700 #[ simd_test( enable = "avx2" ) ]
46814701 const fn test_mm256_inserti128_si256 ( ) {
46824702 let a = _mm256_setr_epi64x ( 1 , 2 , 3 , 4 ) ;
You can’t perform that action at this time.
0 commit comments