@@ -991,7 +991,21 @@ pub const fn _mm256_hadd_epi32(a: __m256i, b: __m256i) -> __m256i {
991991#[ cfg_attr( test, assert_instr( vphaddsw) ) ]
992992#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
993993pub fn _mm256_hadds_epi16 ( a : __m256i , b : __m256i ) -> __m256i {
// Removed body: passed both operands, viewed as i16x16, to `phaddsw` and transmuted the result.
994- unsafe { transmute ( phaddsw ( a. as_i16x16 ( ) , b. as_i16x16 ( ) ) ) }
// Added body: the same horizontal saturating add, built from two shuffles and one lane-wise add.
// Reinterpret both inputs as sixteen 16-bit lanes (shadowing the parameters).
994+ let a = a. as_i16x16 ( ) ;
995+ let b = b. as_i16x16 ( ) ;
996+ unsafe {
// `even` gathers the even-numbered lanes in groups of four: indices 0,2,4,6 then 16,18,20,22,
// then 8,10,12,14 then 24,26,28,30.
// NOTE(review): assumes the usual shuffle convention that indices 0-15 select from `a` and
// 16-31 select from `b`; under it the order is a-low, b-low, a-high, b-high.
997+ let even: i16x16 = simd_shuffle ! (
998+ a,
999+ b,
1000+ [ 0 , 2 , 4 , 6 , 16 , 18 , 20 , 22 , 8 , 10 , 12 , 14 , 24 , 26 , 28 , 30 ]
1001+ ) ;
// `odd` uses the same index pattern shifted by one, so odd[i] is the lane right after even[i].
1002+ let odd: i16x16 = simd_shuffle ! (
1003+ a,
1004+ b,
1005+ [ 1 , 3 , 5 , 7 , 17 , 19 , 21 , 23 , 9 , 11 , 13 , 15 , 25 , 27 , 29 , 31 ]
1006+ ) ;
// Lane-wise saturating add of each (even, odd) pair, converted back to `__m256i`.
1007+ simd_saturating_add ( even, odd) . as_m256i ( )
1008+ }
9951009}
9961010
9971011/// Horizontally subtract adjacent pairs of 16-bit integers in `a` and `b`.
@@ -1047,7 +1061,21 @@ pub const fn _mm256_hsub_epi32(a: __m256i, b: __m256i) -> __m256i {
10471061#[ cfg_attr( test, assert_instr( vphsubsw) ) ]
10481062#[ stable( feature = "simd_x86" , since = "1.27.0" ) ]
10491063pub fn _mm256_hsubs_epi16 ( a : __m256i , b : __m256i ) -> __m256i {
// Removed body: passed both operands, viewed as i16x16, to `phsubsw` and transmuted the result.
1050- unsafe { transmute ( phsubsw ( a. as_i16x16 ( ) , b. as_i16x16 ( ) ) ) }
// Added body: the same horizontal saturating subtract, built from two shuffles and one lane-wise subtract.
// Reinterpret both inputs as sixteen 16-bit lanes (shadowing the parameters).
1064+ let a = a. as_i16x16 ( ) ;
1065+ let b = b. as_i16x16 ( ) ;
1066+ unsafe {
// `even` gathers the even-numbered lanes in groups of four: indices 0,2,4,6 then 16,18,20,22,
// then 8,10,12,14 then 24,26,28,30.
// NOTE(review): assumes the usual shuffle convention that indices 0-15 select from `a` and
// 16-31 select from `b`; under it the order is a-low, b-low, a-high, b-high.
1067+ let even: i16x16 = simd_shuffle ! (
1068+ a,
1069+ b,
1070+ [ 0 , 2 , 4 , 6 , 16 , 18 , 20 , 22 , 8 , 10 , 12 , 14 , 24 , 26 , 28 , 30 ]
1071+ ) ;
// `odd` uses the same index pattern shifted by one, so odd[i] is the lane right after even[i].
1072+ let odd: i16x16 = simd_shuffle ! (
1073+ a,
1074+ b,
1075+ [ 1 , 3 , 5 , 7 , 17 , 19 , 21 , 23 , 9 , 11 , 13 , 15 , 25 , 27 , 29 , 31 ]
1076+ ) ;
// Operand order matters here: each result lane is even[i] - odd[i] (saturating), i.e. the
// first lane of each adjacent pair minus the second. The result is converted back to `__m256i`.
1077+ simd_saturating_sub ( even, odd) . as_m256i ( )
1078+ }
10511079}
10521080
10531081/// Returns values from `slice` at offsets determined by `offsets * scale`,
@@ -3791,10 +3819,6 @@ pub const fn _mm256_extract_epi16<const INDEX: i32>(a: __m256i) -> i32 {
37913819
37923820#[ allow( improper_ctypes) ]
37933821unsafe extern "C" {
3794- #[ link_name = "llvm.x86.avx2.phadd.sw" ]
3795- fn phaddsw ( a : i16x16 , b : i16x16 ) -> i16x16 ;
3796- #[ link_name = "llvm.x86.avx2.phsub.sw" ]
3797- fn phsubsw ( a : i16x16 , b : i16x16 ) -> i16x16 ;
37983822 #[ link_name = "llvm.x86.avx2.pmadd.wd" ]
37993823 fn pmaddwd ( a : i16x16 , b : i16x16 ) -> i32x8 ;
38003824 #[ link_name = "llvm.x86.avx2.pmadd.ub.sw" ]
0 commit comments