@@ -6321,20 +6321,22 @@ pub const unsafe fn _mm_mask_storeu_epi8(mem_addr: *mut i8, mask: __mmask16, a:
63216321#[target_feature(enable = "avx512bw")]
63226322#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
63236323#[cfg_attr(test, assert_instr(vpmaddwd))]
6324- pub fn _mm512_madd_epi16(a: __m512i, b: __m512i) -> __m512i {
6325- // It's a trick used in the Adler-32 algorithm to perform a widening addition.
6326- //
6327- // ```rust
6328- // #[target_feature(enable = "avx512bw")]
6329- // unsafe fn widening_add(mad: __m512i) -> __m512i {
6330- // _mm512_madd_epi16(mad, _mm512_set1_epi16(1))
6331- // }
6332- // ```
6333- //
6334- // If we implement this using generic vector intrinsics, the optimizer
6335- // will eliminate this pattern, and `vpmaddwd` will no longer be emitted.
6336- // For this reason, we use x86 intrinsics.
6337- unsafe { transmute(vpmaddwd(a.as_i16x32(), b.as_i16x32())) }
6324+ #[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6325+ pub const fn _mm512_madd_epi16(a: __m512i, b: __m512i) -> __m512i {
6326+ unsafe {
6327+ let r: i32x32 = simd_mul(simd_cast(a.as_i16x32()), simd_cast(b.as_i16x32()));
6328+ let even: i32x16 = simd_shuffle!(
6329+ r,
6330+ r,
6331+ [0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, 26, 28, 30]
6332+ );
6333+ let odd: i32x16 = simd_shuffle!(
6334+ r,
6335+ r,
6336+ [1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31]
6337+ );
6338+ simd_add(even, odd).as_m512i()
6339+ }
63386340}
63396341
63406342/// Multiply packed signed 16-bit integers in a and b, producing intermediate signed 32-bit integers. Horizontally add adjacent pairs of intermediate 32-bit integers, and pack the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set).
@@ -6344,7 +6346,8 @@ pub fn _mm512_madd_epi16(a: __m512i, b: __m512i) -> __m512i {
63446346#[target_feature(enable = "avx512bw")]
63456347#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
63466348#[cfg_attr(test, assert_instr(vpmaddwd))]
6347- pub fn _mm512_mask_madd_epi16(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
6349+ #[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6350+ pub const fn _mm512_mask_madd_epi16(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
63486351 unsafe {
63496352 let madd = _mm512_madd_epi16(a, b).as_i32x16();
63506353 transmute(simd_select_bitmask(k, madd, src.as_i32x16()))
@@ -6358,7 +6361,8 @@ pub fn _mm512_mask_madd_epi16(src: __m512i, k: __mmask16, a: __m512i, b: __m512i
63586361#[target_feature(enable = "avx512bw")]
63596362#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
63606363#[cfg_attr(test, assert_instr(vpmaddwd))]
6361- pub fn _mm512_maskz_madd_epi16(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
6364+ #[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6365+ pub const fn _mm512_maskz_madd_epi16(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
63626366 unsafe {
63636367 let madd = _mm512_madd_epi16(a, b).as_i32x16();
63646368 transmute(simd_select_bitmask(k, madd, i32x16::ZERO))
@@ -6372,7 +6376,8 @@ pub fn _mm512_maskz_madd_epi16(k: __mmask16, a: __m512i, b: __m512i) -> __m512i
63726376#[target_feature(enable = "avx512bw,avx512vl")]
63736377#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
63746378#[cfg_attr(test, assert_instr(vpmaddwd))]
6375- pub fn _mm256_mask_madd_epi16(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
6379+ #[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6380+ pub const fn _mm256_mask_madd_epi16(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
63766381 unsafe {
63776382 let madd = _mm256_madd_epi16(a, b).as_i32x8();
63786383 transmute(simd_select_bitmask(k, madd, src.as_i32x8()))
@@ -6386,7 +6391,8 @@ pub fn _mm256_mask_madd_epi16(src: __m256i, k: __mmask8, a: __m256i, b: __m256i)
63866391#[target_feature(enable = "avx512bw,avx512vl")]
63876392#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
63886393#[cfg_attr(test, assert_instr(vpmaddwd))]
6389- pub fn _mm256_maskz_madd_epi16(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
6394+ #[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6395+ pub const fn _mm256_maskz_madd_epi16(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
63906396 unsafe {
63916397 let madd = _mm256_madd_epi16(a, b).as_i32x8();
63926398 transmute(simd_select_bitmask(k, madd, i32x8::ZERO))
@@ -6400,7 +6406,8 @@ pub fn _mm256_maskz_madd_epi16(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
64006406#[target_feature(enable = "avx512bw,avx512vl")]
64016407#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
64026408#[cfg_attr(test, assert_instr(vpmaddwd))]
6403- pub fn _mm_mask_madd_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
6409+ #[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6410+ pub const fn _mm_mask_madd_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
64046411 unsafe {
64056412 let madd = _mm_madd_epi16(a, b).as_i32x4();
64066413 transmute(simd_select_bitmask(k, madd, src.as_i32x4()))
@@ -6414,7 +6421,8 @@ pub fn _mm_mask_madd_epi16(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) ->
64146421#[target_feature(enable = "avx512bw,avx512vl")]
64156422#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
64166423#[cfg_attr(test, assert_instr(vpmaddwd))]
6417- pub fn _mm_maskz_madd_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
6424+ #[rustc_const_unstable(feature = "stdarch_const_x86", issue = "149298")]
6425+ pub const fn _mm_maskz_madd_epi16(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
64186426 unsafe {
64196427 let madd = _mm_madd_epi16(a, b).as_i32x4();
64206428 transmute(simd_select_bitmask(k, madd, i32x4::ZERO))
@@ -12574,8 +12582,6 @@ unsafe extern "C" {
1257412582 #[link_name = "llvm.x86.avx512.pmul.hr.sw.512"]
1257512583 fn vpmulhrsw(a: i16x32, b: i16x32) -> i16x32;
1257612584
12577- #[link_name = "llvm.x86.avx512.pmaddw.d.512"]
12578- fn vpmaddwd(a: i16x32, b: i16x32) -> i32x16;
1257912585 #[link_name = "llvm.x86.avx512.pmaddubs.w.512"]
1258012586 fn vpmaddubsw(a: u8x64, b: i8x64) -> i16x32;
1258112587
@@ -17500,7 +17506,7 @@ mod tests {
1750017506 }
1750117507
1750217508 #[simd_test(enable = "avx512bw")]
17503- fn test_mm512_madd_epi16() {
17509+ const fn test_mm512_madd_epi16() {
1750417510 let a = _mm512_set1_epi16(1);
1750517511 let b = _mm512_set1_epi16(1);
1750617512 let r = _mm512_madd_epi16(a, b);
@@ -17509,7 +17515,7 @@ mod tests {
1750917515 }
1751017516
1751117517 #[simd_test(enable = "avx512bw")]
17512- fn test_mm512_mask_madd_epi16() {
17518+ const fn test_mm512_mask_madd_epi16() {
1751317519 let a = _mm512_set1_epi16(1);
1751417520 let b = _mm512_set1_epi16(1);
1751517521 let r = _mm512_mask_madd_epi16(a, 0, a, b);
@@ -17537,7 +17543,7 @@ mod tests {
1753717543 }
1753817544
1753917545 #[simd_test(enable = "avx512bw")]
17540- fn test_mm512_maskz_madd_epi16() {
17546+ const fn test_mm512_maskz_madd_epi16() {
1754117547 let a = _mm512_set1_epi16(1);
1754217548 let b = _mm512_set1_epi16(1);
1754317549 let r = _mm512_maskz_madd_epi16(0, a, b);
@@ -17548,7 +17554,7 @@ mod tests {
1754817554 }
1754917555
1755017556 #[simd_test(enable = "avx512bw,avx512vl")]
17551- fn test_mm256_mask_madd_epi16() {
17557+ const fn test_mm256_mask_madd_epi16() {
1755217558 let a = _mm256_set1_epi16(1);
1755317559 let b = _mm256_set1_epi16(1);
1755417560 let r = _mm256_mask_madd_epi16(a, 0, a, b);
@@ -17568,7 +17574,7 @@ mod tests {
1756817574 }
1756917575
1757017576 #[simd_test(enable = "avx512bw,avx512vl")]
17571- fn test_mm256_maskz_madd_epi16() {
17577+ const fn test_mm256_maskz_madd_epi16() {
1757217578 let a = _mm256_set1_epi16(1);
1757317579 let b = _mm256_set1_epi16(1);
1757417580 let r = _mm256_maskz_madd_epi16(0, a, b);
@@ -17579,7 +17585,7 @@ mod tests {
1757917585 }
1758017586
1758117587 #[simd_test(enable = "avx512bw,avx512vl")]
17582- fn test_mm_mask_madd_epi16() {
17588+ const fn test_mm_mask_madd_epi16() {
1758317589 let a = _mm_set1_epi16(1);
1758417590 let b = _mm_set1_epi16(1);
1758517591 let r = _mm_mask_madd_epi16(a, 0, a, b);
@@ -17590,7 +17596,7 @@ mod tests {
1759017596 }
1759117597
1759217598 #[simd_test(enable = "avx512bw,avx512vl")]
17593- fn test_mm_maskz_madd_epi16() {
17599+ const fn test_mm_maskz_madd_epi16() {
1759417600 let a = _mm_set1_epi16(1);
1759517601 let b = _mm_set1_epi16(1);
1759617602 let r = _mm_maskz_madd_epi16(0, a, b);
0 commit comments