Skip to content

Commit a989a69

Browse files
authored
Merge pull request #2085 from sayantn/more-intrinsics
Use more LLVM intrinsics
2 parents 6398eb0 + 84ed835 commit a989a69

8 files changed

Lines changed: 108 additions & 266 deletions

File tree

crates/core_arch/src/aarch64/sve/generated.rs

Lines changed: 16 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -9799,7 +9799,7 @@ pub fn svdupq_n_f32(x0: f32, x1: f32, x2: f32, x3: f32) -> svfloat32_t {
97999799
unsafe extern "unadjusted" {
98009800
#[cfg_attr(
98019801
target_arch = "aarch64",
9802-
link_name = "llvm.experimental.vector.insert.nxv4f32.v4f32"
9802+
link_name = "llvm.vector.insert.nxv4f32.v4f32"
98039803
)]
98049804
fn _svdupq_n_f32(op0: svfloat32_t, op1: float32x4_t, idx: i64) -> svfloat32_t;
98059805
}
@@ -9817,7 +9817,7 @@ pub fn svdupq_n_s32(x0: i32, x1: i32, x2: i32, x3: i32) -> svint32_t {
98179817
unsafe extern "unadjusted" {
98189818
#[cfg_attr(
98199819
target_arch = "aarch64",
9820-
link_name = "llvm.experimental.vector.insert.nxv4i32.v4i32"
9820+
link_name = "llvm.vector.insert.nxv4i32.v4i32"
98219821
)]
98229822
fn _svdupq_n_s32(op0: svint32_t, op1: int32x4_t, idx: i64) -> svint32_t;
98239823
}
@@ -9851,7 +9851,7 @@ pub fn svdupq_n_f64(x0: f64, x1: f64) -> svfloat64_t {
98519851
unsafe extern "unadjusted" {
98529852
#[cfg_attr(
98539853
target_arch = "aarch64",
9854-
link_name = "llvm.experimental.vector.insert.nxv2f64.v2f64"
9854+
link_name = "llvm.vector.insert.nxv2f64.v2f64"
98559855
)]
98569856
fn _svdupq_n_f64(op0: svfloat64_t, op1: float64x2_t, idx: i64) -> svfloat64_t;
98579857
}
@@ -9869,7 +9869,7 @@ pub fn svdupq_n_s64(x0: i64, x1: i64) -> svint64_t {
98699869
unsafe extern "unadjusted" {
98709870
#[cfg_attr(
98719871
target_arch = "aarch64",
9872-
link_name = "llvm.experimental.vector.insert.nxv2i64.v2i64"
9872+
link_name = "llvm.vector.insert.nxv2i64.v2i64"
98739873
)]
98749874
fn _svdupq_n_s64(op0: svint64_t, op1: int64x2_t, idx: i64) -> svint64_t;
98759875
}
@@ -9904,7 +9904,7 @@ pub fn svdupq_n_s16(
99049904
unsafe extern "unadjusted" {
99059905
#[cfg_attr(
99069906
target_arch = "aarch64",
9907-
link_name = "llvm.experimental.vector.insert.nxv8i16.v8i16"
9907+
link_name = "llvm.vector.insert.nxv8i16.v8i16"
99089908
)]
99099909
fn _svdupq_n_s16(op0: svint16_t, op1: int16x8_t, idx: i64) -> svint16_t;
99109910
}
@@ -9972,7 +9972,7 @@ pub fn svdupq_n_s8(
99729972
unsafe extern "unadjusted" {
99739973
#[cfg_attr(
99749974
target_arch = "aarch64",
9975-
link_name = "llvm.experimental.vector.insert.nxv16i8.v16i8"
9975+
link_name = "llvm.vector.insert.nxv16i8.v16i8"
99769976
)]
99779977
fn _svdupq_n_s8(op0: svint8_t, op1: int8x16_t, idx: i64) -> svint8_t;
99789978
}
@@ -35208,7 +35208,7 @@ pub fn svreinterpret_u64_u64(op: svuint64_t) -> svuint64_t {
3520835208
#[cfg_attr(test, assert_instr(rev))]
3520935209
pub fn svrev_b8(op: svbool_t) -> svbool_t {
3521035210
unsafe extern "unadjusted" {
35211-
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.rev.nxv16i1")]
35211+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.vector.reverse.nxv16i1")]
3521235212
fn _svrev_b8(op: svbool_t) -> svbool_t;
3521335213
}
3521435214
unsafe { _svrev_b8(op) }
@@ -35221,7 +35221,7 @@ pub fn svrev_b8(op: svbool_t) -> svbool_t {
3522135221
#[cfg_attr(test, assert_instr(rev))]
3522235222
pub fn svrev_b16(op: svbool_t) -> svbool_t {
3522335223
unsafe extern "unadjusted" {
35224-
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.rev.nxv8i1")]
35224+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.vector.reverse.nxv8i1")]
3522535225
fn _svrev_b16(op: svbool8_t) -> svbool8_t;
3522635226
}
3522735227
unsafe { _svrev_b16(op.sve_into()).sve_into() }
@@ -35234,7 +35234,7 @@ pub fn svrev_b16(op: svbool_t) -> svbool_t {
3523435234
#[cfg_attr(test, assert_instr(rev))]
3523535235
pub fn svrev_b32(op: svbool_t) -> svbool_t {
3523635236
unsafe extern "unadjusted" {
35237-
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.rev.nxv4i1")]
35237+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.vector.reverse.nxv4i1")]
3523835238
fn _svrev_b32(op: svbool4_t) -> svbool4_t;
3523935239
}
3524035240
unsafe { _svrev_b32(op.sve_into()).sve_into() }
@@ -35247,7 +35247,7 @@ pub fn svrev_b32(op: svbool_t) -> svbool_t {
3524735247
#[cfg_attr(test, assert_instr(rev))]
3524835248
pub fn svrev_b64(op: svbool_t) -> svbool_t {
3524935249
unsafe extern "unadjusted" {
35250-
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.rev.nxv2i1")]
35250+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.vector.reverse.nxv2i1")]
3525135251
fn _svrev_b64(op: svbool2_t) -> svbool2_t;
3525235252
}
3525335253
unsafe { _svrev_b64(op.sve_into()).sve_into() }
@@ -35260,7 +35260,7 @@ pub fn svrev_b64(op: svbool_t) -> svbool_t {
3526035260
#[cfg_attr(test, assert_instr(rev))]
3526135261
pub fn svrev_f32(op: svfloat32_t) -> svfloat32_t {
3526235262
unsafe extern "unadjusted" {
35263-
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.rev.nxv4f32")]
35263+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.vector.reverse.nxv4f32")]
3526435264
fn _svrev_f32(op: svfloat32_t) -> svfloat32_t;
3526535265
}
3526635266
unsafe { _svrev_f32(op) }
@@ -35273,7 +35273,7 @@ pub fn svrev_f32(op: svfloat32_t) -> svfloat32_t {
3527335273
#[cfg_attr(test, assert_instr(rev))]
3527435274
pub fn svrev_f64(op: svfloat64_t) -> svfloat64_t {
3527535275
unsafe extern "unadjusted" {
35276-
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.rev.nxv2f64")]
35276+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.vector.reverse.nxv2f64")]
3527735277
fn _svrev_f64(op: svfloat64_t) -> svfloat64_t;
3527835278
}
3527935279
unsafe { _svrev_f64(op) }
@@ -35286,7 +35286,7 @@ pub fn svrev_f64(op: svfloat64_t) -> svfloat64_t {
3528635286
#[cfg_attr(test, assert_instr(rev))]
3528735287
pub fn svrev_s8(op: svint8_t) -> svint8_t {
3528835288
unsafe extern "unadjusted" {
35289-
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.rev.nxv16i8")]
35289+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.vector.reverse.nxv16i8")]
3529035290
fn _svrev_s8(op: svint8_t) -> svint8_t;
3529135291
}
3529235292
unsafe { _svrev_s8(op) }
@@ -35299,7 +35299,7 @@ pub fn svrev_s8(op: svint8_t) -> svint8_t {
3529935299
#[cfg_attr(test, assert_instr(rev))]
3530035300
pub fn svrev_s16(op: svint16_t) -> svint16_t {
3530135301
unsafe extern "unadjusted" {
35302-
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.rev.nxv8i16")]
35302+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.vector.reverse.nxv8i16")]
3530335303
fn _svrev_s16(op: svint16_t) -> svint16_t;
3530435304
}
3530535305
unsafe { _svrev_s16(op) }
@@ -35312,7 +35312,7 @@ pub fn svrev_s16(op: svint16_t) -> svint16_t {
3531235312
#[cfg_attr(test, assert_instr(rev))]
3531335313
pub fn svrev_s32(op: svint32_t) -> svint32_t {
3531435314
unsafe extern "unadjusted" {
35315-
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.rev.nxv4i32")]
35315+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.vector.reverse.nxv4i32")]
3531635316
fn _svrev_s32(op: svint32_t) -> svint32_t;
3531735317
}
3531835318
unsafe { _svrev_s32(op) }
@@ -35325,7 +35325,7 @@ pub fn svrev_s32(op: svint32_t) -> svint32_t {
3532535325
#[cfg_attr(test, assert_instr(rev))]
3532635326
pub fn svrev_s64(op: svint64_t) -> svint64_t {
3532735327
unsafe extern "unadjusted" {
35328-
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.aarch64.sve.rev.nxv2i64")]
35328+
#[cfg_attr(target_arch = "aarch64", link_name = "llvm.vector.reverse.nxv2i64")]
3532935329
fn _svrev_s64(op: svint64_t) -> svint64_t;
3533035330
}
3533135331
unsafe { _svrev_s64(op) }

crates/core_arch/src/x86/avx512bf16.rs

Lines changed: 5 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
//!
33
//! [AVX512BF16 intrinsics]: https://software.intel.com/sites/landingpage/IntrinsicsGuide/#expand=1769&avx512techs=AVX512_BF16
44
5-
use crate::arch::asm;
65
use crate::core_arch::{simd::*, x86::*};
76
use crate::intrinsics::simd::*;
87

@@ -17,6 +16,8 @@ unsafe extern "C" {
1716
fn cvtne2ps2bf16_256(a: f32x8, b: f32x8) -> i16x16;
1817
#[link_name = "llvm.x86.avx512bf16.cvtne2ps2bf16.512"]
1918
fn cvtne2ps2bf16_512(a: f32x16, b: f32x16) -> i16x32;
19+
#[link_name = "llvm.x86.avx512bf16.mask.cvtneps2bf16.128"]
20+
fn cvtneps2bf16_128(a: f32x4, src: i16x8, k: __mmask8) -> i16x8;
2021
#[link_name = "llvm.x86.avx512bf16.cvtneps2bf16.256"]
2122
fn cvtneps2bf16_256(a: f32x8) -> i16x8;
2223
#[link_name = "llvm.x86.avx512bf16.cvtneps2bf16.512"]
@@ -519,16 +520,7 @@ pub fn _mm_cvtsbh_ss(a: bf16) -> f32 {
519520
#[cfg_attr(test, assert_instr("vcvtneps2bf16"))]
520521
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
521522
pub fn _mm_cvtneps_pbh(a: __m128) -> __m128bh {
522-
unsafe {
523-
let mut dst: __m128bh;
524-
asm!(
525-
"vcvtneps2bf16 {dst}, {src}",
526-
dst = lateout(xmm_reg) dst,
527-
src = in(xmm_reg) a,
528-
options(pure, nomem, nostack, preserves_flags)
529-
);
530-
dst
531-
}
523+
_mm_mask_cvtneps_pbh(__m128bh::splat(0), !0, a)
532524
}
533525

534526
/// Converts packed single-precision (32-bit) floating-point elements in a to packed BF16 (16-bit)
@@ -541,17 +533,7 @@ pub fn _mm_cvtneps_pbh(a: __m128) -> __m128bh {
541533
#[cfg_attr(test, assert_instr("vcvtneps2bf16"))]
542534
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
543535
pub fn _mm_mask_cvtneps_pbh(src: __m128bh, k: __mmask8, a: __m128) -> __m128bh {
544-
unsafe {
545-
let mut dst = src;
546-
asm!(
547-
"vcvtneps2bf16 {dst}{{{k}}},{src}",
548-
dst = inlateout(xmm_reg) dst,
549-
src = in(xmm_reg) a,
550-
k = in(kreg) k,
551-
options(pure, nomem, nostack, preserves_flags)
552-
);
553-
dst
554-
}
536+
unsafe { cvtneps2bf16_128(a.as_f32x4(), src.as_i16x8(), k).as_m128bh() }
555537
}
556538

557539
/// Converts packed single-precision (32-bit) floating-point elements in a to packed BF16 (16-bit)
@@ -564,17 +546,7 @@ pub fn _mm_mask_cvtneps_pbh(src: __m128bh, k: __mmask8, a: __m128) -> __m128bh {
564546
#[cfg_attr(test, assert_instr("vcvtneps2bf16"))]
565547
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
566548
pub fn _mm_maskz_cvtneps_pbh(k: __mmask8, a: __m128) -> __m128bh {
567-
unsafe {
568-
let mut dst: __m128bh;
569-
asm!(
570-
"vcvtneps2bf16 {dst}{{{k}}}{{z}},{src}",
571-
dst = lateout(xmm_reg) dst,
572-
src = in(xmm_reg) a,
573-
k = in(kreg) k,
574-
options(pure, nomem, nostack, preserves_flags)
575-
);
576-
dst
577-
}
549+
_mm_mask_cvtneps_pbh(__m128bh::splat(0), k, a)
578550
}
579551

580552
/// Converts a single-precision (32-bit) floating-point element in a to a BF16 (16-bit) floating-point

crates/core_arch/src/x86/avx512bitalg.rs

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -28,12 +28,12 @@ use stdarch_test::assert_instr;
2828

2929
#[allow(improper_ctypes)]
3030
unsafe extern "C" {
31-
#[link_name = "llvm.x86.avx512.mask.vpshufbitqmb.512"]
32-
fn bitshuffle_512(data: i8x64, indices: i8x64, mask: __mmask64) -> __mmask64;
33-
#[link_name = "llvm.x86.avx512.mask.vpshufbitqmb.256"]
34-
fn bitshuffle_256(data: i8x32, indices: i8x32, mask: __mmask32) -> __mmask32;
35-
#[link_name = "llvm.x86.avx512.mask.vpshufbitqmb.128"]
36-
fn bitshuffle_128(data: i8x16, indices: i8x16, mask: __mmask16) -> __mmask16;
31+
#[link_name = "llvm.x86.avx512.vpshufbitqmb.512"]
32+
fn bitshuffle_512(data: i8x64, indices: i8x64) -> __mmask64;
33+
#[link_name = "llvm.x86.avx512.vpshufbitqmb.256"]
34+
fn bitshuffle_256(data: i8x32, indices: i8x32) -> __mmask32;
35+
#[link_name = "llvm.x86.avx512.vpshufbitqmb.128"]
36+
fn bitshuffle_128(data: i8x16, indices: i8x16) -> __mmask16;
3737
}
3838

3939
/// For each packed 16-bit integer maps the value to the number of logical 1 bits.
@@ -370,7 +370,7 @@ pub const fn _mm_mask_popcnt_epi8(src: __m128i, k: __mmask16, a: __m128i) -> __m
370370
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
371371
#[cfg_attr(test, assert_instr(vpshufbitqmb))]
372372
pub fn _mm512_bitshuffle_epi64_mask(b: __m512i, c: __m512i) -> __mmask64 {
373-
unsafe { bitshuffle_512(b.as_i8x64(), c.as_i8x64(), !0) }
373+
unsafe { bitshuffle_512(b.as_i8x64(), c.as_i8x64()) }
374374
}
375375

376376
/// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
@@ -386,7 +386,7 @@ pub fn _mm512_bitshuffle_epi64_mask(b: __m512i, c: __m512i) -> __mmask64 {
386386
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
387387
#[cfg_attr(test, assert_instr(vpshufbitqmb))]
388388
pub fn _mm512_mask_bitshuffle_epi64_mask(k: __mmask64, b: __m512i, c: __m512i) -> __mmask64 {
389-
unsafe { bitshuffle_512(b.as_i8x64(), c.as_i8x64(), k) }
389+
_mm512_bitshuffle_epi64_mask(b, c) & k
390390
}
391391

392392
/// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
@@ -399,7 +399,7 @@ pub fn _mm512_mask_bitshuffle_epi64_mask(k: __mmask64, b: __m512i, c: __m512i) -
399399
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
400400
#[cfg_attr(test, assert_instr(vpshufbitqmb))]
401401
pub fn _mm256_bitshuffle_epi64_mask(b: __m256i, c: __m256i) -> __mmask32 {
402-
unsafe { bitshuffle_256(b.as_i8x32(), c.as_i8x32(), !0) }
402+
unsafe { bitshuffle_256(b.as_i8x32(), c.as_i8x32()) }
403403
}
404404

405405
/// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
@@ -415,7 +415,7 @@ pub fn _mm256_bitshuffle_epi64_mask(b: __m256i, c: __m256i) -> __mmask32 {
415415
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
416416
#[cfg_attr(test, assert_instr(vpshufbitqmb))]
417417
pub fn _mm256_mask_bitshuffle_epi64_mask(k: __mmask32, b: __m256i, c: __m256i) -> __mmask32 {
418-
unsafe { bitshuffle_256(b.as_i8x32(), c.as_i8x32(), k) }
418+
_mm256_bitshuffle_epi64_mask(b, c) & k
419419
}
420420

421421
/// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
@@ -428,7 +428,7 @@ pub fn _mm256_mask_bitshuffle_epi64_mask(k: __mmask32, b: __m256i, c: __m256i) -
428428
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
429429
#[cfg_attr(test, assert_instr(vpshufbitqmb))]
430430
pub fn _mm_bitshuffle_epi64_mask(b: __m128i, c: __m128i) -> __mmask16 {
431-
unsafe { bitshuffle_128(b.as_i8x16(), c.as_i8x16(), !0) }
431+
unsafe { bitshuffle_128(b.as_i8x16(), c.as_i8x16()) }
432432
}
433433

434434
/// Considers the input `b` as packed 64-bit integers and `c` as packed 8-bit integers.
@@ -444,7 +444,7 @@ pub fn _mm_bitshuffle_epi64_mask(b: __m128i, c: __m128i) -> __mmask16 {
444444
#[stable(feature = "stdarch_x86_avx512", since = "1.89")]
445445
#[cfg_attr(test, assert_instr(vpshufbitqmb))]
446446
pub fn _mm_mask_bitshuffle_epi64_mask(k: __mmask16, b: __m128i, c: __m128i) -> __mmask16 {
447-
unsafe { bitshuffle_128(b.as_i8x16(), c.as_i8x16(), k) }
447+
_mm_bitshuffle_epi64_mask(b, c) & k
448448
}
449449

450450
#[cfg(test)]

0 commit comments

Comments
 (0)