Skip to content

Commit f4e9c59

Browse files
committed
Change implementation of v{us,su,}dot_lane
1 parent 55add51 commit f4e9c59

2 files changed

Lines changed: 89 additions & 143 deletions

File tree

crates/core_arch/src/arm_shared/neon/generated.rs

Lines changed: 48 additions & 88 deletions
Original file line number | Diff line number | Diff line change
@@ -9663,11 +9663,9 @@ pub fn vcvtq_u32_f32(a: float32x4_t) -> uint32x4_t {
96639663
)]
96649664
pub fn vdot_lane_s32<const LANE: i32>(a: int32x2_t, b: int8x8_t, c: int8x8_t) -> int32x2_t {
96659665
static_assert_uimm_bits!(LANE, 1);
9666-
let c: int32x2_t = vreinterpret_s32_s8(c);
9667-
unsafe {
9668-
let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]);
9669-
vdot_s32(a, b, vreinterpret_s8_s32(c))
9670-
}
9666+
let c = vreinterpret_s32_s8(c);
9667+
let c = vdup_lane_s32::<LANE>(c);
9668+
vdot_s32(a, b, vreinterpret_s8_s32(c))
96719669
}
96729670
#[doc = "Dot product arithmetic (indexed)"]
96739671
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdotq_lane_s32)"]
@@ -9690,12 +9688,9 @@ pub fn vdot_lane_s32<const LANE: i32>(a: int32x2_t, b: int8x8_t, c: int8x8_t) ->
96909688
)]
96919689
pub fn vdotq_lane_s32<const LANE: i32>(a: int32x4_t, b: int8x16_t, c: int8x8_t) -> int32x4_t {
96929690
static_assert_uimm_bits!(LANE, 1);
9693-
let c: int32x2_t = vreinterpret_s32_s8(c);
9694-
unsafe {
9695-
let c: int32x4_t =
9696-
simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
9697-
vdotq_s32(a, b, vreinterpretq_s8_s32(c))
9698-
}
9691+
let c = vreinterpret_s32_s8(c);
9692+
let c = vdupq_lane_s32::<LANE>(c);
9693+
vdotq_s32(a, b, vreinterpretq_s8_s32(c))
96999694
}
97009695
#[doc = "Dot product arithmetic (indexed)"]
97019696
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_lane_u32)"]
@@ -9718,11 +9713,9 @@ pub fn vdotq_lane_s32<const LANE: i32>(a: int32x4_t, b: int8x16_t, c: int8x8_t)
97189713
)]
97199714
pub fn vdot_lane_u32<const LANE: i32>(a: uint32x2_t, b: uint8x8_t, c: uint8x8_t) -> uint32x2_t {
97209715
static_assert_uimm_bits!(LANE, 1);
9721-
let c: uint32x2_t = vreinterpret_u32_u8(c);
9722-
unsafe {
9723-
let c: uint32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]);
9724-
vdot_u32(a, b, vreinterpret_u8_u32(c))
9725-
}
9716+
let c = vreinterpret_u32_u8(c);
9717+
let c = vdup_lane_u32::<LANE>(c);
9718+
vdot_u32(a, b, vreinterpret_u8_u32(c))
97269719
}
97279720
#[doc = "Dot product arithmetic (indexed)"]
97289721
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdotq_lane_u32)"]
@@ -9745,12 +9738,9 @@ pub fn vdot_lane_u32<const LANE: i32>(a: uint32x2_t, b: uint8x8_t, c: uint8x8_t)
97459738
)]
97469739
pub fn vdotq_lane_u32<const LANE: i32>(a: uint32x4_t, b: uint8x16_t, c: uint8x8_t) -> uint32x4_t {
97479740
static_assert_uimm_bits!(LANE, 1);
9748-
let c: uint32x2_t = vreinterpret_u32_u8(c);
9749-
unsafe {
9750-
let c: uint32x4_t =
9751-
simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
9752-
vdotq_u32(a, b, vreinterpretq_u8_u32(c))
9753-
}
9741+
let c = vreinterpret_u32_u8(c);
9742+
let c = vdupq_lane_u32::<LANE>(c);
9743+
vdotq_u32(a, b, vreinterpretq_u8_u32(c))
97549744
}
97559745
#[doc = "Dot product arithmetic (indexed)"]
97569746
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_laneq_s32)"]
@@ -9766,11 +9756,9 @@ pub fn vdotq_lane_u32<const LANE: i32>(a: uint32x4_t, b: uint8x16_t, c: uint8x8_
97669756
#[unstable(feature = "stdarch_neon_dotprod", issue = "117224")]
97679757
pub fn vdot_laneq_s32<const LANE: i32>(a: int32x2_t, b: int8x8_t, c: int8x16_t) -> int32x2_t {
97689758
static_assert_uimm_bits!(LANE, 2);
9769-
let c: int32x4_t = vreinterpretq_s32_s8(c);
9770-
unsafe {
9771-
let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]);
9772-
vdot_s32(a, b, vreinterpret_s8_s32(c))
9773-
}
9759+
let c = vreinterpretq_s32_s8(c);
9760+
let c = vdup_laneq_s32::<LANE>(c);
9761+
vdot_s32(a, b, vreinterpret_s8_s32(c))
97749762
}
97759763
#[doc = "Dot product arithmetic (indexed)"]
97769764
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdotq_laneq_s32)"]
@@ -9786,12 +9774,9 @@ pub fn vdot_laneq_s32<const LANE: i32>(a: int32x2_t, b: int8x8_t, c: int8x16_t)
97869774
#[unstable(feature = "stdarch_neon_dotprod", issue = "117224")]
97879775
pub fn vdotq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int8x16_t, c: int8x16_t) -> int32x4_t {
97889776
static_assert_uimm_bits!(LANE, 2);
9789-
let c: int32x4_t = vreinterpretq_s32_s8(c);
9790-
unsafe {
9791-
let c: int32x4_t =
9792-
simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
9793-
vdotq_s32(a, b, vreinterpretq_s8_s32(c))
9794-
}
9777+
let c = vreinterpretq_s32_s8(c);
9778+
let c = vdupq_laneq_s32::<LANE>(c);
9779+
vdotq_s32(a, b, vreinterpretq_s8_s32(c))
97959780
}
97969781
#[doc = "Dot product arithmetic (indexed)"]
97979782
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_laneq_u32)"]
@@ -9807,11 +9792,9 @@ pub fn vdotq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int8x16_t, c: int8x16_t
98079792
#[unstable(feature = "stdarch_neon_dotprod", issue = "117224")]
98089793
pub fn vdot_laneq_u32<const LANE: i32>(a: uint32x2_t, b: uint8x8_t, c: uint8x16_t) -> uint32x2_t {
98099794
static_assert_uimm_bits!(LANE, 2);
9810-
let c: uint32x4_t = vreinterpretq_u32_u8(c);
9811-
unsafe {
9812-
let c: uint32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]);
9813-
vdot_u32(a, b, transmute(c))
9814-
}
9795+
let c = vreinterpretq_u32_u8(c);
9796+
let c = vdup_laneq_u32::<LANE>(c);
9797+
vdot_u32(a, b, vreinterpret_u8_u32(c))
98159798
}
98169799
#[doc = "Dot product arithmetic (indexed)"]
98179800
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdotq_laneq_u32)"]
@@ -9827,12 +9810,9 @@ pub fn vdot_laneq_u32<const LANE: i32>(a: uint32x2_t, b: uint8x8_t, c: uint8x16_
98279810
#[unstable(feature = "stdarch_neon_dotprod", issue = "117224")]
98289811
pub fn vdotq_laneq_u32<const LANE: i32>(a: uint32x4_t, b: uint8x16_t, c: uint8x16_t) -> uint32x4_t {
98299812
static_assert_uimm_bits!(LANE, 2);
9830-
let c: uint32x4_t = vreinterpretq_u32_u8(c);
9831-
unsafe {
9832-
let c: uint32x4_t =
9833-
simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
9834-
vdotq_u32(a, b, transmute(c))
9835-
}
9813+
let c = vreinterpretq_u32_u8(c);
9814+
let c = vdupq_laneq_u32::<LANE>(c);
9815+
vdotq_u32(a, b, vreinterpretq_u8_u32(c))
98369816
}
98379817
#[doc = "Dot product arithmetic (vector)"]
98389818
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vdot_s32)"]
@@ -69646,11 +69626,9 @@ pub fn vsubw_u32(a: uint64x2_t, b: uint32x2_t) -> uint64x2_t {
6964669626
)]
6964769627
pub fn vsudot_lane_s32<const LANE: i32>(a: int32x2_t, b: int8x8_t, c: uint8x8_t) -> int32x2_t {
6964869628
static_assert_uimm_bits!(LANE, 1);
69649-
let c: uint32x2_t = vreinterpret_u32_u8(c);
69650-
unsafe {
69651-
let c: uint32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]);
69652-
vusdot_s32(a, vreinterpret_u8_u32(c), b)
69653-
}
69629+
let c = vreinterpret_u32_u8(c);
69630+
let c = vdup_lane_u32::<LANE>(c);
69631+
vusdot_s32(a, vreinterpret_u8_u32(c), b)
6965469632
}
6965569633
#[doc = "Dot product index form with signed and unsigned integers"]
6965669634
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsudotq_lane_s32)"]
@@ -69677,12 +69655,9 @@ pub fn vsudot_lane_s32<const LANE: i32>(a: int32x2_t, b: int8x8_t, c: uint8x8_t)
6967769655
)]
6967869656
pub fn vsudotq_lane_s32<const LANE: i32>(a: int32x4_t, b: int8x16_t, c: uint8x8_t) -> int32x4_t {
6967969657
static_assert_uimm_bits!(LANE, 1);
69680-
let c: uint32x2_t = vreinterpret_u32_u8(c);
69681-
unsafe {
69682-
let c: uint32x4_t =
69683-
simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
69684-
vusdotq_s32(a, vreinterpretq_u8_u32(c), b)
69685-
}
69658+
let c = vreinterpret_u32_u8(c);
69659+
let c = vdupq_lane_u32::<LANE>(c);
69660+
vusdotq_s32(a, vreinterpretq_u8_u32(c), b)
6968669661
}
6968769662
#[doc = "Dot product index form with signed and unsigned integers"]
6968869663
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsudot_laneq_s32)"]
@@ -69702,11 +69677,9 @@ pub fn vsudotq_lane_s32<const LANE: i32>(a: int32x4_t, b: int8x16_t, c: uint8x8_
6970269677
#[unstable(feature = "stdarch_neon_i8mm", issue = "117223")]
6970369678
pub fn vsudot_laneq_s32<const LANE: i32>(a: int32x2_t, b: int8x8_t, c: uint8x16_t) -> int32x2_t {
6970469679
static_assert_uimm_bits!(LANE, 2);
69705-
unsafe {
69706-
let c: uint32x4_t = transmute(c);
69707-
let c: uint32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]);
69708-
vusdot_s32(a, transmute(c), b)
69709-
}
69680+
let c = vreinterpretq_u32_u8(c);
69681+
let c = vdup_laneq_u32::<LANE>(c);
69682+
vusdot_s32(a, vreinterpret_u8_u32(c), b)
6971069683
}
6971169684
#[doc = "Dot product index form with signed and unsigned integers"]
6971269685
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vsudotq_laneq_s32)"]
@@ -69726,12 +69699,9 @@ pub fn vsudot_laneq_s32<const LANE: i32>(a: int32x2_t, b: int8x8_t, c: uint8x16_
6972669699
#[unstable(feature = "stdarch_neon_i8mm", issue = "117223")]
6972769700
pub fn vsudotq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int8x16_t, c: uint8x16_t) -> int32x4_t {
6972869701
static_assert_uimm_bits!(LANE, 2);
69729-
unsafe {
69730-
let c: uint32x4_t = transmute(c);
69731-
let c: uint32x4_t =
69732-
simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
69733-
vusdotq_s32(a, transmute(c), b)
69734-
}
69702+
let c = vreinterpretq_u32_u8(c);
69703+
let c = vdupq_laneq_u32::<LANE>(c);
69704+
vusdotq_s32(a, vreinterpretq_u8_u32(c), b)
6973569705
}
6973669706
#[doc = "Table look-up"]
6973769707
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vtbl1)"]
@@ -71633,11 +71603,9 @@ pub fn vtstq_u32(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
7163371603
)]
7163471604
pub fn vusdot_lane_s32<const LANE: i32>(a: int32x2_t, b: uint8x8_t, c: int8x8_t) -> int32x2_t {
7163571605
static_assert_uimm_bits!(LANE, 1);
71636-
let c: int32x2_t = vreinterpret_s32_s8(c);
71637-
unsafe {
71638-
let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]);
71639-
vusdot_s32(a, b, vreinterpret_s8_s32(c))
71640-
}
71606+
let c = vreinterpret_s32_s8(c);
71607+
let c = vdup_lane_s32::<LANE>(c);
71608+
vusdot_s32(a, b, vreinterpret_s8_s32(c))
7164171609
}
7164271610
#[doc = "Dot product index form with unsigned and signed integers"]
7164371611
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdotq_lane_s32)"]
@@ -71660,12 +71628,9 @@ pub fn vusdot_lane_s32<const LANE: i32>(a: int32x2_t, b: uint8x8_t, c: int8x8_t)
7166071628
)]
7166171629
pub fn vusdotq_lane_s32<const LANE: i32>(a: int32x4_t, b: uint8x16_t, c: int8x8_t) -> int32x4_t {
7166271630
static_assert_uimm_bits!(LANE, 1);
71663-
let c: int32x2_t = vreinterpret_s32_s8(c);
71664-
unsafe {
71665-
let c: int32x4_t =
71666-
simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
71667-
vusdotq_s32(a, b, vreinterpretq_s8_s32(c))
71668-
}
71631+
let c = vreinterpret_s32_s8(c);
71632+
let c = vdupq_lane_s32::<LANE>(c);
71633+
vusdotq_s32(a, b, vreinterpretq_s8_s32(c))
7166971634
}
7167071635
#[doc = "Dot product index form with unsigned and signed integers"]
7167171636
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdot_laneq_s32)"]
@@ -71681,11 +71646,9 @@ pub fn vusdotq_lane_s32<const LANE: i32>(a: int32x4_t, b: uint8x16_t, c: int8x8_
7168171646
#[unstable(feature = "stdarch_neon_i8mm", issue = "117223")]
7168271647
pub fn vusdot_laneq_s32<const LANE: i32>(a: int32x2_t, b: uint8x8_t, c: int8x16_t) -> int32x2_t {
7168371648
static_assert_uimm_bits!(LANE, 2);
71684-
unsafe {
71685-
let c: int32x4_t = transmute(c);
71686-
let c: int32x2_t = simd_shuffle!(c, c, [LANE as u32, LANE as u32]);
71687-
vusdot_s32(a, b, vreinterpret_s8_s32(c))
71688-
}
71649+
let c = vreinterpretq_s32_s8(c);
71650+
let c = vdup_laneq_s32::<LANE>(c);
71651+
vusdot_s32(a, b, vreinterpret_s8_s32(c))
7168971652
}
7169071653
#[doc = "Dot product index form with unsigned and signed integers"]
7169171654
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdotq_laneq_s32)"]
@@ -71701,12 +71664,9 @@ pub fn vusdot_laneq_s32<const LANE: i32>(a: int32x2_t, b: uint8x8_t, c: int8x16_
7170171664
#[unstable(feature = "stdarch_neon_i8mm", issue = "117223")]
7170271665
pub fn vusdotq_laneq_s32<const LANE: i32>(a: int32x4_t, b: uint8x16_t, c: int8x16_t) -> int32x4_t {
7170371666
static_assert_uimm_bits!(LANE, 2);
71704-
unsafe {
71705-
let c: int32x4_t = transmute(c);
71706-
let c: int32x4_t =
71707-
simd_shuffle!(c, c, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]);
71708-
vusdotq_s32(a, b, vreinterpretq_s8_s32(c))
71709-
}
71667+
let c = vreinterpretq_s32_s8(c);
71668+
let c = vdupq_laneq_s32::<LANE>(c);
71669+
vusdotq_s32(a, b, vreinterpretq_s8_s32(c))
7171071670
}
7171171671
#[doc = "Dot product vector form with unsigned and signed integers"]
7171271672
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vusdot_s32)"]

0 commit comments

Comments
 (0)