Skip to content

Commit 00a2f68

Browse files
committed
Change implementation of vmul_lane
1 parent 18d2025 commit 00a2f68

4 files changed

Lines changed: 57 additions & 202 deletions

File tree

crates/core_arch/src/aarch64/neon/generated.rs

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -12591,7 +12591,7 @@ pub fn vmul_lane_f64<const LANE: i32>(a: float64x1_t, b: float64x1_t) -> float64
1259112591
#[cfg(not(target_arch = "arm64ec"))]
1259212592
pub fn vmul_laneq_f16<const LANE: i32>(a: float16x4_t, b: float16x8_t) -> float16x4_t {
1259312593
static_assert_uimm_bits!(LANE, 3);
12594-
unsafe { simd_mul(a, simd_shuffle!(b, b, [LANE as u32; 4])) }
12594+
unsafe { simd_mul(a, vdup_laneq_f16::<LANE>(b)) }
1259512595
}
1259612596
#[doc = "Floating-point multiply"]
1259712597
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_f16)"]
@@ -12603,7 +12603,7 @@ pub fn vmul_laneq_f16<const LANE: i32>(a: float16x4_t, b: float16x8_t) -> float1
1260312603
#[cfg(not(target_arch = "arm64ec"))]
1260412604
pub fn vmulq_laneq_f16<const LANE: i32>(a: float16x8_t, b: float16x8_t) -> float16x8_t {
1260512605
static_assert_uimm_bits!(LANE, 3);
12606-
unsafe { simd_mul(a, simd_shuffle!(b, b, [LANE as u32; 8])) }
12606+
unsafe { simd_mul(a, vdupq_laneq_f16::<LANE>(b)) }
1260712607
}
1260812608
#[doc = "Floating-point multiply"]
1260912609
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_f64)"]
@@ -12917,7 +12917,7 @@ pub fn vmull_p64(a: p64, b: p64) -> p128 {
1291712917
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
1291812918
pub fn vmulq_lane_f64<const LANE: i32>(a: float64x2_t, b: float64x1_t) -> float64x2_t {
1291912919
static_assert!(LANE == 0);
12920-
unsafe { simd_mul(a, simd_shuffle!(b, b, [LANE as u32; 2])) }
12920+
unsafe { simd_mul(a, vdupq_lane_f64::<LANE>(b)) }
1292112921
}
1292212922
#[doc = "Floating-point multiply"]
1292312923
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_f64)"]
@@ -12928,7 +12928,7 @@ pub fn vmulq_lane_f64<const LANE: i32>(a: float64x2_t, b: float64x1_t) -> float6
1292812928
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
1292912929
pub fn vmulq_laneq_f64<const LANE: i32>(a: float64x2_t, b: float64x2_t) -> float64x2_t {
1293012930
static_assert_uimm_bits!(LANE, 1);
12931-
unsafe { simd_mul(a, simd_shuffle!(b, b, [LANE as u32; 2])) }
12931+
unsafe { simd_mul(a, vdupq_laneq_f64::<LANE>(b)) }
1293212932
}
1293312933
#[doc = "Floating-point multiply"]
1293412934
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmuls_lane_f32)"]

crates/core_arch/src/arm_shared/neon/generated.rs

Lines changed: 22 additions & 167 deletions
Original file line numberDiff line numberDiff line change
@@ -29763,12 +29763,7 @@ pub fn vmulq_f32(a: float32x4_t, b: float32x4_t) -> float32x4_t {
2976329763
#[cfg(not(target_arch = "arm64ec"))]
2976429764
pub fn vmul_lane_f16<const LANE: i32>(a: float16x4_t, v: float16x4_t) -> float16x4_t {
2976529765
static_assert_uimm_bits!(LANE, 2);
29766-
unsafe {
29767-
simd_mul(
29768-
a,
29769-
simd_shuffle!(v, v, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
29770-
)
29771-
}
29766+
unsafe { simd_mul(a, vdup_lane_f16::<LANE>(v)) }
2977229767
}
2977329768
#[doc = "Multiply"]
2977429769
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_f16)"]
@@ -29792,25 +29787,7 @@ pub fn vmul_lane_f16<const LANE: i32>(a: float16x4_t, v: float16x4_t) -> float16
2979229787
#[cfg(not(target_arch = "arm64ec"))]
2979329788
pub fn vmulq_lane_f16<const LANE: i32>(a: float16x8_t, v: float16x4_t) -> float16x8_t {
2979429789
static_assert_uimm_bits!(LANE, 2);
29795-
unsafe {
29796-
simd_mul(
29797-
a,
29798-
simd_shuffle!(
29799-
v,
29800-
v,
29801-
[
29802-
LANE as u32,
29803-
LANE as u32,
29804-
LANE as u32,
29805-
LANE as u32,
29806-
LANE as u32,
29807-
LANE as u32,
29808-
LANE as u32,
29809-
LANE as u32
29810-
]
29811-
),
29812-
)
29813-
}
29790+
unsafe { simd_mul(a, vdupq_lane_f16::<LANE>(v)) }
2981429791
}
2981529792
#[doc = "Floating-point multiply"]
2981629793
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_f32)"]
@@ -29833,7 +29810,7 @@ pub fn vmulq_lane_f16<const LANE: i32>(a: float16x8_t, v: float16x4_t) -> float1
2983329810
)]
2983429811
pub fn vmul_lane_f32<const LANE: i32>(a: float32x2_t, b: float32x2_t) -> float32x2_t {
2983529812
static_assert_uimm_bits!(LANE, 1);
29836-
unsafe { simd_mul(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) }
29813+
unsafe { simd_mul(a, vdup_lane_f32::<LANE>(b)) }
2983729814
}
2983829815
#[doc = "Floating-point multiply"]
2983929816
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_f32)"]
@@ -29856,7 +29833,7 @@ pub fn vmul_lane_f32<const LANE: i32>(a: float32x2_t, b: float32x2_t) -> float32
2985629833
)]
2985729834
pub fn vmul_laneq_f32<const LANE: i32>(a: float32x2_t, b: float32x4_t) -> float32x2_t {
2985829835
static_assert_uimm_bits!(LANE, 2);
29859-
unsafe { simd_mul(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) }
29836+
unsafe { simd_mul(a, vdup_laneq_f32::<LANE>(b)) }
2986029837
}
2986129838
#[doc = "Floating-point multiply"]
2986229839
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_f32)"]
@@ -29879,12 +29856,7 @@ pub fn vmul_laneq_f32<const LANE: i32>(a: float32x2_t, b: float32x4_t) -> float3
2987929856
)]
2988029857
pub fn vmulq_lane_f32<const LANE: i32>(a: float32x4_t, b: float32x2_t) -> float32x4_t {
2988129858
static_assert_uimm_bits!(LANE, 1);
29882-
unsafe {
29883-
simd_mul(
29884-
a,
29885-
simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
29886-
)
29887-
}
29859+
unsafe { simd_mul(a, vdupq_lane_f32::<LANE>(b)) }
2988829860
}
2988929861
#[doc = "Floating-point multiply"]
2989029862
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_f32)"]
@@ -29907,12 +29879,7 @@ pub fn vmulq_lane_f32<const LANE: i32>(a: float32x4_t, b: float32x2_t) -> float3
2990729879
)]
2990829880
pub fn vmulq_laneq_f32<const LANE: i32>(a: float32x4_t, b: float32x4_t) -> float32x4_t {
2990929881
static_assert_uimm_bits!(LANE, 2);
29910-
unsafe {
29911-
simd_mul(
29912-
a,
29913-
simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
29914-
)
29915-
}
29882+
unsafe { simd_mul(a, vdupq_laneq_f32::<LANE>(b)) }
2991629883
}
2991729884
#[doc = "Multiply"]
2991829885
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_s16)"]
@@ -29935,12 +29902,7 @@ pub fn vmulq_laneq_f32<const LANE: i32>(a: float32x4_t, b: float32x4_t) -> float
2993529902
)]
2993629903
pub fn vmul_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t {
2993729904
static_assert_uimm_bits!(LANE, 2);
29938-
unsafe {
29939-
simd_mul(
29940-
a,
29941-
simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
29942-
)
29943-
}
29905+
unsafe { simd_mul(a, vdup_lane_s16::<LANE>(b)) }
2994429906
}
2994529907
#[doc = "Multiply"]
2994629908
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_s16)"]
@@ -29963,25 +29925,7 @@ pub fn vmul_lane_s16<const LANE: i32>(a: int16x4_t, b: int16x4_t) -> int16x4_t {
2996329925
)]
2996429926
pub fn vmulq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x4_t) -> int16x8_t {
2996529927
static_assert_uimm_bits!(LANE, 2);
29966-
unsafe {
29967-
simd_mul(
29968-
a,
29969-
simd_shuffle!(
29970-
b,
29971-
b,
29972-
[
29973-
LANE as u32,
29974-
LANE as u32,
29975-
LANE as u32,
29976-
LANE as u32,
29977-
LANE as u32,
29978-
LANE as u32,
29979-
LANE as u32,
29980-
LANE as u32
29981-
]
29982-
),
29983-
)
29984-
}
29928+
unsafe { simd_mul(a, vdupq_lane_s16::<LANE>(b)) }
2998529929
}
2998629930
#[doc = "Multiply"]
2998729931
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_s32)"]
@@ -30004,7 +29948,7 @@ pub fn vmulq_lane_s16<const LANE: i32>(a: int16x8_t, b: int16x4_t) -> int16x8_t
3000429948
)]
3000529949
pub fn vmul_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
3000629950
static_assert_uimm_bits!(LANE, 1);
30007-
unsafe { simd_mul(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) }
29951+
unsafe { simd_mul(a, vdup_lane_s32::<LANE>(b)) }
3000829952
}
3000929953
#[doc = "Multiply"]
3001029954
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_s32)"]
@@ -30027,12 +29971,7 @@ pub fn vmul_lane_s32<const LANE: i32>(a: int32x2_t, b: int32x2_t) -> int32x2_t {
3002729971
)]
3002829972
pub fn vmulq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x2_t) -> int32x4_t {
3002929973
static_assert_uimm_bits!(LANE, 1);
30030-
unsafe {
30031-
simd_mul(
30032-
a,
30033-
simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
30034-
)
30035-
}
29974+
unsafe { simd_mul(a, vdupq_lane_s32::<LANE>(b)) }
3003629975
}
3003729976
#[doc = "Multiply"]
3003829977
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_u16)"]
@@ -30055,12 +29994,7 @@ pub fn vmulq_lane_s32<const LANE: i32>(a: int32x4_t, b: int32x2_t) -> int32x4_t
3005529994
)]
3005629995
pub fn vmul_lane_u16<const LANE: i32>(a: uint16x4_t, b: uint16x4_t) -> uint16x4_t {
3005729996
static_assert_uimm_bits!(LANE, 2);
30058-
unsafe {
30059-
simd_mul(
30060-
a,
30061-
simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
30062-
)
30063-
}
29997+
unsafe { simd_mul(a, vdup_lane_u16::<LANE>(b)) }
3006429998
}
3006529999
#[doc = "Multiply"]
3006630000
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_u16)"]
@@ -30083,25 +30017,7 @@ pub fn vmul_lane_u16<const LANE: i32>(a: uint16x4_t, b: uint16x4_t) -> uint16x4_
3008330017
)]
3008430018
pub fn vmulq_lane_u16<const LANE: i32>(a: uint16x8_t, b: uint16x4_t) -> uint16x8_t {
3008530019
static_assert_uimm_bits!(LANE, 2);
30086-
unsafe {
30087-
simd_mul(
30088-
a,
30089-
simd_shuffle!(
30090-
b,
30091-
b,
30092-
[
30093-
LANE as u32,
30094-
LANE as u32,
30095-
LANE as u32,
30096-
LANE as u32,
30097-
LANE as u32,
30098-
LANE as u32,
30099-
LANE as u32,
30100-
LANE as u32
30101-
]
30102-
),
30103-
)
30104-
}
30020+
unsafe { simd_mul(a, vdupq_lane_u16::<LANE>(b)) }
3010530021
}
3010630022
#[doc = "Multiply"]
3010730023
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_lane_u32)"]
@@ -30124,7 +30040,7 @@ pub fn vmulq_lane_u16<const LANE: i32>(a: uint16x8_t, b: uint16x4_t) -> uint16x8
3012430040
)]
3012530041
pub fn vmul_lane_u32<const LANE: i32>(a: uint32x2_t, b: uint32x2_t) -> uint32x2_t {
3012630042
static_assert_uimm_bits!(LANE, 1);
30127-
unsafe { simd_mul(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) }
30043+
unsafe { simd_mul(a, vdup_lane_u32::<LANE>(b)) }
3012830044
}
3012930045
#[doc = "Multiply"]
3013030046
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_lane_u32)"]
@@ -30147,12 +30063,7 @@ pub fn vmul_lane_u32<const LANE: i32>(a: uint32x2_t, b: uint32x2_t) -> uint32x2_
3014730063
)]
3014830064
pub fn vmulq_lane_u32<const LANE: i32>(a: uint32x4_t, b: uint32x2_t) -> uint32x4_t {
3014930065
static_assert_uimm_bits!(LANE, 1);
30150-
unsafe {
30151-
simd_mul(
30152-
a,
30153-
simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
30154-
)
30155-
}
30066+
unsafe { simd_mul(a, vdupq_lane_u32::<LANE>(b)) }
3015630067
}
3015730068
#[doc = "Multiply"]
3015830069
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_s16)"]
@@ -30175,12 +30086,7 @@ pub fn vmulq_lane_u32<const LANE: i32>(a: uint32x4_t, b: uint32x2_t) -> uint32x4
3017530086
)]
3017630087
pub fn vmul_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x8_t) -> int16x4_t {
3017730088
static_assert_uimm_bits!(LANE, 3);
30178-
unsafe {
30179-
simd_mul(
30180-
a,
30181-
simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
30182-
)
30183-
}
30089+
unsafe { simd_mul(a, vdup_laneq_s16::<LANE>(b)) }
3018430090
}
3018530091
#[doc = "Multiply"]
3018630092
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_s16)"]
@@ -30203,25 +30109,7 @@ pub fn vmul_laneq_s16<const LANE: i32>(a: int16x4_t, b: int16x8_t) -> int16x4_t
3020330109
)]
3020430110
pub fn vmulq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t {
3020530111
static_assert_uimm_bits!(LANE, 3);
30206-
unsafe {
30207-
simd_mul(
30208-
a,
30209-
simd_shuffle!(
30210-
b,
30211-
b,
30212-
[
30213-
LANE as u32,
30214-
LANE as u32,
30215-
LANE as u32,
30216-
LANE as u32,
30217-
LANE as u32,
30218-
LANE as u32,
30219-
LANE as u32,
30220-
LANE as u32
30221-
]
30222-
),
30223-
)
30224-
}
30112+
unsafe { simd_mul(a, vdupq_laneq_s16::<LANE>(b)) }
3022530113
}
3022630114
#[doc = "Multiply"]
3022730115
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_s32)"]
@@ -30244,7 +30132,7 @@ pub fn vmulq_laneq_s16<const LANE: i32>(a: int16x8_t, b: int16x8_t) -> int16x8_t
3024430132
)]
3024530133
pub fn vmul_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x4_t) -> int32x2_t {
3024630134
static_assert_uimm_bits!(LANE, 2);
30247-
unsafe { simd_mul(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) }
30135+
unsafe { simd_mul(a, vdup_laneq_s32::<LANE>(b)) }
3024830136
}
3024930137
#[doc = "Multiply"]
3025030138
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_s32)"]
@@ -30267,12 +30155,7 @@ pub fn vmul_laneq_s32<const LANE: i32>(a: int32x2_t, b: int32x4_t) -> int32x2_t
3026730155
)]
3026830156
pub fn vmulq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t {
3026930157
static_assert_uimm_bits!(LANE, 2);
30270-
unsafe {
30271-
simd_mul(
30272-
a,
30273-
simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
30274-
)
30275-
}
30158+
unsafe { simd_mul(a, vdupq_laneq_s32::<LANE>(b)) }
3027630159
}
3027730160
#[doc = "Multiply"]
3027830161
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_u16)"]
@@ -30295,12 +30178,7 @@ pub fn vmulq_laneq_s32<const LANE: i32>(a: int32x4_t, b: int32x4_t) -> int32x4_t
3029530178
)]
3029630179
pub fn vmul_laneq_u16<const LANE: i32>(a: uint16x4_t, b: uint16x8_t) -> uint16x4_t {
3029730180
static_assert_uimm_bits!(LANE, 3);
30298-
unsafe {
30299-
simd_mul(
30300-
a,
30301-
simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
30302-
)
30303-
}
30181+
unsafe { simd_mul(a, vdup_laneq_u16::<LANE>(b)) }
3030430182
}
3030530183
#[doc = "Multiply"]
3030630184
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_u16)"]
@@ -30323,25 +30201,7 @@ pub fn vmul_laneq_u16<const LANE: i32>(a: uint16x4_t, b: uint16x8_t) -> uint16x4
3032330201
)]
3032430202
pub fn vmulq_laneq_u16<const LANE: i32>(a: uint16x8_t, b: uint16x8_t) -> uint16x8_t {
3032530203
static_assert_uimm_bits!(LANE, 3);
30326-
unsafe {
30327-
simd_mul(
30328-
a,
30329-
simd_shuffle!(
30330-
b,
30331-
b,
30332-
[
30333-
LANE as u32,
30334-
LANE as u32,
30335-
LANE as u32,
30336-
LANE as u32,
30337-
LANE as u32,
30338-
LANE as u32,
30339-
LANE as u32,
30340-
LANE as u32
30341-
]
30342-
),
30343-
)
30344-
}
30204+
unsafe { simd_mul(a, vdupq_laneq_u16::<LANE>(b)) }
3034530205
}
3034630206
#[doc = "Multiply"]
3034730207
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_laneq_u32)"]
@@ -30364,7 +30224,7 @@ pub fn vmulq_laneq_u16<const LANE: i32>(a: uint16x8_t, b: uint16x8_t) -> uint16x
3036430224
)]
3036530225
pub fn vmul_laneq_u32<const LANE: i32>(a: uint32x2_t, b: uint32x4_t) -> uint32x2_t {
3036630226
static_assert_uimm_bits!(LANE, 2);
30367-
unsafe { simd_mul(a, simd_shuffle!(b, b, [LANE as u32, LANE as u32])) }
30227+
unsafe { simd_mul(a, vdup_laneq_u32::<LANE>(b)) }
3036830228
}
3036930229
#[doc = "Multiply"]
3037030230
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmulq_laneq_u32)"]
@@ -30387,12 +30247,7 @@ pub fn vmul_laneq_u32<const LANE: i32>(a: uint32x2_t, b: uint32x4_t) -> uint32x2
3038730247
)]
3038830248
pub fn vmulq_laneq_u32<const LANE: i32>(a: uint32x4_t, b: uint32x4_t) -> uint32x4_t {
3038930249
static_assert_uimm_bits!(LANE, 2);
30390-
unsafe {
30391-
simd_mul(
30392-
a,
30393-
simd_shuffle!(b, b, [LANE as u32, LANE as u32, LANE as u32, LANE as u32]),
30394-
)
30395-
}
30250+
unsafe { simd_mul(a, vdupq_laneq_u32::<LANE>(b)) }
3039630251
}
3039730252
#[doc = "Vector multiply by scalar"]
3039830253
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vmul_n_f16)"]

0 commit comments

Comments
 (0)