Skip to content

Commit fa2257c

Browse files
committed
Change implementation of vqdmull{_high}{_lane}
1 parent 784a593 commit fa2257c

2 files changed

Lines changed: 51 additions & 71 deletions

File tree

crates/core_arch/src/aarch64/neon/generated.rs

Lines changed: 28 additions & 48 deletions
Original file line number | Diff line number | Diff line change
@@ -14849,11 +14849,9 @@ pub fn vqdmulhs_laneq_s32<const N: i32>(a: i32, b: int32x4_t) -> i32 {
1484914849
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
1485014850
pub fn vqdmull_high_lane_s16<const N: i32>(a: int16x8_t, b: int16x4_t) -> int32x4_t {
1485114851
static_assert_uimm_bits!(N, 2);
14852-
unsafe {
14853-
let a: int16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]);
14854-
let b: int16x4_t = simd_shuffle!(b, b, [N as u32, N as u32, N as u32, N as u32]);
14855-
vqdmull_s16(a, b)
14856-
}
14852+
let a = vget_high_s16(a);
14853+
let b = vdup_lane_s16::<N>(b);
14854+
vqdmull_s16(a, b)
1485714855
}
1485814856
#[doc = "Signed saturating doubling multiply long"]
1485914857
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_high_laneq_s32)"]
@@ -14864,11 +14862,9 @@ pub fn vqdmull_high_lane_s16<const N: i32>(a: int16x8_t, b: int16x4_t) -> int32x
1486414862
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
1486514863
pub fn vqdmull_high_laneq_s32<const N: i32>(a: int32x4_t, b: int32x4_t) -> int64x2_t {
1486614864
static_assert_uimm_bits!(N, 2);
14867-
unsafe {
14868-
let a: int32x2_t = simd_shuffle!(a, a, [2, 3]);
14869-
let b: int32x2_t = simd_shuffle!(b, b, [N as u32, N as u32]);
14870-
vqdmull_s32(a, b)
14871-
}
14865+
let a = vget_high_s32(a);
14866+
let b = vdup_laneq_s32::<N>(b);
14867+
vqdmull_s32(a, b)
1487214868
}
1487314869
#[doc = "Signed saturating doubling multiply long"]
1487414870
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_high_lane_s32)"]
@@ -14879,11 +14875,9 @@ pub fn vqdmull_high_laneq_s32<const N: i32>(a: int32x4_t, b: int32x4_t) -> int64
1487914875
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
1488014876
pub fn vqdmull_high_lane_s32<const N: i32>(a: int32x4_t, b: int32x2_t) -> int64x2_t {
1488114877
static_assert_uimm_bits!(N, 1);
14882-
unsafe {
14883-
let a: int32x2_t = simd_shuffle!(a, a, [2, 3]);
14884-
let b: int32x2_t = simd_shuffle!(b, b, [N as u32, N as u32]);
14885-
vqdmull_s32(a, b)
14886-
}
14878+
let a = vget_high_s32(a);
14879+
let b = vdup_lane_s32::<N>(b);
14880+
vqdmull_s32(a, b)
1488714881
}
1488814882
#[doc = "Signed saturating doubling multiply long"]
1488914883
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_high_laneq_s16)"]
@@ -14894,11 +14888,9 @@ pub fn vqdmull_high_lane_s32<const N: i32>(a: int32x4_t, b: int32x2_t) -> int64x
1489414888
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
1489514889
pub fn vqdmull_high_laneq_s16<const N: i32>(a: int16x8_t, b: int16x8_t) -> int32x4_t {
1489614890
static_assert_uimm_bits!(N, 3);
14897-
unsafe {
14898-
let a: int16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]);
14899-
let b: int16x4_t = simd_shuffle!(b, b, [N as u32, N as u32, N as u32, N as u32]);
14900-
vqdmull_s16(a, b)
14901-
}
14891+
let a = vget_high_s16(a);
14892+
let b = vdup_laneq_s16::<N>(b);
14893+
vqdmull_s16(a, b)
1490214894
}
1490314895
#[doc = "Signed saturating doubling multiply long"]
1490414896
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_high_n_s16)"]
@@ -14907,11 +14899,9 @@ pub fn vqdmull_high_laneq_s16<const N: i32>(a: int16x8_t, b: int16x8_t) -> int32
1490714899
#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmull2))]
1490814900
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
1490914901
pub fn vqdmull_high_n_s16(a: int16x8_t, b: i16) -> int32x4_t {
14910-
unsafe {
14911-
let a: int16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]);
14912-
let b: int16x4_t = vdup_n_s16(b);
14913-
vqdmull_s16(a, b)
14914-
}
14902+
let a = vget_high_s16(a);
14903+
let b = vdup_n_s16(b);
14904+
vqdmull_s16(a, b)
1491514905
}
1491614906
#[doc = "Signed saturating doubling multiply long"]
1491714907
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_high_n_s32)"]
@@ -14920,11 +14910,9 @@ pub fn vqdmull_high_n_s16(a: int16x8_t, b: i16) -> int32x4_t {
1492014910
#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmull2))]
1492114911
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
1492214912
pub fn vqdmull_high_n_s32(a: int32x4_t, b: i32) -> int64x2_t {
14923-
unsafe {
14924-
let a: int32x2_t = simd_shuffle!(a, a, [2, 3]);
14925-
let b: int32x2_t = vdup_n_s32(b);
14926-
vqdmull_s32(a, b)
14927-
}
14913+
let a = vget_high_s32(a);
14914+
let b = vdup_n_s32(b);
14915+
vqdmull_s32(a, b)
1492814916
}
1492914917
#[doc = "Signed saturating doubling multiply long"]
1493014918
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_high_s16)"]
@@ -14933,11 +14921,9 @@ pub fn vqdmull_high_n_s32(a: int32x4_t, b: i32) -> int64x2_t {
1493314921
#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmull2))]
1493414922
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
1493514923
pub fn vqdmull_high_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t {
14936-
unsafe {
14937-
let a: int16x4_t = simd_shuffle!(a, a, [4, 5, 6, 7]);
14938-
let b: int16x4_t = simd_shuffle!(b, b, [4, 5, 6, 7]);
14939-
vqdmull_s16(a, b)
14940-
}
14924+
let a = vget_high_s16(a);
14925+
let b = vget_high_s16(b);
14926+
vqdmull_s16(a, b)
1494114927
}
1494214928
#[doc = "Signed saturating doubling multiply long"]
1494314929
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_high_s32)"]
@@ -14946,11 +14932,9 @@ pub fn vqdmull_high_s16(a: int16x8_t, b: int16x8_t) -> int32x4_t {
1494614932
#[cfg_attr(all(test, target_endian = "little"), assert_instr(sqdmull2))]
1494714933
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
1494814934
pub fn vqdmull_high_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t {
14949-
unsafe {
14950-
let a: int32x2_t = simd_shuffle!(a, a, [2, 3]);
14951-
let b: int32x2_t = simd_shuffle!(b, b, [2, 3]);
14952-
vqdmull_s32(a, b)
14953-
}
14935+
let a = vget_high_s32(a);
14936+
let b = vget_high_s32(b);
14937+
vqdmull_s32(a, b)
1495414938
}
1495514939
#[doc = "Vector saturating doubling long multiply by scalar"]
1495614940
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_laneq_s16)"]
@@ -14961,10 +14945,8 @@ pub fn vqdmull_high_s32(a: int32x4_t, b: int32x4_t) -> int64x2_t {
1496114945
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
1496214946
pub fn vqdmull_laneq_s16<const N: i32>(a: int16x4_t, b: int16x8_t) -> int32x4_t {
1496314947
static_assert_uimm_bits!(N, 3);
14964-
unsafe {
14965-
let b: int16x4_t = simd_shuffle!(b, b, [N as u32, N as u32, N as u32, N as u32]);
14966-
vqdmull_s16(a, b)
14967-
}
14948+
let b = vdup_laneq_s16::<N>(b);
14949+
vqdmull_s16(a, b)
1496814950
}
1496914951
#[doc = "Vector saturating doubling long multiply by scalar"]
1497014952
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmull_laneq_s32)"]
@@ -14975,10 +14957,8 @@ pub fn vqdmull_laneq_s16<const N: i32>(a: int16x4_t, b: int16x8_t) -> int32x4_t
1497514957
#[stable(feature = "neon_intrinsics", since = "1.59.0")]
1497614958
pub fn vqdmull_laneq_s32<const N: i32>(a: int32x2_t, b: int32x4_t) -> int64x2_t {
1497714959
static_assert_uimm_bits!(N, 2);
14978-
unsafe {
14979-
let b: int32x2_t = simd_shuffle!(b, b, [N as u32, N as u32]);
14980-
vqdmull_s32(a, b)
14981-
}
14960+
let b = vdup_laneq_s32::<N>(b);
14961+
vqdmull_s32(a, b)
1498214962
}
1498314963
#[doc = "Signed saturating doubling multiply long"]
1498414964
#[doc = "[Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/vqdmullh_lane_s16)"]

crates/stdarch-gen-arm/spec/neon/aarch64.spec.yml

Lines changed: 23 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -6963,11 +6963,11 @@ intrinsics:
69636963
- *neon-stable
69646964
safety: safe
69656965
types:
6966-
- [int16x8_t, int32x4_t, int16x4_t, '[4, 5, 6, 7]']
6967-
- [int32x4_t, int64x2_t, int32x2_t, '[2, 3]']
6966+
- [int16x8_t, int32x4_t]
6967+
- [int32x4_t, int64x2_t]
69686968
compose:
6969-
- Let: [a, "{neon_type[2]}", {FnCall: [simd_shuffle!, [a, a, '{type[3]}']]}]
6970-
- Let: [b, "{neon_type[2]}", {FnCall: [simd_shuffle!, [b, b, '{type[3]}']]}]
6969+
- Let: [a, {FnCall: ['vget_high_{neon_type[0]}', [a]]}]
6970+
- Let: [b, {FnCall: ['vget_high_{neon_type[0]}', [b]]}]
69716971
- FnCall: ["vqdmull{neon_type[0].noq}", [a, b]]
69726972

69736973
- name: "vqdmull_high_n_{type[1]}"
@@ -6979,11 +6979,11 @@ intrinsics:
69796979
- *neon-stable
69806980
safety: safe
69816981
types:
6982-
- [int16x8_t, "i16", int32x4_t, int16x4_t, '[4, 5, 6, 7]']
6983-
- [int32x4_t, "i32", int64x2_t, int32x2_t, '[2, 3]']
6982+
- [int16x8_t, "i16", int32x4_t]
6983+
- [int32x4_t, "i32", int64x2_t]
69846984
compose:
6985-
- Let: [a, "{neon_type[3]}", {FnCall: [simd_shuffle!, [a, a, "{type[4]}"]]}]
6986-
- Let: [b, "{neon_type[3]}", {FnCall: ["vdup_n{neon_type[0].noq}", [b]]}]
6985+
- Let: [a, {FnCall: ['vget_high_{neon_type[0]}', [a]]}]
6986+
- Let: [b, {FnCall: ["vdup_n{neon_type[0].noq}", [b]]}]
69876987
- FnCall: ["vqdmull{neon_type[0].noq}", [a, b]]
69886988

69896989
- name: "vqdmull{type[3]}"
@@ -7038,7 +7038,7 @@ intrinsics:
70387038
- Let: [b, "{type[0]}", {FnCall: ['vget{neon_type[1].lane_nox}', [b], [N]]}]
70397039
- FnCall: ["vqdmulls_s32", [a, b]]
70407040

7041-
- name: "vqdmull{type[6]}"
7041+
- name: "vqdmull{type[3]}"
70427042
doc: "Signed saturating doubling multiply long"
70437043
arguments: ["a: {neon_type[0]}", "b: {neon_type[1]}"]
70447044
return_type: "{neon_type[2]}"
@@ -7049,12 +7049,12 @@ intrinsics:
70497049
static_defs: ['const N: i32']
70507050
safety: safe
70517051
types:
7052-
- [int16x8_t, int16x4_t, int32x4_t, int16x4_t, '[4, 5, 6, 7]', '[N as u32, N as u32, N as u32, N as u32]', '_high_lane_s16']
7053-
- [int32x4_t, int32x4_t, int64x2_t, int32x2_t, '[2, 3]', '[N as u32, N as u32]', '_high_laneq_s32']
7052+
- [int16x8_t, int16x4_t, int32x4_t, '_high_lane_s16']
7053+
- [int32x4_t, int32x4_t, int64x2_t, '_high_laneq_s32']
70547054
compose:
70557055
- FnCall: [static_assert_uimm_bits!, [N, '2']]
7056-
- Let: [a, "{neon_type[3]}", {FnCall: [simd_shuffle!, [a, a, "{type[4]}"]]}]
7057-
- Let: [b, "{neon_type[3]}", {FnCall: [simd_shuffle!, [b, b, "{type[5]}"]]}]
7056+
- Let: [a, {FnCall: ['vget_high_{neon_type[0]}', [a]]}]
7057+
- Let: [b, {FnCall: ['vdup_lane{neon_type[1].nox}', [b], [N]]}]
70587058
- FnCall: ["vqdmull{neon_type[0].noq}", [a, b]]
70597059

70607060
- name: "vqdmull_high_lane_s32"
@@ -7068,11 +7068,11 @@ intrinsics:
70687068
static_defs: ['const N: i32']
70697069
safety: safe
70707070
types:
7071-
- [int32x4_t, int32x2_t, int64x2_t, int32x2_t, '[2, 3]', '[N as u32, N as u32]']
7071+
- [int32x4_t, int32x2_t, int64x2_t]
70727072
compose:
70737073
- FnCall: [static_assert_uimm_bits!, [N, '1']]
7074-
- Let: [a, "{neon_type[3]}", {FnCall: [simd_shuffle!, [a, a, "{type[4]}"]]}]
7075-
- Let: [b, "{neon_type[3]}", {FnCall: [simd_shuffle!, [b, b, "{type[5]}"]]}]
7074+
- Let: [a, {FnCall: ['vget_high_{neon_type[0]}', [a]]}]
7075+
- Let: [b, {FnCall: ['vdup_lane{neon_type[1].nox}', [b], [N]]}]
70767076
- FnCall: ["vqdmull{neon_type[0].noq}", [a, b]]
70777077

70787078
- name: "vqdmull_high_laneq_s16"
@@ -7086,11 +7086,11 @@ intrinsics:
70867086
static_defs: ['const N: i32']
70877087
safety: safe
70887088
types:
7089-
- [int16x8_t, int16x8_t, int32x4_t, int16x4_t, '[4, 5, 6, 7]', '[N as u32, N as u32, N as u32, N as u32]']
7089+
- [int16x8_t, int16x8_t, int32x4_t]
70907090
compose:
70917091
- FnCall: [static_assert_uimm_bits!, [N, '3']]
7092-
- Let: [a, "{neon_type[3]}", {FnCall: [simd_shuffle!, [a, a, "{type[4]}"]]}]
7093-
- Let: [b, "{neon_type[3]}", {FnCall: [simd_shuffle!, [b, b, "{type[5]}"]]}]
7092+
- Let: [a, {FnCall: ['vget_high_{neon_type[0]}', [a]]}]
7093+
- Let: [b, {FnCall: ['vdup_lane{neon_type[1].nox}', [b], [N]]}]
70947094
- FnCall: ["vqdmull{neon_type[0].noq}", [a, b]]
70957095

70967096
- name: "vqdmull_laneq_s16"
@@ -7104,10 +7104,10 @@ intrinsics:
71047104
static_defs: ['const N: i32']
71057105
safety: safe
71067106
types:
7107-
- [int16x4_t, int16x8_t, int32x4_t, '[N as u32, N as u32, N as u32, N as u32]']
7107+
- [int16x4_t, int16x8_t, int32x4_t]
71087108
compose:
71097109
- FnCall: [static_assert_uimm_bits!, [N, '3']]
7110-
- Let: [b, "{neon_type[0]}", {FnCall: [simd_shuffle!, [b, b, "{type[3]}"]]}]
7110+
- Let: [b, {FnCall: ['vdup_lane{neon_type[1].nox}', [b], [N]]}]
71117111
- FnCall: [vqdmull_s16, [a, b]]
71127112

71137113
- name: "vqdmull_laneq_s32"
@@ -7121,10 +7121,10 @@ intrinsics:
71217121
static_defs: ['const N: i32']
71227122
safety: safe
71237123
types:
7124-
- [int32x2_t, int32x4_t, int64x2_t, '[N as u32, N as u32]']
7124+
- [int32x2_t, int32x4_t, int64x2_t]
71257125
compose:
71267126
- FnCall: [static_assert_uimm_bits!, [N, '2']]
7127-
- Let: [b, "{neon_type[0]}", {FnCall: [simd_shuffle!, [b, b, "{type[3]}"]]}]
7127+
- Let: [b, {FnCall: ['vdup_lane{neon_type[1].nox}', [b], [N]]}]
71287128
- FnCall: [vqdmull_s32, [a, b]]
71297129

71307130
- name: "vqdmlal{type[4]}"

0 commit comments

Comments
 (0)