Skip to content

Commit 73dd2f3

Browse files
amilendra authored and github-actions[bot] committed
Automerge: [Clang][AArch64][SVE2p3][SME2p3] Add intrinsics for v9.7a add/add-and-subtract/subtract pairwise operations (#187527)
Add the following new clang intrinsics based on the ACLE specification ARM-software/acle#428 (Add alpha support for 9.7 data processing intrinsics):

- ADDQP (Add pairwise within quadword vector segments)
  - svint8_t svaddqp_s8(svint8_t, svint8_t) / svint8_t svaddqp(svint8_t, svint8_t)
  - svuint8_t svaddqp_u8(svuint8_t, svuint8_t) / svuint8_t svaddqp(svuint8_t, svuint8_t)
  - svint16_t svaddqp_s16(svint16_t, svint16_t) / svint16_t svaddqp(svint16_t, svint16_t)
  - svuint16_t svaddqp_u16(svuint16_t, svuint16_t) / svuint16_t svaddqp(svuint16_t, svuint16_t)
  - svint32_t svaddqp_s32(svint32_t, svint32_t) / svint32_t svaddqp(svint32_t, svint32_t)
  - svuint32_t svaddqp_u32(svuint32_t, svuint32_t) / svuint32_t svaddqp(svuint32_t, svuint32_t)
  - svint64_t svaddqp_s64(svint64_t, svint64_t) / svint64_t svaddqp(svint64_t, svint64_t)
  - svuint64_t svaddqp_u64(svuint64_t, svuint64_t) / svuint64_t svaddqp(svuint64_t, svuint64_t)
- ADDSUBP (Add and subtract pairwise)
  - svint8_t svaddsubp_s8(svint8_t, svint8_t) / svint8_t svaddsubp(svint8_t, svint8_t)
  - svuint8_t svaddsubp_u8(svuint8_t, svuint8_t) / svuint8_t svaddsubp(svuint8_t, svuint8_t)
  - svint16_t svaddsubp_s16(svint16_t, svint16_t) / svint16_t svaddsubp(svint16_t, svint16_t)
  - svuint16_t svaddsubp_u16(svuint16_t, svuint16_t) / svuint16_t svaddsubp(svuint16_t, svuint16_t)
  - svint32_t svaddsubp_s32(svint32_t, svint32_t) / svint32_t svaddsubp(svint32_t, svint32_t)
  - svuint32_t svaddsubp_u32(svuint32_t, svuint32_t) / svuint32_t svaddsubp(svuint32_t, svuint32_t)
  - svint64_t svaddsubp_s64(svint64_t, svint64_t) / svint64_t svaddsubp(svint64_t, svint64_t)
  - svuint64_t svaddsubp_u64(svuint64_t, svuint64_t) / svuint64_t svaddsubp(svuint64_t, svuint64_t)
- SUBP (Subtract pairwise)
  - svint8_t svsubp_s8(svbool_t, svint8_t, svint8_t) / svint8_t svsubp(svbool_t, svint8_t, svint8_t)
  - svuint8_t svsubp_u8(svbool_t, svuint8_t, svuint8_t) / svuint8_t svsubp(svbool_t, svuint8_t, svuint8_t)
  - svint16_t svsubp_s16(svbool_t, svint16_t, svint16_t) / svint16_t svsubp(svbool_t, svint16_t, svint16_t)
  - svuint16_t svsubp_u16(svbool_t, svuint16_t, svuint16_t) / svuint16_t svsubp(svbool_t, svuint16_t, svuint16_t)
  - svint32_t svsubp_s32(svbool_t, svint32_t, svint32_t) / svint32_t svsubp(svbool_t, svint32_t, svint32_t)
  - svuint32_t svsubp_u32(svbool_t, svuint32_t, svuint32_t) / svuint32_t svsubp(svbool_t, svuint32_t, svuint32_t)
  - svint64_t svsubp_s64(svbool_t, svint64_t, svint64_t) / svint64_t svsubp(svbool_t, svint64_t, svint64_t)
  - svuint64_t svsubp_u64(svbool_t, svuint64_t, svuint64_t) / svuint64_t svsubp(svbool_t, svuint64_t, svuint64_t)
2 parents 881fa4e + c6de992 commit 73dd2f3

10 files changed

Lines changed: 1839 additions & 3 deletions

File tree

clang/include/clang/Basic/arm_sve.td

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1421,6 +1421,19 @@ defm SVMINP_S : SInstPairwise<"svminp", "csli", "aarch64_sve_sminp", [
14211421
defm SVMINP_U : SInstPairwise<"svminp", "UcUsUiUl", "aarch64_sve_uminp", [VerifyRuntimeMode]>;
14221422
}
14231423

1424+
////////////////////////////////////////////////////////////////////////////////
1425+
// SVE2.3 - Add pairwise within quadword vector segments, add-and-subtract
// pairwise, and subtract pairwise
1426+
1427+
// All definitions below are gated on "sve2p3|sme2p3" for both the SVE and the
// SME target guard and cover the 8/16/32/64-bit signed and unsigned element
// types ("csilUcUsUiUl").
let SVETargetGuard = "sve2p3|sme2p3", SMETargetGuard = "sve2p3|sme2p3" in {
1428+
// Two-vector forms without a governing predicate (prototype "ddd", MergeNone).
def SVADDQP : SInst<"svaddqp[_{d}]", "ddd", "csilUcUsUiUl", MergeNone, "aarch64_sve_addqp",
1429+
[VerifyRuntimeMode]>;
1430+
def SVADDSUBP : SInst<"svaddsubp[_{d}]", "ddd", "csilUcUsUiUl", MergeNone, "aarch64_sve_addsubp",
1431+
[VerifyRuntimeMode]>;
1432+
// Predicated forms (prototype "dPdd"): the three defs share the name pattern,
// prototype, types and the aarch64_sve_subp intrinsic, and differ only in the
// merge policy (MergeOp1 / MergeAny / MergeZero).
def SVSUBP_M : SInst<"svsubp[_{d}]", "dPdd", "csilUcUsUiUl", MergeOp1, "aarch64_sve_subp", [VerifyRuntimeMode]>;
1433+
def SVSUBP_X : SInst<"svsubp[_{d}]", "dPdd", "csilUcUsUiUl", MergeAny, "aarch64_sve_subp", [VerifyRuntimeMode]>;
1434+
// NOTE(review): the generated Sema test in this change only calls the _m and
// _x spellings of svsubp; confirm the MergeZero variant (presumably the _z
// spelling) is intended by the ACLE proposal and is covered by a test.
def SVSUBP_Z : SInst<"svsubp[_{d}]", "dPdd", "csilUcUsUiUl", MergeZero, "aarch64_sve_subp", [VerifyRuntimeMode]>;
1435+
}
1436+
14241437
////////////////////////////////////////////////////////////////////////////////
14251438
// SVE2 - Widening pairwise arithmetic
14261439

clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_addqp.c

Lines changed: 265 additions & 0 deletions
Large diffs are not rendered by default.

clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_addsubp.c

Lines changed: 265 additions & 0 deletions
Large diffs are not rendered by default.

clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_subp.c

Lines changed: 928 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 241 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,241 @@
1+
// NOTE: File has been autogenerated by utils/aarch64_builtins_test_generator.py
2+
// RUN: %clang_cc1 %s -fsyntax-only -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -target-feature +sve2p3 -verify
3+
// RUN: %clang_cc1 %s -fsyntax-only -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2p3 -target-feature +sve -verify
4+
// expected-no-diagnostics
5+
6+
// REQUIRES: aarch64-registered-target
7+
8+
#include <arm_sve.h>
9+
10+
// Properties: guard="sve,(sve2p3|sme2p3)" streaming_guard="sme,(sve2p3|sme2p3)" flags="feature-dependent"
11+
12+
// Calls every overloaded and explicitly typed svaddqp / svaddsubp / svsubp
// spelling from a function with no streaming attribute. The file is checked
// with -fsyntax-only -verify and expected-no-diagnostics, so each call must be
// accepted under both RUN configurations.
// NOTE(review): only the _m and _x spellings of svsubp are exercised here;
// confirm whether _z spellings should also be generated, since arm_sve.td in
// this change defines SVSUBP_Z with MergeZero.
void test(void) {
13+
svbool_t svbool_t_val;
14+
svint8_t svint8_t_val;
15+
svint16_t svint16_t_val;
16+
svint32_t svint32_t_val;
17+
svint64_t svint64_t_val;
18+
svuint8_t svuint8_t_val;
19+
svuint16_t svuint16_t_val;
20+
svuint32_t svuint32_t_val;
21+
svuint64_t svuint64_t_val;
22+
23+
svaddqp(svint8_t_val, svint8_t_val);
24+
svaddqp(svint16_t_val, svint16_t_val);
25+
svaddqp(svint32_t_val, svint32_t_val);
26+
svaddqp(svint64_t_val, svint64_t_val);
27+
svaddqp(svuint8_t_val, svuint8_t_val);
28+
svaddqp(svuint16_t_val, svuint16_t_val);
29+
svaddqp(svuint32_t_val, svuint32_t_val);
30+
svaddqp(svuint64_t_val, svuint64_t_val);
31+
svaddqp_s8(svint8_t_val, svint8_t_val);
32+
svaddqp_s16(svint16_t_val, svint16_t_val);
33+
svaddqp_s32(svint32_t_val, svint32_t_val);
34+
svaddqp_s64(svint64_t_val, svint64_t_val);
35+
svaddqp_u8(svuint8_t_val, svuint8_t_val);
36+
svaddqp_u16(svuint16_t_val, svuint16_t_val);
37+
svaddqp_u32(svuint32_t_val, svuint32_t_val);
38+
svaddqp_u64(svuint64_t_val, svuint64_t_val);
39+
svaddsubp(svint8_t_val, svint8_t_val);
40+
svaddsubp(svint16_t_val, svint16_t_val);
41+
svaddsubp(svint32_t_val, svint32_t_val);
42+
svaddsubp(svint64_t_val, svint64_t_val);
43+
svaddsubp(svuint8_t_val, svuint8_t_val);
44+
svaddsubp(svuint16_t_val, svuint16_t_val);
45+
svaddsubp(svuint32_t_val, svuint32_t_val);
46+
svaddsubp(svuint64_t_val, svuint64_t_val);
47+
svaddsubp_s8(svint8_t_val, svint8_t_val);
48+
svaddsubp_s16(svint16_t_val, svint16_t_val);
49+
svaddsubp_s32(svint32_t_val, svint32_t_val);
50+
svaddsubp_s64(svint64_t_val, svint64_t_val);
51+
svaddsubp_u8(svuint8_t_val, svuint8_t_val);
52+
svaddsubp_u16(svuint16_t_val, svuint16_t_val);
53+
svaddsubp_u32(svuint32_t_val, svuint32_t_val);
54+
svaddsubp_u64(svuint64_t_val, svuint64_t_val);
55+
svsubp_m(svbool_t_val, svint8_t_val, svint8_t_val);
56+
svsubp_m(svbool_t_val, svint16_t_val, svint16_t_val);
57+
svsubp_m(svbool_t_val, svint32_t_val, svint32_t_val);
58+
svsubp_m(svbool_t_val, svint64_t_val, svint64_t_val);
59+
svsubp_m(svbool_t_val, svuint8_t_val, svuint8_t_val);
60+
svsubp_m(svbool_t_val, svuint16_t_val, svuint16_t_val);
61+
svsubp_m(svbool_t_val, svuint32_t_val, svuint32_t_val);
62+
svsubp_m(svbool_t_val, svuint64_t_val, svuint64_t_val);
63+
svsubp_s8_m(svbool_t_val, svint8_t_val, svint8_t_val);
64+
svsubp_s8_x(svbool_t_val, svint8_t_val, svint8_t_val);
65+
svsubp_s16_m(svbool_t_val, svint16_t_val, svint16_t_val);
66+
svsubp_s16_x(svbool_t_val, svint16_t_val, svint16_t_val);
67+
svsubp_s32_m(svbool_t_val, svint32_t_val, svint32_t_val);
68+
svsubp_s32_x(svbool_t_val, svint32_t_val, svint32_t_val);
69+
svsubp_s64_m(svbool_t_val, svint64_t_val, svint64_t_val);
70+
svsubp_s64_x(svbool_t_val, svint64_t_val, svint64_t_val);
71+
svsubp_u8_m(svbool_t_val, svuint8_t_val, svuint8_t_val);
72+
svsubp_u8_x(svbool_t_val, svuint8_t_val, svuint8_t_val);
73+
svsubp_u16_m(svbool_t_val, svuint16_t_val, svuint16_t_val);
74+
svsubp_u16_x(svbool_t_val, svuint16_t_val, svuint16_t_val);
75+
svsubp_u32_m(svbool_t_val, svuint32_t_val, svuint32_t_val);
76+
svsubp_u32_x(svbool_t_val, svuint32_t_val, svuint32_t_val);
77+
svsubp_u64_m(svbool_t_val, svuint64_t_val, svuint64_t_val);
78+
svsubp_u64_x(svbool_t_val, svuint64_t_val, svuint64_t_val);
79+
svsubp_x(svbool_t_val, svint8_t_val, svint8_t_val);
80+
svsubp_x(svbool_t_val, svint16_t_val, svint16_t_val);
81+
svsubp_x(svbool_t_val, svint32_t_val, svint32_t_val);
82+
svsubp_x(svbool_t_val, svint64_t_val, svint64_t_val);
83+
svsubp_x(svbool_t_val, svuint8_t_val, svuint8_t_val);
84+
svsubp_x(svbool_t_val, svuint16_t_val, svuint16_t_val);
85+
svsubp_x(svbool_t_val, svuint32_t_val, svuint32_t_val);
86+
svsubp_x(svbool_t_val, svuint64_t_val, svuint64_t_val);
87+
}
88+
89+
// Calls every overloaded and explicitly typed svaddqp / svaddsubp / svsubp
// spelling from an __arm_streaming function. The file is checked with
// -fsyntax-only -verify and expected-no-diagnostics, so each call must be
// accepted in a streaming caller under both RUN configurations.
void test_streaming(void) __arm_streaming{
90+
svbool_t svbool_t_val;
91+
svint8_t svint8_t_val;
92+
svint16_t svint16_t_val;
93+
svint32_t svint32_t_val;
94+
svint64_t svint64_t_val;
95+
svuint8_t svuint8_t_val;
96+
svuint16_t svuint16_t_val;
97+
svuint32_t svuint32_t_val;
98+
svuint64_t svuint64_t_val;
99+
100+
svaddqp(svint8_t_val, svint8_t_val);
101+
svaddqp(svint16_t_val, svint16_t_val);
102+
svaddqp(svint32_t_val, svint32_t_val);
103+
svaddqp(svint64_t_val, svint64_t_val);
104+
svaddqp(svuint8_t_val, svuint8_t_val);
105+
svaddqp(svuint16_t_val, svuint16_t_val);
106+
svaddqp(svuint32_t_val, svuint32_t_val);
107+
svaddqp(svuint64_t_val, svuint64_t_val);
108+
svaddqp_s8(svint8_t_val, svint8_t_val);
109+
svaddqp_s16(svint16_t_val, svint16_t_val);
110+
svaddqp_s32(svint32_t_val, svint32_t_val);
111+
svaddqp_s64(svint64_t_val, svint64_t_val);
112+
svaddqp_u8(svuint8_t_val, svuint8_t_val);
113+
svaddqp_u16(svuint16_t_val, svuint16_t_val);
114+
svaddqp_u32(svuint32_t_val, svuint32_t_val);
115+
svaddqp_u64(svuint64_t_val, svuint64_t_val);
116+
svaddsubp(svint8_t_val, svint8_t_val);
117+
svaddsubp(svint16_t_val, svint16_t_val);
118+
svaddsubp(svint32_t_val, svint32_t_val);
119+
svaddsubp(svint64_t_val, svint64_t_val);
120+
svaddsubp(svuint8_t_val, svuint8_t_val);
121+
svaddsubp(svuint16_t_val, svuint16_t_val);
122+
svaddsubp(svuint32_t_val, svuint32_t_val);
123+
svaddsubp(svuint64_t_val, svuint64_t_val);
124+
svaddsubp_s8(svint8_t_val, svint8_t_val);
125+
svaddsubp_s16(svint16_t_val, svint16_t_val);
126+
svaddsubp_s32(svint32_t_val, svint32_t_val);
127+
svaddsubp_s64(svint64_t_val, svint64_t_val);
128+
svaddsubp_u8(svuint8_t_val, svuint8_t_val);
129+
svaddsubp_u16(svuint16_t_val, svuint16_t_val);
130+
svaddsubp_u32(svuint32_t_val, svuint32_t_val);
131+
svaddsubp_u64(svuint64_t_val, svuint64_t_val);
132+
svsubp_m(svbool_t_val, svint8_t_val, svint8_t_val);
133+
svsubp_m(svbool_t_val, svint16_t_val, svint16_t_val);
134+
svsubp_m(svbool_t_val, svint32_t_val, svint32_t_val);
135+
svsubp_m(svbool_t_val, svint64_t_val, svint64_t_val);
136+
svsubp_m(svbool_t_val, svuint8_t_val, svuint8_t_val);
137+
svsubp_m(svbool_t_val, svuint16_t_val, svuint16_t_val);
138+
svsubp_m(svbool_t_val, svuint32_t_val, svuint32_t_val);
139+
svsubp_m(svbool_t_val, svuint64_t_val, svuint64_t_val);
140+
svsubp_s8_m(svbool_t_val, svint8_t_val, svint8_t_val);
141+
svsubp_s8_x(svbool_t_val, svint8_t_val, svint8_t_val);
142+
svsubp_s16_m(svbool_t_val, svint16_t_val, svint16_t_val);
143+
svsubp_s16_x(svbool_t_val, svint16_t_val, svint16_t_val);
144+
svsubp_s32_m(svbool_t_val, svint32_t_val, svint32_t_val);
145+
svsubp_s32_x(svbool_t_val, svint32_t_val, svint32_t_val);
146+
svsubp_s64_m(svbool_t_val, svint64_t_val, svint64_t_val);
147+
svsubp_s64_x(svbool_t_val, svint64_t_val, svint64_t_val);
148+
svsubp_u8_m(svbool_t_val, svuint8_t_val, svuint8_t_val);
149+
svsubp_u8_x(svbool_t_val, svuint8_t_val, svuint8_t_val);
150+
svsubp_u16_m(svbool_t_val, svuint16_t_val, svuint16_t_val);
151+
svsubp_u16_x(svbool_t_val, svuint16_t_val, svuint16_t_val);
152+
svsubp_u32_m(svbool_t_val, svuint32_t_val, svuint32_t_val);
153+
svsubp_u32_x(svbool_t_val, svuint32_t_val, svuint32_t_val);
154+
svsubp_u64_m(svbool_t_val, svuint64_t_val, svuint64_t_val);
155+
svsubp_u64_x(svbool_t_val, svuint64_t_val, svuint64_t_val);
156+
svsubp_x(svbool_t_val, svint8_t_val, svint8_t_val);
157+
svsubp_x(svbool_t_val, svint16_t_val, svint16_t_val);
158+
svsubp_x(svbool_t_val, svint32_t_val, svint32_t_val);
159+
svsubp_x(svbool_t_val, svint64_t_val, svint64_t_val);
160+
svsubp_x(svbool_t_val, svuint8_t_val, svuint8_t_val);
161+
svsubp_x(svbool_t_val, svuint16_t_val, svuint16_t_val);
162+
svsubp_x(svbool_t_val, svuint32_t_val, svuint32_t_val);
163+
svsubp_x(svbool_t_val, svuint64_t_val, svuint64_t_val);
164+
}
165+
166+
// Calls every overloaded and explicitly typed svaddqp / svaddsubp / svsubp
// spelling from an __arm_streaming_compatible function. The file is checked
// with -fsyntax-only -verify and expected-no-diagnostics, so each call must be
// accepted in a streaming-compatible caller under both RUN configurations.
void test_streaming_compatible(void) __arm_streaming_compatible{
167+
svbool_t svbool_t_val;
168+
svint8_t svint8_t_val;
169+
svint16_t svint16_t_val;
170+
svint32_t svint32_t_val;
171+
svint64_t svint64_t_val;
172+
svuint8_t svuint8_t_val;
173+
svuint16_t svuint16_t_val;
174+
svuint32_t svuint32_t_val;
175+
svuint64_t svuint64_t_val;
176+
177+
svaddqp(svint8_t_val, svint8_t_val);
178+
svaddqp(svint16_t_val, svint16_t_val);
179+
svaddqp(svint32_t_val, svint32_t_val);
180+
svaddqp(svint64_t_val, svint64_t_val);
181+
svaddqp(svuint8_t_val, svuint8_t_val);
182+
svaddqp(svuint16_t_val, svuint16_t_val);
183+
svaddqp(svuint32_t_val, svuint32_t_val);
184+
svaddqp(svuint64_t_val, svuint64_t_val);
185+
svaddqp_s8(svint8_t_val, svint8_t_val);
186+
svaddqp_s16(svint16_t_val, svint16_t_val);
187+
svaddqp_s32(svint32_t_val, svint32_t_val);
188+
svaddqp_s64(svint64_t_val, svint64_t_val);
189+
svaddqp_u8(svuint8_t_val, svuint8_t_val);
190+
svaddqp_u16(svuint16_t_val, svuint16_t_val);
191+
svaddqp_u32(svuint32_t_val, svuint32_t_val);
192+
svaddqp_u64(svuint64_t_val, svuint64_t_val);
193+
svaddsubp(svint8_t_val, svint8_t_val);
194+
svaddsubp(svint16_t_val, svint16_t_val);
195+
svaddsubp(svint32_t_val, svint32_t_val);
196+
svaddsubp(svint64_t_val, svint64_t_val);
197+
svaddsubp(svuint8_t_val, svuint8_t_val);
198+
svaddsubp(svuint16_t_val, svuint16_t_val);
199+
svaddsubp(svuint32_t_val, svuint32_t_val);
200+
svaddsubp(svuint64_t_val, svuint64_t_val);
201+
svaddsubp_s8(svint8_t_val, svint8_t_val);
202+
svaddsubp_s16(svint16_t_val, svint16_t_val);
203+
svaddsubp_s32(svint32_t_val, svint32_t_val);
204+
svaddsubp_s64(svint64_t_val, svint64_t_val);
205+
svaddsubp_u8(svuint8_t_val, svuint8_t_val);
206+
svaddsubp_u16(svuint16_t_val, svuint16_t_val);
207+
svaddsubp_u32(svuint32_t_val, svuint32_t_val);
208+
svaddsubp_u64(svuint64_t_val, svuint64_t_val);
209+
svsubp_m(svbool_t_val, svint8_t_val, svint8_t_val);
210+
svsubp_m(svbool_t_val, svint16_t_val, svint16_t_val);
211+
svsubp_m(svbool_t_val, svint32_t_val, svint32_t_val);
212+
svsubp_m(svbool_t_val, svint64_t_val, svint64_t_val);
213+
svsubp_m(svbool_t_val, svuint8_t_val, svuint8_t_val);
214+
svsubp_m(svbool_t_val, svuint16_t_val, svuint16_t_val);
215+
svsubp_m(svbool_t_val, svuint32_t_val, svuint32_t_val);
216+
svsubp_m(svbool_t_val, svuint64_t_val, svuint64_t_val);
217+
svsubp_s8_m(svbool_t_val, svint8_t_val, svint8_t_val);
218+
svsubp_s8_x(svbool_t_val, svint8_t_val, svint8_t_val);
219+
svsubp_s16_m(svbool_t_val, svint16_t_val, svint16_t_val);
220+
svsubp_s16_x(svbool_t_val, svint16_t_val, svint16_t_val);
221+
svsubp_s32_m(svbool_t_val, svint32_t_val, svint32_t_val);
222+
svsubp_s32_x(svbool_t_val, svint32_t_val, svint32_t_val);
223+
svsubp_s64_m(svbool_t_val, svint64_t_val, svint64_t_val);
224+
svsubp_s64_x(svbool_t_val, svint64_t_val, svint64_t_val);
225+
svsubp_u8_m(svbool_t_val, svuint8_t_val, svuint8_t_val);
226+
svsubp_u8_x(svbool_t_val, svuint8_t_val, svuint8_t_val);
227+
svsubp_u16_m(svbool_t_val, svuint16_t_val, svuint16_t_val);
228+
svsubp_u16_x(svbool_t_val, svuint16_t_val, svuint16_t_val);
229+
svsubp_u32_m(svbool_t_val, svuint32_t_val, svuint32_t_val);
230+
svsubp_u32_x(svbool_t_val, svuint32_t_val, svuint32_t_val);
231+
svsubp_u64_m(svbool_t_val, svuint64_t_val, svuint64_t_val);
232+
svsubp_u64_x(svbool_t_val, svuint64_t_val, svuint64_t_val);
233+
svsubp_x(svbool_t_val, svint8_t_val, svint8_t_val);
234+
svsubp_x(svbool_t_val, svint16_t_val, svint16_t_val);
235+
svsubp_x(svbool_t_val, svint32_t_val, svint32_t_val);
236+
svsubp_x(svbool_t_val, svint64_t_val, svint64_t_val);
237+
svsubp_x(svbool_t_val, svuint8_t_val, svuint8_t_val);
238+
svsubp_x(svbool_t_val, svuint16_t_val, svuint16_t_val);
239+
svsubp_x(svbool_t_val, svuint32_t_val, svuint32_t_val);
240+
svsubp_x(svbool_t_val, svuint64_t_val, svuint64_t_val);
241+
}

llvm/include/llvm/IR/IntrinsicsAArch64.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2561,6 +2561,10 @@ def int_aarch64_sve_sminp : AdvSIMD_Pred2VectorArg_Intrinsic<[IntrSpeculatable
25612561
def int_aarch64_sve_umaxp : AdvSIMD_Pred2VectorArg_Intrinsic<[IntrSpeculatable]>;
25622562
def int_aarch64_sve_uminp : AdvSIMD_Pred2VectorArg_Intrinsic<[IntrSpeculatable]>;
25632563

2564+
// SVE2.3 pairwise add / add-and-subtract: two vector operands, no predicate
// operand (AdvSIMD_2VectorArg_Intrinsic), marked IntrSpeculatable.
def int_aarch64_sve_addqp : AdvSIMD_2VectorArg_Intrinsic<[IntrSpeculatable]>;
2565+
def int_aarch64_sve_addsubp : AdvSIMD_2VectorArg_Intrinsic<[IntrSpeculatable]>;
2566+
// SVE2.3 pairwise subtract: uses the same predicate-plus-two-vectors class as
// the sminp/umaxp/uminp definitions directly above.
def int_aarch64_sve_subp : AdvSIMD_Pred2VectorArg_Intrinsic<[IntrSpeculatable]>;
2567+
25642568
//
25652569
// SVE2 - Widening pairwise arithmetic
25662570
//

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4834,11 +4834,11 @@ let Predicates = [HasSVE2p2_or_SME2p2] in {
48344834
//===----------------------------------------------------------------------===//
48354835
let Predicates = [HasSVE2p3_or_SME2p3] in {
48364836
// SVE2 Add pairwise within quadword vector segments (unpredicated)
4837-
defm ADDQP_ZZZ : sve2_int_mul<0b110, "addqp", null_frag>;
4837+
defm ADDQP_ZZZ : sve2_int_mul<0b110, "addqp", int_aarch64_sve_addqp>;
48384838

48394839
// SVE2 Add subtract/subtract pairwise
4840-
defm ADDSUBP_ZZZ : sve2_int_mul<0b111, "addsubp", null_frag>;
4841-
defm SUBP_ZPmZZ : sve2_int_arith_pred<0b100001, "subp", null_frag>;
4840+
defm ADDSUBP_ZZZ : sve2_int_mul<0b111, "addsubp", int_aarch64_sve_addsubp>;
4841+
defm SUBP_ZPmZZ : sve2_int_arith_pred<0b100001, "subp", int_aarch64_sve_subp>;
48424842

48434843
// SVE2 integer absolute difference and accumulate long
48444844
defm SABAL_ZZZ : sve2_int_two_way_absdiff_accum_long<0b0, "sabal">;
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
2+
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p3 -enable-subreg-liveness -verify-machineinstrs < %s | FileCheck %s
3+
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2p3 -force-streaming -verify-machineinstrs < %s | FileCheck %s
4+
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme,+sve2p3 -force-streaming -verify-machineinstrs < %s | FileCheck %s
5+
6+
; llvm.aarch64.sve.addqp on <vscale x 16 x i8>: the expected output is a single
; ADDQP on .b elements (z0 = z0, z1) followed by ret.
define <vscale x 16 x i8> @test_svaddqp_i8(<vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm) {
7+
; CHECK-LABEL: test_svaddqp_i8:
8+
; CHECK: // %bb.0:
9+
; CHECK-NEXT: addqp z0.b, z0.b, z1.b
10+
; CHECK-NEXT: ret
11+
%res = call <vscale x 16 x i8> @llvm.aarch64.sve.addqp.nxv16i8(<vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
12+
ret <vscale x 16 x i8> %res
13+
}
14+
15+
; llvm.aarch64.sve.addqp on <vscale x 8 x i16>: the expected output is a single
; ADDQP on .h elements (z0 = z0, z1) followed by ret.
define <vscale x 8 x i16> @test_svaddqp_i16(<vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm) {
16+
; CHECK-LABEL: test_svaddqp_i16:
17+
; CHECK: // %bb.0:
18+
; CHECK-NEXT: addqp z0.h, z0.h, z1.h
19+
; CHECK-NEXT: ret
20+
%res = call <vscale x 8 x i16> @llvm.aarch64.sve.addqp.nxv8i16(<vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm)
21+
ret <vscale x 8 x i16> %res
22+
}
23+
24+
; llvm.aarch64.sve.addqp on <vscale x 4 x i32>: the expected output is a single
; ADDQP on .s elements (z0 = z0, z1) followed by ret.
define <vscale x 4 x i32> @test_svaddqp_i32(<vscale x 4 x i32> %zn, <vscale x 4 x i32> %zm) {
25+
; CHECK-LABEL: test_svaddqp_i32:
26+
; CHECK: // %bb.0:
27+
; CHECK-NEXT: addqp z0.s, z0.s, z1.s
28+
; CHECK-NEXT: ret
29+
%res = call <vscale x 4 x i32> @llvm.aarch64.sve.addqp.nxv4i32(<vscale x 4 x i32> %zn, <vscale x 4 x i32> %zm)
30+
ret <vscale x 4 x i32> %res
31+
}
32+
33+
; llvm.aarch64.sve.addqp on <vscale x 2 x i64>: the expected output is a single
; ADDQP on .d elements (z0 = z0, z1) followed by ret.
define <vscale x 2 x i64> @test_svaddqp_i64(<vscale x 2 x i64> %zn, <vscale x 2 x i64> %zm) {
34+
; CHECK-LABEL: test_svaddqp_i64:
35+
; CHECK: // %bb.0:
36+
; CHECK-NEXT: addqp z0.d, z0.d, z1.d
37+
; CHECK-NEXT: ret
38+
%res = call <vscale x 2 x i64> @llvm.aarch64.sve.addqp.nxv2i64(<vscale x 2 x i64> %zn, <vscale x 2 x i64> %zm)
39+
ret <vscale x 2 x i64> %res
40+
}
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
2+
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p3 -enable-subreg-liveness -verify-machineinstrs < %s | FileCheck %s
3+
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2p3 -force-streaming -verify-machineinstrs < %s | FileCheck %s
4+
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme,+sve2p3 -force-streaming -verify-machineinstrs < %s | FileCheck %s
5+
6+
; llvm.aarch64.sve.addsubp on <vscale x 16 x i8>: the expected output is a
; single ADDSUBP on .b elements (z0 = z0, z1) followed by ret.
define <vscale x 16 x i8> @test_addsubp_i8(<vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm) {
7+
; CHECK-LABEL: test_addsubp_i8:
8+
; CHECK: // %bb.0:
9+
; CHECK-NEXT: addsubp z0.b, z0.b, z1.b
10+
; CHECK-NEXT: ret
11+
%res = call <vscale x 16 x i8> @llvm.aarch64.sve.addsubp.nxv16i8(<vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
12+
ret <vscale x 16 x i8> %res
13+
}
14+
15+
; llvm.aarch64.sve.addsubp on <vscale x 8 x i16>: the expected output is a
; single ADDSUBP on .h elements (z0 = z0, z1) followed by ret.
define <vscale x 8 x i16> @test_addsubp_i16(<vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm) {
16+
; CHECK-LABEL: test_addsubp_i16:
17+
; CHECK: // %bb.0:
18+
; CHECK-NEXT: addsubp z0.h, z0.h, z1.h
19+
; CHECK-NEXT: ret
20+
%res = call <vscale x 8 x i16> @llvm.aarch64.sve.addsubp.nxv8i16(<vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm)
21+
ret <vscale x 8 x i16> %res
22+
}
23+
24+
; llvm.aarch64.sve.addsubp on <vscale x 4 x i32>: the expected output is a
; single ADDSUBP on .s elements (z0 = z0, z1) followed by ret.
define <vscale x 4 x i32> @test_addsubp_i32(<vscale x 4 x i32> %zn, <vscale x 4 x i32> %zm) {
25+
; CHECK-LABEL: test_addsubp_i32:
26+
; CHECK: // %bb.0:
27+
; CHECK-NEXT: addsubp z0.s, z0.s, z1.s
28+
; CHECK-NEXT: ret
29+
%res = call <vscale x 4 x i32> @llvm.aarch64.sve.addsubp.nxv4i32(<vscale x 4 x i32> %zn, <vscale x 4 x i32> %zm)
30+
ret <vscale x 4 x i32> %res
31+
}
32+
33+
; llvm.aarch64.sve.addsubp on <vscale x 2 x i64>: the expected output is a
; single ADDSUBP on .d elements (z0 = z0, z1) followed by ret.
define <vscale x 2 x i64> @test_addsubp_i64(<vscale x 2 x i64> %zn, <vscale x 2 x i64> %zm) {
34+
; CHECK-LABEL: test_addsubp_i64:
35+
; CHECK: // %bb.0:
36+
; CHECK-NEXT: addsubp z0.d, z0.d, z1.d
37+
; CHECK-NEXT: ret
38+
%res = call <vscale x 2 x i64> @llvm.aarch64.sve.addsubp.nxv2i64(<vscale x 2 x i64> %zn, <vscale x 2 x i64> %zm)
39+
ret <vscale x 2 x i64> %res
40+
}

0 commit comments

Comments
 (0)