Skip to content

Commit 67deb54

Browse files
authored
[Clang][AArch64][SVE2p3][SME2p3] Add intrinsics for v9.7a Two-way signed/unsigned absolute difference sum and accumulate long ops (#188972)
Add the following new clang intrinsics based on the ACLE specification ARM-software/acle#428 (Add alpha support for 9.7 data processing intrinsics). SABAL (Two-way signed absolute difference sum and accumulate long): - svint16_t svabal[_s16](svint16_t, svint8_t, svint8_t) / svint16_t svabal[_n_s16](svint16_t, svint8_t, int8_t) - svint32_t svabal[_s32](svint32_t, svint16_t, svint16_t) / svint32_t svabal[_n_s32](svint32_t, svint16_t, int16_t) - svint64_t svabal[_s64](svint64_t, svint32_t, svint32_t) / svint64_t svabal[_n_s64](svint64_t, svint32_t, int32_t) UABAL (Two-way unsigned absolute difference sum and accumulate long): - svuint16_t svabal[_u16](svuint16_t, svuint8_t, svuint8_t) / svuint16_t svabal[_n_u16](svuint16_t, svuint8_t, uint8_t) - svuint32_t svabal[_u32](svuint32_t, svuint16_t, svuint16_t) / svuint32_t svabal[_n_u32](svuint32_t, svuint16_t, uint16_t) - svuint64_t svabal[_u64](svuint64_t, svuint32_t, svuint32_t) / svuint64_t svabal[_n_u64](svuint64_t, svuint32_t, uint32_t)
1 parent 8e8113f commit 67deb54

8 files changed

Lines changed: 759 additions & 3 deletions

File tree

clang/include/clang/Basic/arm_sve.td

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1350,6 +1350,17 @@ defm SVRECPE : SInstZPZ<"svrecpe", "Ui", "aarch64_sve_urecpe">;
13501350
defm SVRSQRTE : SInstZPZ<"svrsqrte", "Ui", "aarch64_sve_ursqrte">;
13511351
}
13521352

1353+
////////////////////////////////////////////////////////////////////////////////
1354+
// SVE2.3 - Two-way signed/unsigned absolute difference sum and accumulate long
1355+
1356+
let SVETargetGuard = "sve2p3|sme2p3", SMETargetGuard = "sve2p3|sme2p3" in {
1357+
def SVABAL_S : SInst<"svabal[_{d}]", "ddhh", "sil" , MergeNone, "aarch64_sve_sabal", [VerifyRuntimeMode]>;
1358+
def SVABAL_S_N : SInst<"svabal[_n_{d}]", "ddhR", "sil" , MergeNone, "aarch64_sve_sabal", [VerifyRuntimeMode]>;
1359+
1360+
def SVABAL_U : SInst<"svabal[_{d}]", "ddhh", "UsUiUl", MergeNone, "aarch64_sve_uabal", [VerifyRuntimeMode]>;
1361+
def SVABAL_U_N : SInst<"svabal[_n_{d}]", "ddhR", "UsUiUl", MergeNone, "aarch64_sve_uabal", [VerifyRuntimeMode]>;
1362+
}
1363+
13531364
//------------------------------------------------------------------------------
13541365

13551366
multiclass SInstZPZxZ<string name, string types, string pat_v, string pat_n, string m_intrinsic, string x_intrinsic, list<FlagType> flags=[]> {

clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_svabal.c

Lines changed: 479 additions & 0 deletions
Large diffs are not rendered by default.

clang/test/Sema/AArch64/arm_sve_feature_dependent_sve_AND_LP_sve2p3_OR_sme2p3_RP___sme_AND_LP_sve2p3_OR_sme2p3_RP.c

Lines changed: 138 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@
1010
// Properties: guard="sve,(sve2p3|sme2p3)" streaming_guard="sme,(sve2p3|sme2p3)" flags="feature-dependent"
1111

1212
void test(void) {
13+
int8_t int8_t_val;
14+
int16_t int16_t_val;
15+
int32_t int32_t_val;
1316
svbool_t svbool_t_val;
1417
svint8_t svint8_t_val;
1518
svint16_t svint16_t_val;
@@ -19,7 +22,34 @@ void test(void) {
1922
svuint16_t svuint16_t_val;
2023
svuint32_t svuint32_t_val;
2124
svuint64_t svuint64_t_val;
25+
uint8_t uint8_t_val;
26+
uint16_t uint16_t_val;
27+
uint32_t uint32_t_val;
2228

29+
svabal(svint16_t_val, svint8_t_val, int8_t_val);
30+
svabal(svint16_t_val, svint8_t_val, svint8_t_val);
31+
svabal(svint32_t_val, svint16_t_val, int16_t_val);
32+
svabal(svint32_t_val, svint16_t_val, svint16_t_val);
33+
svabal(svint64_t_val, svint32_t_val, int32_t_val);
34+
svabal(svint64_t_val, svint32_t_val, svint32_t_val);
35+
svabal(svuint16_t_val, svuint8_t_val, svuint8_t_val);
36+
svabal(svuint16_t_val, svuint8_t_val, uint8_t_val);
37+
svabal(svuint32_t_val, svuint16_t_val, svuint16_t_val);
38+
svabal(svuint32_t_val, svuint16_t_val, uint16_t_val);
39+
svabal(svuint64_t_val, svuint32_t_val, svuint32_t_val);
40+
svabal(svuint64_t_val, svuint32_t_val, uint32_t_val);
41+
svabal_n_s16(svint16_t_val, svint8_t_val, int8_t_val);
42+
svabal_n_s32(svint32_t_val, svint16_t_val, int16_t_val);
43+
svabal_n_s64(svint64_t_val, svint32_t_val, int32_t_val);
44+
svabal_n_u16(svuint16_t_val, svuint8_t_val, uint8_t_val);
45+
svabal_n_u32(svuint32_t_val, svuint16_t_val, uint16_t_val);
46+
svabal_n_u64(svuint64_t_val, svuint32_t_val, uint32_t_val);
47+
svabal_s16(svint16_t_val, svint8_t_val, svint8_t_val);
48+
svabal_s32(svint32_t_val, svint16_t_val, svint16_t_val);
49+
svabal_s64(svint64_t_val, svint32_t_val, svint32_t_val);
50+
svabal_u16(svuint16_t_val, svuint8_t_val, svuint8_t_val);
51+
svabal_u32(svuint32_t_val, svuint16_t_val, svuint16_t_val);
52+
svabal_u64(svuint64_t_val, svuint32_t_val, svuint32_t_val);
2353
svaddqp(svint8_t_val, svint8_t_val);
2454
svaddqp(svint16_t_val, svint16_t_val);
2555
svaddqp(svint32_t_val, svint32_t_val);
@@ -62,20 +92,28 @@ void test(void) {
6292
svsubp_m(svbool_t_val, svuint64_t_val, svuint64_t_val);
6393
svsubp_s8_m(svbool_t_val, svint8_t_val, svint8_t_val);
6494
svsubp_s8_x(svbool_t_val, svint8_t_val, svint8_t_val);
95+
svsubp_s8_z(svbool_t_val, svint8_t_val, svint8_t_val);
6596
svsubp_s16_m(svbool_t_val, svint16_t_val, svint16_t_val);
6697
svsubp_s16_x(svbool_t_val, svint16_t_val, svint16_t_val);
98+
svsubp_s16_z(svbool_t_val, svint16_t_val, svint16_t_val);
6799
svsubp_s32_m(svbool_t_val, svint32_t_val, svint32_t_val);
68100
svsubp_s32_x(svbool_t_val, svint32_t_val, svint32_t_val);
101+
svsubp_s32_z(svbool_t_val, svint32_t_val, svint32_t_val);
69102
svsubp_s64_m(svbool_t_val, svint64_t_val, svint64_t_val);
70103
svsubp_s64_x(svbool_t_val, svint64_t_val, svint64_t_val);
104+
svsubp_s64_z(svbool_t_val, svint64_t_val, svint64_t_val);
71105
svsubp_u8_m(svbool_t_val, svuint8_t_val, svuint8_t_val);
72106
svsubp_u8_x(svbool_t_val, svuint8_t_val, svuint8_t_val);
107+
svsubp_u8_z(svbool_t_val, svuint8_t_val, svuint8_t_val);
73108
svsubp_u16_m(svbool_t_val, svuint16_t_val, svuint16_t_val);
74109
svsubp_u16_x(svbool_t_val, svuint16_t_val, svuint16_t_val);
110+
svsubp_u16_z(svbool_t_val, svuint16_t_val, svuint16_t_val);
75111
svsubp_u32_m(svbool_t_val, svuint32_t_val, svuint32_t_val);
76112
svsubp_u32_x(svbool_t_val, svuint32_t_val, svuint32_t_val);
113+
svsubp_u32_z(svbool_t_val, svuint32_t_val, svuint32_t_val);
77114
svsubp_u64_m(svbool_t_val, svuint64_t_val, svuint64_t_val);
78115
svsubp_u64_x(svbool_t_val, svuint64_t_val, svuint64_t_val);
116+
svsubp_u64_z(svbool_t_val, svuint64_t_val, svuint64_t_val);
79117
svsubp_x(svbool_t_val, svint8_t_val, svint8_t_val);
80118
svsubp_x(svbool_t_val, svint16_t_val, svint16_t_val);
81119
svsubp_x(svbool_t_val, svint32_t_val, svint32_t_val);
@@ -84,9 +122,20 @@ void test(void) {
84122
svsubp_x(svbool_t_val, svuint16_t_val, svuint16_t_val);
85123
svsubp_x(svbool_t_val, svuint32_t_val, svuint32_t_val);
86124
svsubp_x(svbool_t_val, svuint64_t_val, svuint64_t_val);
125+
svsubp_z(svbool_t_val, svint8_t_val, svint8_t_val);
126+
svsubp_z(svbool_t_val, svint16_t_val, svint16_t_val);
127+
svsubp_z(svbool_t_val, svint32_t_val, svint32_t_val);
128+
svsubp_z(svbool_t_val, svint64_t_val, svint64_t_val);
129+
svsubp_z(svbool_t_val, svuint8_t_val, svuint8_t_val);
130+
svsubp_z(svbool_t_val, svuint16_t_val, svuint16_t_val);
131+
svsubp_z(svbool_t_val, svuint32_t_val, svuint32_t_val);
132+
svsubp_z(svbool_t_val, svuint64_t_val, svuint64_t_val);
87133
}
88134

89135
void test_streaming(void) __arm_streaming{
136+
int8_t int8_t_val;
137+
int16_t int16_t_val;
138+
int32_t int32_t_val;
90139
svbool_t svbool_t_val;
91140
svint8_t svint8_t_val;
92141
svint16_t svint16_t_val;
@@ -96,7 +145,34 @@ void test_streaming(void) __arm_streaming{
96145
svuint16_t svuint16_t_val;
97146
svuint32_t svuint32_t_val;
98147
svuint64_t svuint64_t_val;
148+
uint8_t uint8_t_val;
149+
uint16_t uint16_t_val;
150+
uint32_t uint32_t_val;
99151

152+
svabal(svint16_t_val, svint8_t_val, int8_t_val);
153+
svabal(svint16_t_val, svint8_t_val, svint8_t_val);
154+
svabal(svint32_t_val, svint16_t_val, int16_t_val);
155+
svabal(svint32_t_val, svint16_t_val, svint16_t_val);
156+
svabal(svint64_t_val, svint32_t_val, int32_t_val);
157+
svabal(svint64_t_val, svint32_t_val, svint32_t_val);
158+
svabal(svuint16_t_val, svuint8_t_val, svuint8_t_val);
159+
svabal(svuint16_t_val, svuint8_t_val, uint8_t_val);
160+
svabal(svuint32_t_val, svuint16_t_val, svuint16_t_val);
161+
svabal(svuint32_t_val, svuint16_t_val, uint16_t_val);
162+
svabal(svuint64_t_val, svuint32_t_val, svuint32_t_val);
163+
svabal(svuint64_t_val, svuint32_t_val, uint32_t_val);
164+
svabal_n_s16(svint16_t_val, svint8_t_val, int8_t_val);
165+
svabal_n_s32(svint32_t_val, svint16_t_val, int16_t_val);
166+
svabal_n_s64(svint64_t_val, svint32_t_val, int32_t_val);
167+
svabal_n_u16(svuint16_t_val, svuint8_t_val, uint8_t_val);
168+
svabal_n_u32(svuint32_t_val, svuint16_t_val, uint16_t_val);
169+
svabal_n_u64(svuint64_t_val, svuint32_t_val, uint32_t_val);
170+
svabal_s16(svint16_t_val, svint8_t_val, svint8_t_val);
171+
svabal_s32(svint32_t_val, svint16_t_val, svint16_t_val);
172+
svabal_s64(svint64_t_val, svint32_t_val, svint32_t_val);
173+
svabal_u16(svuint16_t_val, svuint8_t_val, svuint8_t_val);
174+
svabal_u32(svuint32_t_val, svuint16_t_val, svuint16_t_val);
175+
svabal_u64(svuint64_t_val, svuint32_t_val, svuint32_t_val);
100176
svaddqp(svint8_t_val, svint8_t_val);
101177
svaddqp(svint16_t_val, svint16_t_val);
102178
svaddqp(svint32_t_val, svint32_t_val);
@@ -139,20 +215,28 @@ void test_streaming(void) __arm_streaming{
139215
svsubp_m(svbool_t_val, svuint64_t_val, svuint64_t_val);
140216
svsubp_s8_m(svbool_t_val, svint8_t_val, svint8_t_val);
141217
svsubp_s8_x(svbool_t_val, svint8_t_val, svint8_t_val);
218+
svsubp_s8_z(svbool_t_val, svint8_t_val, svint8_t_val);
142219
svsubp_s16_m(svbool_t_val, svint16_t_val, svint16_t_val);
143220
svsubp_s16_x(svbool_t_val, svint16_t_val, svint16_t_val);
221+
svsubp_s16_z(svbool_t_val, svint16_t_val, svint16_t_val);
144222
svsubp_s32_m(svbool_t_val, svint32_t_val, svint32_t_val);
145223
svsubp_s32_x(svbool_t_val, svint32_t_val, svint32_t_val);
224+
svsubp_s32_z(svbool_t_val, svint32_t_val, svint32_t_val);
146225
svsubp_s64_m(svbool_t_val, svint64_t_val, svint64_t_val);
147226
svsubp_s64_x(svbool_t_val, svint64_t_val, svint64_t_val);
227+
svsubp_s64_z(svbool_t_val, svint64_t_val, svint64_t_val);
148228
svsubp_u8_m(svbool_t_val, svuint8_t_val, svuint8_t_val);
149229
svsubp_u8_x(svbool_t_val, svuint8_t_val, svuint8_t_val);
230+
svsubp_u8_z(svbool_t_val, svuint8_t_val, svuint8_t_val);
150231
svsubp_u16_m(svbool_t_val, svuint16_t_val, svuint16_t_val);
151232
svsubp_u16_x(svbool_t_val, svuint16_t_val, svuint16_t_val);
233+
svsubp_u16_z(svbool_t_val, svuint16_t_val, svuint16_t_val);
152234
svsubp_u32_m(svbool_t_val, svuint32_t_val, svuint32_t_val);
153235
svsubp_u32_x(svbool_t_val, svuint32_t_val, svuint32_t_val);
236+
svsubp_u32_z(svbool_t_val, svuint32_t_val, svuint32_t_val);
154237
svsubp_u64_m(svbool_t_val, svuint64_t_val, svuint64_t_val);
155238
svsubp_u64_x(svbool_t_val, svuint64_t_val, svuint64_t_val);
239+
svsubp_u64_z(svbool_t_val, svuint64_t_val, svuint64_t_val);
156240
svsubp_x(svbool_t_val, svint8_t_val, svint8_t_val);
157241
svsubp_x(svbool_t_val, svint16_t_val, svint16_t_val);
158242
svsubp_x(svbool_t_val, svint32_t_val, svint32_t_val);
@@ -161,9 +245,20 @@ void test_streaming(void) __arm_streaming{
161245
svsubp_x(svbool_t_val, svuint16_t_val, svuint16_t_val);
162246
svsubp_x(svbool_t_val, svuint32_t_val, svuint32_t_val);
163247
svsubp_x(svbool_t_val, svuint64_t_val, svuint64_t_val);
248+
svsubp_z(svbool_t_val, svint8_t_val, svint8_t_val);
249+
svsubp_z(svbool_t_val, svint16_t_val, svint16_t_val);
250+
svsubp_z(svbool_t_val, svint32_t_val, svint32_t_val);
251+
svsubp_z(svbool_t_val, svint64_t_val, svint64_t_val);
252+
svsubp_z(svbool_t_val, svuint8_t_val, svuint8_t_val);
253+
svsubp_z(svbool_t_val, svuint16_t_val, svuint16_t_val);
254+
svsubp_z(svbool_t_val, svuint32_t_val, svuint32_t_val);
255+
svsubp_z(svbool_t_val, svuint64_t_val, svuint64_t_val);
164256
}
165257

166258
void test_streaming_compatible(void) __arm_streaming_compatible{
259+
int8_t int8_t_val;
260+
int16_t int16_t_val;
261+
int32_t int32_t_val;
167262
svbool_t svbool_t_val;
168263
svint8_t svint8_t_val;
169264
svint16_t svint16_t_val;
@@ -173,7 +268,34 @@ void test_streaming_compatible(void) __arm_streaming_compatible{
173268
svuint16_t svuint16_t_val;
174269
svuint32_t svuint32_t_val;
175270
svuint64_t svuint64_t_val;
271+
uint8_t uint8_t_val;
272+
uint16_t uint16_t_val;
273+
uint32_t uint32_t_val;
176274

275+
svabal(svint16_t_val, svint8_t_val, int8_t_val);
276+
svabal(svint16_t_val, svint8_t_val, svint8_t_val);
277+
svabal(svint32_t_val, svint16_t_val, int16_t_val);
278+
svabal(svint32_t_val, svint16_t_val, svint16_t_val);
279+
svabal(svint64_t_val, svint32_t_val, int32_t_val);
280+
svabal(svint64_t_val, svint32_t_val, svint32_t_val);
281+
svabal(svuint16_t_val, svuint8_t_val, svuint8_t_val);
282+
svabal(svuint16_t_val, svuint8_t_val, uint8_t_val);
283+
svabal(svuint32_t_val, svuint16_t_val, svuint16_t_val);
284+
svabal(svuint32_t_val, svuint16_t_val, uint16_t_val);
285+
svabal(svuint64_t_val, svuint32_t_val, svuint32_t_val);
286+
svabal(svuint64_t_val, svuint32_t_val, uint32_t_val);
287+
svabal_n_s16(svint16_t_val, svint8_t_val, int8_t_val);
288+
svabal_n_s32(svint32_t_val, svint16_t_val, int16_t_val);
289+
svabal_n_s64(svint64_t_val, svint32_t_val, int32_t_val);
290+
svabal_n_u16(svuint16_t_val, svuint8_t_val, uint8_t_val);
291+
svabal_n_u32(svuint32_t_val, svuint16_t_val, uint16_t_val);
292+
svabal_n_u64(svuint64_t_val, svuint32_t_val, uint32_t_val);
293+
svabal_s16(svint16_t_val, svint8_t_val, svint8_t_val);
294+
svabal_s32(svint32_t_val, svint16_t_val, svint16_t_val);
295+
svabal_s64(svint64_t_val, svint32_t_val, svint32_t_val);
296+
svabal_u16(svuint16_t_val, svuint8_t_val, svuint8_t_val);
297+
svabal_u32(svuint32_t_val, svuint16_t_val, svuint16_t_val);
298+
svabal_u64(svuint64_t_val, svuint32_t_val, svuint32_t_val);
177299
svaddqp(svint8_t_val, svint8_t_val);
178300
svaddqp(svint16_t_val, svint16_t_val);
179301
svaddqp(svint32_t_val, svint32_t_val);
@@ -216,20 +338,28 @@ void test_streaming_compatible(void) __arm_streaming_compatible{
216338
svsubp_m(svbool_t_val, svuint64_t_val, svuint64_t_val);
217339
svsubp_s8_m(svbool_t_val, svint8_t_val, svint8_t_val);
218340
svsubp_s8_x(svbool_t_val, svint8_t_val, svint8_t_val);
341+
svsubp_s8_z(svbool_t_val, svint8_t_val, svint8_t_val);
219342
svsubp_s16_m(svbool_t_val, svint16_t_val, svint16_t_val);
220343
svsubp_s16_x(svbool_t_val, svint16_t_val, svint16_t_val);
344+
svsubp_s16_z(svbool_t_val, svint16_t_val, svint16_t_val);
221345
svsubp_s32_m(svbool_t_val, svint32_t_val, svint32_t_val);
222346
svsubp_s32_x(svbool_t_val, svint32_t_val, svint32_t_val);
347+
svsubp_s32_z(svbool_t_val, svint32_t_val, svint32_t_val);
223348
svsubp_s64_m(svbool_t_val, svint64_t_val, svint64_t_val);
224349
svsubp_s64_x(svbool_t_val, svint64_t_val, svint64_t_val);
350+
svsubp_s64_z(svbool_t_val, svint64_t_val, svint64_t_val);
225351
svsubp_u8_m(svbool_t_val, svuint8_t_val, svuint8_t_val);
226352
svsubp_u8_x(svbool_t_val, svuint8_t_val, svuint8_t_val);
353+
svsubp_u8_z(svbool_t_val, svuint8_t_val, svuint8_t_val);
227354
svsubp_u16_m(svbool_t_val, svuint16_t_val, svuint16_t_val);
228355
svsubp_u16_x(svbool_t_val, svuint16_t_val, svuint16_t_val);
356+
svsubp_u16_z(svbool_t_val, svuint16_t_val, svuint16_t_val);
229357
svsubp_u32_m(svbool_t_val, svuint32_t_val, svuint32_t_val);
230358
svsubp_u32_x(svbool_t_val, svuint32_t_val, svuint32_t_val);
359+
svsubp_u32_z(svbool_t_val, svuint32_t_val, svuint32_t_val);
231360
svsubp_u64_m(svbool_t_val, svuint64_t_val, svuint64_t_val);
232361
svsubp_u64_x(svbool_t_val, svuint64_t_val, svuint64_t_val);
362+
svsubp_u64_z(svbool_t_val, svuint64_t_val, svuint64_t_val);
233363
svsubp_x(svbool_t_val, svint8_t_val, svint8_t_val);
234364
svsubp_x(svbool_t_val, svint16_t_val, svint16_t_val);
235365
svsubp_x(svbool_t_val, svint32_t_val, svint32_t_val);
@@ -238,4 +368,12 @@ void test_streaming_compatible(void) __arm_streaming_compatible{
238368
svsubp_x(svbool_t_val, svuint16_t_val, svuint16_t_val);
239369
svsubp_x(svbool_t_val, svuint32_t_val, svuint32_t_val);
240370
svsubp_x(svbool_t_val, svuint64_t_val, svuint64_t_val);
371+
svsubp_z(svbool_t_val, svint8_t_val, svint8_t_val);
372+
svsubp_z(svbool_t_val, svint16_t_val, svint16_t_val);
373+
svsubp_z(svbool_t_val, svint32_t_val, svint32_t_val);
374+
svsubp_z(svbool_t_val, svint64_t_val, svint64_t_val);
375+
svsubp_z(svbool_t_val, svuint8_t_val, svuint8_t_val);
376+
svsubp_z(svbool_t_val, svuint16_t_val, svuint16_t_val);
377+
svsubp_z(svbool_t_val, svuint32_t_val, svuint32_t_val);
378+
svsubp_z(svbool_t_val, svuint64_t_val, svuint64_t_val);
241379
}
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
// REQUIRES: aarch64-registered-target
2+
3+
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -verify -verify-ignore-unexpected=error,note -emit-llvm -o - %s
4+
// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -verify=overload -verify-ignore-unexpected=error,note -emit-llvm -o - %s
5+
#include <arm_sve.h>
6+
7+
#ifdef SVE_OVERLOADED_FORMS
8+
// A simple used,unused... macro, long enough to represent any SVE builtin.
9+
#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3_UNUSED) A1
10+
#else
11+
#define SVE_ACLE_FUNC(A1,A2,A3) A1##A2##A3
12+
#endif
13+
14+
void test_svabal(int8_t s8, int16_t s16, int32_t s32, uint8_t u8, uint16_t u16, uint32_t u32)
15+
{
16+
// expected-error@+2 {{'svabal_s64' needs target feature (sve,(sve2p3|sme2p3))|(sme,(sve2p3|sme2p3))}}
17+
// overload-error@+1 {{'svabal' needs target feature (sve,(sve2p3|sme2p3))|(sme,(sve2p3|sme2p3))}}
18+
SVE_ACLE_FUNC(svabal,,_s64)(svundef_s64(), svundef_s32(), svundef_s32());
19+
20+
// expected-error@+2 {{'svabal_n_s64' needs target feature (sve,(sve2p3|sme2p3))|(sme,(sve2p3|sme2p3))}}
21+
// overload-error@+1 {{'svabal' needs target feature (sve,(sve2p3|sme2p3))|(sme,(sve2p3|sme2p3))}}
22+
SVE_ACLE_FUNC(svabal,_n,_s64)(svundef_s64(), svundef_s32(), s32);
23+
24+
// expected-error@+2 {{'svabal_s32' needs target feature (sve,(sve2p3|sme2p3))|(sme,(sve2p3|sme2p3))}}
25+
// overload-error@+1 {{'svabal' needs target feature (sve,(sve2p3|sme2p3))|(sme,(sve2p3|sme2p3))}}
26+
SVE_ACLE_FUNC(svabal,,_s32)(svundef_s32(), svundef_s16(), svundef_s16());
27+
28+
// expected-error@+2 {{'svabal_n_s32' needs target feature (sve,(sve2p3|sme2p3))|(sme,(sve2p3|sme2p3))}}
29+
// overload-error@+1 {{'svabal' needs target feature (sve,(sve2p3|sme2p3))|(sme,(sve2p3|sme2p3))}}
30+
SVE_ACLE_FUNC(svabal,_n,_s32)(svundef_s32(), svundef_s16(), s16);
31+
32+
// expected-error@+2 {{'svabal_s16' needs target feature (sve,(sve2p3|sme2p3))|(sme,(sve2p3|sme2p3))}}
33+
// overload-error@+1 {{'svabal' needs target feature (sve,(sve2p3|sme2p3))|(sme,(sve2p3|sme2p3))}}
34+
SVE_ACLE_FUNC(svabal,,_s16)(svundef_s16(), svundef_s8(), svundef_s8());
35+
36+
// expected-error@+2 {{'svabal_n_s16' needs target feature (sve,(sve2p3|sme2p3))|(sme,(sve2p3|sme2p3))}}
37+
// overload-error@+1 {{'svabal' needs target feature (sve,(sve2p3|sme2p3))|(sme,(sve2p3|sme2p3))}}
38+
SVE_ACLE_FUNC(svabal,_n,_s16)(svundef_s16(), svundef_s8(), s8);
39+
40+
// expected-error@+2 {{'svabal_u64' needs target feature (sve,(sve2p3|sme2p3))|(sme,(sve2p3|sme2p3))}}
41+
// overload-error@+1 {{'svabal' needs target feature (sve,(sve2p3|sme2p3))|(sme,(sve2p3|sme2p3))}}
42+
SVE_ACLE_FUNC(svabal,,_u64)(svundef_u64(), svundef_u32(), svundef_u32());
43+
44+
// expected-error@+2 {{'svabal_n_u64' needs target feature (sve,(sve2p3|sme2p3))|(sme,(sve2p3|sme2p3))}}
45+
// overload-error@+1 {{'svabal' needs target feature (sve,(sve2p3|sme2p3))|(sme,(sve2p3|sme2p3))}}
46+
SVE_ACLE_FUNC(svabal,_n,_u64)(svundef_u64(), svundef_u32(), u32);
47+
48+
// expected-error@+2 {{'svabal_u32' needs target feature (sve,(sve2p3|sme2p3))|(sme,(sve2p3|sme2p3))}}
49+
// overload-error@+1 {{'svabal' needs target feature (sve,(sve2p3|sme2p3))|(sme,(sve2p3|sme2p3))}}
50+
SVE_ACLE_FUNC(svabal,,_u32)(svundef_u32(), svundef_u16(), svundef_u16());
51+
52+
// expected-error@+2 {{'svabal_n_u32' needs target feature (sve,(sve2p3|sme2p3))|(sme,(sve2p3|sme2p3))}}
53+
// overload-error@+1 {{'svabal' needs target feature (sve,(sve2p3|sme2p3))|(sme,(sve2p3|sme2p3))}}
54+
SVE_ACLE_FUNC(svabal,_n,_u32)(svundef_u32(), svundef_u16(), u16);
55+
56+
// expected-error@+2 {{'svabal_u16' needs target feature (sve,(sve2p3|sme2p3))|(sme,(sve2p3|sme2p3))}}
57+
// overload-error@+1 {{'svabal' needs target feature (sve,(sve2p3|sme2p3))|(sme,(sve2p3|sme2p3))}}
58+
SVE_ACLE_FUNC(svabal,,_u16)(svundef_u16(), svundef_u8(), svundef_u8());
59+
60+
// expected-error@+2 {{'svabal_n_u16' needs target feature (sve,(sve2p3|sme2p3))|(sme,(sve2p3|sme2p3))}}
61+
// overload-error@+1 {{'svabal' needs target feature (sve,(sve2p3|sme2p3))|(sme,(sve2p3|sme2p3))}}
62+
SVE_ACLE_FUNC(svabal,_n,_u16)(svundef_u16(), svundef_u8(), u8);
63+
}

llvm/include/llvm/IR/IntrinsicsAArch64.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2458,6 +2458,8 @@ def int_aarch64_sve_stnt1_scatter_scalar_offset : AdvSIMD_ScatterStore_VS_Intri
24582458
//
24592459

24602460
def int_aarch64_sve_saba : AdvSIMD_3VectorArg_Intrinsic<[IntrSpeculatable]>;
2461+
def int_aarch64_sve_sabal : SVE2_3VectorArg_Long_Intrinsic<[IntrSpeculatable]>;
2462+
def int_aarch64_sve_uabal : SVE2_3VectorArg_Long_Intrinsic<[IntrSpeculatable]>;
24612463
def int_aarch64_sve_shadd : AdvSIMD_Pred2VectorArg_Intrinsic<[IntrSpeculatable]>;
24622464
def int_aarch64_sve_shsub : AdvSIMD_Pred2VectorArg_Intrinsic<[IntrSpeculatable]>;
24632465
def int_aarch64_sve_shsub_u : AdvSIMD_Pred2VectorArg_Intrinsic<[IntrSpeculatable]>;

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4841,8 +4841,8 @@ let Predicates = [HasSVE2p3_or_SME2p3] in {
48414841
defm SUBP_ZPmZZ : sve2_int_arith_pred<0b100001, "subp", int_aarch64_sve_subp>;
48424842

48434843
// SVE2 integer absolute difference and accumulate long
4844-
defm SABAL_ZZZ : sve2_int_two_way_absdiff_accum_long<0b0, "sabal">;
4845-
defm UABAL_ZZZ : sve2_int_two_way_absdiff_accum_long<0b1, "uabal">;
4844+
defm SABAL_ZZZ : sve2_int_two_way_absdiff_accum_long<0b0, "sabal", int_aarch64_sve_sabal>;
4845+
defm UABAL_ZZZ : sve2_int_two_way_absdiff_accum_long<0b1, "uabal", int_aarch64_sve_uabal>;
48464846

48474847
// SVE2 integer dot product
48484848
def SDOT_ZZZ_BtoH : sve_intx_dot<0b01, 0b00000, 0b0, "sdot", ZPR16, ZPR8>;

0 commit comments

Comments
 (0)