Skip to content

Commit 2809d82

Browse files
committed
[Clang][AArch64][SVE2p3][SME2p3] Add intrinsics for v9.7a Two-way signed/unsigned absolute difference sum and accumulate long ops
Add the following new clang intrinsics based on the ACLE specification ARM-software/acle#428 (Add alpha support for 9.7 data processing intrinsics) SABAL (Two-way signed absolute difference sum and accumulate long) - svint16_t svabal[_s16](svint16_t, svint8_t, svint8_t) / svint16_t svabal[_n_s16](svint16_t, svint8_t, int8_t) - svint32_t svabal[_s32](svint32_t, svint16_t, svint16_t) / svint32_t svabal[_n_s32](svint32_t, svint16_t, int16_t) - svint64_t svabal[_s64](svint64_t, svint32_t, svint32_t) / svint64_t svabal[_n_s64](svint64_t, svint32_t, int32_t) UABAL (Two-way unsigned absolute difference sum and accumulate long) - svuint16_t svabal[_u16](svuint16_t, svuint8_t, svuint8_t) / svuint16_t svabal[_n_u16](svuint16_t, svuint8_t, uint8_t) - svuint32_t svabal[_u32](svuint32_t, svuint16_t, svuint16_t) / svuint32_t svabal[_n_u32](svuint32_t, svuint16_t, uint16_t) - svuint64_t svabal[_u64](svuint64_t, svuint32_t, svuint32_t) / svuint64_t svabal[_n_u64](svuint64_t, svuint32_t, uint32_t)
1 parent bb3d251 commit 2809d82

8 files changed

Lines changed: 757 additions & 3 deletions

File tree

clang/include/clang/Basic/arm_sve.td

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1342,6 +1342,17 @@ defm SVRECPE : SInstZPZ<"svrecpe", "Ui", "aarch64_sve_urecpe">;
13421342
defm SVRSQRTE : SInstZPZ<"svrsqrte", "Ui", "aarch64_sve_ursqrte">;
13431343
}
13441344

1345+
////////////////////////////////////////////////////////////////////////////////
1346+
// SVE2.3 - Two-way signed/unsigned absolute difference sum and accumulate long
//
// Each signedness gets a vector form (svabal[_{d}]) and a scalar-operand form
// (svabal[_n_{d}]); both forms of a given signedness map to the same LLVM
// intrinsic (aarch64_sve_sabal / aarch64_sve_uabal).
// NOTE(review): "h" / "R" in the prototype strings presumably denote the
// half-width vector operand and the half-width scalar operand respectively -
// confirm against the prototype-modifier table at the top of this file.
1347+
1348+
let SVETargetGuard = "sve2p3|sme2p3", SMETargetGuard = "sve2p3|sme2p3" in {
1349+
def SVABAL_S : SInst<"svabal[_{d}]", "ddhh", "sil" , MergeNone, "aarch64_sve_sabal", [VerifyRuntimeMode]>;
1350+
def SVABAL_S_N : SInst<"svabal[_n_{d}]", "ddhR", "sil" , MergeNone, "aarch64_sve_sabal", [VerifyRuntimeMode]>;
1351+
1352+
def SVABAL_U : SInst<"svabal[_{d}]", "ddhh", "UsUiUl", MergeNone, "aarch64_sve_uabal", [VerifyRuntimeMode]>;
1353+
def SVABAL_U_N : SInst<"svabal[_n_{d}]", "ddhR", "UsUiUl", MergeNone, "aarch64_sve_uabal", [VerifyRuntimeMode]>;
1354+
}
1355+
13451356
//------------------------------------------------------------------------------
13461357

13471358
multiclass SInstZPZxZ<string name, string types, string pat_v, string pat_n, string m_intrinsic, string x_intrinsic, list<FlagType> flags=[]> {

clang/test/CodeGen/AArch64/sve2p3-intrinsics/acle_sve2p3_svabal.c

Lines changed: 479 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
// NOTE: File has been autogenerated by utils/aarch64_builtins_test_generator.py
2+
// RUN: %clang_cc1 %s -fsyntax-only -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sve -target-feature +sve2p3 -verify
3+
// RUN: %clang_cc1 %s -fsyntax-only -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2p3 -target-feature +sve -verify
4+
// expected-no-diagnostics
5+
6+
// REQUIRES: aarch64-registered-target
7+
8+
#include <arm_sve.h>
9+
10+
// Properties: guard="sve,(sve2p3|sme2p3)" streaming_guard="sme,(sve2p3|sme2p3)" flags="feature-dependent"
11+
12+
void test(void) {
13+
int8_t int8_t_val;
14+
int16_t int16_t_val;
15+
int32_t int32_t_val;
16+
svint8_t svint8_t_val;
17+
svint16_t svint16_t_val;
18+
svint32_t svint32_t_val;
19+
svint64_t svint64_t_val;
20+
svuint8_t svuint8_t_val;
21+
svuint16_t svuint16_t_val;
22+
svuint32_t svuint32_t_val;
23+
svuint64_t svuint64_t_val;
24+
uint8_t uint8_t_val;
25+
uint16_t uint16_t_val;
26+
uint32_t uint32_t_val;
27+
28+
svabal(svint16_t_val, svint8_t_val, int8_t_val);
29+
svabal(svint16_t_val, svint8_t_val, svint8_t_val);
30+
svabal(svint32_t_val, svint16_t_val, int16_t_val);
31+
svabal(svint32_t_val, svint16_t_val, svint16_t_val);
32+
svabal(svint64_t_val, svint32_t_val, int32_t_val);
33+
svabal(svint64_t_val, svint32_t_val, svint32_t_val);
34+
svabal(svuint16_t_val, svuint8_t_val, svuint8_t_val);
35+
svabal(svuint16_t_val, svuint8_t_val, uint8_t_val);
36+
svabal(svuint32_t_val, svuint16_t_val, svuint16_t_val);
37+
svabal(svuint32_t_val, svuint16_t_val, uint16_t_val);
38+
svabal(svuint64_t_val, svuint32_t_val, svuint32_t_val);
39+
svabal(svuint64_t_val, svuint32_t_val, uint32_t_val);
40+
svabal_n_s16(svint16_t_val, svint8_t_val, int8_t_val);
41+
svabal_n_s32(svint32_t_val, svint16_t_val, int16_t_val);
42+
svabal_n_s64(svint64_t_val, svint32_t_val, int32_t_val);
43+
svabal_n_u16(svuint16_t_val, svuint8_t_val, uint8_t_val);
44+
svabal_n_u32(svuint32_t_val, svuint16_t_val, uint16_t_val);
45+
svabal_n_u64(svuint64_t_val, svuint32_t_val, uint32_t_val);
46+
svabal_s16(svint16_t_val, svint8_t_val, svint8_t_val);
47+
svabal_s32(svint32_t_val, svint16_t_val, svint16_t_val);
48+
svabal_s64(svint64_t_val, svint32_t_val, svint32_t_val);
49+
svabal_u16(svuint16_t_val, svuint8_t_val, svuint8_t_val);
50+
svabal_u32(svuint32_t_val, svuint16_t_val, svuint16_t_val);
51+
svabal_u64(svuint64_t_val, svuint32_t_val, svuint32_t_val);
52+
}
53+
54+
void test_streaming(void) __arm_streaming{
55+
int8_t int8_t_val;
56+
int16_t int16_t_val;
57+
int32_t int32_t_val;
58+
svint8_t svint8_t_val;
59+
svint16_t svint16_t_val;
60+
svint32_t svint32_t_val;
61+
svint64_t svint64_t_val;
62+
svuint8_t svuint8_t_val;
63+
svuint16_t svuint16_t_val;
64+
svuint32_t svuint32_t_val;
65+
svuint64_t svuint64_t_val;
66+
uint8_t uint8_t_val;
67+
uint16_t uint16_t_val;
68+
uint32_t uint32_t_val;
69+
70+
svabal(svint16_t_val, svint8_t_val, int8_t_val);
71+
svabal(svint16_t_val, svint8_t_val, svint8_t_val);
72+
svabal(svint32_t_val, svint16_t_val, int16_t_val);
73+
svabal(svint32_t_val, svint16_t_val, svint16_t_val);
74+
svabal(svint64_t_val, svint32_t_val, int32_t_val);
75+
svabal(svint64_t_val, svint32_t_val, svint32_t_val);
76+
svabal(svuint16_t_val, svuint8_t_val, svuint8_t_val);
77+
svabal(svuint16_t_val, svuint8_t_val, uint8_t_val);
78+
svabal(svuint32_t_val, svuint16_t_val, svuint16_t_val);
79+
svabal(svuint32_t_val, svuint16_t_val, uint16_t_val);
80+
svabal(svuint64_t_val, svuint32_t_val, svuint32_t_val);
81+
svabal(svuint64_t_val, svuint32_t_val, uint32_t_val);
82+
svabal_n_s16(svint16_t_val, svint8_t_val, int8_t_val);
83+
svabal_n_s32(svint32_t_val, svint16_t_val, int16_t_val);
84+
svabal_n_s64(svint64_t_val, svint32_t_val, int32_t_val);
85+
svabal_n_u16(svuint16_t_val, svuint8_t_val, uint8_t_val);
86+
svabal_n_u32(svuint32_t_val, svuint16_t_val, uint16_t_val);
87+
svabal_n_u64(svuint64_t_val, svuint32_t_val, uint32_t_val);
88+
svabal_s16(svint16_t_val, svint8_t_val, svint8_t_val);
89+
svabal_s32(svint32_t_val, svint16_t_val, svint16_t_val);
90+
svabal_s64(svint64_t_val, svint32_t_val, svint32_t_val);
91+
svabal_u16(svuint16_t_val, svuint8_t_val, svuint8_t_val);
92+
svabal_u32(svuint32_t_val, svuint16_t_val, svuint16_t_val);
93+
svabal_u64(svuint64_t_val, svuint32_t_val, svuint32_t_val);
94+
}
95+
96+
void test_streaming_compatible(void) __arm_streaming_compatible{
97+
int8_t int8_t_val;
98+
int16_t int16_t_val;
99+
int32_t int32_t_val;
100+
svint8_t svint8_t_val;
101+
svint16_t svint16_t_val;
102+
svint32_t svint32_t_val;
103+
svint64_t svint64_t_val;
104+
svuint8_t svuint8_t_val;
105+
svuint16_t svuint16_t_val;
106+
svuint32_t svuint32_t_val;
107+
svuint64_t svuint64_t_val;
108+
uint8_t uint8_t_val;
109+
uint16_t uint16_t_val;
110+
uint32_t uint32_t_val;
111+
112+
svabal(svint16_t_val, svint8_t_val, int8_t_val);
113+
svabal(svint16_t_val, svint8_t_val, svint8_t_val);
114+
svabal(svint32_t_val, svint16_t_val, int16_t_val);
115+
svabal(svint32_t_val, svint16_t_val, svint16_t_val);
116+
svabal(svint64_t_val, svint32_t_val, int32_t_val);
117+
svabal(svint64_t_val, svint32_t_val, svint32_t_val);
118+
svabal(svuint16_t_val, svuint8_t_val, svuint8_t_val);
119+
svabal(svuint16_t_val, svuint8_t_val, uint8_t_val);
120+
svabal(svuint32_t_val, svuint16_t_val, svuint16_t_val);
121+
svabal(svuint32_t_val, svuint16_t_val, uint16_t_val);
122+
svabal(svuint64_t_val, svuint32_t_val, svuint32_t_val);
123+
svabal(svuint64_t_val, svuint32_t_val, uint32_t_val);
124+
svabal_n_s16(svint16_t_val, svint8_t_val, int8_t_val);
125+
svabal_n_s32(svint32_t_val, svint16_t_val, int16_t_val);
126+
svabal_n_s64(svint64_t_val, svint32_t_val, int32_t_val);
127+
svabal_n_u16(svuint16_t_val, svuint8_t_val, uint8_t_val);
128+
svabal_n_u32(svuint32_t_val, svuint16_t_val, uint16_t_val);
129+
svabal_n_u64(svuint64_t_val, svuint32_t_val, uint32_t_val);
130+
svabal_s16(svint16_t_val, svint8_t_val, svint8_t_val);
131+
svabal_s32(svint32_t_val, svint16_t_val, svint16_t_val);
132+
svabal_s64(svint64_t_val, svint32_t_val, svint32_t_val);
133+
svabal_u16(svuint16_t_val, svuint8_t_val, svuint8_t_val);
134+
svabal_u32(svuint32_t_val, svuint16_t_val, svuint16_t_val);
135+
svabal_u64(svuint64_t_val, svuint32_t_val, svuint32_t_val);
136+
}
Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
// REQUIRES: aarch64-registered-target
2+
3+
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -verify -verify-ignore-unexpected=error,note -emit-llvm -o - %s
4+
// RUN: %clang_cc1 -DSVE_OVERLOADED_FORMS -triple aarch64-none-linux-gnu -target-feature +sve -verify=overload -verify-ignore-unexpected=error,note -emit-llvm -o - %s
5+
#include <arm_sve.h>
6+
7+
#ifdef SVE_OVERLOADED_FORMS
8+
// A simple used,unused... macro, long enough to represent any SVE builtin.
9+
#define SVE_ACLE_FUNC(A1,A2_UNUSED,A3_UNUSED) A1
10+
#else
11+
#define SVE_ACLE_FUNC(A1,A2,A3) A1##A2##A3
12+
#endif
13+
14+
// Negative test: the RUN lines enable only +sve, so every svabal form below
// must be rejected with the "needs target feature" diagnostic.
// Each call carries two expectations: `expected-error` for the explicitly
// suffixed name (default -verify run) and `overload-error` for the overloaded
// name (-DSVE_OVERLOADED_FORMS run with -verify=overload). The `@+2`/`@+1`
// offsets are relative to the directive lines, so do not insert lines between
// a directive pair and the call it refers to.
void test_svabal(int8_t s8, int16_t s16, int32_t s32, uint8_t u8, uint16_t u16, uint32_t u32)
15+
{
16+
// expected-error@+2 {{'svabal_s64' needs target feature (sve,(sve2p3|sme2p3))|(sme,(sve2p3|sme2p3))}}
17+
// overload-error@+1 {{'svabal' needs target feature (sve,(sve2p3|sme2p3))|(sme,(sve2p3|sme2p3))}}
18+
SVE_ACLE_FUNC(svabal,,_s64)(svundef_s64(), svundef_s32(), svundef_s32());
19+
20+
// expected-error@+2 {{'svabal_n_s64' needs target feature (sve,(sve2p3|sme2p3))|(sme,(sve2p3|sme2p3))}}
21+
// overload-error@+1 {{'svabal' needs target feature (sve,(sve2p3|sme2p3))|(sme,(sve2p3|sme2p3))}}
22+
SVE_ACLE_FUNC(svabal,_n,_s64)(svundef_s64(), svundef_s32(), s32);
23+
24+
// expected-error@+2 {{'svabal_s32' needs target feature (sve,(sve2p3|sme2p3))|(sme,(sve2p3|sme2p3))}}
25+
// overload-error@+1 {{'svabal' needs target feature (sve,(sve2p3|sme2p3))|(sme,(sve2p3|sme2p3))}}
26+
SVE_ACLE_FUNC(svabal,,_s32)(svundef_s32(), svundef_s16(), svundef_s16());
27+
28+
// expected-error@+2 {{'svabal_n_s32' needs target feature (sve,(sve2p3|sme2p3))|(sme,(sve2p3|sme2p3))}}
29+
// overload-error@+1 {{'svabal' needs target feature (sve,(sve2p3|sme2p3))|(sme,(sve2p3|sme2p3))}}
30+
SVE_ACLE_FUNC(svabal,_n,_s32)(svundef_s32(), svundef_s16(), s16);
31+
32+
// expected-error@+2 {{'svabal_s16' needs target feature (sve,(sve2p3|sme2p3))|(sme,(sve2p3|sme2p3))}}
33+
// overload-error@+1 {{'svabal' needs target feature (sve,(sve2p3|sme2p3))|(sme,(sve2p3|sme2p3))}}
34+
SVE_ACLE_FUNC(svabal,,_s16)(svundef_s16(), svundef_s8(), svundef_s8());
35+
36+
// expected-error@+2 {{'svabal_n_s16' needs target feature (sve,(sve2p3|sme2p3))|(sme,(sve2p3|sme2p3))}}
37+
// overload-error@+1 {{'svabal' needs target feature (sve,(sve2p3|sme2p3))|(sme,(sve2p3|sme2p3))}}
38+
SVE_ACLE_FUNC(svabal,_n,_s16)(svundef_s16(), svundef_s8(), s8);
39+
40+
// expected-error@+2 {{'svabal_u64' needs target feature (sve,(sve2p3|sme2p3))|(sme,(sve2p3|sme2p3))}}
41+
// overload-error@+1 {{'svabal' needs target feature (sve,(sve2p3|sme2p3))|(sme,(sve2p3|sme2p3))}}
42+
SVE_ACLE_FUNC(svabal,,_u64)(svundef_u64(), svundef_u32(), svundef_u32());
43+
44+
// expected-error@+2 {{'svabal_n_u64' needs target feature (sve,(sve2p3|sme2p3))|(sme,(sve2p3|sme2p3))}}
45+
// overload-error@+1 {{'svabal' needs target feature (sve,(sve2p3|sme2p3))|(sme,(sve2p3|sme2p3))}}
46+
SVE_ACLE_FUNC(svabal,_n,_u64)(svundef_u64(), svundef_u32(), u32);
47+
48+
// expected-error@+2 {{'svabal_u32' needs target feature (sve,(sve2p3|sme2p3))|(sme,(sve2p3|sme2p3))}}
49+
// overload-error@+1 {{'svabal' needs target feature (sve,(sve2p3|sme2p3))|(sme,(sve2p3|sme2p3))}}
50+
SVE_ACLE_FUNC(svabal,,_u32)(svundef_u32(), svundef_u16(), svundef_u16());
51+
52+
// expected-error@+2 {{'svabal_n_u32' needs target feature (sve,(sve2p3|sme2p3))|(sme,(sve2p3|sme2p3))}}
53+
// overload-error@+1 {{'svabal' needs target feature (sve,(sve2p3|sme2p3))|(sme,(sve2p3|sme2p3))}}
54+
SVE_ACLE_FUNC(svabal,_n,_u32)(svundef_u32(), svundef_u16(), u16);
55+
56+
// expected-error@+2 {{'svabal_u16' needs target feature (sve,(sve2p3|sme2p3))|(sme,(sve2p3|sme2p3))}}
57+
// overload-error@+1 {{'svabal' needs target feature (sve,(sve2p3|sme2p3))|(sme,(sve2p3|sme2p3))}}
58+
SVE_ACLE_FUNC(svabal,,_u16)(svundef_u16(), svundef_u8(), svundef_u8());
59+
60+
// expected-error@+2 {{'svabal_n_u16' needs target feature (sve,(sve2p3|sme2p3))|(sme,(sve2p3|sme2p3))}}
61+
// overload-error@+1 {{'svabal' needs target feature (sve,(sve2p3|sme2p3))|(sme,(sve2p3|sme2p3))}}
62+
SVE_ACLE_FUNC(svabal,_n,_u16)(svundef_u16(), svundef_u8(), u8);
63+
}

llvm/include/llvm/IR/IntrinsicsAArch64.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2458,6 +2458,8 @@ def int_aarch64_sve_stnt1_scatter_scalar_offset : AdvSIMD_ScatterStore_VS_Intri
24582458
//
24592459

24602460
def int_aarch64_sve_saba : AdvSIMD_3VectorArg_Intrinsic<[IntrSpeculatable]>;
2461+
// SABAL / UABAL: two-way signed / unsigned absolute difference sum and
// accumulate long.
def int_aarch64_sve_sabal : SVE2_3VectorArg_Long_Intrinsic<[IntrSpeculatable]>;
2462+
def int_aarch64_sve_uabal : SVE2_3VectorArg_Long_Intrinsic<[IntrSpeculatable]>;
24612463
def int_aarch64_sve_shadd : AdvSIMD_Pred2VectorArg_Intrinsic<[IntrSpeculatable]>;
24622464
def int_aarch64_sve_shsub : AdvSIMD_Pred2VectorArg_Intrinsic<[IntrSpeculatable]>;
24632465
def int_aarch64_sve_shsub_u : AdvSIMD_Pred2VectorArg_Intrinsic<[IntrSpeculatable]>;

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4841,8 +4841,8 @@ let Predicates = [HasSVE2p3_or_SME2p3] in {
48414841
defm SUBP_ZPmZZ : sve2_int_arith_pred<0b100001, "subp", null_frag>;
48424842

48434843
// SVE2 integer absolute difference and accumulate long
4844-
defm SABAL_ZZZ : sve2_int_two_way_absdiff_accum_long<0b0, "sabal">;
4845-
defm UABAL_ZZZ : sve2_int_two_way_absdiff_accum_long<0b1, "uabal">;
4844+
// The trailing intrinsic argument is the pattern operator handed to the
// multiclass; the first argument (0b0 / 0b1) picks the signed / unsigned
// encoding.
defm SABAL_ZZZ : sve2_int_two_way_absdiff_accum_long<0b0, "sabal", int_aarch64_sve_sabal>;
4845+
defm UABAL_ZZZ : sve2_int_two_way_absdiff_accum_long<0b1, "uabal", int_aarch64_sve_uabal>;
48464846

48474847
// SVE2 integer dot product
48484848
def SDOT_ZZZ_BtoH : sve_intx_dot<0b01, 0b00000, 0b0, "sdot", ZPR16, ZPR8>;

llvm/lib/Target/AArch64/SVEInstrFormats.td

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4695,10 +4695,15 @@ multiclass sve2_int_absdiff_accum_long<bits<2> opc, string asm,
46954695
def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _D)>;
46964696
}
46974697

4698-
multiclass sve2_int_two_way_absdiff_accum_long<bit U, string asm> {
4698+
// Two-way absolute difference sum and accumulate long.
//   U   - spliced into the opcode field { 0b01, U, 0b1 } of every variant.
//   asm - assembly mnemonic shared by all variants.
//   op  - pattern operator matched by the selection patterns below.
// Defines one instruction per element-size pair (_BtoH, _HtoS, _StoD) and one
// three-operand pattern per instruction: (wide accumulator, narrow, narrow)
// producing the wide accumulator type.
multiclass sve2_int_two_way_absdiff_accum_long<bit U, string asm,
4699+
SDPatternOperator op> {
46994700
def _BtoH : sve2_int_absdiff_accum<0b001, { 0b01, U, 0b1 }, asm, ZPR16, ZPR8>;
47004701
def _HtoS : sve2_int_absdiff_accum<0b010, { 0b01, U, 0b1 }, asm, ZPR32, ZPR16>;
47014702
def _StoD : sve2_int_absdiff_accum<0b011, { 0b01, U, 0b1 }, asm, ZPR64, ZPR32>;
4703+
4704+
// Select each variant from `op` for the matching accumulator/source types.
def : SVE_3_Op_Pat<nxv8i16, op, nxv8i16, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _BtoH)>;
4705+
def : SVE_3_Op_Pat<nxv4i32, op, nxv4i32, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _HtoS)>;
4706+
def : SVE_3_Op_Pat<nxv2i64, op, nxv2i64, nxv4i32, nxv4i32, !cast<Instruction>(NAME # _StoD)>;
47024707
}
47034708

47044709
multiclass sve2_int_addsub_long_carry<bits<2> opc, string asm,
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
2+
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve2p3 -enable-subreg-liveness -verify-machineinstrs < %s | FileCheck %s
3+
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme2p3 -enable-subreg-liveness -force-streaming -verify-machineinstrs < %s | FileCheck %s
4+
; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sme,+sve2p3 -enable-subreg-liveness -force-streaming -verify-machineinstrs < %s | FileCheck %s
5+
6+
; Signed byte -> halfword form. %zn and %zm are passed as distinct operands
; (z1 and z2) so the checks catch swapped or duplicated source registers.
define <vscale x 8 x i16> @test_svabal_s16(<vscale x 8 x i16> %zda, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm) {
7+
; CHECK-LABEL: test_svabal_s16:
8+
; CHECK: // %bb.0:
9+
; CHECK-NEXT: sabal z0.h, z1.b, z2.b
10+
; CHECK-NEXT: ret
11+
%res = call <vscale x 8 x i16> @llvm.aarch64.sve.sabal.nxv8i16(<vscale x 8 x i16> %zda, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
12+
ret <vscale x 8 x i16> %res
13+
}
14+
15+
; Signed halfword -> word form. %zn and %zm are passed as distinct operands
; (z1 and z2) so the checks catch swapped or duplicated source registers.
define <vscale x 4 x i32> @test_svabal_s32(<vscale x 4 x i32> %zda, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm) {
16+
; CHECK-LABEL: test_svabal_s32:
17+
; CHECK: // %bb.0:
18+
; CHECK-NEXT: sabal z0.s, z1.h, z2.h
19+
; CHECK-NEXT: ret
20+
%res = call <vscale x 4 x i32> @llvm.aarch64.sve.sabal.nxv4i32(<vscale x 4 x i32> %zda, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm)
21+
ret <vscale x 4 x i32> %res
22+
}
23+
24+
; Signed word -> doubleword form. %zn and %zm are passed as distinct operands
; (z1 and z2) so the checks catch swapped or duplicated source registers.
define <vscale x 2 x i64> @test_svabal_s64(<vscale x 2 x i64> %zda, <vscale x 4 x i32> %zn, <vscale x 4 x i32> %zm) {
25+
; CHECK-LABEL: test_svabal_s64:
26+
; CHECK: // %bb.0:
27+
; CHECK-NEXT: sabal z0.d, z1.s, z2.s
28+
; CHECK-NEXT: ret
29+
%res = call <vscale x 2 x i64> @llvm.aarch64.sve.sabal.nxv2i64(<vscale x 2 x i64> %zda, <vscale x 4 x i32> %zn, <vscale x 4 x i32> %zm)
30+
ret <vscale x 2 x i64> %res
31+
}
32+
33+
; Unsigned byte -> halfword form. %zn and %zm are passed as distinct operands
; (z1 and z2) so the checks catch swapped or duplicated source registers.
define <vscale x 8 x i16> @test_svabal_u16(<vscale x 8 x i16> %zda, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm) {
34+
; CHECK-LABEL: test_svabal_u16:
35+
; CHECK: // %bb.0:
36+
; CHECK-NEXT: uabal z0.h, z1.b, z2.b
37+
; CHECK-NEXT: ret
38+
%res = call <vscale x 8 x i16> @llvm.aarch64.sve.uabal.nxv8i16(<vscale x 8 x i16> %zda, <vscale x 16 x i8> %zn, <vscale x 16 x i8> %zm)
39+
ret <vscale x 8 x i16> %res
40+
}
41+
42+
; Unsigned halfword -> word form. %zn and %zm are passed as distinct operands
; (z1 and z2) so the checks catch swapped or duplicated source registers.
define <vscale x 4 x i32> @test_svabal_u32(<vscale x 4 x i32> %zda, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm) {
43+
; CHECK-LABEL: test_svabal_u32:
44+
; CHECK: // %bb.0:
45+
; CHECK-NEXT: uabal z0.s, z1.h, z2.h
46+
; CHECK-NEXT: ret
47+
%res = call <vscale x 4 x i32> @llvm.aarch64.sve.uabal.nxv4i32(<vscale x 4 x i32> %zda, <vscale x 8 x i16> %zn, <vscale x 8 x i16> %zm)
48+
ret <vscale x 4 x i32> %res
49+
}
50+
51+
; Unsigned word -> doubleword form. %zn and %zm are passed as distinct operands
; (z1 and z2) so the checks catch swapped or duplicated source registers.
define <vscale x 2 x i64> @test_svabal_u64(<vscale x 2 x i64> %zda, <vscale x 4 x i32> %zn, <vscale x 4 x i32> %zm) {
52+
; CHECK-LABEL: test_svabal_u64:
53+
; CHECK: // %bb.0:
54+
; CHECK-NEXT: uabal z0.d, z1.s, z2.s
55+
; CHECK-NEXT: ret
56+
%res = call <vscale x 2 x i64> @llvm.aarch64.sve.uabal.nxv2i64(<vscale x 2 x i64> %zda, <vscale x 4 x i32> %zn, <vscale x 4 x i32> %zm)
57+
ret <vscale x 2 x i64> %res
58+
}

0 commit comments

Comments
 (0)