Skip to content

Commit 759eecd

Browse files
committed
[AArch64] Add 9.7 data processing intrinsics
Add Clang/LLVM intrinsics for svcvt, scvtflt, ucvtf, ucvtflt and fcvtzsn, fcvtzun. The Clang intrinsics are guarded by the sve2.3 and sme2.3 feature flags. ACLE Patch: ARM-software/acle#428
1 parent a1054ec commit 759eecd

8 files changed

Lines changed: 785 additions & 8 deletions

File tree

clang/include/clang/Basic/arm_sve.td

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -997,6 +997,33 @@ def SVCVTLT_Z_F32_F16 : SInst<"svcvtlt_f32[_f16]", "dPh", "f", MergeZeroExp, "a
997997
def SVCVTLT_Z_F64_F32 : SInst<"svcvtlt_f64[_f32]", "dPh", "d", MergeZeroExp, "aarch64_sve_fcvtlt_f64f32", [IsOverloadNone, VerifyRuntimeMode]>;
998998

999999
}
1000+
1001+
let SVETargetGuard = "sve2p3|sme2p3", SMETargetGuard = "sve2p3|sme2p3" in {
1002+
def SVCVT_S8_F16 : SInst<"svcvt_s8[_f16_x2]", "d2.O", "c", MergeNone, "aarch64_sve_fcvtzsn", [IsOverloadWhileOrMultiVecCvt, VerifyRuntimeMode]>;
1003+
def SVCVT_S16_F32 : SInst<"svcvt_s16[_f32_x2]", "d2.M", "s", MergeNone, "aarch64_sve_fcvtzsn", [IsOverloadWhileOrMultiVecCvt, VerifyRuntimeMode]>;
1004+
def SVCVT_S32_F64 : SInst<"svcvt_s32[_f64_x2]", "d2.N", "i", MergeNone, "aarch64_sve_fcvtzsn", [IsOverloadWhileOrMultiVecCvt, VerifyRuntimeMode]>;
1005+
1006+
def SVCVT_U8_F16 : SInst<"svcvt_u8[_f16_x2]", "d2.O", "Uc", MergeNone, "aarch64_sve_fcvtzun", [IsOverloadWhileOrMultiVecCvt, VerifyRuntimeMode]>;
1007+
def SVCVT_U16_F32 : SInst<"svcvt_u16[_f32_x2]", "d2.M", "Us", MergeNone, "aarch64_sve_fcvtzun", [IsOverloadWhileOrMultiVecCvt, VerifyRuntimeMode]>;
1008+
def SVCVT_U32_F64 : SInst<"svcvt_u32[_f64_x2]", "d2.N", "Ui", MergeNone, "aarch64_sve_fcvtzun", [IsOverloadWhileOrMultiVecCvt, VerifyRuntimeMode]>;
1009+
1010+
def SVCVTT_F16_S8 : SInst<"svcvtt_f16[_s8]", "Od", "c", MergeNone, "aarch64_sve_scvtflt_f16i8", [IsOverloadNone, VerifyRuntimeMode]>;
1011+
def SVCVTT_F32_S16 : SInst<"svcvtt_f32[_s16]", "Md", "s", MergeNone, "aarch64_sve_scvtflt_f32i16", [IsOverloadNone, VerifyRuntimeMode]>;
1012+
def SVCVTT_F64_S32 : SInst<"svcvtt_f64[_s32]", "Nd", "i", MergeNone, "aarch64_sve_scvtflt_f64i32", [IsOverloadNone, VerifyRuntimeMode]>;
1013+
1014+
def SVCVTT_F16_U8 : SInst<"svcvtt_f16[_u8]", "Od", "Uc", MergeNone, "aarch64_sve_ucvtflt_f16i8", [IsOverloadNone, VerifyRuntimeMode]>;
1015+
def SVCVTT_F32_U16 : SInst<"svcvtt_f32[_u16]", "Md", "Us", MergeNone, "aarch64_sve_ucvtflt_f32i16", [IsOverloadNone, VerifyRuntimeMode]>;
1016+
def SVCVTT_F64_U32 : SInst<"svcvtt_f64[_u32]", "Nd", "Ui", MergeNone, "aarch64_sve_ucvtflt_f64i32", [IsOverloadNone, VerifyRuntimeMode]>;
1017+
1018+
def SVCVTB_F16_S8 : SInst<"svcvtb_f16[_s8]", "Od", "c", MergeNone, "aarch64_sve_scvtfb_f16i8", [IsOverloadNone, VerifyRuntimeMode]>;
1019+
def SVCVTB_F32_S16 : SInst<"svcvtb_f32[_s16]", "Md", "s", MergeNone, "aarch64_sve_scvtfb_f32i16", [IsOverloadNone, VerifyRuntimeMode]>;
1020+
def SVCVTB_F64_S32 : SInst<"svcvtb_f64[_s32]", "Nd", "i", MergeNone, "aarch64_sve_scvtfb_f64i32", [IsOverloadNone, VerifyRuntimeMode]>;
1021+
1022+
def SVCVTB_F16_U8 : SInst<"svcvtb_f16[_u8]", "Od", "Uc", MergeNone, "aarch64_sve_ucvtfb_f16i8", [IsOverloadNone, VerifyRuntimeMode]>;
1023+
def SVCVTB_F32_U16 : SInst<"svcvtb_f32[_u16]", "Md", "Us", MergeNone, "aarch64_sve_ucvtfb_f32i16", [IsOverloadNone, VerifyRuntimeMode]>;
1024+
def SVCVTB_F64_U32 : SInst<"svcvtb_f64[_u32]", "Nd", "Ui", MergeNone, "aarch64_sve_ucvtfb_f64i32", [IsOverloadNone, VerifyRuntimeMode]>;
1025+
}
1026+
10001027
////////////////////////////////////////////////////////////////////////////////
10011028
// Permutations and selection
10021029

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
2+
// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2p3 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
3+
// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2p3 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
4+
5+
// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2p3 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
6+
// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2p3 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
7+
8+
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3\
9+
// RUN: -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
10+
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2p3\
11+
// RUN: -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
12+
//
13+
// REQUIRES: aarch64-registered-target
14+
15+
#include <arm_sve.h>
16+
17+
#if defined __ARM_FEATURE_SME
18+
#define MODE_ATTR __arm_streaming
19+
#else
20+
#define MODE_ATTR
21+
#endif
22+
23+
// CHECK-LABEL: @test_svcvt_s8_f16_x2(
24+
// CHECK-NEXT: entry:
25+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.fcvtzsn.nxv16i8.nxv8f16(<vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]])
26+
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
27+
//
28+
// CPP-CHECK-LABEL: @_Z20test_svcvt_s8_f16_x213svfloat16x2_t(
29+
// CPP-CHECK-NEXT: entry:
30+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.fcvtzsn.nxv16i8.nxv8f16(<vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]])
31+
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
32+
//
33+
svint8_t test_svcvt_s8_f16_x2(svfloat16x2_t zn) MODE_ATTR {
34+
return svcvt_s8_f16_x2(zn);
35+
}
36+
37+
// CHECK-LABEL: @test_svcvt_s16_f32_x2(
38+
// CHECK-NEXT: entry:
39+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzsn.nxv8i16.nxv4f32(<vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]])
40+
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
41+
//
42+
// CPP-CHECK-LABEL: @_Z21test_svcvt_s16_f32_x213svfloat32x2_t(
43+
// CPP-CHECK-NEXT: entry:
44+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzsn.nxv8i16.nxv4f32(<vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]])
45+
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
46+
//
47+
svint16_t test_svcvt_s16_f32_x2(svfloat32x2_t zn) MODE_ATTR {
48+
return svcvt_s16_f32_x2(zn);
49+
}
50+
51+
// CHECK-LABEL: @test_svcvt_s32_f64_x2(
52+
// CHECK-NEXT: entry:
53+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzsn.nxv4i32.nxv2f64(<vscale x 2 x double> [[ZN_COERCE0:%.*]], <vscale x 2 x double> [[ZN_COERCE1:%.*]])
54+
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
55+
//
56+
// CPP-CHECK-LABEL: @_Z21test_svcvt_s32_f64_x213svfloat64x2_t(
57+
// CPP-CHECK-NEXT: entry:
58+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzsn.nxv4i32.nxv2f64(<vscale x 2 x double> [[ZN_COERCE0:%.*]], <vscale x 2 x double> [[ZN_COERCE1:%.*]])
59+
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
60+
//
61+
svint32_t test_svcvt_s32_f64_x2(svfloat64x2_t zn) MODE_ATTR {
62+
return svcvt_s32_f64_x2(zn);
63+
}
64+
65+
// CHECK-LABEL: @test_svcvt_u8_f16_x2(
66+
// CHECK-NEXT: entry:
67+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.fcvtzun.nxv16i8.nxv8f16(<vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]])
68+
// CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
69+
//
70+
// CPP-CHECK-LABEL: @_Z20test_svcvt_u8_f16_x213svfloat16x2_t(
71+
// CPP-CHECK-NEXT: entry:
72+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 16 x i8> @llvm.aarch64.sve.fcvtzun.nxv16i8.nxv8f16(<vscale x 8 x half> [[ZN_COERCE0:%.*]], <vscale x 8 x half> [[ZN_COERCE1:%.*]])
73+
// CPP-CHECK-NEXT: ret <vscale x 16 x i8> [[TMP0]]
74+
//
75+
svuint8_t test_svcvt_u8_f16_x2(svfloat16x2_t zn) MODE_ATTR {
76+
return svcvt_u8_f16_x2(zn);
77+
}
78+
79+
// CHECK-LABEL: @test_svcvt_u16_f32_x2(
80+
// CHECK-NEXT: entry:
81+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzun.nxv8i16.nxv4f32(<vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]])
82+
// CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
83+
//
84+
// CPP-CHECK-LABEL: @_Z21test_svcvt_u16_f32_x213svfloat32x2_t(
85+
// CPP-CHECK-NEXT: entry:
86+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x i16> @llvm.aarch64.sve.fcvtzun.nxv8i16.nxv4f32(<vscale x 4 x float> [[ZN_COERCE0:%.*]], <vscale x 4 x float> [[ZN_COERCE1:%.*]])
87+
// CPP-CHECK-NEXT: ret <vscale x 8 x i16> [[TMP0]]
88+
//
89+
svuint16_t test_svcvt_u16_f32_x2(svfloat32x2_t zn) MODE_ATTR {
90+
return svcvt_u16_f32_x2(zn);
91+
}
92+
93+
// CHECK-LABEL: @test_svcvt_u32_f64_x2(
94+
// CHECK-NEXT: entry:
95+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzun.nxv4i32.nxv2f64(<vscale x 2 x double> [[ZN_COERCE0:%.*]], <vscale x 2 x double> [[ZN_COERCE1:%.*]])
96+
// CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
97+
//
98+
// CPP-CHECK-LABEL: @_Z21test_svcvt_u32_f64_x213svfloat64x2_t(
99+
// CPP-CHECK-NEXT: entry:
100+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x i32> @llvm.aarch64.sve.fcvtzun.nxv4i32.nxv2f64(<vscale x 2 x double> [[ZN_COERCE0:%.*]], <vscale x 2 x double> [[ZN_COERCE1:%.*]])
101+
// CPP-CHECK-NEXT: ret <vscale x 4 x i32> [[TMP0]]
102+
//
103+
svuint32_t test_svcvt_u32_f64_x2(svfloat64x2_t zn) MODE_ATTR {
104+
return svcvt_u32_f64_x2(zn);
105+
}
Lines changed: 189 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,189 @@
1+
// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py
2+
// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2p3 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
3+
// RUN: %clang_cc1 -triple aarch64 -target-feature +sve -target-feature +sve2p3 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
4+
5+
// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2p3 -O1 -Werror -Wall -emit-llvm -o - %s | FileCheck %s
6+
// RUN: %clang_cc1 -triple aarch64 -target-feature +sme -target-feature +sme2p3 -O1 -Werror -Wall -emit-llvm -o - -x c++ %s | FileCheck %s -check-prefix=CPP-CHECK
7+
8+
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -target-feature +sve2p3\
9+
// RUN: -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
10+
// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sme -target-feature +sme2p3\
11+
// RUN: -S -disable-O0-optnone -Werror -Wall -o /dev/null %s
12+
//
13+
// REQUIRES: aarch64-registered-target
14+
15+
#include <arm_sve.h>
16+
17+
#if defined __ARM_FEATURE_SME
18+
#define MODE_ATTR __arm_streaming
19+
#else
20+
#define MODE_ATTR
21+
#endif
22+
23+
// CHECK-LABEL: @test_svcvtb_f16_s8(
24+
// CHECK-NEXT: entry:
25+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.scvtfb.f16i8(<vscale x 16 x i8> [[ZN:%.*]])
26+
// CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
27+
//
28+
// CPP-CHECK-LABEL: @_Z18test_svcvtb_f16_s8u10__SVInt8_t(
29+
// CPP-CHECK-NEXT: entry:
30+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.scvtfb.f16i8(<vscale x 16 x i8> [[ZN:%.*]])
31+
// CPP-CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
32+
//
33+
svfloat16_t test_svcvtb_f16_s8(svint8_t zn) MODE_ATTR {
34+
return svcvtb_f16_s8(zn);
35+
}
36+
37+
// CHECK-LABEL: @test_svcvtb_f32_s16(
38+
// CHECK-NEXT: entry:
39+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.scvtfb.f32i16(<vscale x 8 x i16> [[ZN:%.*]])
40+
// CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
41+
//
42+
// CPP-CHECK-LABEL: @_Z19test_svcvtb_f32_s16u11__SVInt16_t(
43+
// CPP-CHECK-NEXT: entry:
44+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.scvtfb.f32i16(<vscale x 8 x i16> [[ZN:%.*]])
45+
// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
46+
//
47+
svfloat32_t test_svcvtb_f32_s16(svint16_t zn) MODE_ATTR {
48+
return svcvtb_f32_s16(zn);
49+
}
50+
51+
// CHECK-LABEL: @test_svcvtb_f64_s32(
52+
// CHECK-NEXT: entry:
53+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.scvtfb.f64i32(<vscale x 4 x i32> [[ZN:%.*]])
54+
// CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]]
55+
//
56+
// CPP-CHECK-LABEL: @_Z19test_svcvtb_f64_s32u11__SVInt32_t(
57+
// CPP-CHECK-NEXT: entry:
58+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.scvtfb.f64i32(<vscale x 4 x i32> [[ZN:%.*]])
59+
// CPP-CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]]
60+
//
61+
svfloat64_t test_svcvtb_f64_s32(svint32_t zn) MODE_ATTR {
62+
return svcvtb_f64_s32(zn);
63+
}
64+
65+
// CHECK-LABEL: @test_svcvtb_f16_u8(
66+
// CHECK-NEXT: entry:
67+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.ucvtfb.f16i8(<vscale x 16 x i8> [[ZN:%.*]])
68+
// CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
69+
//
70+
// CPP-CHECK-LABEL: @_Z18test_svcvtb_f16_u8u11__SVUint8_t(
71+
// CPP-CHECK-NEXT: entry:
72+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.ucvtfb.f16i8(<vscale x 16 x i8> [[ZN:%.*]])
73+
// CPP-CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
74+
//
75+
svfloat16_t test_svcvtb_f16_u8(svuint8_t zn) MODE_ATTR {
76+
return svcvtb_f16_u8(zn);
77+
}
78+
79+
// CHECK-LABEL: @test_svcvtb_f32_u16(
80+
// CHECK-NEXT: entry:
81+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ucvtfb.f32i16(<vscale x 8 x i16> [[ZN:%.*]])
82+
// CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
83+
//
84+
// CPP-CHECK-LABEL: @_Z19test_svcvtb_f32_u16u12__SVUint16_t(
85+
// CPP-CHECK-NEXT: entry:
86+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ucvtfb.f32i16(<vscale x 8 x i16> [[ZN:%.*]])
87+
// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
88+
//
89+
svfloat32_t test_svcvtb_f32_u16(svuint16_t zn) MODE_ATTR {
90+
return svcvtb_f32_u16(zn);
91+
}
92+
93+
// CHECK-LABEL: @test_svcvtb_f64_u32(
94+
// CHECK-NEXT: entry:
95+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ucvtfb.f64i32(<vscale x 4 x i32> [[ZN:%.*]])
96+
// CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]]
97+
//
98+
// CPP-CHECK-LABEL: @_Z19test_svcvtb_f64_u32u12__SVUint32_t(
99+
// CPP-CHECK-NEXT: entry:
100+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ucvtfb.f64i32(<vscale x 4 x i32> [[ZN:%.*]])
101+
// CPP-CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]]
102+
//
103+
svfloat64_t test_svcvtb_f64_u32(svuint32_t zn) MODE_ATTR {
104+
return svcvtb_f64_u32(zn);
105+
}
106+
107+
// CHECK-LABEL: @test_svcvt_f16_s8(
108+
// CHECK-NEXT: entry:
109+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.scvtflt.f16i8(<vscale x 16 x i8> [[ZN:%.*]])
110+
// CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
111+
//
112+
// CPP-CHECK-LABEL: @_Z17test_svcvt_f16_s8u10__SVInt8_t(
113+
// CPP-CHECK-NEXT: entry:
114+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.scvtflt.f16i8(<vscale x 16 x i8> [[ZN:%.*]])
115+
// CPP-CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
116+
//
117+
svfloat16_t test_svcvt_f16_s8(svint8_t zn) MODE_ATTR {
118+
return svcvtt_f16_s8(zn);
119+
}
120+
121+
// CHECK-LABEL: @test_svcvt_f32_s16(
122+
// CHECK-NEXT: entry:
123+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.scvtflt.f32i16(<vscale x 8 x i16> [[ZN:%.*]])
124+
// CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
125+
//
126+
// CPP-CHECK-LABEL: @_Z18test_svcvt_f32_s16u11__SVInt16_t(
127+
// CPP-CHECK-NEXT: entry:
128+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.scvtflt.f32i16(<vscale x 8 x i16> [[ZN:%.*]])
129+
// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
130+
//
131+
svfloat32_t test_svcvt_f32_s16(svint16_t zn) MODE_ATTR {
132+
return svcvtt_f32_s16(zn);
133+
}
134+
135+
// CHECK-LABEL: @test_svcvt_f64_s32(
136+
// CHECK-NEXT: entry:
137+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.scvtflt.f64i32(<vscale x 4 x i32> [[ZN:%.*]])
138+
// CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]]
139+
//
140+
// CPP-CHECK-LABEL: @_Z18test_svcvt_f64_s32u11__SVInt32_t(
141+
// CPP-CHECK-NEXT: entry:
142+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.scvtflt.f64i32(<vscale x 4 x i32> [[ZN:%.*]])
143+
// CPP-CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]]
144+
//
145+
svfloat64_t test_svcvt_f64_s32(svint32_t zn) MODE_ATTR {
146+
return svcvtt_f64_s32(zn);
147+
}
148+
149+
// CHECK-LABEL: @test_svcvt_f16_u8(
150+
// CHECK-NEXT: entry:
151+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.ucvtflt.f16i8(<vscale x 16 x i8> [[ZN:%.*]])
152+
// CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
153+
//
154+
// CPP-CHECK-LABEL: @_Z17test_svcvt_f16_u8u11__SVUint8_t(
155+
// CPP-CHECK-NEXT: entry:
156+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 8 x half> @llvm.aarch64.sve.ucvtflt.f16i8(<vscale x 16 x i8> [[ZN:%.*]])
157+
// CPP-CHECK-NEXT: ret <vscale x 8 x half> [[TMP0]]
158+
//
159+
svfloat16_t test_svcvt_f16_u8(svuint8_t zn) MODE_ATTR {
160+
return svcvtt_f16_u8(zn);
161+
}
162+
163+
// CHECK-LABEL: @test_svcvt_f32_u16(
164+
// CHECK-NEXT: entry:
165+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ucvtflt.f32i16(<vscale x 8 x i16> [[ZN:%.*]])
166+
// CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
167+
//
168+
// CPP-CHECK-LABEL: @_Z18test_svcvt_f32_u16u12__SVUint16_t(
169+
// CPP-CHECK-NEXT: entry:
170+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 4 x float> @llvm.aarch64.sve.ucvtflt.f32i16(<vscale x 8 x i16> [[ZN:%.*]])
171+
// CPP-CHECK-NEXT: ret <vscale x 4 x float> [[TMP0]]
172+
//
173+
svfloat32_t test_svcvt_f32_u16(svuint16_t zn) MODE_ATTR {
174+
return svcvtt_f32_u16(zn);
175+
}
176+
177+
// CHECK-LABEL: @test_svcvt_f64_u32(
178+
// CHECK-NEXT: entry:
179+
// CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ucvtflt.f64i32(<vscale x 4 x i32> [[ZN:%.*]])
180+
// CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]]
181+
//
182+
// CPP-CHECK-LABEL: @_Z18test_svcvt_f64_u32u12__SVUint32_t(
183+
// CPP-CHECK-NEXT: entry:
184+
// CPP-CHECK-NEXT: [[TMP0:%.*]] = tail call <vscale x 2 x double> @llvm.aarch64.sve.ucvtflt.f64i32(<vscale x 4 x i32> [[ZN:%.*]])
185+
// CPP-CHECK-NEXT: ret <vscale x 2 x double> [[TMP0]]
186+
//
187+
svfloat64_t test_svcvt_f64_u32(svuint32_t zn) MODE_ATTR {
188+
return svcvtt_f64_u32(zn);
189+
}

llvm/include/llvm/IR/IntrinsicsAArch64.td

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1051,6 +1051,7 @@ def llvm_nxv4i1_ty : LLVMType<nxv4i1>;
10511051
def llvm_nxv8i1_ty : LLVMType<nxv8i1>;
10521052
def llvm_nxv16i1_ty : LLVMType<nxv16i1>;
10531053
def llvm_nxv16i8_ty : LLVMType<nxv16i8>;
1054+
def llvm_nxv8i16_ty : LLVMType<nxv8i16>;
10541055
def llvm_nxv4i32_ty : LLVMType<nxv4i32>;
10551056
def llvm_nxv2i64_ty : LLVMType<nxv2i64>;
10561057
def llvm_nxv8f16_ty : LLVMType<nxv8f16>;
@@ -2610,6 +2611,29 @@ def int_aarch64_sve_fmlslb_lane : SVE2_3VectorArgIndexed_Long_Intrinsic;
26102611
def int_aarch64_sve_fmlslt : SVE2_3VectorArg_Long_Intrinsic;
26112612
def int_aarch64_sve_fmlslt_lane : SVE2_3VectorArgIndexed_Long_Intrinsic;
26122613

2614+
//
2615+
// SVE2 - Multi-vector narrowing convert to floating point
2616+
//
2617+
2618+
class Builtin_SVCVT_UNPRED<LLVMType OUT, LLVMType IN>
2619+
: DefaultAttrsIntrinsic<[OUT], [IN], [IntrNoMem]>;
2620+
2621+
def int_aarch64_sve_scvtfb_f16i8: Builtin_SVCVT_UNPRED<llvm_nxv8f16_ty, llvm_nxv16i8_ty>;
2622+
def int_aarch64_sve_scvtfb_f32i16: Builtin_SVCVT_UNPRED<llvm_nxv4f32_ty, llvm_nxv8i16_ty>;
2623+
def int_aarch64_sve_scvtfb_f64i32: Builtin_SVCVT_UNPRED<llvm_nxv2f64_ty, llvm_nxv4i32_ty>;
2624+
2625+
def int_aarch64_sve_scvtflt_f16i8: Builtin_SVCVT_UNPRED<llvm_nxv8f16_ty, llvm_nxv16i8_ty>;
2626+
def int_aarch64_sve_scvtflt_f32i16: Builtin_SVCVT_UNPRED<llvm_nxv4f32_ty, llvm_nxv8i16_ty>;
2627+
def int_aarch64_sve_scvtflt_f64i32: Builtin_SVCVT_UNPRED<llvm_nxv2f64_ty, llvm_nxv4i32_ty>;
2628+
2629+
def int_aarch64_sve_ucvtfb_f16i8: Builtin_SVCVT_UNPRED<llvm_nxv8f16_ty, llvm_nxv16i8_ty>;
2630+
def int_aarch64_sve_ucvtfb_f32i16: Builtin_SVCVT_UNPRED<llvm_nxv4f32_ty, llvm_nxv8i16_ty>;
2631+
def int_aarch64_sve_ucvtfb_f64i32: Builtin_SVCVT_UNPRED<llvm_nxv2f64_ty, llvm_nxv4i32_ty>;
2632+
2633+
def int_aarch64_sve_ucvtflt_f16i8: Builtin_SVCVT_UNPRED<llvm_nxv8f16_ty, llvm_nxv16i8_ty>;
2634+
def int_aarch64_sve_ucvtflt_f32i16: Builtin_SVCVT_UNPRED<llvm_nxv4f32_ty, llvm_nxv8i16_ty>;
2635+
def int_aarch64_sve_ucvtflt_f64i32: Builtin_SVCVT_UNPRED<llvm_nxv2f64_ty, llvm_nxv4i32_ty>;
2636+
26132637
//
26142638
// SVE2 - Floating-point integer binary logarithm
26152639
//
@@ -3526,6 +3550,10 @@ let TargetPrefix = "aarch64" in {
35263550
[LLVMSubdivide2VectorType<0>, LLVMSubdivide2VectorType<0>],
35273551
[IntrNoMem]>;
35283552

3553+
class SVE2_CVT_VG2_Single_Intrinsic
3554+
: DefaultAttrsIntrinsic<[llvm_anyvector_ty],
3555+
[llvm_anyvector_ty, LLVMMatchType<1>],
3556+
[IntrNoMem]>;
35293557
//
35303558
// Multi-vector fused multiply-add/subtract
35313559
//
@@ -4053,6 +4081,11 @@ def int_aarch64_sve_famin_u : AdvSIMD_Pred2VectorArg_Intrinsic;
40534081
def int_aarch64_neon_famax : AdvSIMD_2VectorArg_Intrinsic;
40544082
def int_aarch64_neon_famin : AdvSIMD_2VectorArg_Intrinsic;
40554083

4084+
// SVE2.3/SME2.3 - Multi-vector narrowing convert to floating point
4085+
4086+
def int_aarch64_sve_fcvtzsn: SVE2_CVT_VG2_Single_Intrinsic;
4087+
def int_aarch64_sve_fcvtzun: SVE2_CVT_VG2_Single_Intrinsic;
4088+
40564089
//
40574090
// FP8 Intrinsics
40584091
//

0 commit comments

Comments
 (0)