Skip to content

Commit 074b6be

Browse files
[SLP] Vectorize struct-returning intrinsics
Allow SLP to combine calls across lanes that return a literal struct (llvm.sincos, llvm.*.with.overflow, llvm.frexp, ...) into a single call returning a struct of vectors, by widening {T, T, ...} to {<VF x T>, ...} via VectorTypeUtils and emitting extractvalue + extractelement for external uses. Original Pull Request: llvm#195521. Original Pull Request 2: llvm#196756. Recommit after revert (llvm#198265 (comment)): added a check for a valid vectorizable type and small corner-case fixes. Reviewers: Pull Request: llvm#199433
1 parent fb9c9eb commit 074b6be

14 files changed

Lines changed: 3775 additions & 3880 deletions

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 553 additions & 187 deletions
Large diffs are not rendered by default.
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
2+
; RUN: opt -S --passes=slp-vectorizer -mtriple=aarch64-pc-windows-gnu -mcpu=cortex-x3 -slp-revec < %s | FileCheck %s
3+
4+
; Reduced input whose only non-terminator instructions are two single-entry
; phis in %vector.body; one of them has the scalable-vector type
; <vscale x 16 x i1>, which is also the function's return type.
; The autogenerated check lines below expect the same two phis and the same
; ret in the pass output, i.e. no instruction is rewritten.
define <vscale x 16 x i1> @test() {
5+
; CHECK-LABEL: define <vscale x 16 x i1> @test(
6+
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
7+
; CHECK-NEXT: [[VECTOR_PH:.*]]:
8+
; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
9+
; CHECK: [[VECTOR_BODY]]:
10+
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ]
11+
; CHECK-NEXT: [[ACTIVE_LANE_MASK:%.*]] = phi <vscale x 16 x i1> [ zeroinitializer, %[[VECTOR_PH]] ]
12+
; CHECK-NEXT: ret <vscale x 16 x i1> [[ACTIVE_LANE_MASK]]
13+
;
14+
vector.ph:
15+
br label %vector.body
16+
17+
vector.body:
18+
; %index has no users in this function; only the scalable-vector phi is returned.
%index = phi i64 [ 0, %vector.ph ]
19+
%active.lane.mask = phi <vscale x 16 x i1> [ zeroinitializer, %vector.ph ]
20+
ret <vscale x 16 x i1> %active.lane.mask
21+
}
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
2+
; RUN: opt -S --passes=slp-vectorizer -mtriple=amdgcn-amd-amdhsa < %s | FileCheck %s
3+
4+
; Reduced input: a self-looping block containing two calls to
; @llvm.uadd.with.overflow.i32, which returns the literal struct { i32, i1 }.
; The second call's last operand is the zero-extended overflow bit (field 1)
; of the first call, and field 0 of each call feeds one of the two loop phis.
; The autogenerated check lines below expect both scalar { i32, i1 } calls and
; all three extractvalue instructions to remain in the pass output.
define void @test() {
5+
; CHECK-LABEL: define void @test() {
6+
; CHECK-NEXT: [[ENTRY:.*]]:
7+
; CHECK-NEXT: br label %[[LAND_RHS_I_I_I1756:.*]]
8+
; CHECK: [[LAND_RHS_I_I_I1756]]:
9+
; CHECK-NEXT: [[VALUE_SROA_8_026_I_I_I1760:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[TMP4:%.*]], %[[LAND_RHS_I_I_I1756]] ]
10+
; CHECK-NEXT: [[VALUE_SROA_12_025_I_I_I1761:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[TMP3:%.*]], %[[LAND_RHS_I_I_I1756]] ]
11+
; CHECK-NEXT: [[SHR_1_I_I_I_I1784:%.*]] = lshr i32 [[VALUE_SROA_8_026_I_I_I1760]], 0
12+
; CHECK-NEXT: [[ADD3_1_I_I_I_I1799:%.*]] = or i32 0, [[SHR_1_I_I_I_I1784]]
13+
; CHECK-NEXT: [[SHR_2_I_I_I_I1786:%.*]] = lshr i32 [[VALUE_SROA_12_025_I_I_I1761]], 0
14+
; CHECK-NEXT: [[ADD3_2_I_I_I_I1801:%.*]] = or i32 [[ADD3_1_I_I_I_I1799]], [[SHR_2_I_I_I_I1786]]
15+
; CHECK-NEXT: [[SHR_5_I_I_I_I1792:%.*]] = lshr i32 0, 0
16+
; CHECK-NEXT: [[ADD3_5_I_I_I_I1807:%.*]] = or i32 [[ADD3_2_I_I_I_I1801]], [[SHR_5_I_I_I_I1792]]
17+
; CHECK-NEXT: [[CMP4_I_I_I_I_I1815:%.*]] = icmp eq i32 [[ADD3_5_I_I_I_I1807]], 0
18+
; CHECK-NEXT: [[TMP0:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 0, i32 0)
19+
; CHECK-NEXT: [[TMP1:%.*]] = extractvalue { i32, i1 } [[TMP0]], 1
20+
; CHECK-NEXT: [[OR_I40_2_I_I_I_I1917:%.*]] = zext i1 [[TMP1]] to i32
21+
; CHECK-NEXT: [[TMP2:%.*]] = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 0, i32 [[OR_I40_2_I_I_I_I1917]])
22+
; CHECK-NEXT: [[TMP3]] = extractvalue { i32, i1 } [[TMP2]], 0
23+
; CHECK-NEXT: [[TMP4]] = extractvalue { i32, i1 } [[TMP0]], 0
24+
; CHECK-NEXT: br label %[[LAND_RHS_I_I_I1756]]
25+
;
26+
entry:
27+
br label %land.rhs.i.i.i1756
28+
29+
land.rhs.i.i.i1756:
30+
; Loop-carried values: each phi takes field 0 of one overflow call on the back edge.
%value.sroa.8.026.i.i.i1760 = phi i32 [ 0, %entry ], [ %4, %land.rhs.i.i.i1756 ]
31+
%value.sroa.12.025.i.i.i1761 = phi i32 [ 0, %entry ], [ %3, %land.rhs.i.i.i1756 ]
32+
; lshr/or chain over the two phis; its final icmp result has no users in this function.
%shr.1.i.i.i.i1784 = lshr i32 %value.sroa.8.026.i.i.i1760, 0
33+
%add3.1.i.i.i.i1799 = or i32 0, %shr.1.i.i.i.i1784
34+
%shr.2.i.i.i.i1786 = lshr i32 %value.sroa.12.025.i.i.i1761, 0
35+
%add3.2.i.i.i.i1801 = or i32 %add3.1.i.i.i.i1799, %shr.2.i.i.i.i1786
36+
%shr.5.i.i.i.i1792 = lshr i32 0, 0
37+
%add3.5.i.i.i.i1807 = or i32 %add3.2.i.i.i.i1801, %shr.5.i.i.i.i1792
38+
%cmp4.i.i.i.i.i1815 = icmp eq i32 %add3.5.i.i.i.i1807, 0
39+
; First struct-returning call; its overflow bit feeds the second call below.
%0 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 0, i32 0)
40+
%1 = extractvalue { i32, i1 } %0, 1
41+
%or.i40.2.i.i.i.i1917 = zext i1 %1 to i32
42+
; Second struct-returning call; depends on %0 through %1 and the zext.
%2 = call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 0, i32 %or.i40.2.i.i.i.i1917)
43+
%3 = extractvalue { i32, i1 } %2, 0
44+
; Field 0 of the first call is extracted only after the second call has been issued.
%4 = extractvalue { i32, i1 } %0, 0
45+
br label %land.rhs.i.i.i1756
46+
}
47+
48+
; Intrinsic called twice by @test; it returns the literal struct { i32, i1 }.
declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32)
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 6
2+
; RUN: opt -S --passes=slp-vectorizer -mtriple=riscv64-unknown-unknown-elf -mattr=+v < %s | FileCheck %s
3+
4+
; Reduced input returning the target extension type
; target("riscv.vector.tuple", <vscale x 32 x i8>, 2). It makes one
; @llvm.riscv.vand call on zeroinitializer operands, then two chained
; @llvm.riscv.tuple.insert calls, both with index operand i32 0.
; The autogenerated check lines below expect the same three calls and the
; same ret in the pass output, i.e. no instruction is rewritten.
define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test() {
5+
; CHECK-LABEL: define target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @test(
6+
; CHECK-SAME: ) #[[ATTR0:[0-9]+]] {
7+
; CHECK-NEXT: [[BB:.*:]]
8+
; CHECK-NEXT: [[CALL:%.*]] = tail call <vscale x 4 x i64> @llvm.riscv.vand.nxv4i64.i64.i64(<vscale x 4 x i64> zeroinitializer, <vscale x 4 x i64> zeroinitializer, i64 0, i64 0)
9+
; CHECK-NEXT: [[CALL1:%.*]] = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv32i8_2t.nxv4i64(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) poison, <vscale x 4 x i64> [[CALL]], i32 0)
10+
; CHECK-NEXT: [[CALL2:%.*]] = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv32i8_2t.nxv4i64(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) [[CALL1]], <vscale x 4 x i64> zeroinitializer, i32 0)
11+
; CHECK-NEXT: ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) [[CALL2]]
12+
;
13+
bb:
14+
%call = tail call <vscale x 4 x i64> @llvm.riscv.vand.nxv4i64.i64.i64(<vscale x 4 x i64> zeroinitializer, <vscale x 4 x i64> zeroinitializer, i64 0, i64 0)
15+
; First insert starts from a poison tuple and places %call at index 0.
%call1 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv32i8_2t.nxv4i64(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) poison, <vscale x 4 x i64> %call, i32 0)
16+
; Second insert takes %call1 as the tuple and places zeroinitializer at the same index 0.
%call2 = tail call target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv32i8_2t.nxv4i64(target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %call1, <vscale x 4 x i64> zeroinitializer, i32 0)
17+
ret target("riscv.vector.tuple", <vscale x 32 x i8>, 2) %call2
18+
}
19+
20+
; Intrinsic called once by @test to produce the <vscale x 4 x i64> value %call.
declare <vscale x 4 x i64> @llvm.riscv.vand.nxv4i64.i64.i64(<vscale x 4 x i64>, <vscale x 4 x i64>, i64, i64)
21+
22+
; Intrinsic called twice by @test; its trailing i32 index parameter is immarg.
declare target("riscv.vector.tuple", <vscale x 32 x i8>, 2) @llvm.riscv.tuple.insert.triscv.vector.tuple_nxv32i8_2t.nxv4i64(target("riscv.vector.tuple", <vscale x 32 x i8>, 2), <vscale x 4 x i64>, i32 immarg)

0 commit comments

Comments
 (0)