Skip to content

Commit 89f0246

Browse files
authored
[RISCV][CodeGen] Use vzip.vv for e64 interleave shuffles with Zvzip (#199923)
Allow e64 interleave shuffles to use the standard Zvzip `vzip.vv` lowering when the operand type is legal for Zvzip, matching what XRivosVizip already does.
1 parent ed11d7a commit 89f0246

3 files changed

Lines changed: 21 additions & 41 deletions

File tree

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 7 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -5220,10 +5220,13 @@ static bool isLegalVTForZvzipOperand(MVT VT, const RISCVSubtarget &Subtarget) {
52205220
static bool isInterleaveShuffle(ArrayRef<int> Mask, MVT VT, int &EvenSrc,
52215221
int &OddSrc, const RISCVSubtarget &Subtarget) {
52225222
// We need to be able to widen elements to the next larger integer type or
5223-
// use the zip2a instruction at e64.
5224-
if (VT.getScalarSizeInBits() >= Subtarget.getELen() &&
5225-
!Subtarget.hasVendorXRivosVizip())
5226-
return false;
5223+
// use the zip2a/vzip instruction at e64.
5224+
if (VT.getScalarSizeInBits() >= Subtarget.getELen()) {
5225+
if (!Subtarget.hasVendorXRivosVizip() && !Subtarget.hasStdExtZvzip())
5226+
return false;
5227+
if (Subtarget.hasStdExtZvzip() && !isLegalVTForZvzipOperand(VT, Subtarget))
5228+
return false;
5229+
}
52275230

52285231
int Size = Mask.size();
52295232
int NumElts = VT.getVectorNumElements();

llvm/test/CodeGen/RISCV/rvv/fixed-vectors-shuffle-int-interleave.ll

Lines changed: 8 additions & 17 deletions
Original file line number | Diff line number | Diff line change
@@ -129,15 +129,10 @@ define <4 x i64> @interleave_v2i64(<2 x i64> %x, <2 x i64> %y) {
129129
;
130130
; ZVZIP-LABEL: interleave_v2i64:
131131
; ZVZIP: # %bb.0:
132-
; ZVZIP-NEXT: vsetivli zero, 1, e8, m1, ta, ma
132+
; ZVZIP-NEXT: vsetivli zero, 2, e64, m1, ta, ma
133133
; ZVZIP-NEXT: vmv1r.v v10, v9
134-
; ZVZIP-NEXT: vmv.v.i v0, 10
135-
; ZVZIP-NEXT: vsetivli zero, 4, e64, m2, ta, ma
136-
; ZVZIP-NEXT: vslideup.vi v12, v10, 1
137-
; ZVZIP-NEXT: vslideup.vi v12, v10, 2
138-
; ZVZIP-NEXT: vmv2r.v v10, v8
139-
; ZVZIP-NEXT: vslideup.vi v10, v8, 1
140-
; ZVZIP-NEXT: vmerge.vvm v8, v10, v12, v0
134+
; ZVZIP-NEXT: vmv1r.v v11, v8
135+
; ZVZIP-NEXT: vzip.vv v8, v11, v10
141136
; ZVZIP-NEXT: ret
142137
%a = shufflevector <2 x i64> %x, <2 x i64> %y, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
143138
ret <4 x i64> %a
@@ -1088,15 +1083,11 @@ define <4 x i64> @unary_interleave_v4i64(<4 x i64> %x) {
10881083
;
10891084
; ZVZIP-LABEL: unary_interleave_v4i64:
10901085
; ZVZIP: # %bb.0:
1091-
; ZVZIP-NEXT: lui a0, 12304
1092-
; ZVZIP-NEXT: addi a0, a0, 512
1093-
; ZVZIP-NEXT: vsetivli zero, 4, e32, m1, ta, ma
1094-
; ZVZIP-NEXT: vmv.s.x v10, a0
1095-
; ZVZIP-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
1096-
; ZVZIP-NEXT: vsext.vf2 v12, v10
1097-
; ZVZIP-NEXT: vsetvli zero, zero, e64, m2, ta, ma
1098-
; ZVZIP-NEXT: vrgatherei16.vv v10, v8, v12
1099-
; ZVZIP-NEXT: vmv.v.v v8, v10
1086+
; ZVZIP-NEXT: vsetivli zero, 2, e64, m2, ta, ma
1087+
; ZVZIP-NEXT: vslidedown.vi v12, v8, 2
1088+
; ZVZIP-NEXT: vsetivli zero, 2, e64, m1, ta, ma
1089+
; ZVZIP-NEXT: vzip.vv v10, v8, v12
1090+
; ZVZIP-NEXT: vmv2r.v v8, v10
11001091
; ZVZIP-NEXT: ret
11011092
%a = shufflevector <4 x i64> %x, <4 x i64> poison, <4 x i32> <i32 0, i32 2, i32 1, i32 3>
11021093
ret <4 x i64> %a

llvm/test/CodeGen/RISCV/rvv/vector-interleave-fixed.ll

Lines changed: 6 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -200,17 +200,10 @@ define <4 x i64> @vector_interleave_v4i64_v2i64(<2 x i64> %a, <2 x i64> %b) {
200200
;
201201
; ZVZIP-LABEL: vector_interleave_v4i64_v2i64:
202202
; ZVZIP: # %bb.0:
203-
; ZVZIP-NEXT: vsetivli zero, 4, e64, m2, ta, ma
203+
; ZVZIP-NEXT: vsetivli zero, 2, e64, m1, ta, ma
204204
; ZVZIP-NEXT: vmv1r.v v10, v9
205-
; ZVZIP-NEXT: lui a0, 12304
206-
; ZVZIP-NEXT: addi a0, a0, 512
207-
; ZVZIP-NEXT: vslideup.vi v8, v10, 2
208-
; ZVZIP-NEXT: vmv.s.x v10, a0
209-
; ZVZIP-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
210-
; ZVZIP-NEXT: vsext.vf2 v12, v10
211-
; ZVZIP-NEXT: vsetvli zero, zero, e64, m2, ta, ma
212-
; ZVZIP-NEXT: vrgatherei16.vv v10, v8, v12
213-
; ZVZIP-NEXT: vmv.v.v v8, v10
205+
; ZVZIP-NEXT: vmv1r.v v11, v8
206+
; ZVZIP-NEXT: vzip.vv v8, v11, v10
214207
; ZVZIP-NEXT: ret
215208
%res = call <4 x i64> @llvm.vector.interleave2.v4i64(<2 x i64> %a, <2 x i64> %b)
216209
ret <4 x i64> %res
@@ -1389,17 +1382,10 @@ define <4 x double> @vector_interleave_v4f64_v2f64(<2 x double> %a, <2 x double>
13891382
;
13901383
; ZVZIP-LABEL: vector_interleave_v4f64_v2f64:
13911384
; ZVZIP: # %bb.0:
1392-
; ZVZIP-NEXT: vsetivli zero, 4, e64, m2, ta, ma
1385+
; ZVZIP-NEXT: vsetivli zero, 2, e64, m1, ta, ma
13931386
; ZVZIP-NEXT: vmv1r.v v10, v9
1394-
; ZVZIP-NEXT: lui a0, 12304
1395-
; ZVZIP-NEXT: addi a0, a0, 512
1396-
; ZVZIP-NEXT: vslideup.vi v8, v10, 2
1397-
; ZVZIP-NEXT: vmv.s.x v10, a0
1398-
; ZVZIP-NEXT: vsetvli zero, zero, e16, mf2, ta, ma
1399-
; ZVZIP-NEXT: vsext.vf2 v12, v10
1400-
; ZVZIP-NEXT: vsetvli zero, zero, e64, m2, ta, ma
1401-
; ZVZIP-NEXT: vrgatherei16.vv v10, v8, v12
1402-
; ZVZIP-NEXT: vmv.v.v v8, v10
1387+
; ZVZIP-NEXT: vmv1r.v v11, v8
1388+
; ZVZIP-NEXT: vzip.vv v8, v11, v10
14031389
; ZVZIP-NEXT: ret
14041390
%res = call <4 x double> @llvm.vector.interleave2.v4f64(<2 x double> %a, <2 x double> %b)
14051391
ret <4 x double> %res

0 commit comments

Comments
 (0)