Skip to content

Commit c7a91a8

Browse files
committed
[AArch64] Use SVE for bitreverse when available
Speedups after the patch: uint8x8_t 1.01x, uint16x4_t 0.99x, uint32x2_t 2.04x, uint64x1_t 2.04x, uint8x16_t 1.02x, uint16x8_t 1.02x, uint32x4_t 2.01x, uint64x2_t 2.03x.
1 parent 499e6c4 commit c7a91a8

2 files changed

Lines changed: 68 additions & 29 deletions

File tree

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 17 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1332,6 +1332,8 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
13321332
setOperationAction(ISD::CTLS, VT, Legal);
13331333
setOperationAction(ISD::BITREVERSE, MVT::v8i8, Legal);
13341334
setOperationAction(ISD::BITREVERSE, MVT::v16i8, Legal);
1335+
setOperationAction(ISD::BITREVERSE, MVT::v4i16, Custom);
1336+
setOperationAction(ISD::BITREVERSE, MVT::v8i16, Custom);
13351337
setOperationAction(ISD::BITREVERSE, MVT::v2i32, Custom);
13361338
setOperationAction(ISD::BITREVERSE, MVT::v4i32, Custom);
13371339
setOperationAction(ISD::BITREVERSE, MVT::v1i64, Custom);
@@ -11821,7 +11823,7 @@ SDValue AArch64TargetLowering::LowerBitreverse(SDValue Op,
1182111823

1182211824
if (VT.isScalableVector() ||
1182311825
useSVEForFixedLengthVectorVT(
11824-
VT, /*OverrideNEON=*/Subtarget->useSVEForFixedLengthVectors()))
11826+
VT, /*OverrideNEON=*/Subtarget->isSVEorStreamingSVEAvailable()))
1182511827
return LowerToPredicatedOp(Op, DAG, AArch64ISD::BITREVERSE_MERGE_PASSTHRU);
1182611828

1182711829
SDLoc DL(Op);
@@ -11832,6 +11834,20 @@ SDValue AArch64TargetLowering::LowerBitreverse(SDValue Op,
1183211834
default:
1183311835
llvm_unreachable("Invalid type for bitreverse!");
1183411836

11837+
case MVT::v4i16: {
11838+
SDValue Bswap = DAG.getNode(ISD::BSWAP, DL, VT, Op.getOperand(0));
11839+
VST = MVT::v8i8;
11840+
REVB = DAG.getBitcast(VST, Bswap);
11841+
break;
11842+
}
11843+
11844+
case MVT::v8i16: {
11845+
SDValue Bswap = DAG.getNode(ISD::BSWAP, DL, VT, Op.getOperand(0));
11846+
VST = MVT::v16i8;
11847+
REVB = DAG.getBitcast(VST, Bswap);
11848+
break;
11849+
}
11850+
1183511851
case MVT::v2i32: {
1183611852
VST = MVT::v8i8;
1183711853
REVB = DAG.getNode(AArch64ISD::REV32, DL, VST, Op.getOperand(0));

llvm/test/CodeGen/AArch64/bitreverse.ll

Lines changed: 51 additions & 28 deletions
Original file line number | Diff line number | Diff line change
@@ -17,8 +17,9 @@ define <2 x i16> @f(<2 x i16> %a) {
1717
;
1818
; SVE-LABEL: f:
1919
; SVE: // %bb.0:
20-
; SVE-NEXT: rev32 v0.8b, v0.8b
21-
; SVE-NEXT: rbit v0.8b, v0.8b
20+
; SVE-NEXT: ptrue p0.s, vl2
21+
; SVE-NEXT: // kill: def $d0 killed $d0 def $z0
22+
; SVE-NEXT: rbit z0.s, p0/m, z0.s
2223
; SVE-NEXT: ushr v0.2s, v0.2s, #16
2324
; SVE-NEXT: ret
2425
;
@@ -220,8 +221,9 @@ define <4 x i8> @g_vec_4x8(<4 x i8> %a) {
220221
;
221222
; SVE-LABEL: g_vec_4x8:
222223
; SVE: // %bb.0:
223-
; SVE-NEXT: rev16 v0.8b, v0.8b
224-
; SVE-NEXT: rbit v0.8b, v0.8b
224+
; SVE-NEXT: ptrue p0.h, vl4
225+
; SVE-NEXT: // kill: def $d0 killed $d0 def $z0
226+
; SVE-NEXT: rbit z0.h, p0/m, z0.h
225227
; SVE-NEXT: ushr v0.4h, v0.4h, #8
226228
; SVE-NEXT: ret
227229
;
@@ -263,8 +265,10 @@ define <4 x i16> @g_vec_4x16(<4 x i16> %a) {
263265
;
264266
; SVE-LABEL: g_vec_4x16:
265267
; SVE: // %bb.0:
266-
; SVE-NEXT: rev16 v0.8b, v0.8b
267-
; SVE-NEXT: rbit v0.8b, v0.8b
268+
; SVE-NEXT: ptrue p0.h, vl4
269+
; SVE-NEXT: // kill: def $d0 killed $d0 def $z0
270+
; SVE-NEXT: rbit z0.h, p0/m, z0.h
271+
; SVE-NEXT: // kill: def $d0 killed $d0 killed $z0
268272
; SVE-NEXT: ret
269273
%b = call <4 x i16> @llvm.bitreverse.v4i16(<4 x i16> %a)
270274
ret <4 x i16> %b
@@ -281,8 +285,10 @@ define <8 x i16> @g_vec_8x16(<8 x i16> %a) {
281285
;
282286
; SVE-LABEL: g_vec_8x16:
283287
; SVE: // %bb.0:
284-
; SVE-NEXT: rev16 v0.16b, v0.16b
285-
; SVE-NEXT: rbit v0.16b, v0.16b
288+
; SVE-NEXT: ptrue p0.h, vl8
289+
; SVE-NEXT: // kill: def $q0 killed $q0 def $z0
290+
; SVE-NEXT: rbit z0.h, p0/m, z0.h
291+
; SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
286292
; SVE-NEXT: ret
287293
%b = call <8 x i16> @llvm.bitreverse.v8i16(<8 x i16> %a)
288294
ret <8 x i16> %b
@@ -301,10 +307,13 @@ define <16 x i16> @g_vec_16x16(<16 x i16> %a) {
301307
;
302308
; SVE-LABEL: g_vec_16x16:
303309
; SVE: // %bb.0:
304-
; SVE-NEXT: rev16 v0.16b, v0.16b
305-
; SVE-NEXT: rev16 v1.16b, v1.16b
306-
; SVE-NEXT: rbit v0.16b, v0.16b
307-
; SVE-NEXT: rbit v1.16b, v1.16b
310+
; SVE-NEXT: ptrue p0.h, vl8
311+
; SVE-NEXT: // kill: def $q1 killed $q1 def $z1
312+
; SVE-NEXT: // kill: def $q0 killed $q0 def $z0
313+
; SVE-NEXT: rbit z0.h, p0/m, z0.h
314+
; SVE-NEXT: rbit z1.h, p0/m, z1.h
315+
; SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
316+
; SVE-NEXT: // kill: def $q1 killed $q1 killed $z1
308317
; SVE-NEXT: ret
309318
%b = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> %a)
310319
ret <16 x i16> %b
@@ -321,8 +330,10 @@ define <2 x i32> @g_vec_2x32(<2 x i32> %a) {
321330
;
322331
; SVE-LABEL: g_vec_2x32:
323332
; SVE: // %bb.0:
324-
; SVE-NEXT: rev32 v0.8b, v0.8b
325-
; SVE-NEXT: rbit v0.8b, v0.8b
333+
; SVE-NEXT: ptrue p0.s, vl2
334+
; SVE-NEXT: // kill: def $d0 killed $d0 def $z0
335+
; SVE-NEXT: rbit z0.s, p0/m, z0.s
336+
; SVE-NEXT: // kill: def $d0 killed $d0 killed $z0
326337
; SVE-NEXT: ret
327338
%b = call <2 x i32> @llvm.bitreverse.v2i32(<2 x i32> %a)
328339
ret <2 x i32> %b
@@ -339,8 +350,10 @@ define <4 x i32> @g_vec_4x32(<4 x i32> %a) {
339350
;
340351
; SVE-LABEL: g_vec_4x32:
341352
; SVE: // %bb.0:
342-
; SVE-NEXT: rev32 v0.16b, v0.16b
343-
; SVE-NEXT: rbit v0.16b, v0.16b
353+
; SVE-NEXT: ptrue p0.s, vl4
354+
; SVE-NEXT: // kill: def $q0 killed $q0 def $z0
355+
; SVE-NEXT: rbit z0.s, p0/m, z0.s
356+
; SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
344357
; SVE-NEXT: ret
345358
%b = call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> %a)
346359
ret <4 x i32> %b
@@ -359,10 +372,13 @@ define <8 x i32> @g_vec_8x32(<8 x i32> %a) {
359372
;
360373
; SVE-LABEL: g_vec_8x32:
361374
; SVE: // %bb.0:
362-
; SVE-NEXT: rev32 v0.16b, v0.16b
363-
; SVE-NEXT: rev32 v1.16b, v1.16b
364-
; SVE-NEXT: rbit v0.16b, v0.16b
365-
; SVE-NEXT: rbit v1.16b, v1.16b
375+
; SVE-NEXT: ptrue p0.s, vl4
376+
; SVE-NEXT: // kill: def $q1 killed $q1 def $z1
377+
; SVE-NEXT: // kill: def $q0 killed $q0 def $z0
378+
; SVE-NEXT: rbit z0.s, p0/m, z0.s
379+
; SVE-NEXT: rbit z1.s, p0/m, z1.s
380+
; SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
381+
; SVE-NEXT: // kill: def $q1 killed $q1 killed $z1
366382
; SVE-NEXT: ret
367383
%b = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> %a)
368384
ret <8 x i32> %b
@@ -379,8 +395,10 @@ define <1 x i64> @g_vec_1x64(<1 x i64> %a) {
379395
;
380396
; SVE-LABEL: g_vec_1x64:
381397
; SVE: // %bb.0:
382-
; SVE-NEXT: rev64 v0.8b, v0.8b
383-
; SVE-NEXT: rbit v0.8b, v0.8b
398+
; SVE-NEXT: ptrue p0.d, vl1
399+
; SVE-NEXT: // kill: def $d0 killed $d0 def $z0
400+
; SVE-NEXT: rbit z0.d, p0/m, z0.d
401+
; SVE-NEXT: // kill: def $d0 killed $d0 killed $z0
384402
; SVE-NEXT: ret
385403
;
386404
; GISEL-LABEL: g_vec_1x64:
@@ -404,8 +422,10 @@ define <2 x i64> @g_vec_2x64(<2 x i64> %a) {
404422
;
405423
; SVE-LABEL: g_vec_2x64:
406424
; SVE: // %bb.0:
407-
; SVE-NEXT: rev64 v0.16b, v0.16b
408-
; SVE-NEXT: rbit v0.16b, v0.16b
425+
; SVE-NEXT: ptrue p0.d, vl2
426+
; SVE-NEXT: // kill: def $q0 killed $q0 def $z0
427+
; SVE-NEXT: rbit z0.d, p0/m, z0.d
428+
; SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
409429
; SVE-NEXT: ret
410430
%b = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> %a)
411431
ret <2 x i64> %b
@@ -424,10 +444,13 @@ define <4 x i64> @g_vec_4x64(<4 x i64> %a) {
424444
;
425445
; SVE-LABEL: g_vec_4x64:
426446
; SVE: // %bb.0:
427-
; SVE-NEXT: rev64 v0.16b, v0.16b
428-
; SVE-NEXT: rev64 v1.16b, v1.16b
429-
; SVE-NEXT: rbit v0.16b, v0.16b
430-
; SVE-NEXT: rbit v1.16b, v1.16b
447+
; SVE-NEXT: ptrue p0.d, vl2
448+
; SVE-NEXT: // kill: def $q1 killed $q1 def $z1
449+
; SVE-NEXT: // kill: def $q0 killed $q0 def $z0
450+
; SVE-NEXT: rbit z0.d, p0/m, z0.d
451+
; SVE-NEXT: rbit z1.d, p0/m, z1.d
452+
; SVE-NEXT: // kill: def $q0 killed $q0 killed $z0
453+
; SVE-NEXT: // kill: def $q1 killed $q1 killed $z1
431454
; SVE-NEXT: ret
432455
%b = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> %a)
433456
ret <4 x i64> %b

0 commit comments

Comments
 (0)