Skip to content

Commit 02e0d50

Browse files
[SVE] Remove AArch64ISD::ADD_PRED and AArch64ISD::SUB_PRED.
These nodes provide an indirection that is not necessary because SVE has unpredicated add/sub instructions and there's no downside to using them for partial register operations. In fact, the test changes show that unifying how fixed-length and scalable vector add/sub are lowered enables better use of existing isel patterns.

Differential Revision: https://reviews.llvm.org/D119355
1 parent 039a88b commit 02e0d50

16 files changed

Lines changed: 265 additions & 338 deletions

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 1 addition & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1996,7 +1996,6 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
19961996
MAKE_CASE(AArch64ISD::TLSDESC_CALLSEQ)
19971997
MAKE_CASE(AArch64ISD::ABDS_PRED)
19981998
MAKE_CASE(AArch64ISD::ABDU_PRED)
1999-
MAKE_CASE(AArch64ISD::ADD_PRED)
20001999
MAKE_CASE(AArch64ISD::MUL_PRED)
20012000
MAKE_CASE(AArch64ISD::MULHS_PRED)
20022001
MAKE_CASE(AArch64ISD::MULHU_PRED)
@@ -2006,7 +2005,6 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
20062005
MAKE_CASE(AArch64ISD::SMIN_PRED)
20072006
MAKE_CASE(AArch64ISD::SRA_PRED)
20082007
MAKE_CASE(AArch64ISD::SRL_PRED)
2009-
MAKE_CASE(AArch64ISD::SUB_PRED)
20102008
MAKE_CASE(AArch64ISD::UDIV_PRED)
20112009
MAKE_CASE(AArch64ISD::UMAX_PRED)
20122010
MAKE_CASE(AArch64ISD::UMIN_PRED)
@@ -5240,11 +5238,9 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
52405238
return LowerFixedLengthVectorLoadToSVE(Op, DAG);
52415239
return LowerLOAD(Op, DAG);
52425240
case ISD::ADD:
5243-
return LowerToPredicatedOp(Op, DAG, AArch64ISD::ADD_PRED);
52445241
case ISD::AND:
5245-
return LowerToScalableOp(Op, DAG);
52465242
case ISD::SUB:
5247-
return LowerToPredicatedOp(Op, DAG, AArch64ISD::SUB_PRED);
5243+
return LowerToScalableOp(Op, DAG);
52485244
case ISD::FMAXIMUM:
52495245
return LowerToPredicatedOp(Op, DAG, AArch64ISD::FMAX_PRED);
52505246
case ISD::FMAXNUM:

llvm/lib/Target/AArch64/AArch64ISelLowering.h

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -79,7 +79,6 @@ enum NodeType : unsigned {
7979
// Predicated instructions where inactive lanes produce undefined results.
8080
ABDS_PRED,
8181
ABDU_PRED,
82-
ADD_PRED,
8382
FADD_PRED,
8483
FDIV_PRED,
8584
FMA_PRED,
@@ -98,7 +97,6 @@ enum NodeType : unsigned {
9897
SMIN_PRED,
9998
SRA_PRED,
10099
SRL_PRED,
101-
SUB_PRED,
102100
UDIV_PRED,
103101
UMAX_PRED,
104102
UMIN_PRED,

llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td

Lines changed: 0 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -175,7 +175,6 @@ def SDT_AArch64FMA : SDTypeProfile<1, 4, [
175175
]>;
176176

177177
// Predicated operations with the result of inactive lanes being unspecified.
178-
def AArch64add_p : SDNode<"AArch64ISD::ADD_PRED", SDT_AArch64Arith>;
179178
def AArch64asr_p : SDNode<"AArch64ISD::SRA_PRED", SDT_AArch64Arith>;
180179
def AArch64fadd_p : SDNode<"AArch64ISD::FADD_PRED", SDT_AArch64Arith>;
181180
def AArch64fdiv_p : SDNode<"AArch64ISD::FDIV_PRED", SDT_AArch64Arith>;
@@ -194,7 +193,6 @@ def AArch64sdiv_p : SDNode<"AArch64ISD::SDIV_PRED", SDT_AArch64Arith>;
194193
def AArch64smax_p : SDNode<"AArch64ISD::SMAX_PRED", SDT_AArch64Arith>;
195194
def AArch64smin_p : SDNode<"AArch64ISD::SMIN_PRED", SDT_AArch64Arith>;
196195
def AArch64smulh_p : SDNode<"AArch64ISD::MULHS_PRED", SDT_AArch64Arith>;
197-
def AArch64sub_p : SDNode<"AArch64ISD::SUB_PRED", SDT_AArch64Arith>;
198196
def AArch64uabd_p : SDNode<"AArch64ISD::ABDU_PRED", SDT_AArch64Arith>;
199197
def AArch64udiv_p : SDNode<"AArch64ISD::UDIV_PRED", SDT_AArch64Arith>;
200198
def AArch64umax_p : SDNode<"AArch64ISD::UMAX_PRED", SDT_AArch64Arith>;
@@ -328,9 +326,6 @@ let Predicates = [HasSVEorStreamingSVE] in {
328326
defm ADD_ZPmZ : sve_int_bin_pred_arit_0<0b000, "add", "ADD_ZPZZ", int_aarch64_sve_add, DestructiveBinaryComm>;
329327
defm SUB_ZPmZ : sve_int_bin_pred_arit_0<0b001, "sub", "SUB_ZPZZ", int_aarch64_sve_sub, DestructiveBinaryCommWithRev, "SUBR_ZPmZ">;
330328
defm SUBR_ZPmZ : sve_int_bin_pred_arit_0<0b011, "subr", "SUBR_ZPZZ", int_aarch64_sve_subr, DestructiveBinaryCommWithRev, "SUB_ZPmZ", /*isReverseInstr*/ 1>;
331-
332-
defm ADD_ZPZZ : sve_int_bin_pred_bhsd<AArch64add_p>;
333-
defm SUB_ZPZZ : sve_int_bin_pred_bhsd<AArch64sub_p>;
334329
} // End HasSVEorStreamingSVE
335330

336331
let Predicates = [HasSVEorStreamingSVE, UseExperimentalZeroingPseudos] in {

llvm/test/CodeGen/AArch64/sve-fixed-length-int-arith.ll

Lines changed: 43 additions & 43 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AArch64/sve-fixed-length-int-extends.ll

Lines changed: 56 additions & 56 deletions
Large diffs are not rendered by default.

llvm/test/CodeGen/AArch64/sve-fixed-length-int-immediates.ll

Lines changed: 8 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -15,9 +15,8 @@ define void @add_v64i8(<64 x i8>* %a) #0 {
1515
; CHECK-LABEL: add_v64i8:
1616
; CHECK: // %bb.0:
1717
; CHECK-NEXT: ptrue p0.b, vl64
18-
; CHECK-NEXT: mov z1.b, #7 // =0x7
1918
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
20-
; CHECK-NEXT: add z0.b, p0/m, z0.b, z1.b
19+
; CHECK-NEXT: add z0.b, z0.b, #7 // =0x7
2120
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
2221
; CHECK-NEXT: ret
2322
%op1 = load <64 x i8>, <64 x i8>* %a
@@ -32,9 +31,8 @@ define void @add_v32i16(<32 x i16>* %a) #0 {
3231
; CHECK-LABEL: add_v32i16:
3332
; CHECK: // %bb.0:
3433
; CHECK-NEXT: ptrue p0.h, vl32
35-
; CHECK-NEXT: mov z1.h, #15 // =0xf
3634
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
37-
; CHECK-NEXT: add z0.h, p0/m, z0.h, z1.h
35+
; CHECK-NEXT: add z0.h, z0.h, #15 // =0xf
3836
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
3937
; CHECK-NEXT: ret
4038
%op1 = load <32 x i16>, <32 x i16>* %a
@@ -49,9 +47,8 @@ define void @add_v16i32(<16 x i32>* %a) #0 {
4947
; CHECK-LABEL: add_v16i32:
5048
; CHECK: // %bb.0:
5149
; CHECK-NEXT: ptrue p0.s, vl16
52-
; CHECK-NEXT: mov z1.s, #31 // =0x1f
5350
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
54-
; CHECK-NEXT: add z0.s, p0/m, z0.s, z1.s
51+
; CHECK-NEXT: add z0.s, z0.s, #31 // =0x1f
5552
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
5653
; CHECK-NEXT: ret
5754
%op1 = load <16 x i32>, <16 x i32>* %a
@@ -66,9 +63,8 @@ define void @add_v8i64(<8 x i64>* %a) #0 {
6663
; CHECK-LABEL: add_v8i64:
6764
; CHECK: // %bb.0:
6865
; CHECK-NEXT: ptrue p0.d, vl8
69-
; CHECK-NEXT: mov z1.d, #63 // =0x3f
7066
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
71-
; CHECK-NEXT: add z0.d, p0/m, z0.d, z1.d
67+
; CHECK-NEXT: add z0.d, z0.d, #63 // =0x3f
7268
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
7369
; CHECK-NEXT: ret
7470
%op1 = load <8 x i64>, <8 x i64>* %a
@@ -719,9 +715,8 @@ define void @sub_v64i8(<64 x i8>* %a) #0 {
719715
; CHECK-LABEL: sub_v64i8:
720716
; CHECK: // %bb.0:
721717
; CHECK-NEXT: ptrue p0.b, vl64
722-
; CHECK-NEXT: mov z1.b, #7 // =0x7
723718
; CHECK-NEXT: ld1b { z0.b }, p0/z, [x0]
724-
; CHECK-NEXT: sub z0.b, p0/m, z0.b, z1.b
719+
; CHECK-NEXT: sub z0.b, z0.b, #7 // =0x7
725720
; CHECK-NEXT: st1b { z0.b }, p0, [x0]
726721
; CHECK-NEXT: ret
727722
%op1 = load <64 x i8>, <64 x i8>* %a
@@ -736,9 +731,8 @@ define void @sub_v32i16(<32 x i16>* %a) #0 {
736731
; CHECK-LABEL: sub_v32i16:
737732
; CHECK: // %bb.0:
738733
; CHECK-NEXT: ptrue p0.h, vl32
739-
; CHECK-NEXT: mov z1.h, #15 // =0xf
740734
; CHECK-NEXT: ld1h { z0.h }, p0/z, [x0]
741-
; CHECK-NEXT: sub z0.h, p0/m, z0.h, z1.h
735+
; CHECK-NEXT: sub z0.h, z0.h, #15 // =0xf
742736
; CHECK-NEXT: st1h { z0.h }, p0, [x0]
743737
; CHECK-NEXT: ret
744738
%op1 = load <32 x i16>, <32 x i16>* %a
@@ -753,9 +747,8 @@ define void @sub_v16i32(<16 x i32>* %a) #0 {
753747
; CHECK-LABEL: sub_v16i32:
754748
; CHECK: // %bb.0:
755749
; CHECK-NEXT: ptrue p0.s, vl16
756-
; CHECK-NEXT: mov z1.s, #31 // =0x1f
757750
; CHECK-NEXT: ld1w { z0.s }, p0/z, [x0]
758-
; CHECK-NEXT: sub z0.s, p0/m, z0.s, z1.s
751+
; CHECK-NEXT: sub z0.s, z0.s, #31 // =0x1f
759752
; CHECK-NEXT: st1w { z0.s }, p0, [x0]
760753
; CHECK-NEXT: ret
761754
%op1 = load <16 x i32>, <16 x i32>* %a
@@ -770,9 +763,8 @@ define void @sub_v8i64(<8 x i64>* %a) #0 {
770763
; CHECK-LABEL: sub_v8i64:
771764
; CHECK: // %bb.0:
772765
; CHECK-NEXT: ptrue p0.d, vl8
773-
; CHECK-NEXT: mov z1.d, #63 // =0x3f
774766
; CHECK-NEXT: ld1d { z0.d }, p0/z, [x0]
775-
; CHECK-NEXT: sub z0.d, p0/m, z0.d, z1.d
767+
; CHECK-NEXT: sub z0.d, z0.d, #63 // =0x3f
776768
; CHECK-NEXT: st1d { z0.d }, p0, [x0]
777769
; CHECK-NEXT: ret
778770
%op1 = load <8 x i64>, <8 x i64>* %a

llvm/test/CodeGen/AArch64/sve-fixed-length-int-reduce.ll

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -67,7 +67,7 @@ define i8 @uaddv_v64i8(<64 x i8>* %a) #0 {
6767
; VBITS_EQ_256-DAG: mov w[[NUMELTS:[0-9]+]], #32
6868
; VBITS_EQ_256-DAG: ld1b { [[LO:z[0-9]+]].b }, [[PG]]/z, [x0]
6969
; VBITS_EQ_256-DAG: ld1b { [[HI:z[0-9]+]].b }, [[PG]]/z, [x0, x[[NUMELTS]]]
70-
; VBITS_EQ_256-DAG: add [[ADD:z[0-9]+]].b, [[PG]]/m, [[HI]].b, [[LO]].b
70+
; VBITS_EQ_256-DAG: add [[ADD:z[0-9]+]].b, [[LO]].b, [[HI]].b
7171
; VBITS_EQ_256-DAG: addv [[REDUCE:d[0-9]+]], [[PG]], [[ADD]].b
7272
; VBITS_EQ_256-NEXT: fmov x0, [[REDUCE]]
7373
; VBITS_EQ_256-NEXT: ret
@@ -143,7 +143,7 @@ define i16 @uaddv_v32i16(<32 x i16>* %a) #0 {
143143
; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #16
144144
; VBITS_EQ_256-DAG: ld1h { [[LO:z[0-9]+]].h }, [[PG]]/z, [x0]
145145
; VBITS_EQ_256-DAG: ld1h { [[HI:z[0-9]+]].h }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #1]
146-
; VBITS_EQ_256-DAG: add [[ADD:z[0-9]+]].h, [[PG]]/m, [[HI]].h, [[LO]].h
146+
; VBITS_EQ_256-DAG: add [[ADD:z[0-9]+]].h, [[LO]].h, [[HI]].h
147147
; VBITS_EQ_256-DAG: addv [[REDUCE:d[0-9]+]], [[PG]], [[ADD]].h
148148
; VBITS_EQ_256-NEXT: fmov x0, [[REDUCE]]
149149
; VBITS_EQ_256-NEXT: ret
@@ -219,7 +219,7 @@ define i32 @uaddv_v16i32(<16 x i32>* %a) #0 {
219219
; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #8
220220
; VBITS_EQ_256-DAG: ld1w { [[LO:z[0-9]+]].s }, [[PG]]/z, [x0]
221221
; VBITS_EQ_256-DAG: ld1w { [[HI:z[0-9]+]].s }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #2]
222-
; VBITS_EQ_256-DAG: add [[ADD:z[0-9]+]].s, [[PG]]/m, [[HI]].s, [[LO]].s
222+
; VBITS_EQ_256-DAG: add [[ADD:z[0-9]+]].s, [[LO]].s, [[HI]].s
223223
; VBITS_EQ_256-DAG: addv [[REDUCE:d[0-9]+]], [[PG]], [[ADD]].s
224224
; VBITS_EQ_256-NEXT: fmov x0, [[REDUCE]]
225225
; VBITS_EQ_256-NEXT: ret
@@ -295,7 +295,7 @@ define i64 @uaddv_v8i64(<8 x i64>* %a) #0 {
295295
; VBITS_EQ_256-DAG: mov x[[NUMELTS:[0-9]+]], #4
296296
; VBITS_EQ_256-DAG: ld1d { [[LO:z[0-9]+]].d }, [[PG]]/z, [x0]
297297
; VBITS_EQ_256-DAG: ld1d { [[HI:z[0-9]+]].d }, [[PG]]/z, [x0, x[[NUMELTS]], lsl #3]
298-
; VBITS_EQ_256-DAG: add [[ADD:z[0-9]+]].d, [[PG]]/m, [[HI]].d, [[LO]].d
298+
; VBITS_EQ_256-DAG: add [[ADD:z[0-9]+]].d, [[LO]].d, [[HI]].d
299299
; VBITS_EQ_256-DAG: addv [[REDUCE:d[0-9]+]], [[PG]], [[ADD]].d
300300
; VBITS_EQ_256-NEXT: fmov x0, [[REDUCE]]
301301
; VBITS_EQ_256-NEXT: ret

0 commit comments

Comments
 (0)