Skip to content

Commit 06725d7

Browse files
[GISel] Keep non-negative info in SUB(CTLZ) (llvm#189314)
Implement non-negative value tracking for SUB-CTLZ chains in GlobalISel, matching the behavior previously added to SelectionDAG. Additionally, refactor the SelectionDAG implementation from the previous patch to improve performance and code density. Related to llvm#136516 and llvm#186338 (comment)
1 parent 26e0d15 commit 06725d7

15 files changed

Lines changed: 49 additions & 62 deletions

File tree

llvm/lib/CodeGen/GlobalISel/GISelValueTracking.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -299,7 +299,8 @@ void GISelValueTracking::computeKnownBitsImpl(Register R, KnownBits &Known,
299299
Depth + 1);
300300
computeKnownBitsImpl(MI.getOperand(2).getReg(), Known2, DemandedElts,
301301
Depth + 1);
302-
Known = KnownBits::sub(Known, Known2);
302+
Known = KnownBits::sub(Known, Known2, MI.getFlag(MachineInstr::NoSWrap),
303+
MI.getFlag(MachineInstr::NoUWrap));
303304
break;
304305
}
305306
case TargetOpcode::G_XOR: {

llvm/lib/CodeGen/GlobalISel/LegalizerHelper.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2857,8 +2857,13 @@ LegalizerHelper::widenScalar(MachineInstr &MI, unsigned TypeIdx, LLT WideTy) {
28572857
// This is already the correct result for CTPOP and CTTZs
28582858
if (Opcode == TargetOpcode::G_CTLZ || Opcode == TargetOpcode::G_CTLS) {
28592859
// The correct result is NewOp - (difference between WideTy and the current type).
2860+
// At this stage the SUB is guaranteed not to wrap (unsigned); record that
2861+
// so it can be used in later KnownBits optimizations for CTLZ.
28602862
MIBNewOp = MIRBuilder.buildSub(
2861-
WideTy, MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff));
2863+
WideTy, MIBNewOp, MIRBuilder.buildConstant(WideTy, SizeDiff),
2864+
Opcode == TargetOpcode::G_CTLZ
2865+
? std::optional<unsigned>(MachineInstr::NoUWrap)
2866+
: std::nullopt);
28622867
}
28632868

28642869
MIRBuilder.buildZExtOrTrunc(MI.getOperand(0), MIBNewOp);

llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -745,17 +745,16 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) {
745745

746746
// At this stage the SUB is guaranteed not to wrap (unsigned); record that
747747
// so it can be used in later KnownBits optimizations.
748-
SDNodeFlags SubFlags;
749-
SubFlags.setNoUnsignedWrap(true);
750748
if (!N->isVPOpcode())
751749
return DAG.getNode(ISD::SUB, dl, NVT,
752750
DAG.getNode(N->getOpcode(), dl, NVT, Op),
753-
ExtractLeadingBits, SubFlags);
751+
ExtractLeadingBits, SDNodeFlags::NoUnsignedWrap);
754752
SDValue Mask = N->getOperand(1);
755753
SDValue EVL = N->getOperand(2);
756754
return DAG.getNode(ISD::VP_SUB, dl, NVT,
757755
DAG.getNode(N->getOpcode(), dl, NVT, Op, Mask, EVL),
758-
ExtractLeadingBits, Mask, EVL, SubFlags);
756+
ExtractLeadingBits, Mask, EVL,
757+
SDNodeFlags::NoUnsignedWrap);
759758
}
760759
if (CtlzOpcode == ISD::CTLZ_ZERO_UNDEF ||
761760
CtlzOpcode == ISD::VP_CTLZ_ZERO_UNDEF) {

llvm/test/CodeGen/AArch64/GlobalISel/legalize-ctlz.mir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -234,7 +234,7 @@ body: |
234234
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s64) = G_AND [[COPY]], [[C]]
235235
; CHECK-NEXT: [[CTLZ:%[0-9]+]]:_(s64) = G_CTLZ [[AND]](s64)
236236
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 29
237-
; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s64) = G_SUB [[CTLZ]], [[C1]]
237+
; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s64) = nuw G_SUB [[CTLZ]], [[C1]]
238238
; CHECK-NEXT: $x0 = COPY [[SUB]](s64)
239239
; CHECK-NEXT: RET_ReallyLR implicit $x0
240240
%1:_(s64) = COPY $x0
@@ -260,7 +260,7 @@ body: |
260260
; CHECK-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[COPY]], [[C]]
261261
; CHECK-NEXT: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[AND]](s32)
262262
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
263-
; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[CTLZ]], [[C1]]
263+
; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = nuw G_SUB [[CTLZ]], [[C1]]
264264
; CHECK-NEXT: $w0 = COPY [[SUB]](s32)
265265
; CHECK-NEXT: RET_ReallyLR implicit $w0
266266
%1:_(s32) = COPY $w0

llvm/test/CodeGen/AArch64/cls.ll

Lines changed: 12 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -177,20 +177,12 @@ declare <4 x i32> @llvm.aarch64.neon.cls.v4i32(<4 x i32>) nounwind readnone
177177
; Test ensures that the compiler generates no extra instructions
178178
; for __builtin_clzg output type conversion
179179
define i32 @foo8(i8 %0) {
180-
; CHECK-SD-LABEL: foo8:
181-
; CHECK-SD: // %bb.0:
182-
; CHECK-SD-NEXT: and w8, w0, #0xff
183-
; CHECK-SD-NEXT: clz w8, w8
184-
; CHECK-SD-NEXT: sub w0, w8, #24
185-
; CHECK-SD-NEXT: ret
186-
;
187-
; CHECK-GI-LABEL: foo8:
188-
; CHECK-GI: // %bb.0:
189-
; CHECK-GI-NEXT: and w8, w0, #0xff
190-
; CHECK-GI-NEXT: clz w8, w8
191-
; CHECK-GI-NEXT: sub w8, w8, #24
192-
; CHECK-GI-NEXT: and w0, w8, #0xff
193-
; CHECK-GI-NEXT: ret
180+
; CHECK-LABEL: foo8:
181+
; CHECK: // %bb.0:
182+
; CHECK-NEXT: and w8, w0, #0xff
183+
; CHECK-NEXT: clz w8, w8
184+
; CHECK-NEXT: sub w0, w8, #24
185+
; CHECK-NEXT: ret
194186
%2 = tail call i8 @llvm.ctlz.i8(i8 %0, i1 false)
195187
%3 = zext nneg i8 %2 to i32
196188
ret i32 %3
@@ -199,20 +191,12 @@ define i32 @foo8(i8 %0) {
199191
; Test ensures that the compiler generates no extra instructions
200192
; for __builtin_clzg output type conversion
201193
define i32 @foo16(i16 %0) {
202-
; CHECK-SD-LABEL: foo16:
203-
; CHECK-SD: // %bb.0:
204-
; CHECK-SD-NEXT: and w8, w0, #0xffff
205-
; CHECK-SD-NEXT: clz w8, w8
206-
; CHECK-SD-NEXT: sub w0, w8, #16
207-
; CHECK-SD-NEXT: ret
208-
;
209-
; CHECK-GI-LABEL: foo16:
210-
; CHECK-GI: // %bb.0:
211-
; CHECK-GI-NEXT: and w8, w0, #0xffff
212-
; CHECK-GI-NEXT: clz w8, w8
213-
; CHECK-GI-NEXT: sub w8, w8, #16
214-
; CHECK-GI-NEXT: and w0, w8, #0xffff
215-
; CHECK-GI-NEXT: ret
194+
; CHECK-LABEL: foo16:
195+
; CHECK: // %bb.0:
196+
; CHECK-NEXT: and w8, w0, #0xffff
197+
; CHECK-NEXT: clz w8, w8
198+
; CHECK-NEXT: sub w0, w8, #16
199+
; CHECK-NEXT: ret
216200
%2 = tail call i16 @llvm.ctlz.i16(i16 %0, i1 false)
217201
%3 = zext nneg i16 %2 to i32
218202
ret i32 %3

llvm/test/CodeGen/AArch64/pr61549.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ define i35 @f(i35 %0) {
2323
; GISEL-NEXT: and x8, x8, #0x7ffffffff
2424
; GISEL-NEXT: clz x8, x8
2525
; GISEL-NEXT: sub x8, x8, #29
26-
; GISEL-NEXT: ubfx x0, x8, #5, #30
26+
; GISEL-NEXT: lsr x0, x8, #5
2727
; GISEL-NEXT: ret
2828
%2 = srem i35 1, %0
2929
%3 = call i35 @llvm.ctlz.i35(i35 %2, i1 false)

llvm/test/CodeGen/AMDGPU/GlobalISel/legalize-ctlz.mir

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ body: |
9595
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
9696
; CHECK-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[C1]]
9797
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
98-
; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UMIN]], [[C2]]
98+
; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = nuw G_SUB [[UMIN]], [[C2]]
9999
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SUB]](s32)
100100
; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
101101
; CHECK-NEXT: $vgpr0 = COPY [[AND1]](s32)
@@ -170,11 +170,11 @@ body: |
170170
; CHECK-NEXT: [[AMDGPU_FFBH_U32_:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[AND]](s32)
171171
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
172172
; CHECK-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[C2]]
173-
; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UMIN]], [[C]]
173+
; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = nuw G_SUB [[UMIN]], [[C]]
174174
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SUB]](s32)
175175
; CHECK-NEXT: [[AMDGPU_FFBH_U32_1:%[0-9]+]]:_(s32) = G_AMDGPU_FFBH_U32 [[LSHR]](s32)
176176
; CHECK-NEXT: [[UMIN1:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_1]], [[C2]]
177-
; CHECK-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[UMIN1]], [[C]]
177+
; CHECK-NEXT: [[SUB1:%[0-9]+]]:_(s32) = nuw G_SUB [[UMIN1]], [[C]]
178178
; CHECK-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY [[SUB1]](s32)
179179
; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C1]]
180180
; CHECK-NEXT: [[AND2:%[0-9]+]]:_(s32) = G_AND [[COPY2]], [[C1]]
@@ -204,7 +204,7 @@ body: |
204204
; CHECK-NEXT: [[C1:%[0-9]+]]:_(s32) = G_CONSTANT i32 32
205205
; CHECK-NEXT: [[UMIN:%[0-9]+]]:_(s32) = G_UMIN [[AMDGPU_FFBH_U32_]], [[C1]]
206206
; CHECK-NEXT: [[C2:%[0-9]+]]:_(s32) = G_CONSTANT i32 25
207-
; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[UMIN]], [[C2]]
207+
; CHECK-NEXT: [[SUB:%[0-9]+]]:_(s32) = nuw G_SUB [[UMIN]], [[C2]]
208208
; CHECK-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SUB]](s32)
209209
; CHECK-NEXT: [[AND1:%[0-9]+]]:_(s32) = G_AND [[COPY1]], [[C]]
210210
; CHECK-NEXT: $vgpr0 = COPY [[AND1]](s32)

llvm/test/CodeGen/AMDGPU/ctlz.ll

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1925,16 +1925,15 @@ define amdgpu_kernel void @v_ctlz_i17_sel_ne_bitwidth(ptr addrspace(1) noalias %
19251925
; GFX10-GISEL: ; %bb.0:
19261926
; GFX10-GISEL-NEXT: s_load_dwordx4 s[0:3], s[4:5], 0x24
19271927
; GFX10-GISEL-NEXT: v_lshlrev_b32_e32 v0, 2, v0
1928+
; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0
19281929
; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
19291930
; GFX10-GISEL-NEXT: global_load_dword v0, v0, s[2:3]
19301931
; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
19311932
; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0x1ffff, v0
19321933
; GFX10-GISEL-NEXT: v_ffbh_u32_e32 v0, v0
19331934
; GFX10-GISEL-NEXT: v_min_u32_e32 v0, 32, v0
19341935
; GFX10-GISEL-NEXT: v_add_nc_u32_e32 v0, -15, v0
1935-
; GFX10-GISEL-NEXT: v_and_b32_e32 v1, 0x1ffff, v0
1936-
; GFX10-GISEL-NEXT: v_cmp_ne_u32_e32 vcc_lo, 17, v1
1937-
; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, 0
1936+
; GFX10-GISEL-NEXT: v_cmp_ne_u32_e32 vcc_lo, 17, v0
19381937
; GFX10-GISEL-NEXT: v_cndmask_b32_e32 v0, 0x1ffff, v0, vcc_lo
19391938
; GFX10-GISEL-NEXT: v_and_b32_e32 v0, 0x1ffff, v0
19401939
; GFX10-GISEL-NEXT: global_store_short v1, v0, s[0:1]

llvm/test/CodeGen/ARM/GlobalISel/arm-legalize-bitcounts.mir

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -116,7 +116,7 @@ body: |
116116
; LIBCALLS: [[COUNT:%[0-9]+]]:_(s32) = G_SELECT [[CMP]](s1), [[BITS]], [[UNDEFCOUNT]]
117117
; LIBCALLS-NOT: G_CTLZ
118118
; CHECK: [[BITDIFF:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
119-
; CHECK: [[R32:%[0-9]+]]:_(s32) = G_SUB [[COUNT]], [[BITDIFF]]
119+
; CHECK: [[R32:%[0-9]+]]:_(s32) = nuw G_SUB [[COUNT]], [[BITDIFF]]
120120
%2(s16) = G_CTLZ %1
121121
122122
; LIBCALLS: [[SHIFTEDR:%[0-9]+]]:_(s32) = G_SHL [[R32]], [[BITDIFF]]

llvm/test/CodeGen/RISCV/GlobalISel/legalizer/legalize-ctls-rv32.mir

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ body: |
9090
; RV32ZBB-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[C2]]
9191
; RV32ZBB-NEXT: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[AND]](s32)
9292
; RV32ZBB-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 24
93-
; RV32ZBB-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[CTLZ]], [[C3]]
93+
; RV32ZBB-NEXT: [[SUB:%[0-9]+]]:_(s32) = nuw G_SUB [[CTLZ]], [[C3]]
9494
; RV32ZBB-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SUB]](s32)
9595
; RV32ZBB-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C1]]
9696
; RV32ZBB-NEXT: $x10 = COPY [[SUB1]](s32)
@@ -185,7 +185,7 @@ body: |
185185
; RV32ZBB-NEXT: [[AND:%[0-9]+]]:_(s32) = G_AND [[XOR]], [[C2]]
186186
; RV32ZBB-NEXT: [[CTLZ:%[0-9]+]]:_(s32) = G_CTLZ [[AND]](s32)
187187
; RV32ZBB-NEXT: [[C3:%[0-9]+]]:_(s32) = G_CONSTANT i32 16
188-
; RV32ZBB-NEXT: [[SUB:%[0-9]+]]:_(s32) = G_SUB [[CTLZ]], [[C3]]
188+
; RV32ZBB-NEXT: [[SUB:%[0-9]+]]:_(s32) = nuw G_SUB [[CTLZ]], [[C3]]
189189
; RV32ZBB-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY [[SUB]](s32)
190190
; RV32ZBB-NEXT: [[SUB1:%[0-9]+]]:_(s32) = G_SUB [[COPY1]], [[C1]]
191191
; RV32ZBB-NEXT: $x10 = COPY [[SUB1]](s32)

0 commit comments

Comments
 (0)