Skip to content

Commit 48a1ee7

Browse files
[AMDGPU] Remove redundant s_wait_xcnt after implicit XCNT drains (llvm#198823)
On gfx1250, several instructions implicitly drain XCNT in hardware: `s_barrier_wait`, `s_barrier_signal`, `s_barrier_signal_isfirst`, `s_sendmsg`, instructions that change the PC, etc. This patch removes the redundant `s_wait_xcnt` that was previously emitted after such implicit XCNT drains. Pre-commit tests are in llvm#198772. Fixes: LCOMPILER-1665
1 parent 1da70ad commit 48a1ee7

6 files changed

Lines changed: 39 additions & 22 deletions

File tree

llvm/lib/Target/AMDGPU/SIInsertWaitcnts.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3066,6 +3066,11 @@ void SIInsertWaitcnts::updateEventWaitcntAfter(MachineInstr &Inst,
30663066
int64_t Imm = TII.getNamedOperand(Inst, AMDGPU::OpName::waitexp)->getImm();
30673067
ScoreBrackets->applyWaitcnt(AMDGPU::EXP_CNT, Imm);
30683068
}
3069+
3070+
// Set XCNT to zero in the bracket for instructions that implicitly drain
3071+
// XCNT.
3072+
if (ST.hasWaitXcnt() && SIInstrInfo::isXcntDrain(Inst))
3073+
ScoreBrackets->applyWaitcnt(AMDGPU::X_CNT, 0);
30693074
}
30703075

30713076
bool WaitcntBrackets::mergeScore(const MergeInfo &M, unsigned &Score,

llvm/lib/Target/AMDGPU/SIInstrInfo.cpp

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3536,6 +3536,35 @@ void SIInstrInfo::insertSelect(MachineBasicBlock &MBB,
35363536
}
35373537
}
35383538

3539+
/// Returns true if \p MI is classified as an instruction that implicitly
/// drains XCNT: any branch, call, return, or indirect branch, plus the
/// specific opcodes enumerated in the switch below. Returns false otherwise.
///
/// No subtarget check is performed here; the call site visible in
/// SIInsertWaitcnts.cpp guards the call with ST.hasWaitXcnt().
bool SIInstrInfo::isXcntDrain(const MachineInstr &MI) {
3540+
3541+
// Control-flow instructions (the commit description refers to these as
// "PC-changes") are recognised through the generic MachineInstr predicates
// rather than by opcode.
if (MI.isBranch() || MI.isCall() || MI.isReturn() || MI.isIndirectBranch())
3542+
return true;
3543+
3544+
// All remaining drains are matched by exact opcode: S_ENDPGM*, S_TRAP,
// S_GETREG/S_SETREG*, S_SENDMSG*, and S_BARRIER_WAIT / S_BARRIER_SIGNAL*.
switch (MI.getOpcode()) {
3545+
case AMDGPU::S_ENDPGM:
3546+
case AMDGPU::S_ENDPGM_SAVED:
3547+
case AMDGPU::S_TRAP:
3548+
case AMDGPU::S_GETREG_B32:
3549+
case AMDGPU::S_SETREG_B32:
3550+
case AMDGPU::S_SETREG_B32_mode:
3551+
case AMDGPU::S_SETREG_IMM32_B32:
3552+
case AMDGPU::S_SETREG_IMM32_B32_mode:
3553+
case AMDGPU::S_SENDMSG:
3554+
case AMDGPU::S_SENDMSGHALT:
3555+
case AMDGPU::S_SENDMSG_RTN_B32:
3556+
case AMDGPU::S_SENDMSG_RTN_B64:
3557+
case AMDGPU::S_BARRIER_WAIT:
3558+
case AMDGPU::S_BARRIER_SIGNAL_M0:
3559+
case AMDGPU::S_BARRIER_SIGNAL_IMM:
3560+
case AMDGPU::S_BARRIER_SIGNAL_ISFIRST_M0:
3561+
case AMDGPU::S_BARRIER_SIGNAL_ISFIRST_IMM:
3562+
return true;
3563+
// Any opcode not listed above is not treated as an XCNT drain.
default:
3564+
return false;
3565+
}
3566+
}
3567+
35393568
bool SIInstrInfo::isFoldableCopy(const MachineInstr &MI) {
35403569
switch (MI.getOpcode()) {
35413570
case AMDGPU::V_MOV_B16_t16_e32:

llvm/lib/Target/AMDGPU/SIInstrInfo.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -516,6 +516,9 @@ class SIInstrInfo final : public AMDGPUGenInstrInfo {
516516
isFLAT(Opcode);
517517
}
518518

519+
/// True if MI implicitly drains XCNT.
520+
static bool isXcntDrain(const MachineInstr &MI);
521+
519522
/// True if the SIInstrFlags::SOP1 bit is set in the TSFlags of \p MI's
/// instruction descriptor.
static bool isSOP1(const MachineInstr &MI) {
520523
return MI.getDesc().TSFlags & SIInstrFlags::SOP1;
521524
}

llvm/test/CodeGen/AMDGPU/flat-saddr-atomics.ll

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12487,7 +12487,6 @@ define void @flat_atomic_fmax_f64_saddr_nortn(ptr inreg %ptr, double %data) {
1248712487
; GFX1250-SDAG-NEXT: .LBB113_4: ; %atomicrmw.private
1248812488
; GFX1250-SDAG-NEXT: s_sub_co_i32 s2, s0, src_flat_scratch_base_lo
1248912489
; GFX1250-SDAG-NEXT: s_cmp_lg_u64 s[0:1], 0
12490-
; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
1249112490
; GFX1250-SDAG-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1]
1249212491
; GFX1250-SDAG-NEXT: s_cselect_b32 s0, s2, -1
1249312492
; GFX1250-SDAG-NEXT: scratch_load_b64 v[2:3], off, s0
@@ -12780,7 +12779,6 @@ define void @flat_atomic_fmin_f64_saddr_nortn(ptr inreg %ptr, double %data) {
1278012779
; GFX1250-SDAG-NEXT: .LBB115_4: ; %atomicrmw.private
1278112780
; GFX1250-SDAG-NEXT: s_sub_co_i32 s2, s0, src_flat_scratch_base_lo
1278212781
; GFX1250-SDAG-NEXT: s_cmp_lg_u64 s[0:1], 0
12783-
; GFX1250-SDAG-NEXT: s_wait_xcnt 0x0
1278412782
; GFX1250-SDAG-NEXT: v_max_num_f64_e32 v[0:1], v[0:1], v[0:1]
1278512783
; GFX1250-SDAG-NEXT: s_cselect_b32 s0, s2, -1
1278612784
; GFX1250-SDAG-NEXT: scratch_load_b64 v[2:3], off, s0

llvm/test/CodeGen/AMDGPU/wait-xcnt-drain.mir

Lines changed: 0 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -18,14 +18,12 @@ body: |
1818
; GCN-NEXT: bb.1:
1919
; GCN-NEXT: liveins: $vgpr3
2020
; GCN-NEXT: {{ $}}
21-
; GCN-NEXT: S_WAIT_XCNT 0
2221
; GCN-NEXT: $vgpr2 = V_LSHLREV_B32_e64 16, $vgpr3, implicit $exec
2322
; GCN-NEXT: S_ENDPGM 0
2423
; GCN-NEXT: {{ $}}
2524
; GCN-NEXT: bb.2:
2625
; GCN-NEXT: liveins: $vgpr3
2726
; GCN-NEXT: {{ $}}
28-
; GCN-NEXT: S_WAIT_XCNT 0
2927
; GCN-NEXT: $vgpr2 = V_LSHLREV_B32_e64 32, $vgpr3, implicit $exec
3028
; GCN-NEXT: S_ENDPGM 0
3129
bb.0:
@@ -58,7 +56,6 @@ body: |
5856
; GCN-NEXT: S_WAIT_KMCNT 0
5957
; GCN-NEXT: GLOBAL_STORE_DWORD killed $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
6058
; GCN-NEXT: $sgpr30_sgpr31 = SI_CALL killed $sgpr0_sgpr1, 0, csr_amdgpu
61-
; GCN-NEXT: S_WAIT_XCNT 0
6259
; GCN-NEXT: $vgpr2 = V_MOV_B32_e32 0, implicit $exec
6360
; GCN-NEXT: SI_RETURN implicit $vgpr2
6461
GLOBAL_STORE_DWORD killed $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
@@ -84,7 +81,6 @@ body: |
8481
; GCN-NEXT: bb.1:
8582
; GCN-NEXT: liveins: $vgpr3
8683
; GCN-NEXT: {{ $}}
87-
; GCN-NEXT: S_WAIT_XCNT 0
8884
; GCN-NEXT: $vgpr2 = V_LSHLREV_B32_e64 16, $vgpr3, implicit $exec
8985
; GCN-NEXT: S_ENDPGM 0
9086
bb.0:
@@ -112,7 +108,6 @@ body: |
112108
; GCN-NEXT: {{ $}}
113109
; GCN-NEXT: GLOBAL_STORE_DWORD killed $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
114110
; GCN-NEXT: S_TRAP 0
115-
; GCN-NEXT: S_WAIT_XCNT 0
116111
; GCN-NEXT: $vgpr2 = V_LSHLREV_B32_e64 16, $vgpr3, implicit $exec
117112
; GCN-NEXT: S_ENDPGM 0
118113
GLOBAL_STORE_DWORD killed $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
@@ -134,7 +129,6 @@ body: |
134129
; GCN-NEXT: {{ $}}
135130
; GCN-NEXT: GLOBAL_STORE_DWORD killed $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
136131
; GCN-NEXT: $sgpr0 = S_GETREG_B32 1, implicit $mode
137-
; GCN-NEXT: S_WAIT_XCNT 0
138132
; GCN-NEXT: $vgpr2 = V_LSHLREV_B32_e64 16, $vgpr3, implicit $exec
139133
; GCN-NEXT: S_ENDPGM 0
140134
GLOBAL_STORE_DWORD killed $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
@@ -156,7 +150,6 @@ body: |
156150
; GCN-NEXT: {{ $}}
157151
; GCN-NEXT: GLOBAL_STORE_DWORD killed $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
158152
; GCN-NEXT: S_SETREG_B32 $sgpr0, 1, implicit-def $mode, implicit $mode
159-
; GCN-NEXT: S_WAIT_XCNT 0
160153
; GCN-NEXT: $vgpr2 = V_LSHLREV_B32_e64 16, $vgpr3, implicit $exec
161154
; GCN-NEXT: S_ENDPGM 0
162155
GLOBAL_STORE_DWORD killed $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
@@ -178,7 +171,6 @@ body: |
178171
; GCN-NEXT: {{ $}}
179172
; GCN-NEXT: GLOBAL_STORE_DWORD killed $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
180173
; GCN-NEXT: S_SETREG_B32_mode $sgpr0, 1, implicit-def $mode, implicit $mode
181-
; GCN-NEXT: S_WAIT_XCNT 0
182174
; GCN-NEXT: $vgpr2 = V_LSHLREV_B32_e64 16, $vgpr3, implicit $exec
183175
; GCN-NEXT: S_ENDPGM 0
184176
GLOBAL_STORE_DWORD killed $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
@@ -200,7 +192,6 @@ body: |
200192
; GCN-NEXT: {{ $}}
201193
; GCN-NEXT: GLOBAL_STORE_DWORD killed $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
202194
; GCN-NEXT: S_SETREG_IMM32_B32 0, 1, implicit-def $mode, implicit $mode
203-
; GCN-NEXT: S_WAIT_XCNT 0
204195
; GCN-NEXT: $vgpr2 = V_LSHLREV_B32_e64 16, $vgpr3, implicit $exec
205196
; GCN-NEXT: S_ENDPGM 0
206197
GLOBAL_STORE_DWORD killed $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
@@ -222,7 +213,6 @@ body: |
222213
; GCN-NEXT: {{ $}}
223214
; GCN-NEXT: GLOBAL_STORE_DWORD killed $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
224215
; GCN-NEXT: S_SETREG_IMM32_B32_mode 0, 1, implicit-def $mode, implicit $mode
225-
; GCN-NEXT: S_WAIT_XCNT 0
226216
; GCN-NEXT: $vgpr2 = V_LSHLREV_B32_e64 16, $vgpr3, implicit $exec
227217
; GCN-NEXT: S_ENDPGM 0
228218
GLOBAL_STORE_DWORD killed $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
@@ -244,7 +234,6 @@ body: |
244234
; GCN-NEXT: {{ $}}
245235
; GCN-NEXT: GLOBAL_STORE_DWORD killed $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
246236
; GCN-NEXT: S_SENDMSG 1, implicit $exec, implicit $m0
247-
; GCN-NEXT: S_WAIT_XCNT 0
248237
; GCN-NEXT: $vgpr2 = V_LSHLREV_B32_e64 16, $vgpr3, implicit $exec
249238
; GCN-NEXT: S_ENDPGM 0
250239
GLOBAL_STORE_DWORD killed $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
@@ -266,7 +255,6 @@ body: |
266255
; GCN-NEXT: {{ $}}
267256
; GCN-NEXT: GLOBAL_STORE_DWORD killed $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
268257
; GCN-NEXT: S_SENDMSGHALT 1, implicit $exec, implicit $m0
269-
; GCN-NEXT: S_WAIT_XCNT 0
270258
; GCN-NEXT: $vgpr2 = V_LSHLREV_B32_e64 16, $vgpr3, implicit $exec
271259
; GCN-NEXT: S_ENDPGM 0
272260
GLOBAL_STORE_DWORD killed $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
@@ -288,7 +276,6 @@ body: |
288276
; GCN-NEXT: {{ $}}
289277
; GCN-NEXT: GLOBAL_STORE_DWORD killed $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
290278
; GCN-NEXT: $sgpr0 = S_SENDMSG_RTN_B32 1
291-
; GCN-NEXT: S_WAIT_XCNT 0
292279
; GCN-NEXT: $vgpr2 = V_LSHLREV_B32_e64 16, $vgpr3, implicit $exec
293280
; GCN-NEXT: S_ENDPGM 0
294281
GLOBAL_STORE_DWORD killed $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
@@ -310,7 +297,6 @@ body: |
310297
; GCN-NEXT: {{ $}}
311298
; GCN-NEXT: GLOBAL_STORE_DWORD killed $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
312299
; GCN-NEXT: $sgpr0_sgpr1 = S_SENDMSG_RTN_B64 1
313-
; GCN-NEXT: S_WAIT_XCNT 0
314300
; GCN-NEXT: $vgpr2 = V_LSHLREV_B32_e64 16, $vgpr3, implicit $exec
315301
; GCN-NEXT: S_ENDPGM 0
316302
GLOBAL_STORE_DWORD killed $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
@@ -332,7 +318,6 @@ body: |
332318
; GCN-NEXT: {{ $}}
333319
; GCN-NEXT: GLOBAL_STORE_DWORD killed $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
334320
; GCN-NEXT: S_BARRIER_WAIT 1
335-
; GCN-NEXT: S_WAIT_XCNT 0
336321
; GCN-NEXT: $vgpr2 = V_LSHLREV_B32_e64 16, $vgpr3, implicit $exec
337322
; GCN-NEXT: S_ENDPGM 0
338323
GLOBAL_STORE_DWORD killed $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
@@ -354,7 +339,6 @@ body: |
354339
; GCN-NEXT: {{ $}}
355340
; GCN-NEXT: GLOBAL_STORE_DWORD killed $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
356341
; GCN-NEXT: S_BARRIER_SIGNAL_M0 implicit $m0
357-
; GCN-NEXT: S_WAIT_XCNT 0
358342
; GCN-NEXT: $vgpr2 = V_LSHLREV_B32_e64 16, $vgpr3, implicit $exec
359343
; GCN-NEXT: S_ENDPGM 0
360344
GLOBAL_STORE_DWORD killed $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
@@ -376,7 +360,6 @@ body: |
376360
; GCN-NEXT: {{ $}}
377361
; GCN-NEXT: GLOBAL_STORE_DWORD killed $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
378362
; GCN-NEXT: S_BARRIER_SIGNAL_IMM 1
379-
; GCN-NEXT: S_WAIT_XCNT 0
380363
; GCN-NEXT: $vgpr2 = V_LSHLREV_B32_e64 16, $vgpr3, implicit $exec
381364
; GCN-NEXT: S_ENDPGM 0
382365
GLOBAL_STORE_DWORD killed $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
@@ -398,7 +381,6 @@ body: |
398381
; GCN-NEXT: {{ $}}
399382
; GCN-NEXT: GLOBAL_STORE_DWORD killed $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
400383
; GCN-NEXT: S_BARRIER_SIGNAL_ISFIRST_M0 implicit $m0, implicit-def $scc, implicit $scc
401-
; GCN-NEXT: S_WAIT_XCNT 0
402384
; GCN-NEXT: $vgpr2 = V_LSHLREV_B32_e64 16, $vgpr3, implicit $exec
403385
; GCN-NEXT: S_ENDPGM 0
404386
GLOBAL_STORE_DWORD killed $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
@@ -420,7 +402,6 @@ body: |
420402
; GCN-NEXT: {{ $}}
421403
; GCN-NEXT: GLOBAL_STORE_DWORD killed $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
422404
; GCN-NEXT: S_BARRIER_SIGNAL_ISFIRST_IMM 1, implicit-def $scc, implicit $scc
423-
; GCN-NEXT: S_WAIT_XCNT 0
424405
; GCN-NEXT: $vgpr2 = V_LSHLREV_B32_e64 16, $vgpr3, implicit $exec
425406
; GCN-NEXT: S_ENDPGM 0
426407
GLOBAL_STORE_DWORD killed $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec

llvm/test/CodeGen/AMDGPU/wait-xcnt.mir

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1185,8 +1185,9 @@ body: |
11851185
; GCN-NEXT: bb.2:
11861186
; GCN-NEXT: liveins: $sgpr0_sgpr1, $sgpr2, $vgpr2
11871187
; GCN-NEXT: {{ $}}
1188-
; GCN-NEXT: S_WAIT_XCNT 0
1188+
; GCN-NEXT: S_WAIT_XCNT 1
11891189
; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
1190+
; GCN-NEXT: S_WAIT_XCNT 0
11901191
; GCN-NEXT: $vgpr2 = V_MOV_B32_e32 0, implicit $exec
11911192
; GCN-NEXT: $sgpr0 = S_MOV_B32 $sgpr0
11921193
bb.0:

0 commit comments

Comments
 (0)