Skip to content

Commit ad107fe

Browse files
pratikasharigcbot
authored andcommitted
Mark propagated values as global in post RA spill cleanup
Post RA spill cleanup pass creates new virtual variables and ties them to appropriate physical registers. If data flow is run after this pass, it can lead to missing links because of new virtual variables. As a fix, any time we break such a virtual variable link, we mark it as global so that later passes don't assume it's local and try to optimize it.
1 parent bf1026d commit ad107fe

2 files changed

Lines changed: 63 additions & 23 deletions

File tree

visa/SpillFillPropagation.cpp

Lines changed: 47 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ SpillFillPropagation::SpillFillPropagation(G4_Kernel &k, IR_Builder &b,
2323
void SpillFillPropagation::clearTable() {
2424
offsetToGRFs.clear();
2525
grfToOffsets.clear();
26+
grfToDeclare.clear();
2627
}
2728

2829
// Record that a physical GRF holds the value for a scratch offset.
@@ -50,6 +51,8 @@ void SpillFillPropagation::invalidateGRF(unsigned grfNum) {
5051
}
5152
grfToOffsets.erase(it);
5253
}
54+
55+
grfToDeclare[grfNum].clear();
5356
}
5457

5558
// Remove all physical GRFs mapping to given offset
@@ -141,9 +144,9 @@ bool SpillFillPropagation::replaceFillWithMovs(
141144
continue;
142145
}
143146

144-
bool canDoTwoRows =
145-
(i + 1 < numRows) && (srcGRFs[i + 1] != dstStartGRF + i + 1) &&
146-
(srcGRFs[i] + 1 == srcGRFs[i + 1]);
147+
bool canDoTwoRows = (i + 1 < numRows) &&
148+
(srcGRFs[i + 1] != dstStartGRF + i + 1) &&
149+
(srcGRFs[i] + 1 == srcGRFs[i + 1]);
147150

148151
unsigned rowsThisMov = canDoTwoRows ? 2 : 1;
149152
G4_ExecSize execSize =
@@ -158,16 +161,12 @@ bool SpillFillPropagation::replaceFillWithMovs(
158161
name, G4_GRF, kernel.numEltPerGRF<Type_UD>(), rowsThisMov, Type_UD);
159162
srcDcl->getRegVar()->setPhyReg(builder.phyregpool.getGreg(srcGRFs[i]), 0);
160163

161-
G4_SrcRegRegion *src = builder.createSrc(
162-
srcDcl->getRegVar(), 0, 0, builder.getRegionStride1(), type);
164+
G4_SrcRegRegion *src = builder.createSrc(srcDcl->getRegVar(), 0, 0,
165+
builder.getRegionStride1(), type);
163166

164167
G4_INST *mov =
165168
builder.createMov(execSize, dst, src, InstOpt_WriteEnable, false);
166-
// This mov's dst may be propagated to successor BB. So mark it as global
167-
// here. Since this is post RA pass, it doesn't reuse old virtual variables.
168-
// Therefore if data is run again in later passes, it could incorrectly
169-
// treat mov dst as a local otherwise (and remove it in some cases).
170-
dstTopDcl->setForceGlobal();
169+
171170
mov->setVISAId(fill->getVISAId());
172171

173172
bb->insertBefore(fillIt, mov);
@@ -179,11 +178,26 @@ bool SpillFillPropagation::replaceFillWithMovs(
179178
i += rowsThisMov;
180179
}
181180

181+
markGlobalDcls(srcGRFs);
182+
182183
// Erase the fill intrinsic.
183184
fillIt = bb->erase(fillIt);
184185
return true;
185186
}
186187

188+
void SpillFillPropagation::markGlobalDcls(
189+
const std::vector<unsigned int> &GRFs) {
190+
// Mark all G4_Declare* containing each GRF as global so they're preserved
191+
for (auto GRF : GRFs) {
192+
auto it = grfToDeclare.find(GRF);
193+
if (it == grfToDeclare.end())
194+
continue;
195+
for (auto dcl : it->second) {
196+
dcl->setForceGlobal();
197+
}
198+
}
199+
}
200+
187201
// Replace a pending fill with GRF-to-GRF movs inserted after the source
188202
// instruction pointed to by rit (backward pass). Erases the pending fill
189203
// instruction and re-seats rit. Returns false if src/dst overlap unsafely.
@@ -245,6 +259,7 @@ bool SpillFillPropagation::replaceFillWithMovsAfter(
245259
i += rowsThisMov;
246260
}
247261

262+
pf.dstTopDcl->setForceGlobal();
248263

249264
// Erase the pending fill instruction.
250265
bb->erase(pf.instIt);
@@ -297,13 +312,21 @@ static bool hasIndirectOperand(G4_INST *inst) {
297312
return false;
298313
}
299314

315+
void SpillFillPropagation::mapGRFsToDcl(unsigned int startGRF,
316+
unsigned int numGRFs, G4_Declare *dcl) {
317+
for (unsigned grf = startGRF; grf != (startGRF + numGRFs); ++grf) {
318+
grfToDeclare[grf].insert(dcl);
319+
}
320+
}
321+
300322
// Forward (top-down) pass: track which GRFs hold spilled data and replace
301323
// fills with movs when the data is already available in a GRF.
302324
void SpillFillPropagation::processBBForward(G4_BB *bb) {
303325
// Load carried-over state or start fresh.
304326
auto entryIt = bbEntryState.find(bb);
305327
if (entryIt != bbEntryState.end()) {
306-
offsetToGRFs = std::move(entryIt->second);
328+
offsetToGRFs = std::move(entryIt->second.offsetToGRFs);
329+
grfToDeclare = std::move(entryIt->second.grfToDeclare);
307330
// Rebuild the reverse map.
308331
grfToOffsets.clear();
309332
for (auto &[off, grfs] : offsetToGRFs)
@@ -327,6 +350,8 @@ void SpillFillPropagation::processBBForward(G4_BB *bb) {
327350
getGRFRange(spill->getPayload());
328351
for (unsigned i = 0; i < numRows; ++i)
329352
addEntry(offset + i, payloadStartGRF + i, /*clearOld=*/true);
353+
mapGRFsToDcl(payloadStartGRF, numRows,
354+
spill->getPayload()->getTopDcl());
330355
} else {
331356
// Scatter spill or non-WriteEnable: invalidate any GRF mapping to
332357
// these offsets since we cannot propagate through them.
@@ -350,13 +375,15 @@ void SpillFillPropagation::processBBForward(G4_BB *bb) {
350375
invalidateClobberedEntries(inst);
351376
for (unsigned i = 0; i < numRows; ++i)
352377
addEntry(offset + i, dstStartGRF + i);
378+
mapGRFsToDcl(dstStartGRF, numRows, fill->getDst()->getTopDcl());
353379
// it already advanced by erase
354380
continue;
355381
}
356382
// Fill not replaced — invalidate dst GRFs and record.
357383
invalidateClobberedEntries(inst);
358384
for (unsigned i = 0; i < numRows; ++i)
359385
addEntry(offset + i, dstStartGRF + i);
386+
mapGRFsToDcl(dstStartGRF, numRows, fill->getDst()->getTopDcl());
360387
} else {
361388
// Non-WriteEnable fill: invalidate destination GRFs (WAW clobber).
362389
invalidateClobberedEntries(inst);
@@ -395,8 +422,12 @@ void SpillFillPropagation::processBBForward(G4_BB *bb) {
395422
G4_INST *lastInst = bb->back();
396423
if (lastInst->opcode() == G4_goto && lastInst->getPredicate()) {
397424
G4_BB *ftBB = bb->Succs.empty() ? nullptr : bb->Succs.front();
398-
if (ftBB && ftBB->Preds.size() == 1 && ftBB->Preds.front() == bb)
399-
bbEntryState[ftBB] = offsetToGRFs;
425+
if (ftBB && ftBB->Preds.size() == 1 && ftBB->Preds.front() == bb) {
426+
BBEntryState state;
427+
state.offsetToGRFs = offsetToGRFs;
428+
state.grfToDeclare = grfToDeclare;
429+
bbEntryState[ftBB] = state;
430+
}
400431
}
401432
}
402433
}
@@ -552,6 +583,7 @@ void SpillFillPropagation::processBBBackward(G4_BB *bb) {
552583

553584
if (replaceFillWithMovsAfter(bb, rit, insertAfterIt, *matchedPF,
554585
srcGRFs)) {
586+
fill->getDst()->getTopDcl()->setForceGlobal();
555587
for (unsigned g = matchedPF->dstStartGRF;
556588
g <= matchedPF->dstEndGRF; ++g)
557589
clobberedRows.insert(g);
@@ -641,6 +673,8 @@ void SpillFillPropagation::processBBBackward(G4_BB *bb) {
641673

642674
if (replaceFillWithMovsAfter(bb, rit, insertAfterIt, *matchedPF,
643675
srcGRFs)) {
676+
auto *payloadDcl = spill->getPayload()->getTopDcl();
677+
payloadDcl->setForceGlobal();
644678
for (unsigned g = matchedPF->dstStartGRF;
645679
g <= matchedPF->dstEndGRF; ++g)
646680
clobberedRows.insert(g);

visa/SpillFillPropagation.h

Lines changed: 16 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,11 @@ struct PendingFill {
3030
unsigned dstEndGRF; // inclusive
3131
};
3232

33+
typedef struct BBEntryState {
34+
std::unordered_map<unsigned, std::set<unsigned>> offsetToGRFs;
35+
std::unordered_map<unsigned, std::set<G4_Declare *>> grfToDeclare;
36+
} BBEntryState;
37+
3338
// Post-RA pass that eliminates redundant fill intrinsics by tracking which
3439
// physical GRFs still hold spilled/filled data and replacing fills with
3540
// GRF-to-GRF mov instructions when possible.
@@ -46,13 +51,13 @@ class SpillFillPropagation {
4651
std::unordered_map<unsigned, std::set<unsigned>> offsetToGRFs;
4752
// Reverse map: physical GRF number -> set of scratch offsets it maps to
4853
std::unordered_map<unsigned, std::set<unsigned>> grfToOffsets;
54+
// Map GRF -> set of G4_Declare* to mark latter as global
55+
std::unordered_map<unsigned, std::set<G4_Declare *>> grfToDeclare;
4956
// Per-BB entry state for fall-through propagation
50-
std::unordered_map<G4_BB *, std::unordered_map<unsigned, std::set<unsigned>>>
51-
bbEntryState;
57+
std::unordered_map<G4_BB *, BBEntryState> bbEntryState;
5258

5359
void clearTable();
54-
void addEntry(unsigned scratchOffset, unsigned grfNum,
55-
bool clearOld = false);
60+
void addEntry(unsigned scratchOffset, unsigned grfNum, bool clearOld = false);
5661
void invalidateGRF(unsigned grfNum);
5762
void invalidateOffset(unsigned offset);
5863
std::pair<unsigned, unsigned> getGRFRange(G4_Operand *opnd);
@@ -61,19 +66,20 @@ class SpillFillPropagation {
6166
G4_FillIntrinsic *fill,
6267
const std::vector<unsigned> &srcGRFs);
6368
void invalidateClobberedEntries(G4_INST *inst);
64-
void invalidateDst(
65-
G4_INST *inst,
66-
std::unordered_map<unsigned, PendingFill> &pendingFills);
67-
void invalidateSrcs(
68-
G4_INST *inst,
69-
std::unordered_map<unsigned, PendingFill> &pendingFills);
69+
void invalidateDst(G4_INST *inst,
70+
std::unordered_map<unsigned, PendingFill> &pendingFills);
71+
void invalidateSrcs(G4_INST *inst,
72+
std::unordered_map<unsigned, PendingFill> &pendingFills);
7073
void processBBForward(G4_BB *bb);
7174
void processBBBackward(G4_BB *bb);
7275
bool replaceFillWithMovsAfter(G4_BB *bb, INST_LIST_RITER &rit,
7376
INST_LIST_ITER &insertAfterIt,
7477
const PendingFill &pf,
7578
const std::vector<unsigned> &srcGRFs);
7679
bool hasAssignedGRF(G4_Declare *topdcl) const;
80+
void markGlobalDcls(const std::vector<unsigned int> &GRFs);
81+
void mapGRFsToDcl(unsigned int startGRF, unsigned int numGRFs,
82+
G4_Declare *dcl);
7783

7884
public:
7985
SpillFillPropagation(G4_Kernel &k, IR_Builder &b, GlobalRA &g);

0 commit comments

Comments
 (0)