@@ -23,6 +23,7 @@ SpillFillPropagation::SpillFillPropagation(G4_Kernel &k, IR_Builder &b,
2323void SpillFillPropagation::clearTable () {
2424 offsetToGRFs.clear ();
2525 grfToOffsets.clear ();
26+ grfToDeclare.clear ();
2627}
2728
2829// Record that a physical GRF holds the value for a scratch offset.
@@ -50,6 +51,8 @@ void SpillFillPropagation::invalidateGRF(unsigned grfNum) {
5051 }
5152 grfToOffsets.erase (it);
5253 }
54+
55+ grfToDeclare[grfNum].clear ();
5356}
5457
5558// Remove all physical GRFs mapping to given offset
@@ -141,9 +144,9 @@ bool SpillFillPropagation::replaceFillWithMovs(
141144 continue ;
142145 }
143146
144- bool canDoTwoRows =
145- (i + 1 < numRows) && (srcGRFs[i + 1 ] != dstStartGRF + i + 1 ) &&
146- (srcGRFs[i] + 1 == srcGRFs[i + 1 ]);
147+ bool canDoTwoRows = (i + 1 < numRows) &&
148+ (srcGRFs[i + 1 ] != dstStartGRF + i + 1 ) &&
149+ (srcGRFs[i] + 1 == srcGRFs[i + 1 ]);
147150
148151 unsigned rowsThisMov = canDoTwoRows ? 2 : 1 ;
149152 G4_ExecSize execSize =
@@ -158,16 +161,12 @@ bool SpillFillPropagation::replaceFillWithMovs(
158161 name, G4_GRF , kernel.numEltPerGRF <Type_UD>(), rowsThisMov, Type_UD);
159162 srcDcl->getRegVar ()->setPhyReg (builder.phyregpool .getGreg (srcGRFs[i]), 0 );
160163
161- G4_SrcRegRegion *src = builder.createSrc (
162- srcDcl-> getRegVar (), 0 , 0 , builder.getRegionStride1 (), type);
164+ G4_SrcRegRegion *src = builder.createSrc (srcDcl-> getRegVar (), 0 , 0 ,
165+ builder.getRegionStride1 (), type);
163166
164167 G4_INST *mov =
165168 builder.createMov (execSize, dst, src, InstOpt_WriteEnable, false );
166- // This mov's dst may be propagated to successor BB. So mark it as global
167- // here. Since this is post RA pass, it doesn't reuse old virtual variables.
168- // Therefore if data is run again in later passes, it could incorrectly
169- // treat mov dst as a local otherwise (and remove it in some cases).
170- dstTopDcl->setForceGlobal ();
169+
171170 mov->setVISAId (fill->getVISAId ());
172171
173172 bb->insertBefore (fillIt, mov);
@@ -179,11 +178,26 @@ bool SpillFillPropagation::replaceFillWithMovs(
179178 i += rowsThisMov;
180179 }
181180
181+ markGlobalDcls (srcGRFs);
182+
182183 // Erase the fill intrinsic.
183184 fillIt = bb->erase (fillIt);
184185 return true ;
185186}
186187
188+ void SpillFillPropagation::markGlobalDcls (
189+ const std::vector<unsigned int > &GRFs) {
190+ // Mark all G4_Declare* containing each GRF as global so they're preserved
191+ for (auto GRF : GRFs) {
192+ auto it = grfToDeclare.find (GRF );
193+ if (it == grfToDeclare.end ())
194+ continue ;
195+ for (auto dcl : it->second ) {
196+ dcl->setForceGlobal ();
197+ }
198+ }
199+ }
200+
187201// Replace a pending fill with GRF-to-GRF movs inserted after the source
188202// instruction pointed to by rit (backward pass). Erases the pending fill
189203// instruction and re-seats rit. Returns false if src/dst overlap unsafely.
@@ -245,6 +259,7 @@ bool SpillFillPropagation::replaceFillWithMovsAfter(
245259 i += rowsThisMov;
246260 }
247261
262+ pf.dstTopDcl ->setForceGlobal ();
248263
249264 // Erase the pending fill instruction.
250265 bb->erase (pf.instIt );
@@ -297,13 +312,21 @@ static bool hasIndirectOperand(G4_INST *inst) {
297312 return false ;
298313}
299314
315+ void SpillFillPropagation::mapGRFsToDcl (unsigned int startGRF,
316+ unsigned int numGRFs, G4_Declare *dcl) {
317+ for (unsigned grf = startGRF; grf != (startGRF + numGRFs); ++grf) {
318+ grfToDeclare[grf].insert (dcl);
319+ }
320+ }
321+
300322// Forward (top-down) pass: track which GRFs hold spilled data and replace
301323// fills with movs when the data is already available in a GRF.
302324void SpillFillPropagation::processBBForward (G4_BB *bb) {
303325 // Load carried-over state or start fresh.
304326 auto entryIt = bbEntryState.find (bb);
305327 if (entryIt != bbEntryState.end ()) {
306- offsetToGRFs = std::move (entryIt->second );
328+ offsetToGRFs = std::move (entryIt->second .offsetToGRFs );
329+ grfToDeclare = std::move (entryIt->second .grfToDeclare );
307330 // Rebuild the reverse map.
308331 grfToOffsets.clear ();
309332 for (auto &[off, grfs] : offsetToGRFs)
@@ -327,6 +350,8 @@ void SpillFillPropagation::processBBForward(G4_BB *bb) {
327350 getGRFRange (spill->getPayload ());
328351 for (unsigned i = 0 ; i < numRows; ++i)
329352 addEntry (offset + i, payloadStartGRF + i, /* clearOld=*/ true );
353+ mapGRFsToDcl (payloadStartGRF, numRows,
354+ spill->getPayload ()->getTopDcl ());
330355 } else {
331356 // Scatter spill or non-WriteEnable: invalidate any GRF mapping to
332357 // these offsets since we cannot propagate through them.
@@ -350,13 +375,15 @@ void SpillFillPropagation::processBBForward(G4_BB *bb) {
350375 invalidateClobberedEntries (inst);
351376 for (unsigned i = 0 ; i < numRows; ++i)
352377 addEntry (offset + i, dstStartGRF + i);
378+ mapGRFsToDcl (dstStartGRF, numRows, fill->getDst ()->getTopDcl ());
353379 // it already advanced by erase
354380 continue ;
355381 }
356382 // Fill not replaced — invalidate dst GRFs and record.
357383 invalidateClobberedEntries (inst);
358384 for (unsigned i = 0 ; i < numRows; ++i)
359385 addEntry (offset + i, dstStartGRF + i);
386+ mapGRFsToDcl (dstStartGRF, numRows, fill->getDst ()->getTopDcl ());
360387 } else {
361388 // Non-WriteEnable fill: invalidate destination GRFs (WAW clobber).
362389 invalidateClobberedEntries (inst);
@@ -395,8 +422,12 @@ void SpillFillPropagation::processBBForward(G4_BB *bb) {
395422 G4_INST *lastInst = bb->back ();
396423 if (lastInst->opcode () == G4_goto && lastInst->getPredicate ()) {
397424 G4_BB *ftBB = bb->Succs .empty () ? nullptr : bb->Succs .front ();
398- if (ftBB && ftBB->Preds .size () == 1 && ftBB->Preds .front () == bb)
399- bbEntryState[ftBB] = offsetToGRFs;
425+ if (ftBB && ftBB->Preds .size () == 1 && ftBB->Preds .front () == bb) {
426+ BBEntryState state;
427+ state.offsetToGRFs = offsetToGRFs;
428+ state.grfToDeclare = grfToDeclare;
429+ bbEntryState[ftBB] = state;
430+ }
400431 }
401432 }
402433}
@@ -552,6 +583,7 @@ void SpillFillPropagation::processBBBackward(G4_BB *bb) {
552583
553584 if (replaceFillWithMovsAfter (bb, rit, insertAfterIt, *matchedPF,
554585 srcGRFs)) {
586+ fill->getDst ()->getTopDcl ()->setForceGlobal ();
555587 for (unsigned g = matchedPF->dstStartGRF ;
556588 g <= matchedPF->dstEndGRF ; ++g)
557589 clobberedRows.insert (g);
@@ -641,6 +673,8 @@ void SpillFillPropagation::processBBBackward(G4_BB *bb) {
641673
642674 if (replaceFillWithMovsAfter (bb, rit, insertAfterIt, *matchedPF,
643675 srcGRFs)) {
676+ auto *payloadDcl = spill->getPayload ()->getTopDcl ();
677+ payloadDcl->setForceGlobal ();
644678 for (unsigned g = matchedPF->dstStartGRF ;
645679 g <= matchedPF->dstEndGRF ; ++g)
646680 clobberedRows.insert (g);
0 commit comments