1616// 2. Creates three replacement registers: Lo24 (r24), Mid24 (r24), Hi16 (r16)
1717// 3. Rewrites REG_SEQUENCE defs to populate the component registers
1818// 4. Rewrites EXTRACT_SUBREG uses to read from the appropriate component
19- // 5. Rewrites whole-register uses to inline-reconstruct R64_24 when needed
19+ // 5. Erases the original R64_24 definitions once all uses are rewritten
2020//
2121// This matches the v15 backend's approach of keeping 64-bit values decomposed.
2222//
@@ -73,8 +73,8 @@ class Z80R64SpillPass : public MachineFunctionPass {
7373 DenseMap<Register, DecomposedR64> DecompMap;
7474
7575 bool decomposeR64Registers (MachineFunction &MF);
76- bool rewriteR64Def (MachineInstr &MI, Register R64Reg);
77- bool rewriteR64Uses (Register R64Reg);
76+ void rewriteR64Def (MachineInstr &MI, Register R64Reg);
77+ void rewriteR64Uses (Register R64Reg, SmallVectorImpl<MachineInstr *> &ToErase );
7878 DecomposedR64 getOrCreateDecomp (Register R64Reg);
7979};
8080
@@ -109,22 +109,22 @@ DecomposedR64 Z80R64SpillPass::getOrCreateDecomp(Register R64Reg) {
109109
110110// / rewrite the definition of an R64_24 register
111111// / handles REG_SEQUENCE, INSERT_SUBREG, and IMPLICIT_DEF patterns
112- bool Z80R64SpillPass::rewriteR64Def (MachineInstr &MI, Register R64Reg) {
112+ void Z80R64SpillPass::rewriteR64Def (MachineInstr &MI, Register R64Reg) {
113113 MachineBasicBlock &MBB = *MI.getParent ();
114114 DebugLoc DL = MI.getDebugLoc ();
115115 DecomposedR64 D = getOrCreateDecomp (R64Reg);
116116
117117 unsigned Opc = MI.getOpcode ();
118+ auto InsertPt = std::next (MI.getIterator ());
118119
119120 if (Opc == TargetOpcode::IMPLICIT_DEF) {
120121 // IMPLICIT_DEF -> create IMPLICIT_DEF for each component
121- auto InsertPt = std::next (MI.getIterator ());
122122 BuildMI (MBB, InsertPt, DL, TII->get (TargetOpcode::IMPLICIT_DEF), D.Lo24 );
123123 BuildMI (MBB, InsertPt, DL, TII->get (TargetOpcode::IMPLICIT_DEF), D.Mid24 );
124124 BuildMI (MBB, InsertPt, DL, TII->get (TargetOpcode::IMPLICIT_DEF), D.Hi16 );
125125
126126 LLVM_DEBUG (dbgs () << " Decomposed IMPLICIT_DEF: " << MI);
127- return true ;
127+ return ;
128128 }
129129
130130 if (Opc == TargetOpcode::REG_SEQUENCE) {
@@ -146,7 +146,6 @@ bool Z80R64SpillPass::rewriteR64Def(MachineInstr &MI, Register R64Reg) {
146146 Hi = SrcReg;
147147 }
148148
149- auto InsertPt = std::next (MI.getIterator ());
150149 if (Lo.isValid ())
151150 BuildMI (MBB, InsertPt, DL, TII->get (TargetOpcode::COPY), D.Lo24 ).addReg (Lo);
152151 if (Mid.isValid ())
@@ -155,7 +154,7 @@ bool Z80R64SpillPass::rewriteR64Def(MachineInstr &MI, Register R64Reg) {
155154 BuildMI (MBB, InsertPt, DL, TII->get (TargetOpcode::COPY), D.Hi16 ).addReg (Hi);
156155
157156 LLVM_DEBUG (dbgs () << " Decomposed REG_SEQUENCE: " << MI);
158- return true ;
157+ return ;
159158 }
160159
161160 if (Opc == TargetOpcode::INSERT_SUBREG) {
@@ -174,8 +173,6 @@ bool Z80R64SpillPass::rewriteR64Def(MachineInstr &MI, Register R64Reg) {
174173 SrcD = getOrCreateDecomp (SrcReg);
175174 }
176175
177- auto InsertPt = std::next (MI.getIterator ());
178-
179176 if (SrcD.IsValid ) {
180177 if (SubIdx != Z80::sub_low24)
181178 BuildMI (MBB, InsertPt, DL, TII->get (TargetOpcode::COPY), D.Lo24 )
@@ -199,22 +196,20 @@ bool Z80R64SpillPass::rewriteR64Def(MachineInstr &MI, Register R64Reg) {
199196 .addReg (InsertReg);
200197
201198 LLVM_DEBUG (dbgs () << " Decomposed INSERT_SUBREG: " << MI);
202- return true ;
199+ return ;
203200 }
204201
205202 // for other defs (COPY, loads, etc.), we need to extract components
206- // this probably doesnt happen often for R64_24
207203 LLVM_DEBUG (dbgs () << " Unhandled R64_24 def: " << MI);
208- return false ;
209204}
210205
211206// / rewrite all uses of an R64_24 register to use decomposed components
212- bool Z80R64SpillPass::rewriteR64Uses (Register R64Reg) {
207+ void Z80R64SpillPass::rewriteR64Uses (Register R64Reg,
208+ SmallVectorImpl<MachineInstr *> &ToErase) {
213209 DecomposedR64 D = getOrCreateDecomp (R64Reg);
214210 if (!D.IsValid )
215- return false ;
211+ return ;
216212
217- bool Changed = false ;
218213 SmallVector<MachineInstr *, 16 > UsesToRewrite;
219214
220215 for (MachineInstr &UseMI : MRI->use_instructions (R64Reg)) {
@@ -228,45 +223,65 @@ bool Z80R64SpillPass::rewriteR64Uses(Register R64Reg) {
228223
229224 if (Opc == TargetOpcode::EXTRACT_SUBREG) {
230225 // EXTRACT_SUBREG %dst, %r64, subidx
231- // -> COPY %dst, %component
226+ // -> COPY %dst, %component (possibly with subidx extraction)
232227 Register DstReg = UseMI->getOperand (0 ).getReg ();
233228 unsigned SubIdx = UseMI->getOperand (2 ).getImm ();
234229
235230 Register SrcComp;
236- if (SubIdx == Z80::sub_low24)
231+ unsigned NestedSubReg = 0 ;
232+
233+ if (SubIdx == Z80::sub_low24) {
237234 SrcComp = D.Lo24 ;
238- else if (SubIdx == Z80::sub_mid24)
235+ } else if (SubIdx == Z80::sub_mid24) {
239236 SrcComp = D.Mid24 ;
240- else if (SubIdx == Z80::sub_word3)
237+ } else if (SubIdx == Z80::sub_word3) {
241238 SrcComp = D.Hi16 ;
242- else if (SubIdx == Z80::sub_short) {
243- // sub_short is the low 16 bits of the low24
244- // need to extract from Lo24
245- auto InsertPt = UseMI->getIterator ();
246- BuildMI (MBB, InsertPt, DL, TII->get (TargetOpcode::COPY), DstReg)
247- .addReg (D.Lo24 , 0 , Z80::sub_short);
248- UseMI->eraseFromParent ();
249- Changed = true ;
239+ } else if (SubIdx == Z80::sub_short) {
240+ // sub_short is the low 16 bits of sub_low24
241+ SrcComp = D.Lo24 ;
242+ NestedSubReg = Z80::sub_short;
243+ } else if (SubIdx == Z80::sub_low) {
244+ // sub_low is the low 8 bits, extract from Lo24
245+ SrcComp = D.Lo24 ;
246+ NestedSubReg = Z80::sub_low;
247+ } else if (SubIdx == Z80::sub_high) {
248+ // sub_high might be bits 8-15
249+ SrcComp = D.Lo24 ;
250+ NestedSubReg = Z80::sub_high;
251+ } else {
252+ LLVM_DEBUG (dbgs () << " Unknown subreg index " << SubIdx
253+ << " in EXTRACT: " << *UseMI);
250254 continue ;
251255 }
252256
253257 if (SrcComp.isValid ()) {
254- UseMI->setDesc (TII->get (TargetOpcode::COPY));
255- UseMI->removeOperand (2 ); // subidx
256- UseMI->getOperand (1 ).setReg (SrcComp);
257- UseMI->getOperand (1 ).setSubReg (0 );
258- Changed = true ;
259- LLVM_DEBUG (dbgs () << " Rewrote EXTRACT_SUBREG to COPY: " << *UseMI);
258+ auto InsertPt = UseMI->getIterator ();
259+ if (NestedSubReg) {
260+ BuildMI (MBB, InsertPt, DL, TII->get (TargetOpcode::COPY), DstReg)
261+ .addReg (SrcComp, 0 , NestedSubReg);
262+ } else {
263+ BuildMI (MBB, InsertPt, DL, TII->get (TargetOpcode::COPY), DstReg)
264+ .addReg (SrcComp);
265+ }
266+ ToErase.push_back (UseMI);
267+ LLVM_DEBUG (dbgs () << " Rewrote EXTRACT_SUBREG: " << *UseMI);
260268 }
261269 continue ;
262270 }
263271
264272 if (Opc == TargetOpcode::INSERT_SUBREG) {
265- // If this INSERT_SUBREG uses R64Reg as source its already handled in rewriteR64Def for the destination
273+ // if this INSERT_SUBREG uses R64Reg as source, it's already handled
274+ // in rewriteR64Def for the destination
275+ // just mark it for erasure since we have created decomposed copies
276+ if (UseMI->getOperand (0 ).getReg () != R64Reg) {
277+ // R64Reg is being used as the source (operand 1)
278+ // this is handled when we process the destination R64_24
279+ continue ;
280+ }
266281 continue ;
267282 }
268283
269- // for other uses (stores, passing to calls via sub-regs)
284+ // handle other uses with subreg access
270285 for (MachineOperand &MO : UseMI->operands ()) {
271286 if (!MO.isReg () || MO.getReg () != R64Reg || !MO.isUse ())
272287 continue ;
@@ -275,30 +290,23 @@ bool Z80R64SpillPass::rewriteR64Uses(Register R64Reg) {
275290 if (SubReg == Z80::sub_low24) {
276291 MO.setReg (D.Lo24 );
277292 MO.setSubReg (0 );
278- Changed = true ;
279293 } else if (SubReg == Z80::sub_mid24) {
280294 MO.setReg (D.Mid24 );
281295 MO.setSubReg (0 );
282- Changed = true ;
283296 } else if (SubReg == Z80::sub_word3) {
284297 MO.setReg (D.Hi16 );
285298 MO.setSubReg (0 );
286- Changed = true ;
287299 } else if (SubReg == 0 ) {
288300 // whole register use reconstruct R64_24
289301 // not sure when this would happen
290302 LLVM_DEBUG (dbgs () << " WARNING: Whole R64_24 use not yet handled: " << *UseMI);
291303 }
292304 }
293305 }
294-
295- return Changed;
296306}
297307
298308bool Z80R64SpillPass::decomposeR64Registers (MachineFunction &MF) {
299- bool Changed = false ;
300-
301- // all R64_24 virtual registers and their defs
309+ // collect all R64_24 virtual registers and their defs
302310 SmallVector<std::pair<Register, MachineInstr *>, 16 > R64Defs;
303311
304312 for (unsigned I = 0 , E = MRI->getNumVirtRegs (); I != E; ++I) {
@@ -317,26 +325,36 @@ bool Z80R64SpillPass::decomposeR64Registers(MachineFunction &MF) {
317325 if (R64Defs.empty ())
318326 return false ;
319327
320- // process defs first
328+ // process defs first, this creates the decomposed component defs
321329 for (auto &[Reg, DefMI] : R64Defs) {
322- Changed |= rewriteR64Def (*DefMI, Reg);
330+ rewriteR64Def (*DefMI, Reg);
323331 }
324332
325- // then rewrite uses
333+ // then rewrite uses and collect instructions to erase
334+ SmallVector<MachineInstr *, 32 > ToErase;
326335 for (auto &[Reg, DefMI] : R64Defs) {
327- Changed |= rewriteR64Uses (Reg);
336+ rewriteR64Uses (Reg, ToErase);
337+ }
338+
339+ // erase rewritten EXTRACT_SUBREG instructions
340+ for (MachineInstr *MI : ToErase) {
341+ MI->eraseFromParent ();
328342 }
329343
330- // remove original R64_24 defs that are now dead
344+ // erase original R64_24 defs
331345 for (auto &[Reg, DefMI] : R64Defs) {
332346 if (MRI->use_empty (Reg)) {
333347 LLVM_DEBUG (dbgs () << " Removing dead R64_24 def: " << *DefMI);
334348 DefMI->eraseFromParent ();
335- Changed = true ;
349+ } else {
350+ LLVM_DEBUG (dbgs () << " WARNING: R64_24 still has uses: " << *DefMI);
351+ for (MachineInstr &Use : MRI->use_instructions (Reg)) {
352+ LLVM_DEBUG (dbgs () << " Used by: " << Use);
353+ }
336354 }
337355 }
338356
339- return Changed ;
357+ return true ;
340358}
341359
342360bool Z80R64SpillPass::runOnMachineFunction (MachineFunction &MF) {
0 commit comments