Skip to content

Commit cb4a5d0

Browse files
committed
[Z80] Add R64_24 decomposition pass to fix register allocation failures
The R64_24 register class has very few valid physical register tuples. When multiple R64_24 virtual registers need to be allocated simultaneously, the allocator runs out of options and fails. Z80R64SpillPass is a pre-register-allocation pass that decomposes R64_24 virtual registers into separate component registers: Lo24 (R24 register class), Mid24 (R24 register class), and Hi16 (R16 register class). It rewrites: IMPLICIT_DEF -> an IMPLICIT_DEF for each component; REG_SEQUENCE -> copies of the components into the decomposed registers; INSERT_SUBREG -> copies of the unchanged components plus the inserted component; EXTRACT_SUBREG uses -> a COPY from the appropriate component; sub-register operand uses -> a direct reference to the decomposed component.
1 parent 1a56c14 commit cb4a5d0

4 files changed

Lines changed: 364 additions & 0 deletions

File tree

llvm/lib/Target/Z80/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ set(sources
3636
Z80MachineLateOptimization.cpp
3737
Z80MachinePreRAOptimization.cpp
3838
Z80PostSelectCombiner.cpp
39+
Z80R64SpillPass.cpp
3940
Z80RegisterInfo.cpp
4041
Z80Subtarget.cpp
4142
Z80TargetMachine.cpp

llvm/lib/Target/Z80/Z80.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ InstructionSelector *createZ80InstructionSelector(const Z80TargetMachine &TM,
3232
FunctionPass *createZ80PostSelectCombiner();
3333
FunctionPass *createZ80MachineEarlyOptimizationPass();
3434
FunctionPass *createZ80MachinePreRAOptimizationPass();
35+
FunctionPass *createZ80R64SpillPass();
3536
FunctionPass *createZ80MachineLateOptimizationPass();
3637
FunctionPass *createZ80BranchSelectorPass();
3738

@@ -40,6 +41,7 @@ void initializeZ80PostLegalizerCombinerPass(PassRegistry &);
4041
void initializeZ80PostSelectCombinerPass(PassRegistry &);
4142
void initializeZ80MachineEarlyOptimizationPass(PassRegistry &);
4243
void initializeZ80MachinePreRAOptimizationPass(PassRegistry &);
44+
void initializeZ80R64SpillPassPass(PassRegistry &);
4345
void initializeZ80MachineLateOptimizationPass(PassRegistry &);
4446
void initializeZ80BranchSelectorPass(PassRegistry &);
4547

Lines changed: 359 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,359 @@
1+
//===-- Z80R64SpillPass.cpp - R64_24 Decomposition Pass -------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// This pass decomposes R64_24 virtual registers into separate (r24, r24, r16)
10+
// component registers to prevent register allocation failures. The Z80's
11+
// R64_24 register class has very few valid tuples (5-8), and when multiple
12+
// R64_24 values need to be allocated simultaneously, the allocator runs out.
13+
//
14+
// The pass:
15+
// 1. Finds all R64_24 virtual registers
16+
// 2. Creates three replacement registers: Lo24 (r24), Mid24 (r24), Hi16 (r16)
17+
// 3. Rewrites IMPLICIT_DEF, REG_SEQUENCE and INSERT_SUBREG defs to populate the component registers
18+
// 4. Rewrites EXTRACT_SUBREG uses to read from the appropriate component
19+
// 5. Rewrites sub-register operand uses to reference the matching component directly (whole-register uses are not handled yet and keep the original R64_24 register)
20+
//
21+
// This matches the v15 backend's approach of keeping 64-bit values decomposed.
22+
//
23+
//===----------------------------------------------------------------------===//
24+
25+
#include "MCTargetDesc/Z80MCTargetDesc.h"
26+
#include "Z80.h"
27+
#include "Z80InstrInfo.h"
28+
#include "Z80RegisterInfo.h"
29+
#include "Z80Subtarget.h"
30+
#include "llvm/CodeGen/MachineBasicBlock.h"
31+
#include "llvm/CodeGen/MachineFrameInfo.h"
32+
#include "llvm/CodeGen/MachineFunction.h"
33+
#include "llvm/CodeGen/MachineFunctionPass.h"
34+
#include "llvm/CodeGen/MachineInstr.h"
35+
#include "llvm/CodeGen/MachineInstrBuilder.h"
36+
#include "llvm/CodeGen/MachineRegisterInfo.h"
37+
#include "llvm/CodeGen/TargetInstrInfo.h"
38+
#include "llvm/CodeGen/TargetRegisterInfo.h"
39+
#include "llvm/Support/Debug.h"
40+
41+
#define DEBUG_TYPE "z80-r64-spill"
42+
43+
using namespace llvm;
44+
45+
namespace {
46+
47+
/// tracks the decomposed components of an R64_24 register
48+
struct DecomposedR64 {
49+
Register Lo24; // sub_low24 component
50+
Register Mid24; // sub_mid24 component
51+
Register Hi16; // sub_word3 component
52+
bool IsValid = false;
53+
};
54+
55+
class Z80R64SpillPass : public MachineFunctionPass {
56+
public:
57+
static char ID;
58+
Z80R64SpillPass() : MachineFunctionPass(ID) {}
59+
60+
StringRef getPassName() const override {
61+
return "Z80 R64_24 Decomposition Pass";
62+
}
63+
64+
bool runOnMachineFunction(MachineFunction &MF) override;
65+
66+
private:
67+
const Z80Subtarget *STI = nullptr;
68+
const Z80InstrInfo *TII = nullptr;
69+
const Z80RegisterInfo *TRI = nullptr;
70+
MachineRegisterInfo *MRI = nullptr;
71+
72+
/// from original R64_24 vreg to its decomposed components
73+
DenseMap<Register, DecomposedR64> DecompMap;
74+
75+
bool decomposeR64Registers(MachineFunction &MF);
76+
bool rewriteR64Def(MachineInstr &MI, Register R64Reg);
77+
bool rewriteR64Uses(Register R64Reg);
78+
DecomposedR64 getOrCreateDecomp(Register R64Reg);
79+
};
80+
81+
}
82+
83+
char Z80R64SpillPass::ID = 0;
84+
85+
INITIALIZE_PASS(Z80R64SpillPass, DEBUG_TYPE,
86+
"Decompose R64_24 registers into components", false, false)
87+
88+
/// get or create decomposed component registers for an R64_24 register
89+
DecomposedR64 Z80R64SpillPass::getOrCreateDecomp(Register R64Reg) {
90+
auto It = DecompMap.find(R64Reg);
91+
if (It != DecompMap.end())
92+
return It->second;
93+
94+
DecomposedR64 D;
95+
D.Lo24 = MRI->createVirtualRegister(&Z80::R24RegClass);
96+
D.Mid24 = MRI->createVirtualRegister(&Z80::R24RegClass);
97+
D.Hi16 = MRI->createVirtualRegister(&Z80::R16RegClass);
98+
D.IsValid = true;
99+
100+
DecompMap[R64Reg] = D;
101+
102+
LLVM_DEBUG(dbgs() << "Decomposing " << printReg(R64Reg, TRI) << " into "
103+
<< printReg(D.Lo24, TRI) << ", "
104+
<< printReg(D.Mid24, TRI) << ", "
105+
<< printReg(D.Hi16, TRI) << "\n");
106+
107+
return D;
108+
}
109+
110+
/// rewrite the definition of an R64_24 register
111+
/// handles REG_SEQUENCE, INSERT_SUBREG, and IMPLICIT_DEF patterns
112+
bool Z80R64SpillPass::rewriteR64Def(MachineInstr &MI, Register R64Reg) {
113+
MachineBasicBlock &MBB = *MI.getParent();
114+
DebugLoc DL = MI.getDebugLoc();
115+
DecomposedR64 D = getOrCreateDecomp(R64Reg);
116+
117+
unsigned Opc = MI.getOpcode();
118+
119+
if (Opc == TargetOpcode::IMPLICIT_DEF) {
120+
// IMPLICIT_DEF -> create IMPLICIT_DEF for each component
121+
auto InsertPt = std::next(MI.getIterator());
122+
BuildMI(MBB, InsertPt, DL, TII->get(TargetOpcode::IMPLICIT_DEF), D.Lo24);
123+
BuildMI(MBB, InsertPt, DL, TII->get(TargetOpcode::IMPLICIT_DEF), D.Mid24);
124+
BuildMI(MBB, InsertPt, DL, TII->get(TargetOpcode::IMPLICIT_DEF), D.Hi16);
125+
126+
LLVM_DEBUG(dbgs() << " Decomposed IMPLICIT_DEF: " << MI);
127+
return true;
128+
}
129+
130+
if (Opc == TargetOpcode::REG_SEQUENCE) {
131+
// REG_SEQUENCE %dst, %lo, sub_low24, %mid, sub_mid24, %hi, sub_word3
132+
// -> COPY each component to decomposed regs
133+
134+
Register Lo = Register(), Mid = Register(), Hi = Register();
135+
136+
for (unsigned I = 1, E = MI.getNumOperands(); I < E; I += 2) {
137+
if (I + 1 >= E) break;
138+
Register SrcReg = MI.getOperand(I).getReg();
139+
unsigned SubIdx = MI.getOperand(I + 1).getImm();
140+
141+
if (SubIdx == Z80::sub_low24)
142+
Lo = SrcReg;
143+
else if (SubIdx == Z80::sub_mid24)
144+
Mid = SrcReg;
145+
else if (SubIdx == Z80::sub_word3)
146+
Hi = SrcReg;
147+
}
148+
149+
auto InsertPt = std::next(MI.getIterator());
150+
if (Lo.isValid())
151+
BuildMI(MBB, InsertPt, DL, TII->get(TargetOpcode::COPY), D.Lo24).addReg(Lo);
152+
if (Mid.isValid())
153+
BuildMI(MBB, InsertPt, DL, TII->get(TargetOpcode::COPY), D.Mid24).addReg(Mid);
154+
if (Hi.isValid())
155+
BuildMI(MBB, InsertPt, DL, TII->get(TargetOpcode::COPY), D.Hi16).addReg(Hi);
156+
157+
LLVM_DEBUG(dbgs() << " Decomposed REG_SEQUENCE: " << MI);
158+
return true;
159+
}
160+
161+
if (Opc == TargetOpcode::INSERT_SUBREG) {
162+
// INSERT_SUBREG %dst, %src, %insert, subidx
163+
// the %src is usually an IMPLICIT_DEF or previous R64_24
164+
// we need to track which component is being set
165+
166+
Register SrcReg = MI.getOperand(1).getReg();
167+
Register InsertReg = MI.getOperand(2).getReg();
168+
unsigned SubIdx = MI.getOperand(3).getImm();
169+
170+
// if src is also R64_24, get its decomposition
171+
DecomposedR64 SrcD;
172+
if (SrcReg.isVirtual() &&
173+
MRI->getRegClassOrNull(SrcReg) == &Z80::R64_24RegClass) {
174+
SrcD = getOrCreateDecomp(SrcReg);
175+
}
176+
177+
auto InsertPt = std::next(MI.getIterator());
178+
179+
if (SrcD.IsValid) {
180+
if (SubIdx != Z80::sub_low24)
181+
BuildMI(MBB, InsertPt, DL, TII->get(TargetOpcode::COPY), D.Lo24)
182+
.addReg(SrcD.Lo24);
183+
if (SubIdx != Z80::sub_mid24)
184+
BuildMI(MBB, InsertPt, DL, TII->get(TargetOpcode::COPY), D.Mid24)
185+
.addReg(SrcD.Mid24);
186+
if (SubIdx != Z80::sub_word3)
187+
BuildMI(MBB, InsertPt, DL, TII->get(TargetOpcode::COPY), D.Hi16)
188+
.addReg(SrcD.Hi16);
189+
}
190+
191+
if (SubIdx == Z80::sub_low24)
192+
BuildMI(MBB, InsertPt, DL, TII->get(TargetOpcode::COPY), D.Lo24)
193+
.addReg(InsertReg);
194+
else if (SubIdx == Z80::sub_mid24)
195+
BuildMI(MBB, InsertPt, DL, TII->get(TargetOpcode::COPY), D.Mid24)
196+
.addReg(InsertReg);
197+
else if (SubIdx == Z80::sub_word3)
198+
BuildMI(MBB, InsertPt, DL, TII->get(TargetOpcode::COPY), D.Hi16)
199+
.addReg(InsertReg);
200+
201+
LLVM_DEBUG(dbgs() << " Decomposed INSERT_SUBREG: " << MI);
202+
return true;
203+
}
204+
205+
// for other defs (COPY, loads, etc.), we need to extract components
206+
// this probably doesnt happen often for R64_24
207+
LLVM_DEBUG(dbgs() << " Unhandled R64_24 def: " << MI);
208+
return false;
209+
}
210+
211+
/// rewrite all uses of an R64_24 register to use decomposed components
212+
bool Z80R64SpillPass::rewriteR64Uses(Register R64Reg) {
213+
DecomposedR64 D = getOrCreateDecomp(R64Reg);
214+
if (!D.IsValid)
215+
return false;
216+
217+
bool Changed = false;
218+
SmallVector<MachineInstr *, 16> UsesToRewrite;
219+
220+
for (MachineInstr &UseMI : MRI->use_instructions(R64Reg)) {
221+
UsesToRewrite.push_back(&UseMI);
222+
}
223+
224+
for (MachineInstr *UseMI : UsesToRewrite) {
225+
unsigned Opc = UseMI->getOpcode();
226+
MachineBasicBlock &MBB = *UseMI->getParent();
227+
DebugLoc DL = UseMI->getDebugLoc();
228+
229+
if (Opc == TargetOpcode::EXTRACT_SUBREG) {
230+
// EXTRACT_SUBREG %dst, %r64, subidx
231+
// -> COPY %dst, %component
232+
Register DstReg = UseMI->getOperand(0).getReg();
233+
unsigned SubIdx = UseMI->getOperand(2).getImm();
234+
235+
Register SrcComp;
236+
if (SubIdx == Z80::sub_low24)
237+
SrcComp = D.Lo24;
238+
else if (SubIdx == Z80::sub_mid24)
239+
SrcComp = D.Mid24;
240+
else if (SubIdx == Z80::sub_word3)
241+
SrcComp = D.Hi16;
242+
else if (SubIdx == Z80::sub_short) {
243+
// sub_short is the low 16 bits of the low24
244+
// need to extract from Lo24
245+
auto InsertPt = UseMI->getIterator();
246+
BuildMI(MBB, InsertPt, DL, TII->get(TargetOpcode::COPY), DstReg)
247+
.addReg(D.Lo24, 0, Z80::sub_short);
248+
UseMI->eraseFromParent();
249+
Changed = true;
250+
continue;
251+
}
252+
253+
if (SrcComp.isValid()) {
254+
UseMI->setDesc(TII->get(TargetOpcode::COPY));
255+
UseMI->removeOperand(2); // subidx
256+
UseMI->getOperand(1).setReg(SrcComp);
257+
UseMI->getOperand(1).setSubReg(0);
258+
Changed = true;
259+
LLVM_DEBUG(dbgs() << " Rewrote EXTRACT_SUBREG to COPY: " << *UseMI);
260+
}
261+
continue;
262+
}
263+
264+
if (Opc == TargetOpcode::INSERT_SUBREG) {
265+
// If this INSERT_SUBREG uses R64Reg as source its already handled in rewriteR64Def for the destination
266+
continue;
267+
}
268+
269+
// for other uses (stores, passing to calls via sub-regs)
270+
for (MachineOperand &MO : UseMI->operands()) {
271+
if (!MO.isReg() || MO.getReg() != R64Reg || !MO.isUse())
272+
continue;
273+
274+
unsigned SubReg = MO.getSubReg();
275+
if (SubReg == Z80::sub_low24) {
276+
MO.setReg(D.Lo24);
277+
MO.setSubReg(0);
278+
Changed = true;
279+
} else if (SubReg == Z80::sub_mid24) {
280+
MO.setReg(D.Mid24);
281+
MO.setSubReg(0);
282+
Changed = true;
283+
} else if (SubReg == Z80::sub_word3) {
284+
MO.setReg(D.Hi16);
285+
MO.setSubReg(0);
286+
Changed = true;
287+
} else if (SubReg == 0) {
288+
// whole register use reconstruct R64_24
289+
// not sure when this would happen
290+
LLVM_DEBUG(dbgs() << " WARNING: Whole R64_24 use not yet handled: " << *UseMI);
291+
}
292+
}
293+
}
294+
295+
return Changed;
296+
}
297+
298+
/// Decomposes the R64_24 virtual registers of the current function.
///
/// Steps:
///  1. collect every R64_24 virtual register that has a defining instruction;
///  2. rewrite each def into component defs (rewriteR64Def);
///  3. for the registers whose def was actually decomposed, redirect their
///     uses to the components (rewriteR64Uses);
///  4. erase the original defs of decomposed registers that ended up unused.
///
/// Registers whose def could not be decomposed are left completely alone:
/// their uses are not redirected (the components would be undefined) and
/// their defining instruction is never erased (it may be an arbitrary
/// instruction with other effects).
///
/// \param MF the function being processed (state is taken from the MRI/TII
///           members set up by runOnMachineFunction).
/// \returns true if any instruction was inserted, rewritten or erased.
bool Z80R64SpillPass::decomposeR64Registers(MachineFunction &MF) {
  using RegAndDef = std::pair<Register, MachineInstr *>;

  // All R64_24 virtual registers together with their defining instruction.
  SmallVector<RegAndDef, 16> R64Defs;
  for (unsigned I = 0, E = MRI->getNumVirtRegs(); I != E; ++I) {
    const Register Reg = Register::index2VirtReg(I);
    if (MRI->getRegClassOrNull(Reg) != &Z80::R64_24RegClass)
      continue;
    if (MachineInstr *DefMI = MRI->getVRegDef(Reg))
      R64Defs.push_back({Reg, DefMI});
  }

  LLVM_DEBUG(dbgs() << "Z80R64SpillPass: Found " << R64Defs.size()
                    << " R64_24 registers to decompose\n");

  if (R64Defs.empty())
    return false;

  bool Changed = false;

  // Defs first, remembering which registers really got component defs.
  SmallVector<RegAndDef, 16> Decomposed;
  for (auto &[Reg, DefMI] : R64Defs) {
    if (rewriteR64Def(*DefMI, Reg)) {
      Decomposed.push_back({Reg, DefMI});
      Changed = true;
    }
  }

  // Then redirect uses, but only for registers whose components are defined.
  for (auto &[Reg, DefMI] : Decomposed)
    Changed |= rewriteR64Uses(Reg);

  // Finally drop the original defs that are now dead. Erasing one def (e.g.
  // an INSERT_SUBREG) can remove the last use of the register it read from,
  // so sweep until nothing more can be removed.
  bool ErasedAny;
  do {
    ErasedAny = false;
    for (auto &[Reg, DefMI] : Decomposed) {
      if (!DefMI || !MRI->use_empty(Reg))
        continue;
      LLVM_DEBUG(dbgs() << " Removing dead R64_24 def: " << *DefMI);
      DefMI->eraseFromParent();
      DefMI = nullptr; // mark as already erased for later sweeps
      ErasedAny = true;
      Changed = true;
    }
  } while (ErasedAny);

  return Changed;
}
341+
342+
/// Pass entry point. Does nothing unless the subtarget reports 24-bit mode;
/// otherwise refreshes the cached target/function handles, resets the
/// decomposition map and runs the decomposition.
/// \returns true if \p MF was modified.
bool Z80R64SpillPass::runOnMachineFunction(MachineFunction &MF) {
  const Z80Subtarget &Subtarget = MF.getSubtarget<Z80Subtarget>();
  STI = &Subtarget;

  // R64_24 is only decomposed in 24-bit mode.
  if (!Subtarget.is24Bit())
    return false;

  TII = Subtarget.getInstrInfo();
  TRI = Subtarget.getRegisterInfo();
  MRI = &MF.getRegInfo();

  // Start from a clean slate so entries from a previous function are not
  // reused.
  DecompMap.clear();

  const bool Modified = decomposeR64Registers(MF);
  return Modified;
}
356+
357+
/// Factory used by the Z80 pass pipeline: returns a newly allocated
/// Z80R64SpillPass.
FunctionPass *llvm::createZ80R64SpillPass() {
  FunctionPass *NewPass = new Z80R64SpillPass();
  return NewPass;
}

llvm/lib/Target/Z80/Z80TargetMachine.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeZ80Target() {
4646
initializeZ80PostLegalizerCombinerPass(PR);
4747
initializeZ80PostSelectCombinerPass(PR);
4848
initializeZ80MachineEarlyOptimizationPass(PR);
49+
initializeZ80R64SpillPassPass(PR);
4950
initializeZ80MachineLateOptimizationPass(PR);
5051
initializeZ80BranchSelectorPass(PR);
5152
}
@@ -211,6 +212,7 @@ void Z80PassConfig::addMachineSSAOptimization() {
211212
TargetPassConfig::addMachineSSAOptimization();
212213
addPass(createZ80MachineEarlyOptimizationPass());
213214
addPass(createZ80MachinePreRAOptimizationPass());
215+
addPass(createZ80R64SpillPass());
214216
}
215217

216218
void Z80PassConfig::addFastRegAlloc() {

0 commit comments

Comments
 (0)