Skip to content

Commit b3609c9

Browse files
Sightemadriweb
authored andcommitted
[Z80] Fix register allocation regression in v17 arithmetic selection
1 parent 949aa7d commit b3609c9

14 files changed

Lines changed: 2255 additions & 2230 deletions

File tree

llvm/include/llvm/CodeGen/GlobalISel/MIPatternMatch.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,18 @@ inline std::optional<int64_t> matchConstant(Register Reg,
7878
return getIConstantVRegSExtVal(Reg, MRI);
7979
}
8080

81+
template <>
82+
inline std::optional<ValueAndVReg> matchConstant(Register Reg,
83+
const MachineRegisterInfo &MRI) {
84+
return getIConstantVRegValWithLookThrough(Reg, MRI);
85+
}
86+
87+
template <>
88+
inline std::optional<FPValueAndVReg> matchConstant(Register Reg,
89+
const MachineRegisterInfo &MRI) {
90+
return getFConstantVRegValWithLookThrough(Reg, MRI);
91+
}
92+
8193
template <typename ConstT> struct ConstantMatch {
8294
ConstT &CR;
8395
ConstantMatch(ConstT &C) : CR(C) {}

llvm/lib/Target/Z80/GISel/Z80InstructionSelector.cpp

Lines changed: 35 additions & 85 deletions
Original file line numberDiff line numberDiff line change
@@ -1992,119 +1992,69 @@ bool Z80InstructionSelector::selectAddSub(MachineInstr &I,
19921992
bool Is24Bit = (TySize == 24);
19931993
bool IsSub = (Opc == TargetOpcode::G_SUB);
19941994

1995+
MachineIRBuilder MIB(I);
1996+
19951997
// INC/DEC optimization: For ±1, use INC/DEC which accept R16/R24 class
19961998
// (any register) instead of requiring the accumulator class.
19971999
auto ConstSrc2 = getIConstantVRegValWithLookThrough(Src2Reg, MRI);
19982000
if (ConstSrc2) {
19992001
bool UseInc = false;
20002002
bool UseDec = false;
2001-
20022003
if (IsSub) {
2003-
if (ConstSrc2->Value.isOne())
2004-
UseDec = true;
2005-
else if (ConstSrc2->Value.isAllOnes())
2006-
UseInc = true;
2004+
if (ConstSrc2->Value.isOne()) UseDec = true;
2005+
else if (ConstSrc2->Value.isAllOnes()) UseInc = true;
20072006
} else {
2008-
if (ConstSrc2->Value.isOne())
2009-
UseInc = true;
2010-
else if (ConstSrc2->Value.isAllOnes())
2011-
UseDec = true;
2007+
if (ConstSrc2->Value.isOne()) UseInc = true;
2008+
else if (ConstSrc2->Value.isAllOnes()) UseDec = true;
20122009
}
20132010

20142011
if (UseInc || UseDec) {
2015-
unsigned IncDecOpc;
2016-
const TargetRegisterClass *RC;
2017-
if (Is24Bit) {
2018-
IncDecOpc = UseInc ? Z80::INC24r : Z80::DEC24r;
2019-
RC = &Z80::R24RegClass;
2020-
} else {
2021-
IncDecOpc = UseInc ? Z80::INC16r : Z80::DEC16r;
2022-
RC = &Z80::R16RegClass;
2023-
}
2024-
2025-
MachineIRBuilder MIB(I);
2012+
unsigned IncDecOpc = Is24Bit ? (UseInc ? Z80::INC24r : Z80::DEC24r)
2013+
: (UseInc ? Z80::INC16r : Z80::DEC16r);
2014+
const TargetRegisterClass *RC = Is24Bit ? &Z80::R24RegClass : &Z80::R16RegClass;
20262015
auto IncDecI = MIB.buildInstr(IncDecOpc, {DstReg}, {Src1Reg});
2027-
if (!RBI.constrainGenericRegister(DstReg, *RC, MRI))
2028-
return false;
2029-
if (!RBI.constrainGenericRegister(Src1Reg, *RC, MRI))
2016+
if (!RBI.constrainGenericRegister(DstReg, *RC, MRI) ||
2017+
!RBI.constrainGenericRegister(Src1Reg, *RC, MRI) ||
2018+
!constrainSelectedInstRegOperands(*IncDecI, TII, TRI, RBI))
20302019
return false;
20312020
I.eraseFromParent();
2032-
return constrainSelectedInstRegOperands(*IncDecI, TII, TRI, RBI);
2021+
return true;
20332022
}
20342023
}
20352024

2036-
// Determine physical accumulator register and operand register class
2037-
Register PhysAccum = Is24Bit ? Z80::UHL : Z80::HL;
2025+
// Determine accumulator and operand register classes
20382026
const TargetRegisterClass *AccumRC = Is24Bit ? &Z80::A24RegClass : &Z80::A16RegClass;
20392027
const TargetRegisterClass *OperandRC = Is24Bit ? &Z80::O24RegClass : &Z80::O16RegClass;
20402028

2041-
MachineBasicBlock &MBB = *I.getParent();
2042-
const DebugLoc &DL = I.getDebugLoc();
2043-
MachineIRBuilder MIB(I);
2044-
2045-
// Handle Src1: We want to avoid constraining Src1 to AccumRC (HL) directly,
2046-
// as this propagates back to loads and forces them to target HL, increasing
2047-
// register pressure.
2048-
//
2049-
// Strategy: Try to constrain Src1 to OperandRC (BC/DE) first. If successful,
2050-
// we create a COPY to a new vreg in AccumRC for the operation. If it fails
2051-
// (e.g., Src1 is already in a class that doesn't intersect OperandRC),
2052-
// fall back to constraining to AccumRC directly.
2053-
Register ActualSrc1;
2054-
if (RBI.constrainGenericRegister(Src1Reg, *OperandRC, MRI)) {
2055-
// Src1 constrained to OperandRC (BC/DE) - need COPY to HL for operation
2056-
ActualSrc1 = MRI.createVirtualRegister(AccumRC);
2057-
MIB.buildCopy(ActualSrc1, Src1Reg);
2058-
} else if (RBI.constrainGenericRegister(Src1Reg, *AccumRC, MRI)) {
2059-
// Src1 already compatible with AccumRC (HL) - use directly
2060-
ActualSrc1 = Src1Reg;
2061-
} else {
2062-
// Neither worked - create COPY to AccumRC
2063-
ActualSrc1 = MRI.createVirtualRegister(AccumRC);
2064-
MIB.buildCopy(ActualSrc1, Src1Reg);
2065-
}
2066-
2067-
// Handle Src2: needs to be in OperandRC (O16/O24)
20682029
Register ActualSrc2 = Src2Reg;
20692030
if (!RBI.constrainGenericRegister(Src2Reg, *OperandRC, MRI)) {
20702031
ActualSrc2 = MRI.createVirtualRegister(OperandRC);
20712032
MIB.buildCopy(ActualSrc2, Src2Reg);
20722033
}
2073-
2074-
// Handle Dst: needs to be in AccumRC (A16/A24)
2075-
Register ActualDst = DstReg;
2076-
bool NeedDstCopy = !RBI.constrainGenericRegister(DstReg, *AccumRC, MRI);
2077-
if (NeedDstCopy) {
2078-
ActualDst = MRI.createVirtualRegister(AccumRC);
2079-
}
2080-
2081-
// Step 1: COPY Src1 -> physical accumulator (HL/UHL)
2082-
BuildMI(MBB, I, DL, TII.get(TargetOpcode::COPY), PhysAccum)
2083-
.addReg(ActualSrc1);
2084-
2085-
// Step 2: Emit the arithmetic instruction
2034+
20862035
if (IsSub) {
2087-
// Use Sub16ao/Sub24ao pseudo which encapsulates carry-clearing internally.
2088-
// This gives the register allocator better liveness information vs
2089-
// explicit SCF+CCF+SBC sequence which was creating extra spill slots.
2090-
unsigned SubOpc = Is24Bit ? Z80::Sub24ao : Z80::Sub16ao;
2091-
BuildMI(MBB, I, DL, TII.get(SubOpc))
2092-
.addReg(ActualSrc2);
2036+
// for subtraction, we MUST use HL/UHL accumulator
2037+
Register PhysAccum = Is24Bit ? Z80::UHL : Z80::HL;
2038+
// HL/UHL = COPY Src1
2039+
MIB.buildCopy(PhysAccum, Src1Reg);
2040+
// Sub Pseudo (implicit use/def HL/UHL)
2041+
auto SubI = MIB.buildInstr(Is24Bit ? Z80::Sub24ao : Z80::Sub16ao).addReg(ActualSrc2);
2042+
if (!constrainSelectedInstRegOperands(*SubI, TII, TRI, RBI))
2043+
return false;
2044+
// Dst = COPY HL/UHL
2045+
// the destination can be any register in the general class
2046+
const TargetRegisterClass *RegRC = Is24Bit ? &Z80::R24RegClass : &Z80::R16RegClass;
2047+
MIB.buildCopy(DstReg, PhysAccum);
2048+
if (!RBI.constrainGenericRegister(DstReg, *RegRC, MRI))
2049+
return false;
20932050
} else {
2094-
// ADD16ao/ADD24ao: dst = src1 + src2
2051+
// for addition, use virtual registers in A16/A24 to allow RA to pick IX/IY
2052+
Register ActualSrc1 = MRI.createVirtualRegister(AccumRC);
2053+
MIB.buildCopy(ActualSrc1, Src1Reg);
20952054
unsigned AddOpc = Is24Bit ? Z80::ADD24ao : Z80::ADD16ao;
2096-
BuildMI(MBB, I, DL, TII.get(AddOpc), PhysAccum)
2097-
.addReg(PhysAccum)
2098-
.addReg(ActualSrc2);
2099-
}
2100-
2101-
// Step 3: COPY physical accumulator -> ActualDst
2102-
BuildMI(MBB, I, DL, TII.get(TargetOpcode::COPY), ActualDst)
2103-
.addReg(PhysAccum);
2104-
2105-
// Step 4: If we couldn't constrain DstReg directly, COPY ActualDst -> DstReg
2106-
if (NeedDstCopy) {
2107-
MIB.buildCopy(DstReg, ActualDst);
2055+
auto AddI = MIB.buildInstr(AddOpc, {DstReg}, {ActualSrc1, ActualSrc2});
2056+
if (!constrainSelectedInstRegOperands(*AddI, TII, TRI, RBI))
2057+
return false;
21082058
}
21092059

21102060
I.eraseFromParent();

llvm/lib/Target/Z80/GISel/Z80RegisterBankInfo.cpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,26 @@ Z80RegisterBankInfo::getRegBankFromRegClass(const TargetRegisterClass &RC,
4646
if (Z80::R8RegClass.hasSubClassEq(&RC) ||
4747
Z80::R16RegClass.hasSubClassEq(&RC) ||
4848
Z80::R24RegClass.hasSubClassEq(&RC) ||
49+
Z80::G8RegClass.hasSubClassEq(&RC) ||
50+
Z80::G16RegClass.hasSubClassEq(&RC) ||
51+
Z80::G24RegClass.hasSubClassEq(&RC) ||
52+
Z80::O8RegClass.hasSubClassEq(&RC) ||
53+
Z80::O16RegClass.hasSubClassEq(&RC) ||
54+
Z80::O24RegClass.hasSubClassEq(&RC) ||
55+
Z80::A16RegClass.hasSubClassEq(&RC) ||
56+
Z80::A24RegClass.hasSubClassEq(&RC) ||
57+
Z80::I8RegClass.hasSubClassEq(&RC) ||
58+
Z80::I16RegClass.hasSubClassEq(&RC) ||
59+
Z80::I24RegClass.hasSubClassEq(&RC) ||
60+
Z80::X8RegClass.hasSubClassEq(&RC) ||
61+
Z80::X16RegClass.hasSubClassEq(&RC) ||
62+
Z80::X24RegClass.hasSubClassEq(&RC) ||
63+
Z80::Y8RegClass.hasSubClassEq(&RC) ||
64+
Z80::Y16RegClass.hasSubClassEq(&RC) ||
65+
Z80::Y24RegClass.hasSubClassEq(&RC) ||
4966
Z80::F8RegClass.hasSubClassEq(&RC) ||
67+
Z80::HL16RegClass.hasSubClassEq(&RC) ||
68+
Z80::HL24RegClass.hasSubClassEq(&RC) ||
5069
Z80::Z8RegClass.hasSubClassEq(&RC) ||
5170
Z80::Z16RegClass.hasSubClassEq(&RC) ||
5271
Z80::Z24RegClass.hasSubClassEq(&RC))

llvm/lib/Target/Z80/Z80InstrInfo.cpp

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1198,7 +1198,7 @@ bool Z80InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
11981198
const TargetRegisterInfo &TRI = getRegisterInfo();
11991199
bool Is24Bit = Subtarget.is24Bit();
12001200
bool UseLEA = Is24Bit && !MF.getFunction().hasOptSize();
1201-
LLVM_DEBUG(dbgs() << "\nZ80InstrInfo::expandPostRAPseudo:"; MI.dump());
1201+
12021202
switch (unsigned Opc = MI.getOpcode()) {
12031203
default:
12041204
return false;
@@ -1287,7 +1287,10 @@ bool Z80InstrInfo::expandPostRAPseudo(MachineInstr &MI) const {
12871287
}
12881288
case Z80::Sub16ao:
12891289
case Z80::Sub24ao:
1290-
expandPostRAPseudo(*BuildMI(MBB, MI, DL, get(Z80::RCF)));
1290+
// directly emit OR A, A to clear carry flag
1291+
BuildMI(MBB, MI, DL, get(Z80::OR8ar)).addReg(Z80::A, RegState::Undef)
1292+
.addReg(Z80::A, RegState::ImplicitDefine);
1293+
12911294
MI.setDesc(get(Opc == Z80::Cmp24ao || Opc == Z80::Sub24ao ? Z80::SBC24ao
12921295
: Z80::SBC16ao));
12931296
MIB.addReg(Z80::F, RegState::Implicit);

llvm/lib/Target/Z80/Z80RegisterInfo.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,8 @@ def A16 : Z80RC16<(add HL, I16)> {
142142
let AllocationPriority = 16;
143143
let GlobalPriority = true;
144144
}
145+
def HL16 : Z80RC16<(add HL)>;
146+
def HL24 : Z80RC24<(add UHL)>;
145147
def R16 : Z80RC16<(add G16, I16)>;
146148
let CopyCost = -1 in
147149
def Z16 : Z80RC16<(add SPS, AF, UI)>;

llvm/test/CodeGen/Z80/add24-phi-accum.mir

Lines changed: 0 additions & 20 deletions
This file was deleted.

llvm/test/CodeGen/Z80/control.ll

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -386,23 +386,23 @@ define i8 @switch(i8) {
386386
; EZ80-NEXT: ld iy, 0
387387
; EZ80-NEXT: add iy, sp
388388
; EZ80-NEXT: ld a, (iy + 3)
389-
; EZ80-NEXT: ld c, 0
390-
; EZ80-NEXT: ld de, 0
389+
; EZ80-NEXT: ld e, 0
390+
; EZ80-NEXT: ld bc, 0
391391
; EZ80-NEXT: cp a, 4
392392
; EZ80-NEXT: jr c, BB11_2
393393
; EZ80-NEXT: ; %bb.1:
394394
; EZ80-NEXT: ld a, -1
395395
; EZ80-NEXT: ret
396396
; EZ80-NEXT: BB11_2:
397-
; EZ80-NEXT: ld e, a
397+
; EZ80-NEXT: ld c, a
398398
; EZ80-NEXT: ld hl, JTI11_0
399-
; EZ80-NEXT: add hl, de
400-
; EZ80-NEXT: add hl, de
401-
; EZ80-NEXT: add hl, de
399+
; EZ80-NEXT: add hl, bc
400+
; EZ80-NEXT: add hl, bc
401+
; EZ80-NEXT: add hl, bc
402402
; EZ80-NEXT: ld hl, (hl)
403403
; EZ80-NEXT: jp (hl)
404404
; EZ80-NEXT: BB11_3:
405-
; EZ80-NEXT: ld a, c
405+
; EZ80-NEXT: ld a, e
406406
; EZ80-NEXT: ret
407407
; EZ80-NEXT: BB11_4:
408408
; EZ80-NEXT: ld a, 2

0 commit comments

Comments
 (0)