Skip to content

Commit d7d42ee

Browse files
committed
[Z80] Fix legalization of non-power-of-2 and large merged types
fix two legalization issues: - non-power-of-2 types (like s9) produced by -Oz optimizations caused crashes. the optimizer's closed form summation transformation (n*(n-1)/2) widens 8 bit inputs to 9 bit to prevent overflow, but the legalizer had no rule to handle these types. - G_MERGE_VALUES with s96 destination (4×s24) failed because only s32/s48/s64 were in LegalLargeScalars. this was seemingly a latent bug that became exposed once the s9 widening fix allowed legalization to proceed further. The s9 crash also affected v15, not just v17
1 parent 95d2586 commit d7d42ee

2 files changed

Lines changed: 107 additions & 3 deletions

File tree

llvm/lib/Target/Z80/GISel/Z80LegalizerInfo.cpp

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,17 @@ Z80LegalizerInfo::Z80LegalizerInfo(const Z80Subtarget &STI,
106106
.legalForCartesianProduct(NotMin, NotMax)
107107
// allow creating s32/s48/s64 from legal scalar sources (needed for libcall args)
108108
.legalForCartesianProduct(LegalLargeScalars, LegalScalars)
109+
// allow merging legal scalars into larger types (handles s72, s96, s128, etc.)
110+
.legalIf([=](const LegalityQuery &Q) {
111+
LLT DstTy = Q.Types[0];
112+
LLT SrcTy = Q.Types[1];
113+
if (!SrcTy.isScalar() || !DstTy.isScalar())
114+
return false;
115+
if (DstTy.getSizeInBits() % SrcTy.getSizeInBits() != 0)
116+
return false;
117+
unsigned SrcSize = SrcTy.getSizeInBits();
118+
return SrcSize == 8 || SrcSize == 16 || (Is24Bit && SrcSize == 24);
119+
})
109120
.customIf([=](const LegalityQuery &Q) {
110121
// custom legalize G_MERGE_VALUES when source type is s1, expand into shifts+ORs
111122
return Q.Types[1] == s1;
@@ -117,6 +128,17 @@ Z80LegalizerInfo::Z80LegalizerInfo(const Z80Subtarget &STI,
117128
.legalForCartesianProduct(NotMax, NotMin)
118129
// allow splitting wide scalars into legal scalar parts (s32->s16, s48->s24, s64->s16)
119130
.legalForCartesianProduct(LegalScalars, LegalLargeScalars)
131+
// allow unmerging larger types into legal scalar parts (handles s72, s96, s128, etc.)
132+
.legalIf([=](const LegalityQuery &Q) {
133+
LLT DstTy = Q.Types[0];
134+
LLT SrcTy = Q.Types[1];
135+
if (!SrcTy.isScalar() || !DstTy.isScalar())
136+
return false;
137+
if (SrcTy.getSizeInBits() % DstTy.getSizeInBits() != 0)
138+
return false;
139+
unsigned DstSize = DstTy.getSizeInBits();
140+
return DstSize == 8 || DstSize == 16 || (Is24Bit && DstSize == 24);
141+
})
120142
.clampScalar(1, *NotMin.begin(), *std::prev(NotMin.end()))
121143
.clampScalar(0, *NotMax.begin(), *std::prev(NotMax.end()));
122144

@@ -126,6 +148,8 @@ Z80LegalizerInfo::Z80LegalizerInfo(const Z80Subtarget &STI,
126148

127149
getActionDefinitionsBuilder({G_ZEXT, G_ANYEXT})
128150
.legalForCartesianProduct(LegalScalars, NotMaxWithOne)
151+
.widenScalarToNextPow2(0, 8)
152+
.widenScalarToNextPow2(1, 1)
129153
.clampScalar(0, *LegalScalars.begin(), *std::prev(LegalScalars.end()))
130154
.clampScalar(1, *NotMaxWithOne.begin(), *std::prev(NotMaxWithOne.end()));
131155

@@ -174,11 +198,14 @@ Z80LegalizerInfo::Z80LegalizerInfo(const Z80Subtarget &STI,
174198

175199
getActionDefinitionsBuilder(G_TRUNC)
176200
.legalForCartesianProduct(NotMaxWithOne, LegalScalars)
201+
.widenScalarToNextPow2(0, 1)
202+
.widenScalarToNextPow2(1, 8)
177203
.clampScalar(1, *LegalScalars.begin(), *std::prev(LegalScalars.end()))
178204
.clampScalar(0, *NotMaxWithOne.begin(), *std::prev(NotMaxWithOne.end()));
179205

180206
getActionDefinitionsBuilder({G_FREEZE, G_PHI, G_CONSTANT})
181207
.legalFor(LegalTypes)
208+
.widenScalarToNextPow2(0, 8)
182209
.clampScalar(0, s8, sMax);
183210

184211
getActionDefinitionsBuilder(G_FCONSTANT)
@@ -219,6 +246,7 @@ Z80LegalizerInfo::Z80LegalizerInfo(const Z80Subtarget &STI,
219246
getActionDefinitionsBuilder(G_MUL)
220247
.legalIf(all(predZ180Ops, typeIs(0, s8)))
221248
.libcallFor(LegalLibcallScalars)
249+
.widenScalarToNextPow2(0, 8)
222250
.minScalar(0, s8)
223251
.minScalar(0, s16)
224252
.minScalarIf(pred24Bit, 0, s24)
@@ -239,6 +267,7 @@ Z80LegalizerInfo::Z80LegalizerInfo(const Z80Subtarget &STI,
239267
getActionDefinitionsBuilder({G_SHL, G_LSHR, G_ASHR})
240268
.customForCartesianProduct(LegalLibcallScalars, {s8})
241269
.clampScalar(1, s8, s8)
270+
.widenScalarToNextPow2(0, 8)
242271
.minScalar(0, s8)
243272
.minScalar(0, s16)
244273
.minScalarIf(pred24Bit, 0, s24)

llvm/lib/Target/Z80/Z80MachineLateOptimization.cpp

Lines changed: 78 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,11 @@ class Z80MachineLateOptimization : public MachineFunctionPass {
142142
// after SBC A,A: A is a member of {0, 0xFF} and Z = (A == 0), so OR A,A is redundant
143143
bool ZFlagReflectsAZero = false;
144144

145+
// track redundant A<->reg copies, after "ld X, a", X mirrors A
146+
// if A hasnt changed when we see "ld a, X", we can eliminate the copy
147+
// AMirroredInReg = X means the value currently in A is also in X
148+
MCRegister AMirroredInReg = Z80::NoRegister;
149+
145150
template <typename... Args> void assign(MCRegister Reg, Args &&...args) {
146151
RegVals[Reg] = RegVal(std::forward<Args>(args)..., Reg, *TRI);
147152
}
@@ -452,6 +457,7 @@ bool Z80MachineLateOptimization::runOnMachineFunction(MachineFunction &MF) {
452457
LiveUnits.addLiveIns(MBB);
453458
clobberAll();
454459
ZFlagReflectsAZero = false;
460+
AMirroredInReg = Z80::NoRegister;
455461
for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); I != E;) {
456462
MachineInstrBuilder MIB(MF, I);
457463
LiveUnits.stepForward(*I);
@@ -624,6 +630,45 @@ bool Z80MachineLateOptimization::runOnMachineFunction(MachineFunction &MF) {
624630
continue;
625631
}
626632
break;
633+
// redundant copy elimination: track COPY from A and eliminate COPY back if A unchanged
634+
// this runs before pseudo expansion, so we see COPY, not LD8gg/xx/yy
635+
case TargetOpcode::COPY: {
636+
MCRegister CopyDst = MIB->getOperand(0).getReg();
637+
MCRegister CopySrc = MIB->getOperand(1).getReg();
638+
// only handle 8-bit A-related copies
639+
if (!Z80::R8RegClass.contains(CopyDst) || !Z80::R8RegClass.contains(CopySrc))
640+
break;
641+
// if copying TO A from the register that mirrors A, the copy is redundant
642+
if (CopyDst == Z80::A && CopySrc == AMirroredInReg) {
643+
LLVM_DEBUG(dbgs() << "Erasing redundant copy to A from "
644+
<< TRI->getName(CopySrc) << " (A mirrors "
645+
<< TRI->getName(AMirroredInReg) << "): ";
646+
MIB->dump());
647+
MIB->eraseFromParent();
648+
Changed = true;
649+
continue;
650+
}
651+
// if copying FROM A to X, record that X now mirrors A
652+
if (CopySrc == Z80::A) {
653+
AMirroredInReg = CopyDst;
654+
}
655+
break;
656+
}
657+
// 8 bit ALU pseudos: ADD8_gisel, SUB8_gisel, etc. have an explicit 8 bit def
658+
// and implicit def A (because Z80 ALU ops always write to A)
659+
// after these instructions, A and the dest register hold the same value
660+
case Z80::ADD8_gisel:
661+
case Z80::SUB8_gisel:
662+
case Z80::AND8_gisel:
663+
case Z80::OR8_gisel:
664+
case Z80::XOR8_gisel: {
665+
MCRegister ExplicitDst = MIB->getOperand(0).getReg();
666+
// after ALU pseudo, explicit dest and A have the same value if dest is not A itself, record that dest mirrors A
667+
if (ExplicitDst != Z80::A) {
668+
AMirroredInReg = ExplicitDst;
669+
}
670+
break;
671+
}
627672
case Z80::Sub16ao:
628673
case Z80::Sub24ao:
629674
case Z80::Cmp16ao:
@@ -734,16 +779,46 @@ bool Z80MachineLateOptimization::runOnMachineFunction(MachineFunction &MF) {
734779
}
735780
}
736781

737-
// after SBC A,A, Z flag correctly reflects (A == 0)
738-
// track this so we can eliminate redundant OR A,A
782+
// SBC A,A sets Z=(A==0). INC/DEC A preserve this. Track to eliminate redundant OR A,A
739783
unsigned Opc = MIB->getOpcode();
740784
if (Opc == Z80::SBC8ar && MIB->getOperand(0).getReg() == Z80::A) {
741785
ZFlagReflectsAZero = true;
786+
} else if (ZFlagReflectsAZero &&
787+
(Opc == Z80::INC8r || Opc == Z80::DEC8r) &&
788+
MIB->getOperand(0).getReg() == Z80::A) {
789+
// INC/DEC A preserves Z=(A==0)
742790
} else if (ClobberedA || ClobberedF) {
743-
// if A or F is modified by something other than SBC A,A, reset tracking
744791
ZFlagReflectsAZero = false;
745792
}
746793

794+
// invalidate A<->reg mirroring if A or mirrored reg is clobbered (except by tracked ops)
795+
if (ClobberedA) {
796+
bool isCopyFromA = Opc == TargetOpcode::COPY &&
797+
MIB->getOperand(1).getReg() == Z80::A;
798+
bool isALUPseudo = (Opc == Z80::ADD8_gisel || Opc == Z80::SUB8_gisel ||
799+
Opc == Z80::AND8_gisel || Opc == Z80::OR8_gisel ||
800+
Opc == Z80::XOR8_gisel);
801+
if (!isCopyFromA && !isALUPseudo)
802+
AMirroredInReg = Z80::NoRegister;
803+
}
804+
if (AMirroredInReg != Z80::NoRegister) {
805+
bool isALUPseudoOrCopy = (Opc == Z80::ADD8_gisel || Opc == Z80::SUB8_gisel ||
806+
Opc == Z80::AND8_gisel || Opc == Z80::OR8_gisel ||
807+
Opc == Z80::XOR8_gisel || Opc == TargetOpcode::COPY);
808+
if (!isALUPseudoOrCopy) {
809+
for (MachineOperand &MO : MIB->operands()) {
810+
if (MO.isReg() && MO.isDef()) {
811+
for (MCRegAliasIterator AI(MO.getReg(), TRI, true); AI.isValid(); ++AI) {
812+
if (*AI == AMirroredInReg) {
813+
AMirroredInReg = Z80::NoRegister;
814+
break;
815+
}
816+
}
817+
}
818+
}
819+
}
820+
}
821+
747822
// Apply KnownFlags after clobbering defs.
748823
if (KnownFlags)
749824
assign(Z80::F, KnownFlags.getKnownVal(KnownFlagsVal),

0 commit comments

Comments
 (0)