Skip to content

Commit 2580837

Browse files
author
Bram Speeckaert
committed
JitArm64: Optimize divwux
When the divisor is a constant value, we can emit more efficient code. For powers of two, we can use bit shifts. For other values, we can instead use a multiplication by magic constant method.

- Example 1 - Division by 16 (power of two)
Before:
mov w24, #0x10 ; =16
udiv w27, w25, w24
After:
lsr w27, w25, #4

- Example 2 - Division by 10 (fast)
Before:
mov w25, #0xa ; =10
udiv w27, w26, w25
After:
mov w27, #0xcccd ; =52429
movk w27, #0xcccc, lsl #16
umull x27, w26, w27
lsr x27, x27, #35

- Example 3 - Division by 127 (slow)
Before:
mov w26, #0x7f ; =127
udiv w27, w27, w26
After:
mov w26, #0x408 ; =1032
movk w26, #0x8102, lsl #16
umaddl x27, w27, w26, x26
lsr x27, x27, #38
1 parent 749ee2f commit 2580837

1 file changed

Lines changed: 54 additions & 0 deletions

File tree

Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1538,6 +1538,60 @@ void JitArm64::divwux(UGeckoInstruction inst)
15381538
if (inst.Rc)
15391539
ComputeRC0(gpr.GetImm(d));
15401540
}
1541+
else if (gpr.IsImm(b))
1542+
{
1543+
const u32 divisor = gpr.GetImm(b);
1544+
1545+
if (divisor == 0)
1546+
{
1547+
gpr.SetImmediate(d, 0);
1548+
if (inst.Rc)
1549+
ComputeRC0(0);
1550+
}
1551+
else
1552+
{
1553+
const bool allocate_reg = d == a;
1554+
gpr.BindToRegister(d, allocate_reg);
1555+
1556+
ARM64Reg RD = gpr.R(d);
1557+
ARM64Reg RA = gpr.R(a);
1558+
1559+
if (MathUtil::IsPow2(divisor))
1560+
{
1561+
int shift = MathUtil::IntLog2(divisor);
1562+
if (shift)
1563+
LSR(RD, RA, shift);
1564+
else if (d != a)
1565+
MOV(RD, RA);
1566+
}
1567+
else
1568+
{
1569+
UnsignedMagic m = UnsignedDivisionConstants(divisor);
1570+
1571+
ARM64Reg WI = allocate_reg ? gpr.GetReg() : RD;
1572+
ARM64Reg XD = EncodeRegTo64(RD);
1573+
1574+
MOVI2R(WI, m.multiplier);
1575+
1576+
if (m.fast)
1577+
{
1578+
UMULL(XD, RA, WI);
1579+
}
1580+
else
1581+
{
1582+
UMADDL(XD, RA, WI, EncodeRegTo64(WI));
1583+
}
1584+
1585+
LSR(XD, XD, 32 + m.shift);
1586+
1587+
if (allocate_reg)
1588+
gpr.Unlock(WI);
1589+
}
1590+
1591+
if (inst.Rc)
1592+
ComputeRC0(gpr.R(d));
1593+
}
1594+
}
15411595
else
15421596
{
15431597
gpr.BindToRegister(d, d == a || d == b);

0 commit comments

Comments
 (0)