Skip to content

Commit 825a106

Browse files
author
Bram Speeckaert
committed
DivUtils: Add unsigned division magic function
Takes the logic from Jit64 and moves it into DivUtils, so it can be reused by other backends as well.
1 parent 3948ac9 commit 825a106

5 files changed

Lines changed: 97 additions & 15 deletions

File tree

Source/Core/Core/PowerPC/Jit64/Jit_Integer.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1792,7 +1792,7 @@ void Jit64::divwx(UGeckoInstruction inst)
17921792
else
17931793
{
17941794
// Optimize signed 32-bit integer division by a constant
1795-
Magic m = SignedDivisionConstants(divisor);
1795+
SignedMagic m = SignedDivisionConstants(divisor);
17961796

17971797
MOVSX(64, 32, RSCRATCH, Ra);
17981798

Source/Core/Core/PowerPC/JitArm64/JitArm64_Integer.cpp

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1675,7 +1675,7 @@ void JitArm64::divwx(UGeckoInstruction inst)
16751675
else
16761676
{
16771677
// Optimize signed 32-bit integer division by a constant
1678-
Magic m = SignedDivisionConstants(divisor);
1678+
SignedMagic m = SignedDivisionConstants(divisor);
16791679

16801680
ARM64Reg WA = gpr.GetReg();
16811681
ARM64Reg WB = gpr.GetReg();

Source/Core/Core/PowerPC/JitCommon/DivUtils.cpp

Lines changed: 37 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -3,16 +3,18 @@
33

44
#include "Core/PowerPC/JitCommon/DivUtils.h"
55

6+
#include <algorithm>
7+
#include <bit>
68
#include <cstdlib>
79

810
namespace JitCommon
911
{
10-
Magic SignedDivisionConstants(s32 d)
12+
SignedMagic SignedDivisionConstants(s32 divisor)
1113
{
1214
const u32 two31 = 2147483648;
1315

14-
const u32 ad = std::abs(d);
15-
const u32 t = two31 - (d < 0);
16+
const u32 ad = std::abs(divisor);
17+
const u32 t = two31 - (divisor < 0);
1618
const u32 anc = t - 1 - t % ad;
1719
u32 q1 = two31 / anc;
1820
u32 r1 = two31 - q1 * anc;
@@ -44,13 +46,43 @@ Magic SignedDivisionConstants(s32 d)
4446
delta = ad - r2;
4547
} while (q1 < delta || (q1 == delta && r1 == 0));
4648

47-
Magic mag;
49+
SignedMagic mag;
4850
mag.multiplier = q2 + 1;
49-
if (d < 0)
51+
if (divisor < 0)
5052
mag.multiplier = -mag.multiplier;
5153
mag.shift = p - 32;
5254

5355
return mag;
5456
}
5557

58+
// Computes the multiplier/shift pair (plus a "fast" flag) used to replace an
// unsigned 32-bit division by `divisor` with a multiplication and a shift.
// The header requires that divisor is not 0, 1 or a power of two; nothing in
// this function checks that, and a zero divisor is not guarded against.
UnsignedMagic UnsignedDivisionConstants(u32 divisor)
59+
{
60+
// Index of the highest set bit of divisor, i.e. floor(log2(divisor)).
u32 shift = 31 - std::countl_zero(divisor);
61+
62+
// 2^(32 + shift); kept in 64 bits because it does not fit in a u32.
u64 magic_dividend = 0x100000000ULL << shift;
63+
// floor(2^(32 + shift) / divisor), narrowed to 32 bits.
u32 multiplier = magic_dividend / divisor;
64+
// Equals floor(2^32 / divisor).
u32 max_quotient = multiplier >> shift;
65+
66+
// Test for failure in round-up method
// The rounded-up candidate (multiplier + 1) is evaluated on the dividend
// max_quotient * divisor - 1, which sits one below a multiple of divisor and
// therefore has the exact quotient max_quotient - 1. The product is widened to
// 64 bits before the multiplication so the high half can be extracted.
67+
u32 round_up = (u64(multiplier + 1) * (max_quotient * divisor - 1)) >> (shift + 32);
68+
// The round-up multiplier is accepted only if that probe came out exact.
bool fast = round_up == max_quotient - 1;
69+
70+
if (fast)
71+
{
72+
// Commit to the rounded-up multiplier that was just validated.
multiplier++;
73+
74+
// Use smallest magic number and shift amount possible
// Low zero bits of the multiplier are dropped together with the same amount of
// shift; std::min caps the reduction so shift cannot go below zero.
75+
u32 trailing_zeroes = std::min(shift, u32(std::countr_zero(multiplier)));
76+
multiplier >>= trailing_zeroes;
77+
shift -= trailing_zeroes;
78+
}
79+
80+
// When fast is false, multiplier keeps its rounded-down value and shift is
// left untouched.
// NOTE(review): callers presumably have to emit a different code sequence in
// that case -- confirm against the JIT backends that consume this struct.
UnsignedMagic mag;
81+
mag.multiplier = multiplier;
82+
// NOTE(review): shift is a u32 here but UnsignedMagic::shift is a u8; the value
// never exceeds 31, so the narrowing is lossless.
mag.shift = shift;
83+
mag.fast = fast;
84+
85+
return mag;
86+
}
87+
5688
} // namespace JitCommon

Source/Core/Core/PowerPC/JitCommon/DivUtils.h

Lines changed: 23 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@
77

88
namespace JitCommon
99
{
10-
struct Magic
10+
struct SignedMagic
1111
{
1212
s32 multiplier;
1313
u8 shift;
@@ -16,6 +16,27 @@ struct Magic
1616
// Calculate the constants required to optimize a signed 32-bit integer division.
1717
// Taken from The PowerPC Compiler Writer's Guide and LLVM.
1818
// Divisor must not be -1, 0, 1 or INT_MIN.
19-
Magic SignedDivisionConstants(s32 divisor);
19+
SignedMagic SignedDivisionConstants(s32 divisor);
20+
21+
// Result of UnsignedDivisionConstants(): the constants for dividing an
// unsigned 32-bit value by a fixed divisor.
struct UnsignedMagic
22+
{
23+
// 32-bit magic multiplier.
u32 multiplier;
24+
// Shift amount that accompanies the multiplier (at most 31 as computed by
// UnsignedDivisionConstants).
u8 shift;
25+
// True when UnsignedDivisionConstants validated and stored the rounded-up
// multiplier; false when the rounded-down multiplier was kept instead.
// NOTE(review): how consumers must treat the false case is not visible here --
// confirm against the JIT backends.
bool fast;
26+
};
27+
28+
/// Calculate the constants required to optimize an unsigned 32-bit integer
29+
/// division.
30+
/// Divisor must not be 0, 1, or a power of two.
/// These preconditions are not checked by the implementation.
///
/// Returns an UnsignedMagic holding the multiplier, the shift amount and a
/// `fast` flag; `fast` is true when the rounded-up multiplier could be used and
/// false when the rounded-down multiplier is returned instead.
31+
///
32+
/// Original implementation by calc84maniac.
33+
/// Results are the same as the approach laid out in Hacker's Delight, with an
34+
/// improvement for so-called uncooperative divisors (e.g. 7), as discovered by
35+
/// ridiculousfish.
36+
///
37+
/// See also:
38+
/// https://ridiculousfish.com/blog/posts/labor-of-division-episode-iii.html
39+
/// https://rubenvannieuwpoort.nl/posts/division-by-constant-unsigned-integers
40+
UnsignedMagic UnsignedDivisionConstants(u32 divisor);
2041

2142
} // namespace JitCommon

Source/UnitTests/Core/PowerPC/DivUtilsTest.cpp

Lines changed: 35 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -9,12 +9,12 @@ using namespace JitCommon;
99

1010
TEST(DivUtils, Signed)
1111
{
12-
Magic m3 = SignedDivisionConstants(3);
13-
Magic m5 = SignedDivisionConstants(5);
14-
Magic m7 = SignedDivisionConstants(7);
15-
Magic minus3 = SignedDivisionConstants(-3);
16-
Magic minus5 = SignedDivisionConstants(-5);
17-
Magic minus7 = SignedDivisionConstants(-7);
12+
SignedMagic m3 = SignedDivisionConstants(3);
13+
SignedMagic m5 = SignedDivisionConstants(5);
14+
SignedMagic m7 = SignedDivisionConstants(7);
15+
SignedMagic minus3 = SignedDivisionConstants(-3);
16+
SignedMagic minus5 = SignedDivisionConstants(-5);
17+
SignedMagic minus7 = SignedDivisionConstants(-7);
1818

1919
EXPECT_EQ(0x55555556, m3.multiplier);
2020
EXPECT_EQ(0, m3.shift);
@@ -30,3 +30,32 @@ TEST(DivUtils, Signed)
3030
EXPECT_EQ(0x6DB6DB6D, minus7.multiplier);
3131
EXPECT_EQ(2, minus7.shift);
3232
}
33+
34+
// Pins the multiplier, shift and fast flag that UnsignedDivisionConstants()
// produces for a handful of small divisors that are not powers of two.
TEST(DivUtils, Unsigned)
35+
{
36+
UnsignedMagic m3 = UnsignedDivisionConstants(3);
37+
UnsignedMagic m5 = UnsignedDivisionConstants(5);
38+
UnsignedMagic m7 = UnsignedDivisionConstants(7);
39+
UnsignedMagic m9 = UnsignedDivisionConstants(9);
40+
UnsignedMagic m19 = UnsignedDivisionConstants(19);
41+
42+
// 3 and 5: rounded-up multiplier accepted; it is odd, so shift stays at
// floor(log2(divisor)).
EXPECT_EQ(0xAAAAAAABU, m3.multiplier);
43+
EXPECT_EQ(1, m3.shift);
44+
EXPECT_TRUE(m3.fast);
45+
46+
EXPECT_EQ(0xCCCCCCCDU, m5.multiplier);
47+
EXPECT_EQ(2, m5.shift);
48+
EXPECT_TRUE(m5.fast);
49+
50+
// 7: the round-up probe fails, so the rounded-down multiplier is expected and
// fast is false.
EXPECT_EQ(0x92492492U, m7.multiplier);
51+
EXPECT_EQ(2, m7.shift);
52+
EXPECT_FALSE(m7.fast);
53+
54+
// 9: the expected shift (1) is smaller than floor(log2(9)) = 3, so this case
// covers the trailing-zero reduction of multiplier and shift.
EXPECT_EQ(0x38E38E39U, m9.multiplier);
55+
EXPECT_EQ(1, m9.shift);
56+
EXPECT_TRUE(m9.fast);
57+
58+
// 19: another divisor for which fast is expected to be false.
EXPECT_EQ(0xD79435E5U, m19.multiplier);
59+
EXPECT_EQ(4, m19.shift);
60+
EXPECT_FALSE(m19.fast);
61+
}

0 commit comments

Comments
 (0)