Skip to content

Commit 945177f

Browse files
switch div3by2 implementation to match GMP paper
Signed-off-by: Andrew Whitehead <cywolf@gmail.com>
1 parent 6dcc0a0 commit 945177f

4 files changed

Lines changed: 155 additions & 80 deletions

File tree

src/primitives.rs

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,5 @@
11
use crate::{Choice, WideWord, Word};
22

3-
/// Adds wide numbers represented by pairs of (least significant word, most significant word)
4-
/// and returns the result in the same format `(lo, hi)`.
5-
#[inline(always)]
6-
#[allow(clippy::cast_possible_truncation)]
7-
pub(crate) const fn addhilo(x_lo: Word, x_hi: Word, y_lo: Word, y_hi: Word) -> (Word, Word) {
8-
let res = (((x_hi as WideWord) << Word::BITS) | (x_lo as WideWord))
9-
+ (((y_hi as WideWord) << Word::BITS) | (y_lo as WideWord));
10-
(res as Word, (res >> Word::BITS) as Word)
11-
}
12-
133
/// Computes `lhs + rhs + carry`, returning the result along with the new carry (0, 1, or 2).
144
#[inline(always)]
155
#[allow(clippy::cast_possible_truncation)]

src/uint/div_limb.rs

Lines changed: 116 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -3,14 +3,14 @@
33
//! (DOI: 10.1109/TC.2010.143, <https://gmplib.org/~tege/division-paper.pdf>).
44
55
use crate::{
6-
Choice, CtSelect, Limb, NonZero, Uint, WideWord, Word,
7-
primitives::{addhilo, widening_mul},
8-
word,
6+
Choice, CtSelect, Limb, NonZero, Uint, WideWord, Word, primitives::widening_mul, word,
97
};
108

119
cpubits::cpubits! {
1210
32 => {
1311
/// Calculates the reciprocal of the given 32-bit divisor with the highmost bit set.
12+
///
13+
/// This method corresponds to Algorithm 3
1414
pub const fn reciprocal(d: Word) -> Word {
1515
debug_assert!(d >= (1 << (Word::BITS - 1)));
1616

@@ -34,7 +34,7 @@ cpubits::cpubits! {
3434

3535
// The paper has `(v2 + 1) * d / 2^32` (there's another 2^32, but it's accounted for later).
3636
// If `v2 == 2^32-1` this should give `d`, but we can't achieve this in our wrapping arithmetic.
37-
// Hence the `ct_select()`.
37+
// Hence the `word::select()`.
3838
let x = v2.wrapping_add(1);
3939
let (_lo, hi) = widening_mul(x, d);
4040
let hi = word::select(d, hi, Choice::from_u32_nz(x));
@@ -44,6 +44,8 @@ cpubits::cpubits! {
4444
}
4545
64 => {
4646
/// Calculates the reciprocal of the given 64-bit divisor with the highmost bit set.
47+
///
48+
/// This method corresponds to Algorithm 2
4749
pub const fn reciprocal(d: Word) -> Word {
4850
debug_assert!(d >= (1 << (Word::BITS - 1)));
4951

@@ -64,7 +66,7 @@ cpubits::cpubits! {
6466

6567
// The paper has `(v3 + 1) * d / 2^64` (there's another 2^64, but it's accounted for later).
6668
// If `v3 == 2^64-1` this should give `d`, but we can't achieve this in our wrapping arithmetic.
67-
// Hence the `ct_select()`.
69+
// Hence the `word::select()`.
6870
let x = v3.wrapping_add(1);
6971
let (_lo, hi) = widening_mul(x, d);
7072
let hi = word::select(d, hi, word::choice_from_nz(x));
@@ -104,15 +106,18 @@ const fn short_div(mut dividend: u32, dividend_bits: u32, divisor: u32, divisor_
104106

105107
/// Calculate the quotient and the remainder of the division of a wide word
106108
/// (supplied as high and low words) by `d`, with a precalculated reciprocal `v`.
109+
///
110+
/// This method corresponds to Algorithm 4
107111
#[inline(always)]
108-
pub(crate) const fn div2by1(u1: Word, u0: Word, reciprocal: &Reciprocal) -> (Word, Word) {
112+
pub(crate) const fn div2by1(u0: Word, u1: Word, reciprocal: &Reciprocal) -> (Word, Word) {
109113
let d = reciprocal.divisor_normalized;
114+
let v = reciprocal.reciprocal;
110115

111-
debug_assert!(d >= (1 << (Word::BITS - 1)));
112-
debug_assert!(u1 < d);
116+
debug_assert!(d >= (1 << (Word::BITS - 1)), "divisor top bit unset");
117+
debug_assert!(u1 < d, "dividend >= divisor");
113118

114-
let (q0, q1) = widening_mul(reciprocal.reciprocal, u1);
115-
let (q0, q1) = addhilo(q0, q1, u0, u1);
119+
let q = (v as WideWord * u1 as WideWord) + word::join(u0, u1);
120+
let (q0, q1) = word::split_wide(q);
116121
let q1 = q1.wrapping_add(1);
117122
let r = u0.wrapping_sub(q1.wrapping_mul(d));
118123

@@ -136,48 +141,36 @@ pub(crate) const fn div2by1(u1: Word, u0: Word, reciprocal: &Reciprocal) -> (Wor
136141
/// calculates `q` such that `q - 1 <= floor(u / v) <= q`.
137142
/// In place of `v1` takes its reciprocal, and assumes that `v` was already pre-shifted
138143
/// so that v1 has its most significant bit set (that is, the reciprocal's `shift` is 0).
144+
///
145+
// This method corresponds to Algorithm 5
139146
#[inline(always)]
147+
#[allow(clippy::cast_possible_truncation)]
140148
pub(crate) const fn div3by2(
141-
u2: Word,
142-
u1: Word,
143-
u0: Word,
144-
v1_reciprocal: &Reciprocal,
145-
v0: Word,
146-
) -> Word {
147-
debug_assert!(v1_reciprocal.shift == 0);
148-
debug_assert!(u2 <= v1_reciprocal.divisor_normalized);
149-
150-
// This method corresponds to Algorithm Q:
151-
// https://janmr.com/blog/2014/04/basic-multiple-precision-long-division/
152-
153-
let q_maxed = word::choice_from_eq(u2, v1_reciprocal.divisor_normalized);
154-
let (mut quo, rem) = div2by1(word::select(u2, 0, q_maxed), u1, v1_reciprocal);
155-
// When the leading dividend word equals the leading divisor word, cap the quotient
156-
// at Word::MAX and set the remainder to the sum of the top dividend words.
157-
quo = word::select(quo, Word::MAX, q_maxed);
158-
let mut rem = word::select_wide(
159-
rem as WideWord,
160-
(u2 as WideWord) + (u1 as WideWord),
161-
q_maxed,
162-
);
163-
164-
let mut i = 0;
165-
while i < 2 {
166-
let qy = (quo as WideWord) * (v0 as WideWord);
167-
let rx = (rem << Word::BITS) | (u0 as WideWord);
168-
// If r < b and q*y[-2] > r*x[-1], then set q = q - 1 and r = r + v1
169-
let done =
170-
word::choice_from_nz((rem >> Word::BITS) as Word).or(word::choice_from_wide_le(qy, rx));
171-
quo = word::select(quo.wrapping_sub(1), quo, done);
172-
rem = word::select_wide(
173-
rem + (v1_reciprocal.divisor_normalized as WideWord),
174-
rem,
175-
done,
176-
);
177-
i += 1;
178-
}
149+
(u0, u1, u2): (Word, Word, Word),
150+
(d0, d1): (Word, Word),
151+
v: Word,
152+
) -> (Word, WideWord) {
153+
let d = word::join(d0, d1);
154+
155+
debug_assert!(d >= (1 << (WideWord::BITS - 1)), "divisor top bit unset");
156+
debug_assert!(word::join(u1, u2) < d, "dividend >= divisor");
157+
158+
let q = (v as WideWord * u2 as WideWord) + word::join(u1, u2);
159+
let q1w = q >> Word::BITS;
160+
let r1 = u1.wrapping_sub((q1w as Word).wrapping_mul(d1));
161+
let t = d0 as WideWord * q1w;
162+
let r = word::join(u0, r1).wrapping_sub(t).wrapping_sub(d);
163+
164+
let r1_ge_q0 = word::choice_from_le(q as Word, (r >> Word::BITS) as Word);
165+
let q1 = q1w as Word;
166+
let q1 = word::select(q1.wrapping_add(1), q1, r1_ge_q0);
167+
let r = word::select_wide(r, r.wrapping_add(d), r1_ge_q0);
168+
169+
let r_ge_d = word::choice_from_wide_le(d, r);
170+
let q1 = word::select(q1, q1.wrapping_add(1), r_ge_d);
171+
let r = word::select_wide(r, r.wrapping_sub(d), r_ge_d);
179172

180-
quo
173+
(q1, r)
181174
}
182175

183176
/// A pre-calculated reciprocal for division by a single limb.
@@ -229,6 +222,35 @@ impl Reciprocal {
229222
pub const fn shift(&self) -> u32 {
230223
self.shift
231224
}
225+
226+
/// Adjusted reciprocal for 3x2 division
227+
///
228+
/// This method corresponds to Algorithm 6
229+
#[must_use]
230+
pub const fn reciprocal_3by2(&self, d0: Word, d1: Word) -> Word {
231+
debug_assert!(self.shift == 0 && d1 == self.divisor_normalized);
232+
233+
let v = self.reciprocal;
234+
let p = d1.wrapping_mul(v).wrapping_add(d0);
235+
236+
let p_lt_d0 = word::choice_from_lt(p, d0);
237+
let v = word::select(v, v.wrapping_sub(1), p_lt_d0);
238+
239+
let p_ge_d1 = word::choice_from_le(d1, p).and(p_lt_d0);
240+
let v = word::select(v, v.wrapping_sub(1), p_ge_d1);
241+
let p = word::select(p, p.wrapping_sub(d1), p_ge_d1);
242+
let p = word::select(p, p.wrapping_sub(d1), p_lt_d0);
243+
244+
let (t0, t1) = widening_mul(v, d0);
245+
let p = p.wrapping_add(t1);
246+
247+
let p_lt_t1 = word::choice_from_lt(p, t1);
248+
let v = word::select(v, v.wrapping_sub(1), p_lt_t1);
249+
let d = word::join(d0, d1);
250+
let t0p = word::join(t0, p);
251+
let t0p_ge_d = word::choice_from_wide_le(d, t0p).and(p_lt_t1);
252+
word::select(v, v.wrapping_sub(1), t0p_ge_d)
253+
}
232254
}
233255

234256
impl CtSelect for Reciprocal {
@@ -265,7 +287,7 @@ pub(crate) const fn rem_limb_with_reciprocal<const L: usize>(
265287
let mut j = L;
266288
while j > 0 {
267289
j -= 1;
268-
let (_, rj) = div2by1(r, u_shifted.as_limbs()[j].0, reciprocal);
290+
let (_, rj) = div2by1(u_shifted.as_limbs()[j].0, r, reciprocal);
269291
r = rj;
270292
}
271293
Limb(r >> reciprocal.shift)
@@ -281,16 +303,57 @@ pub(crate) const fn mul_rem(a: Limb, b: Limb, d: NonZero<Limb>) -> Limb {
281303

282304
#[cfg(test)]
283305
mod tests {
284-
use super::{Reciprocal, div2by1};
285-
use crate::{Limb, NonZero, Word};
306+
use super::{Reciprocal, div2by1, reciprocal};
307+
use crate::{Limb, NonZero, Uint, WideWord, Word, word};
308+
309+
#[test]
310+
fn reciprocal_valid() {
311+
fn test(d: Word) {
312+
let v = reciprocal(d);
313+
314+
// the reciprocal must be equal to floor((β^2 - 1) / d) - β
315+
// v = floor((β^2 - 1) / d) - β = floor((β - 1 - d)*β + β - 1>/d)
316+
let expected = WideWord::MAX / (d as WideWord) - Word::MAX as WideWord - 1;
317+
assert_eq!(v as WideWord, expected);
318+
}
319+
320+
test(Word::MAX);
321+
test(1 << (Word::BITS - 1));
322+
test((1 << (Word::BITS - 1)) | 1);
323+
}
324+
325+
#[test]
326+
fn reciprocal_3by2_valid() {
327+
fn test(d: WideWord) {
328+
let (d0, d1) = word::split_wide(d);
329+
let v0 = Reciprocal::new(NonZero::<Limb>::new_unwrap(Limb(d1)));
330+
let v = v0.reciprocal_3by2(d0, d1);
331+
332+
// the reciprocal must be equal to v = floor((β^3 − 1)/d) − β
333+
// (β^3 − βd - 1)/d - 1 < v <= (β^3 − βd - 1)/d
334+
// β^3 − βd - 1 - d < v*d <= β^3 − βd - 1
335+
// β^3-1 - d < (v+β)d <= β^3-1
336+
let actual = (Uint::<3>::from_word(v)
337+
+ Uint::<3>::ZERO.set_bit_vartime(Word::BITS, true))
338+
.checked_mul(&Uint::<3>::from_wide_word(d))
339+
.expect("overflow");
340+
let min = Uint::<3>::MAX - Uint::<3>::from_wide_word(d);
341+
assert!(actual > min, "{actual} <= {min}");
342+
}
343+
344+
test(WideWord::MAX);
345+
test(1 << (WideWord::BITS - 1));
346+
test((1 << (WideWord::BITS - 1)) | 1);
347+
}
348+
286349
#[test]
287350
fn div2by1_overflow() {
288351
// A regression test for a situation when in div2by1() an operation (`q1 + 1`)
289352
// that is protected from overflowing by a condition in the original paper (`r >= d`)
290353
// still overflows because we're calculating the results for both branches.
291354
let r = Reciprocal::new(NonZero::new(Limb(Word::MAX - 1)).unwrap());
292355
assert_eq!(
293-
div2by1(Word::MAX - 2, Word::MAX - 63, &r),
356+
div2by1(Word::MAX - 63, Word::MAX - 2, &r),
294357
(Word::MAX, Word::MAX - 65)
295358
);
296359
}

src/uint/ref_type/div.rs

Lines changed: 19 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -234,7 +234,7 @@ impl UintRef {
234234
// but this can only be the case if `limb_div` is falsy, in which case we discard
235235
// the result anyway, so we conditionally set `x_hi` to zero for this branch.
236236
let x_hi_adjusted = Limb::select(Limb::ZERO, x_hi, limb_div);
237-
let (quo2, rem2) = div2by1(x_hi_adjusted.0, x.limbs[0].0, &reciprocal);
237+
let (quo2, rem2) = div2by1(x.limbs[0].0, x_hi_adjusted.0, &reciprocal);
238238

239239
// Adjust the quotient for single limb division
240240
x.limbs[0] = Limb::select(x.limbs[0], Limb(quo2), limb_div);
@@ -293,14 +293,15 @@ impl UintRef {
293293
let mut i;
294294
let mut carry;
295295

296+
// Compute the adjusted reciprocal
297+
let v = reciprocal.reciprocal_3by2(y.limbs[ysize - 2].0, y.limbs[ysize - 1].0);
298+
296299
while xi > 0 {
297300
// Divide high dividend words by the high divisor word to estimate the quotient word
298-
let mut quo = div3by2(
299-
x_hi.0,
300-
x_xi.0,
301-
x.limbs[xi - 1].0,
302-
&reciprocal,
303-
y.limbs[ysize - 2].0,
301+
let (mut quo, _) = div3by2(
302+
(x.limbs[xi - 1].0, x_xi.0, x_hi.0),
303+
(y.limbs[ysize - 2].0, y.limbs[ysize - 1].0),
304+
v,
304305
);
305306

306307
// This loop is a no-op once xi is smaller than the number of words in the divisor
@@ -415,7 +416,7 @@ impl UintRef {
415416
// but this can only be the case if `limb_div` is falsy, in which case we discard
416417
// the result anyway, so we conditionally set `x_hi` to zero for this branch.
417418
let x_hi_adjusted = Limb::select(Limb::ZERO, x_hi, limb_div);
418-
let (_, rem2) = div2by1(x_hi_adjusted.0, x.limbs[0].0, &reciprocal);
419+
let (_, rem2) = div2by1(x.limbs[0].0, x_hi_adjusted.0, &reciprocal);
419420

420421
// Copy out the low limb of the remainder
421422
y.limbs[0] = Limb::select(x.limbs[0], Limb(rem2), limb_div);
@@ -465,17 +466,18 @@ impl UintRef {
465466
let mut i;
466467
let mut carry;
467468

469+
// Compute the adjusted reciprocal
470+
let v = reciprocal.reciprocal_3by2(y.limbs[ysize - 2].0, y.limbs[ysize - 1].0);
471+
468472
// We proceed similarly to `div_rem_large_shifted()` applied to the high half of
469473
// the dividend, fetching the limbs from the lower part as we go.
470474

471475
while xi > 0 {
472476
// Divide high dividend words by the high divisor word to estimate the quotient word
473-
let mut quo = div3by2(
474-
x_hi.0,
475-
x_xi.0,
476-
x.limbs[xi - 1].0,
477-
&reciprocal,
478-
y.limbs[ysize - 2].0,
477+
let (mut quo, _) = div3by2(
478+
(x.limbs[xi - 1].0, x_xi.0, x_hi.0),
479+
(y.limbs[ysize - 2].0, y.limbs[ysize - 1].0),
480+
v,
479481
);
480482

481483
// This loop is a no-op once xi is smaller than the number of words in the divisor
@@ -563,7 +565,7 @@ impl UintRef {
563565
let mut j = self.limbs.len();
564566
while j > 0 {
565567
j -= 1;
566-
(self.limbs[j].0, hi.0) = div2by1(hi.0, self.limbs[j].0, reciprocal);
568+
(self.limbs[j].0, hi.0) = div2by1(self.limbs[j].0, hi.0, reciprocal);
567569
}
568570
hi.shr(reciprocal.shift())
569571
}
@@ -594,9 +596,9 @@ impl UintRef {
594596
j -= 1;
595597
lo = self.limbs[j].0 << lshift;
596598
lo |= word::select(0, self.limbs[j - 1].0 >> rshift, nz);
597-
(_, hi) = div2by1(hi, lo, reciprocal);
599+
(_, hi) = div2by1(lo, hi, reciprocal);
598600
}
599-
(_, hi) = div2by1(hi, self.limbs[0].0 << lshift, reciprocal);
601+
(_, hi) = div2by1(self.limbs[0].0 << lshift, hi, reciprocal);
600602
Limb(hi >> lshift)
601603
}
602604
}

src/word.rs

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ cpubits::cpubits! {
5454
pub(crate) const fn choice_from_wide_le(x: WideWord, y: WideWord) -> Choice {
5555
Choice::from_u128_le(x, y)
5656
}
57+
5758
}
5859
}
5960

@@ -126,6 +127,17 @@ pub(crate) const fn choice_to_wide_mask(choice: Choice) -> WideWord {
126127
(choice.to_u8_vartime() as WideWord).wrapping_neg()
127128
}
128129

130+
#[inline(always)]
131+
pub(crate) const fn join(lo: Word, hi: Word) -> WideWord {
132+
((hi as WideWord) << Word::BITS) | (lo as WideWord)
133+
}
134+
135+
#[inline(always)]
136+
#[allow(clippy::cast_possible_truncation)]
137+
pub(crate) const fn split_wide(wide: WideWord) -> (Word, Word) {
138+
(wide as Word, (wide >> Word::BITS) as Word)
139+
}
140+
129141
#[cfg(test)]
130142
mod tests {
131143
use super::{Choice, WideWord, Word};
@@ -151,6 +163,14 @@ mod tests {
151163
assert!(!super::choice_from_wide_le(6, 5).to_bool());
152164
}
153165

166+
#[test]
167+
fn join_split_wide() {
168+
let a: Word = 1;
169+
let b: Word = 2;
170+
assert_eq!(super::split_wide(super::join(a, b)), (a, b));
171+
assert_eq!(super::split_wide(super::join(b, a)), (b, a));
172+
}
173+
154174
#[test]
155175
fn select() {
156176
let a: Word = 1;

0 commit comments

Comments
 (0)