Skip to content

Commit e6b29b5

Browse files
committed
demo: use funnel_sh[lr] in u256 shifts
1 parent d509bc1 commit e6b29b5

2 files changed

Lines changed: 63 additions & 58 deletions

File tree

libm/src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
#![allow(clippy::unreadable_literal)]
2424
#![allow(clippy::zero_divided_by_zero)]
2525
#![forbid(unsafe_op_in_unsafe_fn)]
26+
#![feature(funnel_shifts)]
2627

2728
mod libm_helper;
2829
mod math;

libm/src/math/support/big.rs

Lines changed: 62 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -165,87 +165,91 @@ macro_rules! impl_common {
165165
Self { lo, hi }
166166
}
167167
}
168+
};
169+
}
168170

169-
impl ops::Shr<u32> for $ty {
170-
type Output = Self;
171+
impl ops::Shr<u32> for u256 {
172+
type Output = Self;
171173

172-
fn shr(self, rhs: u32) -> Self::Output {
173-
debug_assert!(rhs < Self::BITS, "attempt to shift right with overflow");
174-
175-
// Set up an array with the input in the low half, zeros in the upper half
176-
let mut words = [Word::ZERO; U256_WORDS * 2];
177-
words[..U256_WORDS].copy_from_slice(&self.to_words());
178-
179-
if <$ty>::SIGNED {
180-
// For i256, branchlessly set the upper words to all ones if the input
181-
// is negative.
182-
let top_word = words[U256_WORDS - 1].signed() >> (Word::BITS - 1);
183-
for x in &mut words[U256_WORDS..] {
184-
*x = top_word.unsigned();
185-
}
186-
}
187-
188-
let shift = rhs & 255; // limit to 255 in cases of overflow
189-
let word_shift = (shift / Word::BITS) as usize;
190-
let bit_shift = shift % Word::BITS;
191-
192-
let mut ret: [Word; U256_WORDS] = [0; _];
193-
194-
// Each output word is a coarse (word-sized) shift plus a small bit shift. Note that
195-
// these loops get unrolled.
196-
for i in 0..U256_WORDS {
197-
if i < (U256_WORDS - 1) {
198-
let hi = words[word_shift + i + 1];
199-
let lo = words[word_shift + i];
200-
201-
ret[i] = <Word as HInt>::funnel_shr(hi, lo, bit_shift);
202-
} else if <$ty>::SIGNED {
203-
// The upper word doesn't get any sign bits via a funnel shift, so we need
204-
// an arithmetic shift to preserve sign.
205-
let mut x = words[word_shift + i].signed();
206-
x >>= bit_shift;
207-
ret[i] = x.unsigned();
208-
} else {
209-
ret[i] = words[word_shift + i] >> bit_shift;
210-
}
211-
}
212-
213-
<$ty>::from_words(ret)
214-
}
174+
// #[inline(never)]
175+
fn shr(self, rhs: u32) -> Self::Output {
176+
debug_assert!(rhs < Self::BITS, "attempt to shift right with overflow");
177+
178+
if rhs < 128 { // shift stays within one half: bits move from `hi` down into `lo`
179+
let lo = u128::funnel_shr(self.hi, self.lo, rhs); // low 128 bits of (hi:lo) >> rhs
180+
let hi = self.hi >> rhs; // logical shift: u256 is unsigned, so zeros enter from the top
181+
Self { lo, hi }
182+
} else { // shift of a full half or more: only bits of `hi` survive, moved into `lo`
183+
let lo = self.hi >> (rhs - 128);
184+
Self { lo, hi: 0 }
215185
}
216-
};
186+
}
217187
}
218188

219-
impl ops::Shl<u32> for u256 {
189+
impl ops::Shr<u32> for i256 {
220190
type Output = Self;
221191

222-
fn shl(self, rhs: u32) -> Self::Output {
223-
debug_assert!(rhs < Self::BITS, "attempt to shift left with overflow");
192+
// #[inline(never)]
193+
fn shr(self, rhs: u32) -> Self::Output {
194+
debug_assert!(rhs < Self::BITS, "attempt to shift right with overflow");
224195

225196
// Set up an array with the input in the low half, zeros in the upper half
226197
let mut words = [Word::ZERO; U256_WORDS * 2];
227-
words[U256_WORDS..].copy_from_slice(&self.to_words());
198+
words[..U256_WORDS].copy_from_slice(&self.to_words());
199+
200+
if i256::SIGNED {
201+
// For i256, branchlessly set the upper words to all ones if the input
202+
// is negative.
203+
let top_word = words[U256_WORDS - 1].signed() >> (Word::BITS - 1);
204+
for x in &mut words[U256_WORDS..] {
205+
*x = top_word.unsigned();
206+
}
207+
}
228208

229209
let shift = rhs & 255; // limit to 255 in cases of overflow
230-
let word_shift = U256_WORDS - (shift / Word::BITS) as usize;
210+
let word_shift = (shift / Word::BITS) as usize;
231211
let bit_shift = shift % Word::BITS;
232212

233213
let mut ret: [Word; U256_WORDS] = [0; _];
234214

235215
// Each output word is a coarse (word-sized) shift plus a small bit shift. Note that
236216
// these loops get unrolled.
237217
for i in 0..U256_WORDS {
238-
if i == 0 {
239-
ret[i] = words[word_shift + i] << bit_shift;
218+
if i < (U256_WORDS - 1) {
219+
let hi = words[word_shift + i + 1];
220+
let lo = words[word_shift + i];
221+
222+
ret[i] = <Word as HInt>::funnel_shr(hi, lo, bit_shift);
223+
} else if i256::SIGNED {
224+
// The upper word doesn't get any sign bits via a funnel shift, so we need
225+
// an arithmetic shift to preserve sign.
226+
let mut x = words[word_shift + i].signed();
227+
x >>= bit_shift;
228+
ret[i] = x.unsigned();
240229
} else {
241-
let hi = words[word_shift + i];
242-
let lo = words[word_shift + i - 1];
243-
244-
ret[i] = <Word as HInt>::funnel_shl(hi, lo, bit_shift);
230+
ret[i] = words[word_shift + i] >> bit_shift;
245231
}
246232
}
247233

248-
u256::from_words(ret)
234+
i256::from_words(ret)
235+
}
236+
}
237+
238+
impl ops::Shl<u32> for u256 {
239+
type Output = Self;
240+
241+
#[inline(never)]
242+
fn shl(self, rhs: u32) -> Self::Output {
243+
debug_assert!(rhs < Self::BITS, "attempt to shift left with overflow");
244+
245+
if rhs < 128 { // shift stays within one half: bits move from `lo` up into `hi`
246+
let hi = u128::funnel_shl(self.hi, self.lo, rhs); // upper 128 bits of (hi:lo) << rhs
247+
let lo = self.lo << rhs;
248+
Self { lo, hi }
249+
} else { // shift of a full half or more: only bits of `lo` survive, moved into `hi`
250+
let hi = self.lo << (rhs - 128);
251+
Self { lo: 0, hi }
252+
}
249253
}
250254
}
251255

0 commit comments

Comments
 (0)